etlplus 0.16.9__tar.gz → 0.17.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {etlplus-0.16.9 → etlplus-0.17.2}/.github/workflows/ci.yml +5 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/.ruff.toml +4 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/Makefile +10 -2
- {etlplus-0.16.9/etlplus.egg-info → etlplus-0.17.2}/PKG-INFO +44 -26
- {etlplus-0.16.9 → etlplus-0.17.2}/README.md +31 -25
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/README.md +33 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/arrow.py +35 -5
- etlplus-0.17.2/etlplus/file/bson.py +124 -0
- etlplus-0.17.2/etlplus/file/cbor.py +104 -0
- etlplus-0.17.2/etlplus/file/dat.py +117 -0
- etlplus-0.17.2/etlplus/file/dta.py +113 -0
- etlplus-0.17.2/etlplus/file/duckdb.py +234 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/enums.py +29 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/fwf.py +37 -5
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/hdf5.py +41 -3
- etlplus-0.17.2/etlplus/file/ini.py +131 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/msgpack.py +33 -9
- etlplus-0.17.2/etlplus/file/nc.py +128 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/ods.py +39 -6
- etlplus-0.17.2/etlplus/file/pb.py +93 -0
- etlplus-0.17.2/etlplus/file/properties.py +120 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/proto.py +24 -12
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/psv.py +5 -5
- etlplus-0.17.2/etlplus/file/rda.py +154 -0
- etlplus-0.17.2/etlplus/file/rds.py +147 -0
- etlplus-0.17.2/etlplus/file/sas7bdat.py +110 -0
- etlplus-0.17.2/etlplus/file/sav.py +107 -0
- etlplus-0.17.2/etlplus/file/sqlite.py +198 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/tab.py +6 -7
- etlplus-0.17.2/etlplus/file/toml.py +122 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/xlsm.py +39 -5
- etlplus-0.17.2/etlplus/file/xpt.py +136 -0
- {etlplus-0.16.9 → etlplus-0.17.2/etlplus.egg-info}/PKG-INFO +44 -26
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus.egg-info/SOURCES.txt +8 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus.egg-info/requires.txt +13 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/pyproject.toml +14 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/setup.py +14 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/__init__.py +1 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/conftest.py +186 -28
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/integration/conftest.py +3 -6
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/integration/test_i_cli.py +13 -7
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/integration/test_i_config_load.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/integration/test_i_examples_data_parity.py +5 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/integration/test_i_pagination_strategy.py +31 -4
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/integration/test_i_run.py +5 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/integration/test_i_run_profile_pagination_defaults.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/integration/test_i_run_profile_rate_limit_defaults.py +2 -1
- etlplus-0.17.2/tests/smoke/__init__.py +12 -0
- etlplus-0.17.2/tests/smoke/conftest.py +214 -0
- etlplus-0.17.2/tests/smoke/test_s_cli_check.py +48 -0
- etlplus-0.17.2/tests/smoke/test_s_cli_extract.py +45 -0
- etlplus-0.17.2/tests/smoke/test_s_cli_load.py +57 -0
- etlplus-0.17.2/tests/smoke/test_s_cli_render.py +65 -0
- etlplus-0.17.2/tests/smoke/test_s_cli_transform.py +52 -0
- etlplus-0.17.2/tests/smoke/test_s_cli_validate.py +47 -0
- etlplus-0.17.2/tests/smoke/test_s_pipeline.py +72 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/conftest.py +19 -99
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_auth.py +3 -2
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_config.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_endpoint_client.py +6 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_enums.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_pagination_client.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_pagination_config.py +3 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_paginator.py +3 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_rate_limit_config.py +3 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_rate_limiter.py +3 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_request_manager.py +5 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_retry_manager.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_transport.py +3 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_types.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_utils.py +5 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/cli/conftest.py +37 -17
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/cli/test_u_cli_handlers.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/cli/test_u_cli_io.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/cli/test_u_cli_main.py +6 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/cli/test_u_cli_state.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/conftest.py +17 -123
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/connector/test_u_connector_enums.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/connector/test_u_connector_utils.py +3 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/database/test_u_database_ddl.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/database/test_u_database_engine.py +6 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/database/test_u_database_orm.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/database/test_u_database_schema.py +6 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/file/test_u_file_core.py +165 -52
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/file/test_u_file_enums.py +5 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/file/test_u_file_yaml.py +5 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/ops/test_u_ops_enums.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/ops/test_u_ops_extract.py +5 -6
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/ops/test_u_ops_load.py +5 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/ops/test_u_ops_run.py +5 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/ops/test_u_ops_transform.py +5 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/ops/test_u_ops_utils.py +7 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/ops/test_u_ops_validate.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/test_u_config.py +5 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/test_u_main.py +6 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/test_u_mixins.py +3 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/test_u_utils.py +2 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/test_u_version.py +5 -1
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/workflow/test_u_workflow_jobs.py +9 -4
- etlplus-0.16.9/etlplus/file/bson.py +0 -79
- etlplus-0.16.9/etlplus/file/cbor.py +0 -80
- etlplus-0.16.9/etlplus/file/dat.py +0 -79
- etlplus-0.16.9/etlplus/file/dta.py +0 -78
- etlplus-0.16.9/etlplus/file/duckdb.py +0 -80
- etlplus-0.16.9/etlplus/file/ini.py +0 -81
- etlplus-0.16.9/etlplus/file/nc.py +0 -79
- etlplus-0.16.9/etlplus/file/pb.py +0 -80
- etlplus-0.16.9/etlplus/file/properties.py +0 -80
- etlplus-0.16.9/etlplus/file/rda.py +0 -80
- etlplus-0.16.9/etlplus/file/rds.py +0 -79
- etlplus-0.16.9/etlplus/file/sas7bdat.py +0 -80
- etlplus-0.16.9/etlplus/file/sav.py +0 -78
- etlplus-0.16.9/etlplus/file/sqlite.py +0 -80
- etlplus-0.16.9/etlplus/file/toml.py +0 -80
- etlplus-0.16.9/etlplus/file/xpt.py +0 -80
- etlplus-0.16.9/tests/smoke/test_s_pipeline.py +0 -109
- {etlplus-0.16.9 → etlplus-0.17.2}/.coveragerc +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/.editorconfig +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/.gitattributes +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/.github/actions/python-bootstrap/action.yml +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/.gitignore +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/.pre-commit-config.yaml +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/CODE_OF_CONDUCT.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/CONTRIBUTING.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/DEMO.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/LICENSE +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/MANIFEST.in +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/REFERENCES.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/SECURITY.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/SUPPORT.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/docs/README.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/docs/pipeline-guide.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/docs/snippets/installation_version.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/README.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/__init__.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/__main__.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/__version__.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/README.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/__init__.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/auth.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/config.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/endpoint_client.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/enums.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/errors.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/pagination/__init__.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/pagination/client.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/pagination/config.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/pagination/paginator.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/rate_limiting/__init__.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/rate_limiting/config.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/request_manager.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/retry_manager.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/transport.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/types.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/api/utils.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/cli/README.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/cli/__init__.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/cli/commands.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/cli/constants.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/cli/handlers.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/cli/io.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/cli/main.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/cli/options.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/cli/state.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/cli/types.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/config.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/connector/__init__.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/connector/api.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/connector/connector.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/connector/core.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/connector/database.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/connector/enums.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/connector/file.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/connector/types.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/connector/utils.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/database/README.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/database/__init__.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/database/ddl.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/database/engine.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/database/orm.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/database/schema.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/database/types.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/enums.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/__init__.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/_imports.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/_io.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/accdb.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/avro.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/cfg.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/conf.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/core.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/csv.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/feather.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/gz.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/hbs.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/ion.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/jinja2.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/json.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/log.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/mat.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/mdb.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/mustache.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/ndjson.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/numbers.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/orc.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/parquet.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/pbf.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/stub.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/sylk.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/tsv.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/txt.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/vm.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/wks.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/xls.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/xlsx.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/xml.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/yaml.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/zip.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/file/zsav.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/mixins.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/ops/README.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/ops/__init__.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/ops/enums.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/ops/extract.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/ops/load.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/ops/run.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/ops/transform.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/ops/types.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/ops/utils.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/ops/validate.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/py.typed +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/templates/README.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/templates/__init__.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/templates/ddl.sql.j2 +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/templates/view.sql.j2 +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/types.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/utils.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/workflow/README.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/workflow/__init__.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/workflow/dag.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/workflow/jobs.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus/workflow/profile.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus.egg-info/dependency_links.txt +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus.egg-info/entry_points.txt +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/etlplus.egg-info/top_level.txt +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/examples/README.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/examples/configs/ddl_spec.yml +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/examples/configs/pipeline.yml +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/examples/data/sample.csv +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/examples/data/sample.json +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/examples/data/sample.xml +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/examples/data/sample.xsd +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/examples/data/sample.yaml +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/examples/quickstart.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/pytest.ini +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/setup.cfg +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/README.md +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/tests/unit/api/test_u_api_mocks.py +0 -0
- {etlplus-0.16.9 → etlplus-0.17.2}/tools/update_demo_snippets.py +0 -0
|
@@ -62,7 +62,11 @@ jobs:
|
|
|
62
62
|
run: |
|
|
63
63
|
ruff version
|
|
64
64
|
ruff check .
|
|
65
|
-
ruff format --check .
|
|
65
|
+
# ruff format --check .
|
|
66
|
+
files="$(git ls-files '*.py')"
|
|
67
|
+
if [ -n "$files" ]; then
|
|
68
|
+
autopep8 --diff --exit-code --max-line-length=79 $files
|
|
69
|
+
fi
|
|
66
70
|
|
|
67
71
|
test:
|
|
68
72
|
runs-on: ubuntu-latest
|
|
@@ -172,6 +172,11 @@ dist: ## Build sdist and wheel into ./dist using pyproject.toml
|
|
|
172
172
|
@$(PYTHON) -m twine check dist/*
|
|
173
173
|
@$(call ECHO_OK,"Built and validated distribution artifacts in ./dist")
|
|
174
174
|
|
|
175
|
+
.PHONY: file
|
|
176
|
+
file: venv ## Install package + file extras
|
|
177
|
+
@$(PYTHON) -m pip install -e $(PKG_DIR)[file]
|
|
178
|
+
@$(call ECHO_OK,"Installed etlplus + file extras")
|
|
179
|
+
|
|
175
180
|
.PHONY: fix
|
|
176
181
|
fix: ## Auto-fix with ruff
|
|
177
182
|
@$(VENV_BIN)/ruff check . --fix || (echo "Hint: run 'make dev' first" && false)
|
|
@@ -211,8 +216,11 @@ doclint: ## Run docstring linters (pydocstyle + pydoclint if available)
|
|
|
211
216
|
.PHONY: fmt
|
|
212
217
|
fmt: ## Format code with ruff (imports + fixes) and black
|
|
213
218
|
@$(VENV_BIN)/ruff check . --fix || (echo "Hint: run 'make dev' first" && false)
|
|
214
|
-
|
|
215
|
-
|
|
219
|
+
# @$(VENV_BIN)/ruff format . || true
|
|
220
|
+
# @$(VENV_BIN)/black . || true
|
|
221
|
+
@$(VENV_BIN)/autopep8 --in-place --max-line-length=79 \
|
|
222
|
+
--exclude .venv,dist,build,etlplus.egg-info,.mypy_cache,.pytest_cache \
|
|
223
|
+
$(shell git ls-files '*.py') || true
|
|
216
224
|
|
|
217
225
|
.PHONY: run
|
|
218
226
|
run: ## Run the etlplus CLI (dry-run) using $(ENV)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: etlplus
|
|
3
|
-
Version: 0.16.9
|
|
3
|
+
Version: 0.17.2
|
|
4
4
|
Summary: A Swiss Army knife for simple ETL operations
|
|
5
5
|
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
6
|
Author: ETLPlus Team
|
|
@@ -32,6 +32,7 @@ Requires-Dist: typer>=0.21.0
|
|
|
32
32
|
Requires-Dist: xlrd>=2.0.2
|
|
33
33
|
Requires-Dist: xlwt>=1.3.0
|
|
34
34
|
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: autopep8>=2.3.2; extra == "dev"
|
|
35
36
|
Requires-Dist: black>=25.9.0; extra == "dev"
|
|
36
37
|
Requires-Dist: build>=1.2.2; extra == "dev"
|
|
37
38
|
Requires-Dist: flake8>=7.3.0; extra == "dev"
|
|
@@ -44,6 +45,17 @@ Requires-Dist: ruff>=0.14.4; extra == "dev"
|
|
|
44
45
|
Provides-Extra: docs
|
|
45
46
|
Requires-Dist: sphinx>=4.0.0; extra == "docs"
|
|
46
47
|
Requires-Dist: sphinx-rtd-theme>=1.0.0; extra == "docs"
|
|
48
|
+
Provides-Extra: file
|
|
49
|
+
Requires-Dist: pymongo>=4.9.1; extra == "file"
|
|
50
|
+
Requires-Dist: cbor2>=5.6.4; extra == "file"
|
|
51
|
+
Requires-Dist: duckdb>=1.1.0; extra == "file"
|
|
52
|
+
Requires-Dist: msgpack>=1.0.8; extra == "file"
|
|
53
|
+
Requires-Dist: netCDF4>=1.7.2; extra == "file"
|
|
54
|
+
Requires-Dist: odfpy>=1.4.1; extra == "file"
|
|
55
|
+
Requires-Dist: pyreadr>=0.5.2; extra == "file"
|
|
56
|
+
Requires-Dist: pyreadstat>=1.3.3; extra == "file"
|
|
57
|
+
Requires-Dist: tomli-w>=1.2.0; extra == "file"
|
|
58
|
+
Requires-Dist: xarray>=2024.9.0; extra == "file"
|
|
47
59
|
Dynamic: home-page
|
|
48
60
|
Dynamic: license-file
|
|
49
61
|
Dynamic: requires-python
|
|
@@ -176,6 +188,12 @@ For development:
|
|
|
176
188
|
pip install -e ".[dev]"
|
|
177
189
|
```
|
|
178
190
|
|
|
191
|
+
For full file-format support (optional extras):
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
pip install -e ".[file]"
|
|
195
|
+
```
|
|
196
|
+
|
|
179
197
|
## Quickstart
|
|
180
198
|
|
|
181
199
|
Get up and running in under a minute.
|
|
@@ -240,10 +258,10 @@ Recognized file formats are listed in the tables below. Support for reading to o
|
|
|
240
258
|
| Format | Read | Write | Description |
|
|
241
259
|
| --- | --- | --- | --- |
|
|
242
260
|
| `csv` | Y | Y | Comma-Separated Values |
|
|
243
|
-
| `dat` |
|
|
244
|
-
| `fwf` |
|
|
245
|
-
| `psv` |
|
|
246
|
-
| `tab` |
|
|
261
|
+
| `dat` | Y | Y | Generic data file, often delimited or fixed-width |
|
|
262
|
+
| `fwf` | Y | Y | Fixed-Width Fields |
|
|
263
|
+
| `psv` | Y | Y | Pipe-Separated Values |
|
|
264
|
+
| `tab` | Y | Y | Often synonymous with TSV |
|
|
247
265
|
| `tsv` | Y | Y | Tab-Separated Values |
|
|
248
266
|
| `txt` | Y | Y | Plain text, often delimited or fixed-width |
|
|
249
267
|
|
|
@@ -253,11 +271,11 @@ Recognized file formats are listed in the tables below. Support for reading to o
|
|
|
253
271
|
| --- | --- | --- | --- |
|
|
254
272
|
| `cfg` | N | N | Config-style key-value pairs |
|
|
255
273
|
| `conf` | N | N | Config-style key-value pairs |
|
|
256
|
-
| `ini` |
|
|
274
|
+
| `ini` | Y | Y | Config-style key-value pairs |
|
|
257
275
|
| `json` | Y | Y | JavaScript Object Notation |
|
|
258
276
|
| `ndjson` | Y | Y | Newline-Delimited JSON |
|
|
259
|
-
| `properties` |
|
|
260
|
-
| `toml` |
|
|
277
|
+
| `properties` | Y | Y | Java-style key-value pairs |
|
|
278
|
+
| `toml` | Y | Y | Tom's Obvious Minimal Language |
|
|
261
279
|
| `xml` | Y | Y | Extensible Markup Language |
|
|
262
280
|
| `yaml` | Y | Y | YAML Ain't Markup Language |
|
|
263
281
|
|
|
@@ -265,7 +283,7 @@ Recognized file formats are listed in the tables below. Support for reading to o
|
|
|
265
283
|
|
|
266
284
|
| Format | Read | Write | Description |
|
|
267
285
|
| --- | --- | --- | --- |
|
|
268
|
-
| `arrow` |
|
|
286
|
+
| `arrow` | Y | Y | Apache Arrow IPC |
|
|
269
287
|
| `feather` | Y | Y | Apache Arrow Feather |
|
|
270
288
|
| `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
|
|
271
289
|
| `parquet` | Y | Y | Apache Parquet; common in Big Data |
|
|
@@ -275,48 +293,48 @@ Recognized file formats are listed in the tables below. Support for reading to o
|
|
|
275
293
|
| Format | Read | Write | Description |
|
|
276
294
|
| --- | --- | --- | --- |
|
|
277
295
|
| `avro` | Y | Y | Apache Avro |
|
|
278
|
-
| `bson` |
|
|
279
|
-
| `cbor` |
|
|
296
|
+
| `bson` | Y | Y | Binary JSON; common with MongoDB exports/dumps |
|
|
297
|
+
| `cbor` | Y | Y | Concise Binary Object Representation |
|
|
280
298
|
| `ion` | N | N | Amazon Ion |
|
|
281
|
-
| `msgpack` |
|
|
282
|
-
| `pb` |
|
|
299
|
+
| `msgpack` | Y | Y | MessagePack |
|
|
300
|
+
| `pb` | Y | Y | Protocol Buffers (Google Protobuf) |
|
|
283
301
|
| `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
|
|
284
|
-
| `proto` |
|
|
302
|
+
| `proto` | Y | Y | Protocol Buffers schema; often in .pb / .bin |
|
|
285
303
|
|
|
286
304
|
#### Databases and Embedded Storage
|
|
287
305
|
|
|
288
306
|
| Format | Read | Write | Description |
|
|
289
307
|
| --- | --- | --- | --- |
|
|
290
308
|
| `accdb` | N | N | Microsoft Access (newer format) |
|
|
291
|
-
| `duckdb` |
|
|
309
|
+
| `duckdb` | Y | Y | DuckDB |
|
|
292
310
|
| `mdb` | N | N | Microsoft Access (older format) |
|
|
293
|
-
| `sqlite` |
|
|
311
|
+
| `sqlite` | Y | Y | SQLite |
|
|
294
312
|
|
|
295
313
|
#### Spreadsheets
|
|
296
314
|
|
|
297
315
|
| Format | Read | Write | Description |
|
|
298
316
|
| --- | --- | --- | --- |
|
|
299
317
|
| `numbers` | N | N | Apple Numbers |
|
|
300
|
-
| `ods` |
|
|
318
|
+
| `ods` | Y | Y | OpenDocument |
|
|
301
319
|
| `wks` | N | N | Lotus 1-2-3 |
|
|
302
320
|
| `xls` | Y | N | Microsoft Excel (BIFF; read-only) |
|
|
303
|
-
| `xlsm` |
|
|
321
|
+
| `xlsm` | Y | Y | Microsoft Excel Macro-Enabled (Open XML) |
|
|
304
322
|
| `xlsx` | Y | Y | Microsoft Excel (Open XML) |
|
|
305
323
|
|
|
306
324
|
#### Statistical / Scientific / Numeric Computing
|
|
307
325
|
|
|
308
326
|
| Format | Read | Write | Description |
|
|
309
327
|
| --- | --- | --- | --- |
|
|
310
|
-
| `dta` |
|
|
311
|
-
| `hdf5` |
|
|
328
|
+
| `dta` | Y | Y | Stata |
|
|
329
|
+
| `hdf5` | Y | N | Hierarchical Data Format |
|
|
312
330
|
| `mat` | N | N | MATLAB |
|
|
313
|
-
| `nc` |
|
|
314
|
-
| `rda` |
|
|
315
|
-
| `rds` |
|
|
316
|
-
| `sas7bdat` |
|
|
317
|
-
| `sav` |
|
|
331
|
+
| `nc` | Y | Y | NetCDF |
|
|
332
|
+
| `rda` | Y | Y | RData workspace/object |
|
|
333
|
+
| `rds` | Y | Y | R data |
|
|
334
|
+
| `sas7bdat` | Y | N | SAS data |
|
|
335
|
+
| `sav` | Y | Y | SPSS data |
|
|
318
336
|
| `sylk` | N | N | Symbolic Link |
|
|
319
|
-
| `xpt` |
|
|
337
|
+
| `xpt` | Y | Y | SAS Transport |
|
|
320
338
|
| `zsav` | N | N | Compressed SPSS data |
|
|
321
339
|
|
|
322
340
|
#### Logs and Event Streams
|
|
@@ -126,6 +126,12 @@ For development:
|
|
|
126
126
|
pip install -e ".[dev]"
|
|
127
127
|
```
|
|
128
128
|
|
|
129
|
+
For full file-format support (optional extras):
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
pip install -e ".[file]"
|
|
133
|
+
```
|
|
134
|
+
|
|
129
135
|
## Quickstart
|
|
130
136
|
|
|
131
137
|
Get up and running in under a minute.
|
|
@@ -190,10 +196,10 @@ Recognized file formats are listed in the tables below. Support for reading to o
|
|
|
190
196
|
| Format | Read | Write | Description |
|
|
191
197
|
| --- | --- | --- | --- |
|
|
192
198
|
| `csv` | Y | Y | Comma-Separated Values |
|
|
193
|
-
| `dat` |
|
|
194
|
-
| `fwf` |
|
|
195
|
-
| `psv` |
|
|
196
|
-
| `tab` |
|
|
199
|
+
| `dat` | Y | Y | Generic data file, often delimited or fixed-width |
|
|
200
|
+
| `fwf` | Y | Y | Fixed-Width Fields |
|
|
201
|
+
| `psv` | Y | Y | Pipe-Separated Values |
|
|
202
|
+
| `tab` | Y | Y | Often synonymous with TSV |
|
|
197
203
|
| `tsv` | Y | Y | Tab-Separated Values |
|
|
198
204
|
| `txt` | Y | Y | Plain text, often delimited or fixed-width |
|
|
199
205
|
|
|
@@ -203,11 +209,11 @@ Recognized file formats are listed in the tables below. Support for reading to o
|
|
|
203
209
|
| --- | --- | --- | --- |
|
|
204
210
|
| `cfg` | N | N | Config-style key-value pairs |
|
|
205
211
|
| `conf` | N | N | Config-style key-value pairs |
|
|
206
|
-
| `ini` |
|
|
212
|
+
| `ini` | Y | Y | Config-style key-value pairs |
|
|
207
213
|
| `json` | Y | Y | JavaScript Object Notation |
|
|
208
214
|
| `ndjson` | Y | Y | Newline-Delimited JSON |
|
|
209
|
-
| `properties` |
|
|
210
|
-
| `toml` |
|
|
215
|
+
| `properties` | Y | Y | Java-style key-value pairs |
|
|
216
|
+
| `toml` | Y | Y | Tom's Obvious Minimal Language |
|
|
211
217
|
| `xml` | Y | Y | Extensible Markup Language |
|
|
212
218
|
| `yaml` | Y | Y | YAML Ain't Markup Language |
|
|
213
219
|
|
|
@@ -215,7 +221,7 @@ Recognized file formats are listed in the tables below. Support for reading to o
|
|
|
215
221
|
|
|
216
222
|
| Format | Read | Write | Description |
|
|
217
223
|
| --- | --- | --- | --- |
|
|
218
|
-
| `arrow` |
|
|
224
|
+
| `arrow` | Y | Y | Apache Arrow IPC |
|
|
219
225
|
| `feather` | Y | Y | Apache Arrow Feather |
|
|
220
226
|
| `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
|
|
221
227
|
| `parquet` | Y | Y | Apache Parquet; common in Big Data |
|
|
@@ -225,48 +231,48 @@ Recognized file formats are listed in the tables below. Support for reading to o
|
|
|
225
231
|
| Format | Read | Write | Description |
|
|
226
232
|
| --- | --- | --- | --- |
|
|
227
233
|
| `avro` | Y | Y | Apache Avro |
|
|
228
|
-
| `bson` |
|
|
229
|
-
| `cbor` |
|
|
234
|
+
| `bson` | Y | Y | Binary JSON; common with MongoDB exports/dumps |
|
|
235
|
+
| `cbor` | Y | Y | Concise Binary Object Representation |
|
|
230
236
|
| `ion` | N | N | Amazon Ion |
|
|
231
|
-
| `msgpack` |
|
|
232
|
-
| `pb` |
|
|
237
|
+
| `msgpack` | Y | Y | MessagePack |
|
|
238
|
+
| `pb` | Y | Y | Protocol Buffers (Google Protobuf) |
|
|
233
239
|
| `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
|
|
234
|
-
| `proto` |
|
|
240
|
+
| `proto` | Y | Y | Protocol Buffers schema; often in .pb / .bin |
|
|
235
241
|
|
|
236
242
|
#### Databases and Embedded Storage
|
|
237
243
|
|
|
238
244
|
| Format | Read | Write | Description |
|
|
239
245
|
| --- | --- | --- | --- |
|
|
240
246
|
| `accdb` | N | N | Microsoft Access (newer format) |
|
|
241
|
-
| `duckdb` |
|
|
247
|
+
| `duckdb` | Y | Y | DuckDB |
|
|
242
248
|
| `mdb` | N | N | Microsoft Access (older format) |
|
|
243
|
-
| `sqlite` |
|
|
249
|
+
| `sqlite` | Y | Y | SQLite |
|
|
244
250
|
|
|
245
251
|
#### Spreadsheets
|
|
246
252
|
|
|
247
253
|
| Format | Read | Write | Description |
|
|
248
254
|
| --- | --- | --- | --- |
|
|
249
255
|
| `numbers` | N | N | Apple Numbers |
|
|
250
|
-
| `ods` |
|
|
256
|
+
| `ods` | Y | Y | OpenDocument |
|
|
251
257
|
| `wks` | N | N | Lotus 1-2-3 |
|
|
252
258
|
| `xls` | Y | N | Microsoft Excel (BIFF; read-only) |
|
|
253
|
-
| `xlsm` |
|
|
259
|
+
| `xlsm` | Y | Y | Microsoft Excel Macro-Enabled (Open XML) |
|
|
254
260
|
| `xlsx` | Y | Y | Microsoft Excel (Open XML) |
|
|
255
261
|
|
|
256
262
|
#### Statistical / Scientific / Numeric Computing
|
|
257
263
|
|
|
258
264
|
| Format | Read | Write | Description |
|
|
259
265
|
| --- | --- | --- | --- |
|
|
260
|
-
| `dta` |
|
|
261
|
-
| `hdf5` |
|
|
266
|
+
| `dta` | Y | Y | Stata |
|
|
267
|
+
| `hdf5` | Y | N | Hierarchical Data Format |
|
|
262
268
|
| `mat` | N | N | MATLAB |
|
|
263
|
-
| `nc` |
|
|
264
|
-
| `rda` |
|
|
265
|
-
| `rds` |
|
|
266
|
-
| `sas7bdat` |
|
|
267
|
-
| `sav` |
|
|
269
|
+
| `nc` | Y | Y | NetCDF |
|
|
270
|
+
| `rda` | Y | Y | RData workspace/object |
|
|
271
|
+
| `rds` | Y | Y | R data |
|
|
272
|
+
| `sas7bdat` | Y | N | SAS data |
|
|
273
|
+
| `sav` | Y | Y | SPSS data |
|
|
268
274
|
| `sylk` | N | N | Symbolic Link |
|
|
269
|
-
| `xpt` |
|
|
275
|
+
| `xpt` | Y | Y | SAS Transport |
|
|
270
276
|
| `zsav` | N | N | Compressed SPSS data |
|
|
271
277
|
|
|
272
278
|
#### Logs and Event Streams
|
|
@@ -9,6 +9,12 @@ and writing data files.
|
|
|
9
9
|
types
|
|
10
10
|
- Exposes a `File` class with instance methods for reading and writing data
|
|
11
11
|
|
|
12
|
+
Some formats require optional dependencies. Install with:
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install -e ".[file]"
|
|
16
|
+
```
|
|
17
|
+
|
|
12
18
|
Back to project overview: see the top-level [README](../../README.md).
|
|
13
19
|
|
|
14
20
|
- [`etlplus.file` Subpackage](#etlplusfile-subpackage)
|
|
@@ -29,21 +35,48 @@ matrix across all `FileFormat` values, see the top-level [README](../../README.m
|
|
|
29
35
|
| Format | Description |
|
|
30
36
|
|-----------|---------------------------------------------|
|
|
31
37
|
| avro | Apache Avro binary serialization |
|
|
38
|
+
| arrow | Apache Arrow IPC |
|
|
39
|
+
| bson | Binary JSON (BSON) |
|
|
40
|
+
| cbor | Concise Binary Object Representation |
|
|
32
41
|
| csv | Comma-separated values text files |
|
|
42
|
+
| dat | Generic data files (delimited) |
|
|
43
|
+
| dta | Stata datasets |
|
|
44
|
+
| duckdb | DuckDB database file |
|
|
33
45
|
| feather | Apache Arrow Feather columnar format |
|
|
46
|
+
| fwf | Fixed-width formatted text files |
|
|
34
47
|
| gz | Gzip-compressed files (see Compression) |
|
|
48
|
+
| hdf5 | Hierarchical Data Format |
|
|
49
|
+
| ini | INI config files |
|
|
35
50
|
| json | Standard JSON files |
|
|
51
|
+
| msgpack | MessagePack binary serialization |
|
|
52
|
+
| nc | NetCDF datasets |
|
|
36
53
|
| ndjson | Newline-delimited JSON (JSON Lines) |
|
|
54
|
+
| ods | OpenDocument spreadsheets |
|
|
37
55
|
| orc | Apache ORC columnar format |
|
|
38
56
|
| parquet | Apache Parquet columnar format |
|
|
57
|
+
| pb | Protocol Buffers binary |
|
|
58
|
+
| properties | Java-style properties |
|
|
59
|
+
| proto | Protocol Buffers schema |
|
|
60
|
+
| psv | Pipe-separated values text files |
|
|
61
|
+
| rda | RData workspace bundles |
|
|
62
|
+
| rds | RDS datasets |
|
|
63
|
+
| sas7bdat | SAS datasets |
|
|
64
|
+
| sav | SPSS datasets |
|
|
65
|
+
| sqlite | SQLite database file |
|
|
66
|
+
| tab | Tab-delimited text files |
|
|
67
|
+
| toml | TOML config files |
|
|
39
68
|
| tsv | Tab-separated values text files |
|
|
40
69
|
| txt | Plain text files |
|
|
41
70
|
| xls | Microsoft Excel (legacy .xls; read-only) |
|
|
71
|
+
| xlsm | Microsoft Excel Macro-Enabled (XLSM) |
|
|
42
72
|
| xlsx | Microsoft Excel (modern .xlsx) |
|
|
73
|
+
| xpt | SAS transport files |
|
|
43
74
|
| zip | ZIP-compressed files (see Compression) |
|
|
44
75
|
| xml | XML files |
|
|
45
76
|
| yaml | YAML files |
|
|
46
77
|
|
|
78
|
+
Note: HDF5 support is read-only; writing is currently disabled.
|
|
79
|
+
|
|
47
80
|
Compression formats (gz, zip) are also supported as wrappers for other formats. Formats not listed
|
|
48
81
|
here are currently stubbed and will raise `NotImplementedError` on read/write.
|
|
49
82
|
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.arrow` module.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
yet).
|
|
4
|
+
Helpers for reading/writing Apache Arrow (ARROW) files.
|
|
6
5
|
|
|
7
6
|
Notes
|
|
8
7
|
-----
|
|
@@ -20,10 +19,13 @@ Notes
|
|
|
20
19
|
from __future__ import annotations
|
|
21
20
|
|
|
22
21
|
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
|
+
from typing import cast
|
|
23
24
|
|
|
24
25
|
from ..types import JSONData
|
|
25
26
|
from ..types import JSONList
|
|
26
|
-
from . import
|
|
27
|
+
from ._imports import get_optional_module
|
|
28
|
+
from ._io import normalize_records
|
|
27
29
|
|
|
28
30
|
# SECTION: EXPORTS ========================================================== #
|
|
29
31
|
|
|
@@ -35,6 +37,20 @@ __all__ = [
|
|
|
35
37
|
]
|
|
36
38
|
|
|
37
39
|
|
|
40
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _get_pyarrow() -> Any:
    """
    Return the ``pyarrow`` module via the optional-import helper.

    Returns
    -------
    Any
        Whatever :func:`get_optional_module` returns for ``'pyarrow'``.

    Notes
    -----
    The installation hint below is handed to the helper as its
    ``error_message``; presumably it is surfaced when the dependency is
    missing (confirm against ``_imports.get_optional_module``).
    """
    install_hint = (
        'ARROW support requires optional dependency "pyarrow".\n'
        'Install with: pip install pyarrow'
    )
    return get_optional_module('pyarrow', error_message=install_hint)
|
|
52
|
+
|
|
53
|
+
|
|
38
54
|
# SECTION: FUNCTIONS ======================================================== #
|
|
39
55
|
|
|
40
56
|
|
|
@@ -54,7 +70,11 @@ def read(
|
|
|
54
70
|
JSONList
|
|
55
71
|
The list of dictionaries read from the Apache Arrow file.
|
|
56
72
|
"""
|
|
57
|
-
|
|
73
|
+
pyarrow = _get_pyarrow()
|
|
74
|
+
with pyarrow.memory_map(str(path), 'r') as source:
|
|
75
|
+
reader = pyarrow.ipc.open_file(source)
|
|
76
|
+
table = reader.read_all()
|
|
77
|
+
return cast(JSONList, table.to_pylist())
|
|
58
78
|
|
|
59
79
|
|
|
60
80
|
def write(
|
|
@@ -77,4 +97,14 @@ def write(
|
|
|
77
97
|
int
|
|
78
98
|
The number of rows written to the ARROW file.
|
|
79
99
|
"""
|
|
80
|
-
|
|
100
|
+
records = normalize_records(data, 'ARROW')
|
|
101
|
+
if not records:
|
|
102
|
+
return 0
|
|
103
|
+
|
|
104
|
+
pyarrow = _get_pyarrow()
|
|
105
|
+
table = pyarrow.Table.from_pylist(records)
|
|
106
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
107
|
+
with pyarrow.OSFile(str(path), 'wb') as sink:
|
|
108
|
+
with pyarrow.ipc.new_file(sink, table.schema) as writer:
|
|
109
|
+
writer.write_table(table)
|
|
110
|
+
return len(records)
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.bson` module.
|
|
3
|
+
|
|
4
|
+
Helpers for reading/writing Binary JSON (BSON) files.
|
|
5
|
+
|
|
6
|
+
Notes
|
|
7
|
+
-----
|
|
8
|
+
- A BSON file is a binary-encoded serialization of JSON-like documents.
|
|
9
|
+
- Common cases:
|
|
10
|
+
- Data storage in MongoDB.
|
|
11
|
+
- Efficient data interchange between systems.
|
|
12
|
+
- Handling of complex data types not supported in standard JSON.
|
|
13
|
+
- Rule of thumb:
|
|
14
|
+
- If the file follows the BSON specification, use this module for reading
|
|
15
|
+
and writing.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any
|
|
22
|
+
from typing import cast
|
|
23
|
+
|
|
24
|
+
from ..types import JSONData
|
|
25
|
+
from ..types import JSONList
|
|
26
|
+
from ._imports import get_optional_module
|
|
27
|
+
from ._io import normalize_records
|
|
28
|
+
|
|
29
|
+
# SECTION: EXPORTS ========================================================== #
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
# Functions
|
|
34
|
+
'read',
|
|
35
|
+
'write',
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _decode_all(bson_module: Any, payload: bytes) -> list[dict[str, Any]]:
    """
    Decode *payload* with whichever ``decode_all`` the module exposes.

    Parameters
    ----------
    bson_module : Any
        The imported BSON implementation.
    payload : bytes
        Raw bytes to hand to the decoder.

    Returns
    -------
    list[dict[str, Any]]
        The value returned by the selected ``decode_all`` callable.

    Raises
    ------
    AttributeError
        If the module has neither a ``decode_all`` attribute nor a ``BSON``
        attribute.
    """
    # A module-level function takes precedence over the ``BSON`` class.
    if hasattr(bson_module, 'decode_all'):
        decoder = bson_module.decode_all
    elif hasattr(bson_module, 'BSON'):
        decoder = bson_module.BSON.decode_all
    else:
        raise AttributeError('bson module lacks decode_all()')
    return decoder(payload)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _encode_doc(bson_module: Any, doc: dict[str, Any]) -> bytes:
    """
    Encode a single document with whichever ``encode`` the module exposes.

    Parameters
    ----------
    bson_module : Any
        The imported BSON implementation.
    doc : dict[str, Any]
        The document to serialize.

    Returns
    -------
    bytes
        The value returned by the selected ``encode`` callable.

    Raises
    ------
    AttributeError
        If the module has neither an ``encode`` attribute nor a ``BSON``
        attribute.
    """
    # A module-level function takes precedence over the ``BSON`` class.
    if hasattr(bson_module, 'encode'):
        encoder = bson_module.encode
    elif hasattr(bson_module, 'BSON'):
        encoder = bson_module.BSON.encode
    else:
        raise AttributeError('bson module lacks encode()')
    return encoder(doc)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _get_bson() -> Any:
    """
    Return the ``bson`` module via the optional-import helper.

    Returns
    -------
    Any
        Whatever :func:`get_optional_module` returns for ``'bson'``.

    Notes
    -----
    The module requested is ``bson``, while the installation hint names the
    ``pymongo`` distribution.
    """
    install_hint = (
        'BSON support requires optional dependency "pymongo".\n'
        'Install with: pip install pymongo'
    )
    return get_optional_module('bson', error_message=install_hint)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def read(
    path: Path,
) -> JSONList:
    """
    Load every BSON document stored at *path*.

    Parameters
    ----------
    path : Path
        Location of the BSON file on disk.

    Returns
    -------
    JSONList
        The decoded documents, as returned by the BSON module's
        ``decode_all``.
    """
    # Resolve the optional dependency before touching the file system.
    bson_module = _get_bson()
    raw_bytes = path.read_bytes()
    return cast(JSONList, _decode_all(bson_module, raw_bytes))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def write(
    path: Path,
    data: JSONData,
) -> int:
    """
    Serialize *data* as BSON documents written back-to-back to *path*.

    Parameters
    ----------
    path : Path
        Destination of the BSON file on disk. Missing parent directories
        are created.
    data : JSONData
        Data to write as BSON. Should be a list of dictionaries or a
        single dictionary.

    Returns
    -------
    int
        The number of records written; ``0`` when there is nothing to
        write, in which case no file is created.
    """
    bson_module = _get_bson()
    documents = normalize_records(data, 'BSON')
    if not documents:
        return 0

    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open('wb') as sink:
        # Each record is encoded on its own and appended in order.
        sink.writelines(_encode_doc(bson_module, doc) for doc in documents)
    return len(documents)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.cbor` module.
|
|
3
|
+
|
|
4
|
+
Helpers for reading/writing Concise Binary Object Representation (CBOR) files.
|
|
5
|
+
|
|
6
|
+
Notes
|
|
7
|
+
-----
|
|
8
|
+
- A CBOR file is a binary data format designed for small code size and message
|
|
9
|
+
size, suitable for constrained environments.
|
|
10
|
+
- Common cases:
|
|
11
|
+
- IoT data interchange.
|
|
12
|
+
- Efficient data serialization.
|
|
13
|
+
- Storage of structured data in a compact binary form.
|
|
14
|
+
- Rule of thumb:
|
|
15
|
+
- If the file follows the CBOR specification, use this module for reading
|
|
16
|
+
and writing.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
from ..types import JSONData
|
|
25
|
+
from ._imports import get_optional_module
|
|
26
|
+
from ._io import coerce_record_payload
|
|
27
|
+
from ._io import normalize_records
|
|
28
|
+
|
|
29
|
+
# SECTION: EXPORTS ========================================================== #
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
# Functions
|
|
34
|
+
'read',
|
|
35
|
+
'write',
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _get_cbor() -> Any:
    """
    Return the ``cbor2`` module via the optional-import helper.

    Returns
    -------
    Any
        Whatever :func:`get_optional_module` returns for ``'cbor2'``.

    Notes
    -----
    The installation hint below is handed to the helper as its
    ``error_message``.
    """
    install_hint = (
        'CBOR support requires optional dependency "cbor2".\n'
        'Install with: pip install cbor2'
    )
    return get_optional_module('cbor2', error_message=install_hint)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def read(
    path: Path,
) -> JSONData:
    """
    Load the CBOR payload stored at *path*.

    Parameters
    ----------
    path : Path
        Location of the CBOR file on disk.

    Returns
    -------
    JSONData
        The decoded payload after being passed through
        :func:`coerce_record_payload`.
    """
    # Resolve the optional dependency before touching the file system.
    codec = _get_cbor()
    decoded = codec.loads(path.read_bytes())
    return coerce_record_payload(decoded, format_name='CBOR')
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def write(
    path: Path,
    data: JSONData,
) -> int:
    """
    Serialize *data* as a single CBOR value written to *path*.

    Parameters
    ----------
    path : Path
        Destination of the CBOR file on disk. Missing parent directories
        are created.
    data : JSONData
        Data to write as CBOR file. Should be a list of dictionaries or a
        single dictionary.

    Returns
    -------
    int
        The number of normalized records.
    """
    codec = _get_cbor()
    rows = normalize_records(data, 'CBOR')

    # Keep the on-disk shape aligned with the input: list input is stored as
    # a list, anything else is stored as the first normalized record.
    if isinstance(data, list):
        payload: JSONData = rows
    else:
        payload = rows[0]

    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open('wb') as sink:
        sink.write(codec.dumps(payload))
    return len(rows)
|