etlplus 0.11.2__tar.gz → 0.11.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {etlplus-0.11.2/etlplus.egg-info → etlplus-0.11.10}/PKG-INFO +1 -1
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/cli/handlers.py +1 -1
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/database/ddl.py +1 -1
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/database/engine.py +1 -1
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/database/schema.py +1 -1
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/file/__init__.py +0 -2
- etlplus-0.11.10/etlplus/file/avro.py +59 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/file/core.py +105 -105
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/file/enums.py +0 -28
- etlplus-0.11.10/etlplus/file/feather.py +59 -0
- etlplus-0.11.10/etlplus/file/gz.py +59 -0
- etlplus-0.11.10/etlplus/file/ndjson.py +59 -0
- etlplus-0.11.10/etlplus/file/orc.py +59 -0
- etlplus-0.11.10/etlplus/file/parquet.py +59 -0
- etlplus-0.11.10/etlplus/file/tsv.py +59 -0
- etlplus-0.11.10/etlplus/file/txt.py +59 -0
- etlplus-0.11.10/etlplus/file/xls.py +59 -0
- etlplus-0.11.10/etlplus/file/xlsx.py +59 -0
- etlplus-0.11.10/etlplus/file/zip.py +49 -0
- {etlplus-0.11.2 → etlplus-0.11.10/etlplus.egg-info}/PKG-INFO +1 -1
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus.egg-info/SOURCES.txt +14 -1
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/integration/test_i_examples_data_parity.py +2 -2
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/cli/test_u_cli_handlers.py +4 -9
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/database/test_u_database_engine.py +5 -4
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/database/test_u_database_schema.py +12 -14
- etlplus-0.11.2/tests/unit/test_u_file.py → etlplus-0.11.10/tests/unit/file/test_u_file_core.py +6 -154
- etlplus-0.11.10/tests/unit/file/test_u_file_enums.py +90 -0
- etlplus-0.11.10/tests/unit/file/test_u_file_yaml.py +110 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/.coveragerc +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/.editorconfig +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/.gitattributes +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/.github/actions/python-bootstrap/action.yml +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/.github/workflows/ci.yml +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/.gitignore +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/.pre-commit-config.yaml +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/.ruff.toml +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/CODE_OF_CONDUCT.md +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/CONTRIBUTING.md +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/DEMO.md +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/LICENSE +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/MANIFEST.in +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/Makefile +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/README.md +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/REFERENCES.md +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/docs/README.md +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/docs/pipeline-guide.md +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/docs/snippets/installation_version.md +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/__init__.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/__main__.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/__version__.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/README.md +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/__init__.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/auth.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/config.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/endpoint_client.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/errors.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/pagination/__init__.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/pagination/client.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/pagination/config.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/pagination/paginator.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/rate_limiting/__init__.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/rate_limiting/config.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/request_manager.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/retry_manager.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/transport.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/api/types.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/cli/__init__.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/cli/commands.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/cli/constants.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/cli/io.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/cli/main.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/cli/options.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/cli/state.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/cli/types.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/config/__init__.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/config/connector.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/config/jobs.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/config/pipeline.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/config/profile.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/config/types.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/config/utils.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/database/__init__.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/database/orm.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/database/types.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/enums.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/extract.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/file/csv.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/file/json.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/file/xml.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/file/yaml.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/load.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/mixins.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/py.typed +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/run.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/run_helpers.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/templates/__init__.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/templates/ddl.sql.j2 +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/templates/view.sql.j2 +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/transform.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/types.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/utils.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/validate.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/validation/__init__.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus/validation/utils.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus.egg-info/dependency_links.txt +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus.egg-info/entry_points.txt +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus.egg-info/requires.txt +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/etlplus.egg-info/top_level.txt +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/examples/README.md +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/examples/configs/ddl_spec.yml +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/examples/configs/pipeline.yml +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/examples/data/sample.csv +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/examples/data/sample.json +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/examples/data/sample.xml +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/examples/data/sample.xsd +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/examples/data/sample.yaml +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/examples/quickstart_python.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/pyproject.toml +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/pytest.ini +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/setup.cfg +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/setup.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/__init__.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/conftest.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/integration/conftest.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/integration/test_i_cli.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/integration/test_i_pagination_strategy.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/integration/test_i_pipeline_smoke.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/integration/test_i_pipeline_yaml_load.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/integration/test_i_run.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/integration/test_i_run_profile_pagination_defaults.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/integration/test_i_run_profile_rate_limit_defaults.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/conftest.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/test_u_auth.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/test_u_config.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/test_u_endpoint_client.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/test_u_mocks.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/test_u_pagination_client.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/test_u_pagination_config.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/test_u_paginator.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/test_u_rate_limit_config.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/test_u_rate_limiter.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/test_u_request_manager.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/test_u_retry_manager.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/test_u_transport.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/api/test_u_types.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/cli/conftest.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/cli/test_u_cli_io.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/cli/test_u_cli_main.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/cli/test_u_cli_state.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/config/test_u_config_utils.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/config/test_u_connector.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/config/test_u_jobs.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/config/test_u_pipeline.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/conftest.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/database/test_u_database_ddl.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/database/test_u_database_orm.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/test_u_enums.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/test_u_extract.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/test_u_load.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/test_u_main.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/test_u_mixins.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/test_u_run.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/test_u_run_helpers.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/test_u_transform.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/test_u_utils.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/test_u_validate.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/test_u_version.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tests/unit/validation/test_u_validation_utils.py +0 -0
- {etlplus-0.11.2 → etlplus-0.11.10}/tools/update_demo_snippets.py +0 -0
|
@@ -570,7 +570,7 @@ def transform_handler(
|
|
|
570
570
|
data = transform(payload, cast(TransformOperations, operations_payload))
|
|
571
571
|
|
|
572
572
|
if target and target != '-':
|
|
573
|
-
File
|
|
573
|
+
File(target, file_format=target_format).write(data)
|
|
574
574
|
print(f'Data transformed and saved to {target}')
|
|
575
575
|
return 0
|
|
576
576
|
|
|
@@ -203,7 +203,7 @@ def load_table_spec(
|
|
|
203
203
|
raise ValueError('Spec must be .json, .yml, or .yaml')
|
|
204
204
|
|
|
205
205
|
try:
|
|
206
|
-
spec = File
|
|
206
|
+
spec = File(spec_path).read()
|
|
207
207
|
except ImportError as e:
|
|
208
208
|
if suffix in {'.yml', '.yaml'}:
|
|
209
209
|
raise RuntimeError(
|
|
@@ -113,7 +113,7 @@ def load_database_url_from_config(
|
|
|
113
113
|
ValueError
|
|
114
114
|
If no connection string/URL/DSN is found for the specified entry.
|
|
115
115
|
"""
|
|
116
|
-
cfg = File
|
|
116
|
+
cfg = File(Path(path)).read()
|
|
117
117
|
if not isinstance(cfg, Mapping):
|
|
118
118
|
raise TypeError('Database config must be a mapping')
|
|
119
119
|
|
|
@@ -9,7 +9,6 @@ from __future__ import annotations
|
|
|
9
9
|
from .core import File
|
|
10
10
|
from .enums import CompressionFormat
|
|
11
11
|
from .enums import FileFormat
|
|
12
|
-
from .enums import coerce_file_format
|
|
13
12
|
from .enums import infer_file_format_and_compression
|
|
14
13
|
|
|
15
14
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -22,6 +21,5 @@ __all__ = [
|
|
|
22
21
|
'CompressionFormat',
|
|
23
22
|
'FileFormat',
|
|
24
23
|
# Functions
|
|
25
|
-
'coerce_file_format',
|
|
26
24
|
'infer_file_format_and_compression',
|
|
27
25
|
]
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.avro` module.
|
|
3
|
+
|
|
4
|
+
Stub helpers for AVRO read/write.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from ..types import JSONData
|
|
12
|
+
|
|
13
|
+
# SECTION: EXPORTS ========================================================== #
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def read(path: Path) -> JSONData:
|
|
17
|
+
"""
|
|
18
|
+
Read AVRO content from ``path``.
|
|
19
|
+
|
|
20
|
+
Parameters
|
|
21
|
+
----------
|
|
22
|
+
path : Path
|
|
23
|
+
Path to the AVRO file on disk.
|
|
24
|
+
|
|
25
|
+
Returns
|
|
26
|
+
-------
|
|
27
|
+
JSONData
|
|
28
|
+
Parsed payload.
|
|
29
|
+
|
|
30
|
+
Raises
|
|
31
|
+
------
|
|
32
|
+
NotImplementedError
|
|
33
|
+
AVRO :func:`read` is not implemented yet.
|
|
34
|
+
"""
|
|
35
|
+
raise NotImplementedError('AVRO read is not implemented yet')
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def write(path: Path, data: JSONData) -> int:
|
|
39
|
+
"""
|
|
40
|
+
Write ``data`` to AVRO at ``path``.
|
|
41
|
+
|
|
42
|
+
Parameters
|
|
43
|
+
----------
|
|
44
|
+
path : Path
|
|
45
|
+
Path to the AVRO file on disk.
|
|
46
|
+
data : JSONData
|
|
47
|
+
Data to write.
|
|
48
|
+
|
|
49
|
+
Returns
|
|
50
|
+
-------
|
|
51
|
+
int
|
|
52
|
+
Number of records written.
|
|
53
|
+
|
|
54
|
+
Raises
|
|
55
|
+
------
|
|
56
|
+
NotImplementedError
|
|
57
|
+
AVRO :func:`write` is not implemented yet.
|
|
58
|
+
"""
|
|
59
|
+
raise NotImplementedError('AVRO write is not implemented yet')
|
|
@@ -11,11 +11,21 @@ from dataclasses import dataclass
|
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
|
|
13
13
|
from ..types import JSONData
|
|
14
|
-
from
|
|
14
|
+
from . import avro
|
|
15
15
|
from . import csv
|
|
16
|
+
from . import feather
|
|
17
|
+
from . import gz
|
|
16
18
|
from . import json
|
|
19
|
+
from . import ndjson
|
|
20
|
+
from . import orc
|
|
21
|
+
from . import parquet
|
|
22
|
+
from . import tsv
|
|
23
|
+
from . import txt
|
|
24
|
+
from . import xls
|
|
25
|
+
from . import xlsx
|
|
17
26
|
from . import xml
|
|
18
27
|
from . import yaml
|
|
28
|
+
from . import zip
|
|
19
29
|
from .enums import FileFormat
|
|
20
30
|
from .enums import infer_file_format_and_compression
|
|
21
31
|
|
|
@@ -43,7 +53,15 @@ class File:
|
|
|
43
53
|
Path to the file on disk.
|
|
44
54
|
file_format : FileFormat | None, optional
|
|
45
55
|
Explicit format. If omitted, the format is inferred from the file
|
|
46
|
-
extension (``.csv``, ``.json``,
|
|
56
|
+
extension (``.csv``, ``.json``, etc.).
|
|
57
|
+
|
|
58
|
+
Parameters
|
|
59
|
+
----------
|
|
60
|
+
path : StrPath
|
|
61
|
+
Path to the file on disk.
|
|
62
|
+
file_format : FileFormat | str | None, optional
|
|
63
|
+
Explicit format. If omitted, the format is inferred from the file
|
|
64
|
+
extension (``.csv``, ``.json``, etc.).
|
|
47
65
|
"""
|
|
48
66
|
|
|
49
67
|
# -- Attributes -- #
|
|
@@ -62,16 +80,10 @@ class File:
|
|
|
62
80
|
extension is unknown, the attribute is left as ``None`` and will be
|
|
63
81
|
validated later by :meth:`_ensure_format`.
|
|
64
82
|
"""
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
self.path = Path(self.path)
|
|
68
|
-
|
|
83
|
+
self.path = Path(self.path)
|
|
84
|
+
self.file_format = self._coerce_format(self.file_format)
|
|
69
85
|
if self.file_format is None:
|
|
70
|
-
|
|
71
|
-
self.file_format = self._guess_format()
|
|
72
|
-
except ValueError:
|
|
73
|
-
# Leave as None; _ensure_format() will raise on use if needed.
|
|
74
|
-
pass
|
|
86
|
+
self.file_format = self._maybe_guess_format()
|
|
75
87
|
|
|
76
88
|
# -- Internal Instance Methods -- #
|
|
77
89
|
|
|
@@ -84,6 +96,28 @@ class File:
|
|
|
84
96
|
if not self.path.exists():
|
|
85
97
|
raise FileNotFoundError(f'File not found: {self.path}')
|
|
86
98
|
|
|
99
|
+
def _coerce_format(
|
|
100
|
+
self,
|
|
101
|
+
file_format: FileFormat | str | None,
|
|
102
|
+
) -> FileFormat | None:
|
|
103
|
+
"""
|
|
104
|
+
Normalize the file format input.
|
|
105
|
+
|
|
106
|
+
Parameters
|
|
107
|
+
----------
|
|
108
|
+
file_format : FileFormat | str | None
|
|
109
|
+
File format specifier. Strings are coerced into
|
|
110
|
+
:class:`FileFormat`.
|
|
111
|
+
|
|
112
|
+
Returns
|
|
113
|
+
-------
|
|
114
|
+
FileFormat | None
|
|
115
|
+
A normalized file format, or ``None`` when unspecified.
|
|
116
|
+
"""
|
|
117
|
+
if file_format is None or isinstance(file_format, FileFormat):
|
|
118
|
+
return file_format
|
|
119
|
+
return FileFormat.coerce(file_format)
|
|
120
|
+
|
|
87
121
|
def _ensure_format(self) -> FileFormat:
|
|
88
122
|
"""
|
|
89
123
|
Resolve the active format, guessing from extension if needed.
|
|
@@ -125,7 +159,22 @@ class File:
|
|
|
125
159
|
f'Cannot infer file format from extension {self.path.suffix!r}',
|
|
126
160
|
)
|
|
127
161
|
|
|
128
|
-
|
|
162
|
+
def _maybe_guess_format(self) -> FileFormat | None:
|
|
163
|
+
"""
|
|
164
|
+
Try to infer the format, returning ``None`` if it cannot be inferred.
|
|
165
|
+
|
|
166
|
+
Returns
|
|
167
|
+
-------
|
|
168
|
+
FileFormat | None
|
|
169
|
+
The inferred format, or ``None`` if inference fails.
|
|
170
|
+
"""
|
|
171
|
+
try:
|
|
172
|
+
return self._guess_format()
|
|
173
|
+
except ValueError:
|
|
174
|
+
# Leave as None; _ensure_format() will raise on use if needed.
|
|
175
|
+
return None
|
|
176
|
+
|
|
177
|
+
# -- Instance Methods -- #
|
|
129
178
|
|
|
130
179
|
def read(self) -> JSONData:
|
|
131
180
|
"""
|
|
@@ -144,14 +193,36 @@ class File:
|
|
|
144
193
|
self._assert_exists()
|
|
145
194
|
fmt = self._ensure_format()
|
|
146
195
|
match fmt:
|
|
196
|
+
case FileFormat.AVRO:
|
|
197
|
+
return avro.read(self.path)
|
|
147
198
|
case FileFormat.CSV:
|
|
148
199
|
return csv.read(self.path)
|
|
200
|
+
case FileFormat.FEATHER:
|
|
201
|
+
return feather.read(self.path)
|
|
202
|
+
case FileFormat.GZ:
|
|
203
|
+
return gz.read(self.path)
|
|
149
204
|
case FileFormat.JSON:
|
|
150
205
|
return json.read(self.path)
|
|
206
|
+
case FileFormat.NDJSON:
|
|
207
|
+
return ndjson.read(self.path)
|
|
208
|
+
case FileFormat.ORC:
|
|
209
|
+
return orc.read(self.path)
|
|
210
|
+
case FileFormat.PARQUET:
|
|
211
|
+
return parquet.read(self.path)
|
|
212
|
+
case FileFormat.TSV:
|
|
213
|
+
return tsv.read(self.path)
|
|
214
|
+
case FileFormat.TXT:
|
|
215
|
+
return txt.read(self.path)
|
|
216
|
+
case FileFormat.XLS:
|
|
217
|
+
return xls.read(self.path)
|
|
218
|
+
case FileFormat.XLSX:
|
|
219
|
+
return xlsx.read(self.path)
|
|
151
220
|
case FileFormat.XML:
|
|
152
221
|
return xml.read(self.path)
|
|
153
222
|
case FileFormat.YAML:
|
|
154
223
|
return yaml.read(self.path)
|
|
224
|
+
case FileFormat.ZIP:
|
|
225
|
+
return zip.read(self.path)
|
|
155
226
|
raise ValueError(f'Unsupported format: {fmt}')
|
|
156
227
|
|
|
157
228
|
def write(
|
|
@@ -183,105 +254,34 @@ class File:
|
|
|
183
254
|
"""
|
|
184
255
|
fmt = self._ensure_format()
|
|
185
256
|
match fmt:
|
|
257
|
+
case FileFormat.AVRO:
|
|
258
|
+
return avro.write(self.path, data)
|
|
186
259
|
case FileFormat.CSV:
|
|
187
260
|
return csv.write(self.path, data)
|
|
261
|
+
case FileFormat.FEATHER:
|
|
262
|
+
return feather.write(self.path, data)
|
|
263
|
+
case FileFormat.GZ:
|
|
264
|
+
return gz.write(self.path, data)
|
|
188
265
|
case FileFormat.JSON:
|
|
189
266
|
return json.write(self.path, data)
|
|
267
|
+
case FileFormat.NDJSON:
|
|
268
|
+
return ndjson.write(self.path, data)
|
|
269
|
+
case FileFormat.ORC:
|
|
270
|
+
return orc.write(self.path, data)
|
|
271
|
+
case FileFormat.PARQUET:
|
|
272
|
+
return parquet.write(self.path, data)
|
|
273
|
+
case FileFormat.TSV:
|
|
274
|
+
return tsv.write(self.path, data)
|
|
275
|
+
case FileFormat.TXT:
|
|
276
|
+
return txt.write(self.path, data)
|
|
277
|
+
case FileFormat.XLS:
|
|
278
|
+
return xls.write(self.path, data)
|
|
279
|
+
case FileFormat.XLSX:
|
|
280
|
+
return xlsx.write(self.path, data)
|
|
190
281
|
case FileFormat.XML:
|
|
191
282
|
return xml.write(self.path, data, root_tag=root_tag)
|
|
192
283
|
case FileFormat.YAML:
|
|
193
284
|
return yaml.write(self.path, data)
|
|
285
|
+
case FileFormat.ZIP:
|
|
286
|
+
return zip.write(self.path, data)
|
|
194
287
|
raise ValueError(f'Unsupported format: {fmt}')
|
|
195
|
-
|
|
196
|
-
# -- Class Methods -- #
|
|
197
|
-
|
|
198
|
-
@classmethod
|
|
199
|
-
def from_path(
|
|
200
|
-
cls,
|
|
201
|
-
path: StrPath,
|
|
202
|
-
*,
|
|
203
|
-
file_format: FileFormat | str | None = None,
|
|
204
|
-
) -> File:
|
|
205
|
-
"""
|
|
206
|
-
Create a :class:`File` from any path-like and optional format.
|
|
207
|
-
|
|
208
|
-
Parameters
|
|
209
|
-
----------
|
|
210
|
-
path : StrPath
|
|
211
|
-
Path to the file on disk.
|
|
212
|
-
file_format : FileFormat | str | None, optional
|
|
213
|
-
Explicit format. If omitted, the format is inferred from the file
|
|
214
|
-
extension (``.csv``, ``.json``, or ``.xml``).
|
|
215
|
-
|
|
216
|
-
Returns
|
|
217
|
-
-------
|
|
218
|
-
File
|
|
219
|
-
The constructed :class:`File` instance.
|
|
220
|
-
"""
|
|
221
|
-
resolved = Path(path)
|
|
222
|
-
ff: FileFormat | None
|
|
223
|
-
if isinstance(file_format, str):
|
|
224
|
-
ff = FileFormat.coerce(file_format)
|
|
225
|
-
else:
|
|
226
|
-
ff = file_format
|
|
227
|
-
|
|
228
|
-
return cls(resolved, ff)
|
|
229
|
-
|
|
230
|
-
@classmethod
|
|
231
|
-
def read_file(
|
|
232
|
-
cls,
|
|
233
|
-
path: StrPath,
|
|
234
|
-
file_format: FileFormat | str | None = None,
|
|
235
|
-
) -> JSONData:
|
|
236
|
-
"""
|
|
237
|
-
Read structured data.
|
|
238
|
-
|
|
239
|
-
Parameters
|
|
240
|
-
----------
|
|
241
|
-
path : StrPath
|
|
242
|
-
Path to the file on disk.
|
|
243
|
-
file_format : FileFormat | str | None, optional
|
|
244
|
-
Explicit format. If omitted, the format is inferred from the file
|
|
245
|
-
extension (``.csv``, ``.json``, or ``.xml``).
|
|
246
|
-
|
|
247
|
-
Returns
|
|
248
|
-
-------
|
|
249
|
-
JSONData
|
|
250
|
-
The structured data read from the file.
|
|
251
|
-
"""
|
|
252
|
-
return cls.from_path(path, file_format=file_format).read()
|
|
253
|
-
|
|
254
|
-
@classmethod
|
|
255
|
-
def write_file(
|
|
256
|
-
cls,
|
|
257
|
-
path: StrPath,
|
|
258
|
-
data: JSONData,
|
|
259
|
-
file_format: FileFormat | str | None = None,
|
|
260
|
-
*,
|
|
261
|
-
root_tag: str = xml.DEFAULT_XML_ROOT,
|
|
262
|
-
) -> int:
|
|
263
|
-
"""
|
|
264
|
-
Write structured data and count written records.
|
|
265
|
-
|
|
266
|
-
Parameters
|
|
267
|
-
----------
|
|
268
|
-
path : StrPath
|
|
269
|
-
Path to the file on disk.
|
|
270
|
-
data : JSONData
|
|
271
|
-
Data to write to the file.
|
|
272
|
-
file_format : FileFormat | str | None, optional
|
|
273
|
-
Explicit format. If omitted, the format is inferred from the file
|
|
274
|
-
extension (``.csv``, ``.json``, or ``.xml``).
|
|
275
|
-
root_tag : str, optional
|
|
276
|
-
Root tag name to use when writing XML files. Defaults to
|
|
277
|
-
``'root'``.
|
|
278
|
-
|
|
279
|
-
Returns
|
|
280
|
-
-------
|
|
281
|
-
int
|
|
282
|
-
The number of records written to the file.
|
|
283
|
-
"""
|
|
284
|
-
return cls.from_path(path, file_format=file_format).write(
|
|
285
|
-
data,
|
|
286
|
-
root_tag=root_tag,
|
|
287
|
-
)
|
|
@@ -16,8 +16,6 @@ from ..types import StrStrMap
|
|
|
16
16
|
__all__ = [
|
|
17
17
|
'CompressionFormat',
|
|
18
18
|
'FileFormat',
|
|
19
|
-
'coerce_compression_format',
|
|
20
|
-
'coerce_file_format',
|
|
21
19
|
'infer_file_format_and_compression',
|
|
22
20
|
]
|
|
23
21
|
|
|
@@ -164,32 +162,6 @@ _COMPRESSION_FILE_FORMATS: set[FileFormat] = {
|
|
|
164
162
|
# SECTION: FUNCTIONS ======================================================== #
|
|
165
163
|
|
|
166
164
|
|
|
167
|
-
# TODO: Deprecate in favor of using the enum methods directly.
|
|
168
|
-
def coerce_compression_format(
|
|
169
|
-
compression_format: CompressionFormat | str,
|
|
170
|
-
) -> CompressionFormat:
|
|
171
|
-
"""
|
|
172
|
-
Normalize textual compression format values to :class:`CompressionFormat`.
|
|
173
|
-
|
|
174
|
-
This thin wrapper is kept for backward compatibility; prefer
|
|
175
|
-
:meth:`CompressionFormat.coerce` going forward.
|
|
176
|
-
"""
|
|
177
|
-
return CompressionFormat.coerce(compression_format)
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
# TODO: Deprecate in favor of using the enum methods directly.
|
|
181
|
-
def coerce_file_format(
|
|
182
|
-
file_format: FileFormat | str,
|
|
183
|
-
) -> FileFormat:
|
|
184
|
-
"""
|
|
185
|
-
Normalize textual file format values to :class:`FileFormat`.
|
|
186
|
-
|
|
187
|
-
This thin wrapper is kept for backward compatibility; prefer
|
|
188
|
-
:meth:`FileFormat.coerce` going forward.
|
|
189
|
-
"""
|
|
190
|
-
return FileFormat.coerce(file_format)
|
|
191
|
-
|
|
192
|
-
|
|
193
165
|
# TODO: Convert to a method on FileFormat or CompressionFormat?
|
|
194
166
|
def infer_file_format_and_compression(
|
|
195
167
|
value: object,
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.feather` module.
|
|
3
|
+
|
|
4
|
+
Stub helpers for FEATHER read/write.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from ..types import JSONData
|
|
12
|
+
|
|
13
|
+
# SECTION: EXPORTS ========================================================== #
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def read(path: Path) -> JSONData:
|
|
17
|
+
"""
|
|
18
|
+
Read FEATHER content from ``path``.
|
|
19
|
+
|
|
20
|
+
Parameters
|
|
21
|
+
----------
|
|
22
|
+
path : Path
|
|
23
|
+
Path to the FEATHER file on disk.
|
|
24
|
+
|
|
25
|
+
Returns
|
|
26
|
+
-------
|
|
27
|
+
JSONData
|
|
28
|
+
Parsed payload.
|
|
29
|
+
|
|
30
|
+
Raises
|
|
31
|
+
------
|
|
32
|
+
NotImplementedError
|
|
33
|
+
FEATHER :func:`read` is not implemented yet.
|
|
34
|
+
"""
|
|
35
|
+
raise NotImplementedError('FEATHER read is not implemented yet')
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def write(path: Path, data: JSONData) -> int:
|
|
39
|
+
"""
|
|
40
|
+
Write ``data`` to FEATHER at ``path``.
|
|
41
|
+
|
|
42
|
+
Parameters
|
|
43
|
+
----------
|
|
44
|
+
path : Path
|
|
45
|
+
Path to the FEATHER file on disk.
|
|
46
|
+
data : JSONData
|
|
47
|
+
Data to write.
|
|
48
|
+
|
|
49
|
+
Returns
|
|
50
|
+
-------
|
|
51
|
+
int
|
|
52
|
+
Number of records written.
|
|
53
|
+
|
|
54
|
+
Raises
|
|
55
|
+
------
|
|
56
|
+
NotImplementedError
|
|
57
|
+
FEATHER :func:`write` is not implemented yet.
|
|
58
|
+
"""
|
|
59
|
+
raise NotImplementedError('FEATHER write is not implemented yet')
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.gz` module.
|
|
3
|
+
|
|
4
|
+
Stub helpers for GZ read/write.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from ..types import JSONData
|
|
12
|
+
|
|
13
|
+
# SECTION: EXPORTS ========================================================== #
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def read(path: Path) -> JSONData:
|
|
17
|
+
"""
|
|
18
|
+
Read GZ content from ``path``.
|
|
19
|
+
|
|
20
|
+
Parameters
|
|
21
|
+
----------
|
|
22
|
+
path : Path
|
|
23
|
+
Path to the GZ file on disk.
|
|
24
|
+
|
|
25
|
+
Returns
|
|
26
|
+
-------
|
|
27
|
+
JSONData
|
|
28
|
+
Parsed payload.
|
|
29
|
+
|
|
30
|
+
Raises
|
|
31
|
+
------
|
|
32
|
+
NotImplementedError
|
|
33
|
+
GZ :func:`read` is not implemented yet.
|
|
34
|
+
"""
|
|
35
|
+
raise NotImplementedError('GZ read is not implemented yet')
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def write(path: Path, data: JSONData) -> int:
|
|
39
|
+
"""
|
|
40
|
+
Write ``data`` to GZ at ``path``.
|
|
41
|
+
|
|
42
|
+
Parameters
|
|
43
|
+
----------
|
|
44
|
+
path : Path
|
|
45
|
+
Path to the GZ file on disk.
|
|
46
|
+
data : JSONData
|
|
47
|
+
Data to write.
|
|
48
|
+
|
|
49
|
+
Returns
|
|
50
|
+
-------
|
|
51
|
+
int
|
|
52
|
+
Number of records written.
|
|
53
|
+
|
|
54
|
+
Raises
|
|
55
|
+
------
|
|
56
|
+
NotImplementedError
|
|
57
|
+
GZ :func:`write` is not implemented yet.
|
|
58
|
+
"""
|
|
59
|
+
raise NotImplementedError('GZ write is not implemented yet')
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.ndjson` module.
|
|
3
|
+
|
|
4
|
+
Stub helpers for NDJSON read/write.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from ..types import JSONData
|
|
12
|
+
|
|
13
|
+
# SECTION: EXPORTS ========================================================== #
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def read(path: Path) -> JSONData:
|
|
17
|
+
"""
|
|
18
|
+
Read NDJSON content from ``path``.
|
|
19
|
+
|
|
20
|
+
Parameters
|
|
21
|
+
----------
|
|
22
|
+
path : Path
|
|
23
|
+
Path to the NDJSON file on disk.
|
|
24
|
+
|
|
25
|
+
Returns
|
|
26
|
+
-------
|
|
27
|
+
JSONData
|
|
28
|
+
Parsed payload.
|
|
29
|
+
|
|
30
|
+
Raises
|
|
31
|
+
------
|
|
32
|
+
NotImplementedError
|
|
33
|
+
NDJSON :func:`read` is not implemented yet.
|
|
34
|
+
"""
|
|
35
|
+
raise NotImplementedError('NDJSON read is not implemented yet')
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def write(path: Path, data: JSONData) -> int:
|
|
39
|
+
"""
|
|
40
|
+
Write ``data`` to NDJSON at ``path``.
|
|
41
|
+
|
|
42
|
+
Parameters
|
|
43
|
+
----------
|
|
44
|
+
path : Path
|
|
45
|
+
Path to the NDJSON file on disk.
|
|
46
|
+
data : JSONData
|
|
47
|
+
Data to write.
|
|
48
|
+
|
|
49
|
+
Returns
|
|
50
|
+
-------
|
|
51
|
+
int
|
|
52
|
+
Number of records written.
|
|
53
|
+
|
|
54
|
+
Raises
|
|
55
|
+
------
|
|
56
|
+
NotImplementedError
|
|
57
|
+
NDJSON :func:`write` is not implemented yet.
|
|
58
|
+
"""
|
|
59
|
+
raise NotImplementedError('NDJSON write is not implemented yet')
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.orc` module.
|
|
3
|
+
|
|
4
|
+
Stub helpers for ORC read/write.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from ..types import JSONData
|
|
12
|
+
|
|
13
|
+
# SECTION: EXPORTS ========================================================== #
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def read(path: Path) -> JSONData:
|
|
17
|
+
"""
|
|
18
|
+
Read ORC content from ``path``.
|
|
19
|
+
|
|
20
|
+
Parameters
|
|
21
|
+
----------
|
|
22
|
+
path : Path
|
|
23
|
+
Path to the ORC file on disk.
|
|
24
|
+
|
|
25
|
+
Returns
|
|
26
|
+
-------
|
|
27
|
+
JSONData
|
|
28
|
+
Parsed payload.
|
|
29
|
+
|
|
30
|
+
Raises
|
|
31
|
+
------
|
|
32
|
+
NotImplementedError
|
|
33
|
+
ORC :func:`read` is not implemented yet.
|
|
34
|
+
"""
|
|
35
|
+
raise NotImplementedError('ORC read is not implemented yet')
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def write(path: Path, data: JSONData) -> int:
|
|
39
|
+
"""
|
|
40
|
+
Write ``data`` to ORC at ``path``.
|
|
41
|
+
|
|
42
|
+
Parameters
|
|
43
|
+
----------
|
|
44
|
+
path : Path
|
|
45
|
+
Path to the ORC file on disk.
|
|
46
|
+
data : JSONData
|
|
47
|
+
Data to write.
|
|
48
|
+
|
|
49
|
+
Returns
|
|
50
|
+
-------
|
|
51
|
+
int
|
|
52
|
+
Number of records written.
|
|
53
|
+
|
|
54
|
+
Raises
|
|
55
|
+
------
|
|
56
|
+
NotImplementedError
|
|
57
|
+
ORC :func:`write` is not implemented yet.
|
|
58
|
+
"""
|
|
59
|
+
raise NotImplementedError('ORC write is not implemented yet')
|