etlplus 0.10.5__tar.gz → 0.11.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {etlplus-0.10.5/etlplus.egg-info → etlplus-0.11.1}/PKG-INFO +1 -1
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/cli/commands.py +1 -1
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/cli/constants.py +1 -1
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/cli/io.py +2 -2
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/config/pipeline.py +2 -2
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/enums.py +2 -240
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/extract.py +2 -2
- etlplus-0.11.1/etlplus/file/__init__.py +27 -0
- etlplus-0.11.1/etlplus/file/core.py +287 -0
- etlplus-0.11.1/etlplus/file/csv.py +82 -0
- etlplus-0.11.1/etlplus/file/enums.py +266 -0
- etlplus-0.11.1/etlplus/file/json.py +87 -0
- etlplus-0.11.1/etlplus/file/xml.py +165 -0
- etlplus-0.11.1/etlplus/file/yaml.py +125 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/load.py +4 -4
- {etlplus-0.10.5 → etlplus-0.11.1/etlplus.egg-info}/PKG-INFO +1 -1
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus.egg-info/SOURCES.txt +7 -1
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/cli/test_u_cli_io.py +2 -2
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/database/test_u_database_ddl.py +6 -3
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/test_u_enums.py +2 -40
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/test_u_file.py +59 -24
- etlplus-0.10.5/etlplus/file.py +0 -652
- {etlplus-0.10.5 → etlplus-0.11.1}/.coveragerc +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/.editorconfig +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/.gitattributes +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/.github/actions/python-bootstrap/action.yml +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/.github/workflows/ci.yml +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/.gitignore +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/.pre-commit-config.yaml +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/.ruff.toml +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/CODE_OF_CONDUCT.md +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/CONTRIBUTING.md +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/DEMO.md +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/LICENSE +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/MANIFEST.in +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/Makefile +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/README.md +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/REFERENCES.md +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/docs/README.md +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/docs/pipeline-guide.md +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/docs/snippets/installation_version.md +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/__init__.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/__main__.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/__version__.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/README.md +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/__init__.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/auth.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/config.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/endpoint_client.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/errors.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/pagination/__init__.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/pagination/client.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/pagination/config.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/pagination/paginator.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/rate_limiting/__init__.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/rate_limiting/config.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/request_manager.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/retry_manager.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/transport.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/api/types.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/cli/__init__.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/cli/handlers.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/cli/main.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/cli/options.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/cli/state.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/cli/types.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/config/__init__.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/config/connector.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/config/jobs.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/config/profile.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/config/types.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/config/utils.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/database/__init__.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/database/ddl.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/database/engine.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/database/orm.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/database/schema.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/database/types.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/mixins.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/py.typed +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/run.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/run_helpers.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/templates/__init__.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/templates/ddl.sql.j2 +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/templates/view.sql.j2 +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/transform.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/types.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/utils.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/validate.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/validation/__init__.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus/validation/utils.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus.egg-info/dependency_links.txt +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus.egg-info/entry_points.txt +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus.egg-info/requires.txt +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/etlplus.egg-info/top_level.txt +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/examples/README.md +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/examples/configs/ddl_spec.yml +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/examples/configs/pipeline.yml +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/examples/data/sample.csv +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/examples/data/sample.json +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/examples/data/sample.xml +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/examples/data/sample.xsd +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/examples/data/sample.yaml +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/examples/quickstart_python.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/pyproject.toml +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/pytest.ini +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/setup.cfg +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/setup.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/__init__.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/conftest.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/integration/conftest.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/integration/test_i_cli.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/integration/test_i_examples_data_parity.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/integration/test_i_pagination_strategy.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/integration/test_i_pipeline_smoke.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/integration/test_i_pipeline_yaml_load.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/integration/test_i_run.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/integration/test_i_run_profile_pagination_defaults.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/integration/test_i_run_profile_rate_limit_defaults.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/conftest.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/test_u_auth.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/test_u_config.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/test_u_endpoint_client.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/test_u_mocks.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/test_u_pagination_client.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/test_u_pagination_config.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/test_u_paginator.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/test_u_rate_limit_config.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/test_u_rate_limiter.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/test_u_request_manager.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/test_u_retry_manager.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/test_u_transport.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/api/test_u_types.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/cli/conftest.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/cli/test_u_cli_handlers.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/cli/test_u_cli_main.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/cli/test_u_cli_state.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/config/test_u_config_utils.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/config/test_u_connector.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/config/test_u_jobs.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/config/test_u_pipeline.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/conftest.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/database/test_u_database_engine.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/database/test_u_database_orm.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/database/test_u_database_schema.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/test_u_extract.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/test_u_load.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/test_u_main.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/test_u_mixins.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/test_u_run.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/test_u_run_helpers.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/test_u_transform.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/test_u_utils.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/test_u_validate.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/test_u_version.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tests/unit/validation/test_u_validation_utils.py +0 -0
- {etlplus-0.10.5 → etlplus-0.11.1}/tools/update_demo_snippets.py +0 -0
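
The headline change in 0.11.1 is that file-format handling moves out of `etlplus.enums` and into a new `etlplus.file` package (`core.py`, `enums.py`, plus per-format `csv.py`, `json.py`, `xml.py`, and `yaml.py` modules), while the old monolithic `etlplus/file.py` is removed. A minimal migration sketch for downstream code, assuming the re-exports shown in the new `etlplus/file/__init__.py` below are the supported import path:

```python
# Before (0.10.5): format enums and coercion helpers lived in etlplus.enums.
# from etlplus.enums import FileFormat
# from etlplus.enums import coerce_file_format

# After (0.11.1): the same names are imported from the new etlplus.file package.
from etlplus.file import File
from etlplus.file import FileFormat
from etlplus.file import coerce_file_format
```

The hunks below are largely this mechanical import swap, plus the relocation of the enum definitions and inference helpers themselves.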
@@ -15,8 +15,8 @@ from pathlib import Path
 from typing import Any
 from typing import cast

-from ..enums import FileFormat
 from ..file import File
+from ..file import FileFormat
 from ..types import JSONData
 from ..utils import print_json

@@ -331,6 +331,6 @@ def write_json_output(
     """
     if not output_path or output_path == '-':
         return False
-    File(Path(output_path), FileFormat.JSON).
+    File(Path(output_path), FileFormat.JSON).write(data)
     print(f'{success_message} {output_path}')
     return True
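
A short sketch of the updated JSON-output call, assuming a hypothetical payload and output path; per the new `File.write()` shown further down, the call returns the number of records written:

```python
from pathlib import Path

from etlplus.file import File, FileFormat

data = {'status': 'ok', 'rows': 3}  # hypothetical payload
count = File(Path('out.json'), FileFormat.JSON).write(data)  # hypothetical path
print(f'Wrote out.json ({count} record(s))')
```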
@@ -24,8 +24,8 @@ from typing import Any
 from typing import Self

 from ..api import ApiConfig
-from ..enums import FileFormat
 from ..file import File
+from ..file import FileFormat
 from ..types import StrAnyMap
 from ..utils import coerce_dict
 from ..utils import maybe_mapping

@@ -246,7 +246,7 @@ class PipelineConfig:
         TypeError
             If the YAML root is not a mapping/object.
         """
-        raw = File(Path(path), FileFormat.YAML).
+        raw = File(Path(path), FileFormat.YAML).read()
         if not isinstance(raw, dict):
            raise TypeError('Pipeline YAML must have a mapping/object root')

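
The same read path can be exercised outside `PipelineConfig`; a sketch assuming a hypothetical `pipeline.yml`, mirroring the mapping-root check above:

```python
from pathlib import Path

from etlplus.file import File, FileFormat

raw = File(Path('pipeline.yml'), FileFormat.YAML).read()  # hypothetical path
if not isinstance(raw, dict):
    raise TypeError('Pipeline YAML must have a mapping/object root')
```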
@@ -8,7 +8,6 @@ from __future__ import annotations

 import enum
 import operator as _op
-from pathlib import PurePath
 from statistics import fmean
 from typing import Self

@@ -23,18 +22,13 @@ __all__ = [
     # Enums
     'AggregateName',
     'CoercibleStrEnum',
-    'CompressionFormat',
     'DataConnectorType',
-    'FileFormat',
     'HttpMethod',
     'OperatorName',
     'PipelineStep',
     # Functions
-    'coerce_compression_format',
     'coerce_data_connector_type',
-    'coerce_file_format',
     'coerce_http_method',
-    'infer_file_format_and_compression',
 ]


@@ -178,39 +172,6 @@ class AggregateName(CoercibleStrEnum):
         return lambda xs, n: (fmean(xs) if xs else 0.0)


-class CompressionFormat(CoercibleStrEnum):
-    """Supported compression formats for data files."""
-
-    # -- Constants -- #
-
-    GZ = 'gz'
-    ZIP = 'zip'
-
-    # -- Class Methods -- #
-
-    @classmethod
-    def aliases(cls) -> StrStrMap:
-        """
-        Return a mapping of common aliases for each enum member.
-
-        Returns
-        -------
-        StrStrMap
-            A mapping of alias names to their corresponding enum member names.
-        """
-        return {
-            # File extensions
-            '.gz': 'gz',
-            '.gzip': 'gz',
-            '.zip': 'zip',
-            # MIME types
-            'application/gzip': 'gz',
-            'application/x-gzip': 'gz',
-            'application/zip': 'zip',
-            'application/x-zip-compressed': 'zip',
-        }
-
-
 class DataConnectorType(CoercibleStrEnum):
     """Supported data connector types."""

@@ -242,99 +203,6 @@ class DataConnectorType(CoercibleStrEnum):
         }


-class FileFormat(CoercibleStrEnum):
-    """Supported file formats for extraction."""
-
-    # -- Constants -- #
-
-    AVRO = 'avro'
-    CSV = 'csv'
-    FEATHER = 'feather'
-    GZ = 'gz'
-    JSON = 'json'
-    NDJSON = 'ndjson'
-    ORC = 'orc'
-    PARQUET = 'parquet'
-    TSV = 'tsv'
-    TXT = 'txt'
-    XLS = 'xls'
-    XLSX = 'xlsx'
-    ZIP = 'zip'
-    XML = 'xml'
-    YAML = 'yaml'
-
-    # -- Class Methods -- #
-
-    @classmethod
-    def aliases(cls) -> StrStrMap:
-        """
-        Return a mapping of common aliases for each enum member.
-
-        Returns
-        -------
-        StrStrMap
-            A mapping of alias names to their corresponding enum member names.
-        """
-        return {
-            # Common shorthand
-            'parq': 'parquet',
-            'yml': 'yaml',
-            # File extensions
-            '.avro': 'avro',
-            '.csv': 'csv',
-            '.feather': 'feather',
-            '.gz': 'gz',
-            '.json': 'json',
-            '.jsonl': 'ndjson',
-            '.ndjson': 'ndjson',
-            '.orc': 'orc',
-            '.parquet': 'parquet',
-            '.pq': 'parquet',
-            '.tsv': 'tsv',
-            '.txt': 'txt',
-            '.xls': 'xls',
-            '.xlsx': 'xlsx',
-            '.zip': 'zip',
-            '.xml': 'xml',
-            '.yaml': 'yaml',
-            '.yml': 'yaml',
-            # MIME types
-            'application/avro': 'avro',
-            'application/csv': 'csv',
-            'application/feather': 'feather',
-            'application/gzip': 'gz',
-            'application/json': 'json',
-            'application/jsonlines': 'ndjson',
-            'application/ndjson': 'ndjson',
-            'application/orc': 'orc',
-            'application/parquet': 'parquet',
-            'application/vnd.apache.avro': 'avro',
-            'application/vnd.apache.parquet': 'parquet',
-            'application/vnd.apache.arrow.file': 'feather',
-            'application/vnd.apache.orc': 'orc',
-            'application/vnd.ms-excel': 'xls',
-            (
-                'application/vnd.openxmlformats-'
-                'officedocument.spreadsheetml.sheet'
-            ): 'xlsx',
-            'application/x-avro': 'avro',
-            'application/x-csv': 'csv',
-            'application/x-feather': 'feather',
-            'application/x-orc': 'orc',
-            'application/x-ndjson': 'ndjson',
-            'application/x-parquet': 'parquet',
-            'application/x-yaml': 'yaml',
-            'application/xml': 'xml',
-            'application/zip': 'zip',
-            'text/csv': 'csv',
-            'text/plain': 'txt',
-            'text/tab-separated-values': 'tsv',
-            'text/tsv': 'tsv',
-            'text/xml': 'xml',
-            'text/yaml': 'yaml',
-        }
-
-
 class HttpMethod(CoercibleStrEnum):
     """Supported HTTP verbs that accept JSON payloads."""

@@ -360,8 +228,8 @@ class HttpMethod(CoercibleStrEnum):
         Notes
         -----
         - RFCs do not strictly forbid bodies on some other methods (e.g.,
-
-
+          ``DELETE``), but many servers/clients do not expect them. We mark
+          ``POST``, ``PUT``, and ``PATCH`` as True.
         """
         return self in {HttpMethod.POST, HttpMethod.PUT, HttpMethod.PATCH}

@@ -465,13 +333,6 @@ class PipelineStep(CoercibleStrEnum):
 # SECTION: INTERNAL CONSTANTS ============================================== #


-# Compression formats that are also file formats.
-_COMPRESSION_FILE_FORMATS: set[FileFormat] = {
-    FileFormat.GZ,
-    FileFormat.ZIP,
-}
-
-
 # Precomputed order index for PipelineStep; avoids recomputing on each access.
 _PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
     PipelineStep.FILTER: 0,
@@ -497,30 +358,6 @@ def coerce_data_connector_type(
     return DataConnectorType.coerce(connector)


-def coerce_file_format(
-    file_format: FileFormat | str,
-) -> FileFormat:
-    """
-    Normalize textual file format values to :class:`FileFormat`.
-
-    This thin wrapper is kept for backward compatibility; prefer
-    :meth:`FileFormat.coerce` going forward.
-    """
-    return FileFormat.coerce(file_format)
-
-
-def coerce_compression_format(
-    compression_format: CompressionFormat | str,
-) -> CompressionFormat:
-    """
-    Normalize textual compression format values to :class:`CompressionFormat`.
-
-    This thin wrapper is kept for backward compatibility; prefer
-    :meth:`CompressionFormat.coerce` going forward.
-    """
-    return CompressionFormat.coerce(compression_format)
-
-
 def coerce_http_method(
     http_method: HttpMethod | str,
 ) -> HttpMethod:
@@ -531,78 +368,3 @@ def coerce_http_method(
     :meth:`HttpMethod.coerce` going forward.
     """
     return HttpMethod.coerce(http_method)
-
-
-def infer_file_format_and_compression(
-    value: object,
-    filename: object | None = None,
-) -> tuple[FileFormat | None, CompressionFormat | None]:
-    """
-    Infer data format and compression from a filename, extension, or MIME type.
-
-    Parameters
-    ----------
-    value : object
-        A filename, extension, MIME type, or existing enum member.
-    filename : object | None, optional
-        A filename to consult for extension-based inference (e.g. when
-        ``value`` is ``application/octet-stream``).
-
-    Returns
-    -------
-    tuple[FileFormat | None, CompressionFormat | None]
-        The inferred data format and compression, if any.
-    """
-    if isinstance(value, FileFormat):
-        if value in _COMPRESSION_FILE_FORMATS:
-            return None, CompressionFormat.coerce(value.value)
-        return value, None
-    if isinstance(value, CompressionFormat):
-        return None, value
-
-    text = str(value).strip()
-    if not text:
-        return None, None
-
-    normalized = text.casefold()
-    mime = normalized.split(';', 1)[0].strip()
-
-    is_octet_stream = mime == 'application/octet-stream'
-    compression = CompressionFormat.try_coerce(mime)
-    fmt = None if is_octet_stream else FileFormat.try_coerce(mime)
-
-    is_mime = mime.startswith(
-        (
-            'application/',
-            'text/',
-            'audio/',
-            'image/',
-            'video/',
-            'multipart/',
-        ),
-    )
-    suffix_source: object | None = filename if filename is not None else text
-    if is_mime and filename is None:
-        suffix_source = None
-
-    suffixes = (
-        PurePath(str(suffix_source)).suffixes
-        if suffix_source is not None
-        else []
-    )
-    if suffixes:
-        normalized_suffixes = [suffix.casefold() for suffix in suffixes]
-        compression = (
-            CompressionFormat.try_coerce(normalized_suffixes[-1])
-            or compression
-        )
-        if compression is not None:
-            normalized_suffixes = normalized_suffixes[:-1]
-        if normalized_suffixes:
-            fmt = FileFormat.try_coerce(normalized_suffixes[-1]) or fmt
-
-    if fmt in _COMPRESSION_FILE_FORMATS:
-        compression = compression or CompressionFormat.coerce(fmt.value)
-        fmt = None
-
-    return fmt, compression
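
None of the removed helpers is dropped from the public API: `etlplus/file/__init__.py` below re-exports `CompressionFormat`, `FileFormat`, `coerce_file_format`, and `infer_file_format_and_compression` from the new `etlplus/file/enums.py`. Assuming the relocated implementation matches the removed one shown above, inference behaves roughly like this:

```python
from etlplus.file import CompressionFormat
from etlplus.file import FileFormat
from etlplus.file import infer_file_format_and_compression

# Extension-based inference; a trailing compression suffix is peeled off first.
assert infer_file_format_and_compression('report.csv.gz') == (
    FileFormat.CSV,
    CompressionFormat.GZ,
)

# MIME types resolve through the alias tables.
assert infer_file_format_and_compression('application/json') == (
    FileFormat.JSON,
    None,
)

# A bare compression extension yields a compression but no data format.
assert infer_file_format_and_compression('archive.zip') == (
    None,
    CompressionFormat.ZIP,
)
```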
@@ -13,11 +13,11 @@ from typing import cast
 import requests  # type: ignore[import]

 from .enums import DataConnectorType
-from .enums import FileFormat
 from .enums import HttpMethod
 from .enums import coerce_data_connector_type
-from .enums import coerce_file_format
 from .file import File
+from .file import FileFormat
+from .file import coerce_file_format
 from .types import JSONData
 from .types import JSONDict
 from .types import JSONList
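
`extract.py` keeps using `coerce_file_format`, now sourced from `etlplus.file`. Assuming coercion consults the alias table shown in the removed `FileFormat.aliases()` above and that the table carried over unchanged, it still accepts shorthands and extensions:

```python
from etlplus.file import FileFormat, coerce_file_format

assert coerce_file_format('yml') is FileFormat.YAML          # shorthand alias
assert coerce_file_format('.parquet') is FileFormat.PARQUET  # extension alias
assert coerce_file_format(FileFormat.CSV) is FileFormat.CSV  # already an enum
```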
@@ -0,0 +1,27 @@
+"""
+:mod:`etlplus.file` package.
+
+Public file IO helpers.
+"""
+
+from __future__ import annotations
+
+from .core import File
+from .enums import CompressionFormat
+from .enums import FileFormat
+from .enums import coerce_file_format
+from .enums import infer_file_format_and_compression
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    # Class
+    'File',
+    # Enums
+    'CompressionFormat',
+    'FileFormat',
+    # Functions
+    'coerce_file_format',
+    'infer_file_format_and_compression',
+]
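
A sketch of the intended import surface, using the bundled `examples/data/sample.csv` as input; per `core.py` below, a plain string path is normalized to `Path` and the format inferred from the extension:

```python
from etlplus.file import File
from etlplus.file import FileFormat

sample = File('examples/data/sample.csv')  # path normalized, format inferred
assert sample.file_format is FileFormat.CSV
```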
@@ -0,0 +1,287 @@
+"""
+:mod:`etlplus.file.core` module.
+
+Shared helpers for reading and writing structured and semi-structured data
+files.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+
+from ..types import JSONData
+from ..types import StrPath
+from . import csv
+from . import json
+from . import xml
+from . import yaml
+from .enums import FileFormat
+from .enums import infer_file_format_and_compression
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = ['File']
+
+
+# SECTION: CLASSES ========================================================== #
+
+
+@dataclass(slots=True)
+class File:
+    """
+    Convenience wrapper around structured file IO.
+
+    This class encapsulates the one-off helpers in this module as convenient
+    instance methods while retaining the original function API for
+    backward compatibility (those functions delegate to this class).
+
+    Attributes
+    ----------
+    path : Path
+        Path to the file on disk.
+    file_format : FileFormat | None, optional
+        Explicit format. If omitted, the format is inferred from the file
+        extension (``.csv``, ``.json``, or ``.xml``).
+    """
+
+    # -- Attributes -- #
+
+    path: Path
+    file_format: FileFormat | None = None
+
+    # -- Magic Methods (Object Lifecycle) -- #
+
+    def __post_init__(self) -> None:
+        """
+        Auto-detect and set the file format on initialization.
+
+        If no explicit ``file_format`` is provided, attempt to infer it from
+        the file path's extension and update :attr:`file_format`. If the
+        extension is unknown, the attribute is left as ``None`` and will be
+        validated later by :meth:`_ensure_format`.
+        """
+        # Normalize incoming path (allow str in constructor) to Path.
+        if isinstance(self.path, str):
+            self.path = Path(self.path)
+
+        if self.file_format is None:
+            try:
+                self.file_format = self._guess_format()
+            except ValueError:
+                # Leave as None; _ensure_format() will raise on use if needed.
+                pass
+
+    # -- Internal Instance Methods -- #
+
+    def _assert_exists(self) -> None:
+        """
+        Raise FileNotFoundError if :attr:`path` does not exist.
+
+        This centralizes existence checks across multiple read methods.
+        """
+        if not self.path.exists():
+            raise FileNotFoundError(f'File not found: {self.path}')
+
+    def _ensure_format(self) -> FileFormat:
+        """
+        Resolve the active format, guessing from extension if needed.
+
+        Returns
+        -------
+        FileFormat
+            The resolved file format.
+        """
+        return (
+            self.file_format
+            if self.file_format is not None
+            else self._guess_format()
+        )
+
+    def _guess_format(self) -> FileFormat:
+        """
+        Infer the file format from the filename extension.
+
+        Returns
+        -------
+        FileFormat
+            The inferred file format based on the file extension.
+
+        Raises
+        ------
+        ValueError
+            If the extension is unknown or unsupported.
+        """
+        fmt, compression = infer_file_format_and_compression(self.path)
+        if fmt is not None:
+            return fmt
+        if compression is not None:
+            raise ValueError(
+                'Cannot infer file format from compressed file '
+                f'{self.path!r} with compression {compression.value!r}',
+            )
+        raise ValueError(
+            f'Cannot infer file format from extension {self.path.suffix!r}',
+        )
+
+    # -- Instance Methods (Generic API) -- #
+
+    def read(self) -> JSONData:
+        """
+        Read structured data from :attr:`path` using :attr:`file_format`.
+
+        Returns
+        -------
+        JSONData
+            The structured data read from the file.
+
+        Raises
+        ------
+        ValueError
+            If the resolved file format is unsupported.
+        """
+        self._assert_exists()
+        fmt = self._ensure_format()
+        match fmt:
+            case FileFormat.CSV:
+                return csv.read(self.path)
+            case FileFormat.JSON:
+                return json.read(self.path)
+            case FileFormat.XML:
+                return xml.read(self.path)
+            case FileFormat.YAML:
+                return yaml.read(self.path)
+        raise ValueError(f'Unsupported format: {fmt}')
+
+    def write(
+        self,
+        data: JSONData,
+        *,
+        root_tag: str = xml.DEFAULT_XML_ROOT,
+    ) -> int:
+        """
+        Write ``data`` to :attr:`path` using :attr:`file_format`.
+
+        Parameters
+        ----------
+        data : JSONData
+            Data to write to the file.
+        root_tag : str, optional
+            Root tag name to use when writing XML files. Defaults to
+            ``'root'``.
+
+        Returns
+        -------
+        int
+            The number of records written.
+
+        Raises
+        ------
+        ValueError
+            If the resolved file format is unsupported.
+        """
+        fmt = self._ensure_format()
+        match fmt:
+            case FileFormat.CSV:
+                return csv.write(self.path, data)
+            case FileFormat.JSON:
+                return json.write(self.path, data)
+            case FileFormat.XML:
+                return xml.write(self.path, data, root_tag=root_tag)
+            case FileFormat.YAML:
+                return yaml.write(self.path, data)
+        raise ValueError(f'Unsupported format: {fmt}')
+
+    # -- Class Methods -- #
+
+    @classmethod
+    def from_path(
+        cls,
+        path: StrPath,
+        *,
+        file_format: FileFormat | str | None = None,
+    ) -> File:
+        """
+        Create a :class:`File` from any path-like and optional format.
+
+        Parameters
+        ----------
+        path : StrPath
+            Path to the file on disk.
+        file_format : FileFormat | str | None, optional
+            Explicit format. If omitted, the format is inferred from the file
+            extension (``.csv``, ``.json``, or ``.xml``).
+
+        Returns
+        -------
+        File
+            The constructed :class:`File` instance.
+        """
+        resolved = Path(path)
+        ff: FileFormat | None
+        if isinstance(file_format, str):
+            ff = FileFormat.coerce(file_format)
+        else:
+            ff = file_format
+
+        return cls(resolved, ff)
+
+    @classmethod
+    def read_file(
+        cls,
+        path: StrPath,
+        file_format: FileFormat | str | None = None,
+    ) -> JSONData:
+        """
+        Read structured data.
+
+        Parameters
+        ----------
+        path : StrPath
+            Path to the file on disk.
+        file_format : FileFormat | str | None, optional
+            Explicit format. If omitted, the format is inferred from the file
+            extension (``.csv``, ``.json``, or ``.xml``).
+
+        Returns
+        -------
+        JSONData
+            The structured data read from the file.
+        """
+        return cls.from_path(path, file_format=file_format).read()
+
+    @classmethod
+    def write_file(
+        cls,
+        path: StrPath,
+        data: JSONData,
+        file_format: FileFormat | str | None = None,
+        *,
+        root_tag: str = xml.DEFAULT_XML_ROOT,
+    ) -> int:
+        """
+        Write structured data and count written records.
+
+        Parameters
+        ----------
+        path : StrPath
+            Path to the file on disk.
+        data : JSONData
+            Data to write to the file.
+        file_format : FileFormat | str | None, optional
+            Explicit format. If omitted, the format is inferred from the file
+            extension (``.csv``, ``.json``, or ``.xml``).
+        root_tag : str, optional
+            Root tag name to use when writing XML files. Defaults to
+            ``'root'``.
+
+        Returns
+        -------
+        int
+            The number of records written to the file.
+        """
+        return cls.from_path(path, file_format=file_format).write(
+            data,
+            root_tag=root_tag,
+        )
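
A short round-trip sketch of the class-method API above, assuming a hypothetical output path and a small record list; `write_file()` infers JSON from the extension and returns the record count, while `read_file()` accepts an explicit format string that is coerced via `FileFormat.coerce()` inside `from_path()`:

```python
from etlplus.file import File

rows = [
    {'id': 1, 'name': 'alpha'},
    {'id': 2, 'name': 'beta'},
]

written = File.write_file('rows.json', rows)            # hypothetical path
data = File.read_file('rows.json', file_format='json')  # explicit format string
assert data == rows
print(f'{written} record(s) written')
```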