etlplus 0.9.0__py3-none-any.whl → 0.11.5__py3-none-any.whl
- etlplus/cli/commands.py +19 -19
- etlplus/cli/constants.py +1 -1
- etlplus/cli/io.py +2 -2
- etlplus/config/pipeline.py +2 -2
- etlplus/database/ddl.py +1 -1
- etlplus/enums.py +3 -77
- etlplus/extract.py +5 -7
- etlplus/file/__init__.py +25 -0
- etlplus/file/core.py +287 -0
- etlplus/file/csv.py +82 -0
- etlplus/file/enums.py +238 -0
- etlplus/file/json.py +87 -0
- etlplus/file/xml.py +165 -0
- etlplus/file/yaml.py +125 -0
- etlplus/load.py +9 -12
- etlplus/run.py +6 -9
- {etlplus-0.9.0.dist-info → etlplus-0.11.5.dist-info}/METADATA +1 -1
- {etlplus-0.9.0.dist-info → etlplus-0.11.5.dist-info}/RECORD +22 -16
- etlplus/file.py +0 -657
- {etlplus-0.9.0.dist-info → etlplus-0.11.5.dist-info}/WHEEL +0 -0
- {etlplus-0.9.0.dist-info → etlplus-0.11.5.dist-info}/entry_points.txt +0 -0
- {etlplus-0.9.0.dist-info → etlplus-0.11.5.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.9.0.dist-info → etlplus-0.11.5.dist-info}/top_level.txt +0 -0
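The headline change: `FileFormat` (and file IO generally) moved out of `etlplus.enums` and the monolithic `etlplus/file.py` into a new `etlplus.file` package. A minimal migration sketch, assuming the moved enum keeps the alias table shown in the removed `etlplus/enums.py` code below:

```python
# 0.9.0 imports (removed in 0.11.5):
#   from etlplus.enums import FileFormat, coerce_file_format

# 0.11.5: FileFormat is re-exported by the new etlplus.file package, and the
# module-level coerce_* wrappers are replaced by the enum's own coerce().
from etlplus.file import File, FileFormat

fmt = FileFormat.coerce('yml')         # alias handling assumed unchanged
data = File.read_file('config.yaml')   # format inferred from the extension
```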
etlplus/cli/commands.py
CHANGED
@@ -36,7 +36,7 @@ from typing import cast
 import typer
 
 from .. import __version__
-from ..enums import FileFormat
+from ..file import FileFormat
 from . import handlers
 from .constants import CLI_DESCRIPTION
 from .constants import CLI_EPILOG
@@ -443,9 +443,9 @@ def extract_cmd(
         Source (JSON payload, file/folder path, URL/URI, or - for STDIN)
         from which to extract data. Default is ``-``.
     source_format : SourceFormatOption, optional
-
-
-
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
+        ``None``.
     source_type : SourceTypeOption, optional
         Data source type. Overrides the inferred type (``api``, ``database``,
         ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
@@ -523,15 +523,15 @@ def load_cmd(
     ctx : typer.Context
         The Typer context.
     source_format : SourceFormatOption, optional
-
-
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
+        ``None``.
     target : TargetArg, optional
         Target (file/folder path, URL/URI, or - for STDOUT) into which to load
         data. Default is ``-``.
     target_format : TargetFormatOption, optional
-
-
-        ``None``.
+        Target data format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension. Default is ``None``.
     target_type : TargetTypeOption, optional
         Data target type. Overrides the inferred type (``api``, ``database``,
         ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
@@ -760,9 +760,9 @@ def transform_cmd(
         Source (JSON payload, file/folder path, URL/URI, or - for STDIN) from
         which to extract data. Default is ``-``.
     source_format : SourceFormatOption, optional
-
-
-
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
+        ``None``.
     source_type : SourceTypeOption, optional
         Data source type. Overrides the inferred type (``api``, ``database``,
         ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
@@ -770,9 +770,8 @@ def transform_cmd(
         Target (file/folder path, URL/URI, or - for STDOUT) into which to load
         data. Default is ``-``.
     target_format : TargetFormatOption, optional
-
-
-        ``None``.
+        Target data format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension. Default is ``None``.
     target_type : TargetTypeOption, optional
         Data target type. Overrides the inferred type (``api``, ``database``,
         ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
@@ -876,11 +875,12 @@ def validate_cmd(
     source : SourceArg
         Data source to validate (path, JSON payload, or - for STDIN).
     source_format : SourceFormatOption, optional
-
-        Default is
-    source_type : SourceTypeOption, optional
-        Override the inferred source type (file, database, api). Default is
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
         ``None``.
+    source_type : SourceTypeOption, optional
+        Data source type. Overrides the inferred type (``api``, ``database``,
+        ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
     output : OutputOption, optional
         Output file for validated output (- for STDOUT). Default is ``None``.
 
etlplus/cli/constants.py
CHANGED
etlplus/cli/io.py
CHANGED
@@ -15,8 +15,8 @@ from pathlib import Path
 from typing import Any
 from typing import cast
 
-from ..enums import FileFormat
 from ..file import File
+from ..file import FileFormat
 from ..types import JSONData
 from ..utils import print_json
 
@@ -331,6 +331,6 @@ def write_json_output(
     """
     if not output_path or output_path == '-':
        return False
-    File(Path(output_path), FileFormat.JSON).
+    File(Path(output_path), FileFormat.JSON).write(data)
    print(f'{success_message} {output_path}')
    return True
etlplus/config/pipeline.py
CHANGED
@@ -24,8 +24,8 @@ from typing import Any
 from typing import Self
 
 from ..api import ApiConfig
-from ..enums import FileFormat
 from ..file import File
+from ..file import FileFormat
 from ..types import StrAnyMap
 from ..utils import coerce_dict
 from ..utils import maybe_mapping
@@ -246,7 +246,7 @@ class PipelineConfig:
        TypeError
            If the YAML root is not a mapping/object.
        """
-        raw = File(Path(path), FileFormat.YAML).
+        raw = File(Path(path), FileFormat.YAML).read()
        if not isinstance(raw, dict):
            raise TypeError('Pipeline YAML must have a mapping/object root')
 
etlplus/database/ddl.py
CHANGED
@@ -203,7 +203,7 @@ def load_table_spec(
        raise ValueError('Spec must be .json, .yml, or .yaml')
 
    try:
-        spec = File.
+        spec = File.from_path(spec_path).read()
    except ImportError as e:
        if suffix in {'.yml', '.yaml'}:
            raise RuntimeError(
etlplus/enums.py
CHANGED
@@ -19,16 +19,13 @@ from .types import StrStrMap
 
 
 __all__ = [
+    # Enums
     'AggregateName',
     'CoercibleStrEnum',
     'DataConnectorType',
-    'FileFormat',
     'HttpMethod',
     'OperatorName',
     'PipelineStep',
-    'coerce_data_connector_type',
-    'coerce_file_format',
-    'coerce_http_method',
 ]
 
 
@@ -203,38 +200,6 @@ class DataConnectorType(CoercibleStrEnum):
    }
 
 
-class FileFormat(CoercibleStrEnum):
-    """Supported file formats for extraction."""
-
-    # -- Constants -- #
-
-    CSV = 'csv'
-    JSON = 'json'
-    XML = 'xml'
-    YAML = 'yaml'
-
-    # -- Class Methods -- #
-
-    @classmethod
-    def aliases(cls) -> StrStrMap:
-        """
-        Return a mapping of common aliases for each enum member.
-
-        Returns
-        -------
-        StrStrMap
-            A mapping of alias names to their corresponding enum member names.
-        """
-        return {
-            # Common shorthand
-            'yml': 'yaml',
-            # MIME types
-            'text/csv': 'csv',
-            'application/json': 'json',
-            'application/xml': 'xml',
-        }
-
-
 class HttpMethod(CoercibleStrEnum):
    """Supported HTTP verbs that accept JSON payloads."""
 
@@ -260,8 +225,8 @@ class HttpMethod(CoercibleStrEnum):
        Notes
        -----
        - RFCs do not strictly forbid bodies on some other methods (e.g.,
-
-
+          ``DELETE``), but many servers/clients do not expect them. We mark
+          ``POST``, ``PUT``, and ``PATCH`` as True.
        """
        return self in {HttpMethod.POST, HttpMethod.PUT, HttpMethod.PATCH}
 
@@ -373,42 +338,3 @@ _PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
    PipelineStep.SORT: 3,
    PipelineStep.AGGREGATE: 4,
 }
-
-
-# SECTION: FUNCTIONS ======================================================== #
-
-
-def coerce_data_connector_type(
-    connector: DataConnectorType | str,
-) -> DataConnectorType:
-    """
-    Normalize textual data connector values to :class:`DataConnectorType`.
-
-    This thin wrapper is kept for backward compatibility; prefer
-    :meth:`DataConnectorType.coerce` going forward.
-    """
-    return DataConnectorType.coerce(connector)
-
-
-def coerce_file_format(
-    file_format: FileFormat | str,
-) -> FileFormat:
-    """
-    Normalize textual file format values to :class:`FileFormat`.
-
-    This thin wrapper is kept for backward compatibility; prefer
-    :meth:`FileFormat.coerce` going forward.
-    """
-    return FileFormat.coerce(file_format)
-
-
-def coerce_http_method(
-    http_method: HttpMethod | str,
-) -> HttpMethod:
-    """
-    Normalize textual HTTP method values to :class:`HttpMethod`.
-
-    This thin wrapper is kept for backward compatibility; prefer
-    :meth:`HttpMethod.coerce` going forward.
-    """
-    return HttpMethod.coerce(http_method)
etlplus/extract.py
CHANGED
@@ -13,11 +13,9 @@ from typing import cast
 import requests  # type: ignore[import]
 
 from .enums import DataConnectorType
-from .enums import FileFormat
 from .enums import HttpMethod
-from .enums import coerce_data_connector_type
-from .enums import coerce_file_format
 from .file import File
+from .file import FileFormat
 from .types import JSONData
 from .types import JSONDict
 from .types import JSONList
@@ -55,7 +53,7 @@ def extract_from_file(
    # If no explicit format is provided, let File infer from extension.
    if file_format is None:
        return File(path, None).read()
-    fmt =
+    fmt = FileFormat.coerce(file_format)
 
    # Let file module perform existence and format validation.
    return File(path, fmt).read()
@@ -202,7 +200,7 @@ def extract(
    ValueError
        If `source_type` is not one of the supported values.
    """
-    match
+    match DataConnectorType.coerce(source_type):
        case DataConnectorType.FILE:
            # Prefer explicit format if provided, else infer from filename.
            return extract_from_file(source, file_format)
@@ -213,6 +211,6 @@ def extract(
            # ``file_format`` is ignored for APIs.
            return extract_from_api(str(source), **kwargs)
        case _:
-            #
-            # explicit guard for defensive programming.
+            # :meth:`coerce` already raises for invalid connector types, but
+            # keep explicit guard for defensive programming.
            raise ValueError(f'Invalid source type: {source_type}')
etlplus/file/__init__.py
ADDED
@@ -0,0 +1,25 @@
+"""
+:mod:`etlplus.file` package.
+
+Public file IO helpers.
+"""
+
+from __future__ import annotations
+
+from .core import File
+from .enums import CompressionFormat
+from .enums import FileFormat
+from .enums import infer_file_format_and_compression
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    # Class
+    'File',
+    # Enums
+    'CompressionFormat',
+    'FileFormat',
+    # Functions
+    'infer_file_format_and_compression',
+]
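The package re-exports everything callers need, so downstream code imports from `etlplus.file` rather than its submodules. A small sketch of the advertised names; the return shape of `infer_file_format_and_compression` is inferred from how `core.py` below unpacks it, so the expected values are an assumption:

```python
from pathlib import Path

from etlplus.file import FileFormat, infer_file_format_and_compression

# core.py unpacks the result as a (format, compression) pair, where either
# element may be None; for a plain '.csv' suffix one would expect
# (FileFormat.CSV, None).
fmt, compression = infer_file_format_and_compression(Path('data.csv'))
```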
etlplus/file/core.py
ADDED
@@ -0,0 +1,287 @@
+"""
+:mod:`etlplus.file.core` module.
+
+Shared helpers for reading and writing structured and semi-structured data
+files.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+
+from ..types import JSONData
+from ..types import StrPath
+from . import csv
+from . import json
+from . import xml
+from . import yaml
+from .enums import FileFormat
+from .enums import infer_file_format_and_compression
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = ['File']
+
+
+# SECTION: CLASSES ========================================================== #
+
+
+@dataclass(slots=True)
+class File:
+    """
+    Convenience wrapper around structured file IO.
+
+    This class encapsulates the one-off helpers in this module as convenient
+    instance methods while retaining the original function API for
+    backward compatibility (those functions delegate to this class).
+
+    Attributes
+    ----------
+    path : Path
+        Path to the file on disk.
+    file_format : FileFormat | None, optional
+        Explicit format. If omitted, the format is inferred from the file
+        extension (``.csv``, ``.json``, or ``.xml``).
+    """
+
+    # -- Attributes -- #
+
+    path: Path
+    file_format: FileFormat | None = None
+
+    # -- Magic Methods (Object Lifecycle) -- #
+
+    def __post_init__(self) -> None:
+        """
+        Auto-detect and set the file format on initialization.
+
+        If no explicit ``file_format`` is provided, attempt to infer it from
+        the file path's extension and update :attr:`file_format`. If the
+        extension is unknown, the attribute is left as ``None`` and will be
+        validated later by :meth:`_ensure_format`.
+        """
+        # Normalize incoming path (allow str in constructor) to Path.
+        if isinstance(self.path, str):
+            self.path = Path(self.path)
+
+        if self.file_format is None:
+            try:
+                self.file_format = self._guess_format()
+            except ValueError:
+                # Leave as None; _ensure_format() will raise on use if needed.
+                pass
+
+    # -- Internal Instance Methods -- #
+
+    def _assert_exists(self) -> None:
+        """
+        Raise FileNotFoundError if :attr:`path` does not exist.
+
+        This centralizes existence checks across multiple read methods.
+        """
+        if not self.path.exists():
+            raise FileNotFoundError(f'File not found: {self.path}')
+
+    def _ensure_format(self) -> FileFormat:
+        """
+        Resolve the active format, guessing from extension if needed.
+
+        Returns
+        -------
+        FileFormat
+            The resolved file format.
+        """
+        return (
+            self.file_format
+            if self.file_format is not None
+            else self._guess_format()
+        )
+
+    def _guess_format(self) -> FileFormat:
+        """
+        Infer the file format from the filename extension.
+
+        Returns
+        -------
+        FileFormat
+            The inferred file format based on the file extension.
+
+        Raises
+        ------
+        ValueError
+            If the extension is unknown or unsupported.
+        """
+        fmt, compression = infer_file_format_and_compression(self.path)
+        if fmt is not None:
+            return fmt
+        if compression is not None:
+            raise ValueError(
+                'Cannot infer file format from compressed file '
+                f'{self.path!r} with compression {compression.value!r}',
+            )
+        raise ValueError(
+            f'Cannot infer file format from extension {self.path.suffix!r}',
+        )
+
+    # -- Instance Methods (Generic API) -- #
+
+    def read(self) -> JSONData:
+        """
+        Read structured data from :attr:`path` using :attr:`file_format`.
+
+        Returns
+        -------
+        JSONData
+            The structured data read from the file.
+
+        Raises
+        ------
+        ValueError
+            If the resolved file format is unsupported.
+        """
+        self._assert_exists()
+        fmt = self._ensure_format()
+        match fmt:
+            case FileFormat.CSV:
+                return csv.read(self.path)
+            case FileFormat.JSON:
+                return json.read(self.path)
+            case FileFormat.XML:
+                return xml.read(self.path)
+            case FileFormat.YAML:
+                return yaml.read(self.path)
+        raise ValueError(f'Unsupported format: {fmt}')
+
+    def write(
+        self,
+        data: JSONData,
+        *,
+        root_tag: str = xml.DEFAULT_XML_ROOT,
+    ) -> int:
+        """
+        Write ``data`` to :attr:`path` using :attr:`file_format`.
+
+        Parameters
+        ----------
+        data : JSONData
+            Data to write to the file.
+        root_tag : str, optional
+            Root tag name to use when writing XML files. Defaults to
+            ``'root'``.
+
+        Returns
+        -------
+        int
+            The number of records written.
+
+        Raises
+        ------
+        ValueError
+            If the resolved file format is unsupported.
+        """
+        fmt = self._ensure_format()
+        match fmt:
+            case FileFormat.CSV:
+                return csv.write(self.path, data)
+            case FileFormat.JSON:
+                return json.write(self.path, data)
+            case FileFormat.XML:
+                return xml.write(self.path, data, root_tag=root_tag)
+            case FileFormat.YAML:
+                return yaml.write(self.path, data)
+        raise ValueError(f'Unsupported format: {fmt}')
+
+    # -- Class Methods -- #
+
+    @classmethod
+    def from_path(
+        cls,
+        path: StrPath,
+        *,
+        file_format: FileFormat | str | None = None,
+    ) -> File:
+        """
+        Create a :class:`File` from any path-like and optional format.
+
+        Parameters
+        ----------
+        path : StrPath
+            Path to the file on disk.
+        file_format : FileFormat | str | None, optional
+            Explicit format. If omitted, the format is inferred from the file
+            extension (``.csv``, ``.json``, or ``.xml``).
+
+        Returns
+        -------
+        File
+            The constructed :class:`File` instance.
+        """
+        resolved = Path(path)
+        ff: FileFormat | None
+        if isinstance(file_format, str):
+            ff = FileFormat.coerce(file_format)
+        else:
+            ff = file_format
+
+        return cls(resolved, ff)
+
+    @classmethod
+    def read_file(
+        cls,
+        path: StrPath,
+        file_format: FileFormat | str | None = None,
+    ) -> JSONData:
+        """
+        Read structured data.
+
+        Parameters
+        ----------
+        path : StrPath
+            Path to the file on disk.
+        file_format : FileFormat | str | None, optional
+            Explicit format. If omitted, the format is inferred from the file
+            extension (``.csv``, ``.json``, or ``.xml``).
+
+        Returns
+        -------
+        JSONData
+            The structured data read from the file.
+        """
+        return cls.from_path(path, file_format=file_format).read()
+
+    @classmethod
+    def write_file(
+        cls,
+        path: StrPath,
+        data: JSONData,
+        file_format: FileFormat | str | None = None,
+        *,
+        root_tag: str = xml.DEFAULT_XML_ROOT,
+    ) -> int:
+        """
+        Write structured data and count written records.
+
+        Parameters
+        ----------
+        path : StrPath
+            Path to the file on disk.
+        data : JSONData
+            Data to write to the file.
+        file_format : FileFormat | str | None, optional
+            Explicit format. If omitted, the format is inferred from the file
+            extension (``.csv``, ``.json``, or ``.xml``).
+        root_tag : str, optional
+            Root tag name to use when writing XML files. Defaults to
+            ``'root'``.
+
+        Returns
+        -------
+        int
+            The number of records written to the file.
+        """
+        return cls.from_path(path, file_format=file_format).write(
+            data,
+            root_tag=root_tag,
+        )
etlplus/file/csv.py
ADDED
@@ -0,0 +1,82 @@
+"""
+:mod:`etlplus.file.csv` module.
+
+CSV read/write helpers.
+"""
+
+from __future__ import annotations
+
+import csv
+from pathlib import Path
+from typing import cast
+
+from ..types import JSONData
+from ..types import JSONDict
+from ..types import JSONList
+
+# SECTION: FUNCTIONS ======================================================== #
+
+
+def read(
+    path: Path,
+) -> JSONList:
+    """
+    Load CSV content as a list of dictionaries.
+
+    Parameters
+    ----------
+    path : Path
+        Path to the CSV file on disk.
+
+    Returns
+    -------
+    JSONList
+        The list of dictionaries read from the CSV file.
+    """
+    with path.open('r', encoding='utf-8', newline='') as handle:
+        reader: csv.DictReader[str] = csv.DictReader(handle)
+        rows: JSONList = []
+        for row in reader:
+            if not any(row.values()):
+                continue
+            rows.append(cast(JSONDict, dict(row)))
+    return rows
+
+
+def write(
+    path: Path,
+    data: JSONData,
+) -> int:
+    """
+    Write CSV rows to ``path`` and return the number of rows.
+
+    Parameters
+    ----------
+    path : Path
+        Path to the CSV file on disk.
+    data : JSONData
+        Data to write as CSV. Should be a list of dictionaries or a
+        single dictionary.
+
+    Returns
+    -------
+    int
+        The number of rows written to the CSV file.
+    """
+    rows: list[JSONDict]
+    if isinstance(data, list):
+        rows = [row for row in data if isinstance(row, dict)]
+    else:
+        rows = [data]
+
+    if not rows:
+        return 0
+
+    fieldnames = sorted({key for row in rows for key in row})
+    with path.open('w', encoding='utf-8', newline='') as handle:
+        writer = csv.DictWriter(handle, fieldnames=fieldnames)
+        writer.writeheader()
+        for row in rows:
+            writer.writerow({field: row.get(field) for field in fieldnames})
+
+    return len(rows)