etlplus 0.16.9__py3-none-any.whl → 0.17.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/file/enums.py CHANGED
@@ -199,19 +199,48 @@ class FileFormat(CoercibleStrEnum):
         'yml': 'yaml',
         # File extensions
         '.avro': 'avro',
+        '.arrow': 'arrow',
         '.csv': 'csv',
+        '.duckdb': 'duckdb',
+        '.dat': 'dat',
         '.feather': 'feather',
+        '.fwf': 'fwf',
         '.gz': 'gz',
+        '.hdf': 'hdf5',
+        '.hdf5': 'hdf5',
+        '.h5': 'hdf5',
+        '.ini': 'ini',
         '.json': 'json',
         '.jsonl': 'ndjson',
+        '.bson': 'bson',
+        '.cbor': 'cbor',
+        '.msgpack': 'msgpack',
         '.ndjson': 'ndjson',
+        '.ods': 'ods',
         '.orc': 'orc',
         '.parquet': 'parquet',
         '.pq': 'parquet',
+        '.pb': 'pb',
+        '.proto': 'proto',
+        '.psv': 'psv',
+        '.sqlite': 'sqlite',
+        '.sqlite3': 'sqlite',
         '.stub': 'stub',
+        '.tab': 'tab',
+        '.dta': 'dta',
+        '.sas7bdat': 'sas7bdat',
+        '.xpt': 'xpt',
+        '.rds': 'rds',
+        '.rda': 'rda',
+        '.nc': 'nc',
+        '.sav': 'sav',
+        '.properties': 'properties',
+        '.prop': 'properties',
+        '.toml': 'toml',
         '.tsv': 'tsv',
         '.txt': 'txt',
         '.xls': 'xls',
+        '.xlsm': 'xlsm',
         '.xlsx': 'xlsx',
         '.zip': 'zip',
         '.xml': 'xml',
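
The mapping above aliases both bare names ('yml') and dotted extensions ('.h5') to canonical format values, with several extensions collapsing onto one format. A minimal sketch of the lookup idea (the dict below is a hypothetical excerpt; this diff does not show how CoercibleStrEnum consumes the table):

    from pathlib import Path

    # Hypothetical excerpt of the alias table added above: several
    # extensions resolve to the same canonical format string.
    ALIASES = {'.hdf': 'hdf5', '.hdf5': 'hdf5', '.h5': 'hdf5', '.pq': 'parquet'}

    def detect_format(path: Path) -> str:
        # Fall back to the bare suffix when no alias is registered.
        suffix = path.suffix.lower()
        return ALIASES.get(suffix, suffix.lstrip('.'))

    print(detect_format(Path('metrics.h5')))  # -> 'hdf5'
    print(detect_format(Path('data.pq')))     # -> 'parquet'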
etlplus/file/fwf.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.fwf` module.

-Stub helpers for reading/writing Fixed-Width Fields (FWF) files (not
-implemented yet).
+Helpers for reading/writing Fixed-Width Fields (FWF) files.

 Notes
 -----
@@ -19,10 +18,13 @@ Notes
 from __future__ import annotations

 from pathlib import Path
+from typing import Any
+from typing import cast

 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ._imports import get_pandas
+from ._io import normalize_records

 # SECTION: EXPORTS ========================================================== #

@@ -53,7 +55,9 @@ def read(
     JSONList
         The list of dictionaries read from the FWF file.
     """
-    return stub.read(path, format_name='FWF')
+    pandas = get_pandas('FWF')
+    frame = pandas.read_fwf(path)
+    return cast(JSONList, frame.to_dict(orient='records'))


 def write(
@@ -76,4 +80,32 @@ def write(
     int
         The number of rows written to the FWF file.
     """
-    return stub.write(path, data, format_name='FWF')
+    records = normalize_records(data, 'FWF')
+    if not records:
+        return 0
+
+    fieldnames = sorted({key for row in records for key in row})
+    if not fieldnames:
+        return 0
+
+    def stringify(value: Any) -> str:
+        if value is None:
+            return ''
+        return str(value)
+
+    widths: dict[str, int] = {name: len(name) for name in fieldnames}
+    for row in records:
+        for name in fieldnames:
+            widths[name] = max(widths[name], len(stringify(row.get(name))))
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open('w', encoding='utf-8', newline='') as handle:
+        header = ' '.join(name.ljust(widths[name]) for name in fieldnames)
+        handle.write(header + '\n')
+        for row in records:
+            line = ' '.join(
+                stringify(row.get(name)).ljust(widths[name])
+                for name in fieldnames
+            )
+            handle.write(line + '\n')
+    return len(records)
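
The new writer left-pads every column to its widest value and joins columns with single spaces, a layout that pandas.read_fwf can usually re-infer on the way back in. A round-trip sketch (assuming the package is installed with pandas available; read-back value types follow pandas' inference):

    from pathlib import Path

    from etlplus.file import fwf

    rows = [
        {'name': 'ada', 'score': 91},
        {'name': 'grace', 'score': 88},
    ]
    written = fwf.write(Path('scores.fwf'), rows)  # pads columns, returns 2
    print(fwf.read(Path('scores.fwf')))            # values re-typed by pandas.read_fwf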
etlplus/file/hdf5.py CHANGED
@@ -1,8 +1,8 @@
 """
 :mod:`etlplus.file.hdf5` module.

-Stub helpers for reading/writing Hierarchical Data Format (HDF5) files (not
-implemented yet).
+Helpers for reading Hierarchical Data Format (HDF5) files. Stub helpers for
+writing such files (not implemented yet).

 Notes
 -----
@@ -20,10 +20,12 @@ Notes
 from __future__ import annotations

 from pathlib import Path
+from typing import cast

 from ..types import JSONData
 from ..types import JSONList
 from . import stub
+from ._imports import get_pandas

 # SECTION: EXPORTS ========================================================== #

@@ -35,6 +37,22 @@ __all__ = [
 ]


+# SECTION: INTERNAL CONSTANTS ============================================== #
+
+
+DEFAULT_KEY = 'data'
+
+
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _raise_tables_error(err: ImportError) -> None:
+    raise ImportError(
+        'HDF5 support requires optional dependency "tables".\n'
+        'Install with: pip install tables',
+    ) from err
+
+
 # SECTION: FUNCTIONS ======================================================== #


@@ -54,7 +72,27 @@ def read(
     JSONList
         The list of dictionaries read from the HDF5 file.
     """
-    return stub.read(path, format_name='HDF5')
+    pandas = get_pandas('HDF5')
+    try:
+        store = pandas.HDFStore(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_tables_error(err)
+
+    with store:
+        keys = [key.lstrip('/') for key in store.keys()]
+        if not keys:
+            return []
+        if DEFAULT_KEY in keys:
+            key = DEFAULT_KEY
+        elif len(keys) == 1:
+            key = keys[0]
+        else:
+            raise ValueError(
+                'Multiple datasets found in HDF5 file; expected "data" or '
+                'a single dataset',
+            )
+        frame = store.get(key)
+        return cast(JSONList, frame.to_dict(orient='records'))


 def write(
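
Reading resolves the dataset key in two steps: prefer a dataset named 'data' (DEFAULT_KEY), otherwise accept a lone dataset, otherwise raise ValueError. A sketch of producing a compatible file with plain pandas (requires the optional "tables" dependency; writing through etlplus itself remains a stub):

    from pathlib import Path

    import pandas as pd

    from etlplus.file import hdf5

    frame = pd.DataFrame([{'id': 1}, {'id': 2}])
    frame.to_hdf('events.h5', key='data')  # 'data' matches DEFAULT_KEY above
    print(hdf5.read(Path('events.h5')))    # [{'id': 1}, {'id': 2}]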
etlplus/file/ini.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.ini` module.

-Stub helpers for reading/writing initialization (INI) files (not implemented
-yet).
+Helpers for reading/writing initialization (INI) files.

 Notes
 -----
@@ -20,11 +19,12 @@ Notes

 from __future__ import annotations

+import configparser
 from pathlib import Path
+from typing import Any

 from ..types import JSONData
-from ..types import JSONList
-from . import stub
+from ..types import JSONDict

 # SECTION: EXPORTS ========================================================== #

@@ -36,12 +36,22 @@ __all__ = [
 ]


+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _stringify(value: Any) -> str:
+    """Normalize INI values into strings."""
+    if value is None:
+        return ''
+    return str(value)
+
+
 # SECTION: FUNCTIONS ======================================================== #


 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read INI content from *path*.

@@ -52,10 +62,22 @@ def read(

     Returns
     -------
-    JSONList
-        The list of dictionaries read from the INI file.
+    JSONData
+        The structured data read from the INI file.
     """
-    return stub.read(path, format_name='INI')
+    parser = configparser.ConfigParser()
+    parser.read(path, encoding='utf-8')
+
+    payload: JSONDict = {}
+    if parser.defaults():
+        payload['DEFAULT'] = dict(parser.defaults())
+    defaults = dict(parser.defaults())
+    for section in parser.sections():
+        raw_section = dict(parser.items(section))
+        for key in defaults:
+            raw_section.pop(key, None)
+        payload[section] = raw_section
+    return payload


 def write(
@@ -70,12 +92,40 @@ def write(
     path : Path
         Path to the INI file on disk.
     data : JSONData
-        Data to write as INI. Should be a list of dictionaries or a
-        single dictionary.
+        Data to write as INI. Should be a dictionary.

     Returns
     -------
     int
-        The number of rows written to the INI file.
+        The number of records written to the INI file.
+
+    Raises
+    ------
+    TypeError
+        If *data* is not a dictionary.
     """
-    return stub.write(path, data, format_name='INI')
+    if isinstance(data, list):
+        raise TypeError('INI payloads must be a dict')
+    if not isinstance(data, dict):
+        raise TypeError('INI payloads must be a dict')
+
+    parser = configparser.ConfigParser()
+    for section, values in data.items():
+        if section == 'DEFAULT':
+            if isinstance(values, dict):
+                parser['DEFAULT'] = {
+                    key: _stringify(value) for key, value in values.items()
+                }
+            else:
+                raise TypeError('INI DEFAULT section must be a dict')
+            continue
+        if not isinstance(values, dict):
+            raise TypeError('INI sections must map to dicts')
+        parser[section] = {
+            key: _stringify(value) for key, value in values.items()
+        }
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open('w', encoding='utf-8', newline='') as handle:
+        parser.write(handle)
+    return 1
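
Note the asymmetry configparser bakes in: values are written through _stringify and always come back as strings, and read() subtracts DEFAULT-section keys from every named section so inherited defaults are not duplicated. A round-trip sketch:

    from pathlib import Path

    from etlplus.file import ini

    ini.write(Path('app.ini'), {
        'DEFAULT': {'retries': 3},
        'server': {'host': 'localhost', 'port': 8080},
    })
    # Values come back as strings, e.g.
    # {'DEFAULT': {'retries': '3'},
    #  'server': {'host': 'localhost', 'port': '8080'}}
    print(ini.read(Path('app.ini')))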
etlplus/file/msgpack.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.msgpack` module.

-Stub helpers for reading/writing MessagePack (MSGPACK) files (not implemented
-yet).
+Helpers for reading/writing MessagePack (MSGPACK) files.

 Notes
 -----
@@ -20,10 +19,12 @@ Notes
 from __future__ import annotations

 from pathlib import Path
+from typing import Any

 from ..types import JSONData
-from ..types import JSONList
-from . import stub
+from ._imports import get_optional_module
+from ._io import coerce_record_payload
+from ._io import normalize_records

 # SECTION: EXPORTS ========================================================== #

@@ -35,12 +36,26 @@ __all__ = [
 ]


+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _get_msgpack() -> Any:
+    """Return the msgpack module, importing it on first use."""
+    return get_optional_module(
+        'msgpack',
+        error_message=(
+            'MSGPACK support requires optional dependency "msgpack".\n'
+            'Install with: pip install msgpack'
+        ),
+    )
+
+
 # SECTION: FUNCTIONS ======================================================== #


 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read MsgPack content from *path*.

@@ -51,10 +66,13 @@ def read(

     Returns
     -------
-    JSONList
-        The list of dictionaries read from the MsgPack file.
+    JSONData
+        The structured data read from the MsgPack file.
     """
-    return stub.read(path, format_name='MSGPACK')
+    msgpack = _get_msgpack()
+    with path.open('rb') as handle:
+        payload = msgpack.unpackb(handle.read(), raw=False)
+    return coerce_record_payload(payload, format_name='MSGPACK')


 def write(
@@ -77,4 +95,10 @@ def write(
     int
         The number of rows written to the MsgPack file.
     """
-    return stub.write(path, data, format_name='MSGPACK')
+    msgpack = _get_msgpack()
+    records = normalize_records(data, 'MSGPACK')
+    payload: JSONData = records if isinstance(data, list) else records[0]
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open('wb') as handle:
+        handle.write(msgpack.packb(payload, use_bin_type=True))
+    return len(records)
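
write() keeps the payload shape: a list round-trips as a list, while a single dict is unwrapped back out of normalize_records before packing. A sketch (requires the optional msgpack dependency; the exact shape read() returns depends on coerce_record_payload, which is defined elsewhere in the package):

    from pathlib import Path

    from etlplus.file import msgpack

    count = msgpack.write(Path('event.msgpack'), {'id': 1, 'ok': True})  # returns 1
    print(msgpack.read(Path('event.msgpack')))  # payload decoded with raw=False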
etlplus/file/nc.py CHANGED
@@ -1,7 +1,7 @@
 """
 :mod:`etlplus.file.nc` module.

-Stub helpers for reading/writing NetCDF (NC) data files (not implemented yet).
+Helpers for reading/writing NetCDF (NC) data files.

 Notes
 -----
@@ -12,17 +12,21 @@ Notes
 - Sharing large datasets in research communities.
 - Efficient data access and manipulation.
 - Rule of thumb:
-    - If the file follows the NetCDF standard, use this module for
-      reading and writing.
+    - If the file follows the NetCDF standard, use this module for reading and
+      writing.
 """

 from __future__ import annotations

 from pathlib import Path
+from typing import Any
+from typing import cast

 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ._imports import get_optional_module
+from ._imports import get_pandas
+from ._io import normalize_records

 # SECTION: EXPORTS ========================================================== #

@@ -34,6 +38,27 @@ __all__ = [
 ]


+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _get_xarray() -> Any:
+    """Return the xarray module, importing it on first use."""
+    return get_optional_module(
+        'xarray',
+        error_message=(
+            'NC support requires optional dependency "xarray".\n'
+            'Install with: pip install xarray'
+        ),
+    )
+
+
+def _raise_engine_error(err: ImportError) -> None:
+    raise ImportError(
+        'NC support requires optional dependency "netCDF4" or "h5netcdf".\n'
+        'Install with: pip install netCDF4',
+    ) from err
+
+
 # SECTION: FUNCTIONS ======================================================== #


@@ -53,7 +78,18 @@ def read(
     JSONList
         The list of dictionaries read from the NC file.
     """
-    return stub.read(path, format_name='NC')
+    xarray = _get_xarray()
+    try:
+        dataset = xarray.open_dataset(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_engine_error(err)
+    with dataset:
+        frame = dataset.to_dataframe().reset_index()
+        if 'index' in frame.columns:
+            values = list(frame['index'])
+            if values == list(range(len(values))):
+                frame = frame.drop(columns=['index'])
+        return cast(JSONList, frame.to_dict(orient='records'))


 def write(
@@ -76,4 +112,17 @@ def write(
     int
         The number of rows written to the NC file.
     """
-    return stub.write(path, data, format_name='NC')
+    records = normalize_records(data, 'NC')
+    if not records:
+        return 0
+
+    xarray = _get_xarray()
+    pandas = get_pandas('NC')
+    frame = pandas.DataFrame.from_records(records)
+    dataset = xarray.Dataset.from_dataframe(frame)
+    path.parent.mkdir(parents=True, exist_ok=True)
+    try:
+        dataset.to_netcdf(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_engine_error(err)
+    return len(records)
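
The NetCDF round trip goes records → pandas DataFrame → xarray Dataset → file, and back through to_dataframe().reset_index(); the trailing cleanup drops the synthetic 'index' column only when it is the trivial 0..n-1 range. A sketch (requires xarray plus a netCDF engine such as netCDF4):

    from pathlib import Path

    from etlplus.file import nc

    rows = [{'t': 0, 'temp': 21.5}, {'t': 1, 'temp': 22.1}]
    nc.write(Path('temps.nc'), rows)  # returns 2
    print(nc.read(Path('temps.nc')))  # the trivial RangeIndex column is dropped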
etlplus/file/ods.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.ods` module.

-Stub helpers for reading/writing OpenDocument (ODS) spreadsheet files (not
-implemented yet).
+Helpers for reading/writing OpenDocument (ODS) spreadsheet files.

 Notes
 -----
@@ -21,10 +20,12 @@ Notes
 from __future__ import annotations

 from pathlib import Path
+from typing import cast

 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ._imports import get_pandas
+from ._io import normalize_records

 # SECTION: EXPORTS ========================================================== #

@@ -54,8 +55,21 @@ def read(
     -------
     JSONList
         The list of dictionaries read from the ODS file.
+
+    Raises
+    ------
+    ImportError
+        If optional dependencies for ODS support are missing.
     """
-    return stub.read(path, format_name='ODS')
+    pandas = get_pandas('ODS')
+    try:
+        frame = pandas.read_excel(path, engine='odf')
+    except ImportError as err:  # pragma: no cover
+        raise ImportError(
+            'ODS support requires optional dependency "odfpy".\n'
+            'Install with: pip install odfpy',
+        ) from err
+    return cast(JSONList, frame.to_dict(orient='records'))


 def write(
@@ -70,12 +84,31 @@ def write(
     path : Path
         Path to the ODS file on disk.
     data : JSONData
-        Data to write as ODS file. Should be a list of dictionaries or a
+        Data to write as ODS. Should be a list of dictionaries or a
         single dictionary.

     Returns
     -------
     int
         The number of rows written to the ODS file.
+
+    Raises
+    ------
+    ImportError
+        If optional dependencies for ODS support are missing.
     """
-    return stub.write(path, data, format_name='ODS')
+    records = normalize_records(data, 'ODS')
+    if not records:
+        return 0
+
+    pandas = get_pandas('ODS')
+    path.parent.mkdir(parents=True, exist_ok=True)
+    frame = pandas.DataFrame.from_records(records)
+    try:
+        frame.to_excel(path, index=False, engine='odf')
+    except ImportError as err:  # pragma: no cover
+        raise ImportError(
+            'ODS support requires optional dependency "odfpy".\n'
+            'Install with: pip install odfpy',
+        ) from err
+    return len(records)
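
Both paths funnel through pandas' Excel machinery with engine='odf', so odfpy is the only extra needed beyond pandas itself. A round-trip sketch:

    from pathlib import Path

    from etlplus.file import ods

    ods.write(Path('report.ods'), [{'city': 'Oslo', 'pop': 709037}])
    print(ods.read(Path('report.ods')))  # [{'city': 'Oslo', 'pop': 709037}]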
etlplus/file/pb.py CHANGED
@@ -1,29 +1,24 @@
 """
 :mod:`etlplus.file.pb` module.

-Stub helpers for reading/writing Protocol Buffer (PB) files (not implemented
-yet).
+Helpers for reading/writing Protocol Buffers binary (PB) files.

 Notes
 -----
-- PB (a.k.a. Protobuff) is a binary serialization format developed by Google
-  for structured data.
+- A PB file contains Protocol Buffers (Protobuf) binary-encoded messages.
 - Common cases:
-    - Data interchange between services.
-    - Efficient storage of structured data.
-    - Communication in distributed systems.
+    - Serialized payloads emitted by services or SDKs.
+    - Binary payload dumps for debugging or transport.
 - Rule of thumb:
-    - If the file follows the Protocol Buffer specification, use this module
-      for reading and writing.
+    - Use this module when you need to store or transport raw protobuf bytes.
 """

 from __future__ import annotations

+import base64
 from pathlib import Path

 from ..types import JSONData
-from ..types import JSONList
-from . import stub

 # SECTION: EXPORTS ========================================================== #

@@ -40,7 +35,7 @@ __all__ = [

 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read PB content from *path*.

@@ -51,10 +46,12 @@ def read(

     Returns
     -------
-    JSONList
-        The list of dictionaries read from the PB file.
+    JSONData
+        The structured data read from the PB file.
     """
-    return stub.read(path, format_name='PB')
+    payload = path.read_bytes()
+    encoded = base64.b64encode(payload).decode('ascii')
+    return {'payload_base64': encoded}


 def write(
@@ -69,12 +66,28 @@ def write(
     path : Path
         Path to the PB file on disk.
     data : JSONData
-        Data to write as PB. Should be a list of dictionaries or a
-        single dictionary.
+        Data to write as PB. Should be a dictionary with ``payload_base64``.

     Returns
     -------
     int
-        The number of rows written to the PB file.
+        The number of records written to the PB file.
+
+    Raises
+    ------
+    TypeError
+        If *data* is not a dictionary or missing ``payload_base64``.
     """
-    return stub.write(path, data, format_name='PB')
+    if isinstance(data, list):
+        raise TypeError('PB payloads must be a dict')
+    if not isinstance(data, dict):
+        raise TypeError('PB payloads must be a dict')
+
+    payload_base64 = data.get('payload_base64')
+    if not isinstance(payload_base64, str):
+        raise TypeError('PB payloads must include a "payload_base64" string')
+
+    payload = base64.b64decode(payload_base64.encode('ascii'))
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_bytes(payload)
+    return 1
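
The module deliberately does not decode messages: read() wraps the raw file bytes in a base64 envelope and write() unwraps it, so schema-aware decoding stays with the caller. A round-trip sketch over arbitrary protobuf bytes:

    import base64
    from pathlib import Path

    from etlplus.file import pb

    raw = b'\x08\x96\x01'  # any serialized protobuf message; decoding is the caller's job
    pb.write(Path('msg.pb'), {'payload_base64': base64.b64encode(raw).decode('ascii')})
    assert pb.read(Path('msg.pb')) == {'payload_base64': 'CJYB'}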