etlplus 0.16.9__py3-none-any.whl → 0.17.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/file/README.md +33 -0
- etlplus/file/arrow.py +35 -5
- etlplus/file/bson.py +50 -5
- etlplus/file/cbor.py +35 -11
- etlplus/file/dat.py +44 -6
- etlplus/file/dta.py +46 -11
- etlplus/file/duckdb.py +159 -5
- etlplus/file/enums.py +29 -0
- etlplus/file/fwf.py +37 -5
- etlplus/file/hdf5.py +41 -3
- etlplus/file/ini.py +62 -12
- etlplus/file/msgpack.py +33 -9
- etlplus/file/nc.py +55 -6
- etlplus/file/ods.py +39 -6
- etlplus/file/pb.py +32 -19
- etlplus/file/properties.py +52 -12
- etlplus/file/proto.py +24 -12
- etlplus/file/psv.py +5 -5
- etlplus/file/rda.py +83 -9
- etlplus/file/rds.py +76 -8
- etlplus/file/sas7bdat.py +41 -11
- etlplus/file/sav.py +40 -11
- etlplus/file/sqlite.py +123 -5
- etlplus/file/tab.py +6 -7
- etlplus/file/toml.py +54 -12
- etlplus/file/xlsm.py +39 -5
- etlplus/file/xpt.py +61 -5
- {etlplus-0.16.9.dist-info → etlplus-0.17.2.dist-info}/METADATA +44 -26
- {etlplus-0.16.9.dist-info → etlplus-0.17.2.dist-info}/RECORD +33 -33
- {etlplus-0.16.9.dist-info → etlplus-0.17.2.dist-info}/WHEEL +0 -0
- {etlplus-0.16.9.dist-info → etlplus-0.17.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.16.9.dist-info → etlplus-0.17.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.16.9.dist-info → etlplus-0.17.2.dist-info}/top_level.txt +0 -0
etlplus/file/enums.py CHANGED

```diff
@@ -199,19 +199,48 @@ class FileFormat(CoercibleStrEnum):
         'yml': 'yaml',
         # File extensions
         '.avro': 'avro',
+        '.arrow': 'arrow',
         '.csv': 'csv',
+        '.duckdb': 'duckdb',
+        '.dat': 'dat',
         '.feather': 'feather',
+        '.fwf': 'fwf',
         '.gz': 'gz',
+        '.hdf': 'hdf5',
+        '.hdf5': 'hdf5',
+        '.h5': 'hdf5',
+        '.ini': 'ini',
         '.json': 'json',
         '.jsonl': 'ndjson',
+        '.bson': 'bson',
+        '.cbor': 'cbor',
+        '.msgpack': 'msgpack',
         '.ndjson': 'ndjson',
+        '.ods': 'ods',
         '.orc': 'orc',
         '.parquet': 'parquet',
         '.pq': 'parquet',
+        '.pb': 'pb',
+        '.proto': 'proto',
+        '.psv': 'psv',
+        '.sqlite': 'sqlite',
+        '.sqlite3': 'sqlite',
         '.stub': 'stub',
+        '.tab': 'tab',
+        '.dta': 'dta',
+        '.sas7bdat': 'sas7bdat',
+        '.xpt': 'xpt',
+        '.rds': 'rds',
+        '.rda': 'rda',
+        '.nc': 'nc',
+        '.sav': 'sav',
+        '.properties': 'properties',
+        '.prop': 'properties',
+        '.toml': 'toml',
         '.tsv': 'tsv',
         '.txt': 'txt',
         '.xls': 'xls',
+        '.xlsm': 'xlsm',
         '.xlsx': 'xlsx',
         '.zip': 'zip',
         '.xml': 'xml',
```
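The 29 new extension entries above wire suffix detection for the formats added in this release. As a standalone sketch of what such a suffix lookup does (a plain dict plus `Path.suffix`, not the actual `FileFormat` coercion API, which this diff only shows a fragment of):

```python
# Standalone sketch of suffix-based format detection; the real mapping lives
# inside FileFormat, whose coercion API is not shown in this diff.
from pathlib import Path

EXTENSION_TO_FORMAT = {
    '.duckdb': 'duckdb',
    '.h5': 'hdf5',
    '.hdf': 'hdf5',
    '.hdf5': 'hdf5',
    '.sqlite': 'sqlite',
    '.sqlite3': 'sqlite',
    '.sas7bdat': 'sas7bdat',
}


def detect_format(path: Path) -> str | None:
    """Return the canonical format name for *path*, or None if unknown."""
    return EXTENSION_TO_FORMAT.get(path.suffix.lower())


print(detect_format(Path('warehouse.sqlite3')))  # sqlite
print(detect_format(Path('study.sas7bdat')))     # sas7bdat
```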
etlplus/file/fwf.py CHANGED

```diff
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.fwf` module.
 
-
-implemented yet).
+Helpers for reading/writing Fixed-Width Fields (FWF) files.
 
 Notes
 -----
@@ -19,10 +18,13 @@ Notes
 from __future__ import annotations
 
 from pathlib import Path
+from typing import Any
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from . import
+from ._imports import get_pandas
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -53,7 +55,9 @@ def read(
     JSONList
         The list of dictionaries read from the FWF file.
     """
-
+    pandas = get_pandas('FWF')
+    frame = pandas.read_fwf(path)
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
@@ -76,4 +80,32 @@ def write(
     int
         The number of rows written to the FWF file.
     """
-
+    records = normalize_records(data, 'FWF')
+    if not records:
+        return 0
+
+    fieldnames = sorted({key for row in records for key in row})
+    if not fieldnames:
+        return 0
+
+    def stringify(value: Any) -> str:
+        if value is None:
+            return ''
+        return str(value)
+
+    widths: dict[str, int] = {name: len(name) for name in fieldnames}
+    for row in records:
+        for name in fieldnames:
+            widths[name] = max(widths[name], len(stringify(row.get(name))))
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open('w', encoding='utf-8', newline='') as handle:
+        header = ' '.join(name.ljust(widths[name]) for name in fieldnames)
+        handle.write(header + '\n')
+        for row in records:
+            line = ' '.join(
+                stringify(row.get(name)).ljust(widths[name])
+                for name in fieldnames
+            )
+            handle.write(line + '\n')
+    return len(records)
```
etlplus/file/hdf5.py CHANGED

```diff
@@ -1,8 +1,8 @@
 """
 :mod:`etlplus.file.hdf5` module.
 
-
-implemented yet).
+Helpers for reading Hierarchical Data Format (HDF5) files. Stub helpers for
+writing such files (not implemented yet).
 
 Notes
 -----
@@ -20,10 +20,12 @@ Notes
 from __future__ import annotations
 
 from pathlib import Path
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
 from . import stub
+from ._imports import get_pandas
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -35,6 +37,22 @@ __all__ = [
 ]
 
 
+# SECTION: INTERNAL CONSTANTS ============================================== #
+
+
+DEFAULT_KEY = 'data'
+
+
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _raise_tables_error(err: ImportError) -> None:
+    raise ImportError(
+        'HDF5 support requires optional dependency "tables".\n'
+        'Install with: pip install tables',
+    ) from err
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
@@ -54,7 +72,27 @@ def read(
     JSONList
         The list of dictionaries read from the HDF5 file.
     """
-
+    pandas = get_pandas('HDF5')
+    try:
+        store = pandas.HDFStore(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_tables_error(err)
+
+    with store:
+        keys = [key.lstrip('/') for key in store.keys()]
+        if not keys:
+            return []
+        if DEFAULT_KEY in keys:
+            key = DEFAULT_KEY
+        elif len(keys) == 1:
+            key = keys[0]
+        else:
+            raise ValueError(
+                'Multiple datasets found in HDF5 file; expected "data" or '
+                'a single dataset',
+            )
+        frame = store.get(key)
+        return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
```
etlplus/file/ini.py CHANGED

```diff
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.ini` module.
 
-
-yet).
+Helpers for reading/writing initialization (INI) files.
 
 Notes
 -----
@@ -20,11 +19,12 @@ Notes
 
 from __future__ import annotations
 
+import configparser
 from pathlib import Path
+from typing import Any
 
 from ..types import JSONData
-from ..types import
-from . import stub
+from ..types import JSONDict
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -36,12 +36,22 @@ __all__ = [
 ]
 
 
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _stringify(value: Any) -> str:
+    """Normalize INI values into strings."""
+    if value is None:
+        return ''
+    return str(value)
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
 def read(
     path: Path,
-) ->
+) -> JSONData:
     """
     Read INI content from *path*.
 
@@ -52,10 +62,22 @@ def read(
 
     Returns
     -------
-
-        The
+    JSONData
+        The structured data read from the INI file.
     """
-
+    parser = configparser.ConfigParser()
+    parser.read(path, encoding='utf-8')
+
+    payload: JSONDict = {}
+    if parser.defaults():
+        payload['DEFAULT'] = dict(parser.defaults())
+    defaults = dict(parser.defaults())
+    for section in parser.sections():
+        raw_section = dict(parser.items(section))
+        for key in defaults:
+            raw_section.pop(key, None)
+        payload[section] = raw_section
+    return payload
 
 
 def write(
@@ -70,12 +92,40 @@ def write(
     path : Path
         Path to the INI file on disk.
     data : JSONData
-        Data to write as INI. Should be a
-        single dictionary.
+        Data to write as INI. Should be a dictionary.
 
     Returns
     -------
     int
-        The number of
+        The number of records written to the INI file.
+
+    Raises
+    ------
+    TypeError
+        If *data* is not a dictionary.
     """
-
+    if isinstance(data, list):
+        raise TypeError('INI payloads must be a dict')
+    if not isinstance(data, dict):
+        raise TypeError('INI payloads must be a dict')
+
+    parser = configparser.ConfigParser()
+    for section, values in data.items():
+        if section == 'DEFAULT':
+            if isinstance(values, dict):
+                parser['DEFAULT'] = {
+                    key: _stringify(value) for key, value in values.items()
+                }
+            else:
+                raise TypeError('INI DEFAULT section must be a dict')
+            continue
+        if not isinstance(values, dict):
+            raise TypeError('INI sections must map to dicts')
+        parser[section] = {
+            key: _stringify(value) for key, value in values.items()
+        }
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open('w', encoding='utf-8', newline='') as handle:
+        parser.write(handle)
+    return 1
```
etlplus/file/msgpack.py CHANGED

```diff
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.msgpack` module.
 
-
-yet).
+Helpers for reading/writing MessagePack (MSGPACK) files.
 
 Notes
 -----
@@ -20,10 +19,12 @@ Notes
 from __future__ import annotations
 
 from pathlib import Path
+from typing import Any
 
 from ..types import JSONData
-from
-from . import
+from ._imports import get_optional_module
+from ._io import coerce_record_payload
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -35,12 +36,26 @@ __all__ = [
 ]
 
 
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _get_msgpack() -> Any:
+    """Return the msgpack module, importing it on first use."""
+    return get_optional_module(
+        'msgpack',
+        error_message=(
+            'MSGPACK support requires optional dependency "msgpack".\n'
+            'Install with: pip install msgpack'
+        ),
+    )
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
 def read(
     path: Path,
-) ->
+) -> JSONData:
     """
    Read MsgPack content from *path*.
 
@@ -51,10 +66,13 @@ def read(
 
     Returns
     -------
-
-        The
+    JSONData
+        The structured data read from the MsgPack file.
     """
-
+    msgpack = _get_msgpack()
+    with path.open('rb') as handle:
+        payload = msgpack.unpackb(handle.read(), raw=False)
+    return coerce_record_payload(payload, format_name='MSGPACK')
 
 
 def write(
@@ -77,4 +95,10 @@ def write(
     int
         The number of rows written to the MsgPack file.
     """
-
+    msgpack = _get_msgpack()
+    records = normalize_records(data, 'MSGPACK')
+    payload: JSONData = records if isinstance(data, list) else records[0]
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open('wb') as handle:
+        handle.write(msgpack.packb(payload, use_bin_type=True))
+    return len(records)
```
etlplus/file/nc.py CHANGED

```diff
@@ -1,7 +1,7 @@
 """
 :mod:`etlplus.file.nc` module.
 
-
+Helpers for reading/writing NetCDF (NC) data files.
 
 Notes
 -----
@@ -12,17 +12,21 @@ Notes
 - Sharing large datasets in research communities.
 - Efficient data access and manipulation.
 - Rule of thumb:
-    - If the file follows the NetCDF standard, use this module for
-
+    - If the file follows the NetCDF standard, use this module for reading and
+      writing.
 """
 
 from __future__ import annotations
 
 from pathlib import Path
+from typing import Any
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from . import
+from ._imports import get_optional_module
+from ._imports import get_pandas
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -34,6 +38,27 @@ __all__ = [
 ]
 
 
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _get_xarray() -> Any:
+    """Return the xarray module, importing it on first use."""
+    return get_optional_module(
+        'xarray',
+        error_message=(
+            'NC support requires optional dependency "xarray".\n'
+            'Install with: pip install xarray'
+        ),
+    )
+
+
+def _raise_engine_error(err: ImportError) -> None:
+    raise ImportError(
+        'NC support requires optional dependency "netCDF4" or "h5netcdf".\n'
+        'Install with: pip install netCDF4',
+    ) from err
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
@@ -53,7 +78,18 @@ def read(
     JSONList
         The list of dictionaries read from the NC file.
     """
-
+    xarray = _get_xarray()
+    try:
+        dataset = xarray.open_dataset(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_engine_error(err)
+    with dataset:
+        frame = dataset.to_dataframe().reset_index()
+        if 'index' in frame.columns:
+            values = list(frame['index'])
+            if values == list(range(len(values))):
+                frame = frame.drop(columns=['index'])
+        return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
@@ -76,4 +112,17 @@ def write(
     int
         The number of rows written to the NC file.
     """
-
+    records = normalize_records(data, 'NC')
+    if not records:
+        return 0
+
+    xarray = _get_xarray()
+    pandas = get_pandas('NC')
+    frame = pandas.DataFrame.from_records(records)
+    dataset = xarray.Dataset.from_dataframe(frame)
+    path.parent.mkdir(parents=True, exist_ok=True)
+    try:
+        dataset.to_netcdf(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_engine_error(err)
+    return len(records)
```
etlplus/file/ods.py CHANGED

```diff
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.ods` module.
 
-
-implemented yet).
+Helpers for reading/writing OpenDocument (ODS) spreadsheet files.
 
 Notes
 -----
@@ -21,10 +20,12 @@ Notes
 from __future__ import annotations
 
 from pathlib import Path
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from . import
+from ._imports import get_pandas
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -54,8 +55,21 @@ def read(
     -------
     JSONList
         The list of dictionaries read from the ODS file.
+
+    Raises
+    ------
+    ImportError
+        If optional dependencies for ODS support are missing.
     """
-
+    pandas = get_pandas('ODS')
+    try:
+        frame = pandas.read_excel(path, engine='odf')
+    except ImportError as err:  # pragma: no cover
+        raise ImportError(
+            'ODS support requires optional dependency "odfpy".\n'
+            'Install with: pip install odfpy',
+        ) from err
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
@@ -70,12 +84,31 @@ def write(
     path : Path
         Path to the ODS file on disk.
     data : JSONData
-        Data to write as ODS
+        Data to write as ODS. Should be a list of dictionaries or a
         single dictionary.
 
     Returns
     -------
     int
         The number of rows written to the ODS file.
+
+    Raises
+    ------
+    ImportError
+        If optional dependencies for ODS support are missing.
     """
-
+    records = normalize_records(data, 'ODS')
+    if not records:
+        return 0
+
+    pandas = get_pandas('ODS')
+    path.parent.mkdir(parents=True, exist_ok=True)
+    frame = pandas.DataFrame.from_records(records)
+    try:
+        frame.to_excel(path, index=False, engine='odf')
+    except ImportError as err:  # pragma: no cover
+        raise ImportError(
+            'ODS support requires optional dependency "odfpy".\n'
+            'Install with: pip install odfpy',
+        ) from err
+    return len(records)
```
etlplus/file/pb.py CHANGED

```diff
@@ -1,29 +1,24 @@
 """
 :mod:`etlplus.file.pb` module.
 
-
-yet).
+Helpers for reading/writing Protocol Buffers binary (PB) files.
 
 Notes
 -----
-- PB (
-for structured data.
+- A PB file contains Protocol Buffers (Protobuff) binary-encoded messages.
 - Common cases:
-    -
-    -
-    - Communication in distributed systems.
+    - Serialized payloads emitted by services or SDKs.
+    - Binary payload dumps for debugging or transport.
 - Rule of thumb:
-    -
-      for reading and writing.
+    - Use this module when you need to store or transport raw protobuf bytes.
 """
 
 from __future__ import annotations
 
+import base64
 from pathlib import Path
 
 from ..types import JSONData
-from ..types import JSONList
-from . import stub
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -40,7 +35,7 @@ __all__ = [
 
 def read(
     path: Path,
-) ->
+) -> JSONData:
     """
     Read PB content from *path*.
 
@@ -51,10 +46,12 @@ def read(
 
     Returns
     -------
-
-        The
+    JSONData
+        The structured data read from the PB file.
     """
-
+    payload = path.read_bytes()
+    encoded = base64.b64encode(payload).decode('ascii')
+    return {'payload_base64': encoded}
 
 
 def write(
@@ -69,12 +66,28 @@ def write(
     path : Path
         Path to the PB file on disk.
     data : JSONData
-        Data to write as PB. Should be a
-        single dictionary.
+        Data to write as PB. Should be a dictionary with ``payload_base64``.
 
     Returns
     -------
     int
-        The number of
+        The number of records written to the PB file.
+
+    Raises
+    ------
+    TypeError
+        If *data* is not a dictionary or missing ``payload_base64``.
     """
-
+    if isinstance(data, list):
+        raise TypeError('PB payloads must be a dict')
+    if not isinstance(data, dict):
+        raise TypeError('PB payloads must be a dict')
+
+    payload_base64 = data.get('payload_base64')
+    if not isinstance(payload_base64, str):
+        raise TypeError('PB payloads must include a "payload_base64" string')
+
+    payload = base64.b64decode(payload_base64.encode('ascii'))
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_bytes(payload)
+    return 1
```