etlplus 0.16.10__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/file/README.md +33 -0
- etlplus/file/_imports.py +35 -20
- etlplus/file/_io.py +138 -15
- etlplus/file/_r.py +48 -0
- etlplus/file/_sql.py +224 -0
- etlplus/file/accdb.py +7 -6
- etlplus/file/arrow.py +29 -10
- etlplus/file/avro.py +13 -10
- etlplus/file/bson.py +94 -10
- etlplus/file/cbor.py +29 -17
- etlplus/file/cfg.py +7 -6
- etlplus/file/conf.py +7 -6
- etlplus/file/core.py +1 -1
- etlplus/file/csv.py +8 -7
- etlplus/file/dat.py +52 -11
- etlplus/file/dta.py +36 -16
- etlplus/file/duckdb.py +72 -11
- etlplus/file/enums.py +29 -0
- etlplus/file/feather.py +15 -30
- etlplus/file/fwf.py +44 -10
- etlplus/file/gz.py +12 -7
- etlplus/file/hbs.py +7 -6
- etlplus/file/hdf5.py +71 -8
- etlplus/file/ini.py +60 -17
- etlplus/file/ion.py +7 -6
- etlplus/file/jinja2.py +7 -6
- etlplus/file/json.py +10 -11
- etlplus/file/log.py +7 -6
- etlplus/file/mat.py +7 -6
- etlplus/file/mdb.py +7 -6
- etlplus/file/msgpack.py +27 -15
- etlplus/file/mustache.py +7 -6
- etlplus/file/nc.py +69 -11
- etlplus/file/ndjson.py +10 -6
- etlplus/file/numbers.py +7 -6
- etlplus/file/ods.py +48 -11
- etlplus/file/orc.py +15 -30
- etlplus/file/parquet.py +10 -6
- etlplus/file/pb.py +36 -24
- etlplus/file/pbf.py +7 -6
- etlplus/file/properties.py +44 -18
- etlplus/file/proto.py +24 -18
- etlplus/file/psv.py +12 -11
- etlplus/file/rda.py +57 -15
- etlplus/file/rds.py +50 -14
- etlplus/file/sas7bdat.py +26 -16
- etlplus/file/sav.py +34 -16
- etlplus/file/sqlite.py +70 -10
- etlplus/file/stub.py +8 -6
- etlplus/file/sylk.py +7 -6
- etlplus/file/tab.py +13 -13
- etlplus/file/toml.py +56 -17
- etlplus/file/tsv.py +8 -7
- etlplus/file/txt.py +10 -7
- etlplus/file/vm.py +7 -6
- etlplus/file/wks.py +7 -6
- etlplus/file/xls.py +8 -5
- etlplus/file/xlsm.py +48 -10
- etlplus/file/xlsx.py +10 -6
- etlplus/file/xml.py +11 -9
- etlplus/file/xpt.py +46 -10
- etlplus/file/yaml.py +10 -11
- etlplus/file/zip.py +10 -5
- etlplus/file/zsav.py +7 -6
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/METADATA +44 -26
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/RECORD +70 -68
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0
etlplus/file/msgpack.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.msgpack` module.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
yet).
|
|
4
|
+
Helpers for reading/writing MessagePack (MSGPACK) files.
|
|
6
5
|
|
|
7
6
|
Notes
|
|
8
7
|
-----
|
|
@@ -19,11 +18,13 @@ Notes
|
|
|
19
18
|
|
|
20
19
|
from __future__ import annotations
|
|
21
20
|
|
|
22
|
-
from pathlib import Path
|
|
23
|
-
|
|
24
21
|
from ..types import JSONData
|
|
25
|
-
from ..types import
|
|
26
|
-
from . import
|
|
22
|
+
from ..types import StrPath
|
|
23
|
+
from ._imports import get_dependency
|
|
24
|
+
from ._io import coerce_path
|
|
25
|
+
from ._io import coerce_record_payload
|
|
26
|
+
from ._io import ensure_parent_dir
|
|
27
|
+
from ._io import normalize_records
|
|
27
28
|
|
|
28
29
|
# SECTION: EXPORTS ========================================================== #
|
|
29
30
|
|
|
@@ -39,26 +40,30 @@ __all__ = [
|
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
def read(
|
|
42
|
-
path:
|
|
43
|
-
) ->
|
|
43
|
+
path: StrPath,
|
|
44
|
+
) -> JSONData:
|
|
44
45
|
"""
|
|
45
46
|
Read MsgPack content from *path*.
|
|
46
47
|
|
|
47
48
|
Parameters
|
|
48
49
|
----------
|
|
49
|
-
path :
|
|
50
|
+
path : StrPath
|
|
50
51
|
Path to the MsgPack file on disk.
|
|
51
52
|
|
|
52
53
|
Returns
|
|
53
54
|
-------
|
|
54
|
-
|
|
55
|
-
The
|
|
55
|
+
JSONData
|
|
56
|
+
The structured data read from the MsgPack file.
|
|
56
57
|
"""
|
|
57
|
-
|
|
58
|
+
path = coerce_path(path)
|
|
59
|
+
msgpack = get_dependency('msgpack', format_name='MSGPACK')
|
|
60
|
+
with path.open('rb') as handle:
|
|
61
|
+
payload = msgpack.unpackb(handle.read(), raw=False)
|
|
62
|
+
return coerce_record_payload(payload, format_name='MSGPACK')
|
|
58
63
|
|
|
59
64
|
|
|
60
65
|
def write(
|
|
61
|
-
path:
|
|
66
|
+
path: StrPath,
|
|
62
67
|
data: JSONData,
|
|
63
68
|
) -> int:
|
|
64
69
|
"""
|
|
@@ -66,7 +71,7 @@ def write(
|
|
|
66
71
|
|
|
67
72
|
Parameters
|
|
68
73
|
----------
|
|
69
|
-
path :
|
|
74
|
+
path : StrPath
|
|
70
75
|
Path to the MsgPack file on disk.
|
|
71
76
|
data : JSONData
|
|
72
77
|
Data to write as MsgPack. Should be a list of dictionaries or a
|
|
@@ -77,4 +82,11 @@ def write(
|
|
|
77
82
|
int
|
|
78
83
|
The number of rows written to the MsgPack file.
|
|
79
84
|
"""
|
|
80
|
-
|
|
85
|
+
path = coerce_path(path)
|
|
86
|
+
msgpack = get_dependency('msgpack', format_name='MSGPACK')
|
|
87
|
+
records = normalize_records(data, 'MSGPACK')
|
|
88
|
+
payload: JSONData = records if isinstance(data, list) else records[0]
|
|
89
|
+
ensure_parent_dir(path)
|
|
90
|
+
with path.open('wb') as handle:
|
|
91
|
+
handle.write(msgpack.packb(payload, use_bin_type=True))
|
|
92
|
+
return len(records)
|
etlplus/file/mustache.py
CHANGED
|
@@ -19,11 +19,11 @@ Notes
|
|
|
19
19
|
|
|
20
20
|
from __future__ import annotations
|
|
21
21
|
|
|
22
|
-
from pathlib import Path
|
|
23
|
-
|
|
24
22
|
from ..types import JSONData
|
|
25
23
|
from ..types import JSONList
|
|
24
|
+
from ..types import StrPath
|
|
26
25
|
from . import stub
|
|
26
|
+
from ._io import coerce_path
|
|
27
27
|
|
|
28
28
|
# SECTION: EXPORTS ========================================================== #
|
|
29
29
|
|
|
@@ -39,14 +39,14 @@ __all__ = [
|
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
def read(
|
|
42
|
-
path:
|
|
42
|
+
path: StrPath,
|
|
43
43
|
) -> JSONList:
|
|
44
44
|
"""
|
|
45
45
|
Read MUSTACHE content from *path*.
|
|
46
46
|
|
|
47
47
|
Parameters
|
|
48
48
|
----------
|
|
49
|
-
path :
|
|
49
|
+
path : StrPath
|
|
50
50
|
Path to the MUSTACHE file on disk.
|
|
51
51
|
|
|
52
52
|
Returns
|
|
@@ -58,7 +58,7 @@ def read(
|
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
def write(
|
|
61
|
-
path:
|
|
61
|
+
path: StrPath,
|
|
62
62
|
data: JSONData,
|
|
63
63
|
) -> int:
|
|
64
64
|
"""
|
|
@@ -66,7 +66,7 @@ def write(
|
|
|
66
66
|
|
|
67
67
|
Parameters
|
|
68
68
|
----------
|
|
69
|
-
path :
|
|
69
|
+
path : StrPath
|
|
70
70
|
Path to the MUSTACHE file on disk.
|
|
71
71
|
data : JSONData
|
|
72
72
|
Data to write as MUSTACHE file. Should be a list of dictionaries or a
|
|
@@ -77,4 +77,5 @@ def write(
|
|
|
77
77
|
int
|
|
78
78
|
The number of rows written to the MUSTACHE file.
|
|
79
79
|
"""
|
|
80
|
+
path = coerce_path(path)
|
|
80
81
|
return stub.write(path, data, format_name='MUSTACHE')
|
etlplus/file/nc.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.nc` module.
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
Helpers for reading/writing NetCDF (NC) data files.
|
|
5
5
|
|
|
6
6
|
Notes
|
|
7
7
|
-----
|
|
@@ -12,17 +12,22 @@ Notes
|
|
|
12
12
|
- Sharing large datasets in research communities.
|
|
13
13
|
- Efficient data access and manipulation.
|
|
14
14
|
- Rule of thumb:
|
|
15
|
-
- If the file follows the NetCDF standard, use this module for
|
|
16
|
-
|
|
15
|
+
- If the file follows the NetCDF standard, use this module for reading and
|
|
16
|
+
writing.
|
|
17
17
|
"""
|
|
18
18
|
|
|
19
19
|
from __future__ import annotations
|
|
20
20
|
|
|
21
|
-
from
|
|
21
|
+
from typing import cast
|
|
22
22
|
|
|
23
23
|
from ..types import JSONData
|
|
24
24
|
from ..types import JSONList
|
|
25
|
-
from
|
|
25
|
+
from ..types import StrPath
|
|
26
|
+
from ._imports import get_dependency
|
|
27
|
+
from ._imports import get_pandas
|
|
28
|
+
from ._io import coerce_path
|
|
29
|
+
from ._io import ensure_parent_dir
|
|
30
|
+
from ._io import normalize_records
|
|
26
31
|
|
|
27
32
|
# SECTION: EXPORTS ========================================================== #
|
|
28
33
|
|
|
@@ -34,18 +39,45 @@ __all__ = [
|
|
|
34
39
|
]
|
|
35
40
|
|
|
36
41
|
|
|
42
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _raise_engine_error(
|
|
46
|
+
err: ImportError,
|
|
47
|
+
) -> None:
|
|
48
|
+
"""
|
|
49
|
+
Raise a consistent ImportError for missing NetCDF engine support.
|
|
50
|
+
|
|
51
|
+
Parameters
|
|
52
|
+
----------
|
|
53
|
+
err : ImportError
|
|
54
|
+
The original ImportError raised when trying to use NetCDF support
|
|
55
|
+
without the required dependency.
|
|
56
|
+
|
|
57
|
+
Raises
|
|
58
|
+
------
|
|
59
|
+
ImportError
|
|
60
|
+
Consistent ImportError indicating that NetCDF support requires
|
|
61
|
+
optional dependencies.
|
|
62
|
+
"""
|
|
63
|
+
raise ImportError(
|
|
64
|
+
'NC support requires optional dependency "netCDF4" or "h5netcdf".\n'
|
|
65
|
+
'Install with: pip install netCDF4',
|
|
66
|
+
) from err
|
|
67
|
+
|
|
68
|
+
|
|
37
69
|
# SECTION: FUNCTIONS ======================================================== #
|
|
38
70
|
|
|
39
71
|
|
|
40
72
|
def read(
|
|
41
|
-
path:
|
|
73
|
+
path: StrPath,
|
|
42
74
|
) -> JSONList:
|
|
43
75
|
"""
|
|
44
76
|
Read NC content from *path*.
|
|
45
77
|
|
|
46
78
|
Parameters
|
|
47
79
|
----------
|
|
48
|
-
path :
|
|
80
|
+
path : StrPath
|
|
49
81
|
Path to the NC file on disk.
|
|
50
82
|
|
|
51
83
|
Returns
|
|
@@ -53,11 +85,23 @@ def read(
|
|
|
53
85
|
JSONList
|
|
54
86
|
The list of dictionaries read from the NC file.
|
|
55
87
|
"""
|
|
56
|
-
|
|
88
|
+
path = coerce_path(path)
|
|
89
|
+
xarray = get_dependency('xarray', format_name='NC')
|
|
90
|
+
try:
|
|
91
|
+
dataset = xarray.open_dataset(path)
|
|
92
|
+
except ImportError as err: # pragma: no cover
|
|
93
|
+
_raise_engine_error(err)
|
|
94
|
+
with dataset:
|
|
95
|
+
frame = dataset.to_dataframe().reset_index()
|
|
96
|
+
if 'index' in frame.columns:
|
|
97
|
+
values = list(frame['index'])
|
|
98
|
+
if values == list(range(len(values))):
|
|
99
|
+
frame = frame.drop(columns=['index'])
|
|
100
|
+
return cast(JSONList, frame.to_dict(orient='records'))
|
|
57
101
|
|
|
58
102
|
|
|
59
103
|
def write(
|
|
60
|
-
path:
|
|
104
|
+
path: StrPath,
|
|
61
105
|
data: JSONData,
|
|
62
106
|
) -> int:
|
|
63
107
|
"""
|
|
@@ -65,7 +109,7 @@ def write(
|
|
|
65
109
|
|
|
66
110
|
Parameters
|
|
67
111
|
----------
|
|
68
|
-
path :
|
|
112
|
+
path : StrPath
|
|
69
113
|
Path to the NC file on disk.
|
|
70
114
|
data : JSONData
|
|
71
115
|
Data to write as NC file. Should be a list of dictionaries or a
|
|
@@ -76,4 +120,18 @@ def write(
|
|
|
76
120
|
int
|
|
77
121
|
The number of rows written to the NC file.
|
|
78
122
|
"""
|
|
79
|
-
|
|
123
|
+
path = coerce_path(path)
|
|
124
|
+
records = normalize_records(data, 'NC')
|
|
125
|
+
if not records:
|
|
126
|
+
return 0
|
|
127
|
+
|
|
128
|
+
xarray = get_dependency('xarray', format_name='NC')
|
|
129
|
+
pandas = get_pandas('NC')
|
|
130
|
+
frame = pandas.DataFrame.from_records(records)
|
|
131
|
+
dataset = xarray.Dataset.from_dataframe(frame)
|
|
132
|
+
ensure_parent_dir(path)
|
|
133
|
+
try:
|
|
134
|
+
dataset.to_netcdf(path)
|
|
135
|
+
except ImportError as err: # pragma: no cover
|
|
136
|
+
_raise_engine_error(err)
|
|
137
|
+
return len(records)
|
etlplus/file/ndjson.py
CHANGED
|
@@ -18,13 +18,15 @@ Notes
|
|
|
18
18
|
from __future__ import annotations
|
|
19
19
|
|
|
20
20
|
import json
|
|
21
|
-
from pathlib import Path
|
|
22
21
|
from typing import cast
|
|
23
22
|
|
|
24
23
|
from ..types import JSONData
|
|
25
24
|
from ..types import JSONDict
|
|
26
25
|
from ..types import JSONList
|
|
26
|
+
from ..types import StrPath
|
|
27
27
|
from ..utils import count_records
|
|
28
|
+
from ._io import coerce_path
|
|
29
|
+
from ._io import ensure_parent_dir
|
|
28
30
|
from ._io import normalize_records
|
|
29
31
|
|
|
30
32
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -41,14 +43,14 @@ __all__ = [
|
|
|
41
43
|
|
|
42
44
|
|
|
43
45
|
def read(
|
|
44
|
-
path:
|
|
46
|
+
path: StrPath,
|
|
45
47
|
) -> JSONList:
|
|
46
48
|
"""
|
|
47
49
|
Read NDJSON content from *path*.
|
|
48
50
|
|
|
49
51
|
Parameters
|
|
50
52
|
----------
|
|
51
|
-
path :
|
|
53
|
+
path : StrPath
|
|
52
54
|
Path to the NDJSON file on disk.
|
|
53
55
|
|
|
54
56
|
Returns
|
|
@@ -61,6 +63,7 @@ def read(
|
|
|
61
63
|
TypeError
|
|
62
64
|
If any line in the NDJSON file is not a JSON object (dict).
|
|
63
65
|
"""
|
|
66
|
+
path = coerce_path(path)
|
|
64
67
|
rows: JSONList = []
|
|
65
68
|
with path.open('r', encoding='utf-8') as handle:
|
|
66
69
|
for idx, line in enumerate(handle, start=1):
|
|
@@ -77,7 +80,7 @@ def read(
|
|
|
77
80
|
|
|
78
81
|
|
|
79
82
|
def write(
|
|
80
|
-
path:
|
|
83
|
+
path: StrPath,
|
|
81
84
|
data: JSONData,
|
|
82
85
|
) -> int:
|
|
83
86
|
"""
|
|
@@ -85,7 +88,7 @@ def write(
|
|
|
85
88
|
|
|
86
89
|
Parameters
|
|
87
90
|
----------
|
|
88
|
-
path :
|
|
91
|
+
path : StrPath
|
|
89
92
|
Path to the NDJSON file on disk.
|
|
90
93
|
data : JSONData
|
|
91
94
|
Data to write.
|
|
@@ -95,12 +98,13 @@ def write(
|
|
|
95
98
|
int
|
|
96
99
|
Number of records written.
|
|
97
100
|
"""
|
|
101
|
+
path = coerce_path(path)
|
|
98
102
|
rows = normalize_records(data, 'NDJSON')
|
|
99
103
|
|
|
100
104
|
if not rows:
|
|
101
105
|
return 0
|
|
102
106
|
|
|
103
|
-
path
|
|
107
|
+
ensure_parent_dir(path)
|
|
104
108
|
with path.open('w', encoding='utf-8') as handle:
|
|
105
109
|
for row in rows:
|
|
106
110
|
handle.write(json.dumps(row, ensure_ascii=False))
|
etlplus/file/numbers.py
CHANGED
|
@@ -16,11 +16,11 @@ Notes
|
|
|
16
16
|
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
-
from pathlib import Path
|
|
20
|
-
|
|
21
19
|
from ..types import JSONData
|
|
22
20
|
from ..types import JSONList
|
|
21
|
+
from ..types import StrPath
|
|
23
22
|
from . import stub
|
|
23
|
+
from ._io import coerce_path
|
|
24
24
|
|
|
25
25
|
# SECTION: EXPORTS ========================================================== #
|
|
26
26
|
|
|
@@ -36,14 +36,14 @@ __all__ = [
|
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
def read(
|
|
39
|
-
path:
|
|
39
|
+
path: StrPath,
|
|
40
40
|
) -> JSONList:
|
|
41
41
|
"""
|
|
42
42
|
Read NUMBERS content from *path*.
|
|
43
43
|
|
|
44
44
|
Parameters
|
|
45
45
|
----------
|
|
46
|
-
path :
|
|
46
|
+
path : StrPath
|
|
47
47
|
Path to the NUMBERS file on disk.
|
|
48
48
|
|
|
49
49
|
Returns
|
|
@@ -55,7 +55,7 @@ def read(
|
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
def write(
|
|
58
|
-
path:
|
|
58
|
+
path: StrPath,
|
|
59
59
|
data: JSONData,
|
|
60
60
|
) -> int:
|
|
61
61
|
"""
|
|
@@ -63,7 +63,7 @@ def write(
|
|
|
63
63
|
|
|
64
64
|
Parameters
|
|
65
65
|
----------
|
|
66
|
-
path :
|
|
66
|
+
path : StrPath
|
|
67
67
|
Path to the NUMBERS file on disk.
|
|
68
68
|
data : JSONData
|
|
69
69
|
Data to write as NUMBERS file. Should be a list of dictionaries or a
|
|
@@ -74,4 +74,5 @@ def write(
|
|
|
74
74
|
int
|
|
75
75
|
The number of rows written to the NUMBERS file.
|
|
76
76
|
"""
|
|
77
|
+
path = coerce_path(path)
|
|
77
78
|
return stub.write(path, data, format_name='NUMBERS')
|
etlplus/file/ods.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.ods` module.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
implemented yet).
|
|
4
|
+
Helpers for reading/writing OpenDocument (ODS) spreadsheet files.
|
|
6
5
|
|
|
7
6
|
Notes
|
|
8
7
|
-----
|
|
@@ -20,11 +19,15 @@ Notes
|
|
|
20
19
|
|
|
21
20
|
from __future__ import annotations
|
|
22
21
|
|
|
23
|
-
from
|
|
22
|
+
from typing import cast
|
|
24
23
|
|
|
25
24
|
from ..types import JSONData
|
|
26
25
|
from ..types import JSONList
|
|
27
|
-
from
|
|
26
|
+
from ..types import StrPath
|
|
27
|
+
from ._imports import get_pandas
|
|
28
|
+
from ._io import coerce_path
|
|
29
|
+
from ._io import ensure_parent_dir
|
|
30
|
+
from ._io import normalize_records
|
|
28
31
|
|
|
29
32
|
# SECTION: EXPORTS ========================================================== #
|
|
30
33
|
|
|
@@ -40,26 +43,40 @@ __all__ = [
|
|
|
40
43
|
|
|
41
44
|
|
|
42
45
|
def read(
|
|
43
|
-
path:
|
|
46
|
+
path: StrPath,
|
|
44
47
|
) -> JSONList:
|
|
45
48
|
"""
|
|
46
49
|
Read ODS content from *path*.
|
|
47
50
|
|
|
48
51
|
Parameters
|
|
49
52
|
----------
|
|
50
|
-
path :
|
|
53
|
+
path : StrPath
|
|
51
54
|
Path to the ODS file on disk.
|
|
52
55
|
|
|
53
56
|
Returns
|
|
54
57
|
-------
|
|
55
58
|
JSONList
|
|
56
59
|
The list of dictionaries read from the ODS file.
|
|
60
|
+
|
|
61
|
+
Raises
|
|
62
|
+
------
|
|
63
|
+
ImportError
|
|
64
|
+
If optional dependencies for ODS support are missing.
|
|
57
65
|
"""
|
|
58
|
-
|
|
66
|
+
path = coerce_path(path)
|
|
67
|
+
pandas = get_pandas('ODS')
|
|
68
|
+
try:
|
|
69
|
+
frame = pandas.read_excel(path, engine='odf')
|
|
70
|
+
except ImportError as err: # pragma: no cover
|
|
71
|
+
raise ImportError(
|
|
72
|
+
'ODS support requires optional dependency "odfpy".\n'
|
|
73
|
+
'Install with: pip install odfpy',
|
|
74
|
+
) from err
|
|
75
|
+
return cast(JSONList, frame.to_dict(orient='records'))
|
|
59
76
|
|
|
60
77
|
|
|
61
78
|
def write(
|
|
62
|
-
path:
|
|
79
|
+
path: StrPath,
|
|
63
80
|
data: JSONData,
|
|
64
81
|
) -> int:
|
|
65
82
|
"""
|
|
@@ -67,15 +84,35 @@ def write(
|
|
|
67
84
|
|
|
68
85
|
Parameters
|
|
69
86
|
----------
|
|
70
|
-
path :
|
|
87
|
+
path : StrPath
|
|
71
88
|
Path to the ODS file on disk.
|
|
72
89
|
data : JSONData
|
|
73
|
-
Data to write as ODS
|
|
90
|
+
Data to write as ODS. Should be a list of dictionaries or a
|
|
74
91
|
single dictionary.
|
|
75
92
|
|
|
76
93
|
Returns
|
|
77
94
|
-------
|
|
78
95
|
int
|
|
79
96
|
The number of rows written to the ODS file.
|
|
97
|
+
|
|
98
|
+
Raises
|
|
99
|
+
------
|
|
100
|
+
ImportError
|
|
101
|
+
If optional dependencies for ODS support are missing.
|
|
80
102
|
"""
|
|
81
|
-
|
|
103
|
+
path = coerce_path(path)
|
|
104
|
+
records = normalize_records(data, 'ODS')
|
|
105
|
+
if not records:
|
|
106
|
+
return 0
|
|
107
|
+
|
|
108
|
+
pandas = get_pandas('ODS')
|
|
109
|
+
ensure_parent_dir(path)
|
|
110
|
+
frame = pandas.DataFrame.from_records(records)
|
|
111
|
+
try:
|
|
112
|
+
frame.to_excel(path, index=False, engine='odf')
|
|
113
|
+
except ImportError as err: # pragma: no cover
|
|
114
|
+
raise ImportError(
|
|
115
|
+
'ODS support requires optional dependency "odfpy".\n'
|
|
116
|
+
'Install with: pip install odfpy',
|
|
117
|
+
) from err
|
|
118
|
+
return len(records)
|
etlplus/file/orc.py
CHANGED
|
@@ -18,12 +18,15 @@ Notes
|
|
|
18
18
|
|
|
19
19
|
from __future__ import annotations
|
|
20
20
|
|
|
21
|
-
from pathlib import Path
|
|
22
21
|
from typing import cast
|
|
23
22
|
|
|
24
23
|
from ..types import JSONData
|
|
25
24
|
from ..types import JSONList
|
|
25
|
+
from ..types import StrPath
|
|
26
|
+
from ._imports import get_dependency
|
|
26
27
|
from ._imports import get_pandas
|
|
28
|
+
from ._io import coerce_path
|
|
29
|
+
from ._io import ensure_parent_dir
|
|
27
30
|
from ._io import normalize_records
|
|
28
31
|
|
|
29
32
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -40,39 +43,30 @@ __all__ = [
|
|
|
40
43
|
|
|
41
44
|
|
|
42
45
|
def read(
|
|
43
|
-
path:
|
|
46
|
+
path: StrPath,
|
|
44
47
|
) -> JSONList:
|
|
45
48
|
"""
|
|
46
49
|
Read ORC content from *path*.
|
|
47
50
|
|
|
48
51
|
Parameters
|
|
49
52
|
----------
|
|
50
|
-
path :
|
|
53
|
+
path : StrPath
|
|
51
54
|
Path to the ORC file on disk.
|
|
52
55
|
|
|
53
56
|
Returns
|
|
54
57
|
-------
|
|
55
58
|
JSONList
|
|
56
59
|
The list of dictionaries read from the ORC file.
|
|
57
|
-
|
|
58
|
-
Raises
|
|
59
|
-
------
|
|
60
|
-
ImportError
|
|
61
|
-
When optional dependency "pyarrow" is missing.
|
|
62
60
|
"""
|
|
61
|
+
path = coerce_path(path)
|
|
62
|
+
get_dependency('pyarrow', format_name='ORC')
|
|
63
63
|
pandas = get_pandas('ORC')
|
|
64
|
-
|
|
65
|
-
frame = pandas.read_orc(path)
|
|
66
|
-
except ImportError as e: # pragma: no cover
|
|
67
|
-
raise ImportError(
|
|
68
|
-
'ORC support requires optional dependency "pyarrow".\n'
|
|
69
|
-
'Install with: pip install pyarrow',
|
|
70
|
-
) from e
|
|
64
|
+
frame = pandas.read_orc(path)
|
|
71
65
|
return cast(JSONList, frame.to_dict(orient='records'))
|
|
72
66
|
|
|
73
67
|
|
|
74
68
|
def write(
|
|
75
|
-
path:
|
|
69
|
+
path: StrPath,
|
|
76
70
|
data: JSONData,
|
|
77
71
|
) -> int:
|
|
78
72
|
"""
|
|
@@ -80,7 +74,7 @@ def write(
|
|
|
80
74
|
|
|
81
75
|
Parameters
|
|
82
76
|
----------
|
|
83
|
-
path :
|
|
77
|
+
path : StrPath
|
|
84
78
|
Path to the ORC file on disk.
|
|
85
79
|
data : JSONData
|
|
86
80
|
Data to write.
|
|
@@ -89,24 +83,15 @@ def write(
|
|
|
89
83
|
-------
|
|
90
84
|
int
|
|
91
85
|
Number of records written.
|
|
92
|
-
|
|
93
|
-
Raises
|
|
94
|
-
------
|
|
95
|
-
ImportError
|
|
96
|
-
When optional dependency "pyarrow" is missing.
|
|
97
86
|
"""
|
|
87
|
+
path = coerce_path(path)
|
|
98
88
|
records = normalize_records(data, 'ORC')
|
|
99
89
|
if not records:
|
|
100
90
|
return 0
|
|
101
91
|
|
|
92
|
+
get_dependency('pyarrow', format_name='ORC')
|
|
102
93
|
pandas = get_pandas('ORC')
|
|
103
|
-
path
|
|
94
|
+
ensure_parent_dir(path)
|
|
104
95
|
frame = pandas.DataFrame.from_records(records)
|
|
105
|
-
|
|
106
|
-
frame.to_orc(path, index=False)
|
|
107
|
-
except ImportError as e: # pragma: no cover
|
|
108
|
-
raise ImportError(
|
|
109
|
-
'ORC support requires optional dependency "pyarrow".\n'
|
|
110
|
-
'Install with: pip install pyarrow',
|
|
111
|
-
) from e
|
|
96
|
+
frame.to_orc(path, index=False)
|
|
112
97
|
return len(records)
|
etlplus/file/parquet.py
CHANGED
|
@@ -18,12 +18,14 @@ Notes
|
|
|
18
18
|
|
|
19
19
|
from __future__ import annotations
|
|
20
20
|
|
|
21
|
-
from pathlib import Path
|
|
22
21
|
from typing import cast
|
|
23
22
|
|
|
24
23
|
from ..types import JSONData
|
|
25
24
|
from ..types import JSONList
|
|
25
|
+
from ..types import StrPath
|
|
26
26
|
from ._imports import get_pandas
|
|
27
|
+
from ._io import coerce_path
|
|
28
|
+
from ._io import ensure_parent_dir
|
|
27
29
|
from ._io import normalize_records
|
|
28
30
|
|
|
29
31
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -40,14 +42,14 @@ __all__ = [
|
|
|
40
42
|
|
|
41
43
|
|
|
42
44
|
def read(
|
|
43
|
-
path:
|
|
45
|
+
path: StrPath,
|
|
44
46
|
) -> JSONList:
|
|
45
47
|
"""
|
|
46
48
|
Read Parquet content from *path*.
|
|
47
49
|
|
|
48
50
|
Parameters
|
|
49
51
|
----------
|
|
50
|
-
path :
|
|
52
|
+
path : StrPath
|
|
51
53
|
Path to the PARQUET file on disk.
|
|
52
54
|
|
|
53
55
|
Returns
|
|
@@ -60,6 +62,7 @@ def read(
|
|
|
60
62
|
ImportError
|
|
61
63
|
If optional dependencies for Parquet support are missing.
|
|
62
64
|
"""
|
|
65
|
+
path = coerce_path(path)
|
|
63
66
|
pandas = get_pandas('Parquet')
|
|
64
67
|
try:
|
|
65
68
|
frame = pandas.read_parquet(path)
|
|
@@ -73,7 +76,7 @@ def read(
|
|
|
73
76
|
|
|
74
77
|
|
|
75
78
|
def write(
|
|
76
|
-
path:
|
|
79
|
+
path: StrPath,
|
|
77
80
|
data: JSONData,
|
|
78
81
|
) -> int:
|
|
79
82
|
"""
|
|
@@ -81,7 +84,7 @@ def write(
|
|
|
81
84
|
|
|
82
85
|
Parameters
|
|
83
86
|
----------
|
|
84
|
-
path :
|
|
87
|
+
path : StrPath
|
|
85
88
|
Path to the PARQUET file on disk.
|
|
86
89
|
data : JSONData
|
|
87
90
|
Data to write.
|
|
@@ -96,12 +99,13 @@ def write(
|
|
|
96
99
|
ImportError
|
|
97
100
|
If optional dependencies for Parquet support are missing.
|
|
98
101
|
"""
|
|
102
|
+
path = coerce_path(path)
|
|
99
103
|
records = normalize_records(data, 'Parquet')
|
|
100
104
|
if not records:
|
|
101
105
|
return 0
|
|
102
106
|
|
|
103
107
|
pandas = get_pandas('Parquet')
|
|
104
|
-
path
|
|
108
|
+
ensure_parent_dir(path)
|
|
105
109
|
frame = pandas.DataFrame.from_records(records)
|
|
106
110
|
try:
|
|
107
111
|
frame.to_parquet(path, index=False)
|