etlplus 0.16.10__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/file/README.md +33 -0
- etlplus/file/_imports.py +35 -20
- etlplus/file/_io.py +138 -15
- etlplus/file/_r.py +48 -0
- etlplus/file/_sql.py +224 -0
- etlplus/file/accdb.py +7 -6
- etlplus/file/arrow.py +29 -10
- etlplus/file/avro.py +13 -10
- etlplus/file/bson.py +94 -10
- etlplus/file/cbor.py +29 -17
- etlplus/file/cfg.py +7 -6
- etlplus/file/conf.py +7 -6
- etlplus/file/core.py +1 -1
- etlplus/file/csv.py +8 -7
- etlplus/file/dat.py +52 -11
- etlplus/file/dta.py +36 -16
- etlplus/file/duckdb.py +72 -11
- etlplus/file/enums.py +29 -0
- etlplus/file/feather.py +15 -30
- etlplus/file/fwf.py +44 -10
- etlplus/file/gz.py +12 -7
- etlplus/file/hbs.py +7 -6
- etlplus/file/hdf5.py +71 -8
- etlplus/file/ini.py +60 -17
- etlplus/file/ion.py +7 -6
- etlplus/file/jinja2.py +7 -6
- etlplus/file/json.py +10 -11
- etlplus/file/log.py +7 -6
- etlplus/file/mat.py +7 -6
- etlplus/file/mdb.py +7 -6
- etlplus/file/msgpack.py +27 -15
- etlplus/file/mustache.py +7 -6
- etlplus/file/nc.py +69 -11
- etlplus/file/ndjson.py +10 -6
- etlplus/file/numbers.py +7 -6
- etlplus/file/ods.py +48 -11
- etlplus/file/orc.py +15 -30
- etlplus/file/parquet.py +10 -6
- etlplus/file/pb.py +36 -24
- etlplus/file/pbf.py +7 -6
- etlplus/file/properties.py +44 -18
- etlplus/file/proto.py +24 -18
- etlplus/file/psv.py +12 -11
- etlplus/file/rda.py +57 -15
- etlplus/file/rds.py +50 -14
- etlplus/file/sas7bdat.py +26 -16
- etlplus/file/sav.py +34 -16
- etlplus/file/sqlite.py +70 -10
- etlplus/file/stub.py +8 -6
- etlplus/file/sylk.py +7 -6
- etlplus/file/tab.py +13 -13
- etlplus/file/toml.py +56 -17
- etlplus/file/tsv.py +8 -7
- etlplus/file/txt.py +10 -7
- etlplus/file/vm.py +7 -6
- etlplus/file/wks.py +7 -6
- etlplus/file/xls.py +8 -5
- etlplus/file/xlsm.py +48 -10
- etlplus/file/xlsx.py +10 -6
- etlplus/file/xml.py +11 -9
- etlplus/file/xpt.py +46 -10
- etlplus/file/yaml.py +10 -11
- etlplus/file/zip.py +10 -5
- etlplus/file/zsav.py +7 -6
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/METADATA +44 -26
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/RECORD +70 -68
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0
etlplus/file/pb.py
CHANGED
|
@@ -1,29 +1,28 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.pb` module.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
yet).
|
|
4
|
+
Helpers for reading/writing Protocol Buffers binary (PB) files.
|
|
6
5
|
|
|
7
6
|
Notes
|
|
8
7
|
-----
|
|
9
|
-
- PB
|
|
10
|
-
for structured data.
|
|
8
|
+
- A PB file contains Protocol Buffers (Protobuf) binary-encoded messages.
|
|
11
9
|
- Common cases:
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
- Communication in distributed systems.
|
|
10
|
+
- Serialized payloads emitted by services or SDKs.
|
|
11
|
+
- Binary payload dumps for debugging or transport.
|
|
15
12
|
- Rule of thumb:
|
|
16
|
-
-
|
|
17
|
-
for reading and writing.
|
|
13
|
+
- Use this module when you need to store or transport raw protobuf bytes.
|
|
18
14
|
"""
|
|
19
15
|
|
|
20
16
|
from __future__ import annotations
|
|
21
17
|
|
|
22
|
-
|
|
18
|
+
import base64
|
|
23
19
|
|
|
24
20
|
from ..types import JSONData
|
|
25
|
-
from ..types import
|
|
26
|
-
from . import
|
|
21
|
+
from ..types import StrPath
|
|
22
|
+
from ._io import coerce_path
|
|
23
|
+
from ._io import ensure_parent_dir
|
|
24
|
+
from ._io import require_dict_payload
|
|
25
|
+
from ._io import require_str_key
|
|
27
26
|
|
|
28
27
|
# SECTION: EXPORTS ========================================================== #
|
|
29
28
|
|
|
@@ -39,26 +38,29 @@ __all__ = [
|
|
|
39
38
|
|
|
40
39
|
|
|
41
40
|
def read(
|
|
42
|
-
path:
|
|
43
|
-
) ->
|
|
41
|
+
path: StrPath,
|
|
42
|
+
) -> JSONData:
|
|
44
43
|
"""
|
|
45
44
|
Read PB content from *path*.
|
|
46
45
|
|
|
47
46
|
Parameters
|
|
48
47
|
----------
|
|
49
|
-
path :
|
|
48
|
+
path : StrPath
|
|
50
49
|
Path to the PB file on disk.
|
|
51
50
|
|
|
52
51
|
Returns
|
|
53
52
|
-------
|
|
54
|
-
|
|
55
|
-
The
|
|
53
|
+
JSONData
|
|
54
|
+
The structured data read from the PB file.
|
|
56
55
|
"""
|
|
57
|
-
|
|
56
|
+
path = coerce_path(path)
|
|
57
|
+
payload = path.read_bytes()
|
|
58
|
+
encoded = base64.b64encode(payload).decode('ascii')
|
|
59
|
+
return {'payload_base64': encoded}
|
|
58
60
|
|
|
59
61
|
|
|
60
62
|
def write(
|
|
61
|
-
path:
|
|
63
|
+
path: StrPath,
|
|
62
64
|
data: JSONData,
|
|
63
65
|
) -> int:
|
|
64
66
|
"""
|
|
@@ -66,15 +68,25 @@ def write(
|
|
|
66
68
|
|
|
67
69
|
Parameters
|
|
68
70
|
----------
|
|
69
|
-
path :
|
|
71
|
+
path : StrPath
|
|
70
72
|
Path to the PB file on disk.
|
|
71
73
|
data : JSONData
|
|
72
|
-
Data to write as PB. Should be a
|
|
73
|
-
single dictionary.
|
|
74
|
+
Data to write as PB. Should be a dictionary with ``payload_base64``.
|
|
74
75
|
|
|
75
76
|
Returns
|
|
76
77
|
-------
|
|
77
78
|
int
|
|
78
|
-
The number of
|
|
79
|
+
The number of records written to the PB file.
|
|
79
80
|
"""
|
|
80
|
-
|
|
81
|
+
path = coerce_path(path)
|
|
82
|
+
payload = require_dict_payload(data, format_name='PB')
|
|
83
|
+
payload_base64 = require_str_key(
|
|
84
|
+
payload,
|
|
85
|
+
format_name='PB',
|
|
86
|
+
key='payload_base64',
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
decoded = base64.b64decode(payload_base64.encode('ascii'))
|
|
90
|
+
ensure_parent_dir(path)
|
|
91
|
+
path.write_bytes(decoded)
|
|
92
|
+
return 1
|
etlplus/file/pbf.py
CHANGED
|
@@ -18,11 +18,11 @@ Notes
|
|
|
18
18
|
|
|
19
19
|
from __future__ import annotations
|
|
20
20
|
|
|
21
|
-
from pathlib import Path
|
|
22
|
-
|
|
23
21
|
from ..types import JSONData
|
|
24
22
|
from ..types import JSONList
|
|
23
|
+
from ..types import StrPath
|
|
25
24
|
from . import stub
|
|
25
|
+
from ._io import coerce_path
|
|
26
26
|
|
|
27
27
|
# SECTION: EXPORTS ========================================================== #
|
|
28
28
|
|
|
@@ -38,14 +38,14 @@ __all__ = [
|
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
def read(
|
|
41
|
-
path:
|
|
41
|
+
path: StrPath,
|
|
42
42
|
) -> JSONList:
|
|
43
43
|
"""
|
|
44
44
|
Read PBF content from *path*.
|
|
45
45
|
|
|
46
46
|
Parameters
|
|
47
47
|
----------
|
|
48
|
-
path :
|
|
48
|
+
path : StrPath
|
|
49
49
|
Path to the PBF file on disk.
|
|
50
50
|
|
|
51
51
|
Returns
|
|
@@ -57,7 +57,7 @@ def read(
|
|
|
57
57
|
|
|
58
58
|
|
|
59
59
|
def write(
|
|
60
|
-
path:
|
|
60
|
+
path: StrPath,
|
|
61
61
|
data: JSONData,
|
|
62
62
|
) -> int:
|
|
63
63
|
"""
|
|
@@ -65,7 +65,7 @@ def write(
|
|
|
65
65
|
|
|
66
66
|
Parameters
|
|
67
67
|
----------
|
|
68
|
-
path :
|
|
68
|
+
path : StrPath
|
|
69
69
|
Path to the PBF file on disk.
|
|
70
70
|
data : JSONData
|
|
71
71
|
Data to write as PBF. Should be a list of dictionaries or a
|
|
@@ -76,4 +76,5 @@ def write(
|
|
|
76
76
|
int
|
|
77
77
|
The number of rows written to the PBF file.
|
|
78
78
|
"""
|
|
79
|
+
path = coerce_path(path)
|
|
79
80
|
return stub.write(path, data, format_name='PBF')
|
etlplus/file/properties.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.properties` module.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
yet).
|
|
4
|
+
Helpers for reading/writing properties (PROPERTIES) files.
|
|
6
5
|
|
|
7
6
|
Notes
|
|
8
7
|
-----
|
|
@@ -19,11 +18,13 @@ Notes
|
|
|
19
18
|
|
|
20
19
|
from __future__ import annotations
|
|
21
20
|
|
|
22
|
-
from pathlib import Path
|
|
23
|
-
|
|
24
21
|
from ..types import JSONData
|
|
25
|
-
from ..types import
|
|
26
|
-
from
|
|
22
|
+
from ..types import JSONDict
|
|
23
|
+
from ..types import StrPath
|
|
24
|
+
from ._io import coerce_path
|
|
25
|
+
from ._io import ensure_parent_dir
|
|
26
|
+
from ._io import require_dict_payload
|
|
27
|
+
from ._io import stringify_value
|
|
27
28
|
|
|
28
29
|
# SECTION: EXPORTS ========================================================== #
|
|
29
30
|
|
|
@@ -39,26 +40,45 @@ __all__ = [
|
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
def read(
|
|
42
|
-
path:
|
|
43
|
-
) ->
|
|
43
|
+
path: StrPath,
|
|
44
|
+
) -> JSONData:
|
|
44
45
|
"""
|
|
45
46
|
Read PROPERTIES content from *path*.
|
|
46
47
|
|
|
47
48
|
Parameters
|
|
48
49
|
----------
|
|
49
|
-
path :
|
|
50
|
+
path : StrPath
|
|
50
51
|
Path to the PROPERTIES file on disk.
|
|
51
52
|
|
|
52
53
|
Returns
|
|
53
54
|
-------
|
|
54
|
-
|
|
55
|
-
The
|
|
55
|
+
JSONData
|
|
56
|
+
The structured data read from the PROPERTIES file.
|
|
56
57
|
"""
|
|
57
|
-
|
|
58
|
+
path = coerce_path(path)
|
|
59
|
+
payload: JSONDict = {}
|
|
60
|
+
for line in path.read_text(encoding='utf-8').splitlines():
|
|
61
|
+
stripped = line.strip()
|
|
62
|
+
if not stripped or stripped.startswith(('#', '!')):
|
|
63
|
+
continue
|
|
64
|
+
separator_index = -1
|
|
65
|
+
for sep in ('=', ':'):
|
|
66
|
+
if sep in stripped:
|
|
67
|
+
separator_index = stripped.find(sep)
|
|
68
|
+
break
|
|
69
|
+
if separator_index == -1:
|
|
70
|
+
key = stripped
|
|
71
|
+
value = ''
|
|
72
|
+
else:
|
|
73
|
+
key = stripped[:separator_index].strip()
|
|
74
|
+
value = stripped[separator_index + 1:].strip()
|
|
75
|
+
if key:
|
|
76
|
+
payload[key] = value
|
|
77
|
+
return payload
|
|
58
78
|
|
|
59
79
|
|
|
60
80
|
def write(
|
|
61
|
-
path:
|
|
81
|
+
path: StrPath,
|
|
62
82
|
data: JSONData,
|
|
63
83
|
) -> int:
|
|
64
84
|
"""
|
|
@@ -66,15 +86,21 @@ def write(
|
|
|
66
86
|
|
|
67
87
|
Parameters
|
|
68
88
|
----------
|
|
69
|
-
path :
|
|
89
|
+
path : StrPath
|
|
70
90
|
Path to the PROPERTIES file on disk.
|
|
71
91
|
data : JSONData
|
|
72
|
-
Data to write as PROPERTIES. Should be a
|
|
73
|
-
single dictionary.
|
|
92
|
+
Data to write as PROPERTIES. Should be a dictionary.
|
|
74
93
|
|
|
75
94
|
Returns
|
|
76
95
|
-------
|
|
77
96
|
int
|
|
78
|
-
The number of
|
|
97
|
+
The number of records written to the PROPERTIES file.
|
|
79
98
|
"""
|
|
80
|
-
|
|
99
|
+
path = coerce_path(path)
|
|
100
|
+
payload = require_dict_payload(data, format_name='PROPERTIES')
|
|
101
|
+
|
|
102
|
+
ensure_parent_dir(path)
|
|
103
|
+
with path.open('w', encoding='utf-8', newline='') as handle:
|
|
104
|
+
for key in sorted(payload.keys()):
|
|
105
|
+
handle.write(f'{key}={stringify_value(payload[key])}\n')
|
|
106
|
+
return 1
|
etlplus/file/proto.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.proto` module.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
implemented yet).
|
|
4
|
+
Helpers for reading/writing Protocol Buffers schema (PROTO) files.
|
|
6
5
|
|
|
7
6
|
Notes
|
|
8
7
|
-----
|
|
@@ -18,11 +17,12 @@ Notes
|
|
|
18
17
|
|
|
19
18
|
from __future__ import annotations
|
|
20
19
|
|
|
21
|
-
from pathlib import Path
|
|
22
|
-
|
|
23
20
|
from ..types import JSONData
|
|
24
|
-
from ..types import
|
|
25
|
-
from . import
|
|
21
|
+
from ..types import StrPath
|
|
22
|
+
from ._io import coerce_path
|
|
23
|
+
from ._io import ensure_parent_dir
|
|
24
|
+
from ._io import require_dict_payload
|
|
25
|
+
from ._io import require_str_key
|
|
26
26
|
|
|
27
27
|
# SECTION: EXPORTS ========================================================== #
|
|
28
28
|
|
|
@@ -38,26 +38,27 @@ __all__ = [
|
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
def read(
|
|
41
|
-
path:
|
|
42
|
-
) ->
|
|
41
|
+
path: StrPath,
|
|
42
|
+
) -> JSONData:
|
|
43
43
|
"""
|
|
44
44
|
Read PROTO content from *path*.
|
|
45
45
|
|
|
46
46
|
Parameters
|
|
47
47
|
----------
|
|
48
|
-
path :
|
|
48
|
+
path : StrPath
|
|
49
49
|
Path to the PROTO file on disk.
|
|
50
50
|
|
|
51
51
|
Returns
|
|
52
52
|
-------
|
|
53
|
-
|
|
54
|
-
The
|
|
53
|
+
JSONData
|
|
54
|
+
The structured data read from the PROTO file.
|
|
55
55
|
"""
|
|
56
|
-
|
|
56
|
+
path = coerce_path(path)
|
|
57
|
+
return {'schema': path.read_text(encoding='utf-8')}
|
|
57
58
|
|
|
58
59
|
|
|
59
60
|
def write(
|
|
60
|
-
path:
|
|
61
|
+
path: StrPath,
|
|
61
62
|
data: JSONData,
|
|
62
63
|
) -> int:
|
|
63
64
|
"""
|
|
@@ -65,15 +66,20 @@ def write(
|
|
|
65
66
|
|
|
66
67
|
Parameters
|
|
67
68
|
----------
|
|
68
|
-
path :
|
|
69
|
+
path : StrPath
|
|
69
70
|
Path to the PROTO file on disk.
|
|
70
71
|
data : JSONData
|
|
71
|
-
Data to write as PROTO. Should be a
|
|
72
|
-
single dictionary.
|
|
72
|
+
Data to write as PROTO. Should be a dictionary with ``schema``.
|
|
73
73
|
|
|
74
74
|
Returns
|
|
75
75
|
-------
|
|
76
76
|
int
|
|
77
|
-
The number of
|
|
77
|
+
The number of records written to the PROTO file.
|
|
78
78
|
"""
|
|
79
|
-
|
|
79
|
+
path = coerce_path(path)
|
|
80
|
+
payload = require_dict_payload(data, format_name='PROTO')
|
|
81
|
+
schema = require_str_key(payload, format_name='PROTO', key='schema')
|
|
82
|
+
|
|
83
|
+
ensure_parent_dir(path)
|
|
84
|
+
path.write_text(schema, encoding='utf-8')
|
|
85
|
+
return 1
|
etlplus/file/psv.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.psv` module.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
implemented yet).
|
|
4
|
+
Helpers for reading/writing Pipe-Separated Values (PSV) files.
|
|
6
5
|
|
|
7
6
|
Notes
|
|
8
7
|
-----
|
|
@@ -20,11 +19,12 @@ Notes
|
|
|
20
19
|
|
|
21
20
|
from __future__ import annotations
|
|
22
21
|
|
|
23
|
-
from pathlib import Path
|
|
24
|
-
|
|
25
22
|
from ..types import JSONData
|
|
26
23
|
from ..types import JSONList
|
|
27
|
-
from
|
|
24
|
+
from ..types import StrPath
|
|
25
|
+
from ._io import coerce_path
|
|
26
|
+
from ._io import read_delimited
|
|
27
|
+
from ._io import write_delimited
|
|
28
28
|
|
|
29
29
|
# SECTION: EXPORTS ========================================================== #
|
|
30
30
|
|
|
@@ -40,14 +40,14 @@ __all__ = [
|
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
def read(
|
|
43
|
-
path:
|
|
43
|
+
path: StrPath,
|
|
44
44
|
) -> JSONList:
|
|
45
45
|
"""
|
|
46
46
|
Read PSV content from *path*.
|
|
47
47
|
|
|
48
48
|
Parameters
|
|
49
49
|
----------
|
|
50
|
-
path :
|
|
50
|
+
path : StrPath
|
|
51
51
|
Path to the PSV file on disk.
|
|
52
52
|
|
|
53
53
|
Returns
|
|
@@ -55,11 +55,11 @@ def read(
|
|
|
55
55
|
JSONList
|
|
56
56
|
The list of dictionaries read from the PSV file.
|
|
57
57
|
"""
|
|
58
|
-
return
|
|
58
|
+
return read_delimited(path, delimiter='|')
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
def write(
|
|
62
|
-
path:
|
|
62
|
+
path: StrPath,
|
|
63
63
|
data: JSONData,
|
|
64
64
|
) -> int:
|
|
65
65
|
"""
|
|
@@ -67,7 +67,7 @@ def write(
|
|
|
67
67
|
|
|
68
68
|
Parameters
|
|
69
69
|
----------
|
|
70
|
-
path :
|
|
70
|
+
path : StrPath
|
|
71
71
|
Path to the PSV file on disk.
|
|
72
72
|
data : JSONData
|
|
73
73
|
Data to write as PSV file. Should be a list of dictionaries or a
|
|
@@ -78,4 +78,5 @@ def write(
|
|
|
78
78
|
int
|
|
79
79
|
The number of rows written to the PSV file.
|
|
80
80
|
"""
|
|
81
|
-
|
|
81
|
+
path = coerce_path(path)
|
|
82
|
+
return write_delimited(path, data, delimiter='|', format_name='PSV')
|
etlplus/file/rda.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.rda` module.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
implemented yet).
|
|
4
|
+
Helpers for reading/writing RData workspace/object bundle (RDA) files.
|
|
6
5
|
|
|
7
6
|
Notes
|
|
8
7
|
-----
|
|
@@ -19,11 +18,15 @@ Notes
|
|
|
19
18
|
|
|
20
19
|
from __future__ import annotations
|
|
21
20
|
|
|
22
|
-
from pathlib import Path
|
|
23
|
-
|
|
24
21
|
from ..types import JSONData
|
|
25
|
-
from ..types import
|
|
26
|
-
from
|
|
22
|
+
from ..types import JSONDict
|
|
23
|
+
from ..types import StrPath
|
|
24
|
+
from ._imports import get_dependency
|
|
25
|
+
from ._imports import get_pandas
|
|
26
|
+
from ._io import coerce_path
|
|
27
|
+
from ._io import ensure_parent_dir
|
|
28
|
+
from ._io import normalize_records
|
|
29
|
+
from ._r import coerce_r_object
|
|
27
30
|
|
|
28
31
|
# SECTION: EXPORTS ========================================================== #
|
|
29
32
|
|
|
@@ -39,26 +42,38 @@ __all__ = [
|
|
|
39
42
|
|
|
40
43
|
|
|
41
44
|
def read(
|
|
42
|
-
path:
|
|
43
|
-
) ->
|
|
45
|
+
path: StrPath,
|
|
46
|
+
) -> JSONData:
|
|
44
47
|
"""
|
|
45
48
|
Read RDA content from *path*.
|
|
46
49
|
|
|
47
50
|
Parameters
|
|
48
51
|
----------
|
|
49
|
-
path :
|
|
52
|
+
path : StrPath
|
|
50
53
|
Path to the RDA file on disk.
|
|
51
54
|
|
|
52
55
|
Returns
|
|
53
56
|
-------
|
|
54
|
-
|
|
55
|
-
The
|
|
57
|
+
JSONData
|
|
58
|
+
The structured data read from the RDA file.
|
|
56
59
|
"""
|
|
57
|
-
|
|
60
|
+
path = coerce_path(path)
|
|
61
|
+
pyreadr = get_dependency('pyreadr', format_name='RDA')
|
|
62
|
+
pandas = get_pandas('RDA')
|
|
63
|
+
result = pyreadr.read_r(str(path))
|
|
64
|
+
if not result:
|
|
65
|
+
return []
|
|
66
|
+
if len(result) == 1:
|
|
67
|
+
value = next(iter(result.values()))
|
|
68
|
+
return coerce_r_object(value, pandas)
|
|
69
|
+
payload: JSONDict = {}
|
|
70
|
+
for key, value in result.items():
|
|
71
|
+
payload[str(key)] = coerce_r_object(value, pandas)
|
|
72
|
+
return payload
|
|
58
73
|
|
|
59
74
|
|
|
60
75
|
def write(
|
|
61
|
-
path:
|
|
76
|
+
path: StrPath,
|
|
62
77
|
data: JSONData,
|
|
63
78
|
) -> int:
|
|
64
79
|
"""
|
|
@@ -66,7 +81,7 @@ def write(
|
|
|
66
81
|
|
|
67
82
|
Parameters
|
|
68
83
|
----------
|
|
69
|
-
path :
|
|
84
|
+
path : StrPath
|
|
70
85
|
Path to the RDA file on disk.
|
|
71
86
|
data : JSONData
|
|
72
87
|
Data to write as RDA file. Should be a list of dictionaries or a
|
|
@@ -76,5 +91,32 @@ def write(
|
|
|
76
91
|
-------
|
|
77
92
|
int
|
|
78
93
|
The number of rows written to the RDA file.
|
|
94
|
+
|
|
95
|
+
Raises
|
|
96
|
+
------
|
|
97
|
+
ImportError
|
|
98
|
+
If "pyreadr" is not installed with write support.
|
|
79
99
|
"""
|
|
80
|
-
|
|
100
|
+
path = coerce_path(path)
|
|
101
|
+
pyreadr = get_dependency('pyreadr', format_name='RDA')
|
|
102
|
+
pandas = get_pandas('RDA')
|
|
103
|
+
records = normalize_records(data, 'RDA')
|
|
104
|
+
frame = pandas.DataFrame.from_records(records)
|
|
105
|
+
count = len(records)
|
|
106
|
+
|
|
107
|
+
writer = getattr(pyreadr, 'write_rdata', None) or getattr(
|
|
108
|
+
pyreadr,
|
|
109
|
+
'write_rda',
|
|
110
|
+
None,
|
|
111
|
+
)
|
|
112
|
+
if writer is None:
|
|
113
|
+
raise ImportError(
|
|
114
|
+
'RDA write support requires "pyreadr" with write_rdata().',
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
ensure_parent_dir(path)
|
|
118
|
+
try:
|
|
119
|
+
writer(str(path), frame, df_name='data')
|
|
120
|
+
except TypeError:
|
|
121
|
+
writer(str(path), frame)
|
|
122
|
+
return count
|
etlplus/file/rds.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.rds` module.
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
Helpers for reading/writing R (RDS) data files.
|
|
5
5
|
|
|
6
6
|
Notes
|
|
7
7
|
-----
|
|
@@ -18,11 +18,15 @@ Notes
|
|
|
18
18
|
|
|
19
19
|
from __future__ import annotations
|
|
20
20
|
|
|
21
|
-
from pathlib import Path
|
|
22
|
-
|
|
23
21
|
from ..types import JSONData
|
|
24
|
-
from ..types import
|
|
25
|
-
from
|
|
22
|
+
from ..types import JSONDict
|
|
23
|
+
from ..types import StrPath
|
|
24
|
+
from ._imports import get_dependency
|
|
25
|
+
from ._imports import get_pandas
|
|
26
|
+
from ._io import coerce_path
|
|
27
|
+
from ._io import ensure_parent_dir
|
|
28
|
+
from ._io import normalize_records
|
|
29
|
+
from ._r import coerce_r_object
|
|
26
30
|
|
|
27
31
|
# SECTION: EXPORTS ========================================================== #
|
|
28
32
|
|
|
@@ -38,26 +42,38 @@ __all__ = [
|
|
|
38
42
|
|
|
39
43
|
|
|
40
44
|
def read(
|
|
41
|
-
path:
|
|
42
|
-
) ->
|
|
45
|
+
path: StrPath,
|
|
46
|
+
) -> JSONData:
|
|
43
47
|
"""
|
|
44
48
|
Read RDS content from *path*.
|
|
45
49
|
|
|
46
50
|
Parameters
|
|
47
51
|
----------
|
|
48
|
-
path :
|
|
52
|
+
path : StrPath
|
|
49
53
|
Path to the RDS file on disk.
|
|
50
54
|
|
|
51
55
|
Returns
|
|
52
56
|
-------
|
|
53
|
-
|
|
54
|
-
The
|
|
57
|
+
JSONData
|
|
58
|
+
The structured data read from the RDS file.
|
|
55
59
|
"""
|
|
56
|
-
|
|
60
|
+
path = coerce_path(path)
|
|
61
|
+
pyreadr = get_dependency('pyreadr', format_name='RDS')
|
|
62
|
+
pandas = get_pandas('RDS')
|
|
63
|
+
result = pyreadr.read_r(str(path))
|
|
64
|
+
if not result:
|
|
65
|
+
return []
|
|
66
|
+
if len(result) == 1:
|
|
67
|
+
value = next(iter(result.values()))
|
|
68
|
+
return coerce_r_object(value, pandas)
|
|
69
|
+
payload: JSONDict = {}
|
|
70
|
+
for key, value in result.items():
|
|
71
|
+
payload[str(key)] = coerce_r_object(value, pandas)
|
|
72
|
+
return payload
|
|
57
73
|
|
|
58
74
|
|
|
59
75
|
def write(
|
|
60
|
-
path:
|
|
76
|
+
path: StrPath,
|
|
61
77
|
data: JSONData,
|
|
62
78
|
) -> int:
|
|
63
79
|
"""
|
|
@@ -65,7 +81,7 @@ def write(
|
|
|
65
81
|
|
|
66
82
|
Parameters
|
|
67
83
|
----------
|
|
68
|
-
path :
|
|
84
|
+
path : StrPath
|
|
69
85
|
Path to the RDS file on disk.
|
|
70
86
|
data : JSONData
|
|
71
87
|
Data to write as RDS file. Should be a list of dictionaries or a
|
|
@@ -75,5 +91,25 @@ def write(
|
|
|
75
91
|
-------
|
|
76
92
|
int
|
|
77
93
|
The number of rows written to the RDS file.
|
|
94
|
+
|
|
95
|
+
Raises
|
|
96
|
+
------
|
|
97
|
+
ImportError
|
|
98
|
+
If "pyreadr" is not installed with write support.
|
|
78
99
|
"""
|
|
79
|
-
|
|
100
|
+
path = coerce_path(path)
|
|
101
|
+
pyreadr = get_dependency('pyreadr', format_name='RDS')
|
|
102
|
+
pandas = get_pandas('RDS')
|
|
103
|
+
records = normalize_records(data, 'RDS')
|
|
104
|
+
frame = pandas.DataFrame.from_records(records)
|
|
105
|
+
count = len(records)
|
|
106
|
+
|
|
107
|
+
writer = getattr(pyreadr, 'write_rds', None)
|
|
108
|
+
if writer is None:
|
|
109
|
+
raise ImportError(
|
|
110
|
+
'RDS write support requires "pyreadr" with write_rds().',
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
ensure_parent_dir(path)
|
|
114
|
+
writer(str(path), frame)
|
|
115
|
+
return count
|