etlplus 0.16.10__py3-none-any.whl → 0.17.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/file/README.md +33 -0
- etlplus/file/arrow.py +35 -5
- etlplus/file/bson.py +50 -5
- etlplus/file/cbor.py +35 -11
- etlplus/file/dat.py +44 -6
- etlplus/file/dta.py +46 -11
- etlplus/file/duckdb.py +159 -5
- etlplus/file/enums.py +29 -0
- etlplus/file/fwf.py +37 -5
- etlplus/file/hdf5.py +41 -3
- etlplus/file/ini.py +62 -12
- etlplus/file/msgpack.py +33 -9
- etlplus/file/nc.py +55 -6
- etlplus/file/ods.py +39 -6
- etlplus/file/pb.py +32 -19
- etlplus/file/properties.py +52 -12
- etlplus/file/proto.py +24 -12
- etlplus/file/psv.py +5 -5
- etlplus/file/rda.py +83 -9
- etlplus/file/rds.py +76 -8
- etlplus/file/sas7bdat.py +41 -11
- etlplus/file/sav.py +40 -11
- etlplus/file/sqlite.py +123 -5
- etlplus/file/tab.py +6 -7
- etlplus/file/toml.py +54 -12
- etlplus/file/xlsm.py +39 -5
- etlplus/file/xpt.py +61 -5
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/METADATA +44 -26
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/RECORD +33 -33
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/WHEEL +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/top_level.txt +0 -0
etlplus/file/properties.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.properties` module.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
yet).
|
|
4
|
+
Helpers for reading/writing properties (PROPERTIES) files.
|
|
6
5
|
|
|
7
6
|
Notes
|
|
8
7
|
-----
|
|
@@ -20,10 +19,10 @@ Notes
|
|
|
20
19
|
from __future__ import annotations
|
|
21
20
|
|
|
22
21
|
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
23
|
|
|
24
24
|
from ..types import JSONData
|
|
25
|
-
from ..types import
|
|
26
|
-
from . import stub
|
|
25
|
+
from ..types import JSONDict
|
|
27
26
|
|
|
28
27
|
# SECTION: EXPORTS ========================================================== #
|
|
29
28
|
|
|
@@ -35,12 +34,22 @@ __all__ = [
|
|
|
35
34
|
]
|
|
36
35
|
|
|
37
36
|
|
|
37
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _stringify(value: Any) -> str:
|
|
41
|
+
"""Normalize properties values into strings."""
|
|
42
|
+
if value is None:
|
|
43
|
+
return ''
|
|
44
|
+
return str(value)
|
|
45
|
+
|
|
46
|
+
|
|
38
47
|
# SECTION: FUNCTIONS ======================================================== #
|
|
39
48
|
|
|
40
49
|
|
|
41
50
|
def read(
|
|
42
51
|
path: Path,
|
|
43
|
-
) ->
|
|
52
|
+
) -> JSONData:
|
|
44
53
|
"""
|
|
45
54
|
Read PROPERTIES content from *path*.
|
|
46
55
|
|
|
@@ -51,10 +60,28 @@ def read(
|
|
|
51
60
|
|
|
52
61
|
Returns
|
|
53
62
|
-------
|
|
54
|
-
|
|
55
|
-
The
|
|
63
|
+
JSONData
|
|
64
|
+
The structured data read from the PROPERTIES file.
|
|
56
65
|
"""
|
|
57
|
-
|
|
66
|
+
payload: JSONDict = {}
|
|
67
|
+
for line in path.read_text(encoding='utf-8').splitlines():
|
|
68
|
+
stripped = line.strip()
|
|
69
|
+
if not stripped or stripped.startswith(('#', '!')):
|
|
70
|
+
continue
|
|
71
|
+
separator_index = -1
|
|
72
|
+
for sep in ('=', ':'):
|
|
73
|
+
if sep in stripped:
|
|
74
|
+
separator_index = stripped.find(sep)
|
|
75
|
+
break
|
|
76
|
+
if separator_index == -1:
|
|
77
|
+
key = stripped
|
|
78
|
+
value = ''
|
|
79
|
+
else:
|
|
80
|
+
key = stripped[:separator_index].strip()
|
|
81
|
+
value = stripped[separator_index + 1:].strip()
|
|
82
|
+
if key:
|
|
83
|
+
payload[key] = value
|
|
84
|
+
return payload
|
|
58
85
|
|
|
59
86
|
|
|
60
87
|
def write(
|
|
@@ -69,12 +96,25 @@ def write(
|
|
|
69
96
|
path : Path
|
|
70
97
|
Path to the PROPERTIES file on disk.
|
|
71
98
|
data : JSONData
|
|
72
|
-
Data to write as PROPERTIES. Should be a
|
|
73
|
-
single dictionary.
|
|
99
|
+
Data to write as PROPERTIES. Should be a dictionary.
|
|
74
100
|
|
|
75
101
|
Returns
|
|
76
102
|
-------
|
|
77
103
|
int
|
|
78
|
-
The number of
|
|
104
|
+
The number of records written to the PROPERTIES file.
|
|
105
|
+
|
|
106
|
+
Raises
|
|
107
|
+
------
|
|
108
|
+
TypeError
|
|
109
|
+
If *data* is not a dictionary.
|
|
79
110
|
"""
|
|
80
|
-
|
|
111
|
+
if isinstance(data, list):
|
|
112
|
+
raise TypeError('PROPERTIES payloads must be a dict')
|
|
113
|
+
if not isinstance(data, dict):
|
|
114
|
+
raise TypeError('PROPERTIES payloads must be a dict')
|
|
115
|
+
|
|
116
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
117
|
+
with path.open('w', encoding='utf-8', newline='') as handle:
|
|
118
|
+
for key in sorted(data.keys()):
|
|
119
|
+
handle.write(f'{key}={_stringify(data[key])}\n')
|
|
120
|
+
return 1
|
etlplus/file/proto.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.proto` module.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
implemented yet).
|
|
4
|
+
Helpers for reading/writing Protocol Buffers schema (PROTO) files.
|
|
6
5
|
|
|
7
6
|
Notes
|
|
8
7
|
-----
|
|
@@ -21,8 +20,6 @@ from __future__ import annotations
|
|
|
21
20
|
from pathlib import Path
|
|
22
21
|
|
|
23
22
|
from ..types import JSONData
|
|
24
|
-
from ..types import JSONList
|
|
25
|
-
from . import stub
|
|
26
23
|
|
|
27
24
|
# SECTION: EXPORTS ========================================================== #
|
|
28
25
|
|
|
@@ -39,7 +36,7 @@ __all__ = [
|
|
|
39
36
|
|
|
40
37
|
def read(
|
|
41
38
|
path: Path,
|
|
42
|
-
) ->
|
|
39
|
+
) -> JSONData:
|
|
43
40
|
"""
|
|
44
41
|
Read PROTO content from *path*.
|
|
45
42
|
|
|
@@ -50,10 +47,10 @@ def read(
|
|
|
50
47
|
|
|
51
48
|
Returns
|
|
52
49
|
-------
|
|
53
|
-
|
|
54
|
-
The
|
|
50
|
+
JSONData
|
|
51
|
+
The structured data read from the PROTO file.
|
|
55
52
|
"""
|
|
56
|
-
return
|
|
53
|
+
return {'schema': path.read_text(encoding='utf-8')}
|
|
57
54
|
|
|
58
55
|
|
|
59
56
|
def write(
|
|
@@ -68,12 +65,27 @@ def write(
|
|
|
68
65
|
path : Path
|
|
69
66
|
Path to the PROTO file on disk.
|
|
70
67
|
data : JSONData
|
|
71
|
-
Data to write as PROTO. Should be a
|
|
72
|
-
single dictionary.
|
|
68
|
+
Data to write as PROTO. Should be a dictionary with ``schema``.
|
|
73
69
|
|
|
74
70
|
Returns
|
|
75
71
|
-------
|
|
76
72
|
int
|
|
77
|
-
The number of
|
|
73
|
+
The number of records written to the PROTO file.
|
|
74
|
+
|
|
75
|
+
Raises
|
|
76
|
+
------
|
|
77
|
+
TypeError
|
|
78
|
+
If *data* is not a dictionary or is missing a ``schema`` string.
|
|
78
79
|
"""
|
|
79
|
-
|
|
80
|
+
if isinstance(data, list):
|
|
81
|
+
raise TypeError('PROTO payloads must be a dict')
|
|
82
|
+
if not isinstance(data, dict):
|
|
83
|
+
raise TypeError('PROTO payloads must be a dict')
|
|
84
|
+
|
|
85
|
+
schema = data.get('schema')
|
|
86
|
+
if not isinstance(schema, str):
|
|
87
|
+
raise TypeError('PROTO payloads must include a "schema" string')
|
|
88
|
+
|
|
89
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
90
|
+
path.write_text(schema, encoding='utf-8')
|
|
91
|
+
return 1
|
etlplus/file/psv.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.psv` module.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
implemented yet).
|
|
4
|
+
Helpers for reading/writing Pipe-Separated Values (PSV) files.
|
|
6
5
|
|
|
7
6
|
Notes
|
|
8
7
|
-----
|
|
@@ -24,7 +23,8 @@ from pathlib import Path
|
|
|
24
23
|
|
|
25
24
|
from ..types import JSONData
|
|
26
25
|
from ..types import JSONList
|
|
27
|
-
from . import
|
|
26
|
+
from ._io import read_delimited
|
|
27
|
+
from ._io import write_delimited
|
|
28
28
|
|
|
29
29
|
# SECTION: EXPORTS ========================================================== #
|
|
30
30
|
|
|
@@ -55,7 +55,7 @@ def read(
|
|
|
55
55
|
JSONList
|
|
56
56
|
The list of dictionaries read from the PSV file.
|
|
57
57
|
"""
|
|
58
|
-
return
|
|
58
|
+
return read_delimited(path, delimiter='|')
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
def write(
|
|
@@ -78,4 +78,4 @@ def write(
|
|
|
78
78
|
int
|
|
79
79
|
The number of rows written to the PSV file.
|
|
80
80
|
"""
|
|
81
|
-
return
|
|
81
|
+
return write_delimited(path, data, delimiter='|')
|
etlplus/file/rda.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.rda` module.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
implemented yet).
|
|
4
|
+
Helpers for reading/writing RData workspace/object bundle (RDA) files.
|
|
6
5
|
|
|
7
6
|
Notes
|
|
8
7
|
-----
|
|
@@ -20,10 +19,13 @@ Notes
|
|
|
20
19
|
from __future__ import annotations
|
|
21
20
|
|
|
22
21
|
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
23
|
|
|
24
24
|
from ..types import JSONData
|
|
25
|
-
from ..types import
|
|
26
|
-
from . import
|
|
25
|
+
from ..types import JSONDict
|
|
26
|
+
from ._imports import get_optional_module
|
|
27
|
+
from ._imports import get_pandas
|
|
28
|
+
from ._io import normalize_records
|
|
27
29
|
|
|
28
30
|
# SECTION: EXPORTS ========================================================== #
|
|
29
31
|
|
|
@@ -35,12 +37,38 @@ __all__ = [
|
|
|
35
37
|
]
|
|
36
38
|
|
|
37
39
|
|
|
40
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _get_pyreadr() -> Any:
|
|
44
|
+
"""Return the pyreadr module, importing it on first use."""
|
|
45
|
+
return get_optional_module(
|
|
46
|
+
'pyreadr',
|
|
47
|
+
error_message=(
|
|
48
|
+
'RDA support requires optional dependency "pyreadr".\n'
|
|
49
|
+
'Install with: pip install pyreadr'
|
|
50
|
+
),
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _coerce_r_object(value: Any, pandas: Any) -> JSONData:
|
|
55
|
+
if isinstance(value, pandas.DataFrame):
|
|
56
|
+
return value.to_dict(orient='records')
|
|
57
|
+
if isinstance(value, dict):
|
|
58
|
+
return value
|
|
59
|
+
if isinstance(value, list) and all(
|
|
60
|
+
isinstance(item, dict) for item in value
|
|
61
|
+
):
|
|
62
|
+
return value
|
|
63
|
+
return {'value': value}
|
|
64
|
+
|
|
65
|
+
|
|
38
66
|
# SECTION: FUNCTIONS ======================================================== #
|
|
39
67
|
|
|
40
68
|
|
|
41
69
|
def read(
|
|
42
70
|
path: Path,
|
|
43
|
-
) ->
|
|
71
|
+
) -> JSONData:
|
|
44
72
|
"""
|
|
45
73
|
Read RDA content from *path*.
|
|
46
74
|
|
|
@@ -51,10 +79,21 @@ def read(
|
|
|
51
79
|
|
|
52
80
|
Returns
|
|
53
81
|
-------
|
|
54
|
-
|
|
55
|
-
The
|
|
82
|
+
JSONData
|
|
83
|
+
The structured data read from the RDA file.
|
|
56
84
|
"""
|
|
57
|
-
|
|
85
|
+
pyreadr = _get_pyreadr()
|
|
86
|
+
pandas = get_pandas('RDA')
|
|
87
|
+
result = pyreadr.read_r(str(path))
|
|
88
|
+
if not result:
|
|
89
|
+
return []
|
|
90
|
+
if len(result) == 1:
|
|
91
|
+
value = next(iter(result.values()))
|
|
92
|
+
return _coerce_r_object(value, pandas)
|
|
93
|
+
payload: JSONDict = {}
|
|
94
|
+
for key, value in result.items():
|
|
95
|
+
payload[str(key)] = _coerce_r_object(value, pandas)
|
|
96
|
+
return payload
|
|
58
97
|
|
|
59
98
|
|
|
60
99
|
def write(
|
|
@@ -76,5 +115,40 @@ def write(
|
|
|
76
115
|
-------
|
|
77
116
|
int
|
|
78
117
|
The number of rows written to the RDA file.
|
|
118
|
+
|
|
119
|
+
Raises
|
|
120
|
+
------
|
|
121
|
+
ImportError
|
|
122
|
+
If "pyreadr" is not installed with write support.
|
|
123
|
+
TypeError
|
|
124
|
+
If *data* is not a dictionary or list of dictionaries.
|
|
79
125
|
"""
|
|
80
|
-
|
|
126
|
+
pyreadr = _get_pyreadr()
|
|
127
|
+
pandas = get_pandas('RDA')
|
|
128
|
+
|
|
129
|
+
if isinstance(data, list):
|
|
130
|
+
records = normalize_records(data, 'RDA')
|
|
131
|
+
frame = pandas.DataFrame.from_records(records)
|
|
132
|
+
count = len(records)
|
|
133
|
+
elif isinstance(data, dict):
|
|
134
|
+
frame = pandas.DataFrame.from_records([data])
|
|
135
|
+
count = 1
|
|
136
|
+
else:
|
|
137
|
+
raise TypeError('RDA payloads must be a dict or list of dicts')
|
|
138
|
+
|
|
139
|
+
writer = getattr(pyreadr, 'write_rdata', None) or getattr(
|
|
140
|
+
pyreadr,
|
|
141
|
+
'write_rda',
|
|
142
|
+
None,
|
|
143
|
+
)
|
|
144
|
+
if writer is None:
|
|
145
|
+
raise ImportError(
|
|
146
|
+
'RDA write support requires "pyreadr" with write_rdata().',
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
150
|
+
try:
|
|
151
|
+
writer(str(path), frame, df_name='data')
|
|
152
|
+
except TypeError:
|
|
153
|
+
writer(str(path), frame)
|
|
154
|
+
return count
|
etlplus/file/rds.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.rds` module.
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
Helpers for reading/writing R (RDS) data files.
|
|
5
5
|
|
|
6
6
|
Notes
|
|
7
7
|
-----
|
|
@@ -19,10 +19,13 @@ Notes
|
|
|
19
19
|
from __future__ import annotations
|
|
20
20
|
|
|
21
21
|
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
22
23
|
|
|
23
24
|
from ..types import JSONData
|
|
24
|
-
from ..types import
|
|
25
|
-
from . import
|
|
25
|
+
from ..types import JSONDict
|
|
26
|
+
from ._imports import get_optional_module
|
|
27
|
+
from ._imports import get_pandas
|
|
28
|
+
from ._io import normalize_records
|
|
26
29
|
|
|
27
30
|
# SECTION: EXPORTS ========================================================== #
|
|
28
31
|
|
|
@@ -34,12 +37,38 @@ __all__ = [
|
|
|
34
37
|
]
|
|
35
38
|
|
|
36
39
|
|
|
40
|
+
# SECTION: INTERNAL HELPERS ================================================ #
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _get_pyreadr() -> Any:
|
|
44
|
+
"""Return the pyreadr module, importing it on first use."""
|
|
45
|
+
return get_optional_module(
|
|
46
|
+
'pyreadr',
|
|
47
|
+
error_message=(
|
|
48
|
+
'RDS support requires optional dependency "pyreadr".\n'
|
|
49
|
+
'Install with: pip install pyreadr'
|
|
50
|
+
),
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _coerce_r_object(value: Any, pandas: Any) -> JSONData:
|
|
55
|
+
if isinstance(value, pandas.DataFrame):
|
|
56
|
+
return value.to_dict(orient='records')
|
|
57
|
+
if isinstance(value, dict):
|
|
58
|
+
return value
|
|
59
|
+
if isinstance(value, list) and all(
|
|
60
|
+
isinstance(item, dict) for item in value
|
|
61
|
+
):
|
|
62
|
+
return value
|
|
63
|
+
return {'value': value}
|
|
64
|
+
|
|
65
|
+
|
|
37
66
|
# SECTION: FUNCTIONS ======================================================== #
|
|
38
67
|
|
|
39
68
|
|
|
40
69
|
def read(
|
|
41
70
|
path: Path,
|
|
42
|
-
) ->
|
|
71
|
+
) -> JSONData:
|
|
43
72
|
"""
|
|
44
73
|
Read RDS content from *path*.
|
|
45
74
|
|
|
@@ -50,10 +79,21 @@ def read(
|
|
|
50
79
|
|
|
51
80
|
Returns
|
|
52
81
|
-------
|
|
53
|
-
|
|
54
|
-
The
|
|
82
|
+
JSONData
|
|
83
|
+
The structured data read from the RDS file.
|
|
55
84
|
"""
|
|
56
|
-
|
|
85
|
+
pyreadr = _get_pyreadr()
|
|
86
|
+
pandas = get_pandas('RDS')
|
|
87
|
+
result = pyreadr.read_r(str(path))
|
|
88
|
+
if not result:
|
|
89
|
+
return []
|
|
90
|
+
if len(result) == 1:
|
|
91
|
+
value = next(iter(result.values()))
|
|
92
|
+
return _coerce_r_object(value, pandas)
|
|
93
|
+
payload: JSONDict = {}
|
|
94
|
+
for key, value in result.items():
|
|
95
|
+
payload[str(key)] = _coerce_r_object(value, pandas)
|
|
96
|
+
return payload
|
|
57
97
|
|
|
58
98
|
|
|
59
99
|
def write(
|
|
@@ -75,5 +115,33 @@ def write(
|
|
|
75
115
|
-------
|
|
76
116
|
int
|
|
77
117
|
The number of rows written to the RDS file.
|
|
118
|
+
|
|
119
|
+
Raises
|
|
120
|
+
------
|
|
121
|
+
ImportError
|
|
122
|
+
If "pyreadr" is not installed with write support.
|
|
123
|
+
TypeError
|
|
124
|
+
If *data* is not a dictionary or list of dictionaries.
|
|
78
125
|
"""
|
|
79
|
-
|
|
126
|
+
pyreadr = _get_pyreadr()
|
|
127
|
+
pandas = get_pandas('RDS')
|
|
128
|
+
|
|
129
|
+
if isinstance(data, list):
|
|
130
|
+
records = normalize_records(data, 'RDS')
|
|
131
|
+
frame = pandas.DataFrame.from_records(records)
|
|
132
|
+
count = len(records)
|
|
133
|
+
elif isinstance(data, dict):
|
|
134
|
+
frame = pandas.DataFrame.from_records([data])
|
|
135
|
+
count = 1
|
|
136
|
+
else:
|
|
137
|
+
raise TypeError('RDS payloads must be a dict or list of dicts')
|
|
138
|
+
|
|
139
|
+
writer = getattr(pyreadr, 'write_rds', None)
|
|
140
|
+
if writer is None:
|
|
141
|
+
raise ImportError(
|
|
142
|
+
'RDS write support requires "pyreadr" with write_rds().',
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
146
|
+
writer(str(path), frame)
|
|
147
|
+
return count
|
etlplus/file/sas7bdat.py
CHANGED
|
@@ -1,29 +1,31 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.sas7bdat` module.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
yet).
|
|
4
|
+
Helpers for reading/writing SAS (SAS7BDAT) data files.
|
|
6
5
|
|
|
7
6
|
Notes
|
|
8
7
|
-----
|
|
9
|
-
- A SAS7BDAT file is a binary file format
|
|
10
|
-
including variables, labels, and data types.
|
|
8
|
+
- A SAS7BDAT file is a proprietary binary file format created by SAS to store
|
|
9
|
+
datasets, including variables, labels, and data types.
|
|
11
10
|
- Common cases:
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
- Custom formats specific to certain applications.
|
|
11
|
+
- Statistical analysis pipelines.
|
|
12
|
+
- Data exchange with SAS tooling.
|
|
15
13
|
- Rule of thumb:
|
|
16
|
-
- If the file
|
|
17
|
-
|
|
14
|
+
- If the file follows the SAS7BDAT specification, use this module for
|
|
15
|
+
reading and writing.
|
|
18
16
|
"""
|
|
19
17
|
|
|
20
18
|
from __future__ import annotations
|
|
21
19
|
|
|
22
20
|
from pathlib import Path
|
|
21
|
+
from typing import Any
|
|
22
|
+
from typing import cast
|
|
23
23
|
|
|
24
24
|
from ..types import JSONData
|
|
25
25
|
from ..types import JSONList
|
|
26
26
|
from . import stub
|
|
27
|
+
from ._imports import get_optional_module
|
|
28
|
+
from ._imports import get_pandas
|
|
27
29
|
|
|
28
30
|
# SECTION: EXPORTS ========================================================== #
|
|
29
31
|
|
|
@@ -35,6 +37,27 @@ __all__ = [
|
|
|
35
37
|
]
|
|
36
38
|
|
|
37
39
|
|
|
40
|
+
# SECTION: INTERNAL HELPERS ================================================ #
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _get_pyreadstat() -> Any:
|
|
44
|
+
"""Return the pyreadstat module, importing it on first use."""
|
|
45
|
+
return get_optional_module(
|
|
46
|
+
'pyreadstat',
|
|
47
|
+
error_message=(
|
|
48
|
+
'SAS7BDAT support requires optional dependency "pyreadstat".\n'
|
|
49
|
+
'Install with: pip install pyreadstat'
|
|
50
|
+
),
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _raise_readstat_error(err: ImportError) -> None:
|
|
55
|
+
raise ImportError(
|
|
56
|
+
'SAS7BDAT support requires optional dependency "pyreadstat".\n'
|
|
57
|
+
'Install with: pip install pyreadstat',
|
|
58
|
+
) from err
|
|
59
|
+
|
|
60
|
+
|
|
38
61
|
# SECTION: FUNCTIONS ======================================================== #
|
|
39
62
|
|
|
40
63
|
|
|
@@ -42,7 +65,7 @@ def read(
|
|
|
42
65
|
path: Path,
|
|
43
66
|
) -> JSONList:
|
|
44
67
|
"""
|
|
45
|
-
Read
|
|
68
|
+
Read SAS7BDAT content from *path*.
|
|
46
69
|
|
|
47
70
|
Parameters
|
|
48
71
|
----------
|
|
@@ -54,7 +77,14 @@ def read(
|
|
|
54
77
|
JSONList
|
|
55
78
|
The list of dictionaries read from the SAS7BDAT file.
|
|
56
79
|
"""
|
|
57
|
-
|
|
80
|
+
pandas = get_pandas('SAS7BDAT')
|
|
81
|
+
try:
|
|
82
|
+
frame = pandas.read_sas(path, format='sas7bdat')
|
|
83
|
+
except TypeError:
|
|
84
|
+
frame = pandas.read_sas(path)
|
|
85
|
+
except ImportError as err: # pragma: no cover
|
|
86
|
+
_raise_readstat_error(err)
|
|
87
|
+
return cast(JSONList, frame.to_dict(orient='records'))
|
|
58
88
|
|
|
59
89
|
|
|
60
90
|
def write(
|
etlplus/file/sav.py
CHANGED
|
@@ -1,27 +1,31 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.sav` module.
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
Helpers for reading/writing SPSS (SAV) files.
|
|
5
5
|
|
|
6
6
|
Notes
|
|
7
7
|
-----
|
|
8
|
-
- A SAV file is a
|
|
9
|
-
variables, labels, and data types.
|
|
8
|
+
- A SAV file is a dataset created by SPSS.
|
|
10
9
|
- Common cases:
|
|
11
|
-
-
|
|
12
|
-
-
|
|
10
|
+
- Survey and market research datasets.
|
|
11
|
+
- Statistical analysis workflows.
|
|
12
|
+
- Exchange with SPSS and compatible tools.
|
|
13
13
|
- Rule of thumb:
|
|
14
|
-
- If
|
|
14
|
+
- If the file follows the SAV specification, use this module for reading
|
|
15
15
|
and writing.
|
|
16
16
|
"""
|
|
17
17
|
|
|
18
18
|
from __future__ import annotations
|
|
19
19
|
|
|
20
20
|
from pathlib import Path
|
|
21
|
+
from typing import Any
|
|
22
|
+
from typing import cast
|
|
21
23
|
|
|
22
24
|
from ..types import JSONData
|
|
23
25
|
from ..types import JSONList
|
|
24
|
-
from . import
|
|
26
|
+
from ._imports import get_optional_module
|
|
27
|
+
from ._imports import get_pandas
|
|
28
|
+
from ._io import normalize_records
|
|
25
29
|
|
|
26
30
|
# SECTION: EXPORTS ========================================================== #
|
|
27
31
|
|
|
@@ -33,6 +37,20 @@ __all__ = [
|
|
|
33
37
|
]
|
|
34
38
|
|
|
35
39
|
|
|
40
|
+
# SECTION: INTERNAL FUNCTION ================================================ #
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _get_pyreadstat() -> Any:
|
|
44
|
+
"""Return the pyreadstat module, importing it on first use."""
|
|
45
|
+
return get_optional_module(
|
|
46
|
+
'pyreadstat',
|
|
47
|
+
error_message=(
|
|
48
|
+
'SAV support requires optional dependency "pyreadstat".\n'
|
|
49
|
+
'Install with: pip install pyreadstat'
|
|
50
|
+
),
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
36
54
|
# SECTION: FUNCTIONS ======================================================== #
|
|
37
55
|
|
|
38
56
|
|
|
@@ -52,7 +70,9 @@ def read(
|
|
|
52
70
|
JSONList
|
|
53
71
|
The list of dictionaries read from the SAV file.
|
|
54
72
|
"""
|
|
55
|
-
|
|
73
|
+
pyreadstat = _get_pyreadstat()
|
|
74
|
+
frame, _meta = pyreadstat.read_sav(str(path))
|
|
75
|
+
return cast(JSONList, frame.to_dict(orient='records'))
|
|
56
76
|
|
|
57
77
|
|
|
58
78
|
def write(
|
|
@@ -60,14 +80,14 @@ def write(
|
|
|
60
80
|
data: JSONData,
|
|
61
81
|
) -> int:
|
|
62
82
|
"""
|
|
63
|
-
Write *data* to SAV
|
|
83
|
+
Write *data* to SAV at *path* and return record count.
|
|
64
84
|
|
|
65
85
|
Parameters
|
|
66
86
|
----------
|
|
67
87
|
path : Path
|
|
68
88
|
Path to the SAV file on disk.
|
|
69
89
|
data : JSONData
|
|
70
|
-
Data to write as SAV
|
|
90
|
+
Data to write as SAV. Should be a list of dictionaries or a
|
|
71
91
|
single dictionary.
|
|
72
92
|
|
|
73
93
|
Returns
|
|
@@ -75,4 +95,13 @@ def write(
|
|
|
75
95
|
int
|
|
76
96
|
The number of rows written to the SAV file.
|
|
77
97
|
"""
|
|
78
|
-
|
|
98
|
+
records = normalize_records(data, 'SAV')
|
|
99
|
+
if not records:
|
|
100
|
+
return 0
|
|
101
|
+
|
|
102
|
+
pyreadstat = _get_pyreadstat()
|
|
103
|
+
pandas = get_pandas('SAV')
|
|
104
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
105
|
+
frame = pandas.DataFrame.from_records(records)
|
|
106
|
+
pyreadstat.write_sav(frame, str(path))
|
|
107
|
+
return len(records)
|