etlplus 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/file/_imports.py +35 -20
- etlplus/file/_io.py +138 -15
- etlplus/file/_r.py +48 -0
- etlplus/file/_sql.py +224 -0
- etlplus/file/accdb.py +7 -6
- etlplus/file/arrow.py +13 -24
- etlplus/file/avro.py +13 -10
- etlplus/file/bson.py +61 -22
- etlplus/file/cbor.py +13 -25
- etlplus/file/cfg.py +7 -6
- etlplus/file/conf.py +7 -6
- etlplus/file/core.py +1 -1
- etlplus/file/csv.py +8 -7
- etlplus/file/dat.py +9 -6
- etlplus/file/dta.py +15 -30
- etlplus/file/duckdb.py +29 -122
- etlplus/file/feather.py +15 -30
- etlplus/file/fwf.py +16 -14
- etlplus/file/gz.py +12 -7
- etlplus/file/hbs.py +7 -6
- etlplus/file/hdf5.py +31 -6
- etlplus/file/ini.py +17 -24
- etlplus/file/ion.py +7 -6
- etlplus/file/jinja2.py +7 -6
- etlplus/file/json.py +10 -11
- etlplus/file/log.py +7 -6
- etlplus/file/mat.py +7 -6
- etlplus/file/mdb.py +7 -6
- etlplus/file/msgpack.py +13 -25
- etlplus/file/mustache.py +7 -6
- etlplus/file/nc.py +30 -21
- etlplus/file/ndjson.py +10 -6
- etlplus/file/numbers.py +7 -6
- etlplus/file/ods.py +10 -6
- etlplus/file/orc.py +15 -30
- etlplus/file/parquet.py +10 -6
- etlplus/file/pb.py +22 -23
- etlplus/file/pbf.py +7 -6
- etlplus/file/properties.py +15 -29
- etlplus/file/proto.py +14 -20
- etlplus/file/psv.py +8 -7
- etlplus/file/rda.py +19 -51
- etlplus/file/rds.py +19 -51
- etlplus/file/sas7bdat.py +10 -30
- etlplus/file/sav.py +13 -24
- etlplus/file/sqlite.py +25 -83
- etlplus/file/stub.py +8 -6
- etlplus/file/sylk.py +7 -6
- etlplus/file/tab.py +8 -7
- etlplus/file/toml.py +14 -17
- etlplus/file/tsv.py +8 -7
- etlplus/file/txt.py +10 -7
- etlplus/file/vm.py +7 -6
- etlplus/file/wks.py +7 -6
- etlplus/file/xls.py +8 -5
- etlplus/file/xlsm.py +10 -6
- etlplus/file/xlsx.py +10 -6
- etlplus/file/xml.py +11 -9
- etlplus/file/xpt.py +13 -33
- etlplus/file/yaml.py +10 -11
- etlplus/file/zip.py +10 -5
- etlplus/file/zsav.py +7 -6
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/METADATA +1 -1
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/RECORD +68 -66
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0
etlplus/file/properties.py
CHANGED
|
@@ -18,11 +18,13 @@ Notes
|
|
|
18
18
|
|
|
19
19
|
from __future__ import annotations
|
|
20
20
|
|
|
21
|
-
from pathlib import Path
|
|
22
|
-
from typing import Any
|
|
23
|
-
|
|
24
21
|
from ..types import JSONData
|
|
25
22
|
from ..types import JSONDict
|
|
23
|
+
from ..types import StrPath
|
|
24
|
+
from ._io import coerce_path
|
|
25
|
+
from ._io import ensure_parent_dir
|
|
26
|
+
from ._io import require_dict_payload
|
|
27
|
+
from ._io import stringify_value
|
|
26
28
|
|
|
27
29
|
# SECTION: EXPORTS ========================================================== #
|
|
28
30
|
|
|
@@ -34,28 +36,18 @@ __all__ = [
|
|
|
34
36
|
]
|
|
35
37
|
|
|
36
38
|
|
|
37
|
-
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def _stringify(value: Any) -> str:
|
|
41
|
-
"""Normalize properties values into strings."""
|
|
42
|
-
if value is None:
|
|
43
|
-
return ''
|
|
44
|
-
return str(value)
|
|
45
|
-
|
|
46
|
-
|
|
47
39
|
# SECTION: FUNCTIONS ======================================================== #
|
|
48
40
|
|
|
49
41
|
|
|
50
42
|
def read(
|
|
51
|
-
path:
|
|
43
|
+
path: StrPath,
|
|
52
44
|
) -> JSONData:
|
|
53
45
|
"""
|
|
54
46
|
Read PROPERTIES content from *path*.
|
|
55
47
|
|
|
56
48
|
Parameters
|
|
57
49
|
----------
|
|
58
|
-
path :
|
|
50
|
+
path : StrPath
|
|
59
51
|
Path to the PROPERTIES file on disk.
|
|
60
52
|
|
|
61
53
|
Returns
|
|
@@ -63,6 +55,7 @@ def read(
|
|
|
63
55
|
JSONData
|
|
64
56
|
The structured data read from the PROPERTIES file.
|
|
65
57
|
"""
|
|
58
|
+
path = coerce_path(path)
|
|
66
59
|
payload: JSONDict = {}
|
|
67
60
|
for line in path.read_text(encoding='utf-8').splitlines():
|
|
68
61
|
stripped = line.strip()
|
|
@@ -85,7 +78,7 @@ def read(
|
|
|
85
78
|
|
|
86
79
|
|
|
87
80
|
def write(
|
|
88
|
-
path:
|
|
81
|
+
path: StrPath,
|
|
89
82
|
data: JSONData,
|
|
90
83
|
) -> int:
|
|
91
84
|
"""
|
|
@@ -93,7 +86,7 @@ def write(
|
|
|
93
86
|
|
|
94
87
|
Parameters
|
|
95
88
|
----------
|
|
96
|
-
path :
|
|
89
|
+
path : StrPath
|
|
97
90
|
Path to the PROPERTIES file on disk.
|
|
98
91
|
data : JSONData
|
|
99
92
|
Data to write as PROPERTIES. Should be a dictionary.
|
|
@@ -102,19 +95,12 @@ def write(
|
|
|
102
95
|
-------
|
|
103
96
|
int
|
|
104
97
|
The number of records written to the PROPERTIES file.
|
|
105
|
-
|
|
106
|
-
Raises
|
|
107
|
-
------
|
|
108
|
-
TypeError
|
|
109
|
-
If *data* is not a dictionary.
|
|
110
98
|
"""
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
if not isinstance(data, dict):
|
|
114
|
-
raise TypeError('PROPERTIES payloads must be a dict')
|
|
99
|
+
path = coerce_path(path)
|
|
100
|
+
payload = require_dict_payload(data, format_name='PROPERTIES')
|
|
115
101
|
|
|
116
|
-
path
|
|
102
|
+
ensure_parent_dir(path)
|
|
117
103
|
with path.open('w', encoding='utf-8', newline='') as handle:
|
|
118
|
-
for key in sorted(
|
|
119
|
-
handle.write(f'{key}={
|
|
104
|
+
for key in sorted(payload.keys()):
|
|
105
|
+
handle.write(f'{key}={stringify_value(payload[key])}\n')
|
|
120
106
|
return 1
|
etlplus/file/proto.py
CHANGED
|
@@ -17,9 +17,12 @@ Notes
|
|
|
17
17
|
|
|
18
18
|
from __future__ import annotations
|
|
19
19
|
|
|
20
|
-
from pathlib import Path
|
|
21
|
-
|
|
22
20
|
from ..types import JSONData
|
|
21
|
+
from ..types import StrPath
|
|
22
|
+
from ._io import coerce_path
|
|
23
|
+
from ._io import ensure_parent_dir
|
|
24
|
+
from ._io import require_dict_payload
|
|
25
|
+
from ._io import require_str_key
|
|
23
26
|
|
|
24
27
|
# SECTION: EXPORTS ========================================================== #
|
|
25
28
|
|
|
@@ -35,14 +38,14 @@ __all__ = [
|
|
|
35
38
|
|
|
36
39
|
|
|
37
40
|
def read(
|
|
38
|
-
path:
|
|
41
|
+
path: StrPath,
|
|
39
42
|
) -> JSONData:
|
|
40
43
|
"""
|
|
41
44
|
Read PROTO content from *path*.
|
|
42
45
|
|
|
43
46
|
Parameters
|
|
44
47
|
----------
|
|
45
|
-
path :
|
|
48
|
+
path : StrPath
|
|
46
49
|
Path to the PROTO file on disk.
|
|
47
50
|
|
|
48
51
|
Returns
|
|
@@ -50,11 +53,12 @@ def read(
|
|
|
50
53
|
JSONData
|
|
51
54
|
The structured data read from the PROTO file.
|
|
52
55
|
"""
|
|
56
|
+
path = coerce_path(path)
|
|
53
57
|
return {'schema': path.read_text(encoding='utf-8')}
|
|
54
58
|
|
|
55
59
|
|
|
56
60
|
def write(
|
|
57
|
-
path:
|
|
61
|
+
path: StrPath,
|
|
58
62
|
data: JSONData,
|
|
59
63
|
) -> int:
|
|
60
64
|
"""
|
|
@@ -62,7 +66,7 @@ def write(
|
|
|
62
66
|
|
|
63
67
|
Parameters
|
|
64
68
|
----------
|
|
65
|
-
path :
|
|
69
|
+
path : StrPath
|
|
66
70
|
Path to the PROTO file on disk.
|
|
67
71
|
data : JSONData
|
|
68
72
|
Data to write as PROTO. Should be a dictionary with ``schema``.
|
|
@@ -71,21 +75,11 @@ def write(
|
|
|
71
75
|
-------
|
|
72
76
|
int
|
|
73
77
|
The number of records written to the PROTO file.
|
|
74
|
-
|
|
75
|
-
Raises
|
|
76
|
-
------
|
|
77
|
-
TypeError
|
|
78
|
-
If *data* is not a dictionary or is missing a ``schema`` string.
|
|
79
78
|
"""
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
raise TypeError('PROTO payloads must be a dict')
|
|
84
|
-
|
|
85
|
-
schema = data.get('schema')
|
|
86
|
-
if not isinstance(schema, str):
|
|
87
|
-
raise TypeError('PROTO payloads must include a "schema" string')
|
|
79
|
+
path = coerce_path(path)
|
|
80
|
+
payload = require_dict_payload(data, format_name='PROTO')
|
|
81
|
+
schema = require_str_key(payload, format_name='PROTO', key='schema')
|
|
88
82
|
|
|
89
|
-
path
|
|
83
|
+
ensure_parent_dir(path)
|
|
90
84
|
path.write_text(schema, encoding='utf-8')
|
|
91
85
|
return 1
|
etlplus/file/psv.py
CHANGED
|
@@ -19,10 +19,10 @@ Notes
|
|
|
19
19
|
|
|
20
20
|
from __future__ import annotations
|
|
21
21
|
|
|
22
|
-
from pathlib import Path
|
|
23
|
-
|
|
24
22
|
from ..types import JSONData
|
|
25
23
|
from ..types import JSONList
|
|
24
|
+
from ..types import StrPath
|
|
25
|
+
from ._io import coerce_path
|
|
26
26
|
from ._io import read_delimited
|
|
27
27
|
from ._io import write_delimited
|
|
28
28
|
|
|
@@ -40,14 +40,14 @@ __all__ = [
|
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
def read(
|
|
43
|
-
path:
|
|
43
|
+
path: StrPath,
|
|
44
44
|
) -> JSONList:
|
|
45
45
|
"""
|
|
46
46
|
Read PSV content from *path*.
|
|
47
47
|
|
|
48
48
|
Parameters
|
|
49
49
|
----------
|
|
50
|
-
path :
|
|
50
|
+
path : StrPath
|
|
51
51
|
Path to the PSV file on disk.
|
|
52
52
|
|
|
53
53
|
Returns
|
|
@@ -59,7 +59,7 @@ def read(
|
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
def write(
|
|
62
|
-
path:
|
|
62
|
+
path: StrPath,
|
|
63
63
|
data: JSONData,
|
|
64
64
|
) -> int:
|
|
65
65
|
"""
|
|
@@ -67,7 +67,7 @@ def write(
|
|
|
67
67
|
|
|
68
68
|
Parameters
|
|
69
69
|
----------
|
|
70
|
-
path :
|
|
70
|
+
path : StrPath
|
|
71
71
|
Path to the PSV file on disk.
|
|
72
72
|
data : JSONData
|
|
73
73
|
Data to write as PSV file. Should be a list of dictionaries or a
|
|
@@ -78,4 +78,5 @@ def write(
|
|
|
78
78
|
int
|
|
79
79
|
The number of rows written to the PSV file.
|
|
80
80
|
"""
|
|
81
|
-
|
|
81
|
+
path = coerce_path(path)
|
|
82
|
+
return write_delimited(path, data, delimiter='|', format_name='PSV')
|
etlplus/file/rda.py
CHANGED
|
@@ -18,14 +18,15 @@ Notes
|
|
|
18
18
|
|
|
19
19
|
from __future__ import annotations
|
|
20
20
|
|
|
21
|
-
from pathlib import Path
|
|
22
|
-
from typing import Any
|
|
23
|
-
|
|
24
21
|
from ..types import JSONData
|
|
25
22
|
from ..types import JSONDict
|
|
26
|
-
from
|
|
23
|
+
from ..types import StrPath
|
|
24
|
+
from ._imports import get_dependency
|
|
27
25
|
from ._imports import get_pandas
|
|
26
|
+
from ._io import coerce_path
|
|
27
|
+
from ._io import ensure_parent_dir
|
|
28
28
|
from ._io import normalize_records
|
|
29
|
+
from ._r import coerce_r_object
|
|
29
30
|
|
|
30
31
|
# SECTION: EXPORTS ========================================================== #
|
|
31
32
|
|
|
@@ -37,44 +38,18 @@ __all__ = [
|
|
|
37
38
|
]
|
|
38
39
|
|
|
39
40
|
|
|
40
|
-
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def _get_pyreadr() -> Any:
|
|
44
|
-
"""Return the pyreadr module, importing it on first use."""
|
|
45
|
-
return get_optional_module(
|
|
46
|
-
'pyreadr',
|
|
47
|
-
error_message=(
|
|
48
|
-
'RDA support requires optional dependency "pyreadr".\n'
|
|
49
|
-
'Install with: pip install pyreadr'
|
|
50
|
-
),
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def _coerce_r_object(value: Any, pandas: Any) -> JSONData:
|
|
55
|
-
if isinstance(value, pandas.DataFrame):
|
|
56
|
-
return value.to_dict(orient='records')
|
|
57
|
-
if isinstance(value, dict):
|
|
58
|
-
return value
|
|
59
|
-
if isinstance(value, list) and all(
|
|
60
|
-
isinstance(item, dict) for item in value
|
|
61
|
-
):
|
|
62
|
-
return value
|
|
63
|
-
return {'value': value}
|
|
64
|
-
|
|
65
|
-
|
|
66
41
|
# SECTION: FUNCTIONS ======================================================== #
|
|
67
42
|
|
|
68
43
|
|
|
69
44
|
def read(
|
|
70
|
-
path:
|
|
45
|
+
path: StrPath,
|
|
71
46
|
) -> JSONData:
|
|
72
47
|
"""
|
|
73
48
|
Read RDA content from *path*.
|
|
74
49
|
|
|
75
50
|
Parameters
|
|
76
51
|
----------
|
|
77
|
-
path :
|
|
52
|
+
path : StrPath
|
|
78
53
|
Path to the RDA file on disk.
|
|
79
54
|
|
|
80
55
|
Returns
|
|
@@ -82,22 +57,23 @@ def read(
|
|
|
82
57
|
JSONData
|
|
83
58
|
The structured data read from the RDA file.
|
|
84
59
|
"""
|
|
85
|
-
|
|
60
|
+
path = coerce_path(path)
|
|
61
|
+
pyreadr = get_dependency('pyreadr', format_name='RDA')
|
|
86
62
|
pandas = get_pandas('RDA')
|
|
87
63
|
result = pyreadr.read_r(str(path))
|
|
88
64
|
if not result:
|
|
89
65
|
return []
|
|
90
66
|
if len(result) == 1:
|
|
91
67
|
value = next(iter(result.values()))
|
|
92
|
-
return
|
|
68
|
+
return coerce_r_object(value, pandas)
|
|
93
69
|
payload: JSONDict = {}
|
|
94
70
|
for key, value in result.items():
|
|
95
|
-
payload[str(key)] =
|
|
71
|
+
payload[str(key)] = coerce_r_object(value, pandas)
|
|
96
72
|
return payload
|
|
97
73
|
|
|
98
74
|
|
|
99
75
|
def write(
|
|
100
|
-
path:
|
|
76
|
+
path: StrPath,
|
|
101
77
|
data: JSONData,
|
|
102
78
|
) -> int:
|
|
103
79
|
"""
|
|
@@ -105,7 +81,7 @@ def write(
|
|
|
105
81
|
|
|
106
82
|
Parameters
|
|
107
83
|
----------
|
|
108
|
-
path :
|
|
84
|
+
path : StrPath
|
|
109
85
|
Path to the RDA file on disk.
|
|
110
86
|
data : JSONData
|
|
111
87
|
Data to write as RDA file. Should be a list of dictionaries or a
|
|
@@ -120,21 +96,13 @@ def write(
|
|
|
120
96
|
------
|
|
121
97
|
ImportError
|
|
122
98
|
If "pyreadr" is not installed with write support.
|
|
123
|
-
TypeError
|
|
124
|
-
If *data* is not a dictionary or list of dictionaries.
|
|
125
99
|
"""
|
|
126
|
-
|
|
100
|
+
path = coerce_path(path)
|
|
101
|
+
pyreadr = get_dependency('pyreadr', format_name='RDA')
|
|
127
102
|
pandas = get_pandas('RDA')
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
frame = pandas.DataFrame.from_records(records)
|
|
132
|
-
count = len(records)
|
|
133
|
-
elif isinstance(data, dict):
|
|
134
|
-
frame = pandas.DataFrame.from_records([data])
|
|
135
|
-
count = 1
|
|
136
|
-
else:
|
|
137
|
-
raise TypeError('RDA payloads must be a dict or list of dicts')
|
|
103
|
+
records = normalize_records(data, 'RDA')
|
|
104
|
+
frame = pandas.DataFrame.from_records(records)
|
|
105
|
+
count = len(records)
|
|
138
106
|
|
|
139
107
|
writer = getattr(pyreadr, 'write_rdata', None) or getattr(
|
|
140
108
|
pyreadr,
|
|
@@ -146,7 +114,7 @@ def write(
|
|
|
146
114
|
'RDA write support requires "pyreadr" with write_rdata().',
|
|
147
115
|
)
|
|
148
116
|
|
|
149
|
-
path
|
|
117
|
+
ensure_parent_dir(path)
|
|
150
118
|
try:
|
|
151
119
|
writer(str(path), frame, df_name='data')
|
|
152
120
|
except TypeError:
|
etlplus/file/rds.py
CHANGED
|
@@ -18,14 +18,15 @@ Notes
|
|
|
18
18
|
|
|
19
19
|
from __future__ import annotations
|
|
20
20
|
|
|
21
|
-
from pathlib import Path
|
|
22
|
-
from typing import Any
|
|
23
|
-
|
|
24
21
|
from ..types import JSONData
|
|
25
22
|
from ..types import JSONDict
|
|
26
|
-
from
|
|
23
|
+
from ..types import StrPath
|
|
24
|
+
from ._imports import get_dependency
|
|
27
25
|
from ._imports import get_pandas
|
|
26
|
+
from ._io import coerce_path
|
|
27
|
+
from ._io import ensure_parent_dir
|
|
28
28
|
from ._io import normalize_records
|
|
29
|
+
from ._r import coerce_r_object
|
|
29
30
|
|
|
30
31
|
# SECTION: EXPORTS ========================================================== #
|
|
31
32
|
|
|
@@ -37,44 +38,18 @@ __all__ = [
|
|
|
37
38
|
]
|
|
38
39
|
|
|
39
40
|
|
|
40
|
-
# SECTION: INTERNAL HELPERS ================================================ #
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def _get_pyreadr() -> Any:
|
|
44
|
-
"""Return the pyreadr module, importing it on first use."""
|
|
45
|
-
return get_optional_module(
|
|
46
|
-
'pyreadr',
|
|
47
|
-
error_message=(
|
|
48
|
-
'RDS support requires optional dependency "pyreadr".\n'
|
|
49
|
-
'Install with: pip install pyreadr'
|
|
50
|
-
),
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def _coerce_r_object(value: Any, pandas: Any) -> JSONData:
|
|
55
|
-
if isinstance(value, pandas.DataFrame):
|
|
56
|
-
return value.to_dict(orient='records')
|
|
57
|
-
if isinstance(value, dict):
|
|
58
|
-
return value
|
|
59
|
-
if isinstance(value, list) and all(
|
|
60
|
-
isinstance(item, dict) for item in value
|
|
61
|
-
):
|
|
62
|
-
return value
|
|
63
|
-
return {'value': value}
|
|
64
|
-
|
|
65
|
-
|
|
66
41
|
# SECTION: FUNCTIONS ======================================================== #
|
|
67
42
|
|
|
68
43
|
|
|
69
44
|
def read(
|
|
70
|
-
path:
|
|
45
|
+
path: StrPath,
|
|
71
46
|
) -> JSONData:
|
|
72
47
|
"""
|
|
73
48
|
Read RDS content from *path*.
|
|
74
49
|
|
|
75
50
|
Parameters
|
|
76
51
|
----------
|
|
77
|
-
path :
|
|
52
|
+
path : StrPath
|
|
78
53
|
Path to the RDS file on disk.
|
|
79
54
|
|
|
80
55
|
Returns
|
|
@@ -82,22 +57,23 @@ def read(
|
|
|
82
57
|
JSONData
|
|
83
58
|
The structured data read from the RDS file.
|
|
84
59
|
"""
|
|
85
|
-
|
|
60
|
+
path = coerce_path(path)
|
|
61
|
+
pyreadr = get_dependency('pyreadr', format_name='RDS')
|
|
86
62
|
pandas = get_pandas('RDS')
|
|
87
63
|
result = pyreadr.read_r(str(path))
|
|
88
64
|
if not result:
|
|
89
65
|
return []
|
|
90
66
|
if len(result) == 1:
|
|
91
67
|
value = next(iter(result.values()))
|
|
92
|
-
return
|
|
68
|
+
return coerce_r_object(value, pandas)
|
|
93
69
|
payload: JSONDict = {}
|
|
94
70
|
for key, value in result.items():
|
|
95
|
-
payload[str(key)] =
|
|
71
|
+
payload[str(key)] = coerce_r_object(value, pandas)
|
|
96
72
|
return payload
|
|
97
73
|
|
|
98
74
|
|
|
99
75
|
def write(
|
|
100
|
-
path:
|
|
76
|
+
path: StrPath,
|
|
101
77
|
data: JSONData,
|
|
102
78
|
) -> int:
|
|
103
79
|
"""
|
|
@@ -105,7 +81,7 @@ def write(
|
|
|
105
81
|
|
|
106
82
|
Parameters
|
|
107
83
|
----------
|
|
108
|
-
path :
|
|
84
|
+
path : StrPath
|
|
109
85
|
Path to the RDS file on disk.
|
|
110
86
|
data : JSONData
|
|
111
87
|
Data to write as RDS file. Should be a list of dictionaries or a
|
|
@@ -120,21 +96,13 @@ def write(
|
|
|
120
96
|
------
|
|
121
97
|
ImportError
|
|
122
98
|
If "pyreadr" is not installed with write support.
|
|
123
|
-
TypeError
|
|
124
|
-
If *data* is not a dictionary or list of dictionaries.
|
|
125
99
|
"""
|
|
126
|
-
|
|
100
|
+
path = coerce_path(path)
|
|
101
|
+
pyreadr = get_dependency('pyreadr', format_name='RDS')
|
|
127
102
|
pandas = get_pandas('RDS')
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
frame = pandas.DataFrame.from_records(records)
|
|
132
|
-
count = len(records)
|
|
133
|
-
elif isinstance(data, dict):
|
|
134
|
-
frame = pandas.DataFrame.from_records([data])
|
|
135
|
-
count = 1
|
|
136
|
-
else:
|
|
137
|
-
raise TypeError('RDS payloads must be a dict or list of dicts')
|
|
103
|
+
records = normalize_records(data, 'RDS')
|
|
104
|
+
frame = pandas.DataFrame.from_records(records)
|
|
105
|
+
count = len(records)
|
|
138
106
|
|
|
139
107
|
writer = getattr(pyreadr, 'write_rds', None)
|
|
140
108
|
if writer is None:
|
|
@@ -142,6 +110,6 @@ def write(
|
|
|
142
110
|
'RDS write support requires "pyreadr" with write_rds().',
|
|
143
111
|
)
|
|
144
112
|
|
|
145
|
-
path
|
|
113
|
+
ensure_parent_dir(path)
|
|
146
114
|
writer(str(path), frame)
|
|
147
115
|
return count
|
etlplus/file/sas7bdat.py
CHANGED
|
@@ -17,15 +17,15 @@ Notes
|
|
|
17
17
|
|
|
18
18
|
from __future__ import annotations
|
|
19
19
|
|
|
20
|
-
from pathlib import Path
|
|
21
|
-
from typing import Any
|
|
22
20
|
from typing import cast
|
|
23
21
|
|
|
24
22
|
from ..types import JSONData
|
|
25
23
|
from ..types import JSONList
|
|
24
|
+
from ..types import StrPath
|
|
26
25
|
from . import stub
|
|
27
|
-
from ._imports import
|
|
26
|
+
from ._imports import get_dependency
|
|
28
27
|
from ._imports import get_pandas
|
|
28
|
+
from ._io import coerce_path
|
|
29
29
|
|
|
30
30
|
# SECTION: EXPORTS ========================================================== #
|
|
31
31
|
|
|
@@ -37,39 +37,18 @@ __all__ = [
|
|
|
37
37
|
]
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
# SECTION: INTERNAL HELPERS ================================================ #
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def _get_pyreadstat() -> Any:
|
|
44
|
-
"""Return the pyreadstat module, importing it on first use."""
|
|
45
|
-
return get_optional_module(
|
|
46
|
-
'pyreadstat',
|
|
47
|
-
error_message=(
|
|
48
|
-
'SAS7BDAT support requires optional dependency "pyreadstat".\n'
|
|
49
|
-
'Install with: pip install pyreadstat'
|
|
50
|
-
),
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def _raise_readstat_error(err: ImportError) -> None:
|
|
55
|
-
raise ImportError(
|
|
56
|
-
'SAS7BDAT support requires optional dependency "pyreadstat".\n'
|
|
57
|
-
'Install with: pip install pyreadstat',
|
|
58
|
-
) from err
|
|
59
|
-
|
|
60
|
-
|
|
61
40
|
# SECTION: FUNCTIONS ======================================================== #
|
|
62
41
|
|
|
63
42
|
|
|
64
43
|
def read(
|
|
65
|
-
path:
|
|
44
|
+
path: StrPath,
|
|
66
45
|
) -> JSONList:
|
|
67
46
|
"""
|
|
68
47
|
Read SAS7BDAT content from *path*.
|
|
69
48
|
|
|
70
49
|
Parameters
|
|
71
50
|
----------
|
|
72
|
-
path :
|
|
51
|
+
path : StrPath
|
|
73
52
|
Path to the SAS7BDAT file on disk.
|
|
74
53
|
|
|
75
54
|
Returns
|
|
@@ -77,18 +56,18 @@ def read(
|
|
|
77
56
|
JSONList
|
|
78
57
|
The list of dictionaries read from the SAS7BDAT file.
|
|
79
58
|
"""
|
|
59
|
+
path = coerce_path(path)
|
|
60
|
+
get_dependency('pyreadstat', format_name='SAS7BDAT')
|
|
80
61
|
pandas = get_pandas('SAS7BDAT')
|
|
81
62
|
try:
|
|
82
63
|
frame = pandas.read_sas(path, format='sas7bdat')
|
|
83
64
|
except TypeError:
|
|
84
65
|
frame = pandas.read_sas(path)
|
|
85
|
-
except ImportError as err: # pragma: no cover
|
|
86
|
-
_raise_readstat_error(err)
|
|
87
66
|
return cast(JSONList, frame.to_dict(orient='records'))
|
|
88
67
|
|
|
89
68
|
|
|
90
69
|
def write(
|
|
91
|
-
path:
|
|
70
|
+
path: StrPath,
|
|
92
71
|
data: JSONData,
|
|
93
72
|
) -> int:
|
|
94
73
|
"""
|
|
@@ -96,7 +75,7 @@ def write(
|
|
|
96
75
|
|
|
97
76
|
Parameters
|
|
98
77
|
----------
|
|
99
|
-
path :
|
|
78
|
+
path : StrPath
|
|
100
79
|
Path to the SAS7BDAT file on disk.
|
|
101
80
|
data : JSONData
|
|
102
81
|
Data to write as SAS7BDAT file. Should be a list of dictionaries or a
|
|
@@ -107,4 +86,5 @@ def write(
|
|
|
107
86
|
int
|
|
108
87
|
The number of rows written to the SAS7BDAT file.
|
|
109
88
|
"""
|
|
89
|
+
path = coerce_path(path)
|
|
110
90
|
return stub.write(path, data, format_name='SAS7BDAT')
|
etlplus/file/sav.py
CHANGED
|
@@ -17,14 +17,15 @@ Notes
|
|
|
17
17
|
|
|
18
18
|
from __future__ import annotations
|
|
19
19
|
|
|
20
|
-
from pathlib import Path
|
|
21
|
-
from typing import Any
|
|
22
20
|
from typing import cast
|
|
23
21
|
|
|
24
22
|
from ..types import JSONData
|
|
25
23
|
from ..types import JSONList
|
|
26
|
-
from
|
|
24
|
+
from ..types import StrPath
|
|
25
|
+
from ._imports import get_dependency
|
|
27
26
|
from ._imports import get_pandas
|
|
27
|
+
from ._io import coerce_path
|
|
28
|
+
from ._io import ensure_parent_dir
|
|
28
29
|
from ._io import normalize_records
|
|
29
30
|
|
|
30
31
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -37,32 +38,18 @@ __all__ = [
|
|
|
37
38
|
]
|
|
38
39
|
|
|
39
40
|
|
|
40
|
-
# SECTION: INTERNAL FUNCTION ================================================ #
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def _get_pyreadstat() -> Any:
|
|
44
|
-
"""Return the pyreadstat module, importing it on first use."""
|
|
45
|
-
return get_optional_module(
|
|
46
|
-
'pyreadstat',
|
|
47
|
-
error_message=(
|
|
48
|
-
'SAV support requires optional dependency "pyreadstat".\n'
|
|
49
|
-
'Install with: pip install pyreadstat'
|
|
50
|
-
),
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
|
|
54
41
|
# SECTION: FUNCTIONS ======================================================== #
|
|
55
42
|
|
|
56
43
|
|
|
57
44
|
def read(
|
|
58
|
-
path:
|
|
45
|
+
path: StrPath,
|
|
59
46
|
) -> JSONList:
|
|
60
47
|
"""
|
|
61
48
|
Read SAV content from *path*.
|
|
62
49
|
|
|
63
50
|
Parameters
|
|
64
51
|
----------
|
|
65
|
-
path :
|
|
52
|
+
path : StrPath
|
|
66
53
|
Path to the SAV file on disk.
|
|
67
54
|
|
|
68
55
|
Returns
|
|
@@ -70,13 +57,14 @@ def read(
|
|
|
70
57
|
JSONList
|
|
71
58
|
The list of dictionaries read from the SAV file.
|
|
72
59
|
"""
|
|
73
|
-
|
|
60
|
+
path = coerce_path(path)
|
|
61
|
+
pyreadstat = get_dependency('pyreadstat', format_name='SAV')
|
|
74
62
|
frame, _meta = pyreadstat.read_sav(str(path))
|
|
75
63
|
return cast(JSONList, frame.to_dict(orient='records'))
|
|
76
64
|
|
|
77
65
|
|
|
78
66
|
def write(
|
|
79
|
-
path:
|
|
67
|
+
path: StrPath,
|
|
80
68
|
data: JSONData,
|
|
81
69
|
) -> int:
|
|
82
70
|
"""
|
|
@@ -84,7 +72,7 @@ def write(
|
|
|
84
72
|
|
|
85
73
|
Parameters
|
|
86
74
|
----------
|
|
87
|
-
path :
|
|
75
|
+
path : StrPath
|
|
88
76
|
Path to the SAV file on disk.
|
|
89
77
|
data : JSONData
|
|
90
78
|
Data to write as SAV. Should be a list of dictionaries or a
|
|
@@ -95,13 +83,14 @@ def write(
|
|
|
95
83
|
int
|
|
96
84
|
The number of rows written to the SAV file.
|
|
97
85
|
"""
|
|
86
|
+
path = coerce_path(path)
|
|
98
87
|
records = normalize_records(data, 'SAV')
|
|
99
88
|
if not records:
|
|
100
89
|
return 0
|
|
101
90
|
|
|
102
|
-
pyreadstat =
|
|
91
|
+
pyreadstat = get_dependency('pyreadstat', format_name='SAV')
|
|
103
92
|
pandas = get_pandas('SAV')
|
|
104
|
-
path
|
|
93
|
+
ensure_parent_dir(path)
|
|
105
94
|
frame = pandas.DataFrame.from_records(records)
|
|
106
95
|
pyreadstat.write_sav(frame, str(path))
|
|
107
96
|
return len(records)
|