etlplus 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- etlplus/file/_imports.py +35 -20
- etlplus/file/_io.py +138 -15
- etlplus/file/_r.py +48 -0
- etlplus/file/_sql.py +224 -0
- etlplus/file/accdb.py +7 -6
- etlplus/file/arrow.py +13 -24
- etlplus/file/avro.py +13 -10
- etlplus/file/bson.py +61 -22
- etlplus/file/cbor.py +13 -25
- etlplus/file/cfg.py +7 -6
- etlplus/file/conf.py +7 -6
- etlplus/file/core.py +1 -1
- etlplus/file/csv.py +8 -7
- etlplus/file/dat.py +9 -6
- etlplus/file/dta.py +15 -30
- etlplus/file/duckdb.py +29 -122
- etlplus/file/feather.py +15 -30
- etlplus/file/fwf.py +16 -14
- etlplus/file/gz.py +12 -7
- etlplus/file/hbs.py +7 -6
- etlplus/file/hdf5.py +31 -6
- etlplus/file/ini.py +17 -24
- etlplus/file/ion.py +7 -6
- etlplus/file/jinja2.py +7 -6
- etlplus/file/json.py +10 -11
- etlplus/file/log.py +7 -6
- etlplus/file/mat.py +7 -6
- etlplus/file/mdb.py +7 -6
- etlplus/file/msgpack.py +13 -25
- etlplus/file/mustache.py +7 -6
- etlplus/file/nc.py +30 -21
- etlplus/file/ndjson.py +10 -6
- etlplus/file/numbers.py +7 -6
- etlplus/file/ods.py +10 -6
- etlplus/file/orc.py +15 -30
- etlplus/file/parquet.py +10 -6
- etlplus/file/pb.py +22 -23
- etlplus/file/pbf.py +7 -6
- etlplus/file/properties.py +15 -29
- etlplus/file/proto.py +14 -20
- etlplus/file/psv.py +8 -7
- etlplus/file/rda.py +19 -51
- etlplus/file/rds.py +19 -51
- etlplus/file/sas7bdat.py +10 -30
- etlplus/file/sav.py +13 -24
- etlplus/file/sqlite.py +25 -83
- etlplus/file/stub.py +8 -6
- etlplus/file/sylk.py +7 -6
- etlplus/file/tab.py +8 -7
- etlplus/file/toml.py +14 -17
- etlplus/file/tsv.py +8 -7
- etlplus/file/txt.py +10 -7
- etlplus/file/vm.py +7 -6
- etlplus/file/wks.py +7 -6
- etlplus/file/xls.py +8 -5
- etlplus/file/xlsm.py +10 -6
- etlplus/file/xlsx.py +10 -6
- etlplus/file/xml.py +11 -9
- etlplus/file/xpt.py +13 -33
- etlplus/file/yaml.py +10 -11
- etlplus/file/zip.py +10 -5
- etlplus/file/zsav.py +7 -6
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/METADATA +1 -1
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/RECORD +68 -66
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0
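A pattern repeats across nearly every per-file diff below: read/write helpers now accept a StrPath (a string or os.PathLike) rather than requiring a ready-made pathlib.Path, normalize it with a new coerce_path helper, and call ensure_parent_dir before writing so missing output directories are created on demand. Both helpers come from etlplus/file/_io.py (+138 -15 in the listing above), whose source is not included in this diff. The following is a minimal sketch of what the call sites imply; the alias definition and both function bodies are assumptions, not the actual implementation.

# Hypothetical reconstruction of the _io helpers; inferred from call
# sites in the diffs below, not from the actual _io.py source.
# Assumes Python 3.10+ for the `|` union syntax.
import os
from pathlib import Path

# Assumption: etlplus.types.StrPath is an alias along these lines.
StrPath = str | os.PathLike[str]


def coerce_path(path: StrPath) -> Path:
    # Accept str or PathLike input and normalize it to a pathlib.Path.
    return Path(path)


def ensure_parent_dir(path: Path) -> None:
    # Create the destination's parent directory tree if it is missing.
    path.parent.mkdir(parents=True, exist_ok=True)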
etlplus/file/nc.py
CHANGED

@@ -18,14 +18,15 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-from typing import Any
 from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from …
+from ..types import StrPath
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
@@ -41,18 +42,24 @@ __all__ = [
 # SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
-def …
-…
-…
-…
-…
-        'NC support requires optional dependency "xarray".\n'
-        'Install with: pip install xarray'
-    ),
-)
-
+def _raise_engine_error(
+    err: ImportError,
+) -> None:
+    """
+    Raise a consistent ImportError for missing NetCDF engine support.
 
-def _raise_engine_error(err: ImportError) -> None:
+    Parameters
+    ----------
+    err : ImportError
+        The original ImportError raised when trying to use NetCDF support
+        without the required dependency.
+
+    Raises
+    ------
+    ImportError
+        Consistent ImportError indicating that NetCDF support requires
+        optional dependencies.
+    """
     raise ImportError(
         'NC support requires optional dependency "netCDF4" or "h5netcdf".\n'
         'Install with: pip install netCDF4',
@@ -63,14 +70,14 @@ def _raise_engine_error(err: ImportError) -> None:
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read NC content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the NC file on disk.
 
     Returns
@@ -78,7 +85,8 @@ def read(
     JSONList
         The list of dictionaries read from the NC file.
     """
-…
+    path = coerce_path(path)
+    xarray = get_dependency('xarray', format_name='NC')
     try:
         dataset = xarray.open_dataset(path)
     except ImportError as err:  # pragma: no cover
@@ -93,7 +101,7 @@ def read(
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -101,7 +109,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the NC file on disk.
     data : JSONData
         Data to write as NC file. Should be a list of dictionaries or a
@@ -112,15 +120,16 @@ def write(
     int
         The number of rows written to the NC file.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'NC')
     if not records:
         return 0
 
-    xarray = …
+    xarray = get_dependency('xarray', format_name='NC')
     pandas = get_pandas('NC')
     frame = pandas.DataFrame.from_records(records)
     dataset = xarray.Dataset.from_dataframe(frame)
-    path…
+    ensure_parent_dir(path)
     try:
         dataset.to_netcdf(path)
     except ImportError as err:  # pragma: no cover
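nc.py also replaces its hand-rolled xarray import guard with get_dependency from _imports.py (+35 -20 above, not shown in this diff). The call sites, get_dependency('xarray', format_name='NC'), suggest an importlib wrapper that fails with the same style of message as _raise_engine_error. A plausible sketch, not the actual implementation:

# Speculative reconstruction of get_dependency based solely on its call
# sites; the real etlplus/file/_imports.py source is not in this diff.
from importlib import import_module
from types import ModuleType


def get_dependency(name: str, *, format_name: str) -> ModuleType:
    # Import an optional dependency, or fail with a uniform, actionable
    # message that names the file format needing it.
    try:
        return import_module(name)
    except ImportError as err:
        raise ImportError(
            f'{format_name} support requires optional dependency "{name}".\n'
            f'Install with: pip install {name}',
        ) from err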
etlplus/file/ndjson.py
CHANGED

@@ -18,13 +18,15 @@ Notes
 from __future__ import annotations
 
 import json
-from pathlib import Path
 from typing import cast
 
 from ..types import JSONData
 from ..types import JSONDict
 from ..types import JSONList
+from ..types import StrPath
 from ..utils import count_records
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
@@ -41,14 +43,14 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read NDJSON content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the NDJSON file on disk.
 
     Returns
@@ -61,6 +63,7 @@ def read(
     TypeError
         If any line in the NDJSON file is not a JSON object (dict).
     """
+    path = coerce_path(path)
     rows: JSONList = []
     with path.open('r', encoding='utf-8') as handle:
         for idx, line in enumerate(handle, start=1):
@@ -77,7 +80,7 @@ def read(
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -85,7 +88,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the NDJSON file on disk.
     data : JSONData
         Data to write.
@@ -95,12 +98,13 @@ def write(
     int
         Number of records written.
     """
+    path = coerce_path(path)
     rows = normalize_records(data, 'NDJSON')
 
     if not rows:
         return 0
 
-    path…
+    ensure_parent_dir(path)
     with path.open('w', encoding='utf-8') as handle:
         for row in rows:
             handle.write(json.dumps(row, ensure_ascii=False))
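The caller-visible effect of the ndjson.py change is that plain string paths are now accepted and the output directory no longer has to exist in advance. A hypothetical usage; the module path is taken from the file listing above and the data values are illustrative:

# Assumed usage of the 0.17.3 API.
from etlplus.file import ndjson

rows = [{'id': 1}, {'id': 2}]

# The string is coerced to a Path internally, and out/ is created
# by ensure_parent_dir if it does not exist yet.
written = ndjson.write('out/events.ndjson', rows)
assert written == 2
assert ndjson.read('out/events.ndjson') == rows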
etlplus/file/numbers.py
CHANGED

@@ -16,11 +16,11 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -36,14 +36,14 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read NUMBERS content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the NUMBERS file on disk.
 
     Returns
@@ -55,7 +55,7 @@
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -63,7 +63,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the NUMBERS file on disk.
     data : JSONData
         Data to write as NUMBERS file. Should be a list of dictionaries or a
@@ -74,4 +74,5 @@ def write(
     int
         The number of rows written to the NUMBERS file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='NUMBERS')
etlplus/file/ods.py
CHANGED

@@ -19,12 +19,14 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
 from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
@@ -41,14 +43,14 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read ODS content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the ODS file on disk.
 
     Returns
@@ -61,6 +63,7 @@ def read(
     ImportError
         If optional dependencies for ODS support are missing.
     """
+    path = coerce_path(path)
     pandas = get_pandas('ODS')
     try:
         frame = pandas.read_excel(path, engine='odf')
@@ -73,7 +76,7 @@ def read(
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -81,7 +84,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the ODS file on disk.
     data : JSONData
         Data to write as ODS. Should be a list of dictionaries or a
@@ -97,12 +100,13 @@ def write(
     ImportError
         If optional dependencies for ODS support are missing.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'ODS')
     if not records:
         return 0
 
     pandas = get_pandas('ODS')
-    path…
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
     try:
         frame.to_excel(path, index=False, engine='odf')
etlplus/file/orc.py
CHANGED

@@ -18,12 +18,15 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
 from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
@@ -40,39 +43,30 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read ORC content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the ORC file on disk.
 
     Returns
     -------
     JSONList
         The list of dictionaries read from the ORC file.
-
-    Raises
-    ------
-    ImportError
-        When optional dependency "pyarrow" is missing.
     """
+    path = coerce_path(path)
+    get_dependency('pyarrow', format_name='ORC')
     pandas = get_pandas('ORC')
-…
-        frame = pandas.read_orc(path)
-    except ImportError as e:  # pragma: no cover
-        raise ImportError(
-            'ORC support requires optional dependency "pyarrow".\n'
-            'Install with: pip install pyarrow',
-        ) from e
+    frame = pandas.read_orc(path)
     return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -80,7 +74,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the ORC file on disk.
     data : JSONData
         Data to write.
@@ -89,24 +83,15 @@ def write(
     -------
     int
         Number of records written.
-
-    Raises
-    ------
-    ImportError
-        When optional dependency "pyarrow" is missing.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'ORC')
     if not records:
         return 0
 
+    get_dependency('pyarrow', format_name='ORC')
     pandas = get_pandas('ORC')
-    path…
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
-…
-        frame.to_orc(path, index=False)
-    except ImportError as e:  # pragma: no cover
-        raise ImportError(
-            'ORC support requires optional dependency "pyarrow".\n'
-            'Install with: pip install pyarrow',
-        ) from e
+    frame.to_orc(path, index=False)
     return len(records)
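Beyond the StrPath change, orc.py shifts its error handling: the try/except ImportError wrappers around each pandas call are gone, replaced by one upfront get_dependency('pyarrow', format_name='ORC') probe. Note the ordering in write(): the empty-input short circuit runs before both the dependency probe and ensure_parent_dir, so writing an empty list touches neither pyarrow nor the filesystem. A hypothetical usage, assuming pandas and pyarrow are installed:

# Assumed usage; behavior follows the write() hunks above.
from etlplus.file import orc

# Empty input returns 0 before the pyarrow probe or any directory creation.
assert orc.write('out/empty.orc', []) == 0

# Non-empty input probes pyarrow once, ensures out/ exists, then writes.
rows = [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]
assert orc.write('out/rows.orc', rows) == 2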
etlplus/file/parquet.py
CHANGED

@@ -18,12 +18,14 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
 from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
@@ -40,14 +42,14 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read Parquet content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the PARQUET file on disk.
 
     Returns
@@ -60,6 +62,7 @@ def read(
     ImportError
         If optional dependencies for Parquet support are missing.
     """
+    path = coerce_path(path)
     pandas = get_pandas('Parquet')
     try:
         frame = pandas.read_parquet(path)
@@ -73,7 +76,7 @@ def read(
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -81,7 +84,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the PARQUET file on disk.
     data : JSONData
         Data to write.
@@ -96,12 +99,13 @@ def write(
     ImportError
         If optional dependencies for Parquet support are missing.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'Parquet')
     if not records:
         return 0
 
     pandas = get_pandas('Parquet')
-    path…
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
     try:
         frame.to_parquet(path, index=False)
etlplus/file/pb.py
CHANGED

@@ -5,7 +5,7 @@ Helpers for reading/writing Protocol Buffers binary (PB) files.
 
 Notes
 -----
-- A PB file contains Protocol Buffers (…
+- A PB file contains Protocol Buffers (Protobuf) binary-encoded messages.
 - Common cases:
     - Serialized payloads emitted by services or SDKs.
     - Binary payload dumps for debugging or transport.
@@ -16,9 +16,13 @@ Notes
 from __future__ import annotations
 
 import base64
-from pathlib import Path
 
 from ..types import JSONData
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import require_dict_payload
+from ._io import require_str_key
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -34,14 +38,14 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONData:
     """
     Read PB content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the PB file on disk.
 
     Returns
@@ -49,13 +53,14 @@ def read(
     JSONData
         The structured data read from the PB file.
     """
+    path = coerce_path(path)
     payload = path.read_bytes()
     encoded = base64.b64encode(payload).decode('ascii')
     return {'payload_base64': encoded}
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -63,7 +68,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the PB file on disk.
     data : JSONData
         Data to write as PB. Should be a dictionary with ``payload_base64``.
@@ -72,22 +77,16 @@ def write(
     -------
     int
         The number of records written to the PB file.
-
-    Raises
-    ------
-    TypeError
-        If *data* is not a dictionary or missing ``payload_base64``.
     """
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-    path.…
-    path.write_bytes(payload)
+    path = coerce_path(path)
+    payload = require_dict_payload(data, format_name='PB')
+    payload_base64 = require_str_key(
+        payload,
+        format_name='PB',
+        key='payload_base64',
+    )
+
+    decoded = base64.b64decode(payload_base64.encode('ascii'))
+    ensure_parent_dir(path)
+    path.write_bytes(decoded)
     return 1
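pb.py swaps roughly ten lines of inline payload validation for two new _io helpers. Their implementations are not shown, but the call sites plus the docstring section they replace ("Raises TypeError if *data* is not a dictionary or missing ``payload_base64``") pin down the contract. A sketch consistent with that contract, not the actual source:

# Speculative reconstruction of the validation helpers used by pb.py;
# signatures inferred from the call sites above, bodies are assumptions.
from typing import Any


def require_dict_payload(data: Any, *, format_name: str) -> dict:
    # Reject any payload that is not a JSON object.
    if not isinstance(data, dict):
        raise TypeError(f'{format_name} payload must be a dictionary')
    return data


def require_str_key(payload: dict, *, format_name: str, key: str) -> str:
    # Require a string value under *key* (here, 'payload_base64').
    value = payload.get(key)
    if not isinstance(value, str):
        raise TypeError(f'{format_name} payload must include string key {key!r}')
    return value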
etlplus/file/pbf.py
CHANGED

@@ -18,11 +18,11 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -38,14 +38,14 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read PBF content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the PBF file on disk.
 
     Returns
@@ -57,7 +57,7 @@
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -65,7 +65,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the PBF file on disk.
     data : JSONData
         Data to write as PBF. Should be a list of dictionaries or a
@@ -76,4 +76,5 @@ def write(
     int
         The number of rows written to the PBF file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='PBF')