etlplus 0.16.10__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/file/README.md +33 -0
- etlplus/file/_imports.py +35 -20
- etlplus/file/_io.py +138 -15
- etlplus/file/_r.py +48 -0
- etlplus/file/_sql.py +224 -0
- etlplus/file/accdb.py +7 -6
- etlplus/file/arrow.py +29 -10
- etlplus/file/avro.py +13 -10
- etlplus/file/bson.py +94 -10
- etlplus/file/cbor.py +29 -17
- etlplus/file/cfg.py +7 -6
- etlplus/file/conf.py +7 -6
- etlplus/file/core.py +1 -1
- etlplus/file/csv.py +8 -7
- etlplus/file/dat.py +52 -11
- etlplus/file/dta.py +36 -16
- etlplus/file/duckdb.py +72 -11
- etlplus/file/enums.py +29 -0
- etlplus/file/feather.py +15 -30
- etlplus/file/fwf.py +44 -10
- etlplus/file/gz.py +12 -7
- etlplus/file/hbs.py +7 -6
- etlplus/file/hdf5.py +71 -8
- etlplus/file/ini.py +60 -17
- etlplus/file/ion.py +7 -6
- etlplus/file/jinja2.py +7 -6
- etlplus/file/json.py +10 -11
- etlplus/file/log.py +7 -6
- etlplus/file/mat.py +7 -6
- etlplus/file/mdb.py +7 -6
- etlplus/file/msgpack.py +27 -15
- etlplus/file/mustache.py +7 -6
- etlplus/file/nc.py +69 -11
- etlplus/file/ndjson.py +10 -6
- etlplus/file/numbers.py +7 -6
- etlplus/file/ods.py +48 -11
- etlplus/file/orc.py +15 -30
- etlplus/file/parquet.py +10 -6
- etlplus/file/pb.py +36 -24
- etlplus/file/pbf.py +7 -6
- etlplus/file/properties.py +44 -18
- etlplus/file/proto.py +24 -18
- etlplus/file/psv.py +12 -11
- etlplus/file/rda.py +57 -15
- etlplus/file/rds.py +50 -14
- etlplus/file/sas7bdat.py +26 -16
- etlplus/file/sav.py +34 -16
- etlplus/file/sqlite.py +70 -10
- etlplus/file/stub.py +8 -6
- etlplus/file/sylk.py +7 -6
- etlplus/file/tab.py +13 -13
- etlplus/file/toml.py +56 -17
- etlplus/file/tsv.py +8 -7
- etlplus/file/txt.py +10 -7
- etlplus/file/vm.py +7 -6
- etlplus/file/wks.py +7 -6
- etlplus/file/xls.py +8 -5
- etlplus/file/xlsm.py +48 -10
- etlplus/file/xlsx.py +10 -6
- etlplus/file/xml.py +11 -9
- etlplus/file/xpt.py +46 -10
- etlplus/file/yaml.py +10 -11
- etlplus/file/zip.py +10 -5
- etlplus/file/zsav.py +7 -6
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/METADATA +44 -26
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/RECORD +70 -68
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0
etlplus/file/sas7bdat.py
CHANGED
@@ -1,29 +1,31 @@
 """
 :mod:`etlplus.file.sas7bdat` module.
 
-
-yet).
+Helpers for reading/writing SAS (SAS7BDAT) data files.
 
 Notes
 -----
-- A SAS7BDAT file is a binary file format
-  including variables, labels, and data types.
+- A SAS7BDAT file is a proprietary binary file format created by SAS to store
+  datasets, including variables, labels, and data types.
 - Common cases:
-  -
-  -
-  - Custom formats specific to certain applications.
+  - Statistical analysis pipelines.
+  - Data exchange with SAS tooling.
 - Rule of thumb:
-  - If the file
-
+  - If the file follows the SAS7BDAT specification, use this module for
+    reading and writing.
 """
 
 from __future__ import annotations
 
-from
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._imports import get_dependency
+from ._imports import get_pandas
+from ._io import coerce_path
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -39,14 +41,14 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
-    Read
+    Read SAS7BDAT content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SAS7BDAT file on disk.
 
     Returns
@@ -54,11 +56,18 @@ def read(
     JSONList
         The list of dictionaries read from the SAS7BDAT file.
     """
-
+    path = coerce_path(path)
+    get_dependency('pyreadstat', format_name='SAS7BDAT')
+    pandas = get_pandas('SAS7BDAT')
+    try:
+        frame = pandas.read_sas(path, format='sas7bdat')
+    except TypeError:
+        frame = pandas.read_sas(path)
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,7 +75,7 @@ def write(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SAS7BDAT file on disk.
     data : JSONData
         Data to write as SAS7BDAT file. Should be a list of dictionaries or a
@@ -77,4 +86,5 @@ def write(
     int
         The number of rows written to the SAS7BDAT file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='SAS7BDAT')
etlplus/file/sav.py
CHANGED
@@ -1,27 +1,32 @@
 """
 :mod:`etlplus.file.sav` module.
 
-
+Helpers for reading/writing SPSS (SAV) files.
 
 Notes
 -----
-- A SAV file is a
-  variables, labels, and data types.
+- A SAV file is a dataset created by SPSS.
 - Common cases:
-  -
-  -
+  - Survey and market research datasets.
+  - Statistical analysis workflows.
+  - Exchange with SPSS and compatible tools.
 - Rule of thumb:
-  - If
+  - If the file follows the SAV specification, use this module for reading
     and writing.
 """
 
 from __future__ import annotations
 
-from
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from
+from ..types import StrPath
+from ._imports import get_dependency
+from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -37,14 +42,14 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read SAV content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SAV file on disk.
 
     Returns
@@ -52,22 +57,25 @@ def read(
     JSONList
         The list of dictionaries read from the SAV file.
     """
-
+    path = coerce_path(path)
+    pyreadstat = get_dependency('pyreadstat', format_name='SAV')
+    frame, _meta = pyreadstat.read_sav(str(path))
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
-    Write *data* to SAV
+    Write *data* to SAV at *path* and return record count.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SAV file on disk.
     data : JSONData
-        Data to write as SAV
+        Data to write as SAV. Should be a list of dictionaries or a
         single dictionary.
 
     Returns
@@ -75,4 +83,14 @@ def write(
     int
         The number of rows written to the SAV file.
     """
-
+    path = coerce_path(path)
+    records = normalize_records(data, 'SAV')
+    if not records:
+        return 0
+
+    pyreadstat = get_dependency('pyreadstat', format_name='SAV')
+    pandas = get_pandas('SAV')
+    ensure_parent_dir(path)
+    frame = pandas.DataFrame.from_records(records)
+    pyreadstat.write_sav(frame, str(path))
+    return len(records)
etlplus/file/sqlite.py
CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.sqlite` module.
 
-
-implemented yet).
+Helpers for reading/writing SQLite database (SQLITE) files.
 
 Notes
 -----
@@ -19,11 +18,21 @@ Notes
 
 from __future__ import annotations
 
-
+import sqlite3
 
 from ..types import JSONData
 from ..types import JSONList
-from
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import normalize_records
+from ._sql import DEFAULT_TABLE
+from ._sql import SQLITE_DIALECT
+from ._sql import coerce_sql_value
+from ._sql import collect_column_values
+from ._sql import infer_column_type
+from ._sql import quote_identifier
+from ._sql import resolve_table
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -39,14 +48,14 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read SQLITE content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SQLITE file on disk.
 
     Returns
@@ -54,11 +63,28 @@ def read(
     JSONList
         The list of dictionaries read from the SQLITE file.
     """
-
+    path = coerce_path(path)
+    conn = sqlite3.connect(str(path))
+    try:
+        conn.row_factory = sqlite3.Row
+        cursor = conn.execute(
+            'SELECT name FROM sqlite_master '
+            "WHERE type='table' AND name NOT LIKE 'sqlite_%' "
+            'ORDER BY name',
+        )
+        tables = [row[0] for row in cursor.fetchall()]
+        table = resolve_table(tables, engine_name='SQLite')
+        if table is None:
+            return []
+        query = f'SELECT * FROM {quote_identifier(table)}'
+        rows = conn.execute(query).fetchall()
+        return [dict(row) for row in rows]
+    finally:
+        conn.close()
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,7 +92,7 @@ def write(
 
     Parameters
    ----------
-    path :
+    path : StrPath
         Path to the SQLITE file on disk.
     data : JSONData
         Data to write as SQLITE. Should be a list of dictionaries or a
@@ -77,4 +103,38 @@ def write(
     int
         The number of rows written to the SQLITE file.
     """
-
+    path = coerce_path(path)
+    records = normalize_records(data, 'SQLITE')
+    if not records:
+        return 0
+
+    columns, column_values = collect_column_values(records)
+    if not columns:
+        return 0
+
+    column_defs = ', '.join(
+        f'{quote_identifier(column)} '
+        f'{infer_column_type(values, SQLITE_DIALECT)}'
+        for column, values in column_values.items()
+    )
+    table_ident = quote_identifier(DEFAULT_TABLE)
+    insert_columns = ', '.join(quote_identifier(column) for column in columns)
+    placeholders = ', '.join('?' for _ in columns)
+    insert_sql = (
+        f'INSERT INTO {table_ident} ({insert_columns}) VALUES ({placeholders})'
+    )
+
+    ensure_parent_dir(path)
+    conn = sqlite3.connect(str(path))
+    try:
+        conn.execute(f'DROP TABLE IF EXISTS {table_ident}')
+        conn.execute(f'CREATE TABLE {table_ident} ({column_defs})')
+        rows = [
+            tuple(coerce_sql_value(row.get(column)) for column in columns)
+            for row in records
+        ]
+        conn.executemany(insert_sql, rows)
+        conn.commit()
+    finally:
+        conn.close()
+    return len(records)
etlplus/file/stub.py
CHANGED
@@ -6,10 +6,10 @@ Helpers for reading/writing stubbed files.
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._io import coerce_path
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -25,7 +25,7 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
     format_name: str = 'Stubbed',
 ) -> JSONList:
     """
@@ -33,7 +33,7 @@ def read(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the stubbed file on disk.
     format_name : str
         Human-readable format name.
@@ -48,12 +48,13 @@ def read(
     NotImplementedError
         Always, since this is a stub implementation.
     """
+    path = coerce_path(path)
     _ = path
     raise NotImplementedError(f'{format_name} read is not implemented yet')
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
     format_name: str = 'Stubbed',
 ) -> int:
@@ -62,7 +63,7 @@ def write(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the stubbed file on disk.
     data : JSONData
         Data to write as stubbed file. Should be a list of dictionaries or a
@@ -80,6 +81,7 @@ def write(
     NotImplementedError
         Always, since this is a stub implementation.
     """
+    path = coerce_path(path)
     _ = path
     _ = data
     raise NotImplementedError(f'{format_name} write is not implemented yet')
etlplus/file/sylk.py
CHANGED
@@ -18,11 +18,11 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -38,14 +38,14 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read SYLK content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SYLK file on disk.
 
     Returns
@@ -57,7 +57,7 @@ def read(
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -65,7 +65,7 @@ def write(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SYLK file on disk.
     data : JSONData
         Data to write as SYLK file. Should be a list of dictionaries or a
@@ -76,4 +76,5 @@ def write(
     int
         The number of rows written to the SYLK file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='SYLK')
etlplus/file/tab.py
CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.tab` module.
 
-
-yet).
+Helpers for reading/writing "tab"-formatted (TAB) files.
 
 Notes
 -----
@@ -15,18 +14,18 @@ Notes
 - Header/metadata lines or multi-line records that break TSV assumptions.
 - Not actually tab-delimited despite the name.
 - Rule of thumb:
-  -
+  - This implementation treats TAB as tab-delimited text.
   - If the file has fixed-width fields, use :mod:`etlplus.file.fwf`.
-  - Otherwise, use :mod:`etlplus.file.tab` (i.e., this module).
 """
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
-from
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import read_delimited
+from ._io import write_delimited
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -42,14 +41,14 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read TAB content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the TAB file on disk.
 
     Returns
@@ -57,11 +56,11 @@ def read(
     JSONList
         The list of dictionaries read from the TAB file.
     """
-    return
+    return read_delimited(path, delimiter='\t')
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -69,7 +68,7 @@ def write(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the TAB file on disk.
     data : JSONData
         Data to write as TAB file. Should be a list of dictionaries or a
@@ -80,4 +79,5 @@ def write(
     int
         The number of rows written to the TAB file.
     """
-
+    path = coerce_path(path)
+    return write_delimited(path, data, delimiter='\t', format_name='TAB')
etlplus/file/toml.py
CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.toml` module.
 
-
-(not implemented yet).
+Helpers for reading/writing Tom's Obvious Minimal Language (TOML) files.
 
 Notes
 -----
@@ -19,11 +18,17 @@ Notes
 
 from __future__ import annotations
 
-
+import tomllib
+from typing import Any
+from typing import cast
 
 from ..types import JSONData
-from ..types import
-from
+from ..types import JSONDict
+from ..types import StrPath
+from ._imports import get_optional_module
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import require_dict_payload
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -39,26 +44,35 @@ __all__ = [
 
 
 def read(
-    path:
-) ->
+    path: StrPath,
+) -> JSONData:
     """
     Read TOML content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the TOML file on disk.
 
     Returns
     -------
-
-    The
+    JSONData
+        The structured data read from the TOML file.
+
+    Raises
+    ------
+    TypeError
+        If the TOML root is not a table (dictionary).
     """
-
+    path = coerce_path(path)
+    payload = tomllib.loads(path.read_text(encoding='utf-8'))
+    if isinstance(payload, dict):
+        return payload
+    raise TypeError('TOML root must be a table (dict)')
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,15 +80,40 @@ def write(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the TOML file on disk.
     data : JSONData
-        Data to write as TOML. Should be a
-        single dictionary.
+        Data to write as TOML. Should be a dictionary.
 
     Returns
     -------
     int
-        The number of
+        The number of records written to the TOML file.
     """
-
+    path = coerce_path(path)
+    payload = require_dict_payload(data, format_name='TOML')
+
+    toml_writer: Any
+    try:
+        toml_writer = get_optional_module(
+            'tomli_w',
+            error_message=(
+                'TOML write support requires optional dependency "tomli_w".\n'
+                'Install with: pip install tomli-w'
+            ),
+        )
+        content = toml_writer.dumps(cast(JSONDict, payload))
+    except ImportError:
+        toml = get_optional_module(
+            'toml',
+            error_message=(
+                'TOML write support requires optional dependency "tomli_w" '
+                'or "toml".\n'
+                'Install with: pip install tomli-w'
+            ),
+        )
+        content = toml.dumps(cast(JSONDict, payload))
+
+    ensure_parent_dir(path)
+    path.write_text(content, encoding='utf-8')
+    return 1
etlplus/file/tsv.py
CHANGED
@@ -19,10 +19,10 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._io import coerce_path
 from ._io import read_delimited
 from ._io import write_delimited
 
@@ -40,14 +40,14 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read TSV content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the TSV file on disk.
 
     Returns
@@ -59,7 +59,7 @@ def read(
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -67,7 +67,7 @@ def write(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the TSV file on disk.
     data : JSONData
         Data to write as TSV. Should be a list of dictionaries or a
@@ -78,4 +78,5 @@ def write(
     int
         The number of rows written to the TSV file.
     """
+    path = coerce_path(path)
+    return write_delimited(path, data, delimiter='\t', format_name='TSV')