etlplus 0.16.10__py3-none-any.whl → 0.17.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/file/README.md +33 -0
- etlplus/file/arrow.py +35 -5
- etlplus/file/bson.py +50 -5
- etlplus/file/cbor.py +35 -11
- etlplus/file/dat.py +44 -6
- etlplus/file/dta.py +46 -11
- etlplus/file/duckdb.py +159 -5
- etlplus/file/enums.py +29 -0
- etlplus/file/fwf.py +37 -5
- etlplus/file/hdf5.py +41 -3
- etlplus/file/ini.py +62 -12
- etlplus/file/msgpack.py +33 -9
- etlplus/file/nc.py +55 -6
- etlplus/file/ods.py +39 -6
- etlplus/file/pb.py +32 -19
- etlplus/file/properties.py +52 -12
- etlplus/file/proto.py +24 -12
- etlplus/file/psv.py +5 -5
- etlplus/file/rda.py +83 -9
- etlplus/file/rds.py +76 -8
- etlplus/file/sas7bdat.py +41 -11
- etlplus/file/sav.py +40 -11
- etlplus/file/sqlite.py +123 -5
- etlplus/file/tab.py +6 -7
- etlplus/file/toml.py +54 -12
- etlplus/file/xlsm.py +39 -5
- etlplus/file/xpt.py +61 -5
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/METADATA +44 -26
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/RECORD +33 -33
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/WHEEL +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/top_level.txt +0 -0
etlplus/file/sqlite.py
CHANGED

```diff
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.sqlite` module.
 
-Helpers for reading/writing SQLite database (SQLITE) files (not
-implemented yet).
+Helpers for reading/writing SQLite database (SQLITE) files.
 
 Notes
 -----
@@ -19,11 +18,14 @@ Notes
 
 from __future__ import annotations
 
+import json
+import sqlite3
 from pathlib import Path
+from typing import Any
 
 from ..types import JSONData
 from ..types import JSONList
-from . import
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -35,6 +37,69 @@ __all__ = [
 ]
 
 
+# SECTION: INTERNAL CONSTANTS ============================================== #
+
+
+DEFAULT_TABLE = 'data'
+
+
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _quote_identifier(value: str) -> str:
+    """Return a safely quoted SQL identifier."""
+    escaped = value.replace('"', '""')
+    return f'"{escaped}"'
+
+
+def _coerce_sql_value(value: Any) -> Any:
+    """Normalize values into SQLite-compatible types."""
+    if value is None or isinstance(value, (str, int, float, bool)):
+        return value
+    return json.dumps(value, ensure_ascii=True)
+
+
+def _infer_column_type(values: list[Any]) -> str:
+    """Infer a basic SQLite column type from sample values."""
+    seen_bool = False
+    seen_int = False
+    seen_float = False
+    seen_other = False
+    for value in values:
+        if value is None:
+            continue
+        if isinstance(value, bool):
+            seen_bool = True
+        elif isinstance(value, int):
+            seen_int = True
+        elif isinstance(value, float):
+            seen_float = True
+        else:
+            seen_other = True
+            break
+    if seen_other:
+        return 'TEXT'
+    if seen_float:
+        return 'REAL'
+    if seen_int or seen_bool:
+        return 'INTEGER'
+    return 'TEXT'
+
+
+def _resolve_table(tables: list[str]) -> str | None:
+    """Pick a table name for read operations."""
+    if not tables:
+        return None
+    if DEFAULT_TABLE in tables:
+        return DEFAULT_TABLE
+    if len(tables) == 1:
+        return tables[0]
+    raise ValueError(
+        'Multiple tables found in SQLite file; expected "data" or a '
+        'single table',
+    )
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
@@ -54,7 +119,23 @@ def read(
     JSONList
         The list of dictionaries read from the SQLITE file.
     """
-
+    conn = sqlite3.connect(str(path))
+    try:
+        conn.row_factory = sqlite3.Row
+        cursor = conn.execute(
+            'SELECT name FROM sqlite_master '
+            "WHERE type='table' AND name NOT LIKE 'sqlite_%' "
+            'ORDER BY name',
+        )
+        tables = [row[0] for row in cursor.fetchall()]
+        table = _resolve_table(tables)
+        if table is None:
+            return []
+        query = f'SELECT * FROM {_quote_identifier(table)}'
+        rows = conn.execute(query).fetchall()
+        return [dict(row) for row in rows]
+    finally:
+        conn.close()
 
 
 def write(
@@ -77,4 +158,41 @@ def write(
     int
         The number of rows written to the SQLITE file.
     """
-
+    records = normalize_records(data, 'SQLITE')
+    if not records:
+        return 0
+
+    columns = sorted({key for row in records for key in row})
+    if not columns:
+        return 0
+
+    column_values: dict[str, list[Any]] = {col: [] for col in columns}
+    for row in records:
+        for column in columns:
+            column_values[column].append(row.get(column))
+
+    column_defs = ', '.join(
+        f'{_quote_identifier(column)} {_infer_column_type(values)}'
+        for column, values in column_values.items()
+    )
+    table_ident = _quote_identifier(DEFAULT_TABLE)
+    insert_columns = ', '.join(_quote_identifier(column) for column in columns)
+    placeholders = ', '.join('?' for _ in columns)
+    insert_sql = (
+        f'INSERT INTO {table_ident} ({insert_columns}) VALUES ({placeholders})'
+    )
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    conn = sqlite3.connect(str(path))
+    try:
+        conn.execute(f'DROP TABLE IF EXISTS {table_ident}')
+        conn.execute(f'CREATE TABLE {table_ident} ({column_defs})')
+        rows = [
+            tuple(_coerce_sql_value(row.get(column)) for column in columns)
+            for row in records
+        ]
+        conn.executemany(insert_sql, rows)
+        conn.commit()
+    finally:
+        conn.close()
+    return len(records)
```
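The new `write()` always rebuilds a single table named `data`, inferring one column type per key and JSON-encoding any non-scalar value; `read()` reverses this. A minimal stdlib-only sketch of that round trip, with hypothetical records and file name standing in for the etlplus-internal `normalize_records` plumbing:

```python
import json
import sqlite3

records = [
    {'id': 1, 'name': 'ada', 'tags': ['x', 'y']},  # nested value -> JSON text
    {'id': 2, 'name': 'bob', 'tags': None},
]

conn = sqlite3.connect('demo.sqlite')
try:
    # Mirrors write(): fixed table name 'data', dropped and recreated on
    # every call, with column types inferred from values (INTEGER/TEXT here).
    conn.execute('DROP TABLE IF EXISTS "data"')
    conn.execute('CREATE TABLE "data" ("id" INTEGER, "name" TEXT, "tags" TEXT)')
    conn.executemany(
        'INSERT INTO "data" ("id", "name", "tags") VALUES (?, ?, ?)',
        [
            (r['id'], r['name'], None if r['tags'] is None else json.dumps(r['tags']))
            for r in records
        ],
    )
    conn.commit()

    # Mirrors read(): rows come back as plain dictionaries.
    conn.row_factory = sqlite3.Row
    print([dict(row) for row in conn.execute('SELECT * FROM "data"')])
finally:
    conn.close()
```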
etlplus/file/tab.py
CHANGED

```diff
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.tab` module.
 
-Helpers for reading/writing "tab"-formatted (TAB) files (not implemented
-yet).
+Helpers for reading/writing "tab"-formatted (TAB) files.
 
 Notes
 -----
@@ -15,9 +14,8 @@ Notes
 - Header/metadata lines or multi-line records that break TSV assumptions.
 - Not actually tab-delimited despite the name.
 - Rule of thumb:
-  -
+  - This implementation treats TAB as tab-delimited text.
   - If the file has fixed-width fields, use :mod:`etlplus.file.fwf`.
-  - Otherwise, use :mod:`etlplus.file.tab` (i.e., this module).
 """
 
 from __future__ import annotations
@@ -26,7 +24,8 @@ from pathlib import Path
 
 from ..types import JSONData
 from ..types import JSONList
-from . import
+from ._io import read_delimited
+from ._io import write_delimited
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -57,7 +56,7 @@ def read(
     JSONList
         The list of dictionaries read from the TAB file.
     """
-    return
+    return read_delimited(path, delimiter='\t')
 
 
 def write(
@@ -80,4 +79,4 @@ def write(
     int
         The number of rows written to the TAB file.
     """
-    return
+    return write_delimited(path, data, delimiter='\t')
```
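`read()` and `write()` now delegate to the shared `_io.read_delimited`/`_io.write_delimited` helpers with a tab delimiter. Those helpers are not shown in this diff; a plausible stdlib approximation of the behavior (header row, one dict per record, hypothetical `read_tab`/`write_tab` names) would be:

```python
import csv
from pathlib import Path

def read_tab(path: Path) -> list[dict]:
    # Tab-delimited text with a header row, matching the module's assumption.
    with path.open(newline='', encoding='utf-8') as handle:
        return list(csv.DictReader(handle, delimiter='\t'))

def write_tab(path: Path, rows: list[dict]) -> int:
    # Union of keys across records becomes the header, as in the SQLite writer.
    fieldnames = sorted({key for row in rows for key in row})
    with path.open('w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames, delimiter='\t')
        writer.writeheader()
        writer.writerows(rows)
    return len(rows)
```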
etlplus/file/toml.py
CHANGED

```diff
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.toml` module.
 
-Helpers for reading/writing Tom's Obvious Minimal Language (TOML) files
-(not implemented yet).
+Helpers for reading/writing Tom's Obvious Minimal Language (TOML) files.
 
 Notes
 -----
@@ -19,11 +18,14 @@ Notes
 
 from __future__ import annotations
 
+import tomllib
 from pathlib import Path
+from typing import Any
+from typing import cast
 
 from ..types import JSONData
-from ..types import
-from . import
+from ..types import JSONDict
+from ._imports import get_optional_module
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -40,7 +42,7 @@ __all__ = [
 
 def read(
     path: Path,
-) ->
+) -> JSONData:
     """
     Read TOML content from *path*.
 
@@ -51,10 +53,18 @@
 
     Returns
     -------
-
-    The
+    JSONData
+        The structured data read from the TOML file.
+
+    Raises
+    ------
+    TypeError
+        If the TOML root is not a table (dictionary).
     """
-
+    payload = tomllib.loads(path.read_text(encoding='utf-8'))
+    if isinstance(payload, dict):
+        return payload
+    raise TypeError('TOML root must be a table (dict)')
 
 
 def write(
@@ -69,12 +79,44 @@ def write(
     path : Path
         Path to the TOML file on disk.
     data : JSONData
-        Data to write as TOML. Should be a
-        single dictionary.
+        Data to write as TOML. Should be a dictionary.
 
     Returns
     -------
     int
-        The number of
+        The number of records written to the TOML file.
+
+    Raises
+    ------
+    TypeError
+        If *data* is not a dictionary.
     """
-
+    if isinstance(data, list):
+        raise TypeError('TOML payloads must be a dict')
+    if not isinstance(data, dict):
+        raise TypeError('TOML payloads must be a dict')
+
+    toml_writer: Any
+    try:
+        toml_writer = get_optional_module(
+            'tomli_w',
+            error_message=(
+                'TOML write support requires optional dependency "tomli_w".\n'
+                'Install with: pip install tomli-w'
+            ),
+        )
+        content = toml_writer.dumps(cast(JSONDict, data))
+    except ImportError:
+        toml = get_optional_module(
+            'toml',
+            error_message=(
+                'TOML write support requires optional dependency "tomli_w" '
+                'or "toml".\n'
+                'Install with: pip install tomli-w'
+            ),
+        )
+        content = toml.dumps(cast(JSONDict, data))
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(content, encoding='utf-8')
+    return 1
```
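Reads use the stdlib `tomllib` (Python 3.11+); writes prefer the optional `tomli_w` package and fall back to `toml`. A short usage sketch of the resulting round trip, assuming `tomli-w` is installed and using a hypothetical payload and path:

```python
import tomllib  # stdlib since Python 3.11 (read-only)
from pathlib import Path

import tomli_w  # optional dependency preferred by write()

payload = {'title': 'demo', 'owner': {'name': 'ada'}}  # root must be a table

# Mirrors write(): serialize the dict and report one record written.
Path('demo.toml').write_text(tomli_w.dumps(payload), encoding='utf-8')

# Mirrors read(): always a dict; a non-table root raises TypeError.
assert tomllib.loads(Path('demo.toml').read_text(encoding='utf-8')) == payload
```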
etlplus/file/xlsm.py
CHANGED

```diff
@@ -1,8 +1,8 @@
 """
 :mod:`etlplus.file.xlsm` module.
 
-Helpers for reading/writing Microsoft Excel Macro-Enabled (XLSM)
-spreadsheet files
+Helpers for reading/writing Microsoft Excel Macro-Enabled (XLSM)
+spreadsheet files.
 
 Notes
 -----
@@ -20,10 +20,12 @@ Notes
 from __future__ import annotations
 
 from pathlib import Path
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from . import
+from ._imports import get_pandas
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -53,8 +55,21 @@ def read(
     -------
     JSONList
         The list of dictionaries read from the XLSM file.
+
+    Raises
+    ------
+    ImportError
+        If optional dependencies for XLSM support are missing.
     """
-
+    pandas = get_pandas('XLSM')
+    try:
+        frame = pandas.read_excel(path)
+    except ImportError as e:  # pragma: no cover
+        raise ImportError(
+            'XLSM support requires optional dependency "openpyxl".\n'
+            'Install with: pip install openpyxl',
+        ) from e
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
@@ -76,5 +91,24 @@ def write(
     -------
     int
         The number of rows written to the XLSM file.
+
+    Raises
+    ------
+    ImportError
+        If optional dependencies for XLSM support are missing.
     """
-
+    records = normalize_records(data, 'XLSM')
+    if not records:
+        return 0
+
+    pandas = get_pandas('XLSM')
+    path.parent.mkdir(parents=True, exist_ok=True)
+    frame = pandas.DataFrame.from_records(records)
+    try:
+        frame.to_excel(path, index=False)
+    except ImportError as e:  # pragma: no cover
+        raise ImportError(
+            'XLSM support requires optional dependency "openpyxl".\n'
+            'Install with: pip install openpyxl',
+        ) from e
+    return len(records)
```
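Both XLSM paths go through pandas, with `openpyxl` doing the Open XML work underneath. A minimal round-trip sketch under that assumption (hypothetical records and file name; note that writing a fresh DataFrame this way produces a workbook without macros):

```python
import pandas as pd  # needs the optional "openpyxl" dependency for .xlsm

records = [{'id': 1, 'name': 'ada'}, {'id': 2, 'name': 'bob'}]

# Mirrors write(): one sheet, no index column.
pd.DataFrame.from_records(records).to_excel('demo.xlsm', index=False)

# Mirrors read(): the first sheet back as a list of dicts.
print(pd.read_excel('demo.xlsm').to_dict(orient='records'))
```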
etlplus/file/xpt.py
CHANGED

```diff
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.xpt` module.
 
-Helpers for reading/writing SAS Transport (XPT) files (not implemented
-yet).
+Helpers for reading/writing SAS Transport (XPT) files.
 
 Notes
 -----
@@ -20,10 +19,14 @@ Notes
 from __future__ import annotations
 
 from pathlib import Path
+from typing import Any
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from . import
+from ._imports import get_optional_module
+from ._imports import get_pandas
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -35,6 +38,27 @@ __all__ = [
 ]
 
 
+# SECTION: INTERNAL HELPERS ================================================ #
+
+
+def _get_pyreadstat() -> Any:
+    """Return the pyreadstat module, importing it on first use."""
+    return get_optional_module(
+        'pyreadstat',
+        error_message=(
+            'XPT support requires optional dependency "pyreadstat".\n'
+            'Install with: pip install pyreadstat'
+        ),
+    )
+
+
+def _raise_readstat_error(err: ImportError) -> None:
+    raise ImportError(
+        'XPT support requires optional dependency "pyreadstat".\n'
+        'Install with: pip install pyreadstat',
+    ) from err
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
@@ -54,7 +78,19 @@ def read(
     JSONList
         The list of dictionaries read from the XPT file.
     """
-
+    pandas = get_pandas('XPT')
+    pyreadstat = _get_pyreadstat()
+    reader = getattr(pyreadstat, 'read_xport', None)
+    if reader is not None:
+        frame, _meta = reader(str(path))
+        return cast(JSONList, frame.to_dict(orient='records'))
+    try:
+        frame = pandas.read_sas(path, format='xport')
+    except TypeError:
+        frame = pandas.read_sas(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_readstat_error(err)
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
@@ -76,5 +112,25 @@ def write(
     -------
     int
         The number of rows written to the XPT file.
+
+    Raises
+    ------
+    ImportError
+        If "pyreadstat" is not installed with write support.
     """
-
+    records = normalize_records(data, 'XPT')
+    if not records:
+        return 0
+
+    pandas = get_pandas('XPT')
+    pyreadstat = _get_pyreadstat()
+    writer = getattr(pyreadstat, 'write_xport', None)
+    if writer is None:
+        raise ImportError(
+            'XPT write support requires "pyreadstat" with write_xport().',
+        )
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    frame = pandas.DataFrame.from_records(records)
+    writer(frame, str(path))
+    return len(records)
```
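The module probes `pyreadstat` with `getattr` so builds lacking `read_xport`/`write_xport` fall back to `pandas.read_sas`. Assuming a pyreadstat version that ships both entry points, the happy path looks like this (hypothetical frame and file name):

```python
import pandas as pd
import pyreadstat  # optional dependency

frame = pd.DataFrame.from_records([{'id': 1, 'score': 3.5}])

# Mirrors write(): delegates to pyreadstat.write_xport() when it exists.
pyreadstat.write_xport(frame, 'demo.xpt')

# Mirrors read(): pyreadstat returns (DataFrame, metadata); only the
# frame is converted to records.
frame2, _meta = pyreadstat.read_xport('demo.xpt')
print(frame2.to_dict(orient='records'))
```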
{etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/METADATA
CHANGED

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: etlplus
-Version: 0.16.10
+Version: 0.17.2
 Summary: A Swiss Army knife for simple ETL operations
 Home-page: https://github.com/Dagitali/ETLPlus
 Author: ETLPlus Team
@@ -32,6 +32,7 @@ Requires-Dist: typer>=0.21.0
 Requires-Dist: xlrd>=2.0.2
 Requires-Dist: xlwt>=1.3.0
 Provides-Extra: dev
+Requires-Dist: autopep8>=2.3.2; extra == "dev"
 Requires-Dist: black>=25.9.0; extra == "dev"
 Requires-Dist: build>=1.2.2; extra == "dev"
 Requires-Dist: flake8>=7.3.0; extra == "dev"
@@ -44,6 +45,17 @@ Requires-Dist: ruff>=0.14.4; extra == "dev"
 Provides-Extra: docs
 Requires-Dist: sphinx>=4.0.0; extra == "docs"
 Requires-Dist: sphinx-rtd-theme>=1.0.0; extra == "docs"
+Provides-Extra: file
+Requires-Dist: pymongo>=4.9.1; extra == "file"
+Requires-Dist: cbor2>=5.6.4; extra == "file"
+Requires-Dist: duckdb>=1.1.0; extra == "file"
+Requires-Dist: msgpack>=1.0.8; extra == "file"
+Requires-Dist: netCDF4>=1.7.2; extra == "file"
+Requires-Dist: odfpy>=1.4.1; extra == "file"
+Requires-Dist: pyreadr>=0.5.2; extra == "file"
+Requires-Dist: pyreadstat>=1.3.3; extra == "file"
+Requires-Dist: tomli-w>=1.2.0; extra == "file"
+Requires-Dist: xarray>=2024.9.0; extra == "file"
 Dynamic: home-page
 Dynamic: license-file
 Dynamic: requires-python
@@ -176,6 +188,12 @@ For development:
 pip install -e ".[dev]"
 ```
 
+For full file-format support (optional extras):
+
+```bash
+pip install -e ".[file]"
+```
+
 ## Quickstart
 
 Get up and running in under a minute.
@@ -240,10 +258,10 @@ Recognized file formats are listed in the tables below.
 | Format | Read | Write | Description |
 | --- | --- | --- | --- |
 | `csv` | Y | Y | Comma-Separated Values |
-| `dat` |
-| `fwf` |
-| `psv` |
-| `tab` |
+| `dat` | Y | Y | Generic data file, often delimited or fixed-width |
+| `fwf` | Y | Y | Fixed-Width Fields |
+| `psv` | Y | Y | Pipe-Separated Values |
+| `tab` | Y | Y | Often synonymous with TSV |
 | `tsv` | Y | Y | Tab-Separated Values |
 | `txt` | Y | Y | Plain text, often delimited or fixed-width |
 
@@ -253,11 +271,11 @@ Recognized file formats are listed in the tables below.
 | --- | --- | --- | --- |
 | `cfg` | N | N | Config-style key-value pairs |
 | `conf` | N | N | Config-style key-value pairs |
-| `ini` |
+| `ini` | Y | Y | Config-style key-value pairs |
 | `json` | Y | Y | JavaScript Object Notation |
 | `ndjson` | Y | Y | Newline-Delimited JSON |
-| `properties` |
-| `toml` |
+| `properties` | Y | Y | Java-style key-value pairs |
+| `toml` | Y | Y | Tom's Obvious Minimal Language |
 | `xml` | Y | Y | Extensible Markup Language |
 | `yaml` | Y | Y | YAML Ain't Markup Language |
 
@@ -265,7 +283,7 @@ Recognized file formats are listed in the tables below.
 
 | Format | Read | Write | Description |
 | --- | --- | --- | --- |
-| `arrow` |
+| `arrow` | Y | Y | Apache Arrow IPC |
 | `feather` | Y | Y | Apache Arrow Feather |
 | `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
 | `parquet` | Y | Y | Apache Parquet; common in Big Data |
@@ -275,48 +293,48 @@ Recognized file formats are listed in the tables below.
 | Format | Read | Write | Description |
 | --- | --- | --- | --- |
 | `avro` | Y | Y | Apache Avro |
-| `bson` |
-| `cbor` |
+| `bson` | Y | Y | Binary JSON; common with MongoDB exports/dumps |
+| `cbor` | Y | Y | Concise Binary Object Representation |
 | `ion` | N | N | Amazon Ion |
-| `msgpack` |
-| `pb` |
+| `msgpack` | Y | Y | MessagePack |
+| `pb` | Y | Y | Protocol Buffers (Google Protobuf) |
 | `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
-| `proto` |
+| `proto` | Y | Y | Protocol Buffers schema; often in .pb / .bin |
 
 #### Databases and Embedded Storage
 
 | Format | Read | Write | Description |
 | --- | --- | --- | --- |
 | `accdb` | N | N | Microsoft Access (newer format) |
-| `duckdb` |
+| `duckdb` | Y | Y | DuckDB |
 | `mdb` | N | N | Microsoft Access (older format) |
-| `sqlite` |
+| `sqlite` | Y | Y | SQLite |
 
 #### Spreadsheets
 
 | Format | Read | Write | Description |
 | --- | --- | --- | --- |
 | `numbers` | N | N | Apple Numbers |
-| `ods` |
+| `ods` | Y | Y | OpenDocument |
 | `wks` | N | N | Lotus 1-2-3 |
 | `xls` | Y | N | Microsoft Excel (BIFF; read-only) |
-| `xlsm` |
+| `xlsm` | Y | Y | Microsoft Excel Macro-Enabled (Open XML) |
 | `xlsx` | Y | Y | Microsoft Excel (Open XML) |
 
 #### Statistical / Scientific / Numeric Computing
 
 | Format | Read | Write | Description |
 | --- | --- | --- | --- |
-| `dta` |
-| `hdf5` |
+| `dta` | Y | Y | Stata |
+| `hdf5` | Y | N | Hierarchical Data Format |
 | `mat` | N | N | MATLAB |
-| `nc` |
-| `rda` |
-| `rds` |
-| `sas7bdat` |
-| `sav` |
+| `nc` | Y | Y | NetCDF |
+| `rda` | Y | Y | RData workspace/object |
+| `rds` | Y | Y | R data |
+| `sas7bdat` | Y | N | SAS data |
+| `sav` | Y | Y | SPSS data |
 | `sylk` | N | N | Symbolic Link |
-| `xpt` |
+| `xpt` | Y | Y | SAS Transport |
 | `zsav` | N | N | Compressed SPSS data |
 
 #### Logs and Event Streams
````
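The new `file` extra bundles the optional format dependencies added above. A small, hypothetical sketch for checking which of them are importable in the current environment (module names taken from the new `Requires-Dist` entries; `tomli-w` imports as `tomli_w`):

```python
from importlib import import_module

# A sample of the "file" extra's dependencies, by import name.
for name in ('cbor2', 'duckdb', 'msgpack', 'pyreadstat', 'tomli_w'):
    try:
        import_module(name)
        print(f'{name}: available')
    except ImportError:
        print(f'{name}: missing (pip install "etlplus[file]")')
```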