etlplus 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/file/_imports.py +35 -20
- etlplus/file/_io.py +138 -15
- etlplus/file/_r.py +48 -0
- etlplus/file/_sql.py +224 -0
- etlplus/file/accdb.py +7 -6
- etlplus/file/arrow.py +13 -24
- etlplus/file/avro.py +13 -10
- etlplus/file/bson.py +61 -22
- etlplus/file/cbor.py +13 -25
- etlplus/file/cfg.py +7 -6
- etlplus/file/conf.py +7 -6
- etlplus/file/core.py +1 -1
- etlplus/file/csv.py +8 -7
- etlplus/file/dat.py +9 -6
- etlplus/file/dta.py +15 -30
- etlplus/file/duckdb.py +29 -122
- etlplus/file/feather.py +15 -30
- etlplus/file/fwf.py +16 -14
- etlplus/file/gz.py +12 -7
- etlplus/file/hbs.py +7 -6
- etlplus/file/hdf5.py +31 -6
- etlplus/file/ini.py +17 -24
- etlplus/file/ion.py +7 -6
- etlplus/file/jinja2.py +7 -6
- etlplus/file/json.py +10 -11
- etlplus/file/log.py +7 -6
- etlplus/file/mat.py +7 -6
- etlplus/file/mdb.py +7 -6
- etlplus/file/msgpack.py +13 -25
- etlplus/file/mustache.py +7 -6
- etlplus/file/nc.py +30 -21
- etlplus/file/ndjson.py +10 -6
- etlplus/file/numbers.py +7 -6
- etlplus/file/ods.py +10 -6
- etlplus/file/orc.py +15 -30
- etlplus/file/parquet.py +10 -6
- etlplus/file/pb.py +22 -23
- etlplus/file/pbf.py +7 -6
- etlplus/file/properties.py +15 -29
- etlplus/file/proto.py +14 -20
- etlplus/file/psv.py +8 -7
- etlplus/file/rda.py +19 -51
- etlplus/file/rds.py +19 -51
- etlplus/file/sas7bdat.py +10 -30
- etlplus/file/sav.py +13 -24
- etlplus/file/sqlite.py +25 -83
- etlplus/file/stub.py +8 -6
- etlplus/file/sylk.py +7 -6
- etlplus/file/tab.py +8 -7
- etlplus/file/toml.py +14 -17
- etlplus/file/tsv.py +8 -7
- etlplus/file/txt.py +10 -7
- etlplus/file/vm.py +7 -6
- etlplus/file/wks.py +7 -6
- etlplus/file/xls.py +8 -5
- etlplus/file/xlsm.py +10 -6
- etlplus/file/xlsx.py +10 -6
- etlplus/file/xml.py +11 -9
- etlplus/file/xpt.py +13 -33
- etlplus/file/yaml.py +10 -11
- etlplus/file/zip.py +10 -5
- etlplus/file/zsav.py +7 -6
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/METADATA +1 -1
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/RECORD +68 -66
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0
etlplus/file/dta.py
CHANGED
@@ -18,12 +18,15 @@ Notes

 from __future__ import annotations

-from pathlib import Path
 from typing import cast

 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records

 # SECTION: EXPORTS ========================================================== #
@@ -40,39 +43,30 @@ __all__ = [


 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read DTA content from *path*.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the DTA file on disk.

     Returns
     -------
     JSONList
         The list of dictionaries read from the DTA file.
-
-    Raises
-    ------
-    ImportError
-        If optional dependencies for DTA support are missing.
     """
+    path = coerce_path(path)
+    get_dependency('pyreadstat', format_name='DTA')
     pandas = get_pandas('DTA')
-
-        frame = pandas.read_stata(path)
-    except ImportError as err:  # pragma: no cover
-        raise ImportError(
-            'DTA support may require optional dependency "pyreadstat".\n'
-            'Install with: pip install pyreadstat',
-        ) from err
+    frame = pandas.read_stata(path)
     return cast(JSONList, frame.to_dict(orient='records'))


 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -80,7 +74,7 @@ def write(

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the DTA file on disk.
     data : JSONData
         Data to write as DTA file. Should be a list of dictionaries or a single
@@ -90,24 +84,15 @@ def write(
     -------
     int
         The number of rows written to the DTA file.
-
-    Raises
-    ------
-    ImportError
-        If optional dependencies for DTA support are missing.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'DTA')
     if not records:
         return 0

+    get_dependency('pyreadstat', format_name='DTA')
     pandas = get_pandas('DTA')
-    path
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
-
-        frame.to_stata(path, write_index=False)
-    except ImportError as err:  # pragma: no cover
-        raise ImportError(
-            'DTA support may require optional dependency "pyreadstat".\n'
-            'Install with: pip install pyreadstat',
-        ) from err
+    frame.to_stata(path, write_index=False)
     return len(records)
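Note: the dta.py change drops the module's inline try/except ImportError wrapper in favor of shared helpers (coerce_path, ensure_parent_dir, get_dependency). The bodies of those helpers live in etlplus/file/_io.py and _imports.py and are not shown in this diff; the sketch below is only an assumption about their behavior, inferred from how dta.py now calls them.

# Hypothetical sketch of the shared helpers that dta.py now calls; the real
# implementations live in etlplus/file/_io.py, which this diff does not show.
from __future__ import annotations

from pathlib import Path

StrPath = str | Path  # assumption: mirrors etlplus.types.StrPath


def coerce_path(path: StrPath) -> Path:
    # Accept either a str or a Path and always return a Path object.
    return Path(path)


def ensure_parent_dir(path: Path) -> None:
    # Create the parent directory tree before writing, if it is missing.
    path.parent.mkdir(parents=True, exist_ok=True)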
etlplus/file/duckdb.py
CHANGED
@@ -18,14 +18,20 @@ Notes

 from __future__ import annotations

-import json
-from pathlib import Path
-from typing import Any
-
 from ..types import JSONData
 from ..types import JSONList
-from
+from ..types import StrPath
+from ._imports import get_dependency
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
+from ._sql import DEFAULT_TABLE
+from ._sql import DUCKDB_DIALECT
+from ._sql import coerce_sql_value
+from ._sql import collect_column_values
+from ._sql import infer_column_type
+from ._sql import quote_identifier
+from ._sql import resolve_table

 # SECTION: EXPORTS ========================================================== #

@@ -37,115 +43,18 @@ __all__ = [
 ]


-# SECTION: INTERNAL CONSTANTS ============================================== #
-
-
-DEFAULT_TABLE = 'data'
-
-
-# SECTION: INTERNAL FUNCTIONS =============================================== #
-
-
-def _coerce_sql_value(
-    value: Any,
-) -> Any:
-    """
-    Normalize values into DuckDB-compatible types.
-
-    Parameters
-    ----------
-    value : Any
-        The value to normalize.
-
-    Returns
-    -------
-    Any
-        The normalized value.
-    """
-    if value is None or isinstance(value, (str, int, float, bool)):
-        return value
-    return json.dumps(value, ensure_ascii=True)
-
-
-def _get_duckdb() -> Any:
-    """
-    Return the duckdb module, importing it on first use.
-
-    Returns
-    -------
-    Any
-        The duckdb module.
-    """
-    return get_optional_module(
-        'duckdb',
-        error_message=(
-            'DUCKDB support requires optional dependency "duckdb".\n'
-            'Install with: pip install duckdb'
-        ),
-    )
-
-
-def _infer_column_type(values: list[Any]) -> str:
-    """Infer a basic DuckDB column type from sample values."""
-    seen_bool = False
-    seen_int = False
-    seen_float = False
-    seen_other = False
-    for value in values:
-        if value is None:
-            continue
-        if isinstance(value, bool):
-            seen_bool = True
-        elif isinstance(value, int):
-            seen_int = True
-        elif isinstance(value, float):
-            seen_float = True
-        else:
-            seen_other = True
-            break
-    if seen_other:
-        return 'VARCHAR'
-    if seen_float:
-        return 'DOUBLE'
-    if seen_int:
-        return 'BIGINT'
-    if seen_bool:
-        return 'BOOLEAN'
-    return 'VARCHAR'
-
-
-def _quote_identifier(value: str) -> str:
-    """Return a safely quoted SQL identifier."""
-    escaped = value.replace('"', '""')
-    return f'"{escaped}"'
-
-
-def _resolve_table(tables: list[str]) -> str | None:
-    """Pick a table name for read operations."""
-    if not tables:
-        return None
-    if DEFAULT_TABLE in tables:
-        return DEFAULT_TABLE
-    if len(tables) == 1:
-        return tables[0]
-    raise ValueError(
-        'Multiple tables found in DuckDB file; expected "data" or a '
-        'single table',
-    )
-
-
 # SECTION: FUNCTIONS ======================================================== #


 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read DUCKDB content from *path*.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the DUCKDB file on disk.

     Returns
@@ -153,20 +62,21 @@ def read(
     JSONList
         The list of dictionaries read from the DUCKDB file.
     """
-
+    path = coerce_path(path)
+    duckdb = get_dependency('duckdb', format_name='DUCKDB')
     conn = duckdb.connect(str(path))
     try:
         tables = [row[0] for row in conn.execute('SHOW TABLES').fetchall()]
-        table =
+        table = resolve_table(tables, engine_name='DuckDB')
         if table is None:
             return []
-        query = f'SELECT * FROM {
+        query = f'SELECT * FROM {quote_identifier(table)}'
         cursor = conn.execute(query)
         rows = cursor.fetchall()
         columns = [desc[0] for desc in cursor.description or []]
         if not columns:
             info = conn.execute(
-                f'PRAGMA table_info({
+                f'PRAGMA table_info({quote_identifier(table)})',
             ).fetchall()
             columns = [row[1] for row in info]
         return [dict(zip(columns, row, strict=True)) for row in rows]
@@ -175,7 +85,7 @@ def read(


 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -183,7 +93,7 @@ def write(

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the DUCKDB file on disk.
     data : JSONData
         Data to write as DUCKDB. Should be a list of dictionaries or a
@@ -194,38 +104,35 @@ def write(
     int
         The number of rows written to the DUCKDB file.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'DUCKDB')
     if not records:
         return 0

-    columns =
+    columns, column_values = collect_column_values(records)
     if not columns:
         return 0

-    column_values: dict[str, list[Any]] = {col: [] for col in columns}
-    for row in records:
-        for column in columns:
-            column_values[column].append(row.get(column))
-
     column_defs = ', '.join(
-        f'{
+        f'{quote_identifier(column)} '
+        f'{infer_column_type(values, DUCKDB_DIALECT)}'
         for column, values in column_values.items()
     )
-    table_ident =
-    insert_columns = ', '.join(
+    table_ident = quote_identifier(DEFAULT_TABLE)
+    insert_columns = ', '.join(quote_identifier(column) for column in columns)
     placeholders = ', '.join('?' for _ in columns)
     insert_sql = (
         f'INSERT INTO {table_ident} ({insert_columns}) VALUES ({placeholders})'
     )

-    duckdb =
-    path
+    duckdb = get_dependency('duckdb', format_name='DUCKDB')
+    ensure_parent_dir(path)
     conn = duckdb.connect(str(path))
     try:
         conn.execute(f'DROP TABLE IF EXISTS {table_ident}')
         conn.execute(f'CREATE TABLE {table_ident} ({column_defs})')
         rows = [
-            tuple(
+            tuple(coerce_sql_value(row.get(column)) for column in columns)
            for row in records
         ]
         conn.executemany(insert_sql, rows)
etlplus/file/feather.py
CHANGED
@@ -18,12 +18,15 @@ Notes

 from __future__ import annotations

-from pathlib import Path
 from typing import cast

 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records

 # SECTION: EXPORTS ========================================================== #
@@ -40,39 +43,30 @@ __all__ = [


 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read Feather content from *path*.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the Feather file on disk.

     Returns
     -------
     JSONList
         The list of dictionaries read from the Feather file.
-
-    Raises
-    ------
-    ImportError
-        When optional dependency "pyarrow" is missing.
     """
+    path = coerce_path(path)
+    get_dependency('pyarrow', format_name='Feather')
     pandas = get_pandas('Feather')
-
-        frame = pandas.read_feather(path)
-    except ImportError as e:  # pragma: no cover
-        raise ImportError(
-            'Feather support requires optional dependency "pyarrow".\n'
-            'Install with: pip install pyarrow',
-        ) from e
+    frame = pandas.read_feather(path)
     return cast(JSONList, frame.to_dict(orient='records'))


 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -80,7 +74,7 @@ def write(

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the Feather file on disk.
     data : JSONData
         Data to write.
@@ -89,24 +83,15 @@ def write(
     -------
     int
         Number of records written.
-
-    Raises
-    ------
-    ImportError
-        When optional dependency "pyarrow" is missing.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'Feather')
     if not records:
         return 0

+    get_dependency('pyarrow', format_name='Feather')
     pandas = get_pandas('Feather')
-    path
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
-
-        frame.to_feather(path)
-    except ImportError as e:  # pragma: no cover
-        raise ImportError(
-            'Feather support requires optional dependency "pyarrow".\n'
-            'Install with: pip install pyarrow',
-        ) from e
+    frame.to_feather(path)
     return len(records)
etlplus/file/fwf.py
CHANGED
@@ -17,14 +17,16 @@ Notes

 from __future__ import annotations

-from pathlib import Path
-from typing import Any
 from typing import cast

 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
+from ._io import stringify_value

 # SECTION: EXPORTS ========================================================== #

@@ -40,14 +42,14 @@ __all__ = [


 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read FWF content from *path*.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the FWF file on disk.

     Returns
@@ -55,13 +57,14 @@ def read(
     JSONList
         The list of dictionaries read from the FWF file.
     """
+    path = coerce_path(path)
     pandas = get_pandas('FWF')
     frame = pandas.read_fwf(path)
     return cast(JSONList, frame.to_dict(orient='records'))


 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -69,7 +72,7 @@ def write(

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the FWF file on disk.
     data : JSONData
         Data to write as FWF file. Should be a list of dictionaries or a
@@ -80,6 +83,7 @@ def write(
     int
         The number of rows written to the FWF file.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'FWF')
     if not records:
         return 0
@@ -88,23 +92,21 @@ def write(
     if not fieldnames:
         return 0

-    def stringify(value: Any) -> str:
-        if value is None:
-            return ''
-        return str(value)
-
     widths: dict[str, int] = {name: len(name) for name in fieldnames}
     for row in records:
         for name in fieldnames:
-            widths[name] = max(
+            widths[name] = max(
+                widths[name],
+                len(stringify_value(row.get(name))),
+            )

-    path
+    ensure_parent_dir(path)
     with path.open('w', encoding='utf-8', newline='') as handle:
         header = ' '.join(name.ljust(widths[name]) for name in fieldnames)
         handle.write(header + '\n')
         for row in records:
             line = ' '.join(
-
+                stringify_value(row.get(name)).ljust(widths[name])
                 for name in fieldnames
             )
             handle.write(line + '\n')
etlplus/file/gz.py
CHANGED
@@ -11,6 +11,9 @@ import tempfile
 from pathlib import Path

 from ..types import JSONData
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from .enums import CompressionFormat
 from .enums import FileFormat
 from .enums import infer_file_format_and_compression
@@ -29,14 +32,14 @@ __all__ = [


 def _resolve_format(
-    path:
+    path: StrPath,
 ) -> FileFormat:
     """
     Resolve the inner file format from a .gz filename.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the GZ file on disk.

     Returns
@@ -63,14 +66,14 @@ def _resolve_format(


 def read(
-    path:
+    path: StrPath,
 ) -> JSONData:
     """
     Read GZ content from *path* and parse the inner payload.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the GZ file on disk.

     Returns
@@ -78,6 +81,7 @@ def read(
     JSONData
         Parsed payload.
     """
+    path = coerce_path(path)
     fmt = _resolve_format(path)
     with gzip.open(path, 'rb') as handle:
         payload = handle.read()
@@ -91,7 +95,7 @@ def read(


 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -99,7 +103,7 @@ def write(

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the GZ file on disk.
     data : JSONData
         Data to write.
@@ -109,6 +113,7 @@ def write(
     int
         Number of records written.
     """
+    path = coerce_path(path)
     fmt = _resolve_format(path)
     with tempfile.TemporaryDirectory() as tmpdir:
         tmp_path = Path(tmpdir) / f'payload.{fmt.value}'
@@ -117,7 +122,7 @@ def write(
         count = File(tmp_path, fmt).write(data)
         payload = tmp_path.read_bytes()

-    path
+    ensure_parent_dir(path)
     with gzip.open(path, 'wb') as handle:
         handle.write(payload)

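gz.write keeps its round-trip strategy: infer the inner format from the filename (e.g. data.json.gz), write the payload to a temporary file with the matching File writer, then gzip the temporary file's bytes to the destination. Below is a simplified standard-library illustration of the same idea, with JSON hard-coded where the real module dispatches on the inferred format.

# Simplified stand-in for the gz round-trip: serialize to a temporary file in
# the inner format, then gzip those bytes to the destination. The real module
# delegates the inner write to etlplus' File(...) dispatcher.
import gzip
import json
import tempfile
from pathlib import Path


def write_json_gz(path: Path, data: object) -> int:
    path.parent.mkdir(parents=True, exist_ok=True)
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir) / 'payload.json'
        tmp_path.write_text(json.dumps(data), encoding='utf-8')
        payload = tmp_path.read_bytes()
    with gzip.open(path, 'wb') as handle:
        handle.write(payload)
    return len(data) if isinstance(data, list) else 1


def read_json_gz(path: Path) -> object:
    with gzip.open(path, 'rb') as handle:
        return json.loads(handle.read().decode('utf-8'))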
etlplus/file/hbs.py
CHANGED
@@ -19,11 +19,11 @@ Notes

 from __future__ import annotations

-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path

 # SECTION: EXPORTS ========================================================== #

@@ -39,14 +39,14 @@ __all__ = [


 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read ZSAV content from *path*.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the HBS file on disk.

     Returns
@@ -58,7 +58,7 @@ def read(


 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,7 +66,7 @@ def write(

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the HBS file on disk.
     data : JSONData
         Data to write as HBS file. Should be a list of dictionaries or a
@@ -77,4 +77,5 @@ def write(
     int
         The number of rows written to the HBS file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='HBS')