etlplus 0.16.10__py3-none-any.whl → 0.17.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. etlplus/file/README.md +33 -0
  2. etlplus/file/_imports.py +35 -20
  3. etlplus/file/_io.py +138 -15
  4. etlplus/file/_r.py +48 -0
  5. etlplus/file/_sql.py +224 -0
  6. etlplus/file/accdb.py +7 -6
  7. etlplus/file/arrow.py +29 -10
  8. etlplus/file/avro.py +13 -10
  9. etlplus/file/bson.py +94 -10
  10. etlplus/file/cbor.py +29 -17
  11. etlplus/file/cfg.py +7 -6
  12. etlplus/file/conf.py +7 -6
  13. etlplus/file/core.py +1 -1
  14. etlplus/file/csv.py +8 -7
  15. etlplus/file/dat.py +52 -11
  16. etlplus/file/dta.py +36 -16
  17. etlplus/file/duckdb.py +72 -11
  18. etlplus/file/enums.py +29 -0
  19. etlplus/file/feather.py +15 -30
  20. etlplus/file/fwf.py +44 -10
  21. etlplus/file/gz.py +12 -7
  22. etlplus/file/hbs.py +7 -6
  23. etlplus/file/hdf5.py +71 -8
  24. etlplus/file/ini.py +60 -17
  25. etlplus/file/ion.py +7 -6
  26. etlplus/file/jinja2.py +7 -6
  27. etlplus/file/json.py +10 -11
  28. etlplus/file/log.py +7 -6
  29. etlplus/file/mat.py +7 -6
  30. etlplus/file/mdb.py +7 -6
  31. etlplus/file/msgpack.py +27 -15
  32. etlplus/file/mustache.py +7 -6
  33. etlplus/file/nc.py +69 -11
  34. etlplus/file/ndjson.py +10 -6
  35. etlplus/file/numbers.py +7 -6
  36. etlplus/file/ods.py +48 -11
  37. etlplus/file/orc.py +15 -30
  38. etlplus/file/parquet.py +10 -6
  39. etlplus/file/pb.py +36 -24
  40. etlplus/file/pbf.py +7 -6
  41. etlplus/file/properties.py +44 -18
  42. etlplus/file/proto.py +24 -18
  43. etlplus/file/psv.py +12 -11
  44. etlplus/file/rda.py +57 -15
  45. etlplus/file/rds.py +50 -14
  46. etlplus/file/sas7bdat.py +26 -16
  47. etlplus/file/sav.py +34 -16
  48. etlplus/file/sqlite.py +70 -10
  49. etlplus/file/stub.py +8 -6
  50. etlplus/file/sylk.py +7 -6
  51. etlplus/file/tab.py +13 -13
  52. etlplus/file/toml.py +56 -17
  53. etlplus/file/tsv.py +8 -7
  54. etlplus/file/txt.py +10 -7
  55. etlplus/file/vm.py +7 -6
  56. etlplus/file/wks.py +7 -6
  57. etlplus/file/xls.py +8 -5
  58. etlplus/file/xlsm.py +48 -10
  59. etlplus/file/xlsx.py +10 -6
  60. etlplus/file/xml.py +11 -9
  61. etlplus/file/xpt.py +46 -10
  62. etlplus/file/yaml.py +10 -11
  63. etlplus/file/zip.py +10 -5
  64. etlplus/file/zsav.py +7 -6
  65. {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/METADATA +44 -26
  66. {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/RECORD +70 -68
  67. {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
  68. {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
  69. {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
  70. {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0
etlplus/file/dat.py CHANGED
@@ -1,12 +1,12 @@
 """
 :mod:`etlplus.file.dat` module.
 
-Stub helpers for reading/writing data (DAT) files (not implemented yet).
+Helpers for reading/writing data (DAT) files.
 
 Notes
 -----
-- A “DAT-formatted” file is a generic data file that may use various
-  delimiters or fixed-width formats.
+- A DAT file is a generic data file that may use various delimiters or fixed-
+  width formats.
 - Common cases:
   - Delimited text files (e.g., CSV, TSV).
   - Fixed-width formatted files.
@@ -18,11 +18,15 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
+import csv
+from typing import cast
 
 from ..types import JSONData
+from ..types import JSONDict
 from ..types import JSONList
-from . import stub
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import write_delimited
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -38,14 +42,14 @@ __all__ = [
 
 
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read DAT content from *path*.
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the DAT file on disk.
 
     Returns
@@ -53,11 +57,47 @@ def read(
     JSONList
         The list of dictionaries read from the DAT file.
     """
-    return stub.read(path, format_name='DAT')
+    path = coerce_path(path)
+    with path.open('r', encoding='utf-8', newline='') as handle:
+        sample = handle.read(4096)
+        handle.seek(0)
+        sniffer = csv.Sniffer()
+        dialect: csv.Dialect
+        try:
+            dialect = cast(
+                csv.Dialect,
+                sniffer.sniff(sample, delimiters=',\t|;'),
+            )
+        except csv.Error:
+            dialect = cast(csv.Dialect, csv.get_dialect('excel'))
+        try:
+            has_header = sniffer.has_header(sample)
+        except csv.Error:
+            has_header = True
+
+        reader = csv.reader(handle, dialect)
+        rows = [row for row in reader if any(field.strip() for field in row)]
+    if not rows:
+        return []
+
+    if has_header:
+        header = rows[0]
+        data_rows = rows[1:]
+    else:
+        header = [f'col_{i + 1}' for i in range(len(rows[0]))]
+        data_rows = rows
+
+    records: JSONList = []
+    for row in data_rows:
+        record: JSONDict = {}
+        for index, name in enumerate(header):
+            record[name] = row[index] if index < len(row) else None
+        records.append(record)
+    return records
 
 
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -65,7 +105,7 @@ def write(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the DAT file on disk.
     data : JSONData
         Data to write as DAT file. Should be a list of dictionaries or a
@@ -76,4 +116,5 @@ def write(
     int
         The number of rows written to the DAT file.
     """
-    return stub.write(path, data, format_name='DAT')
+    path = coerce_path(path)
+    return write_delimited(path, data, delimiter=',', format_name='DAT')
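
The new DAT reader sniffs the delimiter from the first 4 KiB (falling back to the `excel` dialect) and assumes a header row when detection fails; the writer delegates to `write_delimited` with a comma. A minimal round-trip sketch (file name and sample data are illustrative; values come back as strings because the reader uses `csv.reader`):

```python
from etlplus.file import dat

rows = [
    {'id': '1', 'name': 'ada'},
    {'id': '2', 'name': 'grace'},
]

# write() delegates to write_delimited() with delimiter=','.
count = dat.write('people.dat', rows)  # -> 2

# read() sniffs ',' from the sample and detects the header row,
# so the same list of string-valued dictionaries comes back.
print(dat.read('people.dat'))
```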
etlplus/file/dta.py CHANGED
@@ -1,27 +1,33 @@
 """
 :mod:`etlplus.file.dta` module.
 
-Stub helpers for reading/writing Stata (DTA) data files (not implemented yet).
+Helpers for reading/writing Stata (DTA) files.
 
 Notes
 -----
-- Stata DTA files are binary files used by Stata statistical software that
-  store datasets with variables, labels, and data types.
+- A DTA file is a proprietary binary format created by Stata to store datasets
+  with variables, labels, and data types.
 - Common cases:
-  - Reading data for analysis in Python.
-  - Writing processed data back to Stata format.
+  - Statistical analysis workflows.
+  - Data sharing in research environments.
+  - Interchange between Stata and other analytics tools.
 - Rule of thumb:
-  - If you need to work with Stata data files, use this module for reading
+  - If the file follows the DTA specification, use this module for reading
     and writing.
 """
 
 from __future__ import annotations
 
-from pathlib import Path
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ..types import StrPath
+from ._imports import get_dependency
+from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -37,14 +43,14 @@ __all__ = [
 
 
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read DTA content from *path*.
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the DTA file on disk.
 
     Returns
@@ -52,11 +58,15 @@ def read(
     JSONList
         The list of dictionaries read from the DTA file.
     """
-    return stub.read(path, format_name='DTA')
+    path = coerce_path(path)
+    get_dependency('pyreadstat', format_name='DTA')
+    pandas = get_pandas('DTA')
+    frame = pandas.read_stata(path)
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -64,15 +74,25 @@ def write(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the DTA file on disk.
     data : JSONData
-        Data to write as DTA file. Should be a list of dictionaries or a
-        single dictionary.
+        Data to write as DTA file. Should be a list of dictionaries or a single
+        dictionary.
 
     Returns
     -------
     int
         The number of rows written to the DTA file.
     """
-    return stub.write(path, data, format_name='DTA')
+    path = coerce_path(path)
+    records = normalize_records(data, 'DTA')
+    if not records:
+        return 0
+
+    get_dependency('pyreadstat', format_name='DTA')
+    pandas = get_pandas('DTA')
+    ensure_parent_dir(path)
+    frame = pandas.DataFrame.from_records(records)
+    frame.to_stata(path, write_index=False)
+    return len(records)
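
Both paths now go through pandas, with `get_dependency('pyreadstat', ...)` guarding the optional dependency up front. A sketch of the resulting round trip (sample data illustrative; both pandas and pyreadstat must be installed):

```python
from etlplus.file import dta

records = [
    {'group': 'a', 'score': 1.5},
    {'group': 'b', 'score': 2.0},
]

# write() normalizes the records, builds a DataFrame, and calls
# DataFrame.to_stata(write_index=False); returns the row count.
dta.write('scores.dta', records)  # -> 2

# read() loads the dataset via pandas.read_stata() and converts
# the frame to a list of dictionaries.
print(dta.read('scores.dta'))
```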
etlplus/file/duckdb.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.duckdb` module.
 
-Stub helpers for reading/writing DuckDB database (DUCKDB) files (not
-implemented yet).
+Helpers for reading/writing DuckDB database (DUCKDB) files.
 
 Notes
 -----
@@ -19,11 +18,20 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ..types import StrPath
+from ._imports import get_dependency
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import normalize_records
+from ._sql import DEFAULT_TABLE
+from ._sql import DUCKDB_DIALECT
+from ._sql import coerce_sql_value
+from ._sql import collect_column_values
+from ._sql import infer_column_type
+from ._sql import quote_identifier
+from ._sql import resolve_table
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -39,14 +47,14 @@ __all__ = [
 
 
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read DUCKDB content from *path*.
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the DUCKDB file on disk.
 
     Returns
@@ -54,11 +62,30 @@ def read(
     JSONList
         The list of dictionaries read from the DUCKDB file.
     """
-    return stub.read(path, format_name='DUCKDB')
+    path = coerce_path(path)
+    duckdb = get_dependency('duckdb', format_name='DUCKDB')
+    conn = duckdb.connect(str(path))
+    try:
+        tables = [row[0] for row in conn.execute('SHOW TABLES').fetchall()]
+        table = resolve_table(tables, engine_name='DuckDB')
+        if table is None:
+            return []
+        query = f'SELECT * FROM {quote_identifier(table)}'
+        cursor = conn.execute(query)
+        rows = cursor.fetchall()
+        columns = [desc[0] for desc in cursor.description or []]
+        if not columns:
+            info = conn.execute(
+                f'PRAGMA table_info({quote_identifier(table)})',
+            ).fetchall()
+            columns = [row[1] for row in info]
+        return [dict(zip(columns, row, strict=True)) for row in rows]
+    finally:
+        conn.close()
 
 
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,7 +93,7 @@ def write(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the DUCKDB file on disk.
     data : JSONData
         Data to write as DUCKDB. Should be a list of dictionaries or a
@@ -77,4 +104,38 @@ def write(
     int
         The number of rows written to the DUCKDB file.
     """
-    return stub.write(path, data, format_name='DUCKDB')
+    path = coerce_path(path)
+    records = normalize_records(data, 'DUCKDB')
+    if not records:
+        return 0
+
+    columns, column_values = collect_column_values(records)
+    if not columns:
+        return 0
+
+    column_defs = ', '.join(
+        f'{quote_identifier(column)} '
+        f'{infer_column_type(values, DUCKDB_DIALECT)}'
+        for column, values in column_values.items()
+    )
+    table_ident = quote_identifier(DEFAULT_TABLE)
+    insert_columns = ', '.join(quote_identifier(column) for column in columns)
+    placeholders = ', '.join('?' for _ in columns)
+    insert_sql = (
+        f'INSERT INTO {table_ident} ({insert_columns}) VALUES ({placeholders})'
+    )
+
+    duckdb = get_dependency('duckdb', format_name='DUCKDB')
+    ensure_parent_dir(path)
+    conn = duckdb.connect(str(path))
+    try:
+        conn.execute(f'DROP TABLE IF EXISTS {table_ident}')
+        conn.execute(f'CREATE TABLE {table_ident} ({column_defs})')
+        rows = [
+            tuple(coerce_sql_value(row.get(column)) for column in columns)
+            for row in records
+        ]
+        conn.executemany(insert_sql, rows)
+    finally:
+        conn.close()
+    return len(records)
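
`write()` drops and recreates a single table (named by `_sql.DEFAULT_TABLE`), inferring one SQL type per column from the observed values, and `read()` picks a table via `resolve_table` over `SHOW TABLES`. A round-trip sketch (requires the optional `duckdb` package; file name and data are illustrative):

```python
from etlplus.file import duckdb as duckdb_file

rows = [
    {'id': 1, 'city': 'Oslo'},
    {'id': 2, 'city': 'Bergen'},
]

# write() creates the default table with inferred column types and
# bulk-inserts via executemany(); returns the number of rows.
duckdb_file.write('cities.duckdb', rows)  # -> 2

# read() resolves a table from SHOW TABLES and returns its rows as
# dictionaries keyed by column name.
print(duckdb_file.read('cities.duckdb'))
```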
etlplus/file/enums.py CHANGED
@@ -199,19 +199,48 @@ class FileFormat(CoercibleStrEnum):
     'yml': 'yaml',
     # File extensions
     '.avro': 'avro',
+    '.arrow': 'arrow',
     '.csv': 'csv',
+    '.duckdb': 'duckdb',
+    '.dat': 'dat',
     '.feather': 'feather',
+    '.fwf': 'fwf',
     '.gz': 'gz',
+    '.hdf': 'hdf5',
+    '.hdf5': 'hdf5',
+    '.h5': 'hdf5',
+    '.ini': 'ini',
     '.json': 'json',
     '.jsonl': 'ndjson',
+    '.bson': 'bson',
+    '.cbor': 'cbor',
+    '.msgpack': 'msgpack',
     '.ndjson': 'ndjson',
+    '.ods': 'ods',
     '.orc': 'orc',
     '.parquet': 'parquet',
     '.pq': 'parquet',
+    '.pb': 'pb',
+    '.proto': 'proto',
+    '.psv': 'psv',
+    '.sqlite': 'sqlite',
+    '.sqlite3': 'sqlite',
     '.stub': 'stub',
+    '.tab': 'tab',
+    '.dta': 'dta',
+    '.sas7bdat': 'sas7bdat',
+    '.xpt': 'xpt',
+    '.rds': 'rds',
+    '.rda': 'rda',
+    '.nc': 'nc',
+    '.sav': 'sav',
+    '.properties': 'properties',
+    '.prop': 'properties',
+    '.toml': 'toml',
     '.tsv': 'tsv',
     '.txt': 'txt',
     '.xls': 'xls',
+    '.xlsm': 'xlsm',
     '.xlsx': 'xlsx',
     '.zip': 'zip',
     '.xml': 'xml',
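
The expanded alias table means bare extensions now map to their formats. Assuming `CoercibleStrEnum` routes unknown values through this table (the coercion entry point itself is outside this diff), lookups like the following should resolve; the exact call form is an assumption:

```python
from etlplus.file.enums import FileFormat

# Assumption: CoercibleStrEnum resolves aliases via the table above,
# so an extension coerces to its canonical format member.
fmt = FileFormat.coerce('.sqlite3')  # hypothetical entry point
print(fmt)  # expected: FileFormat.SQLITE (alias target 'sqlite')
```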
etlplus/file/feather.py CHANGED
@@ -18,12 +18,15 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
 from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
@@ -40,39 +43,30 @@ __all__ = [
 
 
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read Feather content from *path*.
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the Feather file on disk.
 
     Returns
     -------
     JSONList
         The list of dictionaries read from the Feather file.
-
-    Raises
-    ------
-    ImportError
-        When optional dependency "pyarrow" is missing.
     """
+    path = coerce_path(path)
+    get_dependency('pyarrow', format_name='Feather')
     pandas = get_pandas('Feather')
-    try:
-        frame = pandas.read_feather(path)
-    except ImportError as e:  # pragma: no cover
-        raise ImportError(
-            'Feather support requires optional dependency "pyarrow".\n'
-            'Install with: pip install pyarrow',
-        ) from e
+    frame = pandas.read_feather(path)
     return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -80,7 +74,7 @@ def write(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the Feather file on disk.
     data : JSONData
         Data to write.
@@ -89,24 +83,15 @@ def write(
     -------
     int
         Number of records written.
-
-    Raises
-    ------
-    ImportError
-        When optional dependency "pyarrow" is missing.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'Feather')
     if not records:
         return 0
 
+    get_dependency('pyarrow', format_name='Feather')
     pandas = get_pandas('Feather')
-    path.parent.mkdir(parents=True, exist_ok=True)
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
-    try:
-        frame.to_feather(path)
-    except ImportError as e:  # pragma: no cover
-        raise ImportError(
-            'Feather support requires optional dependency "pyarrow".\n'
-            'Install with: pip install pyarrow',
-        ) from e
+    frame.to_feather(path)
     return len(records)
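
The per-call try/except ImportError wrappers are gone; `get_dependency('pyarrow', ...)` now performs the check up front, which is also why the `Raises` docstring sections were dropped. Usage is unchanged (sketch; pandas and pyarrow required):

```python
from etlplus.file import feather

# get_dependency() raises a uniform error before any pandas call if
# pyarrow is missing, replacing the old inline ImportError shim.
feather.write('events.feather', [{'event': 'start', 'ts': 1}])  # -> 1
print(feather.read('events.feather'))
```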
etlplus/file/fwf.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.fwf` module.
 
-Stub helpers for reading/writing Fixed-Width Fields (FWF) files (not
-implemented yet).
+Helpers for reading/writing Fixed-Width Fields (FWF) files.
 
 Notes
 -----
@@ -18,11 +17,16 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ..types import StrPath
+from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import normalize_records
+from ._io import stringify_value
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -38,14 +42,14 @@ __all__ = [
 
 
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read FWF content from *path*.
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the FWF file on disk.
 
     Returns
@@ -53,11 +57,14 @@ def read(
     JSONList
         The list of dictionaries read from the FWF file.
     """
-    return stub.read(path, format_name='FWF')
+    path = coerce_path(path)
+    pandas = get_pandas('FWF')
+    frame = pandas.read_fwf(path)
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -65,7 +72,7 @@ def write(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the FWF file on disk.
     data : JSONData
         Data to write as FWF file. Should be a list of dictionaries or a
@@ -76,4 +83,31 @@ def write(
     int
         The number of rows written to the FWF file.
     """
-    return stub.write(path, data, format_name='FWF')
+    path = coerce_path(path)
+    records = normalize_records(data, 'FWF')
+    if not records:
+        return 0
+
+    fieldnames = sorted({key for row in records for key in row})
+    if not fieldnames:
+        return 0
+
+    widths: dict[str, int] = {name: len(name) for name in fieldnames}
+    for row in records:
+        for name in fieldnames:
+            widths[name] = max(
+                widths[name],
+                len(stringify_value(row.get(name))),
+            )
+
+    ensure_parent_dir(path)
+    with path.open('w', encoding='utf-8', newline='') as handle:
+        header = ' '.join(name.ljust(widths[name]) for name in fieldnames)
+        handle.write(header + '\n')
+        for row in records:
+            line = ' '.join(
+                stringify_value(row.get(name)).ljust(widths[name])
+                for name in fieldnames
+            )
+            handle.write(line + '\n')
+    return len(records)
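
The writer computes each column's width as the longest of the header and all stringified values, left-justifies, and joins columns with a single space; the reader delegates to `pandas.read_fwf`. A sketch of the output (sample data illustrative):

```python
from etlplus.file import fwf

fwf.write('report.fwf', [
    {'name': 'ada', 'score': 100},
    {'name': 'grace', 'score': 9},
])
# Columns are sorted by name and padded to max(header, values):
#   name  score
#   ada   100
#   grace 9
print(fwf.read('report.fwf'))  # parsed back via pandas.read_fwf
```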
etlplus/file/gz.py CHANGED
@@ -11,6 +11,9 @@ import tempfile
 from pathlib import Path
 
 from ..types import JSONData
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from .enums import CompressionFormat
 from .enums import FileFormat
 from .enums import infer_file_format_and_compression
@@ -29,14 +32,14 @@ __all__ = [
 
 
 def _resolve_format(
-    path: Path,
+    path: StrPath,
 ) -> FileFormat:
     """
     Resolve the inner file format from a .gz filename.
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the GZ file on disk.
 
     Returns
@@ -63,14 +66,14 @@
 
 
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONData:
     """
     Read GZ content from *path* and parse the inner payload.
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the GZ file on disk.
 
     Returns
@@ -78,6 +81,7 @@ def read(
     JSONData
         Parsed payload.
     """
+    path = coerce_path(path)
     fmt = _resolve_format(path)
     with gzip.open(path, 'rb') as handle:
         payload = handle.read()
@@ -91,7 +95,7 @@
 
 
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -99,7 +103,7 @@ def write(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the GZ file on disk.
     data : JSONData
         Data to write.
@@ -109,6 +113,7 @@ def write(
     int
         Number of records written.
     """
+    path = coerce_path(path)
     fmt = _resolve_format(path)
     with tempfile.TemporaryDirectory() as tmpdir:
         tmp_path = Path(tmpdir) / f'payload.{fmt.value}'
@@ -117,7 +122,7 @@ def write(
         count = File(tmp_path, fmt).write(data)
         payload = tmp_path.read_bytes()
 
-    path.parent.mkdir(parents=True, exist_ok=True)
+    ensure_parent_dir(path)
     with gzip.open(path, 'wb') as handle:
         handle.write(payload)
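
GZ remains a transparent wrapper: the inner format is inferred from the name before `.gz`, the payload is round-tripped through a temp file, and `coerce_path`/`ensure_parent_dir` now let callers pass plain strings. A sketch (file name and data illustrative):

```python
from etlplus.file import gz

# The inner format is inferred from the suffix before .gz (JSON
# here), written via the matching handler, then gzip-compressed.
gz.write('data.json.gz', [{'a': 1}, {'a': 2}])
print(gz.read('data.json.gz'))  # -> [{'a': 1}, {'a': 2}]
```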