etlplus 0.17.2__py3-none-any.whl → 0.17.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. etlplus/file/_imports.py +35 -20
  2. etlplus/file/_io.py +138 -15
  3. etlplus/file/_r.py +48 -0
  4. etlplus/file/_sql.py +224 -0
  5. etlplus/file/accdb.py +7 -6
  6. etlplus/file/arrow.py +13 -24
  7. etlplus/file/avro.py +13 -10
  8. etlplus/file/bson.py +61 -22
  9. etlplus/file/cbor.py +13 -25
  10. etlplus/file/cfg.py +7 -6
  11. etlplus/file/conf.py +7 -6
  12. etlplus/file/core.py +1 -1
  13. etlplus/file/csv.py +8 -7
  14. etlplus/file/dat.py +9 -6
  15. etlplus/file/dta.py +15 -30
  16. etlplus/file/duckdb.py +29 -122
  17. etlplus/file/feather.py +15 -30
  18. etlplus/file/fwf.py +16 -14
  19. etlplus/file/gz.py +12 -7
  20. etlplus/file/hbs.py +7 -6
  21. etlplus/file/hdf5.py +31 -6
  22. etlplus/file/ini.py +17 -24
  23. etlplus/file/ion.py +7 -6
  24. etlplus/file/jinja2.py +7 -6
  25. etlplus/file/json.py +10 -11
  26. etlplus/file/log.py +7 -6
  27. etlplus/file/mat.py +7 -6
  28. etlplus/file/mdb.py +7 -6
  29. etlplus/file/msgpack.py +13 -25
  30. etlplus/file/mustache.py +7 -6
  31. etlplus/file/nc.py +30 -21
  32. etlplus/file/ndjson.py +10 -6
  33. etlplus/file/numbers.py +7 -6
  34. etlplus/file/ods.py +10 -6
  35. etlplus/file/orc.py +15 -30
  36. etlplus/file/parquet.py +10 -6
  37. etlplus/file/pb.py +22 -23
  38. etlplus/file/pbf.py +7 -6
  39. etlplus/file/properties.py +15 -29
  40. etlplus/file/proto.py +14 -20
  41. etlplus/file/psv.py +8 -7
  42. etlplus/file/rda.py +19 -51
  43. etlplus/file/rds.py +19 -51
  44. etlplus/file/sas7bdat.py +10 -30
  45. etlplus/file/sav.py +13 -24
  46. etlplus/file/sqlite.py +25 -83
  47. etlplus/file/stub.py +8 -6
  48. etlplus/file/sylk.py +7 -6
  49. etlplus/file/tab.py +8 -7
  50. etlplus/file/toml.py +14 -17
  51. etlplus/file/tsv.py +8 -7
  52. etlplus/file/txt.py +10 -7
  53. etlplus/file/vm.py +7 -6
  54. etlplus/file/wks.py +7 -6
  55. etlplus/file/xls.py +8 -5
  56. etlplus/file/xlsm.py +10 -6
  57. etlplus/file/xlsx.py +10 -6
  58. etlplus/file/xml.py +11 -9
  59. etlplus/file/xpt.py +13 -33
  60. etlplus/file/yaml.py +10 -11
  61. etlplus/file/zip.py +10 -5
  62. etlplus/file/zsav.py +7 -6
  63. {etlplus-0.17.2.dist-info → etlplus-0.17.4.dist-info}/METADATA +1 -1
  64. {etlplus-0.17.2.dist-info → etlplus-0.17.4.dist-info}/RECORD +68 -66
  65. {etlplus-0.17.2.dist-info → etlplus-0.17.4.dist-info}/WHEEL +0 -0
  66. {etlplus-0.17.2.dist-info → etlplus-0.17.4.dist-info}/entry_points.txt +0 -0
  67. {etlplus-0.17.2.dist-info → etlplus-0.17.4.dist-info}/licenses/LICENSE +0 -0
  68. {etlplus-0.17.2.dist-info → etlplus-0.17.4.dist-info}/top_level.txt +0 -0
etlplus/file/xls.py CHANGED
@@ -6,12 +6,13 @@ Helpers for reading Excel XLS files (write is not supported).
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
- from pathlib import Path
10
9
  from typing import cast
11
10
 
12
11
  from ..types import JSONData
13
12
  from ..types import JSONList
13
+ from ..types import StrPath
14
14
  from ._imports import get_pandas
15
+ from ._io import coerce_path
15
16
 
16
17
  # SECTION: EXPORTS ========================================================== #
17
18
 
@@ -27,14 +28,14 @@ __all__ = [
27
28
 
28
29
 
29
30
  def read(
30
- path: Path,
31
+ path: StrPath,
31
32
  ) -> JSONList:
32
33
  """
33
34
  Read XLS content from *path*.
34
35
 
35
36
  Parameters
36
37
  ----------
37
- path : Path
38
+ path : StrPath
38
39
  Path to the XLS file on disk.
39
40
 
40
41
  Returns
@@ -47,6 +48,7 @@ def read(
47
48
  ImportError
48
49
  If the optional dependency "xlrd" is not installed.
49
50
  """
51
+ path = coerce_path(path)
50
52
  pandas = get_pandas('XLS')
51
53
  try:
52
54
  frame = pandas.read_excel(path, engine='xlrd')
@@ -59,7 +61,7 @@ def read(
59
61
 
60
62
 
61
63
  def write(
62
- path: Path,
64
+ path: StrPath,
63
65
  data: JSONData,
64
66
  ) -> int:
65
67
  """
@@ -71,7 +73,7 @@ def write(
71
73
 
72
74
  Parameters
73
75
  ----------
74
- path : Path
76
+ path : StrPath
75
77
  Path to the XLS file on disk.
76
78
  data : JSONData
77
79
  Data to write.
@@ -86,4 +88,5 @@ def write(
86
88
  RuntimeError
87
89
  If XLS writing is attempted.
88
90
  """
91
+ path = coerce_path(path)
89
92
  raise RuntimeError('XLS write is not supported; use XLSX instead')
etlplus/file/xlsm.py CHANGED
@@ -19,12 +19,14 @@ Notes
19
19
 
20
20
  from __future__ import annotations
21
21
 
22
- from pathlib import Path
23
22
  from typing import cast
24
23
 
25
24
  from ..types import JSONData
26
25
  from ..types import JSONList
26
+ from ..types import StrPath
27
27
  from ._imports import get_pandas
28
+ from ._io import coerce_path
29
+ from ._io import ensure_parent_dir
28
30
  from ._io import normalize_records
29
31
 
30
32
  # SECTION: EXPORTS ========================================================== #
@@ -41,14 +43,14 @@ __all__ = [
41
43
 
42
44
 
43
45
  def read(
44
- path: Path,
46
+ path: StrPath,
45
47
  ) -> JSONList:
46
48
  """
47
49
  Read XLSM content from *path*.
48
50
 
49
51
  Parameters
50
52
  ----------
51
- path : Path
53
+ path : StrPath
52
54
  Path to the XLSM file on disk.
53
55
 
54
56
  Returns
@@ -61,6 +63,7 @@ def read(
61
63
  ImportError
62
64
  If optional dependencies for XLSM support are missing.
63
65
  """
66
+ path = coerce_path(path)
64
67
  pandas = get_pandas('XLSM')
65
68
  try:
66
69
  frame = pandas.read_excel(path)
@@ -73,7 +76,7 @@ def read(
73
76
 
74
77
 
75
78
  def write(
76
- path: Path,
79
+ path: StrPath,
77
80
  data: JSONData,
78
81
  ) -> int:
79
82
  """
@@ -81,7 +84,7 @@ def write(
81
84
 
82
85
  Parameters
83
86
  ----------
84
- path : Path
87
+ path : StrPath
85
88
  Path to the XLSM file on disk.
86
89
  data : JSONData
87
90
  Data to write as XLSM file. Should be a list of dictionaries or a
@@ -97,12 +100,13 @@ def write(
97
100
  ImportError
98
101
  If optional dependencies for XLSM support are missing.
99
102
  """
103
+ path = coerce_path(path)
100
104
  records = normalize_records(data, 'XLSM')
101
105
  if not records:
102
106
  return 0
103
107
 
104
108
  pandas = get_pandas('XLSM')
105
- path.parent.mkdir(parents=True, exist_ok=True)
109
+ ensure_parent_dir(path)
106
110
  frame = pandas.DataFrame.from_records(records)
107
111
  try:
108
112
  frame.to_excel(path, index=False)
etlplus/file/xlsx.py CHANGED
@@ -6,12 +6,14 @@ Helpers for reading/writing Excel XLSX files.
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
- from pathlib import Path
10
9
  from typing import cast
11
10
 
12
11
  from ..types import JSONData
13
12
  from ..types import JSONList
13
+ from ..types import StrPath
14
14
  from ._imports import get_pandas
15
+ from ._io import coerce_path
16
+ from ._io import ensure_parent_dir
15
17
  from ._io import normalize_records
16
18
 
17
19
  # SECTION: EXPORTS ========================================================== #
@@ -28,14 +30,14 @@ __all__ = [
28
30
 
29
31
 
30
32
  def read(
31
- path: Path,
33
+ path: StrPath,
32
34
  ) -> JSONList:
33
35
  """
34
36
  Read XLSX content from *path*.
35
37
 
36
38
  Parameters
37
39
  ----------
38
- path : Path
40
+ path : StrPath
39
41
  Path to the XLSX file on disk.
40
42
 
41
43
  Returns
@@ -48,6 +50,7 @@ def read(
48
50
  ImportError
49
51
  If optional dependencies for XLSX support are missing.
50
52
  """
53
+ path = coerce_path(path)
51
54
  pandas = get_pandas('XLSX')
52
55
  try:
53
56
  frame = pandas.read_excel(path)
@@ -60,7 +63,7 @@ def read(
60
63
 
61
64
 
62
65
  def write(
63
- path: Path,
66
+ path: StrPath,
64
67
  data: JSONData,
65
68
  ) -> int:
66
69
  """
@@ -68,7 +71,7 @@ def write(
68
71
 
69
72
  Parameters
70
73
  ----------
71
- path : Path
74
+ path : StrPath
72
75
  Path to the XLSX file on disk.
73
76
  data : JSONData
74
77
  Data to write.
@@ -83,12 +86,13 @@ def write(
83
86
  ImportError
84
87
  If optional dependencies for XLSX support are missing.
85
88
  """
89
+ path = coerce_path(path)
86
90
  records = normalize_records(data, 'XLSX')
87
91
  if not records:
88
92
  return 0
89
93
 
90
94
  pandas = get_pandas('XLSX')
91
- path.parent.mkdir(parents=True, exist_ok=True)
95
+ ensure_parent_dir(path)
92
96
  frame = pandas.DataFrame.from_records(records)
93
97
  try:
94
98
  frame.to_excel(path, index=False)
etlplus/file/xml.py CHANGED
@@ -18,12 +18,14 @@ Notes
18
18
  from __future__ import annotations
19
19
 
20
20
  import xml.etree.ElementTree as ET
21
- from pathlib import Path
22
21
  from typing import Any
23
22
 
24
23
  from ..types import JSONData
25
24
  from ..types import JSONDict
25
+ from ..types import StrPath
26
26
  from ..utils import count_records
27
+ from ._io import coerce_path
28
+ from ._io import ensure_parent_dir
27
29
 
28
30
  # SECTION: EXPORTS ========================================================== #
29
31
 
@@ -124,10 +126,7 @@ def _element_to_dict(
124
126
  result[tag] = child_data
125
127
 
126
128
  for key, value in element.attrib.items():
127
- if key in result:
128
- result[f'@{key}'] = value
129
- else:
130
- result[key] = value
129
+ result[f'@{key}'] = value
131
130
  return result
132
131
 
133
132
 
@@ -135,14 +134,14 @@ def _element_to_dict(
135
134
 
136
135
 
137
136
  def read(
138
- path: Path,
137
+ path: StrPath,
139
138
  ) -> JSONDict:
140
139
  """
141
140
  Read XML content from *path*.
142
141
 
143
142
  Parameters
144
143
  ----------
145
- path : Path
144
+ path : StrPath
146
145
  Path to the XML file on disk.
147
146
 
148
147
  Returns
@@ -150,6 +149,7 @@ def read(
150
149
  JSONDict
151
150
  Nested dictionary representation of the XML file.
152
151
  """
152
+ path = coerce_path(path)
153
153
  tree = ET.parse(path)
154
154
  root = tree.getroot()
155
155
 
@@ -157,7 +157,7 @@ def read(
157
157
 
158
158
 
159
159
  def write(
160
- path: Path,
160
+ path: StrPath,
161
161
  data: JSONData,
162
162
  *,
163
163
  root_tag: str,
@@ -167,7 +167,7 @@ def write(
167
167
 
168
168
  Parameters
169
169
  ----------
170
- path : Path
170
+ path : StrPath
171
171
  Path to the XML file on disk.
172
172
  data : JSONData
173
173
  Data to write as XML.
@@ -179,6 +179,7 @@ def write(
179
179
  int
180
180
  The number of records written to the XML file.
181
181
  """
182
+ path = coerce_path(path)
182
183
  if isinstance(data, dict) and len(data) == 1:
183
184
  root_name, payload = next(iter(data.items()))
184
185
  root_element = _dict_to_element(str(root_name), payload)
@@ -186,6 +187,7 @@ def write(
186
187
  root_element = _dict_to_element(root_tag, data)
187
188
 
188
189
  tree = ET.ElementTree(root_element)
190
+ ensure_parent_dir(path)
189
191
  tree.write(path, encoding='utf-8', xml_declaration=True)
190
192
 
191
193
  return count_records(data)
etlplus/file/xpt.py CHANGED
@@ -18,14 +18,15 @@ Notes
18
18
 
19
19
  from __future__ import annotations
20
20
 
21
- from pathlib import Path
22
- from typing import Any
23
21
  from typing import cast
24
22
 
25
23
  from ..types import JSONData
26
24
  from ..types import JSONList
27
- from ._imports import get_optional_module
25
+ from ..types import StrPath
26
+ from ._imports import get_dependency
28
27
  from ._imports import get_pandas
28
+ from ._io import coerce_path
29
+ from ._io import ensure_parent_dir
29
30
  from ._io import normalize_records
30
31
 
31
32
  # SECTION: EXPORTS ========================================================== #
@@ -38,39 +39,18 @@ __all__ = [
38
39
  ]
39
40
 
40
41
 
41
- # SECTION: INTERNAL HELPERS ================================================ #
42
-
43
-
44
- def _get_pyreadstat() -> Any:
45
- """Return the pyreadstat module, importing it on first use."""
46
- return get_optional_module(
47
- 'pyreadstat',
48
- error_message=(
49
- 'XPT support requires optional dependency "pyreadstat".\n'
50
- 'Install with: pip install pyreadstat'
51
- ),
52
- )
53
-
54
-
55
- def _raise_readstat_error(err: ImportError) -> None:
56
- raise ImportError(
57
- 'XPT support requires optional dependency "pyreadstat".\n'
58
- 'Install with: pip install pyreadstat',
59
- ) from err
60
-
61
-
62
42
  # SECTION: FUNCTIONS ======================================================== #
63
43
 
64
44
 
65
45
  def read(
66
- path: Path,
46
+ path: StrPath,
67
47
  ) -> JSONList:
68
48
  """
69
49
  Read XPT content from *path*.
70
50
 
71
51
  Parameters
72
52
  ----------
73
- path : Path
53
+ path : StrPath
74
54
  Path to the XPT file on disk.
75
55
 
76
56
  Returns
@@ -78,8 +58,9 @@ def read(
78
58
  JSONList
79
59
  The list of dictionaries read from the XPT file.
80
60
  """
61
+ path = coerce_path(path)
81
62
  pandas = get_pandas('XPT')
82
- pyreadstat = _get_pyreadstat()
63
+ pyreadstat = get_dependency('pyreadstat', format_name='XPT')
83
64
  reader = getattr(pyreadstat, 'read_xport', None)
84
65
  if reader is not None:
85
66
  frame, _meta = reader(str(path))
@@ -88,13 +69,11 @@ def read(
88
69
  frame = pandas.read_sas(path, format='xport')
89
70
  except TypeError:
90
71
  frame = pandas.read_sas(path)
91
- except ImportError as err: # pragma: no cover
92
- _raise_readstat_error(err)
93
72
  return cast(JSONList, frame.to_dict(orient='records'))
94
73
 
95
74
 
96
75
  def write(
97
- path: Path,
76
+ path: StrPath,
98
77
  data: JSONData,
99
78
  ) -> int:
100
79
  """
@@ -102,7 +81,7 @@ def write(
102
81
 
103
82
  Parameters
104
83
  ----------
105
- path : Path
84
+ path : StrPath
106
85
  Path to the XPT file on disk.
107
86
  data : JSONData
108
87
  Data to write as XPT file. Should be a list of dictionaries or a
@@ -118,19 +97,20 @@ def write(
118
97
  ImportError
119
98
  If "pyreadstat" is not installed with write support.
120
99
  """
100
+ path = coerce_path(path)
121
101
  records = normalize_records(data, 'XPT')
122
102
  if not records:
123
103
  return 0
124
104
 
125
105
  pandas = get_pandas('XPT')
126
- pyreadstat = _get_pyreadstat()
106
+ pyreadstat = get_dependency('pyreadstat', format_name='XPT')
127
107
  writer = getattr(pyreadstat, 'write_xport', None)
128
108
  if writer is None:
129
109
  raise ImportError(
130
110
  'XPT write support requires "pyreadstat" with write_xport().',
131
111
  )
132
112
 
133
- path.parent.mkdir(parents=True, exist_ok=True)
113
+ ensure_parent_dir(path)
134
114
  frame = pandas.DataFrame.from_records(records)
135
115
  writer(frame, str(path))
136
116
  return len(records)
etlplus/file/yaml.py CHANGED
@@ -17,12 +17,13 @@ Notes
17
17
 
18
18
  from __future__ import annotations
19
19
 
20
- from pathlib import Path
21
-
22
20
  from ..types import JSONData
21
+ from ..types import StrPath
23
22
  from ..utils import count_records
24
23
  from ._imports import get_yaml
24
+ from ._io import coerce_path
25
25
  from ._io import coerce_record_payload
26
+ from ._io import ensure_parent_dir
26
27
 
27
28
  # SECTION: EXPORTS ========================================================== #
28
29
 
@@ -38,7 +39,7 @@ __all__ = [
38
39
 
39
40
 
40
41
  def read(
41
- path: Path,
42
+ path: StrPath,
42
43
  ) -> JSONData:
43
44
  """
44
45
  Read YAML content from *path*.
@@ -47,19 +48,15 @@ def read(
47
48
 
48
49
  Parameters
49
50
  ----------
50
- path : Path
51
+ path : StrPath
51
52
  Path to the YAML file on disk.
52
53
 
53
54
  Returns
54
55
  -------
55
56
  JSONData
56
57
  The structured data read from the YAML file.
57
-
58
- Raises
59
- ------
60
- TypeError
61
- If the YAML root is not an object or an array of objects.
62
58
  """
59
+ path = coerce_path(path)
63
60
  with path.open('r', encoding='utf-8') as handle:
64
61
  loaded = get_yaml().safe_load(handle)
65
62
 
@@ -67,7 +64,7 @@ def read(
67
64
 
68
65
 
69
66
  def write(
70
- path: Path,
67
+ path: StrPath,
71
68
  data: JSONData,
72
69
  ) -> int:
73
70
  """
@@ -75,7 +72,7 @@ def write(
75
72
 
76
73
  Parameters
77
74
  ----------
78
- path : Path
75
+ path : StrPath
79
76
  Path to the YAML file on disk.
80
77
  data : JSONData
81
78
  Data to write as YAML.
@@ -85,6 +82,8 @@ def write(
85
82
  int
86
83
  The number of records written.
87
84
  """
85
+ path = coerce_path(path)
86
+ ensure_parent_dir(path)
88
87
  with path.open('w', encoding='utf-8') as handle:
89
88
  get_yaml().safe_dump(
90
89
  data,
etlplus/file/zip.py CHANGED
@@ -12,6 +12,9 @@ from pathlib import Path
12
12
 
13
13
  from ..types import JSONData
14
14
  from ..types import JSONDict
15
+ from ..types import StrPath
16
+ from ._io import coerce_path
17
+ from ._io import ensure_parent_dir
15
18
  from .enums import CompressionFormat
16
19
  from .enums import FileFormat
17
20
  from .enums import infer_file_format_and_compression
@@ -87,14 +90,14 @@ def _extract_payload(
87
90
 
88
91
 
89
92
  def read(
90
- path: Path,
93
+ path: StrPath,
91
94
  ) -> JSONData:
92
95
  """
93
96
  Read ZIP content from *path* and parse the inner payload(s).
94
97
 
95
98
  Parameters
96
99
  ----------
97
- path : Path
100
+ path : StrPath
98
101
  Path to the ZIP file on disk.
99
102
 
100
103
  Returns
@@ -107,6 +110,7 @@ def read(
107
110
  ValueError
108
111
  If the ZIP archive is empty.
109
112
  """
113
+ path = coerce_path(path)
110
114
  with zipfile.ZipFile(path, 'r') as archive:
111
115
  entries = [entry for entry in archive.infolist() if not entry.is_dir()]
112
116
  if not entries:
@@ -137,7 +141,7 @@ def read(
137
141
 
138
142
 
139
143
  def write(
140
- path: Path,
144
+ path: StrPath,
141
145
  data: JSONData,
142
146
  ) -> int:
143
147
  """
@@ -145,7 +149,7 @@ def write(
145
149
 
146
150
  Parameters
147
151
  ----------
148
- path : Path
152
+ path : StrPath
149
153
  Path to the ZIP file on disk.
150
154
  data : JSONData
151
155
  Data to write.
@@ -155,6 +159,7 @@ def write(
155
159
  int
156
160
  Number of records written.
157
161
  """
162
+ path = coerce_path(path)
158
163
  fmt = _resolve_format(path.name)
159
164
  inner_name = Path(path.name).with_suffix('').name
160
165
 
@@ -165,7 +170,7 @@ def write(
165
170
  count = File(tmp_path, fmt).write(data)
166
171
  payload = tmp_path.read_bytes()
167
172
 
168
- path.parent.mkdir(parents=True, exist_ok=True)
173
+ ensure_parent_dir(path)
169
174
  with zipfile.ZipFile(
170
175
  path,
171
176
  'w',
etlplus/file/zsav.py CHANGED
@@ -18,11 +18,11 @@ Notes
18
18
 
19
19
  from __future__ import annotations
20
20
 
21
- from pathlib import Path
22
-
23
21
  from ..types import JSONData
24
22
  from ..types import JSONList
23
+ from ..types import StrPath
25
24
  from . import stub
25
+ from ._io import coerce_path
26
26
 
27
27
  # SECTION: EXPORTS ========================================================== #
28
28
 
@@ -38,14 +38,14 @@ __all__ = [
38
38
 
39
39
 
40
40
  def read(
41
- path: Path,
41
+ path: StrPath,
42
42
  ) -> JSONList:
43
43
  """
44
44
  Read ZSAV content from *path*.
45
45
 
46
46
  Parameters
47
47
  ----------
48
- path : Path
48
+ path : StrPath
49
49
  Path to the ZSAV file on disk.
50
50
 
51
51
  Returns
@@ -57,7 +57,7 @@ def read(
57
57
 
58
58
 
59
59
  def write(
60
- path: Path,
60
+ path: StrPath,
61
61
  data: JSONData,
62
62
  ) -> int:
63
63
  """
@@ -65,7 +65,7 @@ def write(
65
65
 
66
66
  Parameters
67
67
  ----------
68
- path : Path
68
+ path : StrPath
69
69
  Path to the ZSAV file on disk.
70
70
  data : JSONData
71
71
  Data to write as ZSAV file. Should be a list of dictionaries or a
@@ -76,4 +76,5 @@ def write(
76
76
  int
77
77
  The number of rows written to the ZSAV file.
78
78
  """
79
+ path = coerce_path(path)
79
80
  return stub.write(path, data, format_name='ZSAV')
{etlplus-0.17.2.dist-info → etlplus-0.17.4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: etlplus
3
- Version: 0.17.2
3
+ Version: 0.17.4
4
4
  Summary: A Swiss Army knife for simple ETL operations
5
5
  Home-page: https://github.com/Dagitali/ETLPlus
6
6
  Author: ETLPlus Team