etlplus-0.16.10-py3-none-any.whl → etlplus-0.17.3-py3-none-any.whl

This diff shows the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (70)
  1. etlplus/file/README.md +33 -0
  2. etlplus/file/_imports.py +35 -20
  3. etlplus/file/_io.py +138 -15
  4. etlplus/file/_r.py +48 -0
  5. etlplus/file/_sql.py +224 -0
  6. etlplus/file/accdb.py +7 -6
  7. etlplus/file/arrow.py +29 -10
  8. etlplus/file/avro.py +13 -10
  9. etlplus/file/bson.py +94 -10
  10. etlplus/file/cbor.py +29 -17
  11. etlplus/file/cfg.py +7 -6
  12. etlplus/file/conf.py +7 -6
  13. etlplus/file/core.py +1 -1
  14. etlplus/file/csv.py +8 -7
  15. etlplus/file/dat.py +52 -11
  16. etlplus/file/dta.py +36 -16
  17. etlplus/file/duckdb.py +72 -11
  18. etlplus/file/enums.py +29 -0
  19. etlplus/file/feather.py +15 -30
  20. etlplus/file/fwf.py +44 -10
  21. etlplus/file/gz.py +12 -7
  22. etlplus/file/hbs.py +7 -6
  23. etlplus/file/hdf5.py +71 -8
  24. etlplus/file/ini.py +60 -17
  25. etlplus/file/ion.py +7 -6
  26. etlplus/file/jinja2.py +7 -6
  27. etlplus/file/json.py +10 -11
  28. etlplus/file/log.py +7 -6
  29. etlplus/file/mat.py +7 -6
  30. etlplus/file/mdb.py +7 -6
  31. etlplus/file/msgpack.py +27 -15
  32. etlplus/file/mustache.py +7 -6
  33. etlplus/file/nc.py +69 -11
  34. etlplus/file/ndjson.py +10 -6
  35. etlplus/file/numbers.py +7 -6
  36. etlplus/file/ods.py +48 -11
  37. etlplus/file/orc.py +15 -30
  38. etlplus/file/parquet.py +10 -6
  39. etlplus/file/pb.py +36 -24
  40. etlplus/file/pbf.py +7 -6
  41. etlplus/file/properties.py +44 -18
  42. etlplus/file/proto.py +24 -18
  43. etlplus/file/psv.py +12 -11
  44. etlplus/file/rda.py +57 -15
  45. etlplus/file/rds.py +50 -14
  46. etlplus/file/sas7bdat.py +26 -16
  47. etlplus/file/sav.py +34 -16
  48. etlplus/file/sqlite.py +70 -10
  49. etlplus/file/stub.py +8 -6
  50. etlplus/file/sylk.py +7 -6
  51. etlplus/file/tab.py +13 -13
  52. etlplus/file/toml.py +56 -17
  53. etlplus/file/tsv.py +8 -7
  54. etlplus/file/txt.py +10 -7
  55. etlplus/file/vm.py +7 -6
  56. etlplus/file/wks.py +7 -6
  57. etlplus/file/xls.py +8 -5
  58. etlplus/file/xlsm.py +48 -10
  59. etlplus/file/xlsx.py +10 -6
  60. etlplus/file/xml.py +11 -9
  61. etlplus/file/xpt.py +46 -10
  62. etlplus/file/yaml.py +10 -11
  63. etlplus/file/zip.py +10 -5
  64. etlplus/file/zsav.py +7 -6
  65. {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/METADATA +44 -26
  66. {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/RECORD +70 -68
  67. {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
  68. {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
  69. {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
  70. {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0
etlplus/file/hbs.py CHANGED
@@ -19,11 +19,11 @@ Notes

 from __future__ import annotations

-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path

 # SECTION: EXPORTS ========================================================== #

@@ -39,14 +39,14 @@ __all__ = [


 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read ZSAV content from *path*.

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the HBS file on disk.

     Returns
@@ -58,7 +58,7 @@ def read(


 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,7 +66,7 @@ def write(

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the HBS file on disk.
     data : JSONData
         Data to write as HBS file. Should be a list of dictionaries or a
@@ -77,4 +77,5 @@ def write(
     int
         The number of rows written to the HBS file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='HBS')
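
Every stub-backed module in this release follows the same pattern: read() and write() now accept a StrPath (either a plain string or a pathlib.Path) and normalise it with coerce_path before delegating to the stub helpers. Below is a minimal sketch of what such a helper typically looks like; the real implementation lives in etlplus/file/_io.py, which is not part of this diff, so the names and behaviour here are assumptions for illustration only.

    # Illustrative sketch only; etlplus/file/_io.py is not shown in this diff.
    from pathlib import Path

    # Assumed to mirror the StrPath alias imported from etlplus.types.
    StrPath = str | Path


    def coerce_path(path: StrPath) -> Path:
        # Normalise a str or Path input to a Path so callers can rely on
        # Path methods such as .open() and .parent.
        return Path(path)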
etlplus/file/hdf5.py CHANGED
@@ -1,8 +1,8 @@
 """
 :mod:`etlplus.file.hdf5` module.

-Stub helpers for reading/writing Hierarchical Data Format (HDF5) files (not
-implemented yet).
+Helpers for reading Hierarchical Data Format (HDF5) files. Stub helpers for
+writing such files (not implemented yet).

 Notes
 -----
@@ -19,11 +19,14 @@ Notes

 from __future__ import annotations

-from pathlib import Path
+from typing import cast

 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._imports import get_pandas
+from ._io import coerce_path

 # SECTION: EXPORTS ========================================================== #

@@ -35,30 +38,89 @@ __all__ = [
 ]


+# SECTION: INTERNAL CONSTANTS ============================================== #
+
+
+DEFAULT_KEY = 'data'
+
+
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _raise_tables_error(
+    err: ImportError,
+) -> None:
+    """
+    Raise a consistent ImportError for missing PyTables support.
+
+    Parameters
+    ----------
+    err : ImportError
+        The original ImportError raised when trying to use HDF5 support without
+        the required dependency.
+
+    Raises
+    ------
+    ImportError
+        Consistent ImportError indicating that PyTables is required.
+    """
+    raise ImportError(
+        'HDF5 support requires optional dependency "tables".\n'
+        'Install with: pip install tables',
+    ) from err
+
+
 # SECTION: FUNCTIONS ======================================================== #


 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read HDF5 content from *path*.

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the HDF5 file on disk.

     Returns
     -------
     JSONList
         The list of dictionaries read from the HDF5 file.
+
+    Raises
+    ------
+    ValueError
+        If multiple datasets are found in the HDF5 file without a clear key to
+        use.
     """
-    return stub.read(path, format_name='HDF5')
+    path = coerce_path(path)
+    pandas = get_pandas('HDF5')
+    try:
+        store = pandas.HDFStore(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_tables_error(err)
+
+    with store:
+        keys = [key.lstrip('/') for key in store.keys()]
+        if not keys:
+            return []
+        if DEFAULT_KEY in keys:
+            key = DEFAULT_KEY
+        elif len(keys) == 1:
+            key = keys[0]
+        else:
+            raise ValueError(
+                'Multiple datasets found in HDF5 file; expected "data" or '
+                'a single dataset',
+            )
+        frame = store.get(key)
+        return cast(JSONList, frame.to_dict(orient='records'))


 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,7 +128,7 @@ def write(

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the HDF5 file on disk.
     data : JSONData
         Data to write as HDF5 file. Should be a list of dictionaries or a
@@ -77,4 +139,5 @@ def write(
     int
         The number of rows written to the HDF5 file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='HDF5')
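
The HDF5 reader is no longer a stub: it opens the file through pandas.HDFStore, prefers a dataset stored under the key "data", falls back to the only dataset present, and raises ValueError when several datasets leave the choice ambiguous. A small usage sketch follows, assuming pandas and its optional PyTables dependency are installed; the file name and columns are illustrative, not part of the package.

    import pandas as pd

    from etlplus.file import hdf5

    # Write a single dataset under the key the reader prefers.
    pd.DataFrame([{'id': 1, 'name': 'a'}]).to_hdf('example.h5', key='data')

    records = hdf5.read('example.h5')  # str paths are accepted via StrPath
    print(records)                     # [{'id': 1, 'name': 'a'}]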
etlplus/file/ini.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.ini` module.

-Stub helpers for reading/writing initialization (INI) files (not implemented
-yet).
+Helpers for reading/writing initialization (INI) files.

 Notes
 -----
@@ -20,11 +19,15 @@ Notes

 from __future__ import annotations

-from pathlib import Path
+import configparser

 from ..types import JSONData
-from ..types import JSONList
-from . import stub
+from ..types import JSONDict
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import require_dict_payload
+from ._io import stringify_value

 # SECTION: EXPORTS ========================================================== #

@@ -40,26 +43,39 @@ __all__ = [


 def read(
-    path: Path,
-) -> JSONList:
+    path: StrPath,
+) -> JSONData:
     """
     Read INI content from *path*.

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the INI file on disk.

     Returns
     -------
-    JSONList
-        The list of dictionaries read from the INI file.
+    JSONData
+        The structured data read from the INI file.
     """
-    return stub.read(path, format_name='INI')
+    path = coerce_path(path)
+    parser = configparser.ConfigParser()
+    parser.read(path, encoding='utf-8')
+
+    payload: JSONDict = {}
+    if parser.defaults():
+        payload['DEFAULT'] = dict(parser.defaults())
+    defaults = dict(parser.defaults())
+    for section in parser.sections():
+        raw_section = dict(parser.items(section))
+        for key in defaults:
+            raw_section.pop(key, None)
+        payload[section] = raw_section
+    return payload


 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -67,15 +83,42 @@ def write(

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the INI file on disk.
     data : JSONData
-        Data to write as INI. Should be a list of dictionaries or a
-        single dictionary.
+        Data to write as INI. Should be a dictionary.

     Returns
     -------
     int
-        The number of rows written to the INI file.
+        The number of records written to the INI file.
+
+    Raises
+    ------
+    TypeError
+        If *data* is not a dictionary.
     """
-    return stub.write(path, data, format_name='INI')
+    path = coerce_path(path)
+    payload = require_dict_payload(data, format_name='INI')
+
+    parser = configparser.ConfigParser()
+    for section, values in payload.items():
+        if section == 'DEFAULT':
+            if isinstance(values, dict):
+                parser['DEFAULT'] = {
+                    key: stringify_value(value)
+                    for key, value in values.items()
+                }
+            else:
+                raise TypeError('INI DEFAULT section must be a dict')
+            continue
+        if not isinstance(values, dict):
+            raise TypeError('INI sections must map to dicts')
+        parser[section] = {
+            key: stringify_value(value) for key, value in values.items()
+        }
+
+    ensure_parent_dir(path)
+    with path.open('w', encoding='utf-8', newline='') as handle:
+        parser.write(handle)
+    return 1
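
The INI module now round-trips dictionaries through configparser: write() requires a dict of sections (with an optional DEFAULT section) and stringifies every value, while read() returns a dict of sections with inherited defaults stripped back out of each section. A short round-trip sketch, with an illustrative file name and keys:

    from etlplus.file import ini

    ini.write('settings.ini', {
        'DEFAULT': {'retries': 3},
        'database': {'host': 'localhost', 'port': 5432},
    })

    config = ini.read('settings.ini')
    # configparser stores values as strings, so this yields e.g.
    # {'DEFAULT': {'retries': '3'},
    #  'database': {'host': 'localhost', 'port': '5432'}}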
etlplus/file/ion.py CHANGED
@@ -18,11 +18,11 @@ Notes

 from __future__ import annotations

-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path

 # SECTION: EXPORTS ========================================================== #

@@ -38,14 +38,14 @@ __all__ = [


 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read ION content from *path*.

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the ION file on disk.

     Returns
@@ -57,7 +57,7 @@ def read(


 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -65,7 +65,7 @@ def write(

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the ION file on disk.
     data : JSONData
         Data to write as ION. Should be a list of dictionaries or a
@@ -76,4 +76,5 @@ def write(
     int
         The number of rows written to the ION file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='ION')
etlplus/file/jinja2.py CHANGED
@@ -19,11 +19,11 @@ Notes

 from __future__ import annotations

-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path

 # SECTION: EXPORTS ========================================================== #

@@ -39,14 +39,14 @@ __all__ = [


 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read JINJA2 content from *path*.

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the JINJA2 file on disk.

     Returns
@@ -58,7 +58,7 @@ def read(


 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,7 +66,7 @@ def write(

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the JINJA2 file on disk.
     data : JSONData
         Data to write as JINJA2 file. Should be a list of dictionaries or a
@@ -77,4 +77,5 @@ def write(
     int
         The number of rows written to the JINJA2 file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='JINJA2')
etlplus/file/json.py CHANGED
@@ -19,11 +19,13 @@ Notes
 from __future__ import annotations

 import json
-from pathlib import Path

 from ..types import JSONData
+from ..types import StrPath
 from ..utils import count_records
+from ._io import coerce_path
 from ._io import coerce_record_payload
+from ._io import ensure_parent_dir

 # SECTION: EXPORTS ========================================================== #

@@ -39,7 +41,7 @@


 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONData:
     """
     Read JSON content from *path*.
@@ -48,19 +50,15 @@

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the JSON file on disk.

     Returns
     -------
     JSONData
         The structured data read from the JSON file.
-
-    Raises
-    ------
-    TypeError
-        If the JSON root is not an object or an array of objects.
     """
+    path = coerce_path(path)
     with path.open('r', encoding='utf-8') as handle:
         loaded = json.load(handle)

@@ -68,7 +66,7 @@ def read(


 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -76,7 +74,7 @@ def write(

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the JSON file on disk.
     data : JSONData
         Data to serialize as JSON.
@@ -86,7 +84,8 @@ def write(
     int
         The number of records written to the JSON file.
     """
-    path.parent.mkdir(parents=True, exist_ok=True)
+    path = coerce_path(path)
+    ensure_parent_dir(path)
     with path.open('w', encoding='utf-8') as handle:
         json.dump(
             data,
etlplus/file/log.py CHANGED
@@ -18,11 +18,11 @@ Notes

 from __future__ import annotations

-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path

 # SECTION: EXPORTS ========================================================== #

@@ -38,14 +38,14 @@ __all__ = [


 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read LOG content from *path*.

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the LOG file on disk.

     Returns
@@ -57,7 +57,7 @@ def read(


 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -65,7 +65,7 @@ def write(

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the LOG file on disk.
     data : JSONData
         Data to write as LOG. Should be a list of dictionaries or a
@@ -76,4 +76,5 @@ def write(
     int
         The number of rows written to the LOG file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='LOG')
etlplus/file/mat.py CHANGED
@@ -18,11 +18,11 @@ Notes

 from __future__ import annotations

-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path

 # SECTION: EXPORTS ========================================================== #

@@ -38,14 +38,14 @@ __all__ = [


 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read MAT content from *path*.

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the MAT file on disk.

     Returns
@@ -57,7 +57,7 @@ def read(


 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -65,7 +65,7 @@ def write(

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the MAT file on disk.
     data : JSONData
         Data to write as MAT file. Should be a list of dictionaries or a
@@ -76,4 +76,5 @@ def write(
     int
         The number of rows written to the MAT file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='MAT')
etlplus/file/mdb.py CHANGED
@@ -19,11 +19,11 @@ Notes

 from __future__ import annotations

-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path

 # SECTION: EXPORTS ========================================================== #

@@ -39,14 +39,14 @@ __all__ = [


 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read CSV content from *path*.

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the CSV file on disk.

     Returns
@@ -58,7 +58,7 @@ def read(


 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,7 +66,7 @@ def write(

     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the CSV file on disk.
     data : JSONData
         Data to write as CSV. Should be a list of dictionaries or a
@@ -77,4 +77,5 @@ def write(
     int
         The number of rows written to the CSV file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='DAT')