etlplus 0.11.11__py3-none-any.whl → 0.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/file/parquet.py CHANGED
@@ -1,21 +1,36 @@
1
1
  """
2
2
  :mod:`etlplus.file.parquet` module.
3
3
 
4
- Stub helpers for PARQUET read/write.
4
+ Helpers for reading/writing Parquet files.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
9
  from pathlib import Path
10
+ from typing import cast
10
11
 
11
12
  from ..types import JSONData
13
+ from ..types import JSONList
14
+ from ._io import normalize_records
15
+ from ._pandas import get_pandas
12
16
 
13
17
  # SECTION: EXPORTS ========================================================== #
14
18
 
15
19
 
16
- def read(path: Path) -> JSONData:
20
+ __all__ = [
21
+ 'read',
22
+ 'write',
23
+ ]
24
+
25
+
26
+ # SECTION: FUNCTIONS ======================================================== #
27
+
28
+
29
+ def read(
30
+ path: Path,
31
+ ) -> JSONList:
17
32
  """
18
- Read PARQUET content from ``path``.
33
+ Read Parquet content from ``path``.
19
34
 
20
35
  Parameters
21
36
  ----------
@@ -24,20 +39,32 @@ def read(path: Path) -> JSONData:
24
39
 
25
40
  Returns
26
41
  -------
27
- JSONData
28
- Parsed payload.
42
+ JSONList
43
+ The list of dictionaries read from the Parquet file.
29
44
 
30
45
  Raises
31
46
  ------
32
- NotImplementedError
33
- PARQUET :func:`read` is not implemented yet.
47
+ ImportError
48
+ If optional dependencies for Parquet support are missing.
34
49
  """
35
- raise NotImplementedError('PARQUET read is not implemented yet')
50
+ pandas = get_pandas('Parquet')
51
+ try:
52
+ frame = pandas.read_parquet(path)
53
+ except ImportError as e: # pragma: no cover
54
+ raise ImportError(
55
+ 'Parquet support requires optional dependency '
56
+ '"pyarrow" or "fastparquet".\n'
57
+ 'Install with: pip install pyarrow',
58
+ ) from e
59
+ return cast(JSONList, frame.to_dict(orient='records'))
36
60
 
37
61
 
38
- def write(path: Path, data: JSONData) -> int:
62
+ def write(
63
+ path: Path,
64
+ data: JSONData,
65
+ ) -> int:
39
66
  """
40
- Write ``data`` to PARQUET at ``path``.
67
+ Write ``data`` to Parquet at ``path`` and return record count.
41
68
 
42
69
  Parameters
43
70
  ----------
@@ -53,7 +80,22 @@ def write(path: Path, data: JSONData) -> int:
53
80
 
54
81
  Raises
55
82
  ------
56
- NotImplementedError
57
- PARQUET :func:`write` is not implemented yet.
83
+ ImportError
84
+ If optional dependencies for Parquet support are missing.
58
85
  """
59
- raise NotImplementedError('PARQUET write is not implemented yet')
86
+ records = normalize_records(data, 'Parquet')
87
+ if not records:
88
+ return 0
89
+
90
+ pandas = get_pandas('Parquet')
91
+ path.parent.mkdir(parents=True, exist_ok=True)
92
+ frame = pandas.DataFrame.from_records(records)
93
+ try:
94
+ frame.to_parquet(path, index=False)
95
+ except ImportError as e: # pragma: no cover
96
+ raise ImportError(
97
+ 'Parquet support requires optional dependency '
98
+ '"pyarrow" or "fastparquet".\n'
99
+ 'Install with: pip install pyarrow',
100
+ ) from e
101
+ return len(records)
etlplus/file/tsv.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
2
  :mod:`etlplus.file.tsv` module.
3
3
 
4
- Stub helpers for TSV read/write.
4
+ Helpers for reading/writing TSV files.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
@@ -9,11 +9,25 @@ from __future__ import annotations
9
9
  from pathlib import Path
10
10
 
11
11
  from ..types import JSONData
12
+ from ..types import JSONList
13
+ from ._io import read_delimited
14
+ from ._io import write_delimited
12
15
 
13
16
  # SECTION: EXPORTS ========================================================== #
14
17
 
15
18
 
16
- def read(path: Path) -> JSONData:
19
+ __all__ = [
20
+ 'read',
21
+ 'write',
22
+ ]
23
+
24
+
25
+ # SECTION: FUNCTIONS ======================================================== #
26
+
27
+
28
+ def read(
29
+ path: Path,
30
+ ) -> JSONList:
17
31
  """
18
32
  Read TSV content from ``path``.
19
33
 
@@ -24,36 +38,30 @@ def read(path: Path) -> JSONData:
24
38
 
25
39
  Returns
26
40
  -------
27
- JSONData
28
- Parsed payload.
29
-
30
- Raises
31
- ------
32
- NotImplementedError
33
- TSV :func:`read` is not implemented yet.
41
+ JSONList
42
+ The list of dictionaries read from the TSV file.
34
43
  """
35
- raise NotImplementedError('TSV read is not implemented yet')
44
+ return read_delimited(path, delimiter='\t')
36
45
 
37
46
 
38
- def write(path: Path, data: JSONData) -> int:
47
+ def write(
48
+ path: Path,
49
+ data: JSONData,
50
+ ) -> int:
39
51
  """
40
- Write ``data`` to TSV at ``path``.
52
+ Write ``data`` to TSV at ``path`` and return record count.
41
53
 
42
54
  Parameters
43
55
  ----------
44
56
  path : Path
45
57
  Path to the TSV file on disk.
46
58
  data : JSONData
47
- Data to write.
59
+ Data to write as TSV. Should be a list of dictionaries or a
60
+ single dictionary.
48
61
 
49
62
  Returns
50
63
  -------
51
64
  int
52
- Number of records written.
53
-
54
- Raises
55
- ------
56
- NotImplementedError
57
- TSV :func:`write` is not implemented yet.
65
+ The number of rows written to the TSV file.
58
66
  """
59
- raise NotImplementedError('TSV write is not implemented yet')
67
+ return write_delimited(path, data, delimiter='\t')
etlplus/file/txt.py CHANGED
@@ -1,19 +1,34 @@
1
1
  """
2
2
  :mod:`etlplus.file.txt` module.
3
3
 
4
- Stub helpers for TXT read/write.
4
+ Helpers for reading/writing text files.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
9
  from pathlib import Path
10
+ from typing import cast
10
11
 
11
12
  from ..types import JSONData
13
+ from ..types import JSONDict
14
+ from ..types import JSONList
15
+ from ..utils import count_records
12
16
 
13
17
  # SECTION: EXPORTS ========================================================== #
14
18
 
15
19
 
16
- def read(path: Path) -> JSONData:
20
+ __all__ = [
21
+ 'read',
22
+ 'write',
23
+ ]
24
+
25
+
26
+ # SECTION: FUNCTIONS ======================================================== #
27
+
28
+
29
+ def read(
30
+ path: Path,
31
+ ) -> JSONList:
17
32
  """
18
33
  Read TXT content from ``path``.
19
34
 
@@ -24,27 +39,32 @@ def read(path: Path) -> JSONData:
24
39
 
25
40
  Returns
26
41
  -------
27
- JSONData
28
- Parsed payload.
29
-
30
- Raises
31
- ------
32
- NotImplementedError
33
- TXT :func:`read` is not implemented yet.
42
+ JSONList
43
+ The list of dictionaries read from the TXT file.
34
44
  """
35
- raise NotImplementedError('TXT read is not implemented yet')
45
+ rows: JSONList = []
46
+ with path.open('r', encoding='utf-8') as handle:
47
+ for line in handle:
48
+ text = line.rstrip('\n')
49
+ if text == '':
50
+ continue
51
+ rows.append({'text': text})
52
+ return rows
36
53
 
37
54
 
38
- def write(path: Path, data: JSONData) -> int:
55
+ def write(
56
+ path: Path,
57
+ data: JSONData,
58
+ ) -> int:
39
59
  """
40
- Write ``data`` to TXT at ``path``.
60
+ Write ``data`` to TXT at ``path`` and return record count.
41
61
 
42
62
  Parameters
43
63
  ----------
44
64
  path : Path
45
65
  Path to the TXT file on disk.
46
66
  data : JSONData
47
- Data to write.
67
+ Data to write. Expects ``{'text': '...'}`` or a list of those.
48
68
 
49
69
  Returns
50
70
  -------
@@ -53,7 +73,27 @@ def write(path: Path, data: JSONData) -> int:
53
73
 
54
74
  Raises
55
75
  ------
56
- NotImplementedError
57
- TXT :func:`write` is not implemented yet.
76
+ TypeError
77
+ If any item in ``data`` is not a dictionary or if any dictionary
78
+ does not contain a ``'text'`` key.
58
79
  """
59
- raise NotImplementedError('TXT write is not implemented yet')
80
+ rows: JSONList
81
+ if isinstance(data, list):
82
+ if not all(isinstance(item, dict) for item in data):
83
+ raise TypeError('TXT payloads must contain only objects (dicts)')
84
+ rows = cast(JSONList, data)
85
+ else:
86
+ rows = [cast(JSONDict, data)]
87
+
88
+ if not rows:
89
+ return 0
90
+
91
+ path.parent.mkdir(parents=True, exist_ok=True)
92
+ with path.open('w', encoding='utf-8') as handle:
93
+ for row in rows:
94
+ if 'text' not in row:
95
+ raise TypeError('TXT payloads must include a "text" key')
96
+ handle.write(str(row['text']))
97
+ handle.write('\n')
98
+
99
+ return count_records(rows)
etlplus/file/xls.py CHANGED
@@ -1,19 +1,33 @@
1
1
  """
2
2
  :mod:`etlplus.file.xls` module.
3
3
 
4
- Stub helpers for XLS read/write.
4
+ Helpers for reading/writing Excel XLS files.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
9
  from pathlib import Path
10
+ from typing import cast
10
11
 
11
12
  from ..types import JSONData
13
+ from ..types import JSONList
14
+ from ._pandas import get_pandas
12
15
 
13
16
  # SECTION: EXPORTS ========================================================== #
14
17
 
15
18
 
16
- def read(path: Path) -> JSONData:
19
+ __all__ = [
20
+ 'read',
21
+ 'write',
22
+ ]
23
+
24
+
25
+ # SECTION: FUNCTIONS ======================================================== #
26
+
27
+
28
+ def read(
29
+ path: Path,
30
+ ) -> JSONList:
17
31
  """
18
32
  Read XLS content from ``path``.
19
33
 
@@ -24,20 +38,35 @@ def read(path: Path) -> JSONData:
24
38
 
25
39
  Returns
26
40
  -------
27
- JSONData
28
- Parsed payload.
41
+ JSONList
42
+ The list of dictionaries read from the XLS file.
29
43
 
30
44
  Raises
31
45
  ------
32
- NotImplementedError
33
- XLS :func:`read` is not implemented yet.
46
+ ImportError
47
+ If the optional dependency "xlrd" is not installed.
34
48
  """
35
- raise NotImplementedError('XLS read is not implemented yet')
49
+ pandas = get_pandas('XLS')
50
+ try:
51
+ frame = pandas.read_excel(path, engine='xlrd')
52
+ except ImportError as e: # pragma: no cover
53
+ raise ImportError(
54
+ 'XLS support requires optional dependency "xlrd".\n'
55
+ 'Install with: pip install xlrd',
56
+ ) from e
57
+ return cast(JSONList, frame.to_dict(orient='records'))
36
58
 
37
59
 
38
- def write(path: Path, data: JSONData) -> int:
60
+ def write(
61
+ path: Path,
62
+ data: JSONData,
63
+ ) -> int:
39
64
  """
40
- Write ``data`` to XLS at ``path``.
65
+ Write ``data`` to XLS at ``path`` and return record count.
66
+
67
+ Notes
68
+ -----
69
+ XLS writing is not supported by pandas 2.x. Use XLSX for writes.
41
70
 
42
71
  Parameters
43
72
  ----------
@@ -53,7 +82,7 @@ def write(path: Path, data: JSONData) -> int:
53
82
 
54
83
  Raises
55
84
  ------
56
- NotImplementedError
57
- XLS :func:`write` is not implemented yet.
85
+ RuntimeError
86
+ If XLS writing is attempted.
58
87
  """
59
- raise NotImplementedError('XLS write is not implemented yet')
88
+ raise RuntimeError('XLS write is not supported; use XLSX instead')
etlplus/file/xlsx.py CHANGED
@@ -1,19 +1,34 @@
1
1
  """
2
2
  :mod:`etlplus.file.xlsx` module.
3
3
 
4
- Stub helpers for XLSX read/write.
4
+ Helpers for reading/writing Excel XLSX files.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
9
  from pathlib import Path
10
+ from typing import cast
10
11
 
11
12
  from ..types import JSONData
13
+ from ..types import JSONList
14
+ from ._io import normalize_records
15
+ from ._pandas import get_pandas
12
16
 
13
17
  # SECTION: EXPORTS ========================================================== #
14
18
 
15
19
 
16
- def read(path: Path) -> JSONData:
20
+ __all__ = [
21
+ 'read',
22
+ 'write',
23
+ ]
24
+
25
+
26
+ # SECTION: FUNCTIONS ======================================================== #
27
+
28
+
29
+ def read(
30
+ path: Path,
31
+ ) -> JSONList:
17
32
  """
18
33
  Read XLSX content from ``path``.
19
34
 
@@ -24,20 +39,31 @@ def read(path: Path) -> JSONData:
24
39
 
25
40
  Returns
26
41
  -------
27
- JSONData
28
- Parsed payload.
42
+ JSONList
43
+ The list of dictionaries read from the XLSX file.
29
44
 
30
45
  Raises
31
46
  ------
32
- NotImplementedError
33
- XLSX :func:`read` is not implemented yet.
47
+ ImportError
48
+ If optional dependencies for XLSX support are missing.
34
49
  """
35
- raise NotImplementedError('XLSX read is not implemented yet')
50
+ pandas = get_pandas('XLSX')
51
+ try:
52
+ frame = pandas.read_excel(path)
53
+ except ImportError as e: # pragma: no cover
54
+ raise ImportError(
55
+ 'XLSX support requires optional dependency "openpyxl".\n'
56
+ 'Install with: pip install openpyxl',
57
+ ) from e
58
+ return cast(JSONList, frame.to_dict(orient='records'))
36
59
 
37
60
 
38
- def write(path: Path, data: JSONData) -> int:
61
+ def write(
62
+ path: Path,
63
+ data: JSONData,
64
+ ) -> int:
39
65
  """
40
- Write ``data`` to XLSX at ``path``.
66
+ Write ``data`` to XLSX at ``path`` and return record count.
41
67
 
42
68
  Parameters
43
69
  ----------
@@ -53,7 +79,21 @@ def write(path: Path, data: JSONData) -> int:
53
79
 
54
80
  Raises
55
81
  ------
56
- NotImplementedError
57
- XLSX :func:`write` is not implemented yet.
82
+ ImportError
83
+ If optional dependencies for XLSX support are missing.
58
84
  """
59
- raise NotImplementedError('XLSX write is not implemented yet')
85
+ records = normalize_records(data, 'XLSX')
86
+ if not records:
87
+ return 0
88
+
89
+ pandas = get_pandas('XLSX')
90
+ path.parent.mkdir(parents=True, exist_ok=True)
91
+ frame = pandas.DataFrame.from_records(records)
92
+ try:
93
+ frame.to_excel(path, index=False)
94
+ except ImportError as e: # pragma: no cover
95
+ raise ImportError(
96
+ 'XLSX support requires optional dependency "openpyxl".\n'
97
+ 'Install with: pip install openpyxl',
98
+ ) from e
99
+ return len(records)
etlplus/file/xml.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
2
  :mod:`etlplus.file.xml` module.
3
3
 
4
- XML read/write helpers.
4
+ Helpers for reading/writing XML files.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
@@ -14,6 +14,15 @@ from ..types import JSONData
14
14
  from ..types import JSONDict
15
15
  from ..utils import count_records
16
16
 
17
+ # SECTION: EXPORTS ========================================================== #
18
+
19
+
20
+ __all__ = [
21
+ 'read',
22
+ 'write',
23
+ ]
24
+
25
+
17
26
  # SECTION: CONSTANTS ======================================================== #
18
27
 
19
28
 
@@ -117,7 +126,7 @@ def read(
117
126
  path: Path,
118
127
  ) -> JSONDict:
119
128
  """
120
- Parse XML document at ``path`` into a nested dictionary.
129
+ Read XML content from ``path``.
121
130
 
122
131
  Parameters
123
132
  ----------
@@ -137,7 +146,7 @@ def read(
137
146
 
138
147
  def write(path: Path, data: JSONData, *, root_tag: str) -> int:
139
148
  """
140
- Write ``data`` as XML to ``path`` and return record count.
149
+ Write ``data`` to XML at ``path`` and return record count.
141
150
 
142
151
  Parameters
143
152
  ----------
etlplus/file/yaml.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
2
  :mod:`etlplus.file.yaml` module.
3
3
 
4
- Optional YAML read/write helpers.
4
+ Helpers for reading/writing YAML files.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
@@ -15,6 +15,15 @@ from ..types import JSONDict
15
15
  from ..types import JSONList
16
16
  from ..utils import count_records
17
17
 
18
+ # SECTION: EXPORTS ========================================================== #
19
+
20
+
21
+ __all__ = [
22
+ 'read',
23
+ 'write',
24
+ ]
25
+
26
+
18
27
  # SECTION: INTERNAL CONSTANTS =============================================== #
19
28
 
20
29
 
@@ -59,7 +68,9 @@ def read(
59
68
  path: Path,
60
69
  ) -> JSONData:
61
70
  """
62
- Load and validate YAML payloads from ``path``.
71
+ Read YAML content from ``path``.
72
+
73
+ Validates that the YAML root is a dict or a list of dicts.
63
74
 
64
75
  Parameters
65
76
  ----------