etlplus 0.12.2__py3-none-any.whl → 0.12.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
etlplus/file/_io.py ADDED
@@ -0,0 +1,120 @@
+ """
+ :mod:`etlplus.file._io` module.
+
+ Shared helpers for record normalization and delimited text formats.
+ """
+
+ from __future__ import annotations
+
+ import csv
+ from pathlib import Path
+ from typing import cast
+
+ from ..types import JSONData
+ from ..types import JSONDict
+ from ..types import JSONList
+
+ # SECTION: FUNCTIONS ======================================================== #
+
+
+ def normalize_records(
+     data: JSONData,
+     format_name: str,
+ ) -> JSONList:
+     """
+     Normalize payloads into a list of dictionaries.
+
+     Parameters
+     ----------
+     data : JSONData
+         Input payload to normalize.
+     format_name : str
+         Human-readable format name for error messages.
+
+     Returns
+     -------
+     JSONList
+         Normalized list of dictionaries.
+
+     Raises
+     ------
+     TypeError
+         If a list payload contains non-dict items.
+     """
+     if isinstance(data, list):
+         if not all(isinstance(item, dict) for item in data):
+             raise TypeError(
+                 f'{format_name} payloads must contain only objects (dicts)',
+             )
+         return cast(JSONList, data)
+     return [cast(JSONDict, data)]
+
+
+ def read_delimited(path: Path, *, delimiter: str) -> JSONList:
+     """
+     Read delimited content from ``path``.
+
+     Parameters
+     ----------
+     path : Path
+         Path to the delimited file on disk.
+     delimiter : str
+         Delimiter character for parsing.
+
+     Returns
+     -------
+     JSONList
+         The list of dictionaries read from the delimited file.
+     """
+     with path.open('r', encoding='utf-8', newline='') as handle:
+         reader: csv.DictReader[str] = csv.DictReader(
+             handle,
+             delimiter=delimiter,
+         )
+         rows: JSONList = []
+         for row in reader:
+             if not any(row.values()):
+                 continue
+             rows.append(cast(JSONDict, dict(row)))
+         return rows
+
+
+ def write_delimited(path: Path, data: JSONData, *, delimiter: str) -> int:
+     """
+     Write ``data`` to a delimited file and return record count.
+
+     Parameters
+     ----------
+     path : Path
+         Path to the delimited file on disk.
+     data : JSONData
+         Data to write as delimited rows.
+     delimiter : str
+         Delimiter character for writing.
+
+     Returns
+     -------
+     int
+         The number of rows written.
+     """
+     rows: list[JSONDict]
+     if isinstance(data, list):
+         rows = [row for row in data if isinstance(row, dict)]
+     else:
+         rows = [data]
+
+     if not rows:
+         return 0
+
+     fieldnames = sorted({key for row in rows for key in row})
+     with path.open('w', encoding='utf-8', newline='') as handle:
+         writer = csv.DictWriter(
+             handle,
+             fieldnames=fieldnames,
+             delimiter=delimiter,
+         )
+         writer.writeheader()
+         for row in rows:
+             writer.writerow({field: row.get(field) for field in fieldnames})
+
+     return len(rows)
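
For orientation, a minimal usage sketch of the new helpers (hypothetical file name; assumes etlplus is installed). Since `read_delimited` is built on `csv.DictReader`, values read back as strings:

from pathlib import Path

from etlplus.file._io import normalize_records, read_delimited, write_delimited

# Wrap a single dict into a one-element list; lists pass through after validation.
records = normalize_records({'id': 1, 'name': 'Ada'}, 'CSV')

# Writes a sorted header (id,name) plus one row; returns the row count (1).
written = write_delimited(Path('demo.csv'), records, delimiter=',')

# DictReader yields strings, so the round trip gives [{'id': '1', 'name': 'Ada'}].
rows = read_delimited(Path('demo.csv'), delimiter=',')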
etlplus/file/_pandas.py ADDED
@@ -0,0 +1,58 @@
+ """
+ :mod:`etlplus.file._pandas` module.
+
+ Shared helpers for optional pandas usage.
+ """
+
+ from __future__ import annotations
+
+ from typing import Any
+
+ # SECTION: EXPORTS ========================================================== #
+
+
+ __all__ = [
+     'get_pandas',
+ ]
+
+ # SECTION: INTERNAL CONSTANTS =============================================== #
+
+
+ _PANDAS_CACHE: dict[str, Any] = {}
+
+
+ # SECTION: FUNCTIONS ======================================================== #
+
+
+ def get_pandas(format_name: str) -> Any:
+     """
+     Return the pandas module, importing it on first use.
+
+     Parameters
+     ----------
+     format_name : str
+         Human-readable format name for error messages.
+
+     Returns
+     -------
+     Any
+         The pandas module.
+
+     Raises
+     ------
+     ImportError
+         If the optional dependency is missing.
+     """
+     mod = _PANDAS_CACHE.get('mod')
+     if mod is not None:  # pragma: no cover - tiny branch
+         return mod
+     try:
+         _pd = __import__('pandas')  # type: ignore[assignment]
+     except ImportError as e:  # pragma: no cover
+         raise ImportError(
+             f'{format_name} support requires optional dependency "pandas".\n'
+             'Install with: pip install pandas',
+         ) from e
+     _PANDAS_CACHE['mod'] = _pd
+
+     return _pd
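
`get_pandas` centralizes the lazy-import-with-cache pattern that the format modules previously duplicated: the first call imports pandas and stashes it in `_PANDAS_CACHE`; later calls return the cached module, and `format_name` only shapes the error message. A small sketch of caller behavior, assuming pandas is installed:

from etlplus.file._pandas import get_pandas

pd_mod = get_pandas('Parquet')          # first call triggers the import
assert get_pandas('ORC') is pd_mod      # later calls reuse the cached module
frame = pd_mod.DataFrame.from_records([{'id': 1}])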
etlplus/file/avro.py CHANGED
@@ -13,6 +13,7 @@ from typing import cast
  from ..types import JSONData
  from ..types import JSONDict
  from ..types import JSONList
+ from ._io import normalize_records

  # SECTION: EXPORTS ========================================================== #

@@ -63,17 +64,37 @@ def _get_fastavro() -> Any:
      return _fastavro


- def _normalize_records(data: JSONData) -> JSONList:
+ def _infer_schema(records: JSONList) -> dict[str, Any]:
      """
-     Normalize JSON payloads into a list of dictionaries.
+     Infer a basic Avro schema from record payloads.

-     Raises TypeError when payloads contain non-dict items.
+     Only primitive field values are supported; complex values raise TypeError.
      """
-     if isinstance(data, list):
-         if not all(isinstance(item, dict) for item in data):
-             raise TypeError('AVRO payloads must contain only objects (dicts)')
-         return cast(JSONList, data)
-     return [cast(JSONDict, data)]
+     field_names = sorted({key for record in records for key in record})
+     fields: list[dict[str, Any]] = []
+     for name in field_names:
+         types: list[str] = []
+         for record in records:
+             value = record.get(name)
+             if value is None:
+                 types.append('null')
+                 continue
+             if isinstance(value, dict | list):
+                 raise TypeError(
+                     'AVRO payloads must contain only primitive values',
+                 )
+             if not isinstance(value, _PRIMITIVE_TYPES):
+                 raise TypeError(
+                     'AVRO payloads must contain only primitive values',
+                 )
+             types.append(cast(str, _infer_value_type(value)))
+         fields.append({'name': name, 'type': _merge_types(types)})
+
+     return {
+         'name': 'etlplus_record',
+         'type': 'record',
+         'fields': fields,
+     }


  def _infer_value_type(value: object) -> str | list[str]:
@@ -106,39 +127,6 @@ def _merge_types(types: list[str]) -> str | list[str]:
      return ordered


- def _infer_schema(records: JSONList) -> dict[str, Any]:
-     """
-     Infer a basic Avro schema from record payloads.
-
-     Only primitive field values are supported; complex values raise TypeError.
-     """
-     field_names = sorted({key for record in records for key in record})
-     fields: list[dict[str, Any]] = []
-     for name in field_names:
-         types: list[str] = []
-         for record in records:
-             value = record.get(name)
-             if value is None:
-                 types.append('null')
-                 continue
-             if isinstance(value, dict | list):
-                 raise TypeError(
-                     'AVRO payloads must contain only primitive values',
-                 )
-             if not isinstance(value, _PRIMITIVE_TYPES):
-                 raise TypeError(
-                     'AVRO payloads must contain only primitive values',
-                 )
-             types.append(cast(str, _infer_value_type(value)))
-         fields.append({'name': name, 'type': _merge_types(types)})
-
-     return {
-         'name': 'etlplus_record',
-         'type': 'record',
-         'fields': fields,
-     }
-
-
  # SECTION: FUNCTIONS ======================================================== #


@@ -183,7 +171,7 @@ def write(
      int
          Number of records written.
      """
-     records = _normalize_records(data)
+     records = normalize_records(data, 'AVRO')
      if not records:
          return 0

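To illustrate the relocated `_infer_schema` helper: nullable fields become unions that include 'null', while any dict or list value aborts with TypeError. A sketch of the expected shape (the exact primitive type names come from `_infer_value_type`/`_merge_types`, whose bodies are not shown in this diff):

records = [
    {'id': 1, 'name': 'Ada'},
    {'id': 2, 'name': None},  # None contributes 'null' to the union for `name`
]
# _infer_schema(records) returns roughly:
# {
#     'name': 'etlplus_record',
#     'type': 'record',
#     'fields': [
#         {'name': 'id', 'type': ...},    # a single primitive type name
#         {'name': 'name', 'type': [...]} # a union that includes 'null'
#     ],
# }
# A nested value such as {'id': 1, 'meta': {'k': 'v'}} raises TypeError instead.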
etlplus/file/csv.py CHANGED
@@ -6,13 +6,12 @@ Helpers for reading/writing CSV files.

  from __future__ import annotations

- import csv
  from pathlib import Path
- from typing import cast

  from ..types import JSONData
- from ..types import JSONDict
  from ..types import JSONList
+ from ._io import read_delimited
+ from ._io import write_delimited

  # SECTION: EXPORTS ========================================================== #

@@ -42,14 +41,7 @@ def read(
      JSONList
          The list of dictionaries read from the CSV file.
      """
-     with path.open('r', encoding='utf-8', newline='') as handle:
-         reader: csv.DictReader[str] = csv.DictReader(handle)
-         rows: JSONList = []
-         for row in reader:
-             if not any(row.values()):
-                 continue
-             rows.append(cast(JSONDict, dict(row)))
-         return rows
+     return read_delimited(path, delimiter=',')


  def write(
@@ -72,20 +64,4 @@ def write(
      int
          The number of rows written to the CSV file.
      """
-     rows: list[JSONDict]
-     if isinstance(data, list):
-         rows = [row for row in data if isinstance(row, dict)]
-     else:
-         rows = [data]
-
-     if not rows:
-         return 0
-
-     fieldnames = sorted({key for row in rows for key in row})
-     with path.open('w', encoding='utf-8', newline='') as handle:
-         writer = csv.DictWriter(handle, fieldnames=fieldnames)
-         writer.writeheader()
-         for row in rows:
-             writer.writerow({field: row.get(field) for field in fieldnames})
-
-     return len(rows)
+     return write_delimited(path, data, delimiter=',')
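
The module's public surface is unchanged; `read` and `write` now forward to the shared helpers with a comma delimiter. A round-trip sketch (hypothetical path; assumes the `read(path)`/`write(path, data)` signatures implied by the bodies above; as before, values read back as strings):

from pathlib import Path

from etlplus.file import csv as csv_file

count = csv_file.write(Path('people.csv'), [{'id': 1, 'name': 'Ada'}])  # -> 1
rows = csv_file.read(Path('people.csv'))  # -> [{'id': '1', 'name': 'Ada'}]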
etlplus/file/feather.py CHANGED
@@ -7,12 +7,12 @@ Helpers for reading/writing Feather files.
  from __future__ import annotations

  from pathlib import Path
- from typing import Any
  from typing import cast

  from ..types import JSONData
- from ..types import JSONDict
  from ..types import JSONList
+ from ._io import normalize_records
+ from ._pandas import get_pandas

  # SECTION: EXPORTS ========================================================== #

@@ -23,51 +23,6 @@ __all__ = [
  ]


- # SECTION: INTERNAL CONSTANTS =============================================== #
-
-
- _PANDAS_CACHE: dict[str, Any] = {}
-
-
- # SECTION: INTERNAL FUNCTIONS =============================================== #
-
-
- def _get_pandas() -> Any:
-     """
-     Return the pandas module, importing it on first use.
-
-     Raises an informative ImportError if the optional dependency is missing.
-     """
-     mod = _PANDAS_CACHE.get('mod')
-     if mod is not None:  # pragma: no cover - tiny branch
-         return mod
-     try:
-         _pd = __import__('pandas')  # type: ignore[assignment]
-     except ImportError as e:  # pragma: no cover
-         raise ImportError(
-             'Feather support requires optional dependency "pandas".\n'
-             'Install with: pip install pandas',
-         ) from e
-     _PANDAS_CACHE['mod'] = _pd
-
-     return _pd
-
-
- def _normalize_records(data: JSONData) -> JSONList:
-     """
-     Normalize JSON payloads into a list of dictionaries.
-
-     Raises TypeError when payloads contain non-dict items.
-     """
-     if isinstance(data, list):
-         if not all(isinstance(item, dict) for item in data):
-             raise TypeError(
-                 'Feather payloads must contain only objects (dicts)',
-             )
-         return cast(JSONList, data)
-     return [cast(JSONDict, data)]
-
-
  # SECTION: FUNCTIONS ======================================================== #


@@ -92,7 +47,7 @@ def read(
      ImportError
          When optional dependency "pyarrow" is missing.
      """
-     pandas = _get_pandas()
+     pandas = get_pandas('Feather')
      try:
          frame = pandas.read_feather(path)
      except ImportError as e:  # pragma: no cover
@@ -127,11 +82,11 @@ def write(
      ImportError
          When optional dependency "pyarrow" is missing.
      """
-     records = _normalize_records(data)
+     records = normalize_records(data, 'Feather')
      if not records:
          return 0

-     pandas = _get_pandas()
+     pandas = get_pandas('Feather')
      path.parent.mkdir(parents=True, exist_ok=True)
      frame = pandas.DataFrame.from_records(records)
      try:
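
After the refactor, each pandas-backed writer follows the same recipe: normalize records, fetch pandas lazily, build a DataFrame, and serialize. A sketch of that flow outside the module, assuming pandas and pyarrow are installed (hypothetical path):

from pathlib import Path

from etlplus.file._io import normalize_records
from etlplus.file._pandas import get_pandas

records = normalize_records([{'id': 1}], 'Feather')
pandas = get_pandas('Feather')                  # raises ImportError without pandas
frame = pandas.DataFrame.from_records(records)
frame.to_feather(Path('demo.feather'))          # raises ImportError without pyarrow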
etlplus/file/orc.py CHANGED
@@ -7,12 +7,12 @@ Helpers for reading/writing ORC files.
  from __future__ import annotations

  from pathlib import Path
- from typing import Any
  from typing import cast

  from ..types import JSONData
- from ..types import JSONDict
  from ..types import JSONList
+ from ._io import normalize_records
+ from ._pandas import get_pandas

  # SECTION: EXPORTS ========================================================== #

@@ -23,49 +23,6 @@ __all__ = [
  ]


- # SECTION: INTERNAL CONSTANTS =============================================== #
-
-
- _PANDAS_CACHE: dict[str, Any] = {}
-
-
- # SECTION: INTERNAL FUNCTIONS =============================================== #
-
-
- def _get_pandas() -> Any:
-     """
-     Return the pandas module, importing it on first use.
-
-     Raises an informative ImportError if the optional dependency is missing.
-     """
-     mod = _PANDAS_CACHE.get('mod')
-     if mod is not None:  # pragma: no cover - tiny branch
-         return mod
-     try:
-         _pd = __import__('pandas')  # type: ignore[assignment]
-     except ImportError as e:  # pragma: no cover
-         raise ImportError(
-             'ORC support requires optional dependency "pandas".\n'
-             'Install with: pip install pandas',
-         ) from e
-     _PANDAS_CACHE['mod'] = _pd
-
-     return _pd
-
-
- def _normalize_records(data: JSONData) -> JSONList:
-     """
-     Normalize JSON payloads into a list of dictionaries.
-
-     Raises TypeError when payloads contain non-dict items.
-     """
-     if isinstance(data, list):
-         if not all(isinstance(item, dict) for item in data):
-             raise TypeError('ORC payloads must contain only objects (dicts)')
-         return cast(JSONList, data)
-     return [cast(JSONDict, data)]
-
-
  # SECTION: FUNCTIONS ======================================================== #


@@ -90,7 +47,7 @@ def read(
      ImportError
          When optional dependency "pyarrow" is missing.
      """
-     pandas = _get_pandas()
+     pandas = get_pandas('ORC')
      try:
          frame = pandas.read_orc(path)
      except ImportError as e:  # pragma: no cover
@@ -125,11 +82,11 @@ def write(
      ImportError
          When optional dependency "pyarrow" is missing.
      """
-     records = _normalize_records(data)
+     records = normalize_records(data, 'ORC')
      if not records:
          return 0

-     pandas = _get_pandas()
+     pandas = get_pandas('ORC')
      path.parent.mkdir(parents=True, exist_ok=True)
      frame = pandas.DataFrame.from_records(records)
      try:
etlplus/file/parquet.py CHANGED
@@ -7,12 +7,12 @@ Helpers for reading/writing Parquet files.
  from __future__ import annotations

  from pathlib import Path
- from typing import Any
  from typing import cast

  from ..types import JSONData
- from ..types import JSONDict
  from ..types import JSONList
+ from ._io import normalize_records
+ from ._pandas import get_pandas

  # SECTION: EXPORTS ========================================================== #

@@ -23,51 +23,6 @@ __all__ = [
  ]


- # SECTION: INTERNAL CONSTANTS =============================================== #
-
-
- _PANDAS_CACHE: dict[str, Any] = {}
-
-
- # SECTION: INTERNAL FUNCTIONS =============================================== #
-
-
- def _get_pandas() -> Any:
-     """
-     Return the pandas module, importing it on first use.
-
-     Raises an informative ImportError if the optional dependency is missing.
-     """
-     mod = _PANDAS_CACHE.get('mod')
-     if mod is not None:  # pragma: no cover - tiny branch
-         return mod
-     try:
-         _pd = __import__('pandas')  # type: ignore[assignment]
-     except ImportError as e:  # pragma: no cover
-         raise ImportError(
-             'Parquet support requires optional dependency "pandas".\n'
-             'Install with: pip install pandas',
-         ) from e
-     _PANDAS_CACHE['mod'] = _pd
-
-     return _pd
-
-
- def _normalize_records(data: JSONData) -> JSONList:
-     """
-     Normalize JSON payloads into a list of dictionaries.
-
-     Raises TypeError when payloads contain non-dict items.
-     """
-     if isinstance(data, list):
-         if not all(isinstance(item, dict) for item in data):
-             raise TypeError(
-                 'Parquet payloads must contain only objects (dicts)',
-             )
-         return cast(JSONList, data)
-     return [cast(JSONDict, data)]
-
-
  # SECTION: FUNCTIONS ======================================================== #


@@ -92,7 +47,7 @@ def read(
      ImportError
          If optional dependencies for Parquet support are missing.
      """
-     pandas = _get_pandas()
+     pandas = get_pandas('Parquet')
      try:
          frame = pandas.read_parquet(path)
      except ImportError as e:  # pragma: no cover
@@ -128,11 +83,11 @@ def write(
      ImportError
          If optional dependencies for Parquet support are missing.
      """
-     records = _normalize_records(data)
+     records = normalize_records(data, 'Parquet')
      if not records:
          return 0

-     pandas = _get_pandas()
+     pandas = get_pandas('Parquet')
      path.parent.mkdir(parents=True, exist_ok=True)
      frame = pandas.DataFrame.from_records(records)
      try:
etlplus/file/tsv.py CHANGED
@@ -6,13 +6,12 @@ Helpers for reading/writing TSV files.

  from __future__ import annotations

- import csv
  from pathlib import Path
- from typing import cast

  from ..types import JSONData
- from ..types import JSONDict
  from ..types import JSONList
+ from ._io import read_delimited
+ from ._io import write_delimited

  # SECTION: EXPORTS ========================================================== #

@@ -42,14 +41,7 @@ def read(
      JSONList
          The list of dictionaries read from the TSV file.
      """
-     with path.open('r', encoding='utf-8', newline='') as handle:
-         reader: csv.DictReader[str] = csv.DictReader(handle, delimiter='\t')
-         rows: JSONList = []
-         for row in reader:
-             if not any(row.values()):
-                 continue
-             rows.append(cast(JSONDict, dict(row)))
-         return rows
+     return read_delimited(path, delimiter='\t')


  def write(
@@ -72,20 +64,4 @@ def write(
      int
          The number of rows written to the TSV file.
      """
-     rows: list[JSONDict]
-     if isinstance(data, list):
-         rows = [row for row in data if isinstance(row, dict)]
-     else:
-         rows = [data]
-
-     if not rows:
-         return 0
-
-     fieldnames = sorted({key for row in rows for key in row})
-     with path.open('w', encoding='utf-8', newline='') as handle:
-         writer = csv.DictWriter(handle, fieldnames=fieldnames, delimiter='\t')
-         writer.writeheader()
-         for row in rows:
-             writer.writerow({field: row.get(field) for field in fieldnames})
-
-     return len(rows)
+     return write_delimited(path, data, delimiter='\t')
etlplus/file/xls.py CHANGED
@@ -7,12 +7,11 @@ Helpers for reading/writing Excel XLS files.
  from __future__ import annotations

  from pathlib import Path
- from typing import Any
  from typing import cast

  from ..types import JSONData
- from ..types import JSONDict
  from ..types import JSONList
+ from ._pandas import get_pandas

  # SECTION: EXPORTS ========================================================== #

@@ -23,49 +22,6 @@ __all__ = [
  ]


- # SECTION: INTERNAL CONSTANTS =============================================== #
-
-
- _PANDAS_CACHE: dict[str, Any] = {}
-
-
- # SECTION: INTERNAL FUNCTIONS =============================================== #
-
-
- def _get_pandas() -> Any:
-     """
-     Return the pandas module, importing it on first use.
-
-     Raises an informative ImportError if the optional dependency is missing.
-     """
-     mod = _PANDAS_CACHE.get('mod')
-     if mod is not None:  # pragma: no cover - tiny branch
-         return mod
-     try:
-         _pd = __import__('pandas')  # type: ignore[assignment]
-     except ImportError as e:  # pragma: no cover
-         raise ImportError(
-             'XLS support requires optional dependency "pandas".\n'
-             'Install with: pip install pandas',
-         ) from e
-     _PANDAS_CACHE['mod'] = _pd
-
-     return _pd
-
-
- def _normalize_records(data: JSONData) -> JSONList:
-     """
-     Normalize JSON payloads into a list of dictionaries.
-
-     Raises TypeError when payloads contain non-dict items.
-     """
-     if isinstance(data, list):
-         if not all(isinstance(item, dict) for item in data):
-             raise TypeError('XLS payloads must contain only objects (dicts)')
-         return cast(JSONList, data)
-     return [cast(JSONDict, data)]
-
-
  # SECTION: FUNCTIONS ======================================================== #


@@ -90,7 +46,7 @@ def read(
      ImportError
          If the optional dependency "xlrd" is not installed.
      """
-     pandas = _get_pandas()
+     pandas = get_pandas('XLS')
      try:
          frame = pandas.read_excel(path, engine='xlrd')
      except ImportError as e:  # pragma: no cover
@@ -126,7 +82,7 @@ def write(

      Raises
      ------
-     ImportError
-         If the optional dependency "xlwt" is not installed.
+     RuntimeError
+         If XLS writing is attempted.
      """
      raise RuntimeError('XLS write is not supported; use XLSX instead')
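
The Raises section now matches the long-standing body: `write` unconditionally raises RuntimeError, rather than documenting an ImportError for the retired "xlwt" engine. The caller-side effect, assuming the `write(path, data)` signature used by the other format modules:

from pathlib import Path

from etlplus.file import xls

try:
    xls.write(Path('demo.xls'), [{'id': 1}])
except RuntimeError as exc:
    print(exc)  # XLS write is not supported; use XLSX instead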
etlplus/file/xlsx.py CHANGED
@@ -7,12 +7,12 @@ Helpers for reading/writing Excel XLSX files.
  from __future__ import annotations

  from pathlib import Path
- from typing import Any
  from typing import cast

  from ..types import JSONData
- from ..types import JSONDict
  from ..types import JSONList
+ from ._io import normalize_records
+ from ._pandas import get_pandas

  # SECTION: EXPORTS ========================================================== #

@@ -23,49 +23,6 @@ __all__ = [
  ]


- # SECTION: INTERNAL CONSTANTS =============================================== #
-
-
- _PANDAS_CACHE: dict[str, Any] = {}
-
-
- # SECTION: INTERNAL FUNCTIONS =============================================== #
-
-
- def _get_pandas() -> Any:
-     """
-     Return the pandas module, importing it on first use.
-
-     Raises an informative ImportError if the optional dependency is missing.
-     """
-     mod = _PANDAS_CACHE.get('mod')
-     if mod is not None:  # pragma: no cover - tiny branch
-         return mod
-     try:
-         _pd = __import__('pandas')  # type: ignore[assignment]
-     except ImportError as e:  # pragma: no cover
-         raise ImportError(
-             'XLSX support requires optional dependency "pandas".\n'
-             'Install with: pip install pandas',
-         ) from e
-     _PANDAS_CACHE['mod'] = _pd
-
-     return _pd
-
-
- def _normalize_records(data: JSONData) -> JSONList:
-     """
-     Normalize JSON payloads into a list of dictionaries.
-
-     Raises TypeError when payloads contain non-dict items.
-     """
-     if isinstance(data, list):
-         if not all(isinstance(item, dict) for item in data):
-             raise TypeError('XLSX payloads must contain only objects (dicts)')
-         return cast(JSONList, data)
-     return [cast(JSONDict, data)]
-
-
  # SECTION: FUNCTIONS ======================================================== #


@@ -90,7 +47,7 @@ def read(
      ImportError
          If optional dependencies for XLSX support are missing.
      """
-     pandas = _get_pandas()
+     pandas = get_pandas('XLSX')
      try:
          frame = pandas.read_excel(path)
      except ImportError as e:  # pragma: no cover
@@ -125,11 +82,11 @@ def write(
      ImportError
          If optional dependencies for XLSX support are missing.
      """
-     records = _normalize_records(data)
+     records = normalize_records(data, 'XLSX')
      if not records:
          return 0

-     pandas = _get_pandas()
+     pandas = get_pandas('XLSX')
      path.parent.mkdir(parents=True, exist_ok=True)
      frame = pandas.DataFrame.from_records(records)
      try:
etlplus-0.12.2.dist-info/METADATA → etlplus-0.12.4.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: etlplus
- Version: 0.12.2
+ Version: 0.12.4
  Summary: A Swiss Army knife for simple ETL operations
  Home-page: https://github.com/Dagitali/ETLPlus
  Author: ETLPlus Team
etlplus-0.12.2.dist-info/RECORD → etlplus-0.12.4.dist-info/RECORD RENAMED
@@ -57,20 +57,22 @@ etlplus/database/schema.py,sha256=813C0Dd3WE53KTYot4dgjAxctgKXLXx-8_Rk_4r2e28,70
  etlplus/database/types.py,sha256=_pkQyC14TzAlgyeIqZG4F5LWYknZbHw3TW68Auk7Ya0,795
  etlplus/file/README.md,sha256=avWnyeKfs3uP3qa-DVBJ6t05jS2oFUPeQ3xf1Ph0eC0,3626
  etlplus/file/__init__.py,sha256=X03bosSM-uSd6dh3ur0un6_ozFRw2Tm4PE6kVUjtXK8,475
- etlplus/file/avro.py,sha256=CsHJ8spxfGPgGwGsOLBS5_-sEvXAF23bTwEvFo46X9E,5007
+ etlplus/file/_io.py,sha256=kSbe4Bc9J8br7g856IzBvmKIWSSlng8vo66XN9Z2aiw,2917
+ etlplus/file/_pandas.py,sha256=6ZqU7QzEMBq7OFl3mfEtotnKunpS3XV_GGRgz7SIHsI,1282
+ etlplus/file/avro.py,sha256=JHK95zrwuHHICRe8f20xfKmeWzv1wP0Br5pOnINdLSc,4621
  etlplus/file/core.py,sha256=BkCliUez8SBEgpagxSeDbJixnX9QvD5XQp0dbYOOw0k,8692
- etlplus/file/csv.py,sha256=ZH4M98F_Xm97YC5EYADmAMddshyh0gZFDOjiBmU9oC4,2007
+ etlplus/file/csv.py,sha256=gtEUWJO54veEtgaLB_QnmR8yOpeToq78nrtAPVTTl44,1269
  etlplus/file/enums.py,sha256=rwrbwj6PejG0c5v6jzcsmeNu9cSqDyWB1foIuM5UyJo,6648
- etlplus/file/feather.py,sha256=zCjrnc34v6SdgZJv7AaKbrhl0rAIkmjRwiLIB-QIWlw,3426
+ etlplus/file/feather.py,sha256=WYZBn2f_Z7KDZZJ1eX0RS-934MnYIMydD0p2Oo30do4,2182
  etlplus/file/gz.py,sha256=NKsvIV7TIWn8USbvuZmRH9hr6OrXh4TzTfDykHD41Kk,2631
  etlplus/file/json.py,sha256=_KAXb4rZ1C8xnaV10IkihuFh1lhbWvajFOlMrBCNVjQ,2099
  etlplus/file/ndjson.py,sha256=gT-kgcqCUUSxtm2j-JMejoh65jk-njMvFwxKCquLZw0,2393
- etlplus/file/orc.py,sha256=LRQocbfXqvAldegXXj2Q8jfNmqbZIZWaHMJOQtqx3vI,3356
- etlplus/file/parquet.py,sha256=7oHwAxaNMRTXBXeVgn-h8yvkDnlT1g1SG_OmPdbM4Fo,3525
- etlplus/file/tsv.py,sha256=WAZpyGCzjqVJpBURgB2QXbC8AKpJrEq7InDQitKFmh8,2039
+ etlplus/file/orc.py,sha256=GUrq9rgXCLBJ0i8Jd0Xsl4DzldDBg0FDxYhytb4OgxQ,2139
+ etlplus/file/parquet.py,sha256=Tp2bi_PAIUdkzc25nArJp7beuUaudw5NdciV6IFHsdQ,2281
+ etlplus/file/tsv.py,sha256=NiqF84Ck8e_DinaiO8yKRR6fVUTnUhpThzo4E1QUD8k,1271
  etlplus/file/txt.py,sha256=BStC7crpkGT4qddEeAD1_1mi_2-vQSXLj2DI-ddPFQE,2206
- etlplus/file/xls.py,sha256=9LyjIdoj-pRpro7x83ogT58BjKJerynZmBxWMndVX2s,3055
- etlplus/file/xlsx.py,sha256=2xvEVY9i10xVm_6y_nxct8LXU7oYo_ShgTXbV2LGmdM,3397
+ etlplus/file/xls.py,sha256=83BbBJGxHAdbKH8Imz1l4mOgQT34uo-tyujp2WONRY4,1771
+ etlplus/file/xlsx.py,sha256=mBKc3dSci9tk4KjQX3CaODwG1ueGtFAfztNUOaWYQAE,2181
  etlplus/file/xml.py,sha256=rYtCPvyLn9djClN2xKeqRCPsMXnvCH4R8zj94NJRdQc,4018
  etlplus/file/yaml.py,sha256=pWJf0rWyiRpOVOBAwVOosPsdIzuywZ_Cv8_tXLZ6RFw,3183
  etlplus/file/zip.py,sha256=nd26V3S0edklriKnKOGDTLlO8RBXTda_zLLEQrJgKL4,4185
@@ -81,9 +83,9 @@ etlplus/templates/view.sql.j2,sha256=Iy8DHfhq5yyvrUKDxqp_aHIEXY4Tm6j4wT7YDEFWAhk
  etlplus/validation/README.md,sha256=qusyiyJu2DsaK80jlwfXVZ0iDgeuTPOX2EL3a_fcFiw,1401
  etlplus/validation/__init__.py,sha256=Pe5Xg1_EA4uiNZGYu5WTF3j7odjmyxnAJ8rcioaplSQ,1254
  etlplus/validation/utils.py,sha256=Mtqg449VIke0ziy_wd2r6yrwJzQkA1iulZC87FzXMjo,10201
- etlplus-0.12.2.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
- etlplus-0.12.2.dist-info/METADATA,sha256=y2wnRX4-3zOF5lWbXfsH3W6dmaaH0iQz9Dovws7wkEc,22878
- etlplus-0.12.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- etlplus-0.12.2.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
- etlplus-0.12.2.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
- etlplus-0.12.2.dist-info/RECORD,,
+ etlplus-0.12.4.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
+ etlplus-0.12.4.dist-info/METADATA,sha256=FS-Se52lzyRJ2yAlzaIjAeyJ9GBJIN36nRr-wZRCLtM,22878
+ etlplus-0.12.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ etlplus-0.12.4.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
+ etlplus-0.12.4.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
+ etlplus-0.12.4.dist-info/RECORD,,