etlplus 0.10.4__py3-none-any.whl → 0.12.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. etlplus/README.md +37 -0
  2. etlplus/api/README.md +20 -3
  3. etlplus/cli/README.md +40 -0
  4. etlplus/cli/commands.py +1 -1
  5. etlplus/cli/constants.py +1 -1
  6. etlplus/cli/handlers.py +1 -1
  7. etlplus/cli/io.py +2 -2
  8. etlplus/config/README.md +52 -0
  9. etlplus/config/pipeline.py +2 -2
  10. etlplus/database/README.md +48 -0
  11. etlplus/database/ddl.py +1 -1
  12. etlplus/database/engine.py +1 -1
  13. etlplus/database/schema.py +1 -1
  14. etlplus/enums.py +2 -270
  15. etlplus/extract.py +5 -7
  16. etlplus/file/README.md +105 -0
  17. etlplus/file/__init__.py +25 -0
  18. etlplus/file/avro.py +198 -0
  19. etlplus/file/core.py +287 -0
  20. etlplus/file/csv.py +91 -0
  21. etlplus/file/enums.py +238 -0
  22. etlplus/file/feather.py +144 -0
  23. etlplus/file/gz.py +123 -0
  24. etlplus/file/json.py +98 -0
  25. etlplus/file/ndjson.py +109 -0
  26. etlplus/file/orc.py +142 -0
  27. etlplus/file/parquet.py +146 -0
  28. etlplus/file/tsv.py +91 -0
  29. etlplus/file/txt.py +99 -0
  30. etlplus/file/xls.py +132 -0
  31. etlplus/file/xlsx.py +142 -0
  32. etlplus/file/xml.py +174 -0
  33. etlplus/file/yaml.py +136 -0
  34. etlplus/file/zip.py +175 -0
  35. etlplus/load.py +9 -12
  36. etlplus/run.py +6 -9
  37. etlplus/templates/README.md +46 -0
  38. etlplus/validation/README.md +50 -0
  39. {etlplus-0.10.4.dist-info → etlplus-0.12.2.dist-info}/METADATA +58 -14
  40. {etlplus-0.10.4.dist-info → etlplus-0.12.2.dist-info}/RECORD +44 -20
  41. etlplus/file.py +0 -652
  42. {etlplus-0.10.4.dist-info → etlplus-0.12.2.dist-info}/WHEEL +0 -0
  43. {etlplus-0.10.4.dist-info → etlplus-0.12.2.dist-info}/entry_points.txt +0 -0
  44. {etlplus-0.10.4.dist-info → etlplus-0.12.2.dist-info}/licenses/LICENSE +0 -0
  45. {etlplus-0.10.4.dist-info → etlplus-0.12.2.dist-info}/top_level.txt +0 -0
etlplus/file/README.md ADDED
@@ -0,0 +1,105 @@
+ # etlplus.file subpackage
+
+ Documentation for the `etlplus.file` subpackage: unified file format support and helpers for reading
+ and writing data files.
+
+ - Provides a consistent interface for reading and writing files in various formats
+ - Supports all formats defined in `FileFormat` (see below)
+ - Includes helpers for inferring file format and compression from filenames, extensions, or MIME
+   types
+ - Exposes a `File` class with instance methods for reading and writing data
+
+ Back to project overview: see the top-level [README](../../README.md).
+
+ - [etlplus.file subpackage](#etlplusfile-subpackage)
+   - [Supported File Formats](#supported-file-formats)
+   - [Inferring File Format and Compression](#inferring-file-format-and-compression)
+   - [Reading and Writing Files](#reading-and-writing-files)
+     - [Reading a File](#reading-a-file)
+     - [Writing a File](#writing-a-file)
+   - [File Instance Methods](#file-instance-methods)
+   - [Example: Reading and Writing](#example-reading-and-writing)
+   - [See Also](#see-also)
+
+ ## Supported File Formats
+
+ The following formats are defined in `FileFormat` and supported for reading and writing:
+
+ | Format | Description |
+ |-----------|---------------------------------------------|
+ | avro | Apache Avro binary serialization |
+ | csv | Comma-separated values text files |
+ | feather | Apache Arrow Feather columnar format |
+ | gz | Gzip-compressed files (see Compression) |
+ | json | Standard JSON files |
+ | ndjson | Newline-delimited JSON (JSON Lines) |
+ | orc | Apache ORC columnar format |
+ | parquet | Apache Parquet columnar format |
+ | tsv | Tab-separated values text files |
+ | txt | Plain text files |
+ | xls | Microsoft Excel (legacy .xls) |
+ | xlsx | Microsoft Excel (modern .xlsx) |
+ | zip | ZIP-compressed files (see Compression) |
+ | xml | XML files |
+ | yaml | YAML files |
+
+ Compression formats (gz, zip) are also supported as wrappers for other formats.
+
+ ## Inferring File Format and Compression
+
+ Use `infer_file_format_and_compression(value, filename=None)` to infer the file format and
+ compression from a filename, extension, or MIME type. Returns a tuple `(file_format,
+ compression_format)`.
+
+ ## Reading and Writing Files
+
+ The main entry point for file operations is the `File` class. To read or write files:
+
+ ### Reading a File
+
+ ```python
+ from etlplus.file import File
+
+ f = File("data/sample.csv")
+ data = f.read()
+ ```
+
+ - The `read()` method automatically detects the format and compression.
+ - Returns parsed data (e.g., list of dicts for tabular formats).
+
+ ### Writing a File
+
+ ```python
+ from etlplus.file import File
+
+ f = File("output.json")
+ f.write(data)
+ ```
+
+ - The `write()` method serializes and writes data in the appropriate format.
+ - Supports all formats listed above.
+
+ ## File Instance Methods
+
+ - `read()`: Reads and parses the file, returning structured data.
+ - `write(data)`: Writes structured data to the file in the detected format.
+
+ ## Example: Reading and Writing
+
+ ```python
+ from etlplus.file import File
+
+ # Read CSV
+ csv_file = File("data.csv")
+ rows = csv_file.read()
+
+ # Write JSON
+ json_file = File("output.json")
+ json_file.write(rows)
+ ```
+
+ ## See Also
+
+ - Top-level CLI and library usage in the main [README](../../README.md)
+ - File format enums in [enums.py](enums.py)
+ - Compression format enums in [enums.py](enums.py)
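A usage sketch for the inference helper documented in the README above; the exact enum members returned for a given name are assumptions based on the format table rather than verified output.

```python
from etlplus.file import infer_file_format_and_compression

# Plain extension: expected to yield a FileFormat and no compression.
fmt, compression = infer_file_format_and_compression("events.csv")

# Compressed name: expected to yield the inner format plus a
# CompressionFormat (roughly FileFormat.CSV and CompressionFormat.GZ).
fmt, compression = infer_file_format_and_compression("events.csv.gz")
```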
etlplus/file/__init__.py ADDED
@@ -0,0 +1,25 @@
+ """
+ :mod:`etlplus.file` package.
+
+ Public file IO helpers.
+ """
+
+ from __future__ import annotations
+
+ from .core import File
+ from .enums import CompressionFormat
+ from .enums import FileFormat
+ from .enums import infer_file_format_and_compression
+
+ # SECTION: EXPORTS ========================================================== #
+
+
+ __all__ = [
+     # Class
+     'File',
+     # Enums
+     'CompressionFormat',
+     'FileFormat',
+     # Functions
+     'infer_file_format_and_compression',
+ ]
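The names in `__all__` form the public surface of the subpackage and are importable directly from `etlplus.file`; a minimal import sketch (the sample path is illustrative):

```python
# The four public names come from etlplus.file, per __all__ above.
from etlplus.file import (
    CompressionFormat,
    File,
    FileFormat,
    infer_file_format_and_compression,
)

f = File("data/sample.ndjson")  # format inferred from the extension
records = f.read()              # parsed by the matching format helper
```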
etlplus/file/avro.py ADDED
@@ -0,0 +1,198 @@
+ """
+ :mod:`etlplus.file.avro` module.
+
+ Helpers for reading/writing Avro files.
+ """
+
+ from __future__ import annotations
+
+ from pathlib import Path
+ from typing import Any
+ from typing import cast
+
+ from ..types import JSONData
+ from ..types import JSONDict
+ from ..types import JSONList
+
+ # SECTION: EXPORTS ========================================================== #
+
+
+ __all__ = [
+     'read',
+     'write',
+ ]
+
+
+ # SECTION: INTERNAL CONSTANTS =============================================== #
+
+
+ _FASTAVRO_CACHE: dict[str, Any] = {}
+
+
+ _PRIMITIVE_TYPES: tuple[type, ...] = (
+     bool,
+     int,
+     float,
+     str,
+     bytes,
+     bytearray,
+ )
+
+
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+ def _get_fastavro() -> Any:
+     """
+     Return the fastavro module, importing it on first use.
+
+     Raises an informative ImportError if the optional dependency is missing.
+     """
+     mod = _FASTAVRO_CACHE.get('mod')
+     if mod is not None:  # pragma: no cover - tiny branch
+         return mod
+     try:
+         _fastavro = __import__('fastavro')  # type: ignore[assignment]
+     except ImportError as e:  # pragma: no cover
+         raise ImportError(
+             'AVRO support requires optional dependency "fastavro".\n'
+             'Install with: pip install fastavro',
+         ) from e
+     _FASTAVRO_CACHE['mod'] = _fastavro
+
+     return _fastavro
+
+
+ def _normalize_records(data: JSONData) -> JSONList:
+     """
+     Normalize JSON payloads into a list of dictionaries.
+
+     Raises TypeError when payloads contain non-dict items.
+     """
+     if isinstance(data, list):
+         if not all(isinstance(item, dict) for item in data):
+             raise TypeError('AVRO payloads must contain only objects (dicts)')
+         return cast(JSONList, data)
+     return [cast(JSONDict, data)]
+
+
+ def _infer_value_type(value: object) -> str | list[str]:
+     """
+     Infer the Avro type for a primitive value.
+
+     Raises TypeError for unsupported types.
+     """
+     if value is None:
+         return 'null'
+     if isinstance(value, bool):
+         return 'boolean'
+     if isinstance(value, int):
+         return 'long'
+     if isinstance(value, float):
+         return 'double'
+     if isinstance(value, str):
+         return 'string'
+     if isinstance(value, (bytes, bytearray)):
+         return 'bytes'
+     raise TypeError('AVRO payloads must contain only primitive values')
+
+
+ def _merge_types(types: list[str]) -> str | list[str]:
+     """Return a stable Avro type union for a list of types."""
+     unique = list(dict.fromkeys(types))
+     if len(unique) == 1:
+         return unique[0]
+     ordered = ['null'] + sorted(t for t in unique if t != 'null')
+     return ordered
+
+
+ def _infer_schema(records: JSONList) -> dict[str, Any]:
+     """
+     Infer a basic Avro schema from record payloads.
+
+     Only primitive field values are supported; complex values raise TypeError.
+     """
+     field_names = sorted({key for record in records for key in record})
+     fields: list[dict[str, Any]] = []
+     for name in field_names:
+         types: list[str] = []
+         for record in records:
+             value = record.get(name)
+             if value is None:
+                 types.append('null')
+                 continue
+             if isinstance(value, dict | list):
+                 raise TypeError(
+                     'AVRO payloads must contain only primitive values',
+                 )
+             if not isinstance(value, _PRIMITIVE_TYPES):
+                 raise TypeError(
+                     'AVRO payloads must contain only primitive values',
+                 )
+             types.append(cast(str, _infer_value_type(value)))
+         fields.append({'name': name, 'type': _merge_types(types)})
+
+     return {
+         'name': 'etlplus_record',
+         'type': 'record',
+         'fields': fields,
+     }
+
+
+ # SECTION: FUNCTIONS ======================================================== #
+
+
+ def read(
+     path: Path,
+ ) -> JSONList:
+     """
+     Read AVRO content from ``path``.
+
+     Parameters
+     ----------
+     path : Path
+         Path to the AVRO file on disk.
+
+     Returns
+     -------
+     JSONList
+         The list of dictionaries read from the AVRO file.
+     """
+     fastavro = _get_fastavro()
+     with path.open('rb') as handle:
+         reader = fastavro.reader(handle)
+         return [cast(JSONDict, record) for record in reader]
+
+
+ def write(
+     path: Path,
+     data: JSONData,
+ ) -> int:
+     """
+     Write ``data`` to AVRO at ``path`` and return record count.
+
+     Parameters
+     ----------
+     path : Path
+         Path to the AVRO file on disk.
+     data : JSONData
+         Data to write.
+
+     Returns
+     -------
+     int
+         Number of records written.
+     """
+     records = _normalize_records(data)
+     if not records:
+         return 0
+
+     fastavro = _get_fastavro()
+     schema = _infer_schema(records)
+     parsed_schema = fastavro.parse_schema(schema)
+
+     path.parent.mkdir(parents=True, exist_ok=True)
+     with path.open('wb') as handle:
+         fastavro.writer(handle, parsed_schema, records)
+
+     return len(records)
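A round-trip sketch for the Avro helpers above. It assumes the `.avro` extension maps to `FileFormat.AVRO` and that the optional `fastavro` dependency is installed; the schema comment reflects the inference rules in `_infer_schema` and `_merge_types`.

```python
from pathlib import Path

from etlplus.file import File

rows = [
    {"id": 1, "name": "a"},
    {"id": 2, "name": None},
]

# _infer_schema() should derive roughly:
#   id   -> 'long'
#   name -> ['null', 'string']   (null-first union, per _merge_types)
out = Path("events.avro")
written = File(out).write(rows)  # returns 2, per avro.write()
round_trip = File(out).read()    # list of dicts, per avro.read()
```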
etlplus/file/core.py ADDED
@@ -0,0 +1,287 @@
+ """
+ :mod:`etlplus.file.core` module.
+
+ Shared helpers for reading and writing structured and semi-structured data
+ files.
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from pathlib import Path
+
+ from ..types import JSONData
+ from . import avro
+ from . import csv
+ from . import feather
+ from . import gz
+ from . import json
+ from . import ndjson
+ from . import orc
+ from . import parquet
+ from . import tsv
+ from . import txt
+ from . import xls
+ from . import xlsx
+ from . import xml
+ from . import yaml
+ from . import zip as zip_
+ from .enums import FileFormat
+ from .enums import infer_file_format_and_compression
+
+ # SECTION: EXPORTS ========================================================== #
+
+
+ __all__ = ['File']
+
+
+ # SECTION: CLASSES ========================================================== #
+
+
+ @dataclass(slots=True)
+ class File:
+     """
+     Convenience wrapper around structured file IO.
+
+     This class encapsulates the one-off helpers in this module as convenient
+     instance methods while retaining the original function API for
+     backward compatibility (those functions delegate to this class).
+
+     Attributes
+     ----------
+     path : Path
+         Path to the file on disk.
+     file_format : FileFormat | None, optional
+         Explicit format. If omitted, the format is inferred from the file
+         extension (``.csv``, ``.json``, etc.).
+
+     Parameters
+     ----------
+     path : StrPath
+         Path to the file on disk.
+     file_format : FileFormat | str | None, optional
+         Explicit format. If omitted, the format is inferred from the file
+         extension (``.csv``, ``.json``, etc.).
+     """
+
+     # -- Attributes -- #
+
+     path: Path
+     file_format: FileFormat | None = None
+
+     # -- Magic Methods (Object Lifecycle) -- #
+
+     def __post_init__(self) -> None:
+         """
+         Auto-detect and set the file format on initialization.
+
+         If no explicit ``file_format`` is provided, attempt to infer it from
+         the file path's extension and update :attr:`file_format`. If the
+         extension is unknown, the attribute is left as ``None`` and will be
+         validated later by :meth:`_ensure_format`.
+         """
+         self.path = Path(self.path)
+         self.file_format = self._coerce_format(self.file_format)
+         if self.file_format is None:
+             self.file_format = self._maybe_guess_format()
+
+     # -- Internal Instance Methods -- #
+
+     def _assert_exists(self) -> None:
+         """
+         Raise FileNotFoundError if :attr:`path` does not exist.
+
+         This centralizes existence checks across multiple read methods.
+         """
+         if not self.path.exists():
+             raise FileNotFoundError(f'File not found: {self.path}')
+
+     def _coerce_format(
+         self,
+         file_format: FileFormat | str | None,
+     ) -> FileFormat | None:
+         """
+         Normalize the file format input.
+
+         Parameters
+         ----------
+         file_format : FileFormat | str | None
+             File format specifier. Strings are coerced into
+             :class:`FileFormat`.
+
+         Returns
+         -------
+         FileFormat | None
+             A normalized file format, or ``None`` when unspecified.
+         """
+         if file_format is None or isinstance(file_format, FileFormat):
+             return file_format
+         return FileFormat.coerce(file_format)
+
+     def _ensure_format(self) -> FileFormat:
+         """
+         Resolve the active format, guessing from extension if needed.
+
+         Returns
+         -------
+         FileFormat
+             The resolved file format.
+         """
+         return (
+             self.file_format
+             if self.file_format is not None
+             else self._guess_format()
+         )
+
+     def _guess_format(self) -> FileFormat:
+         """
+         Infer the file format from the filename extension.
+
+         Returns
+         -------
+         FileFormat
+             The inferred file format based on the file extension.
+
+         Raises
+         ------
+         ValueError
+             If the extension is unknown or unsupported.
+         """
+         fmt, compression = infer_file_format_and_compression(self.path)
+         if fmt is not None:
+             return fmt
+         if compression is not None:
+             raise ValueError(
+                 'Cannot infer file format from compressed file '
+                 f'{self.path!r} with compression {compression.value!r}',
+             )
+         raise ValueError(
+             f'Cannot infer file format from extension {self.path.suffix!r}',
+         )
+
+     def _maybe_guess_format(self) -> FileFormat | None:
+         """
+         Try to infer the format, returning ``None`` if it cannot be inferred.
+
+         Returns
+         -------
+         FileFormat | None
+             The inferred format, or ``None`` if inference fails.
+         """
+         try:
+             return self._guess_format()
+         except ValueError:
+             # Leave as None; _ensure_format() will raise on use if needed.
+             return None
+
+     # -- Instance Methods -- #
+
+     def read(self) -> JSONData:
+         """
+         Read structured data from :attr:`path` using :attr:`file_format`.
+
+         Returns
+         -------
+         JSONData
+             The structured data read from the file.
+
+         Raises
+         ------
+         ValueError
+             If the resolved file format is unsupported.
+         """
+         self._assert_exists()
+         fmt = self._ensure_format()
+         match fmt:
+             case FileFormat.AVRO:
+                 return avro.read(self.path)
+             case FileFormat.CSV:
+                 return csv.read(self.path)
+             case FileFormat.FEATHER:
+                 return feather.read(self.path)
+             case FileFormat.GZ:
+                 return gz.read(self.path)
+             case FileFormat.JSON:
+                 return json.read(self.path)
+             case FileFormat.NDJSON:
+                 return ndjson.read(self.path)
+             case FileFormat.ORC:
+                 return orc.read(self.path)
+             case FileFormat.PARQUET:
+                 return parquet.read(self.path)
+             case FileFormat.TSV:
+                 return tsv.read(self.path)
+             case FileFormat.TXT:
+                 return txt.read(self.path)
+             case FileFormat.XLS:
+                 return xls.read(self.path)
+             case FileFormat.XLSX:
+                 return xlsx.read(self.path)
+             case FileFormat.XML:
+                 return xml.read(self.path)
+             case FileFormat.YAML:
+                 return yaml.read(self.path)
+             case FileFormat.ZIP:
+                 return zip_.read(self.path)
+         raise ValueError(f'Unsupported format: {fmt}')
+
+     def write(
+         self,
+         data: JSONData,
+         *,
+         root_tag: str = xml.DEFAULT_XML_ROOT,
+     ) -> int:
+         """
+         Write ``data`` to :attr:`path` using :attr:`file_format`.
+
+         Parameters
+         ----------
+         data : JSONData
+             Data to write to the file.
+         root_tag : str, optional
+             Root tag name to use when writing XML files. Defaults to
+             ``'root'``.
+
+         Returns
+         -------
+         int
+             The number of records written.
+
+         Raises
+         ------
+         ValueError
+             If the resolved file format is unsupported.
+         """
+         fmt = self._ensure_format()
+         match fmt:
+             case FileFormat.AVRO:
+                 return avro.write(self.path, data)
+             case FileFormat.CSV:
+                 return csv.write(self.path, data)
+             case FileFormat.FEATHER:
+                 return feather.write(self.path, data)
+             case FileFormat.GZ:
+                 return gz.write(self.path, data)
+             case FileFormat.JSON:
+                 return json.write(self.path, data)
+             case FileFormat.NDJSON:
+                 return ndjson.write(self.path, data)
+             case FileFormat.ORC:
+                 return orc.write(self.path, data)
+             case FileFormat.PARQUET:
+                 return parquet.write(self.path, data)
+             case FileFormat.TSV:
+                 return tsv.write(self.path, data)
+             case FileFormat.TXT:
+                 return txt.write(self.path, data)
+             case FileFormat.XLS:
+                 return xls.write(self.path, data)
+             case FileFormat.XLSX:
+                 return xlsx.write(self.path, data)
+             case FileFormat.XML:
+                 return xml.write(self.path, data, root_tag=root_tag)
+             case FileFormat.YAML:
+                 return yaml.write(self.path, data)
+             case FileFormat.ZIP:
+                 return zip_.write(self.path, data)
+         raise ValueError(f'Unsupported format: {fmt}')
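A brief sketch of the two extra knobs `File` exposes beyond plain `read()`/`write()`: an explicit `file_format` for paths whose extension cannot be inferred (strings go through `FileFormat.coerce`, which is called above but defined in `enums.py`), and the `root_tag` keyword forwarded to the XML writer. The `'ndjson'` string and the file names are illustrative assumptions.

```python
from etlplus.file import File

rows = [{"id": 1}, {"id": 2}]

# Extension-less path: supply the format explicitly; the string is coerced
# via FileFormat.coerce() inside _coerce_format().
staging = File("latest_export", file_format="ndjson")
staging.write(rows)

# root_tag is only consulted for FileFormat.XML and defaults to
# xml.DEFAULT_XML_ROOT ('root', per the docstring above).
File("items.xml").write(rows, root_tag="items")
```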
etlplus/file/csv.py ADDED
@@ -0,0 +1,91 @@
+ """
+ :mod:`etlplus.file.csv` module.
+
+ Helpers for reading/writing CSV files.
+ """
+
+ from __future__ import annotations
+
+ import csv
+ from pathlib import Path
+ from typing import cast
+
+ from ..types import JSONData
+ from ..types import JSONDict
+ from ..types import JSONList
+
+ # SECTION: EXPORTS ========================================================== #
+
+
+ __all__ = [
+     'read',
+     'write',
+ ]
+
+
+ # SECTION: FUNCTIONS ======================================================== #
+
+
+ def read(
+     path: Path,
+ ) -> JSONList:
+     """
+     Read CSV content from ``path``.
+
+     Parameters
+     ----------
+     path : Path
+         Path to the CSV file on disk.
+
+     Returns
+     -------
+     JSONList
+         The list of dictionaries read from the CSV file.
+     """
+     with path.open('r', encoding='utf-8', newline='') as handle:
+         reader: csv.DictReader[str] = csv.DictReader(handle)
+         rows: JSONList = []
+         for row in reader:
+             if not any(row.values()):
+                 continue
+             rows.append(cast(JSONDict, dict(row)))
+         return rows
+
+
+ def write(
+     path: Path,
+     data: JSONData,
+ ) -> int:
+     """
+     Write ``data`` to CSV at ``path`` and return record count.
+
+     Parameters
+     ----------
+     path : Path
+         Path to the CSV file on disk.
+     data : JSONData
+         Data to write as CSV. Should be a list of dictionaries or a
+         single dictionary.
+
+     Returns
+     -------
+     int
+         The number of rows written to the CSV file.
+     """
+     rows: list[JSONDict]
+     if isinstance(data, list):
+         rows = [row for row in data if isinstance(row, dict)]
+     else:
+         rows = [data]
+
+     if not rows:
+         return 0
+
+     fieldnames = sorted({key for row in rows for key in row})
+     with path.open('w', encoding='utf-8', newline='') as handle:
+         writer = csv.DictWriter(handle, fieldnames=fieldnames)
+         writer.writeheader()
+         for row in rows:
+             writer.writerow({field: row.get(field) for field in fieldnames})
+
+     return len(rows)
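A minimal sketch of the writer's header behaviour defined above: the header is the sorted union of keys across all rows, rows missing a key get an empty cell, and `read()` returns string values because `csv.DictReader` does not coerce types.

```python
from pathlib import Path

from etlplus.file import File

rows = [
    {"b": 2, "a": 1},
    {"a": 3, "c": 4},
]

path = Path("union.csv")
File(path).write(rows)    # header is a,b,c (sorted union of keys)
print(File(path).read())  # e.g. [{'a': '1', 'b': '2', 'c': ''}, ...]
```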