etlplus 0.16.9__py3-none-any.whl → 0.17.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/file/README.md CHANGED
@@ -9,6 +9,12 @@ and writing data files.
    types
  - Exposes a `File` class with instance methods for reading and writing data

+ Some formats require optional dependencies. Install with:
+
+ ```bash
+ pip install -e ".[file]"
+ ```
+
  Back to project overview: see the top-level [README](../../README.md).

  - [`etlplus.file` Subpackage](#etlplusfile-subpackage)
@@ -29,21 +35,48 @@ matrix across all `FileFormat` values, see the top-level [README](../../README.m
  | Format | Description |
  |-----------|---------------------------------------------|
  | avro | Apache Avro binary serialization |
+ | arrow | Apache Arrow IPC |
+ | bson | Binary JSON (BSON) |
+ | cbor | Concise Binary Object Representation |
  | csv | Comma-separated values text files |
+ | dat | Generic data files (delimited) |
+ | dta | Stata datasets |
+ | duckdb | DuckDB database file |
  | feather | Apache Arrow Feather columnar format |
+ | fwf | Fixed-width formatted text files |
  | gz | Gzip-compressed files (see Compression) |
+ | hdf5 | Hierarchical Data Format |
+ | ini | INI config files |
  | json | Standard JSON files |
+ | msgpack | MessagePack binary serialization |
+ | nc | NetCDF datasets |
  | ndjson | Newline-delimited JSON (JSON Lines) |
+ | ods | OpenDocument spreadsheets |
  | orc | Apache ORC columnar format |
  | parquet | Apache Parquet columnar format |
+ | pb | Protocol Buffers binary |
+ | properties | Java-style properties |
+ | proto | Protocol Buffers schema |
+ | psv | Pipe-separated values text files |
+ | rda | RData workspace bundles |
+ | rds | RDS datasets |
+ | sas7bdat | SAS datasets |
+ | sav | SPSS datasets |
+ | sqlite | SQLite database file |
+ | tab | Tab-delimited text files |
+ | toml | TOML config files |
  | tsv | Tab-separated values text files |
  | txt | Plain text files |
  | xls | Microsoft Excel (legacy .xls; read-only) |
+ | xlsm | Microsoft Excel Macro-Enabled (XLSM) |
  | xlsx | Microsoft Excel (modern .xlsx) |
+ | xpt | SAS transport files |
  | zip | ZIP-compressed files (see Compression) |
  | xml | XML files |
  | yaml | YAML files |

+ Note: HDF5 support is read-only; writing is currently disabled.
+
  Compression formats (gz, zip) are also supported as wrappers for other formats. Formats not listed
  here are currently stubbed and will raise `NotImplementedError` on read/write.

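For orientation (not part of the published diff): when a format's optional dependency is missing, the new helpers below report the suggested install command in their error text. A minimal sketch, assuming `get_optional_module` surfaces an ImportError and that the `etlplus.file.arrow` import path and file name exist in your project:

```python
# Sketch only: the file name is illustrative and the ImportError behaviour of
# get_optional_module() is an assumption based on the error_message strings below.
from pathlib import Path

from etlplus.file import arrow

try:
    rows = arrow.read(Path('events.arrow'))
except ImportError as err:
    print(err)  # e.g. 'ARROW support requires optional dependency "pyarrow". ...'
```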
etlplus/file/arrow.py CHANGED
@@ -1,8 +1,7 @@
  """
  :mod:`etlplus.file.arrow` module.

- Stub helpers for reading/writing Apache Arrow (ARROW) files (not implemented
- yet).
+ Helpers for reading/writing Apache Arrow (ARROW) files.

  Notes
  -----
@@ -20,10 +19,13 @@ Notes
  from __future__ import annotations

  from pathlib import Path
+ from typing import Any
+ from typing import cast

  from ..types import JSONData
  from ..types import JSONList
- from . import stub
+ from ._imports import get_optional_module
+ from ._io import normalize_records

  # SECTION: EXPORTS ========================================================== #

@@ -35,6 +37,20 @@ __all__ = [
  ]


+ # SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+ def _get_pyarrow() -> Any:
+     """Return the pyarrow module, importing it on first use."""
+     return get_optional_module(
+         'pyarrow',
+         error_message=(
+             'ARROW support requires optional dependency "pyarrow".\n'
+             'Install with: pip install pyarrow'
+         ),
+     )
+
+
  # SECTION: FUNCTIONS ======================================================== #


@@ -54,7 +70,11 @@ def read(
      JSONList
          The list of dictionaries read from the Apache Arrow file.
      """
-     return stub.read(path, format_name='ARROW')
+     pyarrow = _get_pyarrow()
+     with pyarrow.memory_map(str(path), 'r') as source:
+         reader = pyarrow.ipc.open_file(source)
+         table = reader.read_all()
+     return cast(JSONList, table.to_pylist())


  def write(
@@ -77,4 +97,14 @@ def write(
      int
          The number of rows written to the ARROW file.
      """
-     return stub.write(path, data, format_name='ARROW')
+     records = normalize_records(data, 'ARROW')
+     if not records:
+         return 0
+
+     pyarrow = _get_pyarrow()
+     table = pyarrow.Table.from_pylist(records)
+     path.parent.mkdir(parents=True, exist_ok=True)
+     with pyarrow.OSFile(str(path), 'wb') as sink:
+         with pyarrow.ipc.new_file(sink, table.schema) as writer:
+             writer.write_table(table)
+     return len(records)
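A minimal round-trip sketch for the new Arrow helpers (illustrative, not part of the diff; the path and records are made up). `write` builds a pyarrow table from the records and returns the row count; `read` returns `Table.to_pylist()`.

```python
from pathlib import Path

from etlplus.file import arrow

rows = [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]
assert arrow.write(Path('table.arrow'), rows) == 2  # rows written
print(arrow.read(Path('table.arrow')))              # list of dicts via to_pylist()
```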
etlplus/file/bson.py CHANGED
@@ -1,8 +1,7 @@
  """
  :mod:`etlplus.file.bson` module.

- Stub helpers for reading/writing Binary JSON (BSON) files (not implemented
- yet).
+ Helpers for reading/writing Binary JSON (BSON) files.

  Notes
  -----
@@ -19,10 +18,13 @@ Notes
  from __future__ import annotations

  from pathlib import Path
+ from typing import Any
+ from typing import cast

  from ..types import JSONData
  from ..types import JSONList
- from . import stub
+ from ._imports import get_optional_module
+ from ._io import normalize_records

  # SECTION: EXPORTS ========================================================== #

@@ -34,6 +36,36 @@ __all__ = [
  ]


+ # SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+ def _decode_all(bson_module: Any, payload: bytes) -> list[dict[str, Any]]:
+     if hasattr(bson_module, 'decode_all'):
+         return bson_module.decode_all(payload)
+     if hasattr(bson_module, 'BSON'):
+         return bson_module.BSON.decode_all(payload)
+     raise AttributeError('bson module lacks decode_all()')
+
+
+ def _encode_doc(bson_module: Any, doc: dict[str, Any]) -> bytes:
+     if hasattr(bson_module, 'encode'):
+         return bson_module.encode(doc)
+     if hasattr(bson_module, 'BSON'):
+         return bson_module.BSON.encode(doc)
+     raise AttributeError('bson module lacks encode()')
+
+
+ def _get_bson() -> Any:
+     """Return the bson module, importing it on first use."""
+     return get_optional_module(
+         'bson',
+         error_message=(
+             'BSON support requires optional dependency "pymongo".\n'
+             'Install with: pip install pymongo'
+         ),
+     )
+
+
  # SECTION: FUNCTIONS ======================================================== #


@@ -53,7 +85,11 @@ def read(
      JSONList
          The list of dictionaries read from the BSON file.
      """
-     return stub.read(path, format_name='BSON')
+     bson = _get_bson()
+     with path.open('rb') as handle:
+         payload = handle.read()
+     docs = _decode_all(bson, payload)
+     return cast(JSONList, docs)


  def write(
@@ -76,4 +112,13 @@ def write(
      int
          The number of rows written to the BSON file.
      """
-     return stub.write(path, data, format_name='BSON')
+     bson = _get_bson()
+     records = normalize_records(data, 'BSON')
+     if not records:
+         return 0
+
+     path.parent.mkdir(parents=True, exist_ok=True)
+     with path.open('wb') as handle:
+         for record in records:
+             handle.write(_encode_doc(bson, record))
+     return len(records)
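Usage sketch (illustrative, not from the diff): records are written as one BSON document each and read back with `decode_all`, so pymongo's `bson` must be installed; the file name is made up.

```python
from pathlib import Path

from etlplus.file import bson as bson_file

docs = [{'user': 'ada', 'active': True}, {'user': 'grace', 'active': False}]
bson_file.write(Path('users.bson'), docs)   # one BSON document per record
print(bson_file.read(Path('users.bson')))   # decoded back into a list of dicts
```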
etlplus/file/cbor.py CHANGED
@@ -1,8 +1,7 @@
  """
  :mod:`etlplus.file.cbor` module.

- Stub helpers for reading/writing Concise Binary Object Representation (CBOR)
- files (not implemented yet).
+ Helpers for reading/writing Concise Binary Object Representation (CBOR) files.

  Notes
  -----
@@ -20,10 +19,12 @@ Notes
  from __future__ import annotations

  from pathlib import Path
+ from typing import Any

  from ..types import JSONData
- from ..types import JSONList
- from . import stub
+ from ._imports import get_optional_module
+ from ._io import coerce_record_payload
+ from ._io import normalize_records

  # SECTION: EXPORTS ========================================================== #

@@ -35,12 +36,26 @@ __all__ = [
  ]


+ # SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+ def _get_cbor() -> Any:
+     """Return the cbor2 module, importing it on first use."""
+     return get_optional_module(
+         'cbor2',
+         error_message=(
+             'CBOR support requires optional dependency "cbor2".\n'
+             'Install with: pip install cbor2'
+         ),
+     )
+
+
  # SECTION: FUNCTIONS ======================================================== #


  def read(
      path: Path,
- ) -> JSONList:
+ ) -> JSONData:
      """
      Read CBOR content from *path*.

@@ -51,10 +66,13 @@ def read(

      Returns
      -------
-     JSONList
-         The list of dictionaries read from the CBOR file.
+     JSONData
+         The structured data read from the CBOR file.
      """
-     return stub.read(path, format_name='CBOR')
+     cbor2 = _get_cbor()
+     with path.open('rb') as handle:
+         payload = cbor2.loads(handle.read())
+     return coerce_record_payload(payload, format_name='CBOR')


  def write(
@@ -62,14 +80,14 @@ def write(
      data: JSONData,
  ) -> int:
      """
-     Write *data* to CBOR at *path* and return record count.
+     Write *data* to CBOR file at *path* and return record count.

      Parameters
      ----------
      path : Path
          Path to the CBOR file on disk.
      data : JSONData
-         Data to write as CBOR. Should be a list of dictionaries or a
+         Data to write as CBOR file. Should be a list of dictionaries or a
          single dictionary.

      Returns
@@ -77,4 +95,10 @@ def write(
      int
          The number of rows written to the CBOR file.
      """
-     return stub.write(path, data, format_name='CBOR')
+     cbor2 = _get_cbor()
+     records = normalize_records(data, 'CBOR')
+     payload: JSONData = records if isinstance(data, list) else records[0]
+     path.parent.mkdir(parents=True, exist_ok=True)
+     with path.open('wb') as handle:
+         handle.write(cbor2.dumps(payload))
+     return len(records)
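Usage sketch (illustrative): unlike the stub, `read` now returns `JSONData`, so a file holding a single CBOR map comes back as a dict while an array comes back as a list; `write` accepts either shape and reports a record count (assuming `normalize_records` wraps a single dict as one record). File names are made up.

```python
from pathlib import Path

from etlplus.file import cbor

print(cbor.write(Path('config.cbor'), {'retries': 3}))          # 1 record, one CBOR map
print(cbor.write(Path('events.cbor'), [{'id': 1}, {'id': 2}]))  # 2 records, a CBOR array
print(cbor.read(Path('events.cbor')))                           # back to a list of dicts
```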
etlplus/file/dat.py CHANGED
@@ -1,12 +1,12 @@
  """
  :mod:`etlplus.file.dat` module.

- Stub helpers for reading/writing data (DAT) files (not implemented yet).
+ Helpers for reading/writing data (DAT) files.

  Notes
  -----
- - A DAT-formatted” file is a generic data file that may use various
-   delimiters or fixed-width formats.
+ - A DAT file is a generic data file that may use various delimiters or fixed-
+   width formats.
  - Common cases:
    - Delimited text files (e.g., CSV, TSV).
    - Fixed-width formatted files.
@@ -18,11 +18,14 @@ Notes

  from __future__ import annotations

+ import csv
  from pathlib import Path
+ from typing import cast

  from ..types import JSONData
+ from ..types import JSONDict
  from ..types import JSONList
- from . import stub
+ from ._io import write_delimited

  # SECTION: EXPORTS ========================================================== #

@@ -53,7 +56,42 @@ def read(
      JSONList
          The list of dictionaries read from the DAT file.
      """
-     return stub.read(path, format_name='DAT')
+     with path.open('r', encoding='utf-8', newline='') as handle:
+         sample = handle.read(4096)
+         handle.seek(0)
+         sniffer = csv.Sniffer()
+         dialect: csv.Dialect
+         try:
+             dialect = cast(
+                 csv.Dialect,
+                 sniffer.sniff(sample, delimiters=',\t|;'),
+             )
+         except csv.Error:
+             dialect = cast(csv.Dialect, csv.get_dialect('excel'))
+         try:
+             has_header = sniffer.has_header(sample)
+         except csv.Error:
+             has_header = True
+
+         reader = csv.reader(handle, dialect)
+         rows = [row for row in reader if any(field.strip() for field in row)]
+         if not rows:
+             return []
+
+         if has_header:
+             header = rows[0]
+             data_rows = rows[1:]
+         else:
+             header = [f'col_{i + 1}' for i in range(len(rows[0]))]
+             data_rows = rows
+
+         records: JSONList = []
+         for row in data_rows:
+             record: JSONDict = {}
+             for index, name in enumerate(header):
+                 record[name] = row[index] if index < len(row) else None
+             records.append(record)
+         return records


  def write(
@@ -76,4 +114,4 @@ def write(
      int
          The number of rows written to the DAT file.
      """
-     return stub.write(path, data, format_name='DAT')
+     return write_delimited(path, data, delimiter=',')
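Usage sketch (illustrative): the reader sniffs the delimiter (comma, tab, pipe, or semicolon) and the header row, falling back to `col_1`, `col_2`, ... names, and keeps values as strings; writing always goes through `write_delimited` with a comma delimiter. The file and columns below are made up.

```python
from pathlib import Path

from etlplus.file import dat

path = Path('metrics.dat')
path.write_text('id|score\n1|0.5\n2|0.9\n', encoding='utf-8')
print(dat.read(path))  # [{'id': '1', 'score': '0.5'}, {'id': '2', 'score': '0.9'}]
```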
etlplus/file/dta.py CHANGED
@@ -1,27 +1,30 @@
  """
  :mod:`etlplus.file.dta` module.

- Stub helpers for reading/writing Stata (DTA) data files (not implemented yet).
+ Helpers for reading/writing Stata (DTA) files.

  Notes
  -----
- - Stata DTA files are binary files used by Stata statistical software that
-   store datasets with variables, labels, and data types.
+ - A DTA file is a proprietary binary format created by Stata to store datasets
+   with variables, labels, and data types.
  - Common cases:
-   - Reading data for analysis in Python.
-   - Writing processed data back to Stata format.
+   - Statistical analysis workflows.
+   - Data sharing in research environments.
+   - Interchange between Stata and other analytics tools.
  - Rule of thumb:
-   - If you need to work with Stata data files, use this module for reading
+   - If the file follows the DTA specification, use this module for reading
      and writing.
  """

  from __future__ import annotations

  from pathlib import Path
+ from typing import cast

  from ..types import JSONData
  from ..types import JSONList
- from . import stub
+ from ._imports import get_pandas
+ from ._io import normalize_records

  # SECTION: EXPORTS ========================================================== #

@@ -51,8 +54,21 @@ def read(
      -------
      JSONList
          The list of dictionaries read from the DTA file.
+
+     Raises
+     ------
+     ImportError
+         If optional dependencies for DTA support are missing.
      """
-     return stub.read(path, format_name='DTA')
+     pandas = get_pandas('DTA')
+     try:
+         frame = pandas.read_stata(path)
+     except ImportError as err:  # pragma: no cover
+         raise ImportError(
+             'DTA support may require optional dependency "pyreadstat".\n'
+             'Install with: pip install pyreadstat',
+         ) from err
+     return cast(JSONList, frame.to_dict(orient='records'))


  def write(
@@ -67,12 +83,31 @@ def write(
      path : Path
          Path to the DTA file on disk.
      data : JSONData
-         Data to write as DTA file. Should be a list of dictionaries or a
-         single dictionary.
+         Data to write as DTA file. Should be a list of dictionaries or a single
+         dictionary.

      Returns
      -------
      int
          The number of rows written to the DTA file.
+
+     Raises
+     ------
+     ImportError
+         If optional dependencies for DTA support are missing.
      """
-     return stub.write(path, data, format_name='DTA')
+     records = normalize_records(data, 'DTA')
+     if not records:
+         return 0
+
+     pandas = get_pandas('DTA')
+     path.parent.mkdir(parents=True, exist_ok=True)
+     frame = pandas.DataFrame.from_records(records)
+     try:
+         frame.to_stata(path, write_index=False)
+     except ImportError as err:  # pragma: no cover
+         raise ImportError(
+             'DTA support may require optional dependency "pyreadstat".\n'
+             'Install with: pip install pyreadstat',
+         ) from err
+     return len(records)
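Usage sketch (illustrative): the round trip goes through pandas (`to_stata` / `read_stata`), and pandas may in turn need `pyreadstat` for some files, which is what the re-raised ImportError hints at. Path and records below are made up.

```python
from pathlib import Path

from etlplus.file import dta

rows = [{'subject': 1, 'score': 3.5}, {'subject': 2, 'score': 4.0}]
dta.write(Path('study.dta'), rows)   # DataFrame.from_records(...).to_stata(...)
print(dta.read(Path('study.dta')))   # frame.to_dict(orient='records')
```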
etlplus/file/duckdb.py CHANGED
@@ -1,8 +1,7 @@
  """
  :mod:`etlplus.file.duckdb` module.

- Stub helpers for reading/writing DuckDB database (DUCKDB) files (not
- implemented yet).
+ Helpers for reading/writing DuckDB database (DUCKDB) files.

  Notes
  -----
@@ -19,11 +18,14 @@ Notes

  from __future__ import annotations

+ import json
  from pathlib import Path
+ from typing import Any

  from ..types import JSONData
  from ..types import JSONList
- from . import stub
+ from ._imports import get_optional_module
+ from ._io import normalize_records

  # SECTION: EXPORTS ========================================================== #

@@ -35,6 +37,103 @@ __all__ = [
  ]


+ # SECTION: INTERNAL CONSTANTS ============================================== #
+
+
+ DEFAULT_TABLE = 'data'
+
+
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+ def _coerce_sql_value(
+     value: Any,
+ ) -> Any:
+     """
+     Normalize values into DuckDB-compatible types.
+
+     Parameters
+     ----------
+     value : Any
+         The value to normalize.
+
+     Returns
+     -------
+     Any
+         The normalized value.
+     """
+     if value is None or isinstance(value, (str, int, float, bool)):
+         return value
+     return json.dumps(value, ensure_ascii=True)
+
+
+ def _get_duckdb() -> Any:
+     """
+     Return the duckdb module, importing it on first use.
+
+     Returns
+     -------
+     Any
+         The duckdb module.
+     """
+     return get_optional_module(
+         'duckdb',
+         error_message=(
+             'DUCKDB support requires optional dependency "duckdb".\n'
+             'Install with: pip install duckdb'
+         ),
+     )
+
+
+ def _infer_column_type(values: list[Any]) -> str:
+     """Infer a basic DuckDB column type from sample values."""
+     seen_bool = False
+     seen_int = False
+     seen_float = False
+     seen_other = False
+     for value in values:
+         if value is None:
+             continue
+         if isinstance(value, bool):
+             seen_bool = True
+         elif isinstance(value, int):
+             seen_int = True
+         elif isinstance(value, float):
+             seen_float = True
+         else:
+             seen_other = True
+             break
+     if seen_other:
+         return 'VARCHAR'
+     if seen_float:
+         return 'DOUBLE'
+     if seen_int:
+         return 'BIGINT'
+     if seen_bool:
+         return 'BOOLEAN'
+     return 'VARCHAR'
+
+
+ def _quote_identifier(value: str) -> str:
+     """Return a safely quoted SQL identifier."""
+     escaped = value.replace('"', '""')
+     return f'"{escaped}"'
+
+
+ def _resolve_table(tables: list[str]) -> str | None:
+     """Pick a table name for read operations."""
+     if not tables:
+         return None
+     if DEFAULT_TABLE in tables:
+         return DEFAULT_TABLE
+     if len(tables) == 1:
+         return tables[0]
+     raise ValueError(
+         'Multiple tables found in DuckDB file; expected "data" or a '
+         'single table',
+     )
+
+
  # SECTION: FUNCTIONS ======================================================== #


@@ -54,7 +153,25 @@ def read(
      JSONList
          The list of dictionaries read from the DUCKDB file.
      """
-     return stub.read(path, format_name='DUCKDB')
+     duckdb = _get_duckdb()
+     conn = duckdb.connect(str(path))
+     try:
+         tables = [row[0] for row in conn.execute('SHOW TABLES').fetchall()]
+         table = _resolve_table(tables)
+         if table is None:
+             return []
+         query = f'SELECT * FROM {_quote_identifier(table)}'
+         cursor = conn.execute(query)
+         rows = cursor.fetchall()
+         columns = [desc[0] for desc in cursor.description or []]
+         if not columns:
+             info = conn.execute(
+                 f'PRAGMA table_info({_quote_identifier(table)})',
+             ).fetchall()
+             columns = [row[1] for row in info]
+         return [dict(zip(columns, row, strict=True)) for row in rows]
+     finally:
+         conn.close()


  def write(
@@ -77,4 +194,41 @@ def write(
      int
          The number of rows written to the DUCKDB file.
      """
-     return stub.write(path, data, format_name='DUCKDB')
+     records = normalize_records(data, 'DUCKDB')
+     if not records:
+         return 0
+
+     columns = sorted({key for row in records for key in row})
+     if not columns:
+         return 0
+
+     column_values: dict[str, list[Any]] = {col: [] for col in columns}
+     for row in records:
+         for column in columns:
+             column_values[column].append(row.get(column))
+
+     column_defs = ', '.join(
+         f'{_quote_identifier(column)} {_infer_column_type(values)}'
+         for column, values in column_values.items()
+     )
+     table_ident = _quote_identifier(DEFAULT_TABLE)
+     insert_columns = ', '.join(_quote_identifier(column) for column in columns)
+     placeholders = ', '.join('?' for _ in columns)
+     insert_sql = (
+         f'INSERT INTO {table_ident} ({insert_columns}) VALUES ({placeholders})'
+     )
+
+     duckdb = _get_duckdb()
+     path.parent.mkdir(parents=True, exist_ok=True)
+     conn = duckdb.connect(str(path))
+     try:
+         conn.execute(f'DROP TABLE IF EXISTS {table_ident}')
+         conn.execute(f'CREATE TABLE {table_ident} ({column_defs})')
+         rows = [
+             tuple(_coerce_sql_value(row.get(column)) for column in columns)
+             for row in records
+         ]
+         conn.executemany(insert_sql, rows)
+     finally:
+         conn.close()
+     return len(records)
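Usage sketch (illustrative): `write` drops and recreates a table named `data` with inferred column types, storing non-scalar values as JSON text, while `read` loads `data` (or the only table present) and raises ValueError otherwise. The file name is made up.

```python
from pathlib import Path

from etlplus.file import duckdb as duckdb_file

rows = [
    {'id': 1, 'tags': ['a', 'b']},  # list is stored as JSON text (VARCHAR column)
    {'id': 2, 'tags': []},
]
duckdb_file.write(Path('cache.duckdb'), rows)
print(duckdb_file.read(Path('cache.duckdb')))  # tags come back as JSON strings
```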