polars-runtime-compat 1.34.0b2__cp39-abi3-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic. Click here for more details.

Files changed (203)
  1. _polars_runtime_compat/.gitkeep +0 -0
  2. _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
  3. polars/__init__.py +528 -0
  4. polars/_cpu_check.py +265 -0
  5. polars/_dependencies.py +355 -0
  6. polars/_plr.py +99 -0
  7. polars/_plr.pyi +2496 -0
  8. polars/_reexport.py +23 -0
  9. polars/_typing.py +478 -0
  10. polars/_utils/__init__.py +37 -0
  11. polars/_utils/async_.py +102 -0
  12. polars/_utils/cache.py +176 -0
  13. polars/_utils/cloud.py +40 -0
  14. polars/_utils/constants.py +29 -0
  15. polars/_utils/construction/__init__.py +46 -0
  16. polars/_utils/construction/dataframe.py +1397 -0
  17. polars/_utils/construction/other.py +72 -0
  18. polars/_utils/construction/series.py +560 -0
  19. polars/_utils/construction/utils.py +118 -0
  20. polars/_utils/convert.py +224 -0
  21. polars/_utils/deprecation.py +406 -0
  22. polars/_utils/getitem.py +457 -0
  23. polars/_utils/logging.py +11 -0
  24. polars/_utils/nest_asyncio.py +264 -0
  25. polars/_utils/parquet.py +15 -0
  26. polars/_utils/parse/__init__.py +12 -0
  27. polars/_utils/parse/expr.py +242 -0
  28. polars/_utils/polars_version.py +19 -0
  29. polars/_utils/pycapsule.py +53 -0
  30. polars/_utils/scan.py +27 -0
  31. polars/_utils/serde.py +63 -0
  32. polars/_utils/slice.py +215 -0
  33. polars/_utils/udfs.py +1251 -0
  34. polars/_utils/unstable.py +63 -0
  35. polars/_utils/various.py +782 -0
  36. polars/_utils/wrap.py +25 -0
  37. polars/api.py +370 -0
  38. polars/catalog/__init__.py +0 -0
  39. polars/catalog/unity/__init__.py +19 -0
  40. polars/catalog/unity/client.py +733 -0
  41. polars/catalog/unity/models.py +152 -0
  42. polars/config.py +1571 -0
  43. polars/convert/__init__.py +25 -0
  44. polars/convert/general.py +1046 -0
  45. polars/convert/normalize.py +261 -0
  46. polars/dataframe/__init__.py +5 -0
  47. polars/dataframe/_html.py +186 -0
  48. polars/dataframe/frame.py +12582 -0
  49. polars/dataframe/group_by.py +1067 -0
  50. polars/dataframe/plotting.py +257 -0
  51. polars/datatype_expr/__init__.py +5 -0
  52. polars/datatype_expr/array.py +56 -0
  53. polars/datatype_expr/datatype_expr.py +304 -0
  54. polars/datatype_expr/list.py +18 -0
  55. polars/datatype_expr/struct.py +69 -0
  56. polars/datatypes/__init__.py +122 -0
  57. polars/datatypes/_parse.py +195 -0
  58. polars/datatypes/_utils.py +48 -0
  59. polars/datatypes/classes.py +1213 -0
  60. polars/datatypes/constants.py +11 -0
  61. polars/datatypes/constructor.py +172 -0
  62. polars/datatypes/convert.py +366 -0
  63. polars/datatypes/group.py +130 -0
  64. polars/exceptions.py +230 -0
  65. polars/expr/__init__.py +7 -0
  66. polars/expr/array.py +964 -0
  67. polars/expr/binary.py +346 -0
  68. polars/expr/categorical.py +306 -0
  69. polars/expr/datetime.py +2620 -0
  70. polars/expr/expr.py +11272 -0
  71. polars/expr/list.py +1408 -0
  72. polars/expr/meta.py +444 -0
  73. polars/expr/name.py +321 -0
  74. polars/expr/string.py +3045 -0
  75. polars/expr/struct.py +357 -0
  76. polars/expr/whenthen.py +185 -0
  77. polars/functions/__init__.py +193 -0
  78. polars/functions/aggregation/__init__.py +33 -0
  79. polars/functions/aggregation/horizontal.py +298 -0
  80. polars/functions/aggregation/vertical.py +341 -0
  81. polars/functions/as_datatype.py +848 -0
  82. polars/functions/business.py +138 -0
  83. polars/functions/col.py +384 -0
  84. polars/functions/datatype.py +121 -0
  85. polars/functions/eager.py +524 -0
  86. polars/functions/escape_regex.py +29 -0
  87. polars/functions/lazy.py +2751 -0
  88. polars/functions/len.py +68 -0
  89. polars/functions/lit.py +210 -0
  90. polars/functions/random.py +22 -0
  91. polars/functions/range/__init__.py +19 -0
  92. polars/functions/range/_utils.py +15 -0
  93. polars/functions/range/date_range.py +303 -0
  94. polars/functions/range/datetime_range.py +370 -0
  95. polars/functions/range/int_range.py +348 -0
  96. polars/functions/range/linear_space.py +311 -0
  97. polars/functions/range/time_range.py +287 -0
  98. polars/functions/repeat.py +301 -0
  99. polars/functions/whenthen.py +353 -0
  100. polars/interchange/__init__.py +10 -0
  101. polars/interchange/buffer.py +77 -0
  102. polars/interchange/column.py +190 -0
  103. polars/interchange/dataframe.py +230 -0
  104. polars/interchange/from_dataframe.py +328 -0
  105. polars/interchange/protocol.py +303 -0
  106. polars/interchange/utils.py +170 -0
  107. polars/io/__init__.py +64 -0
  108. polars/io/_utils.py +317 -0
  109. polars/io/avro.py +49 -0
  110. polars/io/clipboard.py +36 -0
  111. polars/io/cloud/__init__.py +17 -0
  112. polars/io/cloud/_utils.py +80 -0
  113. polars/io/cloud/credential_provider/__init__.py +17 -0
  114. polars/io/cloud/credential_provider/_builder.py +520 -0
  115. polars/io/cloud/credential_provider/_providers.py +618 -0
  116. polars/io/csv/__init__.py +9 -0
  117. polars/io/csv/_utils.py +38 -0
  118. polars/io/csv/batched_reader.py +142 -0
  119. polars/io/csv/functions.py +1495 -0
  120. polars/io/database/__init__.py +6 -0
  121. polars/io/database/_arrow_registry.py +70 -0
  122. polars/io/database/_cursor_proxies.py +147 -0
  123. polars/io/database/_executor.py +578 -0
  124. polars/io/database/_inference.py +314 -0
  125. polars/io/database/_utils.py +144 -0
  126. polars/io/database/functions.py +516 -0
  127. polars/io/delta.py +499 -0
  128. polars/io/iceberg/__init__.py +3 -0
  129. polars/io/iceberg/_utils.py +697 -0
  130. polars/io/iceberg/dataset.py +556 -0
  131. polars/io/iceberg/functions.py +151 -0
  132. polars/io/ipc/__init__.py +8 -0
  133. polars/io/ipc/functions.py +514 -0
  134. polars/io/json/__init__.py +3 -0
  135. polars/io/json/read.py +101 -0
  136. polars/io/ndjson.py +332 -0
  137. polars/io/parquet/__init__.py +17 -0
  138. polars/io/parquet/field_overwrites.py +140 -0
  139. polars/io/parquet/functions.py +722 -0
  140. polars/io/partition.py +491 -0
  141. polars/io/plugins.py +187 -0
  142. polars/io/pyarrow_dataset/__init__.py +5 -0
  143. polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
  144. polars/io/pyarrow_dataset/functions.py +79 -0
  145. polars/io/scan_options/__init__.py +5 -0
  146. polars/io/scan_options/_options.py +59 -0
  147. polars/io/scan_options/cast_options.py +126 -0
  148. polars/io/spreadsheet/__init__.py +6 -0
  149. polars/io/spreadsheet/_utils.py +52 -0
  150. polars/io/spreadsheet/_write_utils.py +647 -0
  151. polars/io/spreadsheet/functions.py +1323 -0
  152. polars/lazyframe/__init__.py +9 -0
  153. polars/lazyframe/engine_config.py +61 -0
  154. polars/lazyframe/frame.py +8564 -0
  155. polars/lazyframe/group_by.py +669 -0
  156. polars/lazyframe/in_process.py +42 -0
  157. polars/lazyframe/opt_flags.py +333 -0
  158. polars/meta/__init__.py +14 -0
  159. polars/meta/build.py +33 -0
  160. polars/meta/index_type.py +27 -0
  161. polars/meta/thread_pool.py +50 -0
  162. polars/meta/versions.py +120 -0
  163. polars/ml/__init__.py +0 -0
  164. polars/ml/torch.py +213 -0
  165. polars/ml/utilities.py +30 -0
  166. polars/plugins.py +155 -0
  167. polars/py.typed +0 -0
  168. polars/pyproject.toml +96 -0
  169. polars/schema.py +265 -0
  170. polars/selectors.py +3117 -0
  171. polars/series/__init__.py +5 -0
  172. polars/series/array.py +776 -0
  173. polars/series/binary.py +254 -0
  174. polars/series/categorical.py +246 -0
  175. polars/series/datetime.py +2275 -0
  176. polars/series/list.py +1087 -0
  177. polars/series/plotting.py +191 -0
  178. polars/series/series.py +9197 -0
  179. polars/series/string.py +2367 -0
  180. polars/series/struct.py +154 -0
  181. polars/series/utils.py +191 -0
  182. polars/sql/__init__.py +7 -0
  183. polars/sql/context.py +677 -0
  184. polars/sql/functions.py +139 -0
  185. polars/string_cache.py +185 -0
  186. polars/testing/__init__.py +13 -0
  187. polars/testing/asserts/__init__.py +9 -0
  188. polars/testing/asserts/frame.py +231 -0
  189. polars/testing/asserts/series.py +219 -0
  190. polars/testing/asserts/utils.py +12 -0
  191. polars/testing/parametric/__init__.py +33 -0
  192. polars/testing/parametric/profiles.py +107 -0
  193. polars/testing/parametric/strategies/__init__.py +22 -0
  194. polars/testing/parametric/strategies/_utils.py +14 -0
  195. polars/testing/parametric/strategies/core.py +615 -0
  196. polars/testing/parametric/strategies/data.py +452 -0
  197. polars/testing/parametric/strategies/dtype.py +436 -0
  198. polars/testing/parametric/strategies/legacy.py +169 -0
  199. polars/type_aliases.py +24 -0
  200. polars_runtime_compat-1.34.0b2.dist-info/METADATA +31 -0
  201. polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
  202. polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
  203. polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +1 -0
@@ -0,0 +1,647 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Sequence
4
+ from io import BytesIO
5
+ from os import PathLike
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING, Any, overload
8
+
9
+ from polars import functions as F
10
+ from polars._dependencies import json
11
+ from polars._utils.various import qualified_type_name
12
+ from polars.datatypes import (
13
+ Date,
14
+ Datetime,
15
+ Float64,
16
+ Int64,
17
+ Time,
18
+ )
19
+ from polars.datatypes.group import FLOAT_DTYPES, INTEGER_DTYPES
20
+ from polars.exceptions import DuplicateError
21
+ from polars.selectors import _expand_selector_dicts, _expand_selectors, numeric
22
+
23
+ if TYPE_CHECKING:
24
+ from collections.abc import Iterable
25
+ from typing import Literal
26
+
27
+ from xlsxwriter import Workbook
28
+ from xlsxwriter.format import Format
29
+ from xlsxwriter.worksheet import Worksheet
30
+
31
+ from polars import DataFrame, Schema, Series
32
+ from polars._typing import (
33
+ ColumnFormatDict,
34
+ ColumnTotalsDefinition,
35
+ ConditionalFormatDict,
36
+ OneOrMoreDataTypes,
37
+ PolarsDataType,
38
+ RowTotalsDefinition,
39
+ )
40
+ from polars.expr import Expr
41
+
42
+
43
+ def _cluster(iterable: Iterable[Any], n: int = 2) -> Iterable[Any]:
44
+ return zip(*[iter(iterable)] * n)
45
+
46
+
47
# Default numeric formats: thousands separator, negative values rendered in red.
_XL_DEFAULT_FLOAT_FORMAT_ = "#,##0.000;[Red]-#,##0.000"
_XL_DEFAULT_INTEGER_FORMAT_ = "#,##0;[Red]-#,##0"
# Default Excel number-format strings per temporal dtype.
# NOTE(review): this module-level dict is mutated at runtime by
# `_xl_setup_table_columns` (which seeds integer dtype formats into it),
# so its contents can change between calls.
_XL_DEFAULT_DTYPE_FORMATS_: dict[PolarsDataType, str] = {
    Datetime: "yyyy-mm-dd hh:mm:ss",
    Date: "yyyy-mm-dd;@",
    Time: "hh:mm:ss;@",
}
54
+
55
+
56
+ class _XLFormatCache:
57
+ """Create/cache only one Format object per distinct set of format options."""
58
+
59
+ def __init__(self, wb: Workbook) -> None:
60
+ self._cache: dict[str, Format] = {}
61
+ self.wb = wb
62
+
63
+ @staticmethod
64
+ def _key(fmt: dict[str, Any]) -> str:
65
+ return json.dumps(fmt, sort_keys=True, default=str)
66
+
67
+ def get(self, fmt: dict[str, Any] | Format) -> Format:
68
+ if not isinstance(fmt, dict):
69
+ wbfmt = fmt
70
+ else:
71
+ key = self._key(fmt)
72
+ wbfmt = self._cache.get(key)
73
+ if wbfmt is None:
74
+ wbfmt = self.wb.add_format(fmt)
75
+ self._cache[key] = wbfmt
76
+ return wbfmt
77
+
78
+
79
+ def _adjacent_cols(df: DataFrame, cols: Iterable[str], min_max: dict[str, Any]) -> bool:
80
+ """Indicate if the given columns are all adjacent to one another."""
81
+ idxs = sorted(df.get_column_index(col) for col in cols)
82
+ if idxs != sorted(range(min(idxs), max(idxs) + 1)):
83
+ return False
84
+ else:
85
+ columns = df.columns
86
+ min_max["min"] = {"idx": idxs[0], "name": columns[idxs[0]]}
87
+ min_max["max"] = {"idx": idxs[-1], "name": columns[idxs[-1]]}
88
+ return True
89
+
90
+
91
+ def _all_integer_cols(cols: Iterable[str], schema: Schema) -> bool:
92
+ """Indicate if the given columns are all integer-typed."""
93
+ return all(schema[col].is_integer() for col in cols)
94
+
95
+
96
+ def _unpack_multi_column_dict(
97
+ d: dict[str | Sequence[str], Any] | Any,
98
+ ) -> dict[str, Any] | Any:
99
+ """Unpack multi-col dictionary into equivalent single-col definitions."""
100
+ if not isinstance(d, dict):
101
+ return d
102
+ unpacked: dict[str, Any] = {}
103
+ for key, value in d.items():
104
+ if isinstance(key, str) or not isinstance(key, Sequence):
105
+ key = (key,)
106
+ for k in key:
107
+ unpacked[k] = value
108
+ return unpacked
109
+
110
+
111
def _xl_apply_conditional_formats(
    df: DataFrame,
    ws: Worksheet,
    *,
    conditional_formats: ConditionalFormatDict,
    table_start: tuple[int, int],
    include_header: bool,
    format_cache: _XLFormatCache,
) -> None:
    """Take all conditional formatting options and apply them to the table/range.

    Parameters
    ----------
    df
        Frame the table was written from (used for column positions/height).
    ws
        Target xlsxwriter worksheet.
    conditional_formats
        Mapping of column(s)/selector(s) to one or more conditional formats.
    table_start
        (row, col) of the table's top-left cell.
    include_header
        Whether the written table has a header row (offsets the data range).
    format_cache
        Shared per-workbook cache used to register any `format` options.
    """
    from xlsxwriter.format import Format

    for cols, formats in _expand_selector_dicts(
        df, conditional_formats, expand_keys=True, expand_values=False, tuple_keys=True
    ).items():
        # a single-entry tuple key collapses to the bare column name
        if not isinstance(cols, str) and len(cols) == 1:
            cols = next(iter(cols))
        # a lone str/dict format is shorthand for a one-element list
        if isinstance(formats, (str, dict)):
            formats = [formats]

        for fmt in formats:
            if not isinstance(fmt, dict):
                # bare string is shorthand for {"type": <str>}
                fmt = {"type": fmt}
            if isinstance(cols, str):
                col_range = _xl_column_range(
                    df, table_start, cols, include_header=include_header
                )
            else:
                col_range = _xl_column_multi_range(
                    df, table_start, cols, include_header=include_header
                )
            if " " in col_range:
                # non-adjacent columns: xlsxwriter needs the full set passed
                # via 'multi_range', with the primary range anchored on the
                # first column
                col = next(iter(cols))
                fmt["multi_range"] = col_range
                col_range = _xl_column_range(
                    df, table_start, col, include_header=include_header
                )

            if "format" in fmt:
                # register any supplied format options through the cache
                f = fmt["format"]
                fmt["format"] = (
                    f  # already registered
                    if isinstance(f, Format)
                    else format_cache.get(
                        {"num_format": f} if isinstance(f, str) else f
                    )
                )
            ws.conditional_format(col_range, fmt)
159
+
160
+
161
+ @overload
162
+ def _xl_column_range(
163
+ df: DataFrame,
164
+ table_start: tuple[int, int],
165
+ col: str | tuple[int, int],
166
+ *,
167
+ include_header: bool,
168
+ as_range: Literal[True] = ...,
169
+ ) -> str: ...
170
+
171
+
172
+ @overload
173
+ def _xl_column_range(
174
+ df: DataFrame,
175
+ table_start: tuple[int, int],
176
+ col: str | tuple[int, int],
177
+ *,
178
+ include_header: bool,
179
+ as_range: Literal[False],
180
+ ) -> tuple[int, int, int, int]: ...
181
+
182
+
183
+ def _xl_column_range(
184
+ df: DataFrame,
185
+ table_start: tuple[int, int],
186
+ col: str | tuple[int, int],
187
+ *,
188
+ include_header: bool,
189
+ as_range: bool = True,
190
+ ) -> tuple[int, int, int, int] | str:
191
+ """Return the Excel sheet range of a named column, accounting for all offsets."""
192
+ col_start = (
193
+ table_start[0] + int(include_header),
194
+ table_start[1] + (df.get_column_index(col) if isinstance(col, str) else col[0]),
195
+ )
196
+ col_finish = (
197
+ col_start[0] + df.height - 1,
198
+ col_start[1] + (0 if isinstance(col, str) else (col[1] - col[0])),
199
+ )
200
+ if as_range:
201
+ return "".join(_xl_rowcols_to_range(*col_start, *col_finish))
202
+ else:
203
+ return col_start + col_finish
204
+
205
+
206
def _xl_column_multi_range(
    df: DataFrame,
    table_start: tuple[int, int],
    cols: Iterable[str],
    *,
    include_header: bool,
) -> str:
    """Return column ranges as an xlsxwriter 'multi_range' string, or spanning range."""
    bounds: dict[str, Any] = {}
    # adjacent columns collapse into one contiguous spanning range
    if _adjacent_cols(df, cols, min_max=bounds):
        span = (bounds["min"]["idx"], bounds["max"]["idx"])
        return _xl_column_range(df, table_start, span, include_header=include_header)
    # otherwise emit one space-separated range per column
    ranges = [
        _xl_column_range(df, table_start, col, include_header=include_header)
        for col in cols
    ]
    return " ".join(ranges)
226
+
227
+
228
def _xl_inject_dummy_table_columns(
    df: DataFrame,
    coldefs: dict[str, Any],
    *,
    dtype: dict[str, PolarsDataType] | PolarsDataType | None = None,
    expr: Expr | None = None,
) -> DataFrame:
    """Insert dummy frame columns in order to create empty/named table columns.

    Parameters
    ----------
    df
        Frame to extend with placeholder columns.
    coldefs
        Mapping of new column name to its definition; a dict definition may
        carry "insert_before"/"insert_after" (position) and "return_dtype" keys.
    dtype
        Dtype (or per-column dtype mapping) used to cast the placeholder values.
    expr
        Expression used to populate the new columns (defaults to a null literal).

    Raises
    ------
    DuplicateError
        If a requested column name already exists in `df`.
    """
    df_original_columns = set(df.columns)
    df_select_cols = df.columns.copy()
    cast_lookup = {}

    for col, definition in coldefs.items():
        if col in df_original_columns:
            msg = f"cannot create a second {col!r} column"
            raise DuplicateError(msg)
        elif not isinstance(definition, dict):
            # simple definition: just append the new column at the end
            df_select_cols.append(col)
        else:
            # dict definition may pin an explicit dtype and/or position
            cast_lookup[col] = definition.get("return_dtype")
            insert_before = definition.get("insert_before")
            insert_after = definition.get("insert_after")

            if insert_after is None and insert_before is None:
                df_select_cols.append(col)
            else:
                # "insert_before" wins if both are given
                insert_idx = (
                    df_select_cols.index(insert_after) + 1  # type: ignore[arg-type]
                    if insert_before is None
                    else df_select_cols.index(insert_before)
                )
                df_select_cols.insert(insert_idx, col)

    # default placeholder value is a typed null literal
    expr = F.lit(None) if expr is None else expr
    df = df.select(
        (
            col
            if col in df_original_columns
            else (
                # cast priority: per-column "return_dtype", then `dtype`
                # (per-column mapping falls back to Float64)
                expr.cast(
                    cast_lookup.get(  # type:ignore[arg-type]
                        col,
                        dtype.get(col, Float64) if isinstance(dtype, dict) else dtype,
                    )
                )
                if dtype or (cast_lookup.get(col) is not None)
                else expr
            ).alias(col)
        )
        for col in df_select_cols
    )
    return df
280
+
281
+
282
def _xl_inject_sparklines(
    ws: Worksheet,
    df: DataFrame,
    table_start: tuple[int, int],
    col: str,
    *,
    include_header: bool,
    params: Sequence[str] | dict[str, Any],
) -> None:
    """Inject sparklines into (previously-created) empty table columns.

    Parameters
    ----------
    ws
        Target xlsxwriter worksheet.
    df
        Frame the table was written from.
    table_start
        (row, col) of the table's top-left cell.
    col
        Name of the (placeholder) column that will hold the sparklines.
    include_header
        Whether the written table has a header row.
    params
        Either the list of source data columns, or a dict of sparkline
        options that must include a "columns" entry.

    Raises
    ------
    ValueError
        If no source columns are supplied.
    RuntimeError
        If the source columns are not adjacent (a sparkline needs one range).
    """
    from xlsxwriter.utility import xl_rowcol_to_cell

    m: dict[str, Any] = {}
    data_cols = params.get("columns") if isinstance(params, dict) else params
    if not data_cols:
        msg = "supplying 'columns' param value is mandatory for sparklines"
        raise ValueError(msg)
    elif not _adjacent_cols(df, data_cols, min_max=m):
        msg = "sparkline data range/cols must all be adjacent"
        raise RuntimeError(msg)

    # top-left cell of the sparkline column (row/col indexes, not "A1" refs)
    spk_row, spk_col, _, _ = _xl_column_range(
        df, table_start, col, include_header=include_header, as_range=False
    )
    data_start_col = table_start[1] + m["min"]["idx"]
    data_end_col = table_start[1] + m["max"]["idx"]

    if not isinstance(params, dict):
        options = {}
    else:
        # strip polars-specific params before passing to xlsxwriter
        options = {
            name: val
            for name, val in params.items()
            if name not in ("columns", "insert_after", "insert_before")
        }
    if "negative_points" not in options:
        # highlight negatives by default for column/win-loss sparklines
        options["negative_points"] = options.get("type") in ("column", "win_loss")

    # one sparkline per data row, each referencing that row's data range.
    # NOTE(review): the same `options` dict is mutated and re-passed each
    # iteration — this relies on xlsxwriter copying it internally; confirm.
    for _ in range(df.height):
        data_start = xl_rowcol_to_cell(spk_row, data_start_col)
        data_end = xl_rowcol_to_cell(spk_row, data_end_col)
        options["range"] = f"{data_start}:{data_end}"
        ws.add_sparkline(spk_row, spk_col, options)
        spk_row += 1
327
+
328
+
329
def _xl_rowcols_to_range(*row_col_pairs: int) -> list[str]:
    """Return list of "A1:B2" range refs from pairs of row/col indexes."""
    from xlsxwriter.utility import xl_rowcol_to_cell

    # pair up (row, col) indexes into cell refs, then pair up the cell refs
    # into "start:end" range strings
    cells = [xl_rowcol_to_cell(row, col) for row, col in _cluster(row_col_pairs)]
    return [f"{start}:{finish}" for start, finish in _cluster(cells)]
335
+
336
+
337
def _xl_setup_table_columns(
    df: DataFrame,
    format_cache: _XLFormatCache,
    column_totals: ColumnTotalsDefinition | None = None,
    column_formats: ColumnFormatDict | None = None,
    dtype_formats: dict[OneOrMoreDataTypes, str] | None = None,
    header_format: dict[str, Any] | None = None,
    sparklines: dict[str, Sequence[str] | dict[str, Any]] | None = None,
    formulas: dict[str, str | dict[str, str]] | None = None,
    row_totals: RowTotalsDefinition | None = None,
    float_precision: int = 3,
    table_style: dict[str, Any] | str | None = None,
) -> tuple[list[dict[str, Any]], dict[str | tuple[str, ...], str], DataFrame]:
    """Setup and unify all column-related formatting/defaults.

    Returns
    -------
    tuple
        (xlsxwriter table-column option dicts, per-column format lookup,
        the - possibly extended/stringified - frame to write).
    """

    # no excel support for compound types; cast to their simple string representation
    def _map_str(s: Series) -> Series:
        return s.__class__(
            s.name, [(None if v is None else str(v)) for v in s.to_list()]
        )

    cast_cols = [
        F.col(col).map_batches(_map_str).alias(col)
        for col, tp in df.schema.items()
        if tp.is_nested() or tp.is_object()
    ]
    if cast_cols:
        df = df.with_columns(cast_cols)

    # expand/normalise column formats
    column_formats = _unpack_multi_column_dict(  # type: ignore[assignment]
        _expand_selector_dicts(
            df, column_formats, expand_keys=True, expand_values=False, tuple_keys=True
        )
    )

    # normalise row totals
    if not row_totals:
        row_totals_dtype = None
        row_total_funcs = {}
    else:
        schema = df.schema
        numeric_cols = {col for col, tp in schema.items() if tp.is_numeric()}
        if not isinstance(row_totals, dict):
            # single total column: sum all numeric cols (or the given subset)
            row_totals_dtype = (
                Int64 if _all_integer_cols(numeric_cols, schema) else Float64
            )
            sum_cols = (
                numeric_cols
                if row_totals is True
                else (
                    {row_totals}
                    if isinstance(row_totals, str)
                    else set(_expand_selectors(df, row_totals))
                )
            )
            # match the frame's dominant column-name capitalisation style
            n_ucase = sum((c[0] if c else "").isupper() for c in df.columns)
            total = f"{'T' if (n_ucase > df.width // 2) else 't'}otal"
            row_total_funcs = {total: _xl_table_formula(df, sum_cols, "sum")}
            row_totals = [total]
        else:
            # dict form: one named total column per entry
            row_totals = _expand_selector_dicts(
                df, row_totals, expand_keys=False, expand_values=True
            )
            row_totals_dtype = {  # type: ignore[assignment]
                nm: (
                    Int64
                    if _all_integer_cols(numeric_cols if cols is True else cols, schema)
                    else Float64
                )
                for nm, cols in row_totals.items()
            }
            row_total_funcs = {
                name: _xl_table_formula(
                    df, (numeric_cols if cols is True else cols), "sum"
                )
                for name, cols in row_totals.items()
            }

    # expand/normalise column totals
    if column_totals is True:
        column_totals = {numeric(): "sum", **dict.fromkeys(row_totals or (), "sum")}
    elif isinstance(column_totals, str):
        fn = column_totals.lower()
        column_totals = {numeric(): fn, **dict.fromkeys(row_totals or (), fn)}

    column_totals = _unpack_multi_column_dict(  # type: ignore[assignment]
        _expand_selector_dicts(df, column_totals, expand_keys=True, expand_values=False)
        if isinstance(column_totals, dict)
        else _expand_selectors(df, column_totals)
    )
    # a plain sequence of columns implies "sum" for each of them
    column_total_funcs = (
        dict.fromkeys(column_totals, "sum")
        if isinstance(column_totals, Sequence)
        else (column_totals.copy() if isinstance(column_totals, dict) else {})
    )

    # normalise formulas (bare string is shorthand for {"formula": <str>})
    column_formulas = {
        col: {"formula": options} if isinstance(options, str) else options
        for col, options in (formulas or {}).items()
    }

    # normalise formats
    column_formats = dict(column_formats or {})
    dtype_formats = dict(dtype_formats or {})

    # expand grouped dtype keys (tuple/frozenset of dtypes) to one entry each
    for tp in list(dtype_formats):
        if isinstance(tp, (tuple, frozenset)):
            dtype_formats.update(dict.fromkeys(tp, dtype_formats.pop(tp)))
    for fmt in dtype_formats.values():
        if not isinstance(fmt, str):
            msg = f"invalid dtype_format value: {fmt!r} (expected format string, got {qualified_type_name(fmt)!r})"
            raise TypeError(msg)

    # inject sparkline/row-total placeholder(s)
    if sparklines:
        df = _xl_inject_dummy_table_columns(df, sparklines)
    if column_formulas:
        df = _xl_inject_dummy_table_columns(df, column_formulas)
    if row_totals:
        df = _xl_inject_dummy_table_columns(df, row_total_funcs, dtype=row_totals_dtype)

    # seed format cache with default fallback format
    fmt_default = format_cache.get({"valign": "vcenter"})

    if table_style is None:
        # no table style; apply default black (+ve) & red (-ve) numeric formatting
        int_base_fmt = _XL_DEFAULT_INTEGER_FORMAT_
        flt_base_fmt = _XL_DEFAULT_FLOAT_FORMAT_
    else:
        # if we have a table style, defer the colours to that style
        int_base_fmt = _XL_DEFAULT_INTEGER_FORMAT_.split(";", 1)[0]
        flt_base_fmt = _XL_DEFAULT_FLOAT_FORMAT_.split(";", 1)[0]

    # NOTE(review): this mutates the module-level defaults dict, so the chosen
    # integer format persists across calls — confirm this is intentional
    for tp in INTEGER_DTYPES:
        _XL_DEFAULT_DTYPE_FORMATS_[tp] = int_base_fmt

    # float format honours the requested precision (0 => integer format)
    zeros = "0" * float_precision
    fmt_float = int_base_fmt if not zeros else flt_base_fmt.replace(".000", f".{zeros}")

    # assign default dtype formats
    for tp, fmt in _XL_DEFAULT_DTYPE_FORMATS_.items():
        dtype_formats.setdefault(tp, fmt)
    for tp in FLOAT_DTYPES:
        dtype_formats.setdefault(tp, fmt_float)
    # NOTE(review): this loop is a no-op as written (assigns each entry back
    # to itself) — confirm whether a transformation was intended here
    for tp, fmt in dtype_formats.items():
        dtype_formats[tp] = fmt

    # associate formats/functions with specific columns
    for col, tp in df.schema.items():
        base_type = tp.base_type()
        if base_type in dtype_formats:
            # prefer an exact dtype match over its base type
            fmt = dtype_formats.get(tp, dtype_formats[base_type])
            column_formats.setdefault(col, fmt)
        if col not in column_formats:
            column_formats[col] = fmt_default

    # ensure externally supplied formats are made available
    for col, fmt in column_formats.items():  # type: ignore[assignment]
        if isinstance(fmt, str):
            # bare string is a number format; register with default alignment
            column_formats[col] = format_cache.get(
                {"num_format": fmt, "valign": "vcenter"}
            )
        elif isinstance(fmt, dict):
            if "num_format" not in fmt:
                tp = df.schema.get(col)
                if tp in dtype_formats:
                    fmt["num_format"] = dtype_formats[tp]
            if "valign" not in fmt:
                fmt["valign"] = "vcenter"
            column_formats[col] = format_cache.get(fmt)

    # optional custom header format
    col_header_format = format_cache.get(header_format) if header_format else None

    # assemble table columns (dropping any None-valued options)
    table_columns = [
        {
            k: v
            for k, v in {
                "header": col,
                "format": column_formats[col],
                "header_format": col_header_format,
                "total_function": column_total_funcs.get(col),
                "formula": (
                    row_total_funcs.get(col)
                    or column_formulas.get(col, {}).get("formula")
                ),
            }.items()
            if v is not None
        }
        for col in df.columns
    ]
    return table_columns, column_formats, df  # type: ignore[return-value]
532
+
533
+
534
+ def _xl_setup_table_options(
535
+ table_style: dict[str, Any] | str | None,
536
+ ) -> tuple[dict[str, Any] | str | None, dict[str, Any]]:
537
+ """Setup table options, distinguishing style name from other formatting."""
538
+ if isinstance(table_style, dict):
539
+ valid_options = (
540
+ "style",
541
+ "banded_columns",
542
+ "banded_rows",
543
+ "first_column",
544
+ "last_column",
545
+ )
546
+ for key in table_style:
547
+ if key not in valid_options:
548
+ msg = f"invalid table style key: {key!r}"
549
+ raise ValueError(msg)
550
+
551
+ table_options = table_style.copy()
552
+ table_style = table_options.pop("style", None)
553
+ else:
554
+ table_options = {}
555
+
556
+ return table_style, table_options
557
+
558
+
559
+ def _xl_worksheet_in_workbook(
560
+ wb: Workbook, ws: Worksheet, *, return_worksheet: bool = False
561
+ ) -> bool | Worksheet:
562
+ if any(ws is sheet for sheet in wb.worksheets()):
563
+ return ws if return_worksheet else True
564
+ msg = f"the given workbook object {wb.filename!r} is not the parent of worksheet {ws.name!r}"
565
+ raise ValueError(msg)
566
+
567
+
568
def _xl_setup_workbook(
    workbook: Workbook | BytesIO | Path | str | None,
    worksheet: str | Worksheet | None = None,
) -> tuple[Workbook, Worksheet, bool]:
    """Establish the target Excel workbook and worksheet.

    Parameters
    ----------
    workbook
        Existing Workbook, in-memory buffer, path (str/Path), or None
        (which writes to "dataframe.xlsx" in the working directory).
    worksheet
        Worksheet object, sheet name, or None (a new sheet is added).

    Returns
    -------
    tuple
        (workbook, worksheet, can_close) — `can_close` is True only when the
        workbook was created here (caller-supplied workbooks are left open).

    Raises
    ------
    TypeError
        If a Worksheet object is given without its parent Workbook.
    """
    from xlsxwriter import Workbook
    from xlsxwriter.worksheet import Worksheet

    if isinstance(workbook, Workbook):
        # caller owns the workbook; never close it on their behalf
        wb, can_close = workbook, False
        ws = (
            worksheet
            if (
                isinstance(worksheet, Worksheet)
                and _xl_worksheet_in_workbook(wb, worksheet)
            )
            else wb.get_worksheet_by_name(name=worksheet)
        )
    elif isinstance(worksheet, Worksheet):
        # a Worksheet is meaningless without the Workbook it belongs to
        msg = f"worksheet object requires the parent workbook object; found workbook={workbook!r}"
        raise TypeError(msg)
    else:
        workbook_options = {
            "nan_inf_to_errors": True,
            "strings_to_formulas": False,
            "default_date_format": _XL_DEFAULT_DTYPE_FORMATS_[Date],
        }
        if isinstance(workbook, BytesIO):
            # in-memory target
            wb, ws, can_close = Workbook(workbook, workbook_options), None, True
        else:
            # file target: normalise None/str/PathLike to a resolved Path
            if workbook is None:
                file = Path("dataframe.xlsx")
            elif isinstance(workbook, str):
                file = Path(workbook)
            else:
                file = workbook

            if isinstance(file, PathLike):
                # default the extension to ".xlsx" if none was given
                file = (
                    (file if file.suffix else file.with_suffix(".xlsx"))
                    .expanduser()
                    .resolve(strict=False)
                )
            wb = Workbook(file, workbook_options)
            ws, can_close = None, True

    if ws is None:
        if isinstance(worksheet, Worksheet):
            # validate membership and use the given sheet object
            ws = _xl_worksheet_in_workbook(wb, worksheet, return_worksheet=True)
        else:
            ws = wb.add_worksheet(name=worksheet)
    return wb, ws, can_close
620
+
621
+
622
def _xl_table_formula(df: DataFrame, cols: Iterable[str], func: str) -> str:
    """Return a formula using structured references to columns in a named table."""
    if isinstance(cols, str):
        cols = [cols]
    bounds: dict[str, Any] = {}
    # adjacent columns can be referenced as a single structured range
    if _adjacent_cols(df, cols, min_max=bounds):
        lo, hi = bounds["min"]["name"], bounds["max"]["name"]
        return f"={func.upper()}([@[{lo}]:[{hi}]])"
    # otherwise reference each column individually
    refs = ",".join(f"[@[{c}]]" for c in cols)
    return f"={func.upper()}({refs})"
632
+
633
+
634
+ def _xl_unique_table_name(wb: Workbook) -> str:
635
+ """Establish a unique (per-workbook) table object name."""
636
+ table_prefix = "Frame"
637
+ polars_tables: set[str] = set()
638
+ for ws in wb.worksheets():
639
+ polars_tables.update(
640
+ tbl["name"] for tbl in ws.tables if tbl["name"].startswith(table_prefix)
641
+ )
642
+ n = len(polars_tables)
643
+ table_name = f"{table_prefix}{n}"
644
+ while table_name in polars_tables:
645
+ n += 1
646
+ table_name = f"{table_prefix}{n}"
647
+ return table_name