polars-runtime-compat 1.34.0b3__cp39-abi3-manylinux_2_24_aarch64.whl → 1.34.0b4__cp39-abi3-manylinux_2_24_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
- polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -103
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,647 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from collections.abc import Sequence
|
|
4
|
-
from io import BytesIO
|
|
5
|
-
from os import PathLike
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
from typing import TYPE_CHECKING, Any, overload
|
|
8
|
-
|
|
9
|
-
from polars import functions as F
|
|
10
|
-
from polars._dependencies import json
|
|
11
|
-
from polars._utils.various import qualified_type_name
|
|
12
|
-
from polars.datatypes import (
|
|
13
|
-
Date,
|
|
14
|
-
Datetime,
|
|
15
|
-
Float64,
|
|
16
|
-
Int64,
|
|
17
|
-
Time,
|
|
18
|
-
)
|
|
19
|
-
from polars.datatypes.group import FLOAT_DTYPES, INTEGER_DTYPES
|
|
20
|
-
from polars.exceptions import DuplicateError
|
|
21
|
-
from polars.selectors import _expand_selector_dicts, _expand_selectors, numeric
|
|
22
|
-
|
|
23
|
-
if TYPE_CHECKING:
|
|
24
|
-
from collections.abc import Iterable
|
|
25
|
-
from typing import Literal
|
|
26
|
-
|
|
27
|
-
from xlsxwriter import Workbook
|
|
28
|
-
from xlsxwriter.format import Format
|
|
29
|
-
from xlsxwriter.worksheet import Worksheet
|
|
30
|
-
|
|
31
|
-
from polars import DataFrame, Schema, Series
|
|
32
|
-
from polars._typing import (
|
|
33
|
-
ColumnFormatDict,
|
|
34
|
-
ColumnTotalsDefinition,
|
|
35
|
-
ConditionalFormatDict,
|
|
36
|
-
OneOrMoreDataTypes,
|
|
37
|
-
PolarsDataType,
|
|
38
|
-
RowTotalsDefinition,
|
|
39
|
-
)
|
|
40
|
-
from polars.expr import Expr
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def _cluster(iterable: Iterable[Any], n: int = 2) -> Iterable[Any]:
|
|
44
|
-
return zip(*[iter(iterable)] * n)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
# default numeric formats; the section after the ';' renders negative values in red
_XL_DEFAULT_FLOAT_FORMAT_ = "#,##0.000;[Red]-#,##0.000"
_XL_DEFAULT_INTEGER_FORMAT_ = "#,##0;[Red]-#,##0"

# default number formats for the temporal dtypes
_XL_DEFAULT_DTYPE_FORMATS_: dict[PolarsDataType, str] = {
    Datetime: "yyyy-mm-dd hh:mm:ss",
    Date: "yyyy-mm-dd;@",
    Time: "hh:mm:ss;@",
}
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
class _XLFormatCache:
|
|
57
|
-
"""Create/cache only one Format object per distinct set of format options."""
|
|
58
|
-
|
|
59
|
-
def __init__(self, wb: Workbook) -> None:
|
|
60
|
-
self._cache: dict[str, Format] = {}
|
|
61
|
-
self.wb = wb
|
|
62
|
-
|
|
63
|
-
@staticmethod
|
|
64
|
-
def _key(fmt: dict[str, Any]) -> str:
|
|
65
|
-
return json.dumps(fmt, sort_keys=True, default=str)
|
|
66
|
-
|
|
67
|
-
def get(self, fmt: dict[str, Any] | Format) -> Format:
|
|
68
|
-
if not isinstance(fmt, dict):
|
|
69
|
-
wbfmt = fmt
|
|
70
|
-
else:
|
|
71
|
-
key = self._key(fmt)
|
|
72
|
-
wbfmt = self._cache.get(key)
|
|
73
|
-
if wbfmt is None:
|
|
74
|
-
wbfmt = self.wb.add_format(fmt)
|
|
75
|
-
self._cache[key] = wbfmt
|
|
76
|
-
return wbfmt
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def _adjacent_cols(df: DataFrame, cols: Iterable[str], min_max: dict[str, Any]) -> bool:
|
|
80
|
-
"""Indicate if the given columns are all adjacent to one another."""
|
|
81
|
-
idxs = sorted(df.get_column_index(col) for col in cols)
|
|
82
|
-
if idxs != sorted(range(min(idxs), max(idxs) + 1)):
|
|
83
|
-
return False
|
|
84
|
-
else:
|
|
85
|
-
columns = df.columns
|
|
86
|
-
min_max["min"] = {"idx": idxs[0], "name": columns[idxs[0]]}
|
|
87
|
-
min_max["max"] = {"idx": idxs[-1], "name": columns[idxs[-1]]}
|
|
88
|
-
return True
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
def _all_integer_cols(cols: Iterable[str], schema: Schema) -> bool:
|
|
92
|
-
"""Indicate if the given columns are all integer-typed."""
|
|
93
|
-
return all(schema[col].is_integer() for col in cols)
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
def _unpack_multi_column_dict(
|
|
97
|
-
d: dict[str | Sequence[str], Any] | Any,
|
|
98
|
-
) -> dict[str, Any] | Any:
|
|
99
|
-
"""Unpack multi-col dictionary into equivalent single-col definitions."""
|
|
100
|
-
if not isinstance(d, dict):
|
|
101
|
-
return d
|
|
102
|
-
unpacked: dict[str, Any] = {}
|
|
103
|
-
for key, value in d.items():
|
|
104
|
-
if isinstance(key, str) or not isinstance(key, Sequence):
|
|
105
|
-
key = (key,)
|
|
106
|
-
for k in key:
|
|
107
|
-
unpacked[k] = value
|
|
108
|
-
return unpacked
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
def _xl_apply_conditional_formats(
    df: DataFrame,
    ws: Worksheet,
    *,
    conditional_formats: ConditionalFormatDict,
    table_start: tuple[int, int],
    include_header: bool,
    format_cache: _XLFormatCache,
) -> None:
    """Take all conditional formatting options and apply them to the table/range.

    Each key of `conditional_formats` is expanded to a tuple of column names;
    each value is a single format (a string type name or a dict of options) or
    a collection of them. `ws.conditional_format` is called once for every
    (column set, format) combination.

    The format dicts supplied by the caller are never modified: each one is
    shallow-copied before the computed "multi_range" / resolved "format"
    entries are written into it.
    """
    from xlsxwriter.format import Format

    for cols, formats in _expand_selector_dicts(
        df, conditional_formats, expand_keys=True, expand_values=False, tuple_keys=True
    ).items():
        # a one-element column tuple is handled as a plain column name
        if not isinstance(cols, str) and len(cols) == 1:
            cols = next(iter(cols))
        if isinstance(formats, (str, dict)):
            formats = [formats]

        for fmt in formats:
            # copy dict options so a definition reused for several column sets
            # does not carry "multi_range"/"format" over from an earlier pass
            fmt = dict(fmt) if isinstance(fmt, dict) else {"type": fmt}

            if isinstance(cols, str):
                col_range = _xl_column_range(
                    df, table_start, cols, include_header=include_header
                )
            else:
                col_range = _xl_column_multi_range(
                    df, table_start, cols, include_header=include_header
                )
                if " " in col_range:
                    # non-adjacent columns: pass the space-separated ranges as
                    # "multi_range" and use the first column's range as anchor
                    col = next(iter(cols))
                    fmt["multi_range"] = col_range
                    col_range = _xl_column_range(
                        df, table_start, col, include_header=include_header
                    )

            if "format" in fmt:
                f = fmt["format"]
                fmt["format"] = (
                    f  # already registered
                    if isinstance(f, Format)
                    else format_cache.get(
                        {"num_format": f} if isinstance(f, str) else f
                    )
                )
            ws.conditional_format(col_range, fmt)
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
@overload
|
|
162
|
-
def _xl_column_range(
|
|
163
|
-
df: DataFrame,
|
|
164
|
-
table_start: tuple[int, int],
|
|
165
|
-
col: str | tuple[int, int],
|
|
166
|
-
*,
|
|
167
|
-
include_header: bool,
|
|
168
|
-
as_range: Literal[True] = ...,
|
|
169
|
-
) -> str: ...
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
@overload
|
|
173
|
-
def _xl_column_range(
|
|
174
|
-
df: DataFrame,
|
|
175
|
-
table_start: tuple[int, int],
|
|
176
|
-
col: str | tuple[int, int],
|
|
177
|
-
*,
|
|
178
|
-
include_header: bool,
|
|
179
|
-
as_range: Literal[False],
|
|
180
|
-
) -> tuple[int, int, int, int]: ...
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
def _xl_column_range(
|
|
184
|
-
df: DataFrame,
|
|
185
|
-
table_start: tuple[int, int],
|
|
186
|
-
col: str | tuple[int, int],
|
|
187
|
-
*,
|
|
188
|
-
include_header: bool,
|
|
189
|
-
as_range: bool = True,
|
|
190
|
-
) -> tuple[int, int, int, int] | str:
|
|
191
|
-
"""Return the Excel sheet range of a named column, accounting for all offsets."""
|
|
192
|
-
col_start = (
|
|
193
|
-
table_start[0] + int(include_header),
|
|
194
|
-
table_start[1] + (df.get_column_index(col) if isinstance(col, str) else col[0]),
|
|
195
|
-
)
|
|
196
|
-
col_finish = (
|
|
197
|
-
col_start[0] + df.height - 1,
|
|
198
|
-
col_start[1] + (0 if isinstance(col, str) else (col[1] - col[0])),
|
|
199
|
-
)
|
|
200
|
-
if as_range:
|
|
201
|
-
return "".join(_xl_rowcols_to_range(*col_start, *col_finish))
|
|
202
|
-
else:
|
|
203
|
-
return col_start + col_finish
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
def _xl_column_multi_range(
    df: DataFrame,
    table_start: tuple[int, int],
    cols: Iterable[str],
    *,
    include_header: bool,
) -> str:
    """Build the range string covering several columns.

    Adjacent columns produce one spanning range; otherwise the individual
    column ranges are joined with spaces (the xlsxwriter 'multi_range' form).
    """
    bounds: dict[str, Any] = {}
    if not _adjacent_cols(df, cols, min_max=bounds):
        return " ".join(
            _xl_column_range(df, table_start, name, include_header=include_header)
            for name in cols
        )

    span = (bounds["min"]["idx"], bounds["max"]["idx"])
    return _xl_column_range(df, table_start, span, include_header=include_header)
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
def _xl_inject_dummy_table_columns(
    df: DataFrame,
    coldefs: dict[str, Any],
    *,
    dtype: dict[str, PolarsDataType] | PolarsDataType | None = None,
    expr: Expr | None = None,
) -> DataFrame:
    """Insert dummy frame columns in order to create empty/named table columns.

    Parameters
    ----------
    df
        Frame to extend.
    coldefs
        Mapping of new column name to its definition. A dict definition may
        carry "return_dtype", "insert_before" and "insert_after" entries
        ("insert_before" wins if both are given); any other definition simply
        appends the column at the end.
    dtype
        Dtype for the new columns, or a per-column mapping of dtypes (columns
        missing from the mapping use Float64).
    expr
        Expression used to populate the new columns; a null literal if omitted.

    Raises
    ------
    DuplicateError
        If a new column name already exists in the frame.
    """
    df_original_columns = set(df.columns)
    df_select_cols = df.columns.copy()
    cast_lookup: dict[str, Any] = {}

    for col, definition in coldefs.items():
        if col in df_original_columns:
            msg = f"cannot create a second {col!r} column"
            raise DuplicateError(msg)
        elif not isinstance(definition, dict):
            df_select_cols.append(col)
        else:
            cast_lookup[col] = definition.get("return_dtype")
            insert_before = definition.get("insert_before")
            insert_after = definition.get("insert_after")

            if insert_after is None and insert_before is None:
                df_select_cols.append(col)
            else:
                insert_idx = (
                    df_select_cols.index(insert_after) + 1  # type: ignore[arg-type]
                    if insert_before is None
                    else df_select_cols.index(insert_before)
                )
                df_select_cols.insert(insert_idx, col)

    expr = F.lit(None) if expr is None else expr

    def _placeholder(col: str) -> Expr:
        explicit = cast_lookup.get(col)
        if not dtype and explicit is None:
            return expr.alias(col)
        # an explicit "return_dtype" takes priority; a definition without one
        # (stored as None) falls back to `dtype` so that None is never cast to
        if explicit is None:
            target = dtype.get(col, Float64) if isinstance(dtype, dict) else dtype
        else:
            target = explicit
        return expr.cast(target).alias(col)  # type: ignore[arg-type]

    return df.select(
        [
            col if col in df_original_columns else _placeholder(col)
            for col in df_select_cols
        ]
    )
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
def _xl_inject_sparklines(
    ws: Worksheet,
    df: DataFrame,
    table_start: tuple[int, int],
    col: str,
    *,
    include_header: bool,
    params: Sequence[str] | dict[str, Any],
) -> None:
    """Add one sparkline per data row to the (already created) column `col`.

    `params` is either the sequence of source column names or a dict whose
    "columns" entry holds them; the remaining dict entries (except the
    polars-only "insert_after"/"insert_before") are passed on as sparkline
    options. The source columns must be adjacent.
    """
    from xlsxwriter.utility import xl_rowcol_to_cell

    bounds: dict[str, Any] = {}
    has_options = isinstance(params, dict)
    data_cols = params.get("columns") if has_options else params
    if not data_cols:
        msg = "supplying 'columns' param value is mandatory for sparklines"
        raise ValueError(msg)
    if not _adjacent_cols(df, data_cols, min_max=bounds):
        msg = "sparkline data range/cols must all be adjacent"
        raise RuntimeError(msg)

    first_row, spk_col, _, _ = _xl_column_range(
        df, table_start, col, include_header=include_header, as_range=False
    )
    data_start_col = table_start[1] + bounds["min"]["idx"]
    data_end_col = table_start[1] + bounds["max"]["idx"]

    options: dict[str, Any] = {}
    if has_options:
        # strip polars-specific params before passing to xlsxwriter
        polars_only = ("columns", "insert_after", "insert_before")
        for name, val in params.items():
            if name not in polars_only:
                options[name] = val
        if "negative_points" not in options:
            options["negative_points"] = options.get("type") in ("column", "win_loss")

    # one sparkline per data row, each reading the same row of the source columns
    for spk_row in range(first_row, first_row + df.height):
        data_start = xl_rowcol_to_cell(spk_row, data_start_col)
        data_end = xl_rowcol_to_cell(spk_row, data_end_col)
        options["range"] = f"{data_start}:{data_end}"
        ws.add_sparkline(spk_row, spk_col, options)
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
def _xl_rowcols_to_range(*row_col_pairs: int) -> list[str]:
    """Convert a flat run of row/col indexes into "A1:B2"-style range refs.

    The arguments are read as (row, col) pairs, and consecutive pairs are
    combined into "start:end" references.
    """
    from xlsxwriter.utility import xl_rowcol_to_cell

    cells = [xl_rowcol_to_cell(row, col) for row, col in _cluster(row_col_pairs)]
    return [f"{start}:{end}" for start, end in _cluster(cells)]
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
def _xl_setup_table_columns(
    df: DataFrame,
    format_cache: _XLFormatCache,
    column_totals: ColumnTotalsDefinition | None = None,
    column_formats: ColumnFormatDict | None = None,
    dtype_formats: dict[OneOrMoreDataTypes, str] | None = None,
    header_format: dict[str, Any] | None = None,
    sparklines: dict[str, Sequence[str] | dict[str, Any]] | None = None,
    formulas: dict[str, str | dict[str, str]] | None = None,
    row_totals: RowTotalsDefinition | None = None,
    float_precision: int = 3,
    table_style: dict[str, Any] | str | None = None,
) -> tuple[list[dict[str, Any]], dict[str | tuple[str, ...], str], DataFrame]:
    """Setup and unify all column-related formatting/defaults.

    Nested and object columns are replaced by their string representation,
    placeholder columns are injected for sparklines, formulas and row totals,
    and a format registered with `format_cache` is resolved for every column.

    Neither the format dicts supplied by the caller nor the module-level
    default format table are modified.

    Returns
    -------
    tuple
        ``(table_columns, column_formats, df)``: the list of per-column table
        option dicts, the mapping of column name to resolved format, and the
        (possibly extended) frame.

    Raises
    ------
    TypeError
        If a `dtype_formats` value is not a format string.
    """

    # no excel support for compound types; cast to their simple string representation
    def _map_str(s: Series) -> Series:
        return s.__class__(
            s.name, [(None if v is None else str(v)) for v in s.to_list()]
        )

    cast_cols = [
        F.col(col).map_batches(_map_str).alias(col)
        for col, tp in df.schema.items()
        if tp.is_nested() or tp.is_object()
    ]
    if cast_cols:
        df = df.with_columns(cast_cols)

    # expand/normalise column formats
    column_formats = _unpack_multi_column_dict(  # type: ignore[assignment]
        _expand_selector_dicts(
            df, column_formats, expand_keys=True, expand_values=False, tuple_keys=True
        )
    )

    # normalise row totals
    if not row_totals:
        row_totals_dtype = None
        row_total_funcs = {}
    else:
        schema = df.schema
        numeric_cols = {col for col, tp in schema.items() if tp.is_numeric()}
        if not isinstance(row_totals, dict):
            row_totals_dtype = (
                Int64 if _all_integer_cols(numeric_cols, schema) else Float64
            )
            sum_cols = (
                numeric_cols
                if row_totals is True
                else (
                    {row_totals}
                    if isinstance(row_totals, str)
                    else set(_expand_selectors(df, row_totals))
                )
            )
            # name the column "Total" if most column names start upper-case, else "total"
            n_ucase = sum((c[0] if c else "").isupper() for c in df.columns)
            total = f"{'T' if (n_ucase > df.width // 2) else 't'}otal"
            row_total_funcs = {total: _xl_table_formula(df, sum_cols, "sum")}
            row_totals = [total]
        else:
            row_totals = _expand_selector_dicts(
                df, row_totals, expand_keys=False, expand_values=True
            )
            row_totals_dtype = {  # type: ignore[assignment]
                nm: (
                    Int64
                    if _all_integer_cols(numeric_cols if cols is True else cols, schema)
                    else Float64
                )
                for nm, cols in row_totals.items()
            }
            row_total_funcs = {
                name: _xl_table_formula(
                    df, (numeric_cols if cols is True else cols), "sum"
                )
                for name, cols in row_totals.items()
            }

    # expand/normalise column totals
    if column_totals is True:
        column_totals = {numeric(): "sum", **dict.fromkeys(row_totals or (), "sum")}
    elif isinstance(column_totals, str):
        fn = column_totals.lower()
        column_totals = {numeric(): fn, **dict.fromkeys(row_totals or (), fn)}

    column_totals = _unpack_multi_column_dict(  # type: ignore[assignment]
        _expand_selector_dicts(df, column_totals, expand_keys=True, expand_values=False)
        if isinstance(column_totals, dict)
        else _expand_selectors(df, column_totals)
    )
    column_total_funcs = (
        dict.fromkeys(column_totals, "sum")
        if isinstance(column_totals, Sequence)
        else (column_totals.copy() if isinstance(column_totals, dict) else {})
    )

    # normalise formulas
    column_formulas = {
        col: {"formula": options} if isinstance(options, str) else options
        for col, options in (formulas or {}).items()
    }

    # normalise formats
    column_formats = dict(column_formats or {})
    dtype_formats = dict(dtype_formats or {})

    # a tuple/frozenset key stands for several dtypes that share one format
    for tp in list(dtype_formats):
        if isinstance(tp, (tuple, frozenset)):
            dtype_formats.update(dict.fromkeys(tp, dtype_formats.pop(tp)))
    for fmt in dtype_formats.values():
        if not isinstance(fmt, str):
            msg = f"invalid dtype_format value: {fmt!r} (expected format string, got {qualified_type_name(fmt)!r})"
            raise TypeError(msg)

    # inject sparkline/row-total placeholder(s)
    if sparklines:
        df = _xl_inject_dummy_table_columns(df, sparklines)
    if column_formulas:
        df = _xl_inject_dummy_table_columns(df, column_formulas)
    if row_totals:
        df = _xl_inject_dummy_table_columns(df, row_total_funcs, dtype=row_totals_dtype)

    # seed format cache with default fallback format
    fmt_default = format_cache.get({"valign": "vcenter"})

    if table_style is None:
        # no table style; apply default black (+ve) & red (-ve) numeric formatting
        int_base_fmt = _XL_DEFAULT_INTEGER_FORMAT_
        flt_base_fmt = _XL_DEFAULT_FLOAT_FORMAT_
    else:
        # if we have a table style, defer the colours to that style
        int_base_fmt = _XL_DEFAULT_INTEGER_FORMAT_.split(";", 1)[0]
        flt_base_fmt = _XL_DEFAULT_FLOAT_FORMAT_.split(";", 1)[0]

    # per-call defaults: built on a copy so that the integer formats chosen for
    # this call are not written back into the module-level default table
    default_dtype_formats = dict(_XL_DEFAULT_DTYPE_FORMATS_)
    default_dtype_formats.update(dict.fromkeys(INTEGER_DTYPES, int_base_fmt))

    zeros = "0" * float_precision
    fmt_float = int_base_fmt if not zeros else flt_base_fmt.replace(".000", f".{zeros}")

    # assign default dtype formats (explicitly supplied formats take priority)
    for tp, fmt in default_dtype_formats.items():
        dtype_formats.setdefault(tp, fmt)
    for tp in FLOAT_DTYPES:
        dtype_formats.setdefault(tp, fmt_float)

    # associate formats/functions with specific columns
    for col, tp in df.schema.items():
        base_type = tp.base_type()
        if base_type in dtype_formats:
            fmt = dtype_formats.get(tp, dtype_formats[base_type])
            column_formats.setdefault(col, fmt)
        if col not in column_formats:
            column_formats[col] = fmt_default

    # ensure externally supplied formats are made available
    for col, fmt in column_formats.items():  # type: ignore[assignment]
        if isinstance(fmt, str):
            column_formats[col] = format_cache.get(
                {"num_format": fmt, "valign": "vcenter"}
            )
        elif isinstance(fmt, dict):
            # fill in defaults on a copy: the same dict may be shared by several
            # columns (multi-column key), and a "num_format" chosen for one
            # column's dtype must not leak into the others or into the caller's dict
            fmt = dict(fmt)
            if "num_format" not in fmt:
                tp = df.schema.get(col)
                if tp in dtype_formats:
                    fmt["num_format"] = dtype_formats[tp]
            if "valign" not in fmt:
                fmt["valign"] = "vcenter"
            column_formats[col] = format_cache.get(fmt)

    # optional custom header format
    col_header_format = format_cache.get(header_format) if header_format else None

    # assemble table columns (options whose value is None are omitted)
    table_columns = [
        {
            k: v
            for k, v in {
                "header": col,
                "format": column_formats[col],
                "header_format": col_header_format,
                "total_function": column_total_funcs.get(col),
                "formula": (
                    row_total_funcs.get(col)
                    or column_formulas.get(col, {}).get("formula")
                ),
            }.items()
            if v is not None
        }
        for col in df.columns
    ]
    return table_columns, column_formats, df  # type: ignore[return-value]
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
def _xl_setup_table_options(
|
|
535
|
-
table_style: dict[str, Any] | str | None,
|
|
536
|
-
) -> tuple[dict[str, Any] | str | None, dict[str, Any]]:
|
|
537
|
-
"""Setup table options, distinguishing style name from other formatting."""
|
|
538
|
-
if isinstance(table_style, dict):
|
|
539
|
-
valid_options = (
|
|
540
|
-
"style",
|
|
541
|
-
"banded_columns",
|
|
542
|
-
"banded_rows",
|
|
543
|
-
"first_column",
|
|
544
|
-
"last_column",
|
|
545
|
-
)
|
|
546
|
-
for key in table_style:
|
|
547
|
-
if key not in valid_options:
|
|
548
|
-
msg = f"invalid table style key: {key!r}"
|
|
549
|
-
raise ValueError(msg)
|
|
550
|
-
|
|
551
|
-
table_options = table_style.copy()
|
|
552
|
-
table_style = table_options.pop("style", None)
|
|
553
|
-
else:
|
|
554
|
-
table_options = {}
|
|
555
|
-
|
|
556
|
-
return table_style, table_options
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
def _xl_worksheet_in_workbook(
|
|
560
|
-
wb: Workbook, ws: Worksheet, *, return_worksheet: bool = False
|
|
561
|
-
) -> bool | Worksheet:
|
|
562
|
-
if any(ws is sheet for sheet in wb.worksheets()):
|
|
563
|
-
return ws if return_worksheet else True
|
|
564
|
-
msg = f"the given workbook object {wb.filename!r} is not the parent of worksheet {ws.name!r}"
|
|
565
|
-
raise ValueError(msg)
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
def _xl_setup_workbook(
    workbook: Workbook | BytesIO | Path | str | None,
    worksheet: str | Worksheet | None = None,
) -> tuple[Workbook, Worksheet, bool]:
    """Resolve the workbook and worksheet that will be written to.

    An existing Workbook object is used as-is; otherwise a new Workbook is
    created from the given buffer or path ("dataframe.xlsx" when no target is
    given, with ".xlsx" appended to paths that have no suffix). The worksheet
    is looked up in the workbook, or added when it is not found.

    Returns
    -------
    tuple
        ``(workbook, worksheet, can_close)``; `can_close` is False only when
        the caller supplied the Workbook object.

    Raises
    ------
    TypeError
        If a Worksheet object is given without its parent Workbook object.
    """
    from xlsxwriter import Workbook
    from xlsxwriter.worksheet import Worksheet

    if isinstance(workbook, Workbook):
        wb, can_close = workbook, False
        if isinstance(worksheet, Worksheet) and _xl_worksheet_in_workbook(
            wb, worksheet
        ):
            ws = worksheet
        else:
            ws = wb.get_worksheet_by_name(name=worksheet)
    elif isinstance(worksheet, Worksheet):
        msg = f"worksheet object requires the parent workbook object; found workbook={workbook!r}"
        raise TypeError(msg)
    else:
        ws, can_close = None, True
        workbook_options = {
            "nan_inf_to_errors": True,
            "strings_to_formulas": False,
            "default_date_format": _XL_DEFAULT_DTYPE_FORMATS_[Date],
        }
        if isinstance(workbook, BytesIO):
            wb = Workbook(workbook, workbook_options)
        else:
            if workbook is None:
                file = Path("dataframe.xlsx")
            elif isinstance(workbook, str):
                file = Path(workbook)
            else:
                file = workbook

            if isinstance(file, PathLike):
                # default to an ".xlsx" suffix, then expand "~" and make the path absolute
                with_suffix = file if file.suffix else file.with_suffix(".xlsx")
                file = with_suffix.expanduser().resolve(strict=False)
            wb = Workbook(file, workbook_options)

    if ws is None:
        if isinstance(worksheet, Worksheet):
            ws = _xl_worksheet_in_workbook(wb, worksheet, return_worksheet=True)
        else:
            ws = wb.add_worksheet(name=worksheet)
    return wb, ws, can_close
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
def _xl_table_formula(df: DataFrame, cols: Iterable[str], func: str) -> str:
    """Build a table formula applying `func` to the given columns of each row.

    Adjacent columns are referenced as a single structured-reference span;
    otherwise every column is referenced individually, comma-separated.
    """
    bounds: dict[str, Any] = {}
    if isinstance(cols, str):
        cols = [cols]

    adjacent = _adjacent_cols(df, cols, min_max=bounds)
    fn = func.upper()
    if adjacent:
        first, last = bounds["min"]["name"], bounds["max"]["name"]
        return f"={fn}([@[{first}]:[{last}]])"

    colrefs = ",".join(f"[@[{c}]]" for c in cols)
    return f"={fn}({colrefs})"
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
def _xl_unique_table_name(wb: Workbook) -> str:
|
|
635
|
-
"""Establish a unique (per-workbook) table object name."""
|
|
636
|
-
table_prefix = "Frame"
|
|
637
|
-
polars_tables: set[str] = set()
|
|
638
|
-
for ws in wb.worksheets():
|
|
639
|
-
polars_tables.update(
|
|
640
|
-
tbl["name"] for tbl in ws.tables if tbl["name"].startswith(table_prefix)
|
|
641
|
-
)
|
|
642
|
-
n = len(polars_tables)
|
|
643
|
-
table_name = f"{table_prefix}{n}"
|
|
644
|
-
while table_name in polars_tables:
|
|
645
|
-
n += 1
|
|
646
|
-
table_name = f"{table_prefix}{n}"
|
|
647
|
-
return table_name
|