polars-runtime-compat 1.34.0b2__cp39-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/.gitkeep +0 -0
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars/__init__.py +528 -0
- polars/_cpu_check.py +265 -0
- polars/_dependencies.py +355 -0
- polars/_plr.py +99 -0
- polars/_plr.pyi +2496 -0
- polars/_reexport.py +23 -0
- polars/_typing.py +478 -0
- polars/_utils/__init__.py +37 -0
- polars/_utils/async_.py +102 -0
- polars/_utils/cache.py +176 -0
- polars/_utils/cloud.py +40 -0
- polars/_utils/constants.py +29 -0
- polars/_utils/construction/__init__.py +46 -0
- polars/_utils/construction/dataframe.py +1397 -0
- polars/_utils/construction/other.py +72 -0
- polars/_utils/construction/series.py +560 -0
- polars/_utils/construction/utils.py +118 -0
- polars/_utils/convert.py +224 -0
- polars/_utils/deprecation.py +406 -0
- polars/_utils/getitem.py +457 -0
- polars/_utils/logging.py +11 -0
- polars/_utils/nest_asyncio.py +264 -0
- polars/_utils/parquet.py +15 -0
- polars/_utils/parse/__init__.py +12 -0
- polars/_utils/parse/expr.py +242 -0
- polars/_utils/polars_version.py +19 -0
- polars/_utils/pycapsule.py +53 -0
- polars/_utils/scan.py +27 -0
- polars/_utils/serde.py +63 -0
- polars/_utils/slice.py +215 -0
- polars/_utils/udfs.py +1251 -0
- polars/_utils/unstable.py +63 -0
- polars/_utils/various.py +782 -0
- polars/_utils/wrap.py +25 -0
- polars/api.py +370 -0
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +19 -0
- polars/catalog/unity/client.py +733 -0
- polars/catalog/unity/models.py +152 -0
- polars/config.py +1571 -0
- polars/convert/__init__.py +25 -0
- polars/convert/general.py +1046 -0
- polars/convert/normalize.py +261 -0
- polars/dataframe/__init__.py +5 -0
- polars/dataframe/_html.py +186 -0
- polars/dataframe/frame.py +12582 -0
- polars/dataframe/group_by.py +1067 -0
- polars/dataframe/plotting.py +257 -0
- polars/datatype_expr/__init__.py +5 -0
- polars/datatype_expr/array.py +56 -0
- polars/datatype_expr/datatype_expr.py +304 -0
- polars/datatype_expr/list.py +18 -0
- polars/datatype_expr/struct.py +69 -0
- polars/datatypes/__init__.py +122 -0
- polars/datatypes/_parse.py +195 -0
- polars/datatypes/_utils.py +48 -0
- polars/datatypes/classes.py +1213 -0
- polars/datatypes/constants.py +11 -0
- polars/datatypes/constructor.py +172 -0
- polars/datatypes/convert.py +366 -0
- polars/datatypes/group.py +130 -0
- polars/exceptions.py +230 -0
- polars/expr/__init__.py +7 -0
- polars/expr/array.py +964 -0
- polars/expr/binary.py +346 -0
- polars/expr/categorical.py +306 -0
- polars/expr/datetime.py +2620 -0
- polars/expr/expr.py +11272 -0
- polars/expr/list.py +1408 -0
- polars/expr/meta.py +444 -0
- polars/expr/name.py +321 -0
- polars/expr/string.py +3045 -0
- polars/expr/struct.py +357 -0
- polars/expr/whenthen.py +185 -0
- polars/functions/__init__.py +193 -0
- polars/functions/aggregation/__init__.py +33 -0
- polars/functions/aggregation/horizontal.py +298 -0
- polars/functions/aggregation/vertical.py +341 -0
- polars/functions/as_datatype.py +848 -0
- polars/functions/business.py +138 -0
- polars/functions/col.py +384 -0
- polars/functions/datatype.py +121 -0
- polars/functions/eager.py +524 -0
- polars/functions/escape_regex.py +29 -0
- polars/functions/lazy.py +2751 -0
- polars/functions/len.py +68 -0
- polars/functions/lit.py +210 -0
- polars/functions/random.py +22 -0
- polars/functions/range/__init__.py +19 -0
- polars/functions/range/_utils.py +15 -0
- polars/functions/range/date_range.py +303 -0
- polars/functions/range/datetime_range.py +370 -0
- polars/functions/range/int_range.py +348 -0
- polars/functions/range/linear_space.py +311 -0
- polars/functions/range/time_range.py +287 -0
- polars/functions/repeat.py +301 -0
- polars/functions/whenthen.py +353 -0
- polars/interchange/__init__.py +10 -0
- polars/interchange/buffer.py +77 -0
- polars/interchange/column.py +190 -0
- polars/interchange/dataframe.py +230 -0
- polars/interchange/from_dataframe.py +328 -0
- polars/interchange/protocol.py +303 -0
- polars/interchange/utils.py +170 -0
- polars/io/__init__.py +64 -0
- polars/io/_utils.py +317 -0
- polars/io/avro.py +49 -0
- polars/io/clipboard.py +36 -0
- polars/io/cloud/__init__.py +17 -0
- polars/io/cloud/_utils.py +80 -0
- polars/io/cloud/credential_provider/__init__.py +17 -0
- polars/io/cloud/credential_provider/_builder.py +520 -0
- polars/io/cloud/credential_provider/_providers.py +618 -0
- polars/io/csv/__init__.py +9 -0
- polars/io/csv/_utils.py +38 -0
- polars/io/csv/batched_reader.py +142 -0
- polars/io/csv/functions.py +1495 -0
- polars/io/database/__init__.py +6 -0
- polars/io/database/_arrow_registry.py +70 -0
- polars/io/database/_cursor_proxies.py +147 -0
- polars/io/database/_executor.py +578 -0
- polars/io/database/_inference.py +314 -0
- polars/io/database/_utils.py +144 -0
- polars/io/database/functions.py +516 -0
- polars/io/delta.py +499 -0
- polars/io/iceberg/__init__.py +3 -0
- polars/io/iceberg/_utils.py +697 -0
- polars/io/iceberg/dataset.py +556 -0
- polars/io/iceberg/functions.py +151 -0
- polars/io/ipc/__init__.py +8 -0
- polars/io/ipc/functions.py +514 -0
- polars/io/json/__init__.py +3 -0
- polars/io/json/read.py +101 -0
- polars/io/ndjson.py +332 -0
- polars/io/parquet/__init__.py +17 -0
- polars/io/parquet/field_overwrites.py +140 -0
- polars/io/parquet/functions.py +722 -0
- polars/io/partition.py +491 -0
- polars/io/plugins.py +187 -0
- polars/io/pyarrow_dataset/__init__.py +5 -0
- polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
- polars/io/pyarrow_dataset/functions.py +79 -0
- polars/io/scan_options/__init__.py +5 -0
- polars/io/scan_options/_options.py +59 -0
- polars/io/scan_options/cast_options.py +126 -0
- polars/io/spreadsheet/__init__.py +6 -0
- polars/io/spreadsheet/_utils.py +52 -0
- polars/io/spreadsheet/_write_utils.py +647 -0
- polars/io/spreadsheet/functions.py +1323 -0
- polars/lazyframe/__init__.py +9 -0
- polars/lazyframe/engine_config.py +61 -0
- polars/lazyframe/frame.py +8564 -0
- polars/lazyframe/group_by.py +669 -0
- polars/lazyframe/in_process.py +42 -0
- polars/lazyframe/opt_flags.py +333 -0
- polars/meta/__init__.py +14 -0
- polars/meta/build.py +33 -0
- polars/meta/index_type.py +27 -0
- polars/meta/thread_pool.py +50 -0
- polars/meta/versions.py +120 -0
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +213 -0
- polars/ml/utilities.py +30 -0
- polars/plugins.py +155 -0
- polars/py.typed +0 -0
- polars/pyproject.toml +96 -0
- polars/schema.py +265 -0
- polars/selectors.py +3117 -0
- polars/series/__init__.py +5 -0
- polars/series/array.py +776 -0
- polars/series/binary.py +254 -0
- polars/series/categorical.py +246 -0
- polars/series/datetime.py +2275 -0
- polars/series/list.py +1087 -0
- polars/series/plotting.py +191 -0
- polars/series/series.py +9197 -0
- polars/series/string.py +2367 -0
- polars/series/struct.py +154 -0
- polars/series/utils.py +191 -0
- polars/sql/__init__.py +7 -0
- polars/sql/context.py +677 -0
- polars/sql/functions.py +139 -0
- polars/string_cache.py +185 -0
- polars/testing/__init__.py +13 -0
- polars/testing/asserts/__init__.py +9 -0
- polars/testing/asserts/frame.py +231 -0
- polars/testing/asserts/series.py +219 -0
- polars/testing/asserts/utils.py +12 -0
- polars/testing/parametric/__init__.py +33 -0
- polars/testing/parametric/profiles.py +107 -0
- polars/testing/parametric/strategies/__init__.py +22 -0
- polars/testing/parametric/strategies/_utils.py +14 -0
- polars/testing/parametric/strategies/core.py +615 -0
- polars/testing/parametric/strategies/data.py +452 -0
- polars/testing/parametric/strategies/dtype.py +436 -0
- polars/testing/parametric/strategies/legacy.py +169 -0
- polars/type_aliases.py +24 -0
- polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
- polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
- polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
polars/_utils/getitem.py
ADDED
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Sequence
|
|
4
|
+
from typing import TYPE_CHECKING, Any, NoReturn, overload
|
|
5
|
+
|
|
6
|
+
import polars._reexport as pl
|
|
7
|
+
import polars.functions as F
|
|
8
|
+
from polars._dependencies import _check_for_numpy
|
|
9
|
+
from polars._dependencies import numpy as np
|
|
10
|
+
from polars._utils.constants import U32_MAX
|
|
11
|
+
from polars._utils.slice import PolarsSlice
|
|
12
|
+
from polars._utils.various import qualified_type_name, range_to_slice
|
|
13
|
+
from polars.datatypes.classes import (
|
|
14
|
+
Boolean,
|
|
15
|
+
Int8,
|
|
16
|
+
Int16,
|
|
17
|
+
Int32,
|
|
18
|
+
Int64,
|
|
19
|
+
String,
|
|
20
|
+
UInt32,
|
|
21
|
+
UInt64,
|
|
22
|
+
)
|
|
23
|
+
from polars.meta.index_type import get_index_type
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from collections.abc import Iterable
|
|
27
|
+
|
|
28
|
+
from polars import DataFrame, Series
|
|
29
|
+
from polars._typing import (
|
|
30
|
+
MultiColSelector,
|
|
31
|
+
MultiIndexSelector,
|
|
32
|
+
SingleColSelector,
|
|
33
|
+
SingleIndexSelector,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
"get_df_item_by_key",
|
|
38
|
+
"get_series_item_by_key",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@overload
def get_series_item_by_key(s: Series, key: SingleIndexSelector) -> Any: ...


@overload
def get_series_item_by_key(s: Series, key: MultiIndexSelector) -> Series: ...


def get_series_item_by_key(
    s: Series, key: SingleIndexSelector | MultiIndexSelector
) -> Any | Series:
    """
    Look up one element, or a sub-selection, of the Series `s` using `key`.

    Parameters
    ----------
    s
        Series to index into.
    key
        An integer (single element), or a slice, range, sequence, Series or
        NumPy array (multiple elements).

    Raises
    ------
    TypeError
        If `key` is of an unsupported type, if a sequence key cannot be turned
        into an Int64 Series, or if a sequence key starts with a boolean.
    """
    # Single position: delegated to the underlying `get_index_signed`.
    if isinstance(key, int):
        return s._s.get_index_signed(key)

    # Slices and ranges both go through the slice helper.
    if isinstance(key, slice):
        return _select_elements_by_slice(s, key)
    if isinstance(key, range):
        return _select_elements_by_slice(s, range_to_slice(key))

    if isinstance(key, Sequence):
        # An empty sequence selects nothing.
        if not key:
            return s.clear()

        # Only the first element is inspected to detect a boolean mask.
        head = key[0]
        if isinstance(head, bool):
            _raise_on_boolean_mask()

        try:
            as_series = pl.Series("", key, dtype=Int64)
        except TypeError:
            msg = f"cannot select elements using Sequence with elements of type {qualified_type_name(head)!r}"
            raise TypeError(msg) from None

        positions = _convert_series_to_indices(as_series, s.len())
        return _select_elements_by_index(s, positions)

    if isinstance(key, pl.Series):
        positions = _convert_series_to_indices(key, s.len())
        return _select_elements_by_index(s, positions)

    if _check_for_numpy(key) and isinstance(key, np.ndarray):
        positions = _convert_np_ndarray_to_indices(key, s.len())
        return _select_elements_by_index(s, positions)

    msg = f"cannot select elements using key of type {qualified_type_name(key)!r}: {key!r}"
    raise TypeError(msg)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _select_elements_by_slice(s: Series, key: slice) -> Series:
    """Return the part of `s` described by the slice `key`, using `PolarsSlice`."""
    slicer = PolarsSlice(s)
    return slicer.apply(key)  # type: ignore[return-value]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _select_elements_by_index(s: Series, key: Series) -> Series:
    """Gather elements of `s` at the positions held in the Series `key`."""
    gathered = s._s.gather_with_series(key._s)
    return s._from_pyseries(gathered)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# `str` overlaps with `Sequence[str]`
# We can ignore this but we must keep this overload ordering
@overload
def get_df_item_by_key(
    df: DataFrame, key: tuple[SingleIndexSelector, SingleColSelector]
) -> Any: ...


@overload
def get_df_item_by_key(  # type: ignore[overload-overlap]
    df: DataFrame, key: str | tuple[MultiIndexSelector, SingleColSelector]
) -> Series: ...


@overload
def get_df_item_by_key(
    df: DataFrame,
    key: (
        SingleIndexSelector
        | MultiIndexSelector
        | MultiColSelector
        | tuple[SingleIndexSelector, MultiColSelector]
        | tuple[MultiIndexSelector, MultiColSelector]
    ),
) -> DataFrame: ...


def get_df_item_by_key(
    df: DataFrame,
    key: (
        SingleIndexSelector
        | SingleColSelector
        | MultiColSelector
        | MultiIndexSelector
        | tuple[SingleIndexSelector, SingleColSelector]
        | tuple[SingleIndexSelector, MultiColSelector]
        | tuple[MultiIndexSelector, SingleColSelector]
        | tuple[MultiIndexSelector, MultiColSelector]
    ),
) -> DataFrame | Series | Any:
    """
    Resolve a `__getitem__`-style `key` against the DataFrame `df`.

    Dispatch rules, as implemented below:

    - A plain string selects that column.
    - A two-element tuple is read as `(row_key, col_key)`: the columns are
      selected first and the row key is then applied to that selection. When
      the first element is a `bool` or `str`, the whole tuple is treated as a
      column selection instead.
    - Anything else is tried as a row selection; if that raises `TypeError`,
      it is retried as a column selection.
    """
    # A lone string, e.g. df["a"]. Handled explicitly because an empty string
    # would otherwise be treated as an empty Sequence in `_select_rows`.
    if isinstance(key, str):
        return df.get_column(key)

    # Two inputs, e.g. df[1, 2:5]
    if isinstance(key, tuple) and len(key) == 2:
        rows, cols = key

        # df[True, False] and df["a", "b"] are unambiguous column selections.
        if isinstance(rows, (bool, str)):
            return _select_columns(df, key)  # type: ignore[arg-type]

        picked = _select_columns(df, cols)
        if picked.is_empty():
            return picked
        if isinstance(picked, pl.Series):
            return get_series_item_by_key(picked, rows)
        return _select_rows(picked, rows)

    # Single input - df[1] - or multiple inputs - df["a", "b", "c"]
    try:
        return _select_rows(df, key)  # type: ignore[arg-type]
    except TypeError:
        return _select_columns(df, key)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# `str` overlaps with `Sequence[str]`
# We can ignore this but we must keep this overload ordering
@overload
def _select_columns(df: DataFrame, key: SingleColSelector) -> Series: ...  # type: ignore[overload-overlap]


@overload
def _select_columns(df: DataFrame, key: MultiColSelector) -> DataFrame: ...


def _select_columns(
    df: DataFrame, key: SingleColSelector | MultiColSelector
) -> DataFrame | Series:
    """
    Pick one column (as a Series) or several columns (as a DataFrame) from `df`.

    Supported keys are an integer position, a column name, a slice (whose
    bounds may be positions or column names), a range, a sequence, a Series,
    or a NumPy array. Sequences are dispatched on the type of their first
    element; Series and NumPy arrays are dispatched on their dtype.

    Raises
    ------
    TypeError
        If the key, or the element type / dtype it carries, is not supported.
    """
    if isinstance(key, int):
        return df.to_series(key)

    if isinstance(key, str):
        return df.get_column(key)

    if isinstance(key, slice):
        lo, hi, stride = key.start, key.stop, key.step
        # Fast path for common case: df[x, :]
        if lo is None and hi is None and stride is None:
            return df
        # Name bounds are translated to positions; a name used as the stop
        # bound gets +1 so that the named column is included.
        if isinstance(lo, str):
            lo = df.get_column_index(lo)
        if isinstance(hi, str):
            hi = df.get_column_index(hi) + 1
        positions = range(df.width)[slice(lo, hi, stride)]
        return _select_columns_by_index(df, positions)

    if isinstance(key, range):
        return _select_columns_by_index(df, key)

    if isinstance(key, Sequence):
        if not key:
            return df.__class__()
        head = key[0]
        # `bool` is tested before `int` because bool is a subclass of int.
        if isinstance(head, bool):
            return _select_columns_by_mask(df, key)  # type: ignore[arg-type]
        if isinstance(head, int):
            return _select_columns_by_index(df, key)  # type: ignore[arg-type]
        if isinstance(head, str):
            return _select_columns_by_name(df, key)  # type: ignore[arg-type]
        msg = f"cannot select columns using Sequence with elements of type {qualified_type_name(head)!r}"
        raise TypeError(msg)

    if isinstance(key, pl.Series):
        if key.is_empty():
            return df.__class__()
        dtype = key.dtype
        if dtype == String:
            return _select_columns_by_name(df, key)
        if dtype.is_integer():
            return _select_columns_by_index(df, key)
        if dtype == Boolean:
            return _select_columns_by_mask(df, key)
        msg = f"cannot select columns using Series of type {dtype}"
        raise TypeError(msg)

    if _check_for_numpy(key) and isinstance(key, np.ndarray):
        # A 0-d array is promoted to 1-d; anything above 1-d is rejected.
        if key.ndim == 0:
            key = np.atleast_1d(key)
        elif key.ndim != 1:
            msg = "multi-dimensional NumPy arrays not supported as index"
            raise TypeError(msg)

        if len(key) == 0:
            return df.__class__()

        kind = key.dtype.kind
        if kind in ("i", "u"):
            return _select_columns_by_index(df, key)
        if kind == "b":
            return _select_columns_by_mask(df, key)
        if isinstance(key[0], str):
            return _select_columns_by_name(df, key)
        msg = f"cannot select columns using NumPy array of type {key.dtype}"
        raise TypeError(msg)

    msg = (
        f"cannot select columns using key of type {qualified_type_name(key)!r}: {key!r}"
    )
    raise TypeError(msg)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _select_columns_by_index(df: DataFrame, key: Iterable[int]) -> DataFrame:
    """Build a new frame of `df`'s class from the columns at the positions in `key`."""
    frame_type = df.__class__
    columns = list(map(df.to_series, key))
    return frame_type(columns)
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def _select_columns_by_name(df: DataFrame, key: Iterable[str]) -> DataFrame:
    """Select the columns named in `key` through the underlying `_df.select`."""
    names = list(key)
    selected = df._df.select(names)
    return df._from_pydf(selected)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _select_columns_by_mask(
    df: DataFrame, key: Sequence[bool] | Series | np.ndarray[Any, Any]
) -> DataFrame:
    """
    Keep the columns of `df` whose corresponding entry in `key` is truthy.

    Raises
    ------
    ValueError
        If `key` does not have exactly `df.width` entries.
    """
    n_flags = len(key)
    if n_flags != df.width:
        msg = f"expected {df.width} values when selecting columns by boolean mask, got {n_flags}"
        raise ValueError(msg)

    # Positions are produced lazily and consumed by the index-based selector.
    kept = (position for position, flag in enumerate(key) if flag)
    return _select_columns_by_index(df, kept)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
@overload
def _select_rows(df: DataFrame, key: SingleIndexSelector) -> Series: ...


@overload
def _select_rows(df: DataFrame, key: MultiIndexSelector) -> DataFrame: ...


def _select_rows(
    df: DataFrame, key: SingleIndexSelector | MultiIndexSelector
) -> DataFrame | Series:
    """
    Pick one or more rows of `df` using `key`.

    Supported keys are an integer, a slice, a range, a sequence, a Series, or
    a NumPy array.

    Raises
    ------
    IndexError
        If an integer key lies outside `[-df.height, df.height)`.
    TypeError
        If the key type is unsupported or a sequence key starts with a boolean.
        A `TypeError` raised while building the Int64 Series from a sequence
        key is not caught here and propagates to the caller.
    """
    if isinstance(key, int):
        height = df.height
        if not (-height <= key < height):
            msg = f"index {key} is out of bounds for DataFrame of height {height}"
            raise IndexError(msg)
        # NOTE(review): this returns `df.slice(key, 1)` although the first
        # overload declares `Series` for a single index - confirm intent.
        return df.slice(key, 1)

    if isinstance(key, slice):
        return _select_rows_by_slice(df, key)

    if isinstance(key, range):
        return _select_rows_by_slice(df, range_to_slice(key))

    if isinstance(key, Sequence):
        if not key:
            return df.clear()
        # Only the first element is inspected to detect a boolean mask.
        if isinstance(key[0], bool):
            _raise_on_boolean_mask()
        as_series = pl.Series("", key, dtype=Int64)
        positions = _convert_series_to_indices(as_series, df.height)
        return _select_rows_by_index(df, positions)

    if isinstance(key, pl.Series):
        positions = _convert_series_to_indices(key, df.height)
        return _select_rows_by_index(df, positions)

    if _check_for_numpy(key) and isinstance(key, np.ndarray):
        positions = _convert_np_ndarray_to_indices(key, df.height)
        return _select_rows_by_index(df, positions)

    msg = f"cannot select rows using key of type {qualified_type_name(key)!r}: {key!r}"
    raise TypeError(msg)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _select_rows_by_slice(df: DataFrame, key: slice) -> DataFrame:
    """Return the rows of `df` described by the slice `key`, using `PolarsSlice`."""
    slicer = PolarsSlice(df)
    return slicer.apply(key)  # type: ignore[return-value]
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def _select_rows_by_index(df: DataFrame, key: Series) -> DataFrame:
    """Gather rows of `df` at the positions held in the Series `key`."""
    gathered = df._df.gather_with_series(key._s)
    return df._from_pydf(gathered)
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
# UTILS
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _convert_series_to_indices(s: Series, size: int) -> Series:
    """
    Turn the integer Series `s` into a Series of the index dtype.

    Negative positions are made absolute by adding `size`.

    Raises
    ------
    TypeError
        If `s` is a boolean mask or has any other non-integer dtype.
    ValueError
        If the index type is UInt32 and a 64-bit position falls outside the
        range accepted by the bounds checks below.
    """
    # Cases, ordered from fastest to slowest:
    # - `s` already has the index dtype: pl.UInt32 (polars) or pl.UInt64
    #   (polars_u64_idx); returned as-is.
    # - Other unsigned dtypes are cast to the index dtype.
    # - Signed dtypes are cast to the index dtype after negative positions
    #   have been converted to absolute positions.
    idx_type = get_index_type()
    dtype = s.dtype

    if dtype == idx_type:
        return s

    if not dtype.is_integer():
        if dtype == Boolean:
            _raise_on_boolean_mask()
        msg = f"cannot treat Series of type {dtype} as indices"
        raise TypeError(msg)

    if s.len() == 0:
        return pl.Series(s.name, [], dtype=idx_type)

    uses_u32 = idx_type == UInt32

    # With a 32-bit index type, 64-bit inputs are range-checked up front.
    if uses_u32:
        if dtype in {Int64, UInt64} and s.max() >= U32_MAX:  # type: ignore[operator]
            msg = "index positions should be smaller than 2^32"
            raise ValueError(msg)
        if dtype == Int64 and s.min() < -U32_MAX:  # type: ignore[operator]
            msg = "index positions should be greater than or equal to -2^32"
            raise ValueError(msg)

    if dtype.is_signed_integer() and s.min() < 0:  # type: ignore[operator]
        # Narrow signed dtypes are widened (to Int32 or Int64, depending on
        # the index type) before `size` is added.
        if uses_u32:
            idxs = s.cast(Int32) if dtype in {Int8, Int16} else s
        else:
            idxs = s.cast(Int64) if dtype in {Int8, Int16, Int32} else s

        # Update negative indexes to absolute indexes.
        position = F.col(idxs.name)
        absolute = (
            F.when(position < 0)
            .then(size + position)
            .otherwise(position)
            .cast(idx_type)
        )
        return idxs.to_frame().select(absolute).to_series(0)

    return s.cast(idx_type)
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _convert_np_ndarray_to_indices(arr: np.ndarray[Any, Any], size: int) -> Series:
    """
    Convert a NumPy ndarray to indices, taking into account negative values.

    Parameters
    ----------
    arr
        Integer NumPy array of positions. A 0-d array is promoted to 1-d.
    size
        Length of the object being indexed; added to negative positions to
        make them absolute.

    Returns
    -------
    Series
        Unnamed Series with the dtype reported by `get_index_type()`.

    Raises
    ------
    TypeError
        If the array has more than one dimension, is a boolean mask, or has
        any other non-integer dtype (the dtype is only checked for non-empty
        arrays).
    ValueError
        If the index type is UInt32 and a 64-bit position falls outside the
        range accepted by the bounds checks below.
    """
    # Unsigned or signed Numpy array (ordered from fastest to slowest).
    # - np.uint32 (polars) or np.uint64 (polars_u64_idx) numpy array
    #   indexes.
    # - Other unsigned numpy array indexes are converted to pl.UInt32
    #   (polars) or pl.UInt64 (polars_u64_idx).
    # - Signed numpy array indexes are converted pl.UInt32 (polars) or
    #   pl.UInt64 (polars_u64_idx) after negative indexes are converted
    #   to absolute indexes.
    if arr.ndim == 0:
        arr = np.atleast_1d(arr)
    if arr.ndim != 1:
        msg = "only 1D NumPy arrays can be treated as indices"
        raise TypeError(msg)

    idx_type = get_index_type()

    if len(arr) == 0:
        return pl.Series("", [], dtype=idx_type)

    # Numpy array with signed or unsigned integers.
    if arr.dtype.kind not in ("i", "u"):
        if arr.dtype.kind == "b":
            _raise_on_boolean_mask()
        else:
            msg = f"cannot treat NumPy array of type {arr.dtype} as indices"
            raise TypeError(msg)

    if idx_type == UInt32:
        # Membership must be tested against a tuple, not a set: a set lookup
        # compares hashes first, and a `np.dtype` instance does not hash like
        # the scalar type classes, so `arr.dtype in {np.int64, np.uint64}` is
        # always False. That would skip this guard and let the `astype` below
        # silently wrap out-of-range positions.
        if arr.dtype in (np.int64, np.uint64) and arr.max() >= U32_MAX:
            msg = "index positions should be smaller than 2^32"
            raise ValueError(msg)
        if arr.dtype == np.int64 and arr.min() < -U32_MAX:
            msg = "index positions should be greater than or equal to -2^32"
            raise ValueError(msg)

    if arr.dtype.kind == "i" and arr.min() < 0:
        # Widen narrow signed dtypes before `size` is added.
        if idx_type == UInt32:
            if arr.dtype in (np.int8, np.int16):
                arr = arr.astype(np.int32)
        else:
            if arr.dtype in (np.int8, np.int16, np.int32):
                arr = arr.astype(np.int64)

        # Update negative indexes to absolute indexes.
        arr = np.where(arr < 0, size + arr, arr)

    # numpy conversion is much faster
    arr = arr.astype(np.uint32) if idx_type == UInt32 else arr.astype(np.uint64)

    return pl.Series("", arr, dtype=idx_type)
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def _raise_on_boolean_mask() -> NoReturn:
    """Raise the `TypeError` reported when a boolean mask is used to select rows."""
    hint = "\n\nHint: Use the `filter` method instead."
    msg = (
        "selecting rows by passing a boolean mask to `__getitem__` is not supported"
        + hint
    )
    raise TypeError(msg)
|