polars-runtime-compat 1.34.0b2__cp39-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic. Click here for more details.

Files changed (203)
  1. _polars_runtime_compat/.gitkeep +0 -0
  2. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  3. polars/__init__.py +528 -0
  4. polars/_cpu_check.py +265 -0
  5. polars/_dependencies.py +355 -0
  6. polars/_plr.py +99 -0
  7. polars/_plr.pyi +2496 -0
  8. polars/_reexport.py +23 -0
  9. polars/_typing.py +478 -0
  10. polars/_utils/__init__.py +37 -0
  11. polars/_utils/async_.py +102 -0
  12. polars/_utils/cache.py +176 -0
  13. polars/_utils/cloud.py +40 -0
  14. polars/_utils/constants.py +29 -0
  15. polars/_utils/construction/__init__.py +46 -0
  16. polars/_utils/construction/dataframe.py +1397 -0
  17. polars/_utils/construction/other.py +72 -0
  18. polars/_utils/construction/series.py +560 -0
  19. polars/_utils/construction/utils.py +118 -0
  20. polars/_utils/convert.py +224 -0
  21. polars/_utils/deprecation.py +406 -0
  22. polars/_utils/getitem.py +457 -0
  23. polars/_utils/logging.py +11 -0
  24. polars/_utils/nest_asyncio.py +264 -0
  25. polars/_utils/parquet.py +15 -0
  26. polars/_utils/parse/__init__.py +12 -0
  27. polars/_utils/parse/expr.py +242 -0
  28. polars/_utils/polars_version.py +19 -0
  29. polars/_utils/pycapsule.py +53 -0
  30. polars/_utils/scan.py +27 -0
  31. polars/_utils/serde.py +63 -0
  32. polars/_utils/slice.py +215 -0
  33. polars/_utils/udfs.py +1251 -0
  34. polars/_utils/unstable.py +63 -0
  35. polars/_utils/various.py +782 -0
  36. polars/_utils/wrap.py +25 -0
  37. polars/api.py +370 -0
  38. polars/catalog/__init__.py +0 -0
  39. polars/catalog/unity/__init__.py +19 -0
  40. polars/catalog/unity/client.py +733 -0
  41. polars/catalog/unity/models.py +152 -0
  42. polars/config.py +1571 -0
  43. polars/convert/__init__.py +25 -0
  44. polars/convert/general.py +1046 -0
  45. polars/convert/normalize.py +261 -0
  46. polars/dataframe/__init__.py +5 -0
  47. polars/dataframe/_html.py +186 -0
  48. polars/dataframe/frame.py +12582 -0
  49. polars/dataframe/group_by.py +1067 -0
  50. polars/dataframe/plotting.py +257 -0
  51. polars/datatype_expr/__init__.py +5 -0
  52. polars/datatype_expr/array.py +56 -0
  53. polars/datatype_expr/datatype_expr.py +304 -0
  54. polars/datatype_expr/list.py +18 -0
  55. polars/datatype_expr/struct.py +69 -0
  56. polars/datatypes/__init__.py +122 -0
  57. polars/datatypes/_parse.py +195 -0
  58. polars/datatypes/_utils.py +48 -0
  59. polars/datatypes/classes.py +1213 -0
  60. polars/datatypes/constants.py +11 -0
  61. polars/datatypes/constructor.py +172 -0
  62. polars/datatypes/convert.py +366 -0
  63. polars/datatypes/group.py +130 -0
  64. polars/exceptions.py +230 -0
  65. polars/expr/__init__.py +7 -0
  66. polars/expr/array.py +964 -0
  67. polars/expr/binary.py +346 -0
  68. polars/expr/categorical.py +306 -0
  69. polars/expr/datetime.py +2620 -0
  70. polars/expr/expr.py +11272 -0
  71. polars/expr/list.py +1408 -0
  72. polars/expr/meta.py +444 -0
  73. polars/expr/name.py +321 -0
  74. polars/expr/string.py +3045 -0
  75. polars/expr/struct.py +357 -0
  76. polars/expr/whenthen.py +185 -0
  77. polars/functions/__init__.py +193 -0
  78. polars/functions/aggregation/__init__.py +33 -0
  79. polars/functions/aggregation/horizontal.py +298 -0
  80. polars/functions/aggregation/vertical.py +341 -0
  81. polars/functions/as_datatype.py +848 -0
  82. polars/functions/business.py +138 -0
  83. polars/functions/col.py +384 -0
  84. polars/functions/datatype.py +121 -0
  85. polars/functions/eager.py +524 -0
  86. polars/functions/escape_regex.py +29 -0
  87. polars/functions/lazy.py +2751 -0
  88. polars/functions/len.py +68 -0
  89. polars/functions/lit.py +210 -0
  90. polars/functions/random.py +22 -0
  91. polars/functions/range/__init__.py +19 -0
  92. polars/functions/range/_utils.py +15 -0
  93. polars/functions/range/date_range.py +303 -0
  94. polars/functions/range/datetime_range.py +370 -0
  95. polars/functions/range/int_range.py +348 -0
  96. polars/functions/range/linear_space.py +311 -0
  97. polars/functions/range/time_range.py +287 -0
  98. polars/functions/repeat.py +301 -0
  99. polars/functions/whenthen.py +353 -0
  100. polars/interchange/__init__.py +10 -0
  101. polars/interchange/buffer.py +77 -0
  102. polars/interchange/column.py +190 -0
  103. polars/interchange/dataframe.py +230 -0
  104. polars/interchange/from_dataframe.py +328 -0
  105. polars/interchange/protocol.py +303 -0
  106. polars/interchange/utils.py +170 -0
  107. polars/io/__init__.py +64 -0
  108. polars/io/_utils.py +317 -0
  109. polars/io/avro.py +49 -0
  110. polars/io/clipboard.py +36 -0
  111. polars/io/cloud/__init__.py +17 -0
  112. polars/io/cloud/_utils.py +80 -0
  113. polars/io/cloud/credential_provider/__init__.py +17 -0
  114. polars/io/cloud/credential_provider/_builder.py +520 -0
  115. polars/io/cloud/credential_provider/_providers.py +618 -0
  116. polars/io/csv/__init__.py +9 -0
  117. polars/io/csv/_utils.py +38 -0
  118. polars/io/csv/batched_reader.py +142 -0
  119. polars/io/csv/functions.py +1495 -0
  120. polars/io/database/__init__.py +6 -0
  121. polars/io/database/_arrow_registry.py +70 -0
  122. polars/io/database/_cursor_proxies.py +147 -0
  123. polars/io/database/_executor.py +578 -0
  124. polars/io/database/_inference.py +314 -0
  125. polars/io/database/_utils.py +144 -0
  126. polars/io/database/functions.py +516 -0
  127. polars/io/delta.py +499 -0
  128. polars/io/iceberg/__init__.py +3 -0
  129. polars/io/iceberg/_utils.py +697 -0
  130. polars/io/iceberg/dataset.py +556 -0
  131. polars/io/iceberg/functions.py +151 -0
  132. polars/io/ipc/__init__.py +8 -0
  133. polars/io/ipc/functions.py +514 -0
  134. polars/io/json/__init__.py +3 -0
  135. polars/io/json/read.py +101 -0
  136. polars/io/ndjson.py +332 -0
  137. polars/io/parquet/__init__.py +17 -0
  138. polars/io/parquet/field_overwrites.py +140 -0
  139. polars/io/parquet/functions.py +722 -0
  140. polars/io/partition.py +491 -0
  141. polars/io/plugins.py +187 -0
  142. polars/io/pyarrow_dataset/__init__.py +5 -0
  143. polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
  144. polars/io/pyarrow_dataset/functions.py +79 -0
  145. polars/io/scan_options/__init__.py +5 -0
  146. polars/io/scan_options/_options.py +59 -0
  147. polars/io/scan_options/cast_options.py +126 -0
  148. polars/io/spreadsheet/__init__.py +6 -0
  149. polars/io/spreadsheet/_utils.py +52 -0
  150. polars/io/spreadsheet/_write_utils.py +647 -0
  151. polars/io/spreadsheet/functions.py +1323 -0
  152. polars/lazyframe/__init__.py +9 -0
  153. polars/lazyframe/engine_config.py +61 -0
  154. polars/lazyframe/frame.py +8564 -0
  155. polars/lazyframe/group_by.py +669 -0
  156. polars/lazyframe/in_process.py +42 -0
  157. polars/lazyframe/opt_flags.py +333 -0
  158. polars/meta/__init__.py +14 -0
  159. polars/meta/build.py +33 -0
  160. polars/meta/index_type.py +27 -0
  161. polars/meta/thread_pool.py +50 -0
  162. polars/meta/versions.py +120 -0
  163. polars/ml/__init__.py +0 -0
  164. polars/ml/torch.py +213 -0
  165. polars/ml/utilities.py +30 -0
  166. polars/plugins.py +155 -0
  167. polars/py.typed +0 -0
  168. polars/pyproject.toml +96 -0
  169. polars/schema.py +265 -0
  170. polars/selectors.py +3117 -0
  171. polars/series/__init__.py +5 -0
  172. polars/series/array.py +776 -0
  173. polars/series/binary.py +254 -0
  174. polars/series/categorical.py +246 -0
  175. polars/series/datetime.py +2275 -0
  176. polars/series/list.py +1087 -0
  177. polars/series/plotting.py +191 -0
  178. polars/series/series.py +9197 -0
  179. polars/series/string.py +2367 -0
  180. polars/series/struct.py +154 -0
  181. polars/series/utils.py +191 -0
  182. polars/sql/__init__.py +7 -0
  183. polars/sql/context.py +677 -0
  184. polars/sql/functions.py +139 -0
  185. polars/string_cache.py +185 -0
  186. polars/testing/__init__.py +13 -0
  187. polars/testing/asserts/__init__.py +9 -0
  188. polars/testing/asserts/frame.py +231 -0
  189. polars/testing/asserts/series.py +219 -0
  190. polars/testing/asserts/utils.py +12 -0
  191. polars/testing/parametric/__init__.py +33 -0
  192. polars/testing/parametric/profiles.py +107 -0
  193. polars/testing/parametric/strategies/__init__.py +22 -0
  194. polars/testing/parametric/strategies/_utils.py +14 -0
  195. polars/testing/parametric/strategies/core.py +615 -0
  196. polars/testing/parametric/strategies/data.py +452 -0
  197. polars/testing/parametric/strategies/dtype.py +436 -0
  198. polars/testing/parametric/strategies/legacy.py +169 -0
  199. polars/type_aliases.py +24 -0
  200. polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
  201. polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
  202. polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
  203. polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
@@ -0,0 +1,457 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Sequence
4
+ from typing import TYPE_CHECKING, Any, NoReturn, overload
5
+
6
+ import polars._reexport as pl
7
+ import polars.functions as F
8
+ from polars._dependencies import _check_for_numpy
9
+ from polars._dependencies import numpy as np
10
+ from polars._utils.constants import U32_MAX
11
+ from polars._utils.slice import PolarsSlice
12
+ from polars._utils.various import qualified_type_name, range_to_slice
13
+ from polars.datatypes.classes import (
14
+ Boolean,
15
+ Int8,
16
+ Int16,
17
+ Int32,
18
+ Int64,
19
+ String,
20
+ UInt32,
21
+ UInt64,
22
+ )
23
+ from polars.meta.index_type import get_index_type
24
+
25
+ if TYPE_CHECKING:
26
+ from collections.abc import Iterable
27
+
28
+ from polars import DataFrame, Series
29
+ from polars._typing import (
30
+ MultiColSelector,
31
+ MultiIndexSelector,
32
+ SingleColSelector,
33
+ SingleIndexSelector,
34
+ )
35
+
36
+ __all__ = [
37
+ "get_df_item_by_key",
38
+ "get_series_item_by_key",
39
+ ]
40
+
41
+
42
@overload
def get_series_item_by_key(s: Series, key: SingleIndexSelector) -> Any: ...


@overload
def get_series_item_by_key(s: Series, key: MultiIndexSelector) -> Series: ...


def get_series_item_by_key(
    s: Series, key: SingleIndexSelector | MultiIndexSelector
) -> Any | Series:
    """
    Select one or more elements from the Series.

    The key is dispatched on its type, in this order: ``int``, ``slice``,
    ``range``, generic ``Sequence``, polars ``Series``, NumPy ``ndarray``.

    Raises
    ------
    TypeError
        If the key type is not one of the above, if a Sequence key cannot
        be turned into an Int64 Series, or if the first element of a
        Sequence key is a ``bool`` (boolean masks are not supported).
    """
    # Single position: delegate straight to the underlying PySeries.
    if isinstance(key, int):
        return s._s.get_index_signed(key)

    # Slices and ranges share one code path; a range is first turned into
    # the equivalent slice.
    if isinstance(key, (slice, range)):
        as_slice = key if isinstance(key, slice) else range_to_slice(key)
        return _select_elements_by_slice(s, as_slice)

    if isinstance(key, Sequence):
        if not key:
            return s.clear()

        first = key[0]
        if isinstance(first, bool):
            _raise_on_boolean_mask()

        try:
            raw_positions = pl.Series("", key, dtype=Int64)
        except TypeError:
            msg = f"cannot select elements using Sequence with elements of type {qualified_type_name(first)!r}"
            raise TypeError(msg) from None

        positions = _convert_series_to_indices(raw_positions, s.len())

    elif isinstance(key, pl.Series):
        positions = _convert_series_to_indices(key, s.len())

    elif _check_for_numpy(key) and isinstance(key, np.ndarray):
        positions = _convert_np_ndarray_to_indices(key, s.len())

    else:
        msg = f"cannot select elements using key of type {qualified_type_name(key)!r}: {key!r}"
        raise TypeError(msg)

    # Every multi-element key ends up as an index Series that is gathered.
    return _select_elements_by_index(s, positions)
91
+
92
+
93
def _select_elements_by_slice(s: Series, key: slice) -> Series:
    """Apply the Python slice `key` to the Series through `PolarsSlice`."""
    slicer = PolarsSlice(s)
    return slicer.apply(key)  # type: ignore[return-value]
95
+
96
+
97
def _select_elements_by_index(s: Series, key: Series) -> Series:
    """Gather the elements of `s` at the positions held in the Series `key`."""
    gathered = s._s.gather_with_series(key._s)
    return s._from_pyseries(gathered)
99
+
100
+
101
+ # `str` overlaps with `Sequence[str]`
102
+ # We can ignore this but we must keep this overload ordering
103
@overload
def get_df_item_by_key(
    df: DataFrame, key: tuple[SingleIndexSelector, SingleColSelector]
) -> Any: ...


@overload
def get_df_item_by_key(  # type: ignore[overload-overlap]
    df: DataFrame, key: str | tuple[MultiIndexSelector, SingleColSelector]
) -> Series: ...


@overload
def get_df_item_by_key(
    df: DataFrame,
    key: (
        SingleIndexSelector
        | MultiIndexSelector
        | MultiColSelector
        | tuple[SingleIndexSelector, MultiColSelector]
        | tuple[MultiIndexSelector, MultiColSelector]
    ),
) -> DataFrame: ...


def get_df_item_by_key(
    df: DataFrame,
    key: (
        SingleIndexSelector
        | SingleColSelector
        | MultiColSelector
        | MultiIndexSelector
        | tuple[SingleIndexSelector, SingleColSelector]
        | tuple[SingleIndexSelector, MultiColSelector]
        | tuple[MultiIndexSelector, SingleColSelector]
        | tuple[MultiIndexSelector, MultiColSelector]
    ),
) -> DataFrame | Series | Any:
    """
    Get part of the DataFrame as a new DataFrame, Series, or scalar.

    A two-element tuple is read as ``(rows, columns)`` unless its first
    element is a ``bool`` or ``str``, in which case the whole tuple is a
    column selection. A plain string selects one column. Any other key is
    tried as a row selection first and, if that raises ``TypeError``, as a
    column selection.
    """
    is_pair = isinstance(key, tuple) and len(key) == 2
    if is_pair:
        rows, cols = key

        # df[True, False] and df["a", "b"] are unambiguous column selections.
        if isinstance(rows, (bool, str)):
            return _select_columns(df, key)  # type: ignore[arg-type]

        # Narrow to the requested columns first, then pick rows from that.
        picked = _select_columns(df, cols)
        if picked.is_empty():
            return picked
        if isinstance(picked, pl.Series):
            return get_series_item_by_key(picked, rows)
        return _select_rows(picked, rows)

    # df["a"]: handled explicitly because an empty string would otherwise
    # be treated as an empty Sequence in `_select_rows`.
    if isinstance(key, str):
        return df.get_column(key)

    # df[1] or df["a", "b", "c"]: rows take precedence, columns are the
    # fallback when the key cannot be used as row positions.
    try:
        result = _select_rows(df, key)  # type: ignore[arg-type]
    except TypeError:
        result = _select_columns(df, key)
    return result
170
+
171
+
172
+ # `str` overlaps with `Sequence[str]`
173
+ # We can ignore this but we must keep this overload ordering
174
@overload
def _select_columns(df: DataFrame, key: SingleColSelector) -> Series: ...  # type: ignore[overload-overlap]


@overload
def _select_columns(df: DataFrame, key: MultiColSelector) -> DataFrame: ...


def _select_columns(
    df: DataFrame, key: SingleColSelector | MultiColSelector
) -> DataFrame | Series:
    """
    Select one or more columns from the DataFrame.

    An ``int`` or ``str`` key selects a single column. Slices (with integer
    or column-name bounds), ranges, sequences, Series and NumPy arrays
    select several columns by position, by name, or by boolean mask.

    Raises
    ------
    TypeError
        If the key, or the element type of the key, cannot be used to
        select columns.
    """
    # Single column, by position or by name.
    if isinstance(key, int):
        return df.to_series(key)
    if isinstance(key, str):
        return df.get_column(key)

    if isinstance(key, slice):
        lo, hi, stride = key.start, key.stop, key.step
        # Fast path for the common case df[x, :].
        if all(part is None for part in (lo, hi, stride)):
            return df
        if isinstance(lo, str):
            lo = df.get_column_index(lo)
        if isinstance(hi, str):
            # The +1 makes the named stop column part of the selection.
            hi = df.get_column_index(hi) + 1
        positions = range(df.width)[slice(lo, hi, stride)]
        return _select_columns_by_index(df, positions)

    if isinstance(key, range):
        return _select_columns_by_index(df, key)

    if isinstance(key, Sequence):
        if not key:
            return df.__class__()
        # The first element decides how the whole sequence is interpreted.
        # bool is tested before int because bool is a subclass of int.
        first = key[0]
        if isinstance(first, bool):
            return _select_columns_by_mask(df, key)  # type: ignore[arg-type]
        if isinstance(first, int):
            return _select_columns_by_index(df, key)  # type: ignore[arg-type]
        if isinstance(first, str):
            return _select_columns_by_name(df, key)  # type: ignore[arg-type]
        msg = f"cannot select columns using Sequence with elements of type {qualified_type_name(first)!r}"
        raise TypeError(msg)

    if isinstance(key, pl.Series):
        if key.is_empty():
            return df.__class__()
        dtype = key.dtype
        if dtype == String:
            return _select_columns_by_name(df, key)
        if dtype.is_integer():
            return _select_columns_by_index(df, key)
        if dtype == Boolean:
            return _select_columns_by_mask(df, key)
        msg = f"cannot select columns using Series of type {dtype}"
        raise TypeError(msg)

    if _check_for_numpy(key) and isinstance(key, np.ndarray):
        # A 0-d array is promoted to 1-d; anything above 1-d is rejected.
        if key.ndim == 0:
            key = np.atleast_1d(key)
        elif key.ndim != 1:
            msg = "multi-dimensional NumPy arrays not supported as index"
            raise TypeError(msg)

        if len(key) == 0:
            return df.__class__()

        kind = key.dtype.kind
        if kind == "i" or kind == "u":
            return _select_columns_by_index(df, key)
        if kind == "b":
            return _select_columns_by_mask(df, key)
        if isinstance(key[0], str):
            return _select_columns_by_name(df, key)
        msg = f"cannot select columns using NumPy array of type {key.dtype}"
        raise TypeError(msg)

    msg = (
        f"cannot select columns using key of type {qualified_type_name(key)!r}: {key!r}"
    )
    raise TypeError(msg)
261
+
262
+
263
def _select_columns_by_index(df: DataFrame, key: Iterable[int]) -> DataFrame:
    """Build a new frame from the columns of `df` at the positions in `key`."""
    picked = list(map(df.to_series, key))
    return df.__class__(picked)
266
+
267
+
268
def _select_columns_by_name(df: DataFrame, key: Iterable[str]) -> DataFrame:
    """Select the columns named in `key` from `df`."""
    names = list(key)
    selected = df._df.select(names)
    return df._from_pydf(selected)
270
+
271
+
272
def _select_columns_by_mask(
    df: DataFrame, key: Sequence[bool] | Series | np.ndarray[Any, Any]
) -> DataFrame:
    """
    Select the columns of `df` whose mask entry in `key` is truthy.

    Raises
    ------
    ValueError
        If the mask does not have exactly one entry per column.
    """
    if len(key) == df.width:

        def _truthy_positions() -> Iterable[int]:
            # Lazily yield the position of every truthy mask entry.
            for position, flag in enumerate(key):
                if flag:
                    yield position

        return _select_columns_by_index(df, _truthy_positions())

    msg = f"expected {df.width} values when selecting columns by boolean mask, got {len(key)}"
    raise ValueError(msg)
281
+
282
+
283
@overload
def _select_rows(df: DataFrame, key: SingleIndexSelector) -> Series: ...


@overload
def _select_rows(df: DataFrame, key: MultiIndexSelector) -> DataFrame: ...


def _select_rows(
    df: DataFrame, key: SingleIndexSelector | MultiIndexSelector
) -> DataFrame | Series:
    """
    Select one or more rows from the DataFrame.

    Raises
    ------
    IndexError
        If an integer key lies outside ``[-height, height)``.
    TypeError
        If the key cannot be used as row positions (this includes boolean
        masks passed as a Sequence).
    """
    if isinstance(key, int):
        num_rows = df.height
        if not (-num_rows <= key < num_rows):
            msg = f"index {key} is out of bounds for DataFrame of height {num_rows}"
            raise IndexError(msg)
        # NOTE(review): the overload advertises `Series` for a single index,
        # but this branch returns whatever `df.slice` returns; confirm.
        return df.slice(key, 1)

    # Slices and ranges share one code path; a range is first turned into
    # the equivalent slice.
    if isinstance(key, (slice, range)):
        row_slice = key if isinstance(key, slice) else range_to_slice(key)
        return _select_rows_by_slice(df, row_slice)

    if isinstance(key, Sequence):
        if not key:
            return df.clear()
        if isinstance(key[0], bool):
            _raise_on_boolean_mask()
        raw_positions = pl.Series("", key, dtype=Int64)
        positions = _convert_series_to_indices(raw_positions, df.height)

    elif isinstance(key, pl.Series):
        positions = _convert_series_to_indices(key, df.height)

    elif _check_for_numpy(key) and isinstance(key, np.ndarray):
        positions = _convert_np_ndarray_to_indices(key, df.height)

    else:
        msg = f"cannot select rows using key of type {qualified_type_name(key)!r}: {key!r}"
        raise TypeError(msg)

    return _select_rows_by_index(df, positions)
329
+
330
+
331
def _select_rows_by_slice(df: DataFrame, key: slice) -> DataFrame:
    """Apply the Python slice `key` to the rows of `df` through `PolarsSlice`."""
    slicer = PolarsSlice(df)
    return slicer.apply(key)  # type: ignore[return-value]
333
+
334
+
335
def _select_rows_by_index(df: DataFrame, key: Series) -> DataFrame:
    """Gather the rows of `df` at the positions held in the Series `key`."""
    gathered = df._df.gather_with_series(key._s)
    return df._from_pydf(gathered)
337
+
338
+
339
+ # UTILS
340
+
341
+
342
def _convert_series_to_indices(s: Series, size: int) -> Series:
    """
    Convert a Series to indices, taking into account negative values.

    Cases, ordered from fastest to slowest:

    - A Series that already has the index dtype (pl.UInt32 for polars,
      pl.UInt64 for polars_u64_idx) is returned unchanged.
    - Other unsigned Series are cast to the index dtype.
    - Signed Series are cast to the index dtype after negative entries
      have been turned into absolute positions by adding `size`.

    Raises
    ------
    TypeError
        If the Series is a boolean mask or has another non-integer dtype.
    ValueError
        If the index dtype is UInt32 and a 64-bit value fails the range
        checks below.
    """
    # pl.UInt32 (polars) or pl.UInt64 (polars_u64_idx).
    idx_type = get_index_type()

    if s.dtype == idx_type:
        return s

    if not s.dtype.is_integer():
        if s.dtype == Boolean:
            _raise_on_boolean_mask()
        msg = f"cannot treat Series of type {s.dtype} as indices"
        raise TypeError(msg)

    if s.len() == 0:
        return pl.Series(s.name, [], dtype=idx_type)

    uses_u32_index = idx_type == UInt32

    # Only 64-bit inputs are range-checked when the index dtype is UInt32.
    if uses_u32_index:
        if s.dtype in {Int64, UInt64} and s.max() >= U32_MAX:  # type: ignore[operator]
            msg = "index positions should be smaller than 2^32"
            raise ValueError(msg)
        if s.dtype == Int64 and s.min() < -U32_MAX:  # type: ignore[operator]
            msg = "index positions should be greater than or equal to -2^32"
            raise ValueError(msg)

    if s.dtype.is_signed_integer() and s.min() < 0:  # type: ignore[operator]
        # Widen narrow signed dtypes before `size` is added to them.
        if uses_u32_index:
            narrow_dtypes, widened_dtype = {Int8, Int16}, Int32
        else:
            narrow_dtypes, widened_dtype = {Int8, Int16, Int32}, Int64
        idxs = s.cast(widened_dtype) if s.dtype in narrow_dtypes else s

        # Update negative indexes to absolute indexes.
        position = F.col(idxs.name)
        absolute = (
            F.when(position < 0)
            .then(size + position)
            .otherwise(position)
            .cast(idx_type)
        )
        return idxs.to_frame().select(absolute).to_series(0)

    return s.cast(idx_type)
396
+
397
+
398
+ def _convert_np_ndarray_to_indices(arr: np.ndarray[Any, Any], size: int) -> Series:
399
+ """Convert a NumPy ndarray to indices, taking into account negative values."""
400
+ # Unsigned or signed Numpy array (ordered from fastest to slowest).
401
+ # - np.uint32 (polars) or np.uint64 (polars_u64_idx) numpy array
402
+ # indexes.
403
+ # - Other unsigned numpy array indexes are converted to pl.UInt32
404
+ # (polars) or pl.UInt64 (polars_u64_idx).
405
+ # - Signed numpy array indexes are converted pl.UInt32 (polars) or
406
+ # pl.UInt64 (polars_u64_idx) after negative indexes are converted
407
+ # to absolute indexes.
408
+ if arr.ndim == 0:
409
+ arr = np.atleast_1d(arr)
410
+ if arr.ndim != 1:
411
+ msg = "only 1D NumPy arrays can be treated as indices"
412
+ raise TypeError(msg)
413
+
414
+ idx_type = get_index_type()
415
+
416
+ if len(arr) == 0:
417
+ return pl.Series("", [], dtype=idx_type)
418
+
419
+ # Numpy array with signed or unsigned integers.
420
+ if arr.dtype.kind not in ("i", "u"):
421
+ if arr.dtype.kind == "b":
422
+ _raise_on_boolean_mask()
423
+ else:
424
+ msg = f"cannot treat NumPy array of type {arr.dtype} as indices"
425
+ raise TypeError(msg)
426
+
427
+ if idx_type == UInt32:
428
+ if arr.dtype in {np.int64, np.uint64} and arr.max() >= U32_MAX:
429
+ msg = "index positions should be smaller than 2^32"
430
+ raise ValueError(msg)
431
+ if arr.dtype == np.int64 and arr.min() < -U32_MAX:
432
+ msg = "index positions should be greater than or equal to -2^32"
433
+ raise ValueError(msg)
434
+
435
+ if arr.dtype.kind == "i" and arr.min() < 0:
436
+ if idx_type == UInt32:
437
+ if arr.dtype in (np.int8, np.int16):
438
+ arr = arr.astype(np.int32)
439
+ else:
440
+ if arr.dtype in (np.int8, np.int16, np.int32):
441
+ arr = arr.astype(np.int64)
442
+
443
+ # Update negative indexes to absolute indexes.
444
+ arr = np.where(arr < 0, size + arr, arr)
445
+
446
+ # numpy conversion is much faster
447
+ arr = arr.astype(np.uint32) if idx_type == UInt32 else arr.astype(np.uint64)
448
+
449
+ return pl.Series("", arr, dtype=idx_type)
450
+
451
+
452
+ def _raise_on_boolean_mask() -> NoReturn:
453
+ msg = (
454
+ "selecting rows by passing a boolean mask to `__getitem__` is not supported"
455
+ "\n\nHint: Use the `filter` method instead."
456
+ )
457
+ raise TypeError(msg)
@@ -0,0 +1,11 @@
1
+ import os
2
+ import sys
3
+ from typing import Any
4
+
5
+
6
def verbose() -> bool:
    """Return True when the `POLARS_VERBOSE` environment variable is exactly "1"."""
    flag = os.environ.get("POLARS_VERBOSE")
    return flag == "1"
8
+
9
+
10
def eprint(*a: Any, **kw: Any) -> None:
    """Forward all arguments to `print`, writing to `sys.stderr`."""
    # `sys.stderr` is looked up at call time so that redirection is honoured.
    stream = sys.stderr
    print(*a, file=stream, **kw)