polars-runtime-compat 1.34.0b3-cp39-abi3-win_amd64.whl → 1.34.0b5-cp39-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic.

Files changed (204)
  1. _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
  2. polars_runtime_compat-1.34.0b5.dist-info/METADATA +35 -0
  3. polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
  4. polars/__init__.py +0 -528
  5. polars/_cpu_check.py +0 -265
  6. polars/_dependencies.py +0 -355
  7. polars/_plr.py +0 -99
  8. polars/_plr.pyi +0 -2496
  9. polars/_reexport.py +0 -23
  10. polars/_typing.py +0 -478
  11. polars/_utils/__init__.py +0 -37
  12. polars/_utils/async_.py +0 -102
  13. polars/_utils/cache.py +0 -176
  14. polars/_utils/cloud.py +0 -40
  15. polars/_utils/constants.py +0 -29
  16. polars/_utils/construction/__init__.py +0 -46
  17. polars/_utils/construction/dataframe.py +0 -1397
  18. polars/_utils/construction/other.py +0 -72
  19. polars/_utils/construction/series.py +0 -560
  20. polars/_utils/construction/utils.py +0 -118
  21. polars/_utils/convert.py +0 -224
  22. polars/_utils/deprecation.py +0 -406
  23. polars/_utils/getitem.py +0 -457
  24. polars/_utils/logging.py +0 -11
  25. polars/_utils/nest_asyncio.py +0 -264
  26. polars/_utils/parquet.py +0 -15
  27. polars/_utils/parse/__init__.py +0 -12
  28. polars/_utils/parse/expr.py +0 -242
  29. polars/_utils/polars_version.py +0 -19
  30. polars/_utils/pycapsule.py +0 -53
  31. polars/_utils/scan.py +0 -27
  32. polars/_utils/serde.py +0 -63
  33. polars/_utils/slice.py +0 -215
  34. polars/_utils/udfs.py +0 -1251
  35. polars/_utils/unstable.py +0 -63
  36. polars/_utils/various.py +0 -782
  37. polars/_utils/wrap.py +0 -25
  38. polars/api.py +0 -370
  39. polars/catalog/__init__.py +0 -0
  40. polars/catalog/unity/__init__.py +0 -19
  41. polars/catalog/unity/client.py +0 -733
  42. polars/catalog/unity/models.py +0 -152
  43. polars/config.py +0 -1571
  44. polars/convert/__init__.py +0 -25
  45. polars/convert/general.py +0 -1046
  46. polars/convert/normalize.py +0 -261
  47. polars/dataframe/__init__.py +0 -5
  48. polars/dataframe/_html.py +0 -186
  49. polars/dataframe/frame.py +0 -12582
  50. polars/dataframe/group_by.py +0 -1067
  51. polars/dataframe/plotting.py +0 -257
  52. polars/datatype_expr/__init__.py +0 -5
  53. polars/datatype_expr/array.py +0 -56
  54. polars/datatype_expr/datatype_expr.py +0 -304
  55. polars/datatype_expr/list.py +0 -18
  56. polars/datatype_expr/struct.py +0 -69
  57. polars/datatypes/__init__.py +0 -122
  58. polars/datatypes/_parse.py +0 -195
  59. polars/datatypes/_utils.py +0 -48
  60. polars/datatypes/classes.py +0 -1213
  61. polars/datatypes/constants.py +0 -11
  62. polars/datatypes/constructor.py +0 -172
  63. polars/datatypes/convert.py +0 -366
  64. polars/datatypes/group.py +0 -130
  65. polars/exceptions.py +0 -230
  66. polars/expr/__init__.py +0 -7
  67. polars/expr/array.py +0 -964
  68. polars/expr/binary.py +0 -346
  69. polars/expr/categorical.py +0 -306
  70. polars/expr/datetime.py +0 -2620
  71. polars/expr/expr.py +0 -11272
  72. polars/expr/list.py +0 -1408
  73. polars/expr/meta.py +0 -444
  74. polars/expr/name.py +0 -321
  75. polars/expr/string.py +0 -3045
  76. polars/expr/struct.py +0 -357
  77. polars/expr/whenthen.py +0 -185
  78. polars/functions/__init__.py +0 -193
  79. polars/functions/aggregation/__init__.py +0 -33
  80. polars/functions/aggregation/horizontal.py +0 -298
  81. polars/functions/aggregation/vertical.py +0 -341
  82. polars/functions/as_datatype.py +0 -848
  83. polars/functions/business.py +0 -138
  84. polars/functions/col.py +0 -384
  85. polars/functions/datatype.py +0 -121
  86. polars/functions/eager.py +0 -524
  87. polars/functions/escape_regex.py +0 -29
  88. polars/functions/lazy.py +0 -2751
  89. polars/functions/len.py +0 -68
  90. polars/functions/lit.py +0 -210
  91. polars/functions/random.py +0 -22
  92. polars/functions/range/__init__.py +0 -19
  93. polars/functions/range/_utils.py +0 -15
  94. polars/functions/range/date_range.py +0 -303
  95. polars/functions/range/datetime_range.py +0 -370
  96. polars/functions/range/int_range.py +0 -348
  97. polars/functions/range/linear_space.py +0 -311
  98. polars/functions/range/time_range.py +0 -287
  99. polars/functions/repeat.py +0 -301
  100. polars/functions/whenthen.py +0 -353
  101. polars/interchange/__init__.py +0 -10
  102. polars/interchange/buffer.py +0 -77
  103. polars/interchange/column.py +0 -190
  104. polars/interchange/dataframe.py +0 -230
  105. polars/interchange/from_dataframe.py +0 -328
  106. polars/interchange/protocol.py +0 -303
  107. polars/interchange/utils.py +0 -170
  108. polars/io/__init__.py +0 -64
  109. polars/io/_utils.py +0 -317
  110. polars/io/avro.py +0 -49
  111. polars/io/clipboard.py +0 -36
  112. polars/io/cloud/__init__.py +0 -17
  113. polars/io/cloud/_utils.py +0 -80
  114. polars/io/cloud/credential_provider/__init__.py +0 -17
  115. polars/io/cloud/credential_provider/_builder.py +0 -520
  116. polars/io/cloud/credential_provider/_providers.py +0 -618
  117. polars/io/csv/__init__.py +0 -9
  118. polars/io/csv/_utils.py +0 -38
  119. polars/io/csv/batched_reader.py +0 -142
  120. polars/io/csv/functions.py +0 -1495
  121. polars/io/database/__init__.py +0 -6
  122. polars/io/database/_arrow_registry.py +0 -70
  123. polars/io/database/_cursor_proxies.py +0 -147
  124. polars/io/database/_executor.py +0 -578
  125. polars/io/database/_inference.py +0 -314
  126. polars/io/database/_utils.py +0 -144
  127. polars/io/database/functions.py +0 -516
  128. polars/io/delta.py +0 -499
  129. polars/io/iceberg/__init__.py +0 -3
  130. polars/io/iceberg/_utils.py +0 -697
  131. polars/io/iceberg/dataset.py +0 -556
  132. polars/io/iceberg/functions.py +0 -151
  133. polars/io/ipc/__init__.py +0 -8
  134. polars/io/ipc/functions.py +0 -514
  135. polars/io/json/__init__.py +0 -3
  136. polars/io/json/read.py +0 -101
  137. polars/io/ndjson.py +0 -332
  138. polars/io/parquet/__init__.py +0 -17
  139. polars/io/parquet/field_overwrites.py +0 -140
  140. polars/io/parquet/functions.py +0 -722
  141. polars/io/partition.py +0 -491
  142. polars/io/plugins.py +0 -187
  143. polars/io/pyarrow_dataset/__init__.py +0 -5
  144. polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
  145. polars/io/pyarrow_dataset/functions.py +0 -79
  146. polars/io/scan_options/__init__.py +0 -5
  147. polars/io/scan_options/_options.py +0 -59
  148. polars/io/scan_options/cast_options.py +0 -126
  149. polars/io/spreadsheet/__init__.py +0 -6
  150. polars/io/spreadsheet/_utils.py +0 -52
  151. polars/io/spreadsheet/_write_utils.py +0 -647
  152. polars/io/spreadsheet/functions.py +0 -1323
  153. polars/lazyframe/__init__.py +0 -9
  154. polars/lazyframe/engine_config.py +0 -61
  155. polars/lazyframe/frame.py +0 -8564
  156. polars/lazyframe/group_by.py +0 -669
  157. polars/lazyframe/in_process.py +0 -42
  158. polars/lazyframe/opt_flags.py +0 -333
  159. polars/meta/__init__.py +0 -14
  160. polars/meta/build.py +0 -33
  161. polars/meta/index_type.py +0 -27
  162. polars/meta/thread_pool.py +0 -50
  163. polars/meta/versions.py +0 -120
  164. polars/ml/__init__.py +0 -0
  165. polars/ml/torch.py +0 -213
  166. polars/ml/utilities.py +0 -30
  167. polars/plugins.py +0 -155
  168. polars/py.typed +0 -0
  169. polars/pyproject.toml +0 -103
  170. polars/schema.py +0 -265
  171. polars/selectors.py +0 -3117
  172. polars/series/__init__.py +0 -5
  173. polars/series/array.py +0 -776
  174. polars/series/binary.py +0 -254
  175. polars/series/categorical.py +0 -246
  176. polars/series/datetime.py +0 -2275
  177. polars/series/list.py +0 -1087
  178. polars/series/plotting.py +0 -191
  179. polars/series/series.py +0 -9197
  180. polars/series/string.py +0 -2367
  181. polars/series/struct.py +0 -154
  182. polars/series/utils.py +0 -191
  183. polars/sql/__init__.py +0 -7
  184. polars/sql/context.py +0 -677
  185. polars/sql/functions.py +0 -139
  186. polars/string_cache.py +0 -185
  187. polars/testing/__init__.py +0 -13
  188. polars/testing/asserts/__init__.py +0 -9
  189. polars/testing/asserts/frame.py +0 -231
  190. polars/testing/asserts/series.py +0 -219
  191. polars/testing/asserts/utils.py +0 -12
  192. polars/testing/parametric/__init__.py +0 -33
  193. polars/testing/parametric/profiles.py +0 -107
  194. polars/testing/parametric/strategies/__init__.py +0 -22
  195. polars/testing/parametric/strategies/_utils.py +0 -14
  196. polars/testing/parametric/strategies/core.py +0 -615
  197. polars/testing/parametric/strategies/data.py +0 -452
  198. polars/testing/parametric/strategies/dtype.py +0 -436
  199. polars/testing/parametric/strategies/legacy.py +0 -169
  200. polars/type_aliases.py +0 -24
  201. polars_runtime_compat-1.34.0b3.dist-info/METADATA +0 -190
  202. polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
  203. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
  204. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
@@ -1,77 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-from polars.interchange.protocol import (
-    Buffer,
-    CopyNotAllowedError,
-    DlpackDeviceType,
-    DtypeKind,
-)
-from polars.interchange.utils import polars_dtype_to_dtype
-
-if TYPE_CHECKING:
-    from typing import NoReturn
-
-    from polars import Series
-
-
-class PolarsBuffer(Buffer):
-    """
-    A buffer object backed by a Polars Series consisting of a single chunk.
-
-    Parameters
-    ----------
-    data
-        The Polars Series backing the buffer object.
-    allow_copy
-        Allow data to be copied during operations on this column. If set to `False`,
-        a RuntimeError will be raised if data would be copied.
-    """
-
-    def __init__(self, data: Series, *, allow_copy: bool = True) -> None:
-        if data.n_chunks() > 1:
-            if not allow_copy:
-                msg = "non-contiguous buffer must be made contiguous"
-                raise CopyNotAllowedError(msg)
-            data = data.rechunk()
-
-        self._data = data
-
-    @property
-    def bufsize(self) -> int:
-        """Buffer size in bytes."""
-        dtype = polars_dtype_to_dtype(self._data.dtype)
-
-        if dtype[0] == DtypeKind.BOOL:
-            _, offset, length = self._data._get_buffer_info()
-            n_bits = offset + length
-            n_bytes, rest = divmod(n_bits, 8)
-            # Round up to the nearest byte
-            if rest == 0:
-                return n_bytes
-            else:
-                return n_bytes + 1
-
-        return self._data.len() * (dtype[1] // 8)
-
-    @property
-    def ptr(self) -> int:
-        """Pointer to start of the buffer as an integer."""
-        pointer, _, _ = self._data._get_buffer_info()
-        return pointer
-
-    def __dlpack__(self) -> NoReturn:
-        """Represent this structure as DLPack interface."""
-        msg = "__dlpack__"
-        raise NotImplementedError(msg)
-
-    def __dlpack_device__(self) -> tuple[DlpackDeviceType, None]:
-        """Device type and device ID for where the data in the buffer resides."""
-        return (DlpackDeviceType.CPU, None)
-
-    def __repr__(self) -> str:
-        bufsize = self.bufsize
-        ptr = self.ptr
-        device = self.__dlpack_device__()[0].name
-        return f"PolarsBuffer(bufsize={bufsize}, ptr={ptr}, device={device!r})"
@@ -1,190 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-from polars.datatypes import Boolean, Categorical, Enum, String
-from polars.interchange.buffer import PolarsBuffer
-from polars.interchange.protocol import (
-    Column,
-    ColumnNullType,
-    CopyNotAllowedError,
-    DtypeKind,
-    Endianness,
-)
-from polars.interchange.utils import polars_dtype_to_dtype
-
-if TYPE_CHECKING:
-    from collections.abc import Iterator
-    from typing import Any
-
-    from polars import Series
-    from polars.interchange.protocol import CategoricalDescription, ColumnBuffers, Dtype
-
-
-class PolarsColumn(Column):
-    """
-    A column object backed by a Polars Series.
-
-    Parameters
-    ----------
-    column
-        The Polars Series backing the column object.
-    allow_copy
-        Allow data to be copied during operations on this column. If set to `False`,
-        a RuntimeError will be raised if data would be copied.
-    """
-
-    def __init__(self, column: Series, *, allow_copy: bool = True) -> None:
-        self._col = column
-        self._allow_copy = allow_copy
-
-    def size(self) -> int:
-        """Size of the column in elements."""
-        return self._col.len()
-
-    @property
-    def offset(self) -> int:
-        """Offset of the first element with respect to the start of the underlying buffer."""  # noqa: W505
-        if self._col.dtype == Boolean:
-            return self._col._get_buffer_info()[1]
-        else:
-            return 0
-
-    @property
-    def dtype(self) -> Dtype:
-        """Data type of the column."""
-        pl_dtype = self._col.dtype
-        return polars_dtype_to_dtype(pl_dtype)
-
-    @property
-    def describe_categorical(self) -> CategoricalDescription:
-        """
-        Description of the categorical data type of the column.
-
-        Raises
-        ------
-        TypeError
-            If the data type of the column is not categorical.
-        """
-        dtype = self._col.dtype
-        if dtype == Categorical:
-            categories = self._col.cat.get_categories()
-            is_ordered = False
-        elif dtype == Enum:
-            categories = dtype.categories  # type: ignore[attr-defined]
-            is_ordered = True
-        else:
-            msg = "`describe_categorical` only works on categorical columns"
-            raise TypeError(msg)
-
-        return {
-            "is_ordered": is_ordered,
-            "is_dictionary": True,
-            "categories": PolarsColumn(categories, allow_copy=self._allow_copy),
-        }
-
-    @property
-    def describe_null(self) -> tuple[ColumnNullType, int | None]:
-        """Description of the null representation the column uses."""
-        if self.null_count == 0:
-            return ColumnNullType.NON_NULLABLE, None
-        else:
-            return ColumnNullType.USE_BITMASK, 0
-
-    @property
-    def null_count(self) -> int:
-        """The number of null elements."""
-        return self._col.null_count()
-
-    @property
-    def metadata(self) -> dict[str, Any]:
-        """The metadata for the column."""
-        return {}
-
-    def num_chunks(self) -> int:
-        """Return the number of chunks the column consists of."""
-        return self._col.n_chunks()
-
-    def get_chunks(self, n_chunks: int | None = None) -> Iterator[PolarsColumn]:
-        """
-        Return an iterator yielding the column chunks.
-
-        Parameters
-        ----------
-        n_chunks
-            The number of chunks to return. Must be a multiple of the number of chunks
-            in the column.
-
-        Notes
-        -----
-        When `n_chunks` is higher than the number of chunks in the column, a slice
-        must be performed that is not on the chunk boundary. This will trigger some
-        compute if the column contains null values or if the column is of data type
-        boolean.
-        """
-        total_n_chunks = self.num_chunks()
-        chunks = self._col.get_chunks()
-
-        if (n_chunks is None) or (n_chunks == total_n_chunks):
-            for chunk in chunks:
-                yield PolarsColumn(chunk, allow_copy=self._allow_copy)
-
-        elif (n_chunks <= 0) or (n_chunks % total_n_chunks != 0):
-            msg = (
-                "`n_chunks` must be a multiple of the number of chunks of this column"
-                f" ({total_n_chunks})"
-            )
-            raise ValueError(msg)
-
-        else:
-            subchunks_per_chunk = n_chunks // total_n_chunks
-            for chunk in chunks:
-                size = len(chunk)
-                step = size // subchunks_per_chunk
-                if size % subchunks_per_chunk != 0:
-                    step += 1
-                for start in range(0, step * subchunks_per_chunk, step):
-                    yield PolarsColumn(
-                        chunk[start : start + step], allow_copy=self._allow_copy
-                    )
-
-    def get_buffers(self) -> ColumnBuffers:
-        """Return a dictionary containing the underlying buffers."""
-        dtype = self._col.dtype
-
-        if dtype == String and not self._allow_copy:
-            msg = "string buffers must be converted"
-            raise CopyNotAllowedError(msg)
-
-        buffers = self._col._get_buffers()
-
-        return {
-            "data": self._wrap_data_buffer(buffers["values"]),
-            "validity": self._wrap_validity_buffer(buffers["validity"]),
-            "offsets": self._wrap_offsets_buffer(buffers["offsets"]),
-        }
-
-    def _wrap_data_buffer(self, buffer: Series) -> tuple[PolarsBuffer, Dtype]:
-        interchange_buffer = PolarsBuffer(buffer, allow_copy=self._allow_copy)
-        dtype = polars_dtype_to_dtype(buffer.dtype)
-        return interchange_buffer, dtype
-
-    def _wrap_validity_buffer(
-        self, buffer: Series | None
-    ) -> tuple[PolarsBuffer, Dtype] | None:
-        if buffer is None:
-            return None
-
-        interchange_buffer = PolarsBuffer(buffer, allow_copy=self._allow_copy)
-        dtype = (DtypeKind.BOOL, 1, "b", Endianness.NATIVE)
-        return interchange_buffer, dtype
-
-    def _wrap_offsets_buffer(
-        self, buffer: Series | None
-    ) -> tuple[PolarsBuffer, Dtype] | None:
-        if buffer is None:
-            return None
-
-        interchange_buffer = PolarsBuffer(buffer, allow_copy=self._allow_copy)
-        dtype = (DtypeKind.INT, 64, "l", Endianness.NATIVE)
-        return interchange_buffer, dtype
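`PolarsColumn.get_chunks` in the hunk above only accepts an `n_chunks` that is a positive multiple of the physical chunk count, then cuts each physical chunk into equal-sized slices, with the last slice absorbing any remainder. A minimal standalone sketch of that slicing rule (plain Python, hypothetical helper name, not part of the package):

```python
# Sketch of the sub-chunk slicing used by get_chunks: each physical chunk of
# `size` elements is split into n_chunks // total_n_chunks slices of equal
# (rounded-up) step, mirroring the arithmetic in the code above.
def split_points(size: int, total_n_chunks: int, n_chunks: int) -> list[tuple[int, int]]:
    if n_chunks <= 0 or n_chunks % total_n_chunks != 0:
        raise ValueError("`n_chunks` must be a positive multiple of the chunk count")
    subchunks_per_chunk = n_chunks // total_n_chunks
    step = size // subchunks_per_chunk
    if size % subchunks_per_chunk != 0:
        step += 1  # round up so every element is covered
    return [
        (start, min(start + step, size))
        for start in range(0, step * subchunks_per_chunk, step)
    ]

# Splitting a single 10-element chunk into 4 sub-chunks yields sizes 3, 3, 3, 1.
print(split_points(10, total_n_chunks=1, n_chunks=4))  # [(0, 3), (3, 6), (6, 9), (9, 10)]
```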
@@ -1,230 +0,0 @@
-from __future__ import annotations
-
-from collections.abc import Sequence
-from itertools import accumulate
-from typing import TYPE_CHECKING
-
-from polars.interchange.column import PolarsColumn
-from polars.interchange.protocol import CopyNotAllowedError
-from polars.interchange.protocol import DataFrame as InterchangeDataFrame
-
-if TYPE_CHECKING:
-    from collections.abc import Iterator
-    from typing import Any
-
-    from polars import DataFrame
-
-
-class PolarsDataFrame(InterchangeDataFrame):
-    """
-    A dataframe object backed by a Polars DataFrame.
-
-    Parameters
-    ----------
-    df
-        The Polars DataFrame backing the dataframe object.
-    allow_copy
-        Allow data to be copied during operations on this column. If set to `False`,
-        a RuntimeError is raised if data would be copied.
-    """
-
-    version = 0
-
-    def __init__(self, df: DataFrame, *, allow_copy: bool = True) -> None:
-        self._df = df
-        self._allow_copy = allow_copy
-
-    def __dataframe__(
-        self,
-        nan_as_null: bool = False,  # noqa: FBT001
-        allow_copy: bool = True,  # noqa: FBT001
-    ) -> PolarsDataFrame:
-        """
-        Construct a new dataframe object, potentially changing the parameters.
-
-        Parameters
-        ----------
-        nan_as_null
-            Overwrite null values in the data with `NaN`.
-
-            .. warning::
-                This functionality has not been implemented and the parameter will be
-                removed in a future version.
-                Setting this to `True` will raise a `NotImplementedError`.
-        allow_copy
-            Allow memory to be copied to perform the conversion. If set to `False`,
-            causes conversions that are not zero-copy to fail.
-        """
-        if nan_as_null:
-            msg = (
-                "functionality for `nan_as_null` has not been implemented and the"
-                " parameter will be removed in a future version"
-                "\n\nUse the default `nan_as_null=False`."
-            )
-            raise NotImplementedError(msg)
-        return PolarsDataFrame(self._df, allow_copy=allow_copy)
-
-    @property
-    def metadata(self) -> dict[str, Any]:
-        """The metadata for the dataframe."""
-        return {}
-
-    def num_columns(self) -> int:
-        """Return the number of columns in the dataframe."""
-        return self._df.width
-
-    def num_rows(self) -> int:
-        """Return the number of rows in the dataframe."""
-        return self._df.height
-
-    def num_chunks(self) -> int:
-        """
-        Return the number of chunks the dataframe consists of.
-
-        It is possible for a Polars DataFrame to consist of columns with a varying
-        number of chunks. This method returns the number of chunks of the first
-        column.
-
-        See Also
-        --------
-        polars.dataframe.frame.DataFrame.n_chunks
-        """
-        return self._df.n_chunks("first")
-
-    def column_names(self) -> list[str]:
-        """Return the column names."""
-        return self._df.columns
-
-    def get_column(self, i: int) -> PolarsColumn:
-        """
-        Return the column at the indicated position.
-
-        Parameters
-        ----------
-        i
-            Index of the column.
-        """
-        s = self._df.to_series(i)
-        return PolarsColumn(s, allow_copy=self._allow_copy)
-
-    def get_column_by_name(self, name: str) -> PolarsColumn:
-        """
-        Return the column with the given name.
-
-        Parameters
-        ----------
-        name
-            Name of the column.
-        """
-        s = self._df.get_column(name)
-        return PolarsColumn(s, allow_copy=self._allow_copy)
-
-    def get_columns(self) -> Iterator[PolarsColumn]:
-        """Return an iterator yielding the columns."""
-        for column in self._df.get_columns():
-            yield PolarsColumn(column, allow_copy=self._allow_copy)
-
-    def select_columns(self, indices: Sequence[int]) -> PolarsDataFrame:
-        """
-        Create a new dataframe by selecting a subset of columns by index.
-
-        Parameters
-        ----------
-        indices
-            Column indices
-        """
-        if not isinstance(indices, Sequence):
-            msg = "`indices` is not a sequence"
-            raise TypeError(msg)
-        if not isinstance(indices, list):
-            indices = list(indices)
-
-        return PolarsDataFrame(
-            self._df[:, indices],
-            allow_copy=self._allow_copy,
-        )
-
-    def select_columns_by_name(self, names: Sequence[str]) -> PolarsDataFrame:
-        """
-        Create a new dataframe by selecting a subset of columns by name.
-
-        Parameters
-        ----------
-        names
-            Column names.
-        """
-        if not isinstance(names, Sequence):
-            msg = "`names` is not a sequence"
-            raise TypeError(msg)
-
-        return PolarsDataFrame(
-            self._df.select(names),
-            allow_copy=self._allow_copy,
-        )
-
-    def get_chunks(self, n_chunks: int | None = None) -> Iterator[PolarsDataFrame]:
-        """
-        Return an iterator yielding the chunks of the dataframe.
-
-        Parameters
-        ----------
-        n_chunks
-            The number of chunks to return. Must be a multiple of the number of chunks
-            in the dataframe. If set to `None` (default), returns all chunks.
-
-        Notes
-        -----
-        When the columns in the dataframe are chunked unevenly, or when `n_chunks` is
-        higher than the number of chunks in the dataframe, a slice must be performed
-        that is not on the chunk boundary. This will trigger some compute for columns
-        that contain null values and boolean columns.
-        """
-        total_n_chunks = self.num_chunks()
-        chunks = self._get_chunks_from_col_chunks()
-
-        if (n_chunks is None) or (n_chunks == total_n_chunks):
-            for chunk in chunks:
-                yield PolarsDataFrame(chunk, allow_copy=self._allow_copy)
-
-        elif (n_chunks <= 0) or (n_chunks % total_n_chunks != 0):
-            msg = (
-                "`n_chunks` must be a multiple of the number of chunks of this"
-                f" dataframe ({total_n_chunks})"
-            )
-            raise ValueError(msg)
-
-        else:
-            subchunks_per_chunk = n_chunks // total_n_chunks
-            for chunk in chunks:
-                size = len(chunk)
-                step = size // subchunks_per_chunk
-                if size % subchunks_per_chunk != 0:
-                    step += 1
-                for start in range(0, step * subchunks_per_chunk, step):
-                    yield PolarsDataFrame(
-                        chunk[start : start + step, :],
-                        allow_copy=self._allow_copy,
-                    )
-
-    def _get_chunks_from_col_chunks(self) -> Iterator[DataFrame]:
-        """
-        Return chunks of this dataframe according to the chunks of the first column.
-
-        If columns are not all chunked identically, they will be rechunked like the
-        first column. If copy is not allowed, this raises a RuntimeError.
-        """
-        col_chunks = self.get_column(0).get_chunks()
-        chunk_sizes = [chunk.size() for chunk in col_chunks]
-        starts = [0] + list(accumulate(chunk_sizes))
-
-        for i in range(len(starts) - 1):
-            start, end = starts[i : i + 2]
-            chunk = self._df[start:end, :]
-
-            if not all(x == 1 for x in chunk.n_chunks("all")):
-                if not self._allow_copy:
-                    msg = "unevenly chunked columns must be rechunked"
-                    raise CopyNotAllowedError(msg)
-                chunk = chunk.rechunk()
-
-            yield chunk
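The three removed modules shown above (`buffer.py`, `column.py`, `dataframe.py`) implement the DataFrame Interchange Protocol wrappers that Polars returns from `DataFrame.__dataframe__()`. A minimal usage sketch, assuming `polars` and a pandas version with interchange support are installed (this example is illustrative, not taken from the diff):

```python
import polars as pl
from pandas.api.interchange import from_dataframe

df = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

# __dataframe__ returns the protocol-level wrapper (PolarsDataFrame in the
# removed module); allow_copy=False makes any non-zero-copy conversion raise.
xchg = df.__dataframe__(allow_copy=True)
print(xchg.num_columns(), xchg.num_rows(), xchg.column_names())

# Any interchange-aware consumer can rebuild the data from the same wrapper.
print(from_dataframe(df))
```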