cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl
This diff shows the changes between the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- cudf_polars/GIT_COMMIT +1 -1
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +60 -15
- cudf_polars/containers/column.py +137 -77
- cudf_polars/containers/dataframe.py +123 -34
- cudf_polars/containers/datatype.py +134 -13
- cudf_polars/dsl/expr.py +0 -2
- cudf_polars/dsl/expressions/aggregation.py +80 -28
- cudf_polars/dsl/expressions/binaryop.py +34 -14
- cudf_polars/dsl/expressions/boolean.py +110 -37
- cudf_polars/dsl/expressions/datetime.py +59 -30
- cudf_polars/dsl/expressions/literal.py +11 -5
- cudf_polars/dsl/expressions/rolling.py +460 -119
- cudf_polars/dsl/expressions/selection.py +9 -8
- cudf_polars/dsl/expressions/slicing.py +1 -1
- cudf_polars/dsl/expressions/string.py +256 -114
- cudf_polars/dsl/expressions/struct.py +19 -7
- cudf_polars/dsl/expressions/ternary.py +33 -3
- cudf_polars/dsl/expressions/unary.py +126 -64
- cudf_polars/dsl/ir.py +1053 -350
- cudf_polars/dsl/to_ast.py +30 -13
- cudf_polars/dsl/tracing.py +194 -0
- cudf_polars/dsl/translate.py +307 -107
- cudf_polars/dsl/utils/aggregations.py +43 -30
- cudf_polars/dsl/utils/reshape.py +14 -2
- cudf_polars/dsl/utils/rolling.py +12 -8
- cudf_polars/dsl/utils/windows.py +35 -20
- cudf_polars/experimental/base.py +55 -2
- cudf_polars/experimental/benchmarks/pdsds.py +12 -126
- cudf_polars/experimental/benchmarks/pdsh.py +792 -2
- cudf_polars/experimental/benchmarks/utils.py +596 -39
- cudf_polars/experimental/dask_registers.py +47 -20
- cudf_polars/experimental/dispatch.py +9 -3
- cudf_polars/experimental/distinct.py +2 -0
- cudf_polars/experimental/explain.py +15 -2
- cudf_polars/experimental/expressions.py +30 -15
- cudf_polars/experimental/groupby.py +25 -4
- cudf_polars/experimental/io.py +156 -124
- cudf_polars/experimental/join.py +53 -23
- cudf_polars/experimental/parallel.py +68 -19
- cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
- cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
- cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
- cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
- cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
- cudf_polars/experimental/rapidsmpf/core.py +488 -0
- cudf_polars/experimental/rapidsmpf/dask.py +172 -0
- cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
- cudf_polars/experimental/rapidsmpf/io.py +696 -0
- cudf_polars/experimental/rapidsmpf/join.py +322 -0
- cudf_polars/experimental/rapidsmpf/lower.py +74 -0
- cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
- cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
- cudf_polars/experimental/rapidsmpf/union.py +115 -0
- cudf_polars/experimental/rapidsmpf/utils.py +374 -0
- cudf_polars/experimental/repartition.py +9 -2
- cudf_polars/experimental/select.py +177 -14
- cudf_polars/experimental/shuffle.py +46 -12
- cudf_polars/experimental/sort.py +100 -26
- cudf_polars/experimental/spilling.py +1 -1
- cudf_polars/experimental/statistics.py +24 -5
- cudf_polars/experimental/utils.py +25 -7
- cudf_polars/testing/asserts.py +13 -8
- cudf_polars/testing/io.py +2 -1
- cudf_polars/testing/plugin.py +93 -17
- cudf_polars/typing/__init__.py +86 -32
- cudf_polars/utils/config.py +473 -58
- cudf_polars/utils/cuda_stream.py +70 -0
- cudf_polars/utils/versions.py +5 -4
- cudf_polars_cu13-26.2.0.dist-info/METADATA +181 -0
- cudf_polars_cu13-26.2.0.dist-info/RECORD +108 -0
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
- cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
- cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
cudf_polars/dsl/expressions/datetime.py

@@ -17,7 +17,7 @@ from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 if TYPE_CHECKING:
     from typing_extensions import Self
 
-    from polars …
+    from polars import polars  # type: ignore[attr-defined]
 
     from cudf_polars.containers import DataFrame, DataType
 
@@ -75,7 +75,7 @@ class TemporalFunction(Expr):
         Year = auto()
 
         @classmethod
-        def from_polars(cls, obj: …
+        def from_polars(cls, obj: polars._expr_nodes.TemporalFunction) -> Self:
             """Convert from polars' `TemporalFunction`."""
             try:
                 function, name = str(obj).split(".", maxsplit=1)
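The `from_polars` classmethod above relies on the string form of the polars enum member: `str(obj)` gives something like `"TemporalFunction.Year"`, which is split once on `"."` so the member name can be looked up on the cudf-polars side. A minimal sketch of that round-trip with plain standard-library enums (the class names below are stand-ins, not the actual polars or cudf-polars types):

```python
from enum import Enum, IntEnum, auto


class PolarsTemporalFunction(Enum):
    """Stand-in for the enum node that polars hands to the translator."""

    Year = auto()
    Month = auto()


class Name(IntEnum):
    """Stand-in for the Name enum on the cudf-polars side."""

    Year = auto()
    Month = auto()

    @classmethod
    def from_polars(cls, obj: PolarsTemporalFunction) -> "Name":
        # str(obj) -> "PolarsTemporalFunction.Year"; drop the class name, keep the member.
        _, name = str(obj).split(".", maxsplit=1)
        return cls[name]


assert Name.from_polars(PolarsTemporalFunction.Year) is Name.Year
```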
@@ -129,7 +129,7 @@ class TemporalFunction(Expr):
             raise NotImplementedError(f"Temporal function {self.name}")
 
         if self.name is TemporalFunction.Name.ToString and plc.traits.is_duration(
-            self.children[0].dtype. …
+            self.children[0].dtype.plc_type
         ):
             raise NotImplementedError("ToString is not supported on duration types")
 
@@ -140,13 +140,19 @@ class TemporalFunction(Expr):
         columns = [child.evaluate(df, context=context) for child in self.children]
         (column,) = columns
         if self.name is TemporalFunction.Name.CastTimeUnit:
-            return Column( …
+            return Column(
+                plc.unary.cast(column.obj, self.dtype.plc_type, stream=df.stream),
+                dtype=self.dtype,
+            )
         if self.name == TemporalFunction.Name.ToString:
             return Column(
                 plc.strings.convert.convert_datetime.from_timestamps(
                     column.obj,
                     self.options[0],
-                    plc.Column.from_iterable_of_py( …
+                    plc.Column.from_iterable_of_py(
+                        [], dtype=self.dtype.plc_type, stream=df.stream
+                    ),
+                    stream=df.stream,
                 ),
                 dtype=self.dtype,
             )
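At the polars API level, the two branches in this hunk back `Expr.dt.cast_time_unit` and `Expr.dt.to_string`. A small usage sketch of those expressions (standard polars shown for the semantics; assuming a recent polars with cudf-polars installed, the same query goes through this code path when collected with the GPU engine):

```python
from datetime import datetime

import polars as pl

lf = pl.LazyFrame({"ts": [datetime(2024, 1, 2, 3, 4, 5, 123456)]})

out = lf.select(
    ms=pl.col("ts").dt.cast_time_unit("ms"),          # CastTimeUnit branch
    txt=pl.col("ts").dt.to_string("%Y-%m-%d %H:%M"),  # ToString branch (strftime-style format)
).collect()  # with cudf-polars installed: .collect(engine="gpu")
print(out)
```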
@@ -156,10 +162,12 @@ class TemporalFunction(Expr):
                     column.obj,
                     format="%V",
                     input_strings_names=plc.Column.from_iterable_of_py(
-                        [], dtype=plc.DataType(plc.TypeId.STRING)
+                        [], dtype=plc.DataType(plc.TypeId.STRING), stream=df.stream
                     ),
+                    stream=df.stream,
                 ),
-                self.dtype. …
+                self.dtype.plc_type,
+                stream=df.stream,
             )
             return Column(result, dtype=self.dtype)
         if self.name is TemporalFunction.Name.IsoYear:
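The `"%V"` format used here is the C `strftime` directive for the ISO 8601 week number, and the `"%G"` in the next hunk is the matching ISO year, which is why these results can differ from the calendar year around New Year. The Python standard library shows the same distinction:

```python
import datetime

d = datetime.date(2021, 1, 1)
iso = d.isocalendar()
# 2021-01-01 falls in ISO week 53 of ISO year 2020, not week 1 of 2021.
print(d.year, iso.year, iso.week)  # 2021 2020 53
```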
@@ -168,97 +176,117 @@ class TemporalFunction(Expr):
                     column.obj,
                     format="%G",
                     input_strings_names=plc.Column.from_iterable_of_py(
-                        [], dtype=plc.DataType(plc.TypeId.STRING)
+                        [], dtype=plc.DataType(plc.TypeId.STRING), stream=df.stream
                     ),
+                    stream=df.stream,
                 ),
-                self.dtype. …
+                self.dtype.plc_type,
+                stream=df.stream,
             )
             return Column(result, dtype=self.dtype)
         if self.name is TemporalFunction.Name.MonthStart:
-            ends = plc.datetime.last_day_of_month(column.obj)
-            days_to_subtract = plc.datetime.days_in_month(column.obj)
+            ends = plc.datetime.last_day_of_month(column.obj, stream=df.stream)
+            days_to_subtract = plc.datetime.days_in_month(column.obj, stream=df.stream)
             # must subtract 1 to avoid rolling over to the previous month
             days_to_subtract = plc.binaryop.binary_operation(
                 days_to_subtract,
-                plc.Scalar.from_py(1, plc.DataType(plc.TypeId.INT32)),
+                plc.Scalar.from_py(1, plc.DataType(plc.TypeId.INT32), stream=df.stream),
                 plc.binaryop.BinaryOperator.SUB,
                 plc.DataType(plc.TypeId.DURATION_DAYS),
+                stream=df.stream,
             )
             result = plc.binaryop.binary_operation(
                 ends,
                 days_to_subtract,
                 plc.binaryop.BinaryOperator.SUB,
-                self.dtype. …
+                self.dtype.plc_type,
+                stream=df.stream,
             )
 
             return Column(result, dtype=self.dtype)
         if self.name is TemporalFunction.Name.MonthEnd:
             return Column(
                 plc.unary.cast(
-                    plc.datetime.last_day_of_month(column.obj …
+                    plc.datetime.last_day_of_month(column.obj, stream=df.stream),
+                    self.dtype.plc_type,
+                    stream=df.stream,
                 ),
                 dtype=self.dtype,
             )
         if self.name is TemporalFunction.Name.IsLeapYear:
             return Column(
-                plc.datetime.is_leap_year(column.obj),
+                plc.datetime.is_leap_year(column.obj, stream=df.stream),
                 dtype=self.dtype,
             )
         if self.name is TemporalFunction.Name.OrdinalDay:
-            return Column( …
+            return Column(
+                plc.datetime.day_of_year(column.obj, stream=df.stream), dtype=self.dtype
+            )
         if self.name is TemporalFunction.Name.Microsecond:
             millis = plc.datetime.extract_datetime_component(
-                column.obj, plc.datetime.DatetimeComponent.MILLISECOND
+                column.obj, plc.datetime.DatetimeComponent.MILLISECOND, stream=df.stream
             )
             micros = plc.datetime.extract_datetime_component(
-                column.obj, plc.datetime.DatetimeComponent.MICROSECOND
+                column.obj, plc.datetime.DatetimeComponent.MICROSECOND, stream=df.stream
             )
             millis_as_micros = plc.binaryop.binary_operation(
                 millis,
-                plc.Scalar.from_py( …
+                plc.Scalar.from_py(
+                    1_000, plc.DataType(plc.TypeId.INT32), stream=df.stream
+                ),
                 plc.binaryop.BinaryOperator.MUL,
-                self.dtype. …
+                self.dtype.plc_type,
+                stream=df.stream,
             )
             total_micros = plc.binaryop.binary_operation(
                 micros,
                 millis_as_micros,
                 plc.binaryop.BinaryOperator.ADD,
-                self.dtype. …
+                self.dtype.plc_type,
+                stream=df.stream,
             )
             return Column(total_micros, dtype=self.dtype)
         elif self.name is TemporalFunction.Name.Nanosecond:
             millis = plc.datetime.extract_datetime_component(
-                column.obj, plc.datetime.DatetimeComponent.MILLISECOND
+                column.obj, plc.datetime.DatetimeComponent.MILLISECOND, stream=df.stream
             )
             micros = plc.datetime.extract_datetime_component(
-                column.obj, plc.datetime.DatetimeComponent.MICROSECOND
+                column.obj, plc.datetime.DatetimeComponent.MICROSECOND, stream=df.stream
             )
             nanos = plc.datetime.extract_datetime_component(
-                column.obj, plc.datetime.DatetimeComponent.NANOSECOND
+                column.obj, plc.datetime.DatetimeComponent.NANOSECOND, stream=df.stream
            )
             millis_as_nanos = plc.binaryop.binary_operation(
                 millis,
-                plc.Scalar.from_py( …
+                plc.Scalar.from_py(
+                    1_000_000, plc.DataType(plc.TypeId.INT32), stream=df.stream
+                ),
                 plc.binaryop.BinaryOperator.MUL,
-                self.dtype. …
+                self.dtype.plc_type,
+                stream=df.stream,
             )
             micros_as_nanos = plc.binaryop.binary_operation(
                 micros,
-                plc.Scalar.from_py( …
+                plc.Scalar.from_py(
+                    1_000, plc.DataType(plc.TypeId.INT32), stream=df.stream
+                ),
                 plc.binaryop.BinaryOperator.MUL,
-                self.dtype. …
+                self.dtype.plc_type,
+                stream=df.stream,
             )
             total_nanos = plc.binaryop.binary_operation(
                 nanos,
                 millis_as_nanos,
                 plc.binaryop.BinaryOperator.ADD,
-                self.dtype. …
+                self.dtype.plc_type,
+                stream=df.stream,
             )
             total_nanos = plc.binaryop.binary_operation(
                 total_nanos,
                 micros_as_nanos,
                 plc.binaryop.BinaryOperator.ADD,
-                self.dtype. …
+                self.dtype.plc_type,
+                stream=df.stream,
             )
             return Column(total_nanos, dtype=self.dtype)
 
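Two pieces of arithmetic in this hunk are easy to check off the GPU. `MonthStart` subtracts `days_in_month - 1` days from the last day of the month (subtracting the full month length would roll into the previous month, per the comment), and the `Microsecond`/`Nanosecond` branches recombine the separately extracted millisecond, microsecond and nanosecond fields into a single sub-second value. A plain-Python sketch of the same computations on scalar values (the real code does this columnwise through pylibcudf):

```python
import calendar
import datetime


def month_start(d: datetime.date) -> datetime.date:
    days_in_month = calendar.monthrange(d.year, d.month)[1]
    month_end = d.replace(day=days_in_month)  # last_day_of_month
    # Subtract days_in_month - 1, not days_in_month, to stay in the same month.
    return month_end - datetime.timedelta(days=days_in_month - 1)


assert month_start(datetime.date(2024, 2, 29)) == datetime.date(2024, 2, 1)

# Each extracted component holds only that field (e.g. milliseconds 0-999),
# so the branches recombine them into the full sub-second value:
millis_field, micros_field, nanos_field = 123, 456, 789
total_micros = micros_field + millis_field * 1_000                           # Microsecond
total_nanos = nanos_field + millis_field * 1_000_000 + micros_field * 1_000  # Nanosecond
assert total_micros == 123_456
assert total_nanos == 123_456_789
```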
@@ -266,6 +294,7 @@ class TemporalFunction(Expr):
             plc.datetime.extract_datetime_component(
                 column.obj,
                 self._COMPONENT_MAP[self.name],
+                stream=df.stream,
             ),
             dtype=self.dtype,
         )
cudf_polars/dsl/expressions/literal.py

@@ -43,7 +43,11 @@ class Literal(Expr):
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         return Column(
-            plc.Column.from_scalar( …
+            plc.Column.from_scalar(
+                plc.Scalar.from_py(self.value, self.dtype.plc_type, stream=df.stream),
+                1,
+                stream=df.stream,
+            ),
             dtype=self.dtype,
         )
 
@@ -60,8 +64,8 @@ class Literal(Expr):
         else:
             # Use polars to cast instead of pylibcudf
             # since there are just Python scalars
-            casted = pl.Series(values=[self.value], dtype=self.dtype. …
-                dtype. …
+            casted = pl.Series(values=[self.value], dtype=self.dtype.polars_type).cast(
+                dtype.polars_type
             )[0]
             return Literal(dtype, casted)
 
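The cast above happens on the host: a `Literal` holds a plain Python scalar, so it is round-tripped through a one-element polars Series rather than a pylibcudf cast. The same pattern in isolation, with standard polars:

```python
import polars as pl

value = 3
casted = pl.Series(values=[value], dtype=pl.Int64).cast(pl.Float64)[0]
print(casted, type(casted))  # 3.0 <class 'float'>
```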
@@ -82,13 +86,15 @@ class LiteralColumn(Expr):
         # This is stricter than necessary, but we only need this hash
         # for identity in groupby replacements so it's OK. And this
         # way we avoid doing potentially expensive compute.
-        return (type(self), self.dtype. …
+        return (type(self), self.dtype.plc_type, id(self.value))
 
     def do_evaluate(
         self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
-        return Column( …
+        return Column(
+            plc.Column.from_arrow(self.value, stream=df.stream), dtype=self.dtype
+        )
 
     @property
     def agg_request(self) -> NoReturn:  # noqa: D102