cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- cudf_polars/GIT_COMMIT +1 -1
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +60 -15
- cudf_polars/containers/column.py +137 -77
- cudf_polars/containers/dataframe.py +123 -34
- cudf_polars/containers/datatype.py +134 -13
- cudf_polars/dsl/expr.py +0 -2
- cudf_polars/dsl/expressions/aggregation.py +80 -28
- cudf_polars/dsl/expressions/binaryop.py +34 -14
- cudf_polars/dsl/expressions/boolean.py +110 -37
- cudf_polars/dsl/expressions/datetime.py +59 -30
- cudf_polars/dsl/expressions/literal.py +11 -5
- cudf_polars/dsl/expressions/rolling.py +460 -119
- cudf_polars/dsl/expressions/selection.py +9 -8
- cudf_polars/dsl/expressions/slicing.py +1 -1
- cudf_polars/dsl/expressions/string.py +256 -114
- cudf_polars/dsl/expressions/struct.py +19 -7
- cudf_polars/dsl/expressions/ternary.py +33 -3
- cudf_polars/dsl/expressions/unary.py +126 -64
- cudf_polars/dsl/ir.py +1053 -350
- cudf_polars/dsl/to_ast.py +30 -13
- cudf_polars/dsl/tracing.py +194 -0
- cudf_polars/dsl/translate.py +307 -107
- cudf_polars/dsl/utils/aggregations.py +43 -30
- cudf_polars/dsl/utils/reshape.py +14 -2
- cudf_polars/dsl/utils/rolling.py +12 -8
- cudf_polars/dsl/utils/windows.py +35 -20
- cudf_polars/experimental/base.py +55 -2
- cudf_polars/experimental/benchmarks/pdsds.py +12 -126
- cudf_polars/experimental/benchmarks/pdsh.py +792 -2
- cudf_polars/experimental/benchmarks/utils.py +596 -39
- cudf_polars/experimental/dask_registers.py +47 -20
- cudf_polars/experimental/dispatch.py +9 -3
- cudf_polars/experimental/distinct.py +2 -0
- cudf_polars/experimental/explain.py +15 -2
- cudf_polars/experimental/expressions.py +30 -15
- cudf_polars/experimental/groupby.py +25 -4
- cudf_polars/experimental/io.py +156 -124
- cudf_polars/experimental/join.py +53 -23
- cudf_polars/experimental/parallel.py +68 -19
- cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
- cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
- cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
- cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
- cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
- cudf_polars/experimental/rapidsmpf/core.py +488 -0
- cudf_polars/experimental/rapidsmpf/dask.py +172 -0
- cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
- cudf_polars/experimental/rapidsmpf/io.py +696 -0
- cudf_polars/experimental/rapidsmpf/join.py +322 -0
- cudf_polars/experimental/rapidsmpf/lower.py +74 -0
- cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
- cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
- cudf_polars/experimental/rapidsmpf/union.py +115 -0
- cudf_polars/experimental/rapidsmpf/utils.py +374 -0
- cudf_polars/experimental/repartition.py +9 -2
- cudf_polars/experimental/select.py +177 -14
- cudf_polars/experimental/shuffle.py +46 -12
- cudf_polars/experimental/sort.py +100 -26
- cudf_polars/experimental/spilling.py +1 -1
- cudf_polars/experimental/statistics.py +24 -5
- cudf_polars/experimental/utils.py +25 -7
- cudf_polars/testing/asserts.py +13 -8
- cudf_polars/testing/io.py +2 -1
- cudf_polars/testing/plugin.py +93 -17
- cudf_polars/typing/__init__.py +86 -32
- cudf_polars/utils/config.py +473 -58
- cudf_polars/utils/cuda_stream.py +70 -0
- cudf_polars/utils/versions.py +5 -4
- cudf_polars_cu13-26.2.0.dist-info/METADATA +181 -0
- cudf_polars_cu13-26.2.0.dist-info/RECORD +108 -0
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
- cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
- cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
cudf_polars/dsl/to_ast.py
CHANGED
```diff
@@ -6,7 +6,9 @@
 from __future__ import annotations
 
 from functools import partial, reduce, singledispatch
-from typing import TYPE_CHECKING, TypeAlias, TypedDict
+from typing import TYPE_CHECKING, TypeAlias, TypedDict, cast
+
+import polars as pl
 
 import pylibcudf as plc
 from pylibcudf import expressions as plc_expr
@@ -19,6 +21,8 @@ from cudf_polars.typing import GenericTransformer
 if TYPE_CHECKING:
     from collections.abc import Mapping
 
+    from rmm.pylibrmm.stream import Stream
+
 
 # Can't merge these op-mapping dictionaries because scoped enum values
 # are exposed by cython with equality/hash based one their underlying
@@ -103,6 +107,7 @@ class ASTState(TypedDict):
     """
 
     for_parquet: bool
+    stream: Stream
 
 
 class ExprTransformerState(TypedDict):
@@ -170,7 +175,9 @@ def _(node: expr.ColRef, self: Transformer) -> plc_expr.Expression:
 
 @_to_ast.register
 def _(node: expr.Literal, self: Transformer) -> plc_expr.Expression:
-    return plc_expr.Literal(
+    return plc_expr.Literal(
+        plc.Scalar.from_py(node.value, node.dtype.plc_type, stream=self.state["stream"])
+    )
 
 
 @_to_ast.register
@@ -190,7 +197,7 @@ def _(node: expr.BinOp, self: Transformer) -> plc_expr.Expression:
     if self.state["for_parquet"]:
         op1_col, op2_col = (isinstance(op, expr.Col) for op in node.children)
         if op1_col ^ op2_col:
-            op = node.op
+            op: plc.binaryop.BinaryOperator = node.op
             if op not in SUPPORTED_STATISTICS_BINOPS:
                 raise NotImplementedError(
                     f"Parquet filter binop with column doesn't support {node.op!r}"
@@ -221,14 +228,16 @@ def _(node: expr.BooleanFunction, self: Transformer) -> plc_expr.Expression:
             if haystack.dtype.id() == plc.TypeId.LIST:
                 # Because we originally translated pl_expr.Literal with a list scalar
                 # to a expr.LiteralColumn, so the actual type is in the inner type
-                #
-
-
-
+                # .inner returns DataTypeClass | DataType, need to cast to DataType
+                plc_dtype = DataType(
+                    cast(pl.DataType, cast(pl.List, haystack.dtype.polars_type).inner)
+                ).plc_type
             else:
-                plc_dtype = haystack.dtype.
+                plc_dtype = haystack.dtype.plc_type  # pragma: no cover
             values = (
-                plc_expr.Literal(
+                plc_expr.Literal(
+                    plc.Scalar.from_py(val, plc_dtype, stream=self.state["stream"])
+                )
                 for val in haystack.value
             )
             return reduce(
@@ -265,7 +274,7 @@ def _(node: expr.UnaryFunction, self: Transformer) -> plc_expr.Expression:
     )
 
 
-def to_parquet_filter(node: expr.Expr) -> plc_expr.Expression | None:
+def to_parquet_filter(node: expr.Expr, stream: Stream) -> plc_expr.Expression | None:
     """
     Convert an expression to libcudf AST nodes suitable for parquet filtering.
 
@@ -273,19 +282,23 @@ def to_parquet_filter(node: expr.Expr) -> plc_expr.Expression | None:
     ----------
     node
         Expression to convert.
+    stream
+        CUDA stream used for device memory operations and kernel launches.
 
     Returns
    -------
     pylibcudf Expression if conversion is possible, otherwise None.
     """
-    mapper: Transformer = CachingVisitor(
+    mapper: Transformer = CachingVisitor(
+        _to_ast, state={"for_parquet": True, "stream": stream}
+    )
     try:
         return mapper(node)
     except (KeyError, NotImplementedError):
         return None
 
 
-def to_ast(node: expr.Expr) -> plc_expr.Expression | None:
+def to_ast(node: expr.Expr, stream: Stream) -> plc_expr.Expression | None:
     """
     Convert an expression to libcudf AST nodes suitable for compute_column.
 
@@ -293,6 +306,8 @@ def to_ast(node: expr.Expr) -> plc_expr.Expression | None:
     ----------
     node
         Expression to convert.
+    stream
+        CUDA stream used for device memory operations and kernel launches.
 
     Notes
     -----
@@ -304,7 +319,9 @@ def to_ast(node: expr.Expr) -> plc_expr.Expression | None:
     -------
     pylibcudf Expression if conversion is possible, otherwise None.
     """
-    mapper: Transformer = CachingVisitor(
+    mapper: Transformer = CachingVisitor(
+        _to_ast, state={"for_parquet": False, "stream": stream}
+    )
     try:
         return mapper(node)
     except (KeyError, NotImplementedError):
```
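
The practical upshot for callers: both public entry points now take a required CUDA stream, which is carried through `ASTState` into every `plc.Scalar.from_py` call. A minimal sketch of a call site under the new signatures — `my_expr` is a hypothetical placeholder for an already-translated `expr.Expr`, and `DEFAULT_STREAM` assumes RMM's default-stream constant:

```python
# Hypothetical call site; my_expr stands in for a translated cudf_polars
# expr.Expr, and DEFAULT_STREAM is assumed to be RMM's default CUDA stream.
from rmm.pylibrmm.stream import DEFAULT_STREAM

from cudf_polars.dsl.to_ast import to_ast, to_parquet_filter

# AST suitable for compute_column-style evaluation.
ast = to_ast(my_expr, DEFAULT_STREAM)

# AST suitable for parquet predicate pushdown. Both helpers catch
# KeyError/NotImplementedError internally and return None when the
# expression cannot be represented.
pq_filter = to_parquet_filter(my_expr, DEFAULT_STREAM)
if pq_filter is None:
    ...  # fall back to filtering after the read
```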
cudf_polars/dsl/tracing.py
CHANGED
```diff
@@ -6,11 +6,205 @@
 from __future__ import annotations
 
 import functools
+import os
+import time
+from typing import TYPE_CHECKING, Any, Concatenate, Literal
 
 import nvtx
+import pynvml
+from typing_extensions import ParamSpec
+
+import rmm
+import rmm.statistics
+
+from cudf_polars.utils.config import _bool_converter, get_device_handle
+
+try:
+    import structlog
+except ImportError:
+    _HAS_STRUCTLOG = False
+else:
+    _HAS_STRUCTLOG = True
+
+
+LOG_TRACES = _HAS_STRUCTLOG and _bool_converter(
+    os.environ.get("CUDF_POLARS_LOG_TRACES", "0")
+)
+LOG_MEMORY = LOG_TRACES and _bool_converter(
+    os.environ.get("CUDF_POLARS_LOG_TRACES_MEMORY", "1")
+)
+LOG_DATAFRAMES = LOG_TRACES and _bool_converter(
+    os.environ.get("CUDF_POLARS_LOG_TRACES_DATAFRAMES", "1")
+)
 
 CUDF_POLARS_NVTX_DOMAIN = "cudf_polars"
 
 nvtx_annotate_cudf_polars = functools.partial(
     nvtx.annotate, domain=CUDF_POLARS_NVTX_DOMAIN
 )
+
+if TYPE_CHECKING:
+    from collections.abc import Callable, Sequence
+
+    import cudf_polars.containers
+    from cudf_polars.dsl import ir
+
+
+@functools.cache
+def _getpid() -> int:  # pragma: no cover
+    # Gets called for each IR.do_evaluate node, so we'll cache it.
+    return os.getpid()
+
+
+def make_snapshot(
+    node_type: type[ir.IR],
+    frames: Sequence[cudf_polars.containers.DataFrame],
+    extra: dict[str, Any] | None = None,
+    *,
+    pid: int,
+    device_handle: Any | None = None,
+    phase: Literal["input", "output"] = "input",
+) -> dict:  # pragma: no cover; requires CUDF_POLARS_LOG_TRACES=1
+    """
+    Collect statistics about the evaluation of an IR node.
+
+    Parameters
+    ----------
+    node_type
+        The type of the IR node.
+    frames
+        The list of DataFrames to capture information for. For ``phase="input"``,
+        this is typically the dataframes passed to ``IR.do_evaluate``. For
+        ``phase="output"``, this is typically the DataFrame returned from
+        ``IR.do_evaluate``.
+    extra
+        Extra information to log.
+    pid
+        The ID of the current process. Used for NVML memory usage.
+    device_handle
+        The pynvml device handle. Used for NVML memory usage.
+    phase
+        The phase of the evaluation. Either "input" or "output".
+    """
+    ir_name = node_type.__name__
+
+    d: dict[str, Any] = {
+        "type": ir_name,
+    }
+
+    if LOG_DATAFRAMES:
+        d.update(
+            {
+                f"count_frames_{phase}": len(frames),
+                f"frames_{phase}": [
+                    {
+                        "shape": frame.table.shape(),
+                        "size": sum(
+                            col.device_buffer_size() for col in frame.table.columns()
+                        ),
+                    }
+                    for frame in frames
+                ],
+            }
+        )
+        d[f"total_bytes_{phase}"] = sum(x["size"] for x in d[f"frames_{phase}"])
+
+    if LOG_MEMORY:
+        stats = rmm.statistics.get_statistics()
+        if stats:
+            d.update(
+                {
+                    f"rmm_current_bytes_{phase}": stats.current_bytes,
+                    f"rmm_current_count_{phase}": stats.current_count,
+                    f"rmm_peak_bytes_{phase}": stats.peak_bytes,
+                    f"rmm_peak_count_{phase}": stats.peak_count,
+                    f"rmm_total_bytes_{phase}": stats.total_bytes,
+                    f"rmm_total_count_{phase}": stats.total_count,
+                }
+            )
+
+        if device_handle is not None:
+            processes = pynvml.nvmlDeviceGetComputeRunningProcesses(device_handle)
+            for proc in processes:
+                if proc.pid == pid:
+                    d[f"nvml_current_bytes_{phase}"] = proc.usedGpuMemory
+                    break
+    if extra:
+        d.update(extra)
+
+    return d
+
+
+P = ParamSpec("P")
+
+
+def log_do_evaluate(
+    func: Callable[Concatenate[type[ir.IR], P], cudf_polars.containers.DataFrame],
+) -> Callable[Concatenate[type[ir.IR], P], cudf_polars.containers.DataFrame]:
+    """
+    Decorator for an ``IR.do_evaluate`` method that logs information before and after evaluation.
+
+    Parameters
+    ----------
+    func
+        The ``IR.do_evaluate`` method to wrap.
+    """
+    if not LOG_TRACES:
+        return func
+    else:  # pragma: no cover; requires CUDF_POLARS_LOG_TRACES=1
+
+        @functools.wraps(func)
+        def wrapper(
+            cls: type[ir.IR],
+            *args: P.args,
+            **kwargs: P.kwargs,
+        ) -> cudf_polars.containers.DataFrame:
+            # do this just once
+            pynvml.nvmlInit()
+            maybe_handle = get_device_handle()
+            pid = _getpid()
+            log = structlog.get_logger()
+
+            # By convention, all non-dataframe arguments (non_child) come first.
+            # Anything remaining is a dataframe, except for 'context' kwarg.
+            frames: list[cudf_polars.containers.DataFrame] = (
+                list(args) + [v for k, v in kwargs.items() if k != "context"]
+            )[len(cls._non_child) :]  # type: ignore[assignment]
+
+            before_start = time.monotonic_ns()
+            before = make_snapshot(
+                cls, frames, phase="input", device_handle=maybe_handle, pid=pid
+            )
+            before_end = time.monotonic_ns()
+
+            # The decorator preserves the exact signature of the original do_evaluate method.
+            # Each IR.do_evaluate method is a classmethod that takes the IR class as first
+            # argument, followed by the method-specific arguments, and returns a DataFrame.
+
+            start = time.monotonic_ns()
+            result = func(cls, *args, **kwargs)
+            stop = time.monotonic_ns()
+
+            after_start = time.monotonic_ns()
+            after = make_snapshot(
+                cls,
+                [result],
+                phase="output",
+                extra={"start": start, "stop": stop},
+                device_handle=maybe_handle,
+                pid=pid,
+            )
+            after_end = time.monotonic_ns()
+            record = (
+                before
+                | after
+                | {
+                    "overhead_duration": (before_end - before_start)
+                    + (after_end - after_start)
+                }
+            )
+            log.info("Execute IR", **record)
+
+            return result
+
+        return wrapper
```
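
Because `LOG_TRACES`, `LOG_MEMORY`, and `LOG_DATAFRAMES` are evaluated at module import time, tracing must be configured through the environment before cudf_polars is imported. A minimal sketch of turning it on — the three variables come straight from the diff above; enabling RMM statistics (without which the `rmm_*` fields are omitted) and the GPU-engine collect are ordinary RMM/Polars usage, not something this release adds:

```python
# Sketch: enable the new IR-evaluation tracing. Requires structlog to be
# installed; the env vars are read when cudf_polars.dsl.tracing is imported.
import os

os.environ["CUDF_POLARS_LOG_TRACES"] = "1"  # master switch (default "0")
os.environ["CUDF_POLARS_LOG_TRACES_MEMORY"] = "1"  # RMM/NVML memory fields (default "1")
os.environ["CUDF_POLARS_LOG_TRACES_DATAFRAMES"] = "1"  # per-frame shape/size fields (default "1")

import polars as pl
import rmm.statistics

# make_snapshot only adds the rmm_*_{input,output} fields when RMM
# statistics collection is active.
rmm.statistics.enable_statistics()

q = pl.LazyFrame({"a": [1, 2, 3]}).select(pl.col("a") * 2)
q.collect(engine="gpu")  # each wrapped IR.do_evaluate emits one "Execute IR" event
```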