cudf-polars-cu13 25.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/GIT_COMMIT +1 -0
- cudf_polars/VERSION +1 -0
- cudf_polars/__init__.py +28 -0
- cudf_polars/_version.py +21 -0
- cudf_polars/callback.py +318 -0
- cudf_polars/containers/__init__.py +13 -0
- cudf_polars/containers/column.py +495 -0
- cudf_polars/containers/dataframe.py +361 -0
- cudf_polars/containers/datatype.py +137 -0
- cudf_polars/dsl/__init__.py +8 -0
- cudf_polars/dsl/expr.py +66 -0
- cudf_polars/dsl/expressions/__init__.py +8 -0
- cudf_polars/dsl/expressions/aggregation.py +226 -0
- cudf_polars/dsl/expressions/base.py +272 -0
- cudf_polars/dsl/expressions/binaryop.py +120 -0
- cudf_polars/dsl/expressions/boolean.py +326 -0
- cudf_polars/dsl/expressions/datetime.py +271 -0
- cudf_polars/dsl/expressions/literal.py +97 -0
- cudf_polars/dsl/expressions/rolling.py +643 -0
- cudf_polars/dsl/expressions/selection.py +74 -0
- cudf_polars/dsl/expressions/slicing.py +46 -0
- cudf_polars/dsl/expressions/sorting.py +85 -0
- cudf_polars/dsl/expressions/string.py +1002 -0
- cudf_polars/dsl/expressions/struct.py +137 -0
- cudf_polars/dsl/expressions/ternary.py +49 -0
- cudf_polars/dsl/expressions/unary.py +517 -0
- cudf_polars/dsl/ir.py +2607 -0
- cudf_polars/dsl/nodebase.py +164 -0
- cudf_polars/dsl/to_ast.py +359 -0
- cudf_polars/dsl/tracing.py +16 -0
- cudf_polars/dsl/translate.py +939 -0
- cudf_polars/dsl/traversal.py +224 -0
- cudf_polars/dsl/utils/__init__.py +8 -0
- cudf_polars/dsl/utils/aggregations.py +481 -0
- cudf_polars/dsl/utils/groupby.py +98 -0
- cudf_polars/dsl/utils/naming.py +34 -0
- cudf_polars/dsl/utils/replace.py +61 -0
- cudf_polars/dsl/utils/reshape.py +74 -0
- cudf_polars/dsl/utils/rolling.py +121 -0
- cudf_polars/dsl/utils/windows.py +192 -0
- cudf_polars/experimental/__init__.py +8 -0
- cudf_polars/experimental/base.py +386 -0
- cudf_polars/experimental/benchmarks/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds.py +220 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
- cudf_polars/experimental/benchmarks/pdsh.py +814 -0
- cudf_polars/experimental/benchmarks/utils.py +832 -0
- cudf_polars/experimental/dask_registers.py +200 -0
- cudf_polars/experimental/dispatch.py +156 -0
- cudf_polars/experimental/distinct.py +197 -0
- cudf_polars/experimental/explain.py +157 -0
- cudf_polars/experimental/expressions.py +590 -0
- cudf_polars/experimental/groupby.py +327 -0
- cudf_polars/experimental/io.py +943 -0
- cudf_polars/experimental/join.py +391 -0
- cudf_polars/experimental/parallel.py +423 -0
- cudf_polars/experimental/repartition.py +69 -0
- cudf_polars/experimental/scheduler.py +155 -0
- cudf_polars/experimental/select.py +188 -0
- cudf_polars/experimental/shuffle.py +354 -0
- cudf_polars/experimental/sort.py +609 -0
- cudf_polars/experimental/spilling.py +151 -0
- cudf_polars/experimental/statistics.py +795 -0
- cudf_polars/experimental/utils.py +169 -0
- cudf_polars/py.typed +0 -0
- cudf_polars/testing/__init__.py +8 -0
- cudf_polars/testing/asserts.py +448 -0
- cudf_polars/testing/io.py +122 -0
- cudf_polars/testing/plugin.py +236 -0
- cudf_polars/typing/__init__.py +219 -0
- cudf_polars/utils/__init__.py +8 -0
- cudf_polars/utils/config.py +741 -0
- cudf_polars/utils/conversion.py +40 -0
- cudf_polars/utils/dtypes.py +118 -0
- cudf_polars/utils/sorting.py +53 -0
- cudf_polars/utils/timer.py +39 -0
- cudf_polars/utils/versions.py +27 -0
- cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
- cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
- cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
- cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
- cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Dask function registrations such as serializers and dispatch implementations."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING, Any, ClassVar, overload
|
|
9
|
+
|
|
10
|
+
from dask.sizeof import sizeof as sizeof_dispatch
|
|
11
|
+
from dask.tokenize import normalize_token
|
|
12
|
+
from distributed.protocol import dask_deserialize, dask_serialize
|
|
13
|
+
from distributed.protocol.cuda import cuda_deserialize, cuda_serialize
|
|
14
|
+
from distributed.utils import log_errors
|
|
15
|
+
|
|
16
|
+
import pylibcudf as plc
|
|
17
|
+
import rmm
|
|
18
|
+
|
|
19
|
+
from cudf_polars.containers import Column, DataFrame, DataType
|
|
20
|
+
from cudf_polars.dsl.expressions.base import NamedExpr
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from collections.abc import Hashable, Mapping
|
|
24
|
+
|
|
25
|
+
from distributed import Client
|
|
26
|
+
|
|
27
|
+
from rmm.pylibrmm.memory_resource import DeviceMemoryResource
|
|
28
|
+
from rmm.pylibrmm.stream import Stream
|
|
29
|
+
|
|
30
|
+
from cudf_polars.typing import ColumnHeader, ColumnOptions, DataFrameHeader
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
__all__ = ["DaskRegisterManager", "register"]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class DaskRegisterManager:  # pragma: no cover; Only used with Distributed scheduler
    """Manager that guarantees the cudf-polars serializers are registered once.

    Registration happens at most once per process (``_registered``) and the
    cluster-wide broadcast happens at most once per client id
    (``_client_run_executed``).
    """

    # Process-local flag: has register() already run in this interpreter?
    _registered: bool = False
    # Client ids for which the cluster-wide registration has been broadcast.
    _client_run_executed: ClassVar[set[str]] = set()

    @classmethod
    def register_once(cls) -> None:
        """Register Dask/cudf-polars serializers in calling process."""
        if cls._registered:
            return
        # Imported lazily so merely importing this module has no side effects.
        from cudf_polars.experimental.dask_registers import register

        register()
        cls._registered = True

    @classmethod
    def run_on_cluster(cls, client: Client) -> None:
        """Run register on the workers and scheduler once."""
        if client.id in cls._client_run_executed:
            return
        client.run(cls.register_once)
        client.run_on_scheduler(cls.register_once)
        cls._client_run_executed.add(client.id)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def register() -> None:
    """Register dask serialization and dispatch functions.

    Installs, in the calling process:

    * ``cuda_serialize``/``cuda_deserialize`` handlers that keep the GPU
      frame on device (zero-copy path between GPU-aware workers),
    * ``dask_serialize``/``dask_deserialize`` handlers that copy the GPU
      frame to host memory (fallback path for non-GPU transports),
    * ``dask.sizeof`` handlers reporting device-buffer footprints,
    * fast ``normalize_token`` handlers for ``NamedExpr``/``DataType``.
    """

    # Typing-only overloads: the serializer below accepts either a Column
    # or a DataFrame and returns the matching header type.
    @overload
    def serialize_column_or_frame(
        x: DataFrame,
    ) -> tuple[DataFrameHeader, list[memoryview]]: ...

    @overload
    def serialize_column_or_frame(
        x: Column,
    ) -> tuple[ColumnHeader, list[memoryview]]: ...

    @cuda_serialize.register((Column, DataFrame))
    def serialize_column_or_frame(
        x: DataFrame | Column,
    ) -> tuple[DataFrameHeader | ColumnHeader, list[memoryview]]:
        with log_errors():
            header, frames = x.serialize()
            return header, list(frames)  # Dask expects a list of frames

    @cuda_deserialize.register(DataFrame)
    def _(
        header: DataFrameHeader, frames: tuple[memoryview, plc.gpumemoryview]
    ) -> DataFrame:
        with log_errors():
            # Frames are (host metadata, device payload).
            metadata, gpudata = frames  # TODO: check if this is a length-2 list...
            return DataFrame.deserialize(header, (metadata, plc.gpumemoryview(gpudata)))

    @cuda_deserialize.register(Column)
    def _(header: ColumnHeader, frames: tuple[memoryview, plc.gpumemoryview]) -> Column:
        with log_errors():
            metadata, gpudata = frames
            return Column.deserialize(header, (metadata, plc.gpumemoryview(gpudata)))

    # Typing-only overloads for the host-copy (non-CUDA) serializer.
    @overload
    def dask_serialize_column_or_frame(
        x: DataFrame,
    ) -> tuple[DataFrameHeader, tuple[memoryview, memoryview]]: ...

    @overload
    def dask_serialize_column_or_frame(
        x: Column,
    ) -> tuple[ColumnHeader, tuple[memoryview, memoryview]]: ...

    @dask_serialize.register(Column)
    def dask_serialize_column_or_frame(
        x: DataFrame | Column,
    ) -> tuple[DataFrameHeader | ColumnHeader, tuple[memoryview, memoryview]]:
        with log_errors():
            header, (metadata, gpudata) = x.serialize()

            # For robustness, we check that the gpu data is contiguous:
            # 1-D, unit (or absent) stride, and unsigned bytes.
            cai = gpudata.__cuda_array_interface__
            assert len(cai["shape"]) == 1
            assert cai["strides"] is None or cai["strides"] == (1,)
            assert cai["typestr"] == "|u1"
            nbytes = cai["shape"][0]

            # Copy the gpudata to host memory
            gpudata_on_host = memoryview(
                rmm.DeviceBuffer(ptr=gpudata.ptr, size=nbytes).copy_to_host()
            )
            return header, (metadata, gpudata_on_host)

    @dask_deserialize.register(Column)
    def _(header: ColumnHeader, frames: tuple[memoryview, memoryview]) -> Column:
        with log_errors():
            assert len(frames) == 2
            # Copy the second frame (the gpudata in host memory) back to the gpu
            frames = frames[0], plc.gpumemoryview(rmm.DeviceBuffer.to_device(frames[1]))
            return Column.deserialize(header, frames)

    @dask_serialize.register(DataFrame)
    def _(
        x: DataFrame, context: Mapping[str, Any] | None = None
    ) -> tuple[DataFrameHeader, tuple[memoryview, memoryview]]:
        # Do regular serialization if no staging buffer is provided.
        if context is None or "staging_device_buffer" not in context:
            return dask_serialize_column_or_frame(x)

        # If a staging buffer is provided, we use `ChunkedPack` to
        # serialize the dataframe using the provided staging buffer.
        with log_errors():
            # Keyword arguments for `Column.__init__`.
            columns_kwargs: list[ColumnOptions] = [
                col.serialize_ctor_kwargs() for col in x.columns
            ]
            header: DataFrameHeader = {
                "columns_kwargs": columns_kwargs,
                "frame_count": 2,
            }
            # The staging path needs an explicit stream and memory resource;
            # fail loudly rather than guessing defaults.
            if "stream" not in context:
                raise ValueError(
                    "context: stream must be given when staging_device_buffer is"
                )
            if "device_mr" not in context:
                raise ValueError(
                    "context: device_mr must be given when staging_device_buffer is"
                )
            stream: Stream = context["stream"]
            device_mr: DeviceMemoryResource = context["device_mr"]
            buf: rmm.DeviceBuffer = context["staging_device_buffer"]
            frame = plc.contiguous_split.ChunkedPack.create(
                x.table, buf.nbytes, stream, device_mr
            ).pack_to_host(buf)
            return header, frame

    @dask_deserialize.register(DataFrame)
    def _(header: DataFrameHeader, frames: tuple[memoryview, memoryview]) -> DataFrame:
        with log_errors():
            assert len(frames) == 2
            # Copy the second frame (the gpudata in host memory) back to the gpu
            frames = frames[0], plc.gpumemoryview(rmm.DeviceBuffer.to_device(frames[1]))
            return DataFrame.deserialize(header, frames)

    @sizeof_dispatch.register(Column)
    def _(x: Column) -> int:
        """The total size of the device buffers used by the DataFrame or Column."""
        return x.obj.device_buffer_size()

    @sizeof_dispatch.register(DataFrame)
    def _(x: DataFrame) -> int:
        """The total size of the device buffers used by the DataFrame or Column."""
        return sum(c.obj.device_buffer_size() for c in x.columns)

    # Register rapidsmpf serializer if it's installed.
    try:
        from rapidsmpf.integrations.dask.spilling import register_dask_serialize

        register_dask_serialize()  # pragma: no cover; rapidsmpf dependency not included yet
    except ImportError:
        pass

    # Register the tokenizer for NamedExpr and DataType. This is a performance
    # optimization that speeds up tokenization for the most common types seen in
    # the Dask task graph.
    @normalize_token.register(NamedExpr)
    @normalize_token.register(DataType)
    def _(x: NamedExpr | DataType) -> Hashable:
        return hash(x)
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Multi-partition dispatch functions."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from functools import singledispatch
|
|
8
|
+
from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict
|
|
9
|
+
|
|
10
|
+
from cudf_polars.typing import GenericTransformer
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import MutableMapping
|
|
14
|
+
|
|
15
|
+
from cudf_polars.dsl import ir
|
|
16
|
+
from cudf_polars.dsl.ir import IR
|
|
17
|
+
from cudf_polars.experimental.base import (
|
|
18
|
+
ColumnStats,
|
|
19
|
+
PartitionInfo,
|
|
20
|
+
StatsCollector,
|
|
21
|
+
)
|
|
22
|
+
from cudf_polars.utils.config import ConfigOptions
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class State(TypedDict):
    """
    State used for lowering IR nodes.

    Parameters
    ----------
    config_options
        GPUEngine configuration options.
    stats
        Statistics collector.
    """

    # Engine configuration consulted by the lowering passes.
    config_options: ConfigOptions
    # Accumulated row-count/column statistics for IR nodes.
    stats: StatsCollector
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Recursive rewriter: maps an IR node to a (new IR, partition-info mapping)
# pair, threading the lowering `State` through each call.
LowerIRTransformer: TypeAlias = GenericTransformer[
    "ir.IR", "tuple[ir.IR, MutableMapping[ir.IR, PartitionInfo]]", State
]
"""Protocol for Lowering IR nodes."""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@singledispatch
def lower_ir_node(
    ir: IR, rec: LowerIRTransformer
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    """
    Rewrite an IR node and extract partitioning information.

    Parameters
    ----------
    ir
        IR node to rewrite.
    rec
        Recursive LowerIRTransformer callable.

    Returns
    -------
    new_ir, partition_info
        The rewritten node, together with a mapping from each unique
        node in the full IR graph to its partitioning information.

    Notes
    -----
    Used by `lower_ir_graph`. This default implementation is reached
    only when no handler was registered for ``type(ir)``.

    See Also
    --------
    lower_ir_graph
    """
    unhandled = f"Unhandled type {type(ir)}"
    raise AssertionError(unhandled)  # pragma: no cover
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@singledispatch
def generate_ir_tasks(
    ir: IR, partition_info: MutableMapping[IR, PartitionInfo]
) -> MutableMapping[Any, Any]:
    """
    Generate a task graph for evaluation of an IR node.

    Parameters
    ----------
    ir
        IR node to generate tasks for.
    partition_info
        Partitioning information, obtained from :func:`lower_ir_graph`.

    Returns
    -------
    mapping
        A (partial) dask task graph for the evaluation of an ir node.

    Notes
    -----
    Implementations emit only the tasks for the current node, referring
    to child tasks by name. This default implementation is reached only
    when no handler was registered for ``type(ir)``.

    See Also
    --------
    task_graph
    """
    unhandled = f"Unhandled type {type(ir)}"
    raise AssertionError(unhandled)  # pragma: no cover
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@singledispatch
def initialize_column_stats(
    ir: IR, stats: StatsCollector, config_options: ConfigOptions
) -> dict[str, ColumnStats]:
    """
    Initialize column statistics for an IR node.

    Parameters
    ----------
    ir
        The IR node to collect source statistics for.
    stats
        The `StatsCollector` object containing known source statistics.
    config_options
        GPUEngine configuration options.

    Returns
    -------
    base_stats_mapping
        Mapping between column names and base ``ColumnStats`` objects.

    Notes
    -----
    Base column stats are ``ColumnStats`` objects **without** populated
    ``unique_stats`` information; this step propagates ``DataSourceInfo``
    references and sets ``children`` attributes for each column of each
    IR node. This default implementation is reached only when no handler
    was registered for ``type(ir)``.
    """
    unhandled = f"Unhandled type {type(ir)}"
    raise AssertionError(unhandled)  # pragma: no cover
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@singledispatch
def update_column_stats(
    ir: IR, stats: StatsCollector, config_options: ConfigOptions
) -> None:
    """
    Finalize local column statistics for an IR node.

    Parameters
    ----------
    ir
        The IR node whose local column statistics should be finalized.
    stats
        The `StatsCollector` object containing known statistics.
    config_options
        GPUEngine configuration options.

    Notes
    -----
    This default implementation is reached only when no handler was
    registered for ``type(ir)``.
    """
    unhandled = f"Unhandled type {type(ir)}"
    raise AssertionError(unhandled)  # pragma: no cover
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Multi-partition Distinct logic."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import math
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
import pylibcudf as plc
|
|
11
|
+
|
|
12
|
+
from cudf_polars.dsl.expressions.base import Col, NamedExpr
|
|
13
|
+
from cudf_polars.dsl.ir import Distinct
|
|
14
|
+
from cudf_polars.experimental.base import PartitionInfo
|
|
15
|
+
from cudf_polars.experimental.dispatch import lower_ir_node
|
|
16
|
+
from cudf_polars.experimental.utils import (
|
|
17
|
+
_fallback_inform,
|
|
18
|
+
_get_unique_fractions,
|
|
19
|
+
_lower_ir_fallback,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from collections.abc import MutableMapping
|
|
24
|
+
|
|
25
|
+
from cudf_polars.dsl.ir import IR
|
|
26
|
+
from cudf_polars.experimental.dispatch import LowerIRTransformer
|
|
27
|
+
from cudf_polars.utils.config import ConfigOptions
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def lower_distinct(
    ir: Distinct,
    child: IR,
    partition_info: MutableMapping[IR, PartitionInfo],
    config_options: ConfigOptions,
    *,
    unique_fraction: float | None = None,
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    """
    Lower a Distinct IR into partition-wise stages.

    Parameters
    ----------
    ir
        The Distinct IR node to lower.
    child
        The reconstructed child of ``ir``. May differ
        from ``ir.children[0]``.
    partition_info
        A mapping from all unique IR nodes to the
        associated partitioning information.
    config_options
        GPUEngine configuration options.
    unique_fraction
        Fraction of unique values to total values. Used for algorithm selection.
        A value of `1.0` means the column is unique.

    Returns
    -------
    new_node
        The lowered Distinct node.
    partition_info
        A mapping from unique nodes in the new graph to associated
        partitioning information.
    """
    # Imported here to avoid a circular import at module load time.
    from cudf_polars.experimental.repartition import Repartition
    from cudf_polars.experimental.shuffle import Shuffle

    # Extract child partitioning
    child_count = partition_info[child].count
    assert config_options.executor.name == "streaming", (
        "'in-memory' executor not supported in 'lower_distinct'"
    )

    # Assume shuffle is not stable for now. Therefore, we
    # require a tree reduction if row order matters
    # (stable distinct, or keep-first/keep-last semantics).
    require_tree_reduction = ir.stable or ir.keep in (
        plc.stream_compaction.DuplicateKeepOption.KEEP_FIRST,
        plc.stream_compaction.DuplicateKeepOption.KEEP_LAST,
    )

    # Columns that define uniqueness; default is the whole schema.
    subset: frozenset = ir.subset or frozenset(ir.schema)
    shuffle_keys = tuple(NamedExpr(name, Col(ir.schema[name], name)) for name in subset)
    # True when the child is already hash-partitioned on the subset keys.
    shuffled = partition_info[child].partitioned_on == shuffle_keys
    if ir.keep == plc.stream_compaction.DuplicateKeepOption.KEEP_NONE:
        # Need to shuffle the original data for keep == "none"
        if require_tree_reduction:
            # TODO: We cannot drop all duplicates without
            # shuffling the data up front, and we assume
            # shuffling is unstable for now. Note that the
            # task-based shuffle should be stable, but
            # its performance is very poor.
            raise NotImplementedError(
                "Unsupported unique options for multiple partitions."
            )
        if not shuffled:
            child = Shuffle(
                child.schema,
                shuffle_keys,
                config_options.executor.shuffle_method,
                child,
            )
            partition_info[child] = PartitionInfo(
                count=child_count,
                partitioned_on=shuffle_keys,
            )
            shuffled = True

    # Defaults: reduce to a single partition with a 32-way tree.
    output_count = 1
    n_ary = 32  # Arbitrary default (for now)
    if ir.zlice is not None:
        # Head/tail slice operation has been pushed into Distinct
        if ir.zlice[0] < 1 and ir.zlice[1] is not None:
            # Use rough 1m-row heuristic to set n_ary
            n_ary = max(int(1_000_000 / ir.zlice[1]), 2)
        else:  # pragma: no cover
            # TODO: General slicing is not supported for multiple
            # partitions. For now, we raise an error to fall back
            # to one partition.
            raise NotImplementedError("Unsupported slice for multiple partitions.")
    elif unique_fraction is not None:
        # Use unique_fraction to determine partitioning
        n_ary = min(max(int(1.0 / unique_fraction), 2), child_count)
        output_count = max(int(unique_fraction * child_count), 1)

    if output_count > 1 and require_tree_reduction:
        # Need to reduce down to a single partition even
        # if the unique_fraction is large.
        output_count = 1
        _fallback_inform(
            "Unsupported unique options for multiple partitions.",
            config_options,
        )

    # Partition-wise unique
    count = child_count
    new_node: IR = ir.reconstruct([child])
    partition_info[new_node] = PartitionInfo(count=count)

    if shuffled or output_count == 1:
        # Tree reduction: repeatedly concatenate n_ary partitions
        # and re-apply Distinct until we reach output_count.
        while count > output_count:
            new_node = Repartition(new_node.schema, new_node)
            count = max(math.ceil(count / n_ary), output_count)
            partition_info[new_node] = PartitionInfo(count=count)
            new_node = ir.reconstruct([new_node])
            partition_info[new_node] = PartitionInfo(count=count)
    else:
        # Shuffle on the subset keys, then apply Distinct partition-wise.
        new_node = Shuffle(
            new_node.schema,
            shuffle_keys,
            config_options.executor.shuffle_method,
            new_node,
        )
        partition_info[new_node] = PartitionInfo(count=output_count)
        new_node = ir.reconstruct([new_node])
        partition_info[new_node] = PartitionInfo(
            count=output_count,
            partitioned_on=shuffle_keys,
        )

    return new_node, partition_info
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@lower_ir_node.register(Distinct)
def _(
    ir: Distinct, rec: LowerIRTransformer
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    # Lower a Distinct node; falls back to single-partition execution
    # (via _lower_ir_fallback) if lower_distinct raises NotImplementedError.
    # Extract child partitioning
    original_child = ir.children[0]
    child, partition_info = rec(ir.children[0])
    config_options = rec.state["config_options"]
    assert config_options.executor.name == "streaming", (
        "'in-memory' executor not supported in 'lower_ir_node'"
    )

    # Estimate per-column unique fractions for the subset keys,
    # using statistics gathered for the *original* (pre-lowering) child.
    subset: frozenset[str] = ir.subset or frozenset(ir.schema)
    unique_fraction_dict = _get_unique_fractions(
        tuple(subset),
        config_options.executor.unique_fraction,
        row_count=rec.state["stats"].row_count.get(original_child),
        column_stats=rec.state["stats"].column_stats.get(original_child),
    )
    # The most-unique key bounds the distinct output size.
    unique_fraction = (
        max(unique_fraction_dict.values()) if unique_fraction_dict else None
    )

    try:
        return lower_distinct(
            ir,
            child,
            partition_info,
            config_options,
            unique_fraction=unique_fraction,
        )
    except NotImplementedError as err:
        return _lower_ir_fallback(ir, rec, msg=str(err))
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
4
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
"""Explain logical and physical plans."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import functools
|
|
10
|
+
from itertools import groupby
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
from cudf_polars.dsl.ir import (
|
|
14
|
+
GroupBy,
|
|
15
|
+
Join,
|
|
16
|
+
Scan,
|
|
17
|
+
Sort,
|
|
18
|
+
)
|
|
19
|
+
from cudf_polars.dsl.translate import Translator
|
|
20
|
+
from cudf_polars.experimental.base import ColumnStat
|
|
21
|
+
from cudf_polars.experimental.parallel import lower_ir_graph
|
|
22
|
+
from cudf_polars.experimental.statistics import (
|
|
23
|
+
collect_statistics,
|
|
24
|
+
)
|
|
25
|
+
from cudf_polars.utils.config import ConfigOptions
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from collections.abc import MutableMapping
|
|
29
|
+
|
|
30
|
+
import polars as pl
|
|
31
|
+
|
|
32
|
+
from cudf_polars.dsl.ir import IR
|
|
33
|
+
from cudf_polars.experimental.base import PartitionInfo, StatsCollector
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def explain_query(
    q: pl.LazyFrame, engine: pl.GPUEngine, *, physical: bool = True
) -> str:
    """
    Return a formatted string representation of the IR plan.

    Parameters
    ----------
    q : pl.LazyFrame
        The LazyFrame to explain.
    engine : pl.GPUEngine
        The configured GPU engine to use.
    physical : bool, default True
        If True, show the physical (lowered) plan.
        If False, show the logical (pre-lowering) plan.

    Returns
    -------
    str
        A string representation of the IR plan.
    """
    config = ConfigOptions.from_polars_engine(engine)
    # Translate the polars logical plan into the cudf-polars IR.
    ir = Translator(q._ldf.visit(), engine).translate_ir()

    if physical:
        # Lower first, then render with per-node partition counts.
        lowered_ir, partition_info = lower_ir_graph(ir, config)
        return _repr_ir_tree(lowered_ir, partition_info)
    else:
        if config.executor.name == "streaming":
            # Include row-count statistics for the logical plan
            return _repr_ir_tree(ir, stats=collect_statistics(ir, config))
        else:
            return _repr_ir_tree(ir)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _fmt_row_count(value: int | None) -> str:
|
|
72
|
+
"""Format a row count as a readable string."""
|
|
73
|
+
if value is None:
|
|
74
|
+
return ""
|
|
75
|
+
elif value < 1_000:
|
|
76
|
+
return f"{value}"
|
|
77
|
+
elif value < 1_000_000:
|
|
78
|
+
return f"{round(value / 1_000, 2):g} K"
|
|
79
|
+
elif value < 1_000_000_000:
|
|
80
|
+
return f"{round(value / 1_000_000, 2):g} M"
|
|
81
|
+
else:
|
|
82
|
+
return f"{round(value / 1_000_000_000, 2):g} B"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _repr_ir_tree(
    ir: IR,
    partition_info: MutableMapping[IR, PartitionInfo] | None = None,
    *,
    offset: str = "",
    stats: StatsCollector | None = None,
) -> str:
    # Recursively render an IR node and its children, one line per node,
    # indenting children by `offset`. Optionally annotates each line with a
    # row-count estimate (from `stats`) and/or a partition count (from
    # `partition_info`). Runs of identical child subtrees are collapsed
    # into a single copy followed by "(repeated N times)".
    header = _repr_ir(ir, offset=offset)
    count = partition_info[ir].count if partition_info else None
    if stats is not None:
        # Include row-count estimate (if available)
        row_count_estimate = _fmt_row_count(
            stats.row_count.get(ir, ColumnStat[int](None)).value
        )
        row_count = f"~{row_count_estimate}" if row_count_estimate else "unknown"
        header = header.rstrip("\n") + f" {row_count=}\n"
    if count is not None:
        # Append "[partition-count]" to the node's line.
        header = header.rstrip("\n") + f" [{count}]\n"

    children_strs = [
        _repr_ir_tree(child, partition_info, offset=offset + " ", stats=stats)
        for child in ir.children
    ]

    # groupby collapses *adjacent* identical child renderings; the walrus
    # binds the run length so it can be reported inline.
    return header + "".join(
        f"{line}{offset} (repeated {count} times)\n"
        if (count := sum(1 for _ in group)) > 1
        else line
        for line, group in groupby(children_strs)
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _repr_schema(schema: tuple | None) -> str:
|
|
118
|
+
if schema is None:
|
|
119
|
+
return "" # pragma: no cover; no test yet
|
|
120
|
+
names = tuple(schema)
|
|
121
|
+
if len(names) > 6:
|
|
122
|
+
names = names[:3] + ("...",) + names[-2:]
|
|
123
|
+
return f" {names}"
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _repr_header(offset: str, label: str, schema: tuple | dict | None) -> str:
    """Build one rendered plan line: indent, label, abbreviated schema, newline."""
    names = tuple(schema) if schema is not None else None
    return f"{offset}{label}{_repr_schema(names)}\n"
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@functools.singledispatch
def _repr_ir(ir: IR, *, offset: str = "") -> str:
    # Fallback renderer: upper-cased class name plus the node's schema.
    return _repr_header(offset, type(ir).__name__.upper(), ir.schema)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@_repr_ir.register
def _(ir: GroupBy, *, offset: str = "") -> str:
    # Show the group-by key names alongside the label.
    keys = tuple(ne.name for ne in ir.keys)
    return _repr_header(offset, f"GROUPBY {keys}", ir.schema)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@_repr_ir.register
def _(ir: Join, *, offset: str = "") -> str:
    # Show the join kind (ir.options[0]) and the left/right key names.
    left_on = tuple(ne.name for ne in ir.left_on)
    right_on = tuple(ne.name for ne in ir.right_on)
    return _repr_header(offset, f"JOIN {ir.options[0]} {left_on} {right_on}", ir.schema)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@_repr_ir.register
def _(ir: Sort, *, offset: str = "") -> str:
    # Show the sort-key column names alongside the label.
    by = tuple(ne.name for ne in ir.by)
    return _repr_header(offset, f"SORT {by}", ir.schema)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@_repr_ir.register
def _(ir: Scan, *, offset: str = "") -> str:
    # Show the scanned file type (e.g. PARQUET, CSV) alongside the label.
    label = f"SCAN {ir.typ.upper()}"
    return _repr_header(offset, label, ir.schema)