cudf-polars-cu13 25.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. cudf_polars/GIT_COMMIT +1 -0
  2. cudf_polars/VERSION +1 -0
  3. cudf_polars/__init__.py +28 -0
  4. cudf_polars/_version.py +21 -0
  5. cudf_polars/callback.py +318 -0
  6. cudf_polars/containers/__init__.py +13 -0
  7. cudf_polars/containers/column.py +495 -0
  8. cudf_polars/containers/dataframe.py +361 -0
  9. cudf_polars/containers/datatype.py +137 -0
  10. cudf_polars/dsl/__init__.py +8 -0
  11. cudf_polars/dsl/expr.py +66 -0
  12. cudf_polars/dsl/expressions/__init__.py +8 -0
  13. cudf_polars/dsl/expressions/aggregation.py +226 -0
  14. cudf_polars/dsl/expressions/base.py +272 -0
  15. cudf_polars/dsl/expressions/binaryop.py +120 -0
  16. cudf_polars/dsl/expressions/boolean.py +326 -0
  17. cudf_polars/dsl/expressions/datetime.py +271 -0
  18. cudf_polars/dsl/expressions/literal.py +97 -0
  19. cudf_polars/dsl/expressions/rolling.py +643 -0
  20. cudf_polars/dsl/expressions/selection.py +74 -0
  21. cudf_polars/dsl/expressions/slicing.py +46 -0
  22. cudf_polars/dsl/expressions/sorting.py +85 -0
  23. cudf_polars/dsl/expressions/string.py +1002 -0
  24. cudf_polars/dsl/expressions/struct.py +137 -0
  25. cudf_polars/dsl/expressions/ternary.py +49 -0
  26. cudf_polars/dsl/expressions/unary.py +517 -0
  27. cudf_polars/dsl/ir.py +2607 -0
  28. cudf_polars/dsl/nodebase.py +164 -0
  29. cudf_polars/dsl/to_ast.py +359 -0
  30. cudf_polars/dsl/tracing.py +16 -0
  31. cudf_polars/dsl/translate.py +939 -0
  32. cudf_polars/dsl/traversal.py +224 -0
  33. cudf_polars/dsl/utils/__init__.py +8 -0
  34. cudf_polars/dsl/utils/aggregations.py +481 -0
  35. cudf_polars/dsl/utils/groupby.py +98 -0
  36. cudf_polars/dsl/utils/naming.py +34 -0
  37. cudf_polars/dsl/utils/replace.py +61 -0
  38. cudf_polars/dsl/utils/reshape.py +74 -0
  39. cudf_polars/dsl/utils/rolling.py +121 -0
  40. cudf_polars/dsl/utils/windows.py +192 -0
  41. cudf_polars/experimental/__init__.py +8 -0
  42. cudf_polars/experimental/base.py +386 -0
  43. cudf_polars/experimental/benchmarks/__init__.py +4 -0
  44. cudf_polars/experimental/benchmarks/pdsds.py +220 -0
  45. cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
  46. cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
  47. cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
  48. cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
  49. cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
  50. cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
  51. cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
  52. cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
  53. cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
  54. cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
  55. cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
  56. cudf_polars/experimental/benchmarks/pdsh.py +814 -0
  57. cudf_polars/experimental/benchmarks/utils.py +832 -0
  58. cudf_polars/experimental/dask_registers.py +200 -0
  59. cudf_polars/experimental/dispatch.py +156 -0
  60. cudf_polars/experimental/distinct.py +197 -0
  61. cudf_polars/experimental/explain.py +157 -0
  62. cudf_polars/experimental/expressions.py +590 -0
  63. cudf_polars/experimental/groupby.py +327 -0
  64. cudf_polars/experimental/io.py +943 -0
  65. cudf_polars/experimental/join.py +391 -0
  66. cudf_polars/experimental/parallel.py +423 -0
  67. cudf_polars/experimental/repartition.py +69 -0
  68. cudf_polars/experimental/scheduler.py +155 -0
  69. cudf_polars/experimental/select.py +188 -0
  70. cudf_polars/experimental/shuffle.py +354 -0
  71. cudf_polars/experimental/sort.py +609 -0
  72. cudf_polars/experimental/spilling.py +151 -0
  73. cudf_polars/experimental/statistics.py +795 -0
  74. cudf_polars/experimental/utils.py +169 -0
  75. cudf_polars/py.typed +0 -0
  76. cudf_polars/testing/__init__.py +8 -0
  77. cudf_polars/testing/asserts.py +448 -0
  78. cudf_polars/testing/io.py +122 -0
  79. cudf_polars/testing/plugin.py +236 -0
  80. cudf_polars/typing/__init__.py +219 -0
  81. cudf_polars/utils/__init__.py +8 -0
  82. cudf_polars/utils/config.py +741 -0
  83. cudf_polars/utils/conversion.py +40 -0
  84. cudf_polars/utils/dtypes.py +118 -0
  85. cudf_polars/utils/sorting.py +53 -0
  86. cudf_polars/utils/timer.py +39 -0
  87. cudf_polars/utils/versions.py +27 -0
  88. cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
  89. cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
  90. cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
  91. cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
  92. cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,200 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Dask function registrations such as serializers and dispatch implementations."""
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import TYPE_CHECKING, Any, ClassVar, overload
9
+
10
+ from dask.sizeof import sizeof as sizeof_dispatch
11
+ from dask.tokenize import normalize_token
12
+ from distributed.protocol import dask_deserialize, dask_serialize
13
+ from distributed.protocol.cuda import cuda_deserialize, cuda_serialize
14
+ from distributed.utils import log_errors
15
+
16
+ import pylibcudf as plc
17
+ import rmm
18
+
19
+ from cudf_polars.containers import Column, DataFrame, DataType
20
+ from cudf_polars.dsl.expressions.base import NamedExpr
21
+
22
+ if TYPE_CHECKING:
23
+ from collections.abc import Hashable, Mapping
24
+
25
+ from distributed import Client
26
+
27
+ from rmm.pylibrmm.memory_resource import DeviceMemoryResource
28
+ from rmm.pylibrmm.stream import Stream
29
+
30
+ from cudf_polars.typing import ColumnHeader, ColumnOptions, DataFrameHeader
31
+
32
+
33
+ __all__ = ["DaskRegisterManager", "register"]
34
+
35
+
36
class DaskRegisterManager:  # pragma: no cover; Only used with Distributed scheduler
    """Manager to ensure the serializer is only registered once."""

    # Whether ``register`` has already run in this process.
    _registered: ClassVar[bool] = False
    # IDs of clients whose workers/scheduler have already been registered.
    _client_run_executed: ClassVar[set[str]] = set()

    @classmethod
    def register_once(cls) -> None:
        """Register Dask/cudf-polars serializers in calling process."""
        if not cls._registered:
            # NOTE(review): imports by module path rather than calling a local
            # name — presumably so this classmethod is self-contained when
            # shipped to workers via ``client.run``; confirm before changing.
            from cudf_polars.experimental.dask_registers import register

            register()
            cls._registered = True

    @classmethod
    def run_on_cluster(cls, client: Client) -> None:
        """Run register on the workers and scheduler once."""
        if client.id not in cls._client_run_executed:
            # Register on every worker, then on the scheduler itself.
            client.run(cls.register_once)
            client.run_on_scheduler(cls.register_once)
            cls._client_run_executed.add(client.id)
58
+
59
+
60
def register() -> None:
    """Register dask serialization and dispatch functions."""

    # ---- Device ("cuda") serialization ---------------------------------
    # These handlers pass the frames produced by ``x.serialize()`` through
    # unchanged; deserialization re-wraps the gpu frame in a
    # ``plc.gpumemoryview``.

    # The two ``@overload`` defs only refine static types; the function
    # registered below handles both Column and DataFrame.
    @overload
    def serialize_column_or_frame(
        x: DataFrame,
    ) -> tuple[DataFrameHeader, list[memoryview]]: ...

    @overload
    def serialize_column_or_frame(
        x: Column,
    ) -> tuple[ColumnHeader, list[memoryview]]: ...

    @cuda_serialize.register((Column, DataFrame))
    def serialize_column_or_frame(
        x: DataFrame | Column,
    ) -> tuple[DataFrameHeader | ColumnHeader, list[memoryview]]:
        with log_errors():
            header, frames = x.serialize()
            return header, list(frames)  # Dask expects a list of frames

    @cuda_deserialize.register(DataFrame)
    def _(
        header: DataFrameHeader, frames: tuple[memoryview, plc.gpumemoryview]
    ) -> DataFrame:
        with log_errors():
            metadata, gpudata = frames  # TODO: check if this is a length-2 list...
            return DataFrame.deserialize(header, (metadata, plc.gpumemoryview(gpudata)))

    @cuda_deserialize.register(Column)
    def _(header: ColumnHeader, frames: tuple[memoryview, plc.gpumemoryview]) -> Column:
        with log_errors():
            metadata, gpudata = frames
            return Column.deserialize(header, (metadata, plc.gpumemoryview(gpudata)))

    # ---- Host ("dask") serialization -----------------------------------
    # These handlers copy the gpu frame into host memory (and back on
    # deserialization), so the serialized payload holds no device pointers.

    @overload
    def dask_serialize_column_or_frame(
        x: DataFrame,
    ) -> tuple[DataFrameHeader, tuple[memoryview, memoryview]]: ...

    @overload
    def dask_serialize_column_or_frame(
        x: Column,
    ) -> tuple[ColumnHeader, tuple[memoryview, memoryview]]: ...

    @dask_serialize.register(Column)
    def dask_serialize_column_or_frame(
        x: DataFrame | Column,
    ) -> tuple[DataFrameHeader | ColumnHeader, tuple[memoryview, memoryview]]:
        # NOTE: registered for Column only; the DataFrame handler below
        # forwards to this function when no staging buffer is supplied.
        with log_errors():
            header, (metadata, gpudata) = x.serialize()

            # For robustness, we check that the gpu data is contiguous
            cai = gpudata.__cuda_array_interface__
            assert len(cai["shape"]) == 1
            assert cai["strides"] is None or cai["strides"] == (1,)
            assert cai["typestr"] == "|u1"
            nbytes = cai["shape"][0]

            # Copy the gpudata to host memory
            gpudata_on_host = memoryview(
                rmm.DeviceBuffer(ptr=gpudata.ptr, size=nbytes).copy_to_host()
            )
            return header, (metadata, gpudata_on_host)

    @dask_deserialize.register(Column)
    def _(header: ColumnHeader, frames: tuple[memoryview, memoryview]) -> Column:
        with log_errors():
            assert len(frames) == 2
            # Copy the second frame (the gpudata in host memory) back to the gpu
            frames = frames[0], plc.gpumemoryview(rmm.DeviceBuffer.to_device(frames[1]))
            return Column.deserialize(header, frames)

    @dask_serialize.register(DataFrame)
    def _(
        x: DataFrame, context: Mapping[str, Any] | None = None
    ) -> tuple[DataFrameHeader, tuple[memoryview, memoryview]]:
        # Do regular serialization if no staging buffer is provided.
        if context is None or "staging_device_buffer" not in context:
            return dask_serialize_column_or_frame(x)

        # If a staging buffer is provided, we use `ChunkedPack` to
        # serialize the dataframe using the provided staging buffer.
        with log_errors():
            # Keyword arguments for `Column.__init__`.
            columns_kwargs: list[ColumnOptions] = [
                col.serialize_ctor_kwargs() for col in x.columns
            ]
            header: DataFrameHeader = {
                "columns_kwargs": columns_kwargs,
                "frame_count": 2,
            }
            # A staging buffer is only usable together with an explicit
            # stream and device memory resource.
            if "stream" not in context:
                raise ValueError(
                    "context: stream must be given when staging_device_buffer is"
                )
            if "device_mr" not in context:
                raise ValueError(
                    "context: device_mr must be given when staging_device_buffer is"
                )
            stream: Stream = context["stream"]
            device_mr: DeviceMemoryResource = context["device_mr"]
            buf: rmm.DeviceBuffer = context["staging_device_buffer"]
            frame = plc.contiguous_split.ChunkedPack.create(
                x.table, buf.nbytes, stream, device_mr
            ).pack_to_host(buf)
            return header, frame

    @dask_deserialize.register(DataFrame)
    def _(header: DataFrameHeader, frames: tuple[memoryview, memoryview]) -> DataFrame:
        with log_errors():
            assert len(frames) == 2
            # Copy the second frame (the gpudata in host memory) back to the gpu
            frames = frames[0], plc.gpumemoryview(rmm.DeviceBuffer.to_device(frames[1]))
            return DataFrame.deserialize(header, frames)

    # ---- Sizeof dispatch -----------------------------------------------
    @sizeof_dispatch.register(Column)
    def _(x: Column) -> int:
        """The total size of the device buffers used by the DataFrame or Column."""
        return x.obj.device_buffer_size()

    @sizeof_dispatch.register(DataFrame)
    def _(x: DataFrame) -> int:
        """The total size of the device buffers used by the DataFrame or Column."""
        return sum(c.obj.device_buffer_size() for c in x.columns)

    # Register rapidsmpf serializer if it's installed.
    try:
        from rapidsmpf.integrations.dask.spilling import register_dask_serialize

        register_dask_serialize()  # pragma: no cover; rapidsmpf dependency not included yet
    except ImportError:
        pass

    # Register the tokenizer for NamedExpr and DataType. This is a performance
    # optimization that speeds up tokenization for the most common types seen in
    # the Dask task graph.
    @normalize_token.register(NamedExpr)
    @normalize_token.register(DataType)
    def _(x: NamedExpr | DataType) -> Hashable:
        return hash(x)
@@ -0,0 +1,156 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Multi-partition dispatch functions."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from functools import singledispatch
8
+ from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict
9
+
10
+ from cudf_polars.typing import GenericTransformer
11
+
12
+ if TYPE_CHECKING:
13
+ from collections.abc import MutableMapping
14
+
15
+ from cudf_polars.dsl import ir
16
+ from cudf_polars.dsl.ir import IR
17
+ from cudf_polars.experimental.base import (
18
+ ColumnStats,
19
+ PartitionInfo,
20
+ StatsCollector,
21
+ )
22
+ from cudf_polars.utils.config import ConfigOptions
23
+
24
+
25
class State(TypedDict):
    """
    State used for lowering IR nodes.

    Parameters
    ----------
    config_options
        GPUEngine configuration options.
    stats
        Statistics collector.
    """

    # GPUEngine configuration options, threaded through every lowering step.
    config_options: ConfigOptions
    # Collector of statistics (e.g. row counts) consulted during lowering.
    stats: StatsCollector
39
+
40
+
41
# A ``GenericTransformer`` that rewrites one IR node into a new IR node plus
# a mapping of partitioning information, with the lowering ``State`` carried
# through recursive calls (available as ``rec.state``).
LowerIRTransformer: TypeAlias = GenericTransformer[
    "ir.IR", "tuple[ir.IR, MutableMapping[ir.IR, PartitionInfo]]", State
]
"""Protocol for Lowering IR nodes."""
45
+
46
+
47
@singledispatch
def lower_ir_node(
    ir: IR, rec: LowerIRTransformer
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    """
    Rewrite a single IR node and extract its partitioning information.

    This is the generic fallback of a ``singledispatch`` family; concrete
    IR types register their own implementations. Reaching this body means
    no implementation exists for ``type(ir)``.

    Parameters
    ----------
    ir
        The IR node to rewrite.
    rec
        The recursive ``LowerIRTransformer`` used to lower children.

    Returns
    -------
    new_ir, partition_info
        The rewritten node, together with a mapping from each unique node
        in the full IR graph to its partitioning information.

    Raises
    ------
    AssertionError
        Always, for unregistered IR types.

    Notes
    -----
    Called from `lower_ir_graph`.

    See Also
    --------
    lower_ir_graph
    """
    raise AssertionError(f"Unhandled type {type(ir)}")  # pragma: no cover
76
+
77
+
78
@singledispatch
def generate_ir_tasks(
    ir: IR, partition_info: MutableMapping[IR, PartitionInfo]
) -> MutableMapping[Any, Any]:
    """
    Build the task-graph fragment that evaluates one IR node.

    Generic ``singledispatch`` fallback; concrete IR types register their
    own implementations, and hitting this body means none was registered.

    Parameters
    ----------
    ir
        The IR node to generate tasks for.
    partition_info
        Partitioning information, obtained from :func:`lower_ir_graph`.

    Returns
    -------
    mapping
        A (partial) dask task graph for the evaluation of an ir node.

    Raises
    ------
    AssertionError
        Always, for unregistered IR types.

    Notes
    -----
    Implementations should emit tasks for the current node only,
    referring to child tasks by name.

    See Also
    --------
    task_graph
    """
    raise AssertionError(f"Unhandled type {type(ir)}")  # pragma: no cover
107
+
108
+
109
@singledispatch
def initialize_column_stats(
    ir: IR, stats: StatsCollector, config_options: ConfigOptions
) -> dict[str, ColumnStats]:
    """
    Build base column statistics for one IR node.

    Generic ``singledispatch`` fallback; concrete IR types register their
    own implementations, and hitting this body means none was registered.

    Parameters
    ----------
    ir
        The IR node to collect source statistics for.
    stats
        The `StatsCollector` object containing known source statistics.
    config_options
        GPUEngine configuration options.

    Returns
    -------
    base_stats_mapping
        Mapping between column names and base ``ColumnStats`` objects.

    Raises
    ------
    AssertionError
        Always, for unregistered IR types.

    Notes
    -----
    "Base" column stats are ``ColumnStats`` objects **without** populated
    ``unique_stats``: this step only propagates ``DataSourceInfo``
    references and wires up ``children`` for each column of each node.
    """
    raise AssertionError(f"Unhandled type {type(ir)}")  # pragma: no cover
138
+
139
+
140
@singledispatch
def update_column_stats(
    ir: IR, stats: StatsCollector, config_options: ConfigOptions
) -> None:
    """
    Finalize the local column statistics for one IR node.

    Generic ``singledispatch`` fallback; concrete IR types register their
    own implementations, and hitting this body means none was registered.

    Parameters
    ----------
    ir
        The IR node to finalize local column statistics for.
    stats
        The `StatsCollector` object containing known statistics.
    config_options
        GPUEngine configuration options.

    Raises
    ------
    AssertionError
        Always, for unregistered IR types.
    """
    raise AssertionError(f"Unhandled type {type(ir)}")  # pragma: no cover
@@ -0,0 +1,197 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Multi-partition Distinct logic."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import math
8
+ from typing import TYPE_CHECKING
9
+
10
+ import pylibcudf as plc
11
+
12
+ from cudf_polars.dsl.expressions.base import Col, NamedExpr
13
+ from cudf_polars.dsl.ir import Distinct
14
+ from cudf_polars.experimental.base import PartitionInfo
15
+ from cudf_polars.experimental.dispatch import lower_ir_node
16
+ from cudf_polars.experimental.utils import (
17
+ _fallback_inform,
18
+ _get_unique_fractions,
19
+ _lower_ir_fallback,
20
+ )
21
+
22
+ if TYPE_CHECKING:
23
+ from collections.abc import MutableMapping
24
+
25
+ from cudf_polars.dsl.ir import IR
26
+ from cudf_polars.experimental.dispatch import LowerIRTransformer
27
+ from cudf_polars.utils.config import ConfigOptions
28
+
29
+
30
def lower_distinct(
    ir: Distinct,
    child: IR,
    partition_info: MutableMapping[IR, PartitionInfo],
    config_options: ConfigOptions,
    *,
    unique_fraction: float | None = None,
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    """
    Lower a Distinct IR into partition-wise stages.

    Parameters
    ----------
    ir
        The Distinct IR node to lower.
    child
        The reconstructed child of ``ir``. May differ
        from ``ir.children[0]``.
    partition_info
        A mapping from all unique IR nodes to the
        associated partitioning information.
    config_options
        GPUEngine configuration options.
    unique_fraction
        Fraction of unique values to total values. Used for algorithm selection.
        A value of `1.0` means the column is unique.

    Returns
    -------
    new_node
        The lowered Distinct node.
    partition_info
        A mapping from unique nodes in the new graph to associated
        partitioning information.

    Raises
    ------
    NotImplementedError
        When the requested keep/slice options cannot be honored across
        multiple partitions (the caller falls back to one partition).
    """
    from cudf_polars.experimental.repartition import Repartition
    from cudf_polars.experimental.shuffle import Shuffle

    # Extract child partitioning
    child_count = partition_info[child].count
    assert config_options.executor.name == "streaming", (
        "'in-memory' executor not supported in 'lower_distinct'"
    )

    # Assume shuffle is not stable for now. Therefore, we
    # require a tree reduction if row order matters.
    require_tree_reduction = ir.stable or ir.keep in (
        plc.stream_compaction.DuplicateKeepOption.KEEP_FIRST,
        plc.stream_compaction.DuplicateKeepOption.KEEP_LAST,
    )

    # Distinct keys: the explicit subset if given, otherwise every column.
    subset: frozenset[str] = ir.subset or frozenset(ir.schema)
    shuffle_keys = tuple(NamedExpr(name, Col(ir.schema[name], name)) for name in subset)
    # True when the child is already partitioned on exactly these keys.
    shuffled = partition_info[child].partitioned_on == shuffle_keys
    if ir.keep == plc.stream_compaction.DuplicateKeepOption.KEEP_NONE:
        # Need to shuffle the original data for keep == "none"
        if require_tree_reduction:
            # TODO: We cannot drop all duplicates without
            # shuffling the data up front, and we assume
            # shuffling is unstable for now. Note that the
            # task-based shuffle should be stable, but
            # its performance is very poor.
            raise NotImplementedError(
                "Unsupported unique options for multiple partitions."
            )
        if not shuffled:
            child = Shuffle(
                child.schema,
                shuffle_keys,
                config_options.executor.shuffle_method,
                child,
            )
            partition_info[child] = PartitionInfo(
                count=child_count,
                partitioned_on=shuffle_keys,
            )
            shuffled = True

    # n_ary is the fan-in of each tree-reduction level; output_count is the
    # partition count we reduce (or shuffle) down to.
    output_count = 1
    n_ary = 32  # Arbitrary default (for now)
    if ir.zlice is not None:
        # Head/tail slice operation has been pushed into Distinct
        if ir.zlice[0] < 1 and ir.zlice[1] is not None:
            # Use rough 1m-row heuristic to set n_ary
            n_ary = max(int(1_000_000 / ir.zlice[1]), 2)
        else:  # pragma: no cover
            # TODO: General slicing is not supported for multiple
            # partitions. For now, we raise an error to fall back
            # to one partition.
            raise NotImplementedError("Unsupported slice for multiple partitions.")
    elif unique_fraction is not None:
        # Use unique_fraction to determine partitioning
        n_ary = min(max(int(1.0 / unique_fraction), 2), child_count)
        output_count = max(int(unique_fraction * child_count), 1)

    if output_count > 1 and require_tree_reduction:
        # Need to reduce down to a single partition even
        # if the unique_fraction is large.
        output_count = 1
        _fallback_inform(
            "Unsupported unique options for multiple partitions.",
            config_options,
        )

    # Partition-wise unique
    count = child_count
    new_node: IR = ir.reconstruct([child])
    partition_info[new_node] = PartitionInfo(count=count)

    if shuffled or output_count == 1:
        # Tree reduction: alternate Repartition (concatenate n_ary partitions)
        # and Distinct until we reach output_count partitions.
        while count > output_count:
            new_node = Repartition(new_node.schema, new_node)
            count = max(math.ceil(count / n_ary), output_count)
            partition_info[new_node] = PartitionInfo(count=count)
            new_node = ir.reconstruct([new_node])
            partition_info[new_node] = PartitionInfo(count=count)
    else:
        # Shuffle on the distinct keys, then de-duplicate within partitions.
        new_node = Shuffle(
            new_node.schema,
            shuffle_keys,
            config_options.executor.shuffle_method,
            new_node,
        )
        partition_info[new_node] = PartitionInfo(count=output_count)
        new_node = ir.reconstruct([new_node])
        partition_info[new_node] = PartitionInfo(
            count=output_count,
            partitioned_on=shuffle_keys,
        )

    return new_node, partition_info
163
+
164
+
165
@lower_ir_node.register(Distinct)
def _(
    ir: Distinct, rec: LowerIRTransformer
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    # Extract child partitioning
    original_child = ir.children[0]
    child, partition_info = rec(ir.children[0])
    config_options = rec.state["config_options"]
    assert config_options.executor.name == "streaming", (
        "'in-memory' executor not supported in 'lower_ir_node'"
    )

    # Estimate key uniqueness: gather per-column unique fractions for the
    # distinct keys and take the largest (None when no estimate exists).
    subset: frozenset[str] = ir.subset or frozenset(ir.schema)
    unique_fraction_dict = _get_unique_fractions(
        tuple(subset),
        config_options.executor.unique_fraction,
        row_count=rec.state["stats"].row_count.get(original_child),
        column_stats=rec.state["stats"].column_stats.get(original_child),
    )
    unique_fraction = (
        max(unique_fraction_dict.values()) if unique_fraction_dict else None
    )

    # Fall back to a single-partition Distinct when the multi-partition
    # lowering does not support the requested options.
    try:
        return lower_distinct(
            ir,
            child,
            partition_info,
            config_options,
            unique_fraction=unique_fraction,
        )
    except NotImplementedError as err:
        return _lower_ir_fallback(ir, rec, msg=str(err))
@@ -0,0 +1,157 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+ # SPDX-License-Identifier: Apache-2.0
5
+ """Explain logical and physical plans."""
6
+
7
+ from __future__ import annotations
8
+
9
+ import functools
10
+ from itertools import groupby
11
+ from typing import TYPE_CHECKING
12
+
13
+ from cudf_polars.dsl.ir import (
14
+ GroupBy,
15
+ Join,
16
+ Scan,
17
+ Sort,
18
+ )
19
+ from cudf_polars.dsl.translate import Translator
20
+ from cudf_polars.experimental.base import ColumnStat
21
+ from cudf_polars.experimental.parallel import lower_ir_graph
22
+ from cudf_polars.experimental.statistics import (
23
+ collect_statistics,
24
+ )
25
+ from cudf_polars.utils.config import ConfigOptions
26
+
27
+ if TYPE_CHECKING:
28
+ from collections.abc import MutableMapping
29
+
30
+ import polars as pl
31
+
32
+ from cudf_polars.dsl.ir import IR
33
+ from cudf_polars.experimental.base import PartitionInfo, StatsCollector
34
+
35
+
36
def explain_query(
    q: pl.LazyFrame, engine: pl.GPUEngine, *, physical: bool = True
) -> str:
    """
    Return a formatted string representation of the IR plan.

    Parameters
    ----------
    q : pl.LazyFrame
        The LazyFrame to explain.
    engine : pl.GPUEngine
        The configured GPU engine to use.
    physical : bool, default True
        If True, show the physical (lowered) plan.
        If False, show the logical (pre-lowering) plan.

    Returns
    -------
    str
        A string representation of the IR plan.
    """
    config = ConfigOptions.from_polars_engine(engine)
    ir = Translator(q._ldf.visit(), engine).translate_ir()

    if physical:
        # Lower first, then render with per-node partition counts.
        lowered_ir, partition_info = lower_ir_graph(ir, config)
        return _repr_ir_tree(lowered_ir, partition_info)
    if config.executor.name == "streaming":
        # Include row-count statistics for the logical plan.
        return _repr_ir_tree(ir, stats=collect_statistics(ir, config))
    return _repr_ir_tree(ir)
69
+
70
+
71
+ def _fmt_row_count(value: int | None) -> str:
72
+ """Format a row count as a readable string."""
73
+ if value is None:
74
+ return ""
75
+ elif value < 1_000:
76
+ return f"{value}"
77
+ elif value < 1_000_000:
78
+ return f"{round(value / 1_000, 2):g} K"
79
+ elif value < 1_000_000_000:
80
+ return f"{round(value / 1_000_000, 2):g} M"
81
+ else:
82
+ return f"{round(value / 1_000_000_000, 2):g} B"
83
+
84
+
85
def _repr_ir_tree(
    ir: IR,
    partition_info: MutableMapping[IR, PartitionInfo] | None = None,
    *,
    offset: str = "",
    stats: StatsCollector | None = None,
) -> str:
    """
    Recursively render ``ir`` and its children as an indented tree.

    Parameters
    ----------
    ir
        Root IR node to render.
    partition_info
        Optional mapping from IR nodes to partitioning information; when
        given, each node line is suffixed with its partition count ``[n]``.
    offset
        Indentation prefix for the current depth.
    stats
        Optional statistics collector; when given, each node line is
        suffixed with a row-count estimate (``row_count='unknown'`` when
        no estimate is available).

    Returns
    -------
    str
        Multi-line representation. Runs of identical adjacent child
        subtrees are collapsed into one with a ``(repeated N times)`` note.
    """
    header = _repr_ir(ir, offset=offset)
    count = partition_info[ir].count if partition_info else None
    if stats is not None:
        # Include row-count estimate (if available)
        row_count_estimate = _fmt_row_count(
            stats.row_count.get(ir, ColumnStat[int](None)).value
        )
        row_count = f"~{row_count_estimate}" if row_count_estimate else "unknown"
        header = header.rstrip("\n") + f" {row_count=}\n"
    if count is not None:
        header = header.rstrip("\n") + f" [{count}]\n"

    children_strs = [
        _repr_ir_tree(child, partition_info, offset=offset + " ", stats=stats)
        for child in ir.children
    ]

    # groupby collapses equal *adjacent* renderings; the walrus captures the
    # run length so repeats are annotated instead of printed again.
    return header + "".join(
        f"{line}{offset} (repeated {count} times)\n"
        if (count := sum(1 for _ in group)) > 1
        else line
        for line, group in groupby(children_strs)
    )
115
+
116
+
117
+ def _repr_schema(schema: tuple | None) -> str:
118
+ if schema is None:
119
+ return "" # pragma: no cover; no test yet
120
+ names = tuple(schema)
121
+ if len(names) > 6:
122
+ names = names[:3] + ("...",) + names[-2:]
123
+ return f" {names}"
124
+
125
+
126
def _repr_header(offset: str, label: str, schema: tuple | dict | None) -> str:
    """Format one plan line: indentation, label, then the abbreviated schema."""
    names = tuple(schema) if schema is not None else None
    return f"{offset}{label}{_repr_schema(names)}\n"
128
+
129
+
130
@functools.singledispatch
def _repr_ir(ir: IR, *, offset: str = "") -> str:
    """Fallback renderer: one line with the node's upper-cased class name."""
    label = type(ir).__name__.upper()
    return _repr_header(offset, label, ir.schema)
133
+
134
+
135
@_repr_ir.register
def _(ir: GroupBy, *, offset: str = "") -> str:
    """Render a GroupBy node with its key column names."""
    keys = tuple(named.name for named in ir.keys)
    return _repr_header(offset, f"GROUPBY {keys}", ir.schema)
139
+
140
+
141
@_repr_ir.register
def _(ir: Join, *, offset: str = "") -> str:
    """Render a Join node with its join kind and left/right key names."""
    left_on = tuple(expr.name for expr in ir.left_on)
    right_on = tuple(expr.name for expr in ir.right_on)
    return _repr_header(offset, f"JOIN {ir.options[0]} {left_on} {right_on}", ir.schema)
146
+
147
+
148
@_repr_ir.register
def _(ir: Sort, *, offset: str = "") -> str:
    """Render a Sort node with its sort-key names."""
    by = tuple(expr.name for expr in ir.by)
    return _repr_header(offset, f"SORT {by}", ir.schema)
152
+
153
+
154
@_repr_ir.register
def _(ir: Scan, *, offset: str = "") -> str:
    """Render a Scan node with its upper-cased scan type."""
    return _repr_header(offset, f"SCAN {ir.typ.upper()}", ir.schema)