cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl

This diff compares two publicly released versions of the package as published to their public registry. It is provided for informational purposes only.
Files changed (76)
  1. cudf_polars/GIT_COMMIT +1 -1
  2. cudf_polars/VERSION +1 -1
  3. cudf_polars/callback.py +60 -15
  4. cudf_polars/containers/column.py +137 -77
  5. cudf_polars/containers/dataframe.py +123 -34
  6. cudf_polars/containers/datatype.py +134 -13
  7. cudf_polars/dsl/expr.py +0 -2
  8. cudf_polars/dsl/expressions/aggregation.py +80 -28
  9. cudf_polars/dsl/expressions/binaryop.py +34 -14
  10. cudf_polars/dsl/expressions/boolean.py +110 -37
  11. cudf_polars/dsl/expressions/datetime.py +59 -30
  12. cudf_polars/dsl/expressions/literal.py +11 -5
  13. cudf_polars/dsl/expressions/rolling.py +460 -119
  14. cudf_polars/dsl/expressions/selection.py +9 -8
  15. cudf_polars/dsl/expressions/slicing.py +1 -1
  16. cudf_polars/dsl/expressions/string.py +256 -114
  17. cudf_polars/dsl/expressions/struct.py +19 -7
  18. cudf_polars/dsl/expressions/ternary.py +33 -3
  19. cudf_polars/dsl/expressions/unary.py +126 -64
  20. cudf_polars/dsl/ir.py +1053 -350
  21. cudf_polars/dsl/to_ast.py +30 -13
  22. cudf_polars/dsl/tracing.py +194 -0
  23. cudf_polars/dsl/translate.py +307 -107
  24. cudf_polars/dsl/utils/aggregations.py +43 -30
  25. cudf_polars/dsl/utils/reshape.py +14 -2
  26. cudf_polars/dsl/utils/rolling.py +12 -8
  27. cudf_polars/dsl/utils/windows.py +35 -20
  28. cudf_polars/experimental/base.py +55 -2
  29. cudf_polars/experimental/benchmarks/pdsds.py +12 -126
  30. cudf_polars/experimental/benchmarks/pdsh.py +792 -2
  31. cudf_polars/experimental/benchmarks/utils.py +596 -39
  32. cudf_polars/experimental/dask_registers.py +47 -20
  33. cudf_polars/experimental/dispatch.py +9 -3
  34. cudf_polars/experimental/distinct.py +2 -0
  35. cudf_polars/experimental/explain.py +15 -2
  36. cudf_polars/experimental/expressions.py +30 -15
  37. cudf_polars/experimental/groupby.py +25 -4
  38. cudf_polars/experimental/io.py +156 -124
  39. cudf_polars/experimental/join.py +53 -23
  40. cudf_polars/experimental/parallel.py +68 -19
  41. cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
  42. cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
  43. cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
  44. cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
  45. cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
  46. cudf_polars/experimental/rapidsmpf/core.py +488 -0
  47. cudf_polars/experimental/rapidsmpf/dask.py +172 -0
  48. cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
  49. cudf_polars/experimental/rapidsmpf/io.py +696 -0
  50. cudf_polars/experimental/rapidsmpf/join.py +322 -0
  51. cudf_polars/experimental/rapidsmpf/lower.py +74 -0
  52. cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
  53. cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
  54. cudf_polars/experimental/rapidsmpf/union.py +115 -0
  55. cudf_polars/experimental/rapidsmpf/utils.py +374 -0
  56. cudf_polars/experimental/repartition.py +9 -2
  57. cudf_polars/experimental/select.py +177 -14
  58. cudf_polars/experimental/shuffle.py +46 -12
  59. cudf_polars/experimental/sort.py +100 -26
  60. cudf_polars/experimental/spilling.py +1 -1
  61. cudf_polars/experimental/statistics.py +24 -5
  62. cudf_polars/experimental/utils.py +25 -7
  63. cudf_polars/testing/asserts.py +13 -8
  64. cudf_polars/testing/io.py +2 -1
  65. cudf_polars/testing/plugin.py +93 -17
  66. cudf_polars/typing/__init__.py +86 -32
  67. cudf_polars/utils/config.py +473 -58
  68. cudf_polars/utils/cuda_stream.py +70 -0
  69. cudf_polars/utils/versions.py +5 -4
  70. cudf_polars_cu13-26.2.0.dist-info/METADATA +181 -0
  71. cudf_polars_cu13-26.2.0.dist-info/RECORD +108 -0
  72. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
  73. cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
  74. cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
  75. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
  76. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
cudf_polars/dsl/to_ast.py CHANGED
@@ -6,7 +6,9 @@
 from __future__ import annotations
 
 from functools import partial, reduce, singledispatch
-from typing import TYPE_CHECKING, TypeAlias, TypedDict
+from typing import TYPE_CHECKING, TypeAlias, TypedDict, cast
+
+import polars as pl
 
 import pylibcudf as plc
 from pylibcudf import expressions as plc_expr
@@ -19,6 +21,8 @@ from cudf_polars.typing import GenericTransformer
 if TYPE_CHECKING:
     from collections.abc import Mapping
 
+    from rmm.pylibrmm.stream import Stream
+
 
 # Can't merge these op-mapping dictionaries because scoped enum values
 # are exposed by cython with equality/hash based one their underlying
@@ -103,6 +107,7 @@ class ASTState(TypedDict):
     """
 
     for_parquet: bool
+    stream: Stream
 
 
 class ExprTransformerState(TypedDict):
@@ -170,7 +175,9 @@ def _(node: expr.ColRef, self: Transformer) -> plc_expr.Expression:
 
 @_to_ast.register
 def _(node: expr.Literal, self: Transformer) -> plc_expr.Expression:
-    return plc_expr.Literal(plc.Scalar.from_py(node.value, node.dtype.plc))
+    return plc_expr.Literal(
+        plc.Scalar.from_py(node.value, node.dtype.plc_type, stream=self.state["stream"])
+    )
 
 
 @_to_ast.register
@@ -190,7 +197,7 @@ def _(node: expr.BinOp, self: Transformer) -> plc_expr.Expression:
     if self.state["for_parquet"]:
         op1_col, op2_col = (isinstance(op, expr.Col) for op in node.children)
         if op1_col ^ op2_col:
-            op = node.op
+            op: plc.binaryop.BinaryOperator = node.op
             if op not in SUPPORTED_STATISTICS_BINOPS:
                 raise NotImplementedError(
                     f"Parquet filter binop with column doesn't support {node.op!r}"
@@ -221,14 +228,16 @@ def _(node: expr.BooleanFunction, self: Transformer) -> plc_expr.Expression:
             if haystack.dtype.id() == plc.TypeId.LIST:
                 # Because we originally translated pl_expr.Literal with a list scalar
                 # to a expr.LiteralColumn, so the actual type is in the inner type
-                #
-                # the type-ignore is safe because the for plc.TypeID.LIST, we know
-                # we have a polars.List type, which has an inner attribute.
-                plc_dtype = DataType(haystack.dtype.polars.inner).plc  # type: ignore[attr-defined]
+                # .inner returns DataTypeClass | DataType, need to cast to DataType
+                plc_dtype = DataType(
+                    cast(pl.DataType, cast(pl.List, haystack.dtype.polars_type).inner)
+                ).plc_type
             else:
-                plc_dtype = haystack.dtype.plc  # pragma: no cover
+                plc_dtype = haystack.dtype.plc_type  # pragma: no cover
             values = (
-                plc_expr.Literal(plc.Scalar.from_py(val, plc_dtype))
+                plc_expr.Literal(
+                    plc.Scalar.from_py(val, plc_dtype, stream=self.state["stream"])
+                )
                 for val in haystack.value
             )
             return reduce(
@@ -265,7 +274,7 @@ def _(node: expr.UnaryFunction, self: Transformer) -> plc_expr.Expression:
     )
 
 
-def to_parquet_filter(node: expr.Expr) -> plc_expr.Expression | None:
+def to_parquet_filter(node: expr.Expr, stream: Stream) -> plc_expr.Expression | None:
     """
     Convert an expression to libcudf AST nodes suitable for parquet filtering.
 
@@ -273,19 +282,23 @@ def to_parquet_filter(node: expr.Expr) -> plc_expr.Expression | None:
     ----------
     node
         Expression to convert.
+    stream
+        CUDA stream used for device memory operations and kernel launches.
 
     Returns
     -------
     pylibcudf Expression if conversion is possible, otherwise None.
     """
-    mapper: Transformer = CachingVisitor(_to_ast, state={"for_parquet": True})
+    mapper: Transformer = CachingVisitor(
+        _to_ast, state={"for_parquet": True, "stream": stream}
+    )
     try:
         return mapper(node)
     except (KeyError, NotImplementedError):
         return None
 
 
-def to_ast(node: expr.Expr) -> plc_expr.Expression | None:
+def to_ast(node: expr.Expr, stream: Stream) -> plc_expr.Expression | None:
     """
     Convert an expression to libcudf AST nodes suitable for compute_column.
 
@@ -293,6 +306,8 @@ def to_ast(node: expr.Expr) -> plc_expr.Expression | None:
     ----------
     node
         Expression to convert.
+    stream
+        CUDA stream used for device memory operations and kernel launches.
 
     Notes
     -----
@@ -304,7 +319,9 @@ def to_ast(node: expr.Expr) -> plc_expr.Expression | None:
     -------
     pylibcudf Expression if conversion is possible, otherwise None.
     """
-    mapper: Transformer = CachingVisitor(_to_ast, state={"for_parquet": False})
+    mapper: Transformer = CachingVisitor(
+        _to_ast, state={"for_parquet": False, "stream": stream}
+    )
     try:
         return mapper(node)
     except (KeyError, NotImplementedError):
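
Both public entry points in this module now require the caller to supply the CUDA stream on which literal scalars are materialized. Below is a minimal sketch (not code from the package) of driving the new signatures; `node` is a placeholder for a translated cudf_polars `expr.Expr`, and rmm's `DEFAULT_STREAM` is assumed here to be an acceptable stream choice.

```python
# Hedged sketch of the new stream-aware API. `node` stands in for a
# cudf_polars expr.Expr built by translation; DEFAULT_STREAM is one valid
# rmm stream, not necessarily the stream the engine itself would pass.
from rmm.pylibrmm.stream import DEFAULT_STREAM

from cudf_polars.dsl.to_ast import to_ast, to_parquet_filter


def try_pushdown(node):
    """Return (compute AST, parquet filter AST); either may be None."""
    compute_expr = to_ast(node, DEFAULT_STREAM)
    parquet_expr = to_parquet_filter(node, DEFAULT_STREAM)
    return compute_expr, parquet_expr
```

Both helpers swallow `KeyError` and `NotImplementedError` and return `None` when an expression has no AST equivalent, so callers can probe for pushdown support without any try/except of their own.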
cudf_polars/dsl/tracing.py CHANGED
@@ -6,11 +6,205 @@
 from __future__ import annotations
 
 import functools
+import os
+import time
+from typing import TYPE_CHECKING, Any, Concatenate, Literal
 
 import nvtx
+import pynvml
+from typing_extensions import ParamSpec
+
+import rmm
+import rmm.statistics
+
+from cudf_polars.utils.config import _bool_converter, get_device_handle
+
+try:
+    import structlog
+except ImportError:
+    _HAS_STRUCTLOG = False
+else:
+    _HAS_STRUCTLOG = True
+
+
+LOG_TRACES = _HAS_STRUCTLOG and _bool_converter(
+    os.environ.get("CUDF_POLARS_LOG_TRACES", "0")
+)
+LOG_MEMORY = LOG_TRACES and _bool_converter(
+    os.environ.get("CUDF_POLARS_LOG_TRACES_MEMORY", "1")
+)
+LOG_DATAFRAMES = LOG_TRACES and _bool_converter(
+    os.environ.get("CUDF_POLARS_LOG_TRACES_DATAFRAMES", "1")
+)
 
 CUDF_POLARS_NVTX_DOMAIN = "cudf_polars"
 
 nvtx_annotate_cudf_polars = functools.partial(
     nvtx.annotate, domain=CUDF_POLARS_NVTX_DOMAIN
 )
+
+if TYPE_CHECKING:
+    from collections.abc import Callable, Sequence
+
+    import cudf_polars.containers
+    from cudf_polars.dsl import ir
+
+
+@functools.cache
+def _getpid() -> int:  # pragma: no cover
+    # Gets called for each IR.do_evaluate node, so we'll cache it.
+    return os.getpid()
+
+
+def make_snapshot(
+    node_type: type[ir.IR],
+    frames: Sequence[cudf_polars.containers.DataFrame],
+    extra: dict[str, Any] | None = None,
+    *,
+    pid: int,
+    device_handle: Any | None = None,
+    phase: Literal["input", "output"] = "input",
+) -> dict:  # pragma: no cover; requires CUDF_POLARS_LOG_TRACES=1
+    """
+    Collect statistics about the evaluation of an IR node.
+
+    Parameters
+    ----------
+    node_type
+        The type of the IR node.
+    frames
+        The list of DataFrames to capture information for. For ``phase="input"``,
+        this is typically the dataframes passed to ``IR.do_evaluate``. For
+        ``phase="output"``, this is typically the DataFrame returned from
+        ``IR.do_evaluate``.
+    extra
+        Extra information to log.
+    pid
+        The ID of the current process. Used for NVML memory usage.
+    device_handle
+        The pynvml device handle. Used for NVML memory usage.
+    phase
+        The phase of the evaluation. Either "input" or "output".
+    """
+    ir_name = node_type.__name__
+
+    d: dict[str, Any] = {
+        "type": ir_name,
+    }
+
+    if LOG_DATAFRAMES:
+        d.update(
+            {
+                f"count_frames_{phase}": len(frames),
+                f"frames_{phase}": [
+                    {
+                        "shape": frame.table.shape(),
+                        "size": sum(
+                            col.device_buffer_size() for col in frame.table.columns()
+                        ),
+                    }
+                    for frame in frames
+                ],
+            }
+        )
+        d[f"total_bytes_{phase}"] = sum(x["size"] for x in d[f"frames_{phase}"])
+
+    if LOG_MEMORY:
+        stats = rmm.statistics.get_statistics()
+        if stats:
+            d.update(
+                {
+                    f"rmm_current_bytes_{phase}": stats.current_bytes,
+                    f"rmm_current_count_{phase}": stats.current_count,
+                    f"rmm_peak_bytes_{phase}": stats.peak_bytes,
+                    f"rmm_peak_count_{phase}": stats.peak_count,
+                    f"rmm_total_bytes_{phase}": stats.total_bytes,
+                    f"rmm_total_count_{phase}": stats.total_count,
+                }
+            )
+
+        if device_handle is not None:
+            processes = pynvml.nvmlDeviceGetComputeRunningProcesses(device_handle)
+            for proc in processes:
+                if proc.pid == pid:
+                    d[f"nvml_current_bytes_{phase}"] = proc.usedGpuMemory
+                    break
+    if extra:
+        d.update(extra)
+
+    return d
+
+
+P = ParamSpec("P")
+
+
+def log_do_evaluate(
+    func: Callable[Concatenate[type[ir.IR], P], cudf_polars.containers.DataFrame],
+) -> Callable[Concatenate[type[ir.IR], P], cudf_polars.containers.DataFrame]:
+    """
+    Decorator for an ``IR.do_evaluate`` method that logs information before and after evaluation.
+
+    Parameters
+    ----------
+    func
+        The ``IR.do_evaluate`` method to wrap.
+    """
+    if not LOG_TRACES:
+        return func
+    else:  # pragma: no cover; requires CUDF_POLARS_LOG_TRACES=1
+
+        @functools.wraps(func)
+        def wrapper(
+            cls: type[ir.IR],
+            *args: P.args,
+            **kwargs: P.kwargs,
+        ) -> cudf_polars.containers.DataFrame:
+            # do this just once
+            pynvml.nvmlInit()
+            maybe_handle = get_device_handle()
+            pid = _getpid()
+            log = structlog.get_logger()
+
+            # By convention, all non-dataframe arguments (non_child) come first.
+            # Anything remaining is a dataframe, except for 'context' kwarg.
+            frames: list[cudf_polars.containers.DataFrame] = (
+                list(args) + [v for k, v in kwargs.items() if k != "context"]
+            )[len(cls._non_child) :]  # type: ignore[assignment]
+
+            before_start = time.monotonic_ns()
+            before = make_snapshot(
+                cls, frames, phase="input", device_handle=maybe_handle, pid=pid
+            )
+            before_end = time.monotonic_ns()
+
+            # The decorator preserves the exact signature of the original do_evaluate method.
+            # Each IR.do_evaluate method is a classmethod that takes the IR class as first
+            # argument, followed by the method-specific arguments, and returns a DataFrame.
+
+            start = time.monotonic_ns()
+            result = func(cls, *args, **kwargs)
+            stop = time.monotonic_ns()
+
+            after_start = time.monotonic_ns()
+            after = make_snapshot(
+                cls,
+                [result],
+                phase="output",
+                extra={"start": start, "stop": stop},
+                device_handle=maybe_handle,
+                pid=pid,
+            )
+            after_end = time.monotonic_ns()
+            record = (
+                before
+                | after
+                | {
+                    "overhead_duration": (before_end - before_start)
+                    + (after_end - after_start)
+                }
+            )
+            log.info("Execute IR", **record)
+
+            return result
+
+        return wrapper
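
The tracing added here is entirely opt-in: `log_do_evaluate` returns the wrapped function unchanged unless structlog is importable and `CUDF_POLARS_LOG_TRACES` is truthy when the module is first imported, and the two companion variables control how much detail each snapshot carries. A hedged sketch of enabling it, assuming structlog is installed and a polars build with the GPU engine is available:

```python
# Hedged sketch: the LOG_* flags are evaluated at import time, so the env vars
# must be set before cudf_polars is first imported (e.g. via the GPU engine).
import os

os.environ["CUDF_POLARS_LOG_TRACES"] = "1"
os.environ["CUDF_POLARS_LOG_TRACES_MEMORY"] = "1"      # RMM/NVML stats (default on)
os.environ["CUDF_POLARS_LOG_TRACES_DATAFRAMES"] = "0"  # skip per-frame shape/size info

import polars as pl

q = pl.LazyFrame({"a": [1, 2, 3]}).select(pl.col("a").sum())
q.collect(engine="gpu")  # each IR.do_evaluate emits one "Execute IR" structlog event
```

Each record merges the input and output snapshots, so keys such as `total_bytes_input`, `rmm_peak_bytes_output`, the `start`/`stop` timestamps (monotonic nanoseconds), and `overhead_duration` arrive in a single structlog event per IR node. Note that the `rmm_*` fields only populate when an RMM statistics resource adaptor is active, since `rmm.statistics.get_statistics()` otherwise returns nothing.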