cudf-polars-cu13 25.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. cudf_polars/GIT_COMMIT +1 -0
  2. cudf_polars/VERSION +1 -0
  3. cudf_polars/__init__.py +28 -0
  4. cudf_polars/_version.py +21 -0
  5. cudf_polars/callback.py +318 -0
  6. cudf_polars/containers/__init__.py +13 -0
  7. cudf_polars/containers/column.py +495 -0
  8. cudf_polars/containers/dataframe.py +361 -0
  9. cudf_polars/containers/datatype.py +137 -0
  10. cudf_polars/dsl/__init__.py +8 -0
  11. cudf_polars/dsl/expr.py +66 -0
  12. cudf_polars/dsl/expressions/__init__.py +8 -0
  13. cudf_polars/dsl/expressions/aggregation.py +226 -0
  14. cudf_polars/dsl/expressions/base.py +272 -0
  15. cudf_polars/dsl/expressions/binaryop.py +120 -0
  16. cudf_polars/dsl/expressions/boolean.py +326 -0
  17. cudf_polars/dsl/expressions/datetime.py +271 -0
  18. cudf_polars/dsl/expressions/literal.py +97 -0
  19. cudf_polars/dsl/expressions/rolling.py +643 -0
  20. cudf_polars/dsl/expressions/selection.py +74 -0
  21. cudf_polars/dsl/expressions/slicing.py +46 -0
  22. cudf_polars/dsl/expressions/sorting.py +85 -0
  23. cudf_polars/dsl/expressions/string.py +1002 -0
  24. cudf_polars/dsl/expressions/struct.py +137 -0
  25. cudf_polars/dsl/expressions/ternary.py +49 -0
  26. cudf_polars/dsl/expressions/unary.py +517 -0
  27. cudf_polars/dsl/ir.py +2607 -0
  28. cudf_polars/dsl/nodebase.py +164 -0
  29. cudf_polars/dsl/to_ast.py +359 -0
  30. cudf_polars/dsl/tracing.py +16 -0
  31. cudf_polars/dsl/translate.py +939 -0
  32. cudf_polars/dsl/traversal.py +224 -0
  33. cudf_polars/dsl/utils/__init__.py +8 -0
  34. cudf_polars/dsl/utils/aggregations.py +481 -0
  35. cudf_polars/dsl/utils/groupby.py +98 -0
  36. cudf_polars/dsl/utils/naming.py +34 -0
  37. cudf_polars/dsl/utils/replace.py +61 -0
  38. cudf_polars/dsl/utils/reshape.py +74 -0
  39. cudf_polars/dsl/utils/rolling.py +121 -0
  40. cudf_polars/dsl/utils/windows.py +192 -0
  41. cudf_polars/experimental/__init__.py +8 -0
  42. cudf_polars/experimental/base.py +386 -0
  43. cudf_polars/experimental/benchmarks/__init__.py +4 -0
  44. cudf_polars/experimental/benchmarks/pdsds.py +220 -0
  45. cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
  46. cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
  47. cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
  48. cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
  49. cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
  50. cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
  51. cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
  52. cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
  53. cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
  54. cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
  55. cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
  56. cudf_polars/experimental/benchmarks/pdsh.py +814 -0
  57. cudf_polars/experimental/benchmarks/utils.py +832 -0
  58. cudf_polars/experimental/dask_registers.py +200 -0
  59. cudf_polars/experimental/dispatch.py +156 -0
  60. cudf_polars/experimental/distinct.py +197 -0
  61. cudf_polars/experimental/explain.py +157 -0
  62. cudf_polars/experimental/expressions.py +590 -0
  63. cudf_polars/experimental/groupby.py +327 -0
  64. cudf_polars/experimental/io.py +943 -0
  65. cudf_polars/experimental/join.py +391 -0
  66. cudf_polars/experimental/parallel.py +423 -0
  67. cudf_polars/experimental/repartition.py +69 -0
  68. cudf_polars/experimental/scheduler.py +155 -0
  69. cudf_polars/experimental/select.py +188 -0
  70. cudf_polars/experimental/shuffle.py +354 -0
  71. cudf_polars/experimental/sort.py +609 -0
  72. cudf_polars/experimental/spilling.py +151 -0
  73. cudf_polars/experimental/statistics.py +795 -0
  74. cudf_polars/experimental/utils.py +169 -0
  75. cudf_polars/py.typed +0 -0
  76. cudf_polars/testing/__init__.py +8 -0
  77. cudf_polars/testing/asserts.py +448 -0
  78. cudf_polars/testing/io.py +122 -0
  79. cudf_polars/testing/plugin.py +236 -0
  80. cudf_polars/typing/__init__.py +219 -0
  81. cudf_polars/utils/__init__.py +8 -0
  82. cudf_polars/utils/config.py +741 -0
  83. cudf_polars/utils/conversion.py +40 -0
  84. cudf_polars/utils/dtypes.py +118 -0
  85. cudf_polars/utils/sorting.py +53 -0
  86. cudf_polars/utils/timer.py +39 -0
  87. cudf_polars/utils/versions.py +27 -0
  88. cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
  89. cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
  90. cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
  91. cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
  92. cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

"""Base class for IR nodes, and utilities."""

from __future__ import annotations

from typing import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar

if TYPE_CHECKING:
    from collections.abc import Generator, Hashable, Sequence

    from typing_extensions import Self


__all__: list[str] = ["Node"]

T = TypeVar("T", bound="Node[Any]")


class Node(Generic[T]):
    """
    An abstract node type.

    Nodes are immutable!

    A node carries a (possibly empty) tuple of child nodes plus
    non-child data. To make reconstruction, hashing, and equality
    uniform, subclasses must declare a small amount of metadata:
    the ``_non_child`` class attribute lists, in order, the names of
    the slots passed to the constructor, and the constructor must
    accept arguments in the order ``(*_non_child, *children)``.
    """

    __slots__ = ("_hash_value", "_repr_value", "children")
    _hash_value: int  # lazily-populated cache for __hash__
    _repr_value: str  # lazily-populated cache for __repr__
    children: tuple[T, ...]
    _non_child: ClassVar[tuple[str, ...]] = ()

    def _ctor_arguments(self, children: Sequence[T]) -> Sequence[Any | T]:
        # Non-child data (in declaration order) followed by the given
        # children: exactly the argument order the constructor expects.
        non_child = (getattr(self, name) for name in self._non_child)
        return (*non_child, *children)

    def reconstruct(self, children: Sequence[T]) -> Self:
        """
        Rebuild this node with new children.

        Parameters
        ----------
        children
            New children

        Returns
        -------
        New node with new children. Non-child data is shared with the input.
        """
        cls = type(self)
        return cls(*self._ctor_arguments(children))

    def __reduce__(self) -> tuple[Any, ...]:
        """Pickle a Node object."""
        return (type(self), self._ctor_arguments(self.children))

    def get_hashable(self) -> Hashable:
        """
        Return a hashable object for the node.

        Returns
        -------
        Hashable object.

        Notes
        -----
        This method is used by the :meth:`__hash__` implementation
        (which does caching). If your node type needs special-case
        handling for some of its attributes, override this method, not
        :meth:`__hash__`.
        """
        return (type(self), self._ctor_arguments(self.children))

    def __hash__(self) -> int:
        """
        Hash of an expression with caching.

        See Also
        --------
        get_hashable
        """
        try:
            return self._hash_value
        except AttributeError:
            # First call: compute once and memoise in the slot.
            self._hash_value = hash(self.get_hashable())
            return self._hash_value

    def is_equal(self, other: Self) -> bool:
        """
        Equality of two nodes of equal type.

        Override this in subclasses, rather than :meth:`__eq__`.

        Parameters
        ----------
        other
            object of same type to compare to.

        Notes
        -----
        Since nodes are immutable, this does common subexpression
        elimination when two nodes are determined to be equal.

        :meth:`__eq__` handles the case where the objects being
        compared are not of the same type, so in this method, we only
        need to implement equality of equal types.

        Returns
        -------
        True if the two nodes are equal, false otherwise.
        """
        if self is other:
            return True
        match = self._ctor_arguments(self.children) == other._ctor_arguments(
            other.children
        )
        if match:
            # Eager CSE: share the (equal) children so later comparisons
            # hit the identity fast path.
            self.children = other.children
        return match

    def __eq__(self, other: Any) -> bool:
        """
        Equality of expressions.

        See Also
        --------
        is_equal
        """
        # Cheap checks (type identity, cached hash) first; only fall
        # through to the structural comparison when they agree.
        return (
            type(self) is type(other)
            and hash(self) == hash(other)
            and self.is_equal(other)
        )

    def __ne__(self, other: Any) -> bool:
        """Inequality of expressions."""
        return not (self == other)

    def __repr__(self) -> str:
        """String representation of an expression with caching."""
        try:
            return self._repr_value
        except AttributeError:
            arg_list = ", ".join(
                repr(arg) for arg in self._ctor_arguments(self.children)
            )
            self._repr_value = f"{type(self).__name__}({arg_list})"
            return self._repr_value

    def __rich_repr__(self) -> Generator[Any, None, None]:
        """Formatting for rich.pretty.pprint."""
        for name in self._non_child:
            yield name, getattr(self, name)

        yield from self.children
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

"""Conversion of expression nodes to libcudf AST nodes."""

from __future__ import annotations

from functools import partial, reduce, singledispatch
from typing import TYPE_CHECKING, TypeAlias, TypedDict

import pylibcudf as plc
from pylibcudf import expressions as plc_expr

from cudf_polars.containers import DataType
from cudf_polars.dsl import expr
from cudf_polars.dsl.traversal import CachingVisitor, reuse_if_unchanged
from cudf_polars.typing import GenericTransformer

if TYPE_CHECKING:
    from collections.abc import Mapping


# The two op-mapping dictionaries below cannot be merged: scoped enum
# values are exposed by cython with equality/hash based on their
# underlying representation type, so in a single dict the keys from the
# two enums would collide as plain integers.

# Binary operators with a direct AST equivalent. Every name maps across
# unchanged except NULL_EQUALS -> NULL_EQUAL (added separately below).
BINOP_TO_ASTOP = {
    getattr(plc.binaryop.BinaryOperator, name): getattr(plc_expr.ASTOperator, name)
    for name in (
        "EQUAL",
        "NOT_EQUAL",
        "LESS",
        "LESS_EQUAL",
        "GREATER",
        "GREATER_EQUAL",
        "ADD",
        "SUB",
        "MUL",
        "DIV",
        "TRUE_DIV",
        "FLOOR_DIV",
        "PYMOD",
        "BITWISE_AND",
        "BITWISE_OR",
        "BITWISE_XOR",
        "LOGICAL_AND",
        "LOGICAL_OR",
        "NULL_LOGICAL_AND",
        "NULL_LOGICAL_OR",
    )
}
BINOP_TO_ASTOP[plc.binaryop.BinaryOperator.NULL_EQUALS] = (
    plc_expr.ASTOperator.NULL_EQUAL
)

# Unary operators with a direct AST equivalent (names are identical in
# both enums).
UOP_TO_ASTOP = {
    getattr(plc.unary.UnaryOperator, name): getattr(plc_expr.ASTOperator, name)
    for name in (
        "SIN",
        "COS",
        "TAN",
        "ARCSIN",
        "ARCCOS",
        "ARCTAN",
        "SINH",
        "COSH",
        "TANH",
        "ARCSINH",
        "ARCCOSH",
        "ARCTANH",
        "EXP",
        "LOG",
        "SQRT",
        "CBRT",
        "CEIL",
        "FLOOR",
        "ABS",
        "RINT",
        "BIT_INVERT",
        "NOT",
    )
}

# Comparisons that parquet column statistics can answer.
SUPPORTED_STATISTICS_BINOPS = {
    plc.binaryop.BinaryOperator.EQUAL,
    plc.binaryop.BinaryOperator.NOT_EQUAL,
    plc.binaryop.BinaryOperator.LESS,
    plc.binaryop.BinaryOperator.LESS_EQUAL,
    plc.binaryop.BinaryOperator.GREATER,
    plc.binaryop.BinaryOperator.GREATER_EQUAL,
}

# Each comparison mapped to the equivalent comparison with its operands
# swapped (used to normalise "literal OP col" into "col OP literal").
REVERSED_COMPARISON = {
    plc.binaryop.BinaryOperator.EQUAL: plc.binaryop.BinaryOperator.EQUAL,
    plc.binaryop.BinaryOperator.NOT_EQUAL: plc.binaryop.BinaryOperator.NOT_EQUAL,
    plc.binaryop.BinaryOperator.LESS: plc.binaryop.BinaryOperator.GREATER,
    plc.binaryop.BinaryOperator.LESS_EQUAL: plc.binaryop.BinaryOperator.GREATER_EQUAL,
    plc.binaryop.BinaryOperator.GREATER: plc.binaryop.BinaryOperator.LESS,
    plc.binaryop.BinaryOperator.GREATER_EQUAL: plc.binaryop.BinaryOperator.LESS_EQUAL,
}
class ASTState(TypedDict):
    """
    State for AST transformations.

    Parameters
    ----------
    for_parquet
        Indicator for whether this transformation should provide an
        expression suitable for use in parquet filters.
    """

    for_parquet: bool


class ExprTransformerState(TypedDict):
    """
    State used for AST transformation when inserting column references.

    Parameters
    ----------
    name_to_index
        Mapping from column names to column indices in the table
        eventually used for evaluation.
    table_ref
        pylibcudf `TableReference` indicating whether column
        references are coming from the left or right table.
    """

    name_to_index: Mapping[str, int]
    table_ref: plc.expressions.TableReference


# Transformer producing pylibcudf expressions (state is an ASTState).
Transformer: TypeAlias = GenericTransformer[expr.Expr, plc_expr.Expression, ASTState]
ExprTransformer: TypeAlias = GenericTransformer[
    expr.Expr, expr.Expr, ExprTransformerState
]
"""Protocol for transformation of Expr nodes."""
@singledispatch
def _to_ast(node: expr.Expr, self: Transformer) -> plc_expr.Expression:
    """
    Translate an expression to a pylibcudf Expression.

    Parameters
    ----------
    node
        Expression to translate.
    self
        Recursive transformer. The state dictionary is an instance of
        :class:`ASTState`.

    Returns
    -------
    pylibcudf Expression.

    Raises
    ------
    NotImplementedError or KeyError if the expression cannot be translated.
    """
    # Fallback for expression types with no registered handler.
    raise NotImplementedError(f"Unhandled expression type {type(node)}")
@_to_ast.register
def _(node: expr.Col, self: Transformer) -> plc_expr.Expression:
    # Bare column names are only meaningful in parquet filters; in all
    # other settings the translator wraps columns in ColRef first.
    if not self.state["for_parquet"]:
        raise TypeError("Should always be wrapped in a ColRef node before translation")
    return plc_expr.ColumnNameReference(node.name)
@_to_ast.register
def _(node: expr.ColRef, self: Transformer) -> plc_expr.Expression:
    # Parquet filters address columns by name, never by table index.
    if self.state["for_parquet"]:
        raise TypeError("Not expecting ColRef node in parquet filter")
    return plc_expr.ColumnReference(node.index, node.table_ref)
@_to_ast.register
def _(node: expr.Literal, self: Transformer) -> plc_expr.Expression:
    # A literal becomes a device scalar of the expression's dtype.
    scalar = plc.Scalar.from_py(node.value, node.dtype.plc)
    return plc_expr.Literal(scalar)
@_to_ast.register
def _(node: expr.BinOp, self: Transformer) -> plc_expr.Expression:
    if node.op == plc.binaryop.BinaryOperator.NULL_NOT_EQUALS:
        # Rewrite as NOT(NULL_EQUALS(...)). Reconstruct and apply, rather
        # than directly constructing the right expression, so we get the
        # handling of parquet special cases for free.
        flipped = expr.BinOp(
            node.dtype, plc.binaryop.BinaryOperator.NULL_EQUALS, *node.children
        )
        return plc_expr.Operation(plc_expr.ASTOperator.NOT, self(flipped))
    if self.state["for_parquet"]:
        left, right = node.children
        left_is_col = isinstance(left, expr.Col)
        right_is_col = isinstance(right, expr.Col)
        if left_is_col ^ right_is_col:
            # Exactly one operand is a column: normalise to the
            # "col OP literal" shape parquet statistics understand.
            op = node.op
            if op not in SUPPORTED_STATISTICS_BINOPS:
                raise NotImplementedError(
                    f"Parquet filter binop with column doesn't support {node.op!r}"
                )
            if right_is_col:
                # Swap operands and reverse the comparison accordingly.
                left, right = right, left
                op = REVERSED_COMPARISON[op]
            if not isinstance(right, expr.Literal):
                raise NotImplementedError(
                    "Parquet filter binops must have form 'col binop literal'"
                )
            return plc_expr.Operation(BINOP_TO_ASTOP[op], self(left), self(right))
        elif left_is_col and right_is_col:
            raise NotImplementedError(
                "Parquet filter binops must have one column reference not two"
            )
    return plc_expr.Operation(BINOP_TO_ASTOP[node.op], *map(self, node.children))
@_to_ast.register
def _(node: expr.BooleanFunction, self: Transformer) -> plc_expr.Expression:
    if node.name is expr.BooleanFunction.Name.IsIn:
        needles, haystack = node.children
        # Small literal haystacks expand into a chain of equality tests
        # OR-ed together (16 is an arbitrary limit).
        if isinstance(haystack, expr.LiteralColumn) and len(haystack.value) < 16:
            needle_ref = self(needles)
            if haystack.dtype.id() == plc.TypeId.LIST:
                # We originally translated pl_expr.Literal with a list
                # scalar into an expr.LiteralColumn, so the element type
                # lives in the inner type.
                #
                # The type-ignore is safe because for plc.TypeId.LIST we
                # know we have a polars.List type, which has an inner
                # attribute.
                plc_dtype = DataType(haystack.dtype.polars.inner).plc  # type: ignore[attr-defined]
            else:
                plc_dtype = haystack.dtype.plc  # pragma: no cover
            equality_tests = (
                plc_expr.Operation(
                    plc_expr.ASTOperator.EQUAL,
                    needle_ref,
                    plc_expr.Literal(plc.Scalar.from_py(val, plc_dtype)),
                )
                for val in haystack.value
            )
            return reduce(
                partial(plc_expr.Operation, plc_expr.ASTOperator.LOGICAL_OR),
                equality_tests,
            )
    if self.state["for_parquet"] and isinstance(node.children[0], expr.Col):
        raise NotImplementedError(
            f"Parquet filters don't support {node.name} on columns"
        )
    if node.name is expr.BooleanFunction.Name.IsNull:
        return plc_expr.Operation(plc_expr.ASTOperator.IS_NULL, self(node.children[0]))
    elif node.name is expr.BooleanFunction.Name.IsNotNull:
        return plc_expr.Operation(
            plc_expr.ASTOperator.NOT,
            plc_expr.Operation(plc_expr.ASTOperator.IS_NULL, self(node.children[0])),
        )
    elif node.name is expr.BooleanFunction.Name.Not:
        return plc_expr.Operation(plc_expr.ASTOperator.NOT, self(node.children[0]))
    raise NotImplementedError(f"AST conversion does not support {node.name}")
@_to_ast.register
def _(node: expr.UnaryFunction, self: Transformer) -> plc_expr.Expression:
    # Unary functions on bare columns cannot be expressed as parquet
    # statistics filters.
    if isinstance(node.children[0], expr.Col) and self.state["for_parquet"]:
        # Fixed: the message was a plain string missing its ``f`` prefix,
        # so callers saw the literal text "{node.name}" instead of the
        # actual function name (compare the BooleanFunction handler).
        raise NotImplementedError(
            f"Parquet filters don't support {node.name} on columns"
        )
    # Map the polars function name to its libcudf unary op, then to the
    # corresponding AST operator.
    return plc_expr.Operation(
        UOP_TO_ASTOP[node._OP_MAPPING[node.name]], self(node.children[0])
    )
def to_parquet_filter(node: expr.Expr) -> plc_expr.Expression | None:
    """
    Convert an expression to libcudf AST nodes suitable for parquet filtering.

    Parameters
    ----------
    node
        Expression to convert.

    Returns
    -------
    pylibcudf Expression if conversion is possible, otherwise None.
    """
    visitor: Transformer = CachingVisitor(_to_ast, state={"for_parquet": True})
    try:
        return visitor(node)
    except (KeyError, NotImplementedError):
        # An unsupported expression simply means "no parquet filter".
        return None
def to_ast(node: expr.Expr) -> plc_expr.Expression | None:
    """
    Convert an expression to libcudf AST nodes suitable for compute_column.

    Parameters
    ----------
    node
        Expression to convert.

    Notes
    -----
    `Col` nodes must always be wrapped in `TableRef` nodes when
    converting to an ast expression so that their table reference and
    index are provided.

    Returns
    -------
    pylibcudf Expression if conversion is possible, otherwise None.
    """
    visitor: Transformer = CachingVisitor(_to_ast, state={"for_parquet": False})
    try:
        return visitor(node)
    except (KeyError, NotImplementedError):
        # Callers treat None as "fall back to row-wise evaluation".
        return None
def _insert_colrefs(node: expr.Expr, rec: ExprTransformer) -> expr.Expr:
    # Wrap bare column references with their table index and table
    # reference; every other node is rebuilt only if a child changed.
    if not isinstance(node, expr.Col):
        return reuse_if_unchanged(node, rec)
    state = rec.state
    return expr.ColRef(
        node.dtype,
        state["name_to_index"][node.name],
        state["table_ref"],
        node,
    )
def insert_colrefs(
    node: expr.Expr,
    *,
    table_ref: plc.expressions.TableReference,
    name_to_index: Mapping[str, int],
) -> expr.Expr:
    """
    Insert column references into an expression before conversion to libcudf AST.

    Parameters
    ----------
    node
        Expression to insert references into.
    table_ref
        pylibcudf `TableReference` indicating whether column
        references are coming from the left or right table.
    name_to_index
        Mapping from column names to column indices in the table
        eventually used for evaluation.

    Notes
    -----
    All column references are wrapped in the same, singular, table
    reference, so this function relies on the expression only
    containing column references from a single table.

    Returns
    -------
    New expression with column references inserted.
    """
    state: ExprTransformerState = {
        "name_to_index": name_to_index,
        "table_ref": table_ref,
    }
    visitor: ExprTransformer = CachingVisitor(_insert_colrefs, state=state)
    return visitor(node)
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

"""Utilities for tracing and monitoring IR execution."""

from __future__ import annotations

import functools

import nvtx

# All cudf-polars NVTX ranges are grouped under this domain so they can
# be filtered together in profiler UIs.
CUDF_POLARS_NVTX_DOMAIN = "cudf_polars"

# Drop-in replacement for ``nvtx.annotate`` that always targets the
# cudf-polars domain.
nvtx_annotate_cudf_polars = functools.partial(
    nvtx.annotate, domain=CUDF_POLARS_NVTX_DOMAIN
)