cudf_polars_cu13-25.10.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. cudf_polars/GIT_COMMIT +1 -0
  2. cudf_polars/VERSION +1 -0
  3. cudf_polars/__init__.py +28 -0
  4. cudf_polars/_version.py +21 -0
  5. cudf_polars/callback.py +318 -0
  6. cudf_polars/containers/__init__.py +13 -0
  7. cudf_polars/containers/column.py +495 -0
  8. cudf_polars/containers/dataframe.py +361 -0
  9. cudf_polars/containers/datatype.py +137 -0
  10. cudf_polars/dsl/__init__.py +8 -0
  11. cudf_polars/dsl/expr.py +66 -0
  12. cudf_polars/dsl/expressions/__init__.py +8 -0
  13. cudf_polars/dsl/expressions/aggregation.py +226 -0
  14. cudf_polars/dsl/expressions/base.py +272 -0
  15. cudf_polars/dsl/expressions/binaryop.py +120 -0
  16. cudf_polars/dsl/expressions/boolean.py +326 -0
  17. cudf_polars/dsl/expressions/datetime.py +271 -0
  18. cudf_polars/dsl/expressions/literal.py +97 -0
  19. cudf_polars/dsl/expressions/rolling.py +643 -0
  20. cudf_polars/dsl/expressions/selection.py +74 -0
  21. cudf_polars/dsl/expressions/slicing.py +46 -0
  22. cudf_polars/dsl/expressions/sorting.py +85 -0
  23. cudf_polars/dsl/expressions/string.py +1002 -0
  24. cudf_polars/dsl/expressions/struct.py +137 -0
  25. cudf_polars/dsl/expressions/ternary.py +49 -0
  26. cudf_polars/dsl/expressions/unary.py +517 -0
  27. cudf_polars/dsl/ir.py +2607 -0
  28. cudf_polars/dsl/nodebase.py +164 -0
  29. cudf_polars/dsl/to_ast.py +359 -0
  30. cudf_polars/dsl/tracing.py +16 -0
  31. cudf_polars/dsl/translate.py +939 -0
  32. cudf_polars/dsl/traversal.py +224 -0
  33. cudf_polars/dsl/utils/__init__.py +8 -0
  34. cudf_polars/dsl/utils/aggregations.py +481 -0
  35. cudf_polars/dsl/utils/groupby.py +98 -0
  36. cudf_polars/dsl/utils/naming.py +34 -0
  37. cudf_polars/dsl/utils/replace.py +61 -0
  38. cudf_polars/dsl/utils/reshape.py +74 -0
  39. cudf_polars/dsl/utils/rolling.py +121 -0
  40. cudf_polars/dsl/utils/windows.py +192 -0
  41. cudf_polars/experimental/__init__.py +8 -0
  42. cudf_polars/experimental/base.py +386 -0
  43. cudf_polars/experimental/benchmarks/__init__.py +4 -0
  44. cudf_polars/experimental/benchmarks/pdsds.py +220 -0
  45. cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
  46. cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
  47. cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
  48. cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
  49. cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
  50. cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
  51. cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
  52. cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
  53. cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
  54. cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
  55. cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
  56. cudf_polars/experimental/benchmarks/pdsh.py +814 -0
  57. cudf_polars/experimental/benchmarks/utils.py +832 -0
  58. cudf_polars/experimental/dask_registers.py +200 -0
  59. cudf_polars/experimental/dispatch.py +156 -0
  60. cudf_polars/experimental/distinct.py +197 -0
  61. cudf_polars/experimental/explain.py +157 -0
  62. cudf_polars/experimental/expressions.py +590 -0
  63. cudf_polars/experimental/groupby.py +327 -0
  64. cudf_polars/experimental/io.py +943 -0
  65. cudf_polars/experimental/join.py +391 -0
  66. cudf_polars/experimental/parallel.py +423 -0
  67. cudf_polars/experimental/repartition.py +69 -0
  68. cudf_polars/experimental/scheduler.py +155 -0
  69. cudf_polars/experimental/select.py +188 -0
  70. cudf_polars/experimental/shuffle.py +354 -0
  71. cudf_polars/experimental/sort.py +609 -0
  72. cudf_polars/experimental/spilling.py +151 -0
  73. cudf_polars/experimental/statistics.py +795 -0
  74. cudf_polars/experimental/utils.py +169 -0
  75. cudf_polars/py.typed +0 -0
  76. cudf_polars/testing/__init__.py +8 -0
  77. cudf_polars/testing/asserts.py +448 -0
  78. cudf_polars/testing/io.py +122 -0
  79. cudf_polars/testing/plugin.py +236 -0
  80. cudf_polars/typing/__init__.py +219 -0
  81. cudf_polars/utils/__init__.py +8 -0
  82. cudf_polars/utils/config.py +741 -0
  83. cudf_polars/utils/conversion.py +40 -0
  84. cudf_polars/utils/dtypes.py +118 -0
  85. cudf_polars/utils/sorting.py +53 -0
  86. cudf_polars/utils/timer.py +39 -0
  87. cudf_polars/utils/versions.py +27 -0
  88. cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
  89. cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
  90. cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
  91. cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
  92. cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
cudf_polars/dsl/utils/groupby.py
@@ -0,0 +1,98 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Utilities for grouped aggregations."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pylibcudf as plc
+
+from cudf_polars.dsl import ir
+from cudf_polars.dsl.expressions.base import ExecutionContext
+from cudf_polars.dsl.utils.aggregations import apply_pre_evaluation
+from cudf_polars.dsl.utils.naming import unique_names
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+    from typing import Any
+
+    from cudf_polars.dsl import expr
+    from cudf_polars.typing import Schema
+
+__all__ = ["rewrite_groupby"]
+
+
+def rewrite_groupby(
+    node: Any,
+    schema: Schema,
+    keys: Sequence[expr.NamedExpr],
+    aggs: Sequence[expr.NamedExpr],
+    inp: ir.IR,
+) -> ir.IR:
+    """
+    Rewrite a groupby plan node into something we can handle.
+
+    Parameters
+    ----------
+    node
+        The polars groupby plan node.
+    schema
+        Schema of the groupby plan node.
+    keys
+        Grouping keys.
+    aggs
+        Originally requested aggregations.
+    inp
+        Input plan node to the groupby.
+
+    Returns
+    -------
+    New plan node representing the grouped aggregations.
+
+    Raises
+    ------
+    NotImplementedError
+        If any of the requested aggregations are unsupported.
+
+    Notes
+    -----
+    Since libcudf can only perform grouped aggregations on columns
+    (not arbitrary expressions), the approach is to split each
+    aggregation into a pre-selection phase (evaluating expressions
+    that live within an aggregation), the aggregation phase (now
+    acting on columns only), and a post-selection phase (evaluating
+    expressions of aggregated results).
+
+    This scheme does not permit nested aggregations, so those are
+    unsupported.
+    """
+    if len(aggs) == 0:
+        return ir.Distinct(
+            schema,
+            plc.stream_compaction.DuplicateKeepOption.KEEP_ANY,
+            None,
+            node.options.slice,
+            node.maintain_order,
+            ir.Select(schema, keys, True, inp),  # noqa: FBT003
+        )
+
+    aggs, group_schema, apply_post_evaluation = apply_pre_evaluation(
+        schema,
+        keys,
+        aggs,
+        unique_names(schema.keys()),
+        ExecutionContext.GROUPBY,
+    )
+    # TODO: use Distinct when the partitioned executor supports it if
+    # the requested aggregations are empty
+    inp = ir.GroupBy(
+        group_schema,
+        keys,
+        aggs,
+        node.maintain_order,
+        node.options.slice,
+        inp,
+    )
+    return apply_post_evaluation(inp)
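The three-phase split described in the Notes above can be mimicked with the public polars API. A minimal sketch (plain CPU polars, not the internal IR; the temporary name `__0` is a hypothetical stand-in for the collision-free names `unique_names` produces):

```python
import polars as pl

df = pl.DataFrame({"k": [1, 1, 2], "x": [1.0, 2.0, 3.0]})

# Original request: expressions both inside and outside the aggregation.
direct = df.group_by("k").agg(((pl.col("x") * 2).sum() + 1).alias("y"))

# Equivalent three-phase plan, as the Notes describe:
# 1. pre-selection: evaluate the expression living inside the agg
#    into a plain column ("__0" is a hypothetical temporary name)
pre = df.select(pl.col("k"), (pl.col("x") * 2).alias("__0"))
# 2. aggregation: now a column-only aggregation libcudf can run
agged = pre.group_by("k").agg(pl.col("__0").sum())
# 3. post-selection: evaluate expressions of aggregated results
post = agged.select(pl.col("k"), (pl.col("__0") + 1).alias("y"))

assert direct.sort("k").equals(post.sort("k"))
```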
cudf_polars/dsl/utils/naming.py
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Name generation utilities."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from collections.abc import Generator, Iterable
+
+
+__all__ = ["unique_names"]
+
+
+def unique_names(names: Iterable[str]) -> Generator[str, None, None]:
+    """
+    Generate unique names relative to some known names.
+
+    Parameters
+    ----------
+    names
+        Names we should be unique with respect to.
+
+    Yields
+    ------
+    Unique names (just using sequence numbers).
+    """
+    prefix = "_" * max(map(len, names))
+    i = 0
+    while True:
+        yield f"{prefix}{i}"
+        i += 1
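A usage sketch of the invariant this generator relies on: the underscore prefix is as long as the longest known name, so every generated name is strictly longer than, and hence distinct from, all of them. The `schema_names` below are hypothetical:

```python
from itertools import islice

from cudf_polars.dsl.utils.naming import unique_names

# Hypothetical schema; the longest name is 6 characters.
schema_names = ["total", "weight", "k"]

# The prefix is six underscores, so every generated name has at
# least 7 characters and can never equal an existing name.
print(list(islice(unique_names(schema_names), 3)))
# ['______0', '______1', '______2']
```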
cudf_polars/dsl/utils/replace.py
@@ -0,0 +1,61 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Utilities for replacing nodes in a DAG."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Generic
+
+from cudf_polars.dsl.traversal import CachingVisitor, reuse_if_unchanged
+from cudf_polars.typing import NodeT, TypedDict
+
+if TYPE_CHECKING:
+    from collections.abc import Mapping, Sequence
+
+    from cudf_polars.typing import GenericTransformer
+
+__all__ = ["replace"]
+
+
+class State(Generic[NodeT], TypedDict):
+    """
+    State used when replacing nodes in expressions.
+
+    Parameters
+    ----------
+    replacements
+        Mapping from nodes to be replaced to their replacements.
+        This state is generic over the type of these nodes.
+    """
+
+    replacements: Mapping[NodeT, NodeT]
+
+
+def _replace(node: NodeT, fn: GenericTransformer[NodeT, NodeT, State]) -> NodeT:
+    try:
+        return fn.state["replacements"][node]
+    except KeyError:
+        return reuse_if_unchanged(node, fn)
+
+
+def replace(nodes: Sequence[NodeT], replacements: Mapping[NodeT, NodeT]) -> list[NodeT]:
+    """
+    Replace nodes in expressions.
+
+    Parameters
+    ----------
+    nodes
+        Sequence of nodes to perform replacements in.
+    replacements
+        Mapping from nodes to be replaced to their replacements.
+
+    Returns
+    -------
+    list
+        Of nodes with replacements performed.
+    """
+    mapper: GenericTransformer[NodeT, NodeT, State] = CachingVisitor(
+        _replace, state={"replacements": replacements}
+    )
+    return [mapper(node) for node in nodes]
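The substitute-or-recurse pattern in `_replace` (return the mapped replacement on a direct hit, otherwise rebuild the children and reuse the node when nothing changed) can be illustrated on a toy immutable tree. `Node` and `toy_replace` below are hypothetical stand-ins for `NodeT` and the `CachingVisitor`-driven traversal, not the library's API:

```python
from __future__ import annotations

from dataclasses import dataclass


@dataclass(frozen=True)
class Node:
    """Toy immutable expression node (hypothetical stand-in for NodeT)."""

    name: str
    children: tuple[Node, ...] = ()


def toy_replace(
    node: Node, replacements: dict[Node, Node], cache: dict[Node, Node]
) -> Node:
    """Memoized substitute-or-recurse, mirroring _replace/reuse_if_unchanged."""
    if node in cache:
        return cache[node]
    if node in replacements:  # direct hit: substitute
        result = replacements[node]
    else:  # otherwise rebuild children...
        children = tuple(toy_replace(c, replacements, cache) for c in node.children)
        # ...reusing the node unchanged when no child changed
        result = node if children == node.children else Node(node.name, children)
    cache[node] = result
    return result


a, b = Node("a"), Node("b")
tree = Node("add", (a, Node("mul", (a, b))))
out = toy_replace(tree, {a: Node("c")}, {})
assert out == Node("add", (Node("c"), Node("mul", (Node("c"), b))))
```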
cudf_polars/dsl/utils/reshape.py
@@ -0,0 +1,74 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+"""Utilities for reshaping Columns."""
+
+from __future__ import annotations
+
+import pylibcudf as plc
+
+from cudf_polars.containers import Column
+
+
+def broadcast(*columns: Column, target_length: int | None = None) -> list[Column]:
+    """
+    Broadcast a sequence of columns to a common length.
+
+    Parameters
+    ----------
+    columns
+        Columns to broadcast.
+    target_length
+        Optional length to broadcast to. If not provided, uses the
+        non-unit length of existing columns.
+
+    Returns
+    -------
+    List of broadcasted columns all of the same length.
+
+    Raises
+    ------
+    RuntimeError
+        If broadcasting is not possible.
+
+    Notes
+    -----
+    In evaluation of a set of expressions, polars type-puns length-1
+    columns with scalars. When we insert these into a DataFrame
+    object, we need to ensure they are of equal length. This function
+    takes some columns, some of which may be length-1, and ensures
+    that all length-1 columns are broadcast to the length of the
+    others.
+
+    Broadcasting is only possible if the set of lengths of the input
+    columns is a subset of ``{1, n}`` for some (fixed) ``n``. If
+    ``target_length`` is provided and not all columns are length-1
+    (i.e. ``n != 1``), then ``target_length`` must be equal to ``n``.
+    """
+    if len(columns) == 0:
+        return []
+    lengths: set[int] = {column.size for column in columns}
+    if lengths == {1}:
+        if target_length is None:
+            return list(columns)
+        nrows = target_length
+    else:
+        try:
+            (nrows,) = lengths.difference([1])
+        except ValueError as e:
+            raise RuntimeError("Mismatching column lengths") from e
+        if target_length is not None and nrows != target_length:
+            raise RuntimeError(
+                f"Cannot broadcast columns of length {nrows=} to {target_length=}"
+            )
+    return [
+        column
+        if column.size != 1
+        else Column(
+            plc.Column.from_scalar(column.obj_scalar, nrows),
+            is_sorted=plc.types.Sorted.YES,
+            order=plc.types.Order.ASCENDING,
+            null_order=plc.types.NullOrder.BEFORE,
+            name=column.name,
+            dtype=column.dtype,
+        )
+        for column in columns
+    ]
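The length-1 type-punning the Notes describe can be seen with plain CPU polars, no pylibcudf required; a minimal illustration:

```python
import polars as pl

df = pl.DataFrame({"a": [1, 2, 3]})

# pl.lit(0) evaluates to a length-1 column; inserting it next to a
# length-3 column broadcasts it, exactly the {1, n} rule above.
out = df.with_columns(b=pl.lit(0))
print(out["b"].to_list())  # [0, 0, 0]

# Mismatched non-unit lengths are an error (the RuntimeError case):
# df.with_columns(b=pl.Series([1, 2]))  # raises a shape error
```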
cudf_polars/dsl/utils/rolling.py
@@ -0,0 +1,121 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Utilities for rolling window aggregations."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pylibcudf as plc
+
+from cudf_polars.dsl import expr, ir
+from cudf_polars.dsl.expressions.base import ExecutionContext
+from cudf_polars.dsl.utils.aggregations import apply_pre_evaluation
+from cudf_polars.dsl.utils.naming import unique_names
+from cudf_polars.dsl.utils.windows import offsets_to_windows
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+    from typing import Any
+
+    from cudf_polars.typing import Schema
+    from cudf_polars.utils import config
+
+__all__ = ["rewrite_rolling"]
+
+
+def rewrite_rolling(
+    options: Any,
+    schema: Schema,
+    keys: Sequence[expr.NamedExpr],
+    aggs: Sequence[expr.NamedExpr],
+    config_options: config.ConfigOptions,
+    inp: ir.IR,
+) -> ir.IR:
+    """
+    Rewrite a rolling plan node into something we can handle.
+
+    Parameters
+    ----------
+    options
+        Rolling-specific group options.
+    schema
+        Schema of the rolling plan node.
+    keys
+        Grouping keys for the rolling node (may be empty).
+    aggs
+        Originally requested rolling aggregations.
+    config_options
+        Configuration options (currently unused).
+    inp
+        Input plan node to the rolling aggregation.
+
+    Returns
+    -------
+    New plan node representing the rolling aggregations.
+
+    Raises
+    ------
+    NotImplementedError
+        If any of the requested aggregations are unsupported.
+
+    Notes
+    -----
+    Since libcudf can only perform rolling aggregations on columns
+    (not arbitrary expressions), the approach is to split each
+    aggregation into a pre-selection phase (evaluating expressions
+    that live within an aggregation), the aggregation phase (now
+    acting on columns only), and a post-selection phase (evaluating
+    expressions of aggregated results).
+    This scheme does not permit nested aggregations, so those are
+    unsupported.
+    """
+    index_name = options.rolling.index_column
+    index_dtype = schema[index_name]
+    index_col = expr.Col(index_dtype, index_name)
+    if plc.traits.is_integral(index_dtype.plc) and index_dtype.id() != plc.TypeId.INT64:
+        plc_index_dtype = plc.DataType(plc.TypeId.INT64)
+    else:
+        plc_index_dtype = index_dtype.plc
+    index = expr.NamedExpr(index_name, index_col)
+    temp_prefix = "_" * max(map(len, schema))
+    if len(aggs) > 0:
+        aggs, rolling_schema, apply_post_evaluation = apply_pre_evaluation(
+            schema,
+            keys,
+            aggs,
+            unique_names(temp_prefix),
+            ExecutionContext.ROLLING,
+            index,
+        )
+    else:
+        rolling_schema = schema
+        apply_post_evaluation = lambda inp: inp  # noqa: E731
+    preceding, following = offsets_to_windows(
+        plc_index_dtype, options.rolling.offset, options.rolling.period
+    )
+    if (n := len(keys)) > 0:
+        # Grouped rolling in polars sorts the output by the groups.
+        inp = ir.Sort(
+            inp.schema,
+            keys,
+            [plc.types.Order.ASCENDING] * n,
+            [plc.types.NullOrder.BEFORE] * n,
+            True,  # noqa: FBT003
+            None,
+            inp,
+        )
+    return apply_post_evaluation(
+        ir.Rolling(
+            rolling_schema,
+            index,
+            preceding,
+            following,
+            options.rolling.closed_window,
+            keys,
+            aggs,
+            options.slice,
+            inp,
+        )
+    )
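For context, this rewrite targets queries like the following plain-polars sketch (run lazily with the GPU engine, such a rolling aggregation is lowered through `rewrite_rolling`). The expected output assumes polars' documented defaults of `offset = -period` and `closed = "right"`:

```python
import polars as pl

df = pl.DataFrame(
    {
        "t": pl.datetime_range(
            pl.datetime(2025, 1, 1), pl.datetime(2025, 1, 5), interval="1d", eager=True
        ),
        "x": [1, 2, 3, 4, 5],
    }
)

# A rolling aggregation over the index column "t": each row's window
# covers the two days up to and including that row (period="2d").
out = df.rolling(index_column="t", period="2d").agg(pl.col("x").sum())
print(out["x"].to_list())  # [1, 3, 5, 7, 9]
```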
cudf_polars/dsl/utils/windows.py
@@ -0,0 +1,192 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Utilities for rolling window aggregations."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import polars as pl
+
+import pylibcudf as plc
+
+if TYPE_CHECKING:
+    from cudf_polars.typing import ClosedInterval, Duration
+
+
+__all__ = [
+    "duration_to_int",
+    "duration_to_scalar",
+    "offsets_to_windows",
+    "range_window_bounds",
+]
+
+
+def duration_to_int(
+    dtype: plc.DataType,
+    months: int,
+    weeks: int,
+    days: int,
+    nanoseconds: int,
+    parsed_int: bool,  # noqa: FBT001
+    negative: bool,  # noqa: FBT001
+) -> int:
+    """
+    Convert a polars duration value to an integer.
+
+    Parameters
+    ----------
+    dtype
+        The type of the column being added to.
+    months
+        Number of months.
+    weeks
+        Number of weeks.
+    days
+        Number of days.
+    nanoseconds
+        Number of nanoseconds.
+    parsed_int
+        Is this actually a representation of an integer, not a duration?
+    negative
+        Is this a negative duration?
+
+    Returns
+    -------
+    int
+        The total number of nanoseconds represented by this duration,
+        or just an integer if `parsed_int` was true.
+
+    Raises
+    ------
+    NotImplementedError
+        For unsupported durations or datatypes.
+    """
+    if months != 0:
+        raise NotImplementedError("Month durations in rolling windows")
+    if parsed_int and (weeks != 0 or days != 0 or dtype.id() != plc.TypeId.INT64):
+        raise NotImplementedError(
+            "Invalid duration for parsed_int"
+        )  # pragma: no cover; polars raises first
+    elif not parsed_int and dtype.id() == plc.TypeId.INT64:
+        raise pl.exceptions.InvalidOperationError("Duration must be a parsed integer")
+    value = nanoseconds + 24 * 60 * 60 * 10**9 * (days + 7 * weeks)
+    return -value if negative else value
+
+
+def duration_to_scalar(dtype: plc.DataType, value: int) -> plc.Scalar:
+    """
+    Convert a raw polars duration value to a pylibcudf scalar.
+
+    Parameters
+    ----------
+    dtype
+        The type of the column being added to.
+    value
+        The raw value as an integer. If `dtype` represents a timestamp
+        type, this should be in nanoseconds.
+
+    Returns
+    -------
+    pylibcudf.Scalar
+        With datatype matching the provided dtype.
+
+    Raises
+    ------
+    NotImplementedError
+        For unsupported durations or datatypes.
+    """
+    tid = dtype.id()
+    if tid == plc.TypeId.INT64:
+        return plc.Scalar.from_py(value, dtype)
+    elif tid == plc.TypeId.TIMESTAMP_NANOSECONDS:
+        return plc.Scalar.from_py(value, plc.DataType(plc.TypeId.DURATION_NANOSECONDS))
+    elif tid == plc.TypeId.TIMESTAMP_MICROSECONDS:
+        return plc.Scalar.from_py(
+            value // 1000, plc.DataType(plc.TypeId.DURATION_MICROSECONDS)
+        )
+    elif tid == plc.TypeId.TIMESTAMP_MILLISECONDS:
+        return plc.Scalar.from_py(
+            value // 1_000_000, plc.DataType(plc.TypeId.DURATION_MILLISECONDS)
+        )
+    elif tid == plc.TypeId.TIMESTAMP_DAYS:
+        return plc.Scalar.from_py(
+            value // 86_400_000_000_000, plc.DataType(plc.TypeId.DURATION_DAYS)
+        )
+    else:
+        raise NotImplementedError(
+            "Unsupported data type in rolling window offset"
+        )  # pragma: no cover; polars raises first
+
+
+def offsets_to_windows(
+    dtype: plc.DataType,
+    offset: Duration,
+    period: Duration,
+) -> tuple[plc.Scalar, plc.Scalar]:
+    """
+    Convert polars offset/period pair to preceding/following windows.
+
+    Parameters
+    ----------
+    dtype
+        Datatype of column defining windows.
+    offset
+        Offset duration.
+    period
+        Period of window.
+
+    Returns
+    -------
+    tuple of preceding and following windows as pylibcudf scalars.
+    """
+    offset_i = duration_to_int(dtype, *offset)
+    period_i = duration_to_int(dtype, *period)
+    # Polars uses current_row + offset, ..., current_row + offset + period
+    # Libcudf uses current_row - preceding, ..., current_row + following
+    return duration_to_scalar(dtype, -offset_i), duration_to_scalar(
+        dtype, offset_i + period_i
+    )
+
+
+def range_window_bounds(
+    preceding: plc.Scalar, following: plc.Scalar, closed_window: ClosedInterval
+) -> tuple[plc.rolling.RangeWindowType, plc.rolling.RangeWindowType]:
+    """
+    Convert preceding and following scalars to range window specs.
+
+    Parameters
+    ----------
+    preceding
+        The preceding window scalar.
+    following
+        The following window scalar.
+    closed_window
+        How the window interval endpoints are treated.
+
+    Returns
+    -------
+    tuple
+        Of preceding and following range window types.
+    """
+    if closed_window == "both":
+        return (
+            plc.rolling.BoundedClosed(preceding),
+            plc.rolling.BoundedClosed(following),
+        )
+    elif closed_window == "left":
+        return (
+            plc.rolling.BoundedClosed(preceding),
+            plc.rolling.BoundedOpen(following),
+        )
+    elif closed_window == "right":
+        return (
+            plc.rolling.BoundedOpen(preceding),
+            plc.rolling.BoundedClosed(following),
+        )
+    else:
+        return (
+            plc.rolling.BoundedOpen(preceding),
+            plc.rolling.BoundedOpen(following),
+        )
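A worked example of the offset/period arithmetic in `offsets_to_windows`, assuming an offset of -1 day and a period of 3 days, everything in nanoseconds (pure Python):

```python
# Pure-Python sketch of the window arithmetic above.
DAY = 24 * 60 * 60 * 10**9  # nanoseconds per day

offset_i = -1 * DAY  # window starts one day *before* the current row
period_i = 3 * DAY   # and spans three days in total

# Polars window:  [row + offset, row + offset + period]
# libcudf window: [row - preceding, row + following]
preceding = -offset_i             # 1 day  ->  86_400_000_000_000 ns
following = offset_i + period_i   # 2 days -> 172_800_000_000_000 ns

assert preceding == DAY and following == 2 * DAY
```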
cudf_polars/experimental/__init__.py
@@ -0,0 +1,8 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Experimental features, which can change without any deprecation period."""
+
+from __future__ import annotations
+
+__all__: list[str] = []