cudf-polars-cu13 25.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/GIT_COMMIT +1 -0
- cudf_polars/VERSION +1 -0
- cudf_polars/__init__.py +28 -0
- cudf_polars/_version.py +21 -0
- cudf_polars/callback.py +318 -0
- cudf_polars/containers/__init__.py +13 -0
- cudf_polars/containers/column.py +495 -0
- cudf_polars/containers/dataframe.py +361 -0
- cudf_polars/containers/datatype.py +137 -0
- cudf_polars/dsl/__init__.py +8 -0
- cudf_polars/dsl/expr.py +66 -0
- cudf_polars/dsl/expressions/__init__.py +8 -0
- cudf_polars/dsl/expressions/aggregation.py +226 -0
- cudf_polars/dsl/expressions/base.py +272 -0
- cudf_polars/dsl/expressions/binaryop.py +120 -0
- cudf_polars/dsl/expressions/boolean.py +326 -0
- cudf_polars/dsl/expressions/datetime.py +271 -0
- cudf_polars/dsl/expressions/literal.py +97 -0
- cudf_polars/dsl/expressions/rolling.py +643 -0
- cudf_polars/dsl/expressions/selection.py +74 -0
- cudf_polars/dsl/expressions/slicing.py +46 -0
- cudf_polars/dsl/expressions/sorting.py +85 -0
- cudf_polars/dsl/expressions/string.py +1002 -0
- cudf_polars/dsl/expressions/struct.py +137 -0
- cudf_polars/dsl/expressions/ternary.py +49 -0
- cudf_polars/dsl/expressions/unary.py +517 -0
- cudf_polars/dsl/ir.py +2607 -0
- cudf_polars/dsl/nodebase.py +164 -0
- cudf_polars/dsl/to_ast.py +359 -0
- cudf_polars/dsl/tracing.py +16 -0
- cudf_polars/dsl/translate.py +939 -0
- cudf_polars/dsl/traversal.py +224 -0
- cudf_polars/dsl/utils/__init__.py +8 -0
- cudf_polars/dsl/utils/aggregations.py +481 -0
- cudf_polars/dsl/utils/groupby.py +98 -0
- cudf_polars/dsl/utils/naming.py +34 -0
- cudf_polars/dsl/utils/replace.py +61 -0
- cudf_polars/dsl/utils/reshape.py +74 -0
- cudf_polars/dsl/utils/rolling.py +121 -0
- cudf_polars/dsl/utils/windows.py +192 -0
- cudf_polars/experimental/__init__.py +8 -0
- cudf_polars/experimental/base.py +386 -0
- cudf_polars/experimental/benchmarks/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds.py +220 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
- cudf_polars/experimental/benchmarks/pdsh.py +814 -0
- cudf_polars/experimental/benchmarks/utils.py +832 -0
- cudf_polars/experimental/dask_registers.py +200 -0
- cudf_polars/experimental/dispatch.py +156 -0
- cudf_polars/experimental/distinct.py +197 -0
- cudf_polars/experimental/explain.py +157 -0
- cudf_polars/experimental/expressions.py +590 -0
- cudf_polars/experimental/groupby.py +327 -0
- cudf_polars/experimental/io.py +943 -0
- cudf_polars/experimental/join.py +391 -0
- cudf_polars/experimental/parallel.py +423 -0
- cudf_polars/experimental/repartition.py +69 -0
- cudf_polars/experimental/scheduler.py +155 -0
- cudf_polars/experimental/select.py +188 -0
- cudf_polars/experimental/shuffle.py +354 -0
- cudf_polars/experimental/sort.py +609 -0
- cudf_polars/experimental/spilling.py +151 -0
- cudf_polars/experimental/statistics.py +795 -0
- cudf_polars/experimental/utils.py +169 -0
- cudf_polars/py.typed +0 -0
- cudf_polars/testing/__init__.py +8 -0
- cudf_polars/testing/asserts.py +448 -0
- cudf_polars/testing/io.py +122 -0
- cudf_polars/testing/plugin.py +236 -0
- cudf_polars/typing/__init__.py +219 -0
- cudf_polars/utils/__init__.py +8 -0
- cudf_polars/utils/config.py +741 -0
- cudf_polars/utils/conversion.py +40 -0
- cudf_polars/utils/dtypes.py +118 -0
- cudf_polars/utils/sorting.py +53 -0
- cudf_polars/utils/timer.py +39 -0
- cudf_polars/utils/versions.py +27 -0
- cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
- cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
- cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
- cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
- cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
# TODO: remove need for this
|
|
4
|
+
# ruff: noqa: D101
|
|
5
|
+
"""DSL nodes for selection operations."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
import pylibcudf as plc
|
|
12
|
+
|
|
13
|
+
from cudf_polars.containers import Column
|
|
14
|
+
from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from cudf_polars.containers import DataFrame, DataType
|
|
18
|
+
|
|
19
|
+
__all__ = ["Filter", "Gather"]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Gather(Expr):
    """Expression node that picks rows of one child column by integer index."""

    __slots__ = ()
    _non_child = ("dtype",)

    def __init__(self, dtype: DataType, values: Expr, indices: Expr) -> None:
        self.dtype = dtype
        # children[0] yields the column gathered from, children[1] the row indices.
        self.children = (values, indices)
        self.is_pointwise = False

    def do_evaluate(
        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
        """
        Evaluate this expression given a dataframe for context.

        Parameters
        ----------
        df
            DataFrame both children are evaluated against.
        context
            Execution context forwarded unchanged to the children.

        Returns
        -------
        Column holding the gathered rows, tagged with this node's dtype.

        Raises
        ------
        ValueError
            If the smallest or largest non-null index falls outside
            ``[-n, n)``, where ``n`` is the length of the evaluated values
            column.
        """
        values, indices = (
            child.evaluate(df, context=context) for child in self.children
        )
        # Bounds are relative to the column actually being gathered from. Its
        # length can differ from df.num_rows (e.g. when the values child
        # filters, slices or aggregates), and the gather below may skip bounds
        # checking entirely, so the check here must use the true length.
        n = values.obj.size()
        lo, hi = plc.reduce.minmax(indices.obj)
        lo = lo.to_py()
        hi = hi.to_py()
        # NOTE(review): when there is nothing to reduce (empty or all-null
        # indices) the min/max scalars are expected to be null and to_py() to
        # return None -- confirm against pylibcudf. Nothing can be out of
        # bounds in that case, so skip the comparison instead of raising
        # TypeError on ``None >= n``.
        if lo is not None and hi is not None and (hi >= n or lo < -n):
            raise ValueError("gather indices are out of bounds")
        if indices.null_count:
            # Replace null indices by n, which is one past the last valid row,
            # and let NULLIFY turn those out-of-bounds lookups into null rows.
            bounds_policy = plc.copying.OutOfBoundsPolicy.NULLIFY
            obj = plc.replace.replace_nulls(
                indices.obj,
                plc.Scalar.from_py(n, dtype=indices.obj.type()),
            )
        else:
            # Every index was validated above, so no further check is needed.
            bounds_policy = plc.copying.OutOfBoundsPolicy.DONT_CHECK
            obj = indices.obj
        table = plc.copying.gather(plc.Table([values.obj]), obj, bounds_policy)
        return Column(table.columns()[0], dtype=self.dtype)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class Filter(Expr):
    """Expression node that keeps the rows of a column selected by a mask."""

    __slots__ = ()
    _non_child = ("dtype",)

    def __init__(self, dtype: DataType, values: Expr, indices: Expr):
        # The second child is named ``indices`` but is evaluated as the mask
        # handed to apply_boolean_mask in do_evaluate.
        self.dtype = dtype
        self.children = (values, indices)
        self.is_pointwise = False

    def do_evaluate(
        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
        """
        Evaluate this expression given a dataframe for context.

        Both children are evaluated against ``df``; the first supplies the
        data and the second the mask applied to it. The result is tagged with
        this node's dtype and takes its sortedness metadata from the
        unfiltered data column via ``sorted_like``.
        """
        source, selector = [
            expr.evaluate(df, context=context) for expr in self.children
        ]
        wrapped = plc.Table([source.obj])
        compacted = plc.stream_compaction.apply_boolean_mask(wrapped, selector.obj)
        kept = Column(compacted.columns()[0], dtype=self.dtype)
        return kept.sorted_like(source)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
# TODO: remove need for this
|
|
4
|
+
# ruff: noqa: D101
|
|
5
|
+
"""Slicing DSL nodes."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
from cudf_polars.dsl.expressions.base import (
|
|
12
|
+
ExecutionContext,
|
|
13
|
+
Expr,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from cudf_polars.containers import Column, DataFrame, DataType
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
__all__ = ["Slice"]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Slice(Expr):
    """Expression node that takes a contiguous slice of its single child."""

    __slots__ = ("length", "offset")
    _non_child = ("dtype", "offset", "length")

    def __init__(
        self,
        dtype: DataType,
        offset: int,
        length: int | None,
        column: Expr,
    ) -> None:
        # offset and length are stored as given and passed to Column.slice
        # untouched when the node is evaluated.
        self.dtype = dtype
        self.offset = offset
        self.length = length
        self.children = (column,)
        self.is_pointwise = False

    def do_evaluate(
        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
        """
        Evaluate this expression given a dataframe for context.

        The only child is evaluated against ``df`` and the resulting column is
        sliced with the stored ``(offset, length)`` pair.
        """
        (operand,) = self.children
        evaluated = operand.evaluate(df, context=context)
        # NOTE(review): the meaning of a negative offset or a None length is
        # decided by Column.slice, which is not visible here.
        window = (self.offset, self.length)
        return evaluated.slice(window)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
# TODO: remove need for this
|
|
4
|
+
# ruff: noqa: D101
|
|
5
|
+
"""Sorting DSL nodes."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
import pylibcudf as plc
|
|
12
|
+
|
|
13
|
+
from cudf_polars.containers import Column
|
|
14
|
+
from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
|
|
15
|
+
from cudf_polars.utils import sorting
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from cudf_polars.containers import DataFrame, DataType
|
|
19
|
+
|
|
20
|
+
__all__ = ["Sort", "SortBy"]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Sort(Expr):
    """Expression node that sorts its single child column by its own values."""

    __slots__ = ("options",)
    _non_child = ("dtype", "options")

    def __init__(
        self, dtype: DataType, options: tuple[bool, bool, bool], column: Expr
    ) -> None:
        self.dtype = dtype
        # Unpacked in do_evaluate as (stable, nulls_last, descending).
        self.options = options
        self.children = (column,)
        self.is_pointwise = False

    def do_evaluate(
        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
        """
        Evaluate this expression given a dataframe for context.

        The child is evaluated against ``df`` and sorted as a one-column
        table. The returned column is flagged as sorted and records the order
        and null order that were used.
        """
        (operand,) = self.children
        evaluated = operand.evaluate(df, context=context)
        stable, nulls_last, descending = self.options
        # sort_order works on per-key sequences; there is exactly one key here.
        orders, null_orders = sorting.sort_order(
            [descending], nulls_last=[nulls_last], num_keys=1
        )
        if stable:
            sorter = plc.sorting.stable_sort
        else:
            sorter = plc.sorting.sort
        sorted_table = sorter(plc.Table([evaluated.obj]), orders, null_orders)
        return Column(
            sorted_table.columns()[0],
            is_sorted=plc.types.Sorted.YES,
            order=orders[0],
            null_order=null_orders[0],
            dtype=self.dtype,
        )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class SortBy(Expr):
    """Expression node that reorders one column by the sort order of key columns."""

    __slots__ = ("options",)
    _non_child = ("dtype", "options")

    def __init__(
        self,
        dtype: DataType,
        # The flag tuples are passed to sort_order together with
        # num_keys=len(by), so they are variable-length rather than 1-tuples.
        options: tuple[bool, tuple[bool, ...], tuple[bool, ...]],
        column: Expr,
        *by: Expr,
    ) -> None:
        self.dtype = dtype
        # Unpacked in do_evaluate as (stable, nulls_last, descending).
        self.options = options
        # children[0] is the column to reorder; the remaining children are keys.
        self.children = (column, *by)
        self.is_pointwise = False

    def do_evaluate(
        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
        """
        Evaluate this expression given a dataframe for context.

        Every child is evaluated against ``df``. The first result is the
        column to reorder and the remaining results form the key table it is
        sorted by. The returned column carries this node's dtype and no
        sortedness metadata.
        """
        column, *by = (child.evaluate(df, context=context) for child in self.children)
        (stable, nulls_last, descending) = self.options
        order, null_order = sorting.sort_order(
            descending, nulls_last=nulls_last, num_keys=len(by)
        )
        do_sort = plc.sorting.stable_sort_by_key if stable else plc.sorting.sort_by_key
        table = do_sort(
            plc.Table([column.obj]), plc.Table([c.obj for c in by]), order, null_order
        )
        return Column(table.columns()[0], dtype=self.dtype)
|