cudf-polars-cu13 25.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/GIT_COMMIT +1 -0
- cudf_polars/VERSION +1 -0
- cudf_polars/__init__.py +28 -0
- cudf_polars/_version.py +21 -0
- cudf_polars/callback.py +318 -0
- cudf_polars/containers/__init__.py +13 -0
- cudf_polars/containers/column.py +495 -0
- cudf_polars/containers/dataframe.py +361 -0
- cudf_polars/containers/datatype.py +137 -0
- cudf_polars/dsl/__init__.py +8 -0
- cudf_polars/dsl/expr.py +66 -0
- cudf_polars/dsl/expressions/__init__.py +8 -0
- cudf_polars/dsl/expressions/aggregation.py +226 -0
- cudf_polars/dsl/expressions/base.py +272 -0
- cudf_polars/dsl/expressions/binaryop.py +120 -0
- cudf_polars/dsl/expressions/boolean.py +326 -0
- cudf_polars/dsl/expressions/datetime.py +271 -0
- cudf_polars/dsl/expressions/literal.py +97 -0
- cudf_polars/dsl/expressions/rolling.py +643 -0
- cudf_polars/dsl/expressions/selection.py +74 -0
- cudf_polars/dsl/expressions/slicing.py +46 -0
- cudf_polars/dsl/expressions/sorting.py +85 -0
- cudf_polars/dsl/expressions/string.py +1002 -0
- cudf_polars/dsl/expressions/struct.py +137 -0
- cudf_polars/dsl/expressions/ternary.py +49 -0
- cudf_polars/dsl/expressions/unary.py +517 -0
- cudf_polars/dsl/ir.py +2607 -0
- cudf_polars/dsl/nodebase.py +164 -0
- cudf_polars/dsl/to_ast.py +359 -0
- cudf_polars/dsl/tracing.py +16 -0
- cudf_polars/dsl/translate.py +939 -0
- cudf_polars/dsl/traversal.py +224 -0
- cudf_polars/dsl/utils/__init__.py +8 -0
- cudf_polars/dsl/utils/aggregations.py +481 -0
- cudf_polars/dsl/utils/groupby.py +98 -0
- cudf_polars/dsl/utils/naming.py +34 -0
- cudf_polars/dsl/utils/replace.py +61 -0
- cudf_polars/dsl/utils/reshape.py +74 -0
- cudf_polars/dsl/utils/rolling.py +121 -0
- cudf_polars/dsl/utils/windows.py +192 -0
- cudf_polars/experimental/__init__.py +8 -0
- cudf_polars/experimental/base.py +386 -0
- cudf_polars/experimental/benchmarks/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds.py +220 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
- cudf_polars/experimental/benchmarks/pdsh.py +814 -0
- cudf_polars/experimental/benchmarks/utils.py +832 -0
- cudf_polars/experimental/dask_registers.py +200 -0
- cudf_polars/experimental/dispatch.py +156 -0
- cudf_polars/experimental/distinct.py +197 -0
- cudf_polars/experimental/explain.py +157 -0
- cudf_polars/experimental/expressions.py +590 -0
- cudf_polars/experimental/groupby.py +327 -0
- cudf_polars/experimental/io.py +943 -0
- cudf_polars/experimental/join.py +391 -0
- cudf_polars/experimental/parallel.py +423 -0
- cudf_polars/experimental/repartition.py +69 -0
- cudf_polars/experimental/scheduler.py +155 -0
- cudf_polars/experimental/select.py +188 -0
- cudf_polars/experimental/shuffle.py +354 -0
- cudf_polars/experimental/sort.py +609 -0
- cudf_polars/experimental/spilling.py +151 -0
- cudf_polars/experimental/statistics.py +795 -0
- cudf_polars/experimental/utils.py +169 -0
- cudf_polars/py.typed +0 -0
- cudf_polars/testing/__init__.py +8 -0
- cudf_polars/testing/asserts.py +448 -0
- cudf_polars/testing/io.py +122 -0
- cudf_polars/testing/plugin.py +236 -0
- cudf_polars/typing/__init__.py +219 -0
- cudf_polars/utils/__init__.py +8 -0
- cudf_polars/utils/config.py +741 -0
- cudf_polars/utils/conversion.py +40 -0
- cudf_polars/utils/dtypes.py +118 -0
- cudf_polars/utils/sorting.py +53 -0
- cudf_polars/utils/timer.py +39 -0
- cudf_polars/utils/versions.py +27 -0
- cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
- cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
- cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
- cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
- cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Miscellaneous conversion functions."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from cudf_polars.typing import Slice
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def from_polars_slice(zlice: Slice, *, num_rows: int) -> list[int]:
    """
    Translate a Polars ``(offset, length)`` slice into row bounds for pylibcudf.

    Parameters
    ----------
    zlice
        Pair of ``(offset, length)``. A negative offset has ``num_rows``
        added to it once; a length of ``None`` is treated as ``num_rows``.
    num_rows
        The number of rows in the object being sliced.

    Returns
    -------
    Two-element list ``[start, end]``, each value clamped to the
    inclusive range ``[0, num_rows]``.
    """
    offset, count = zlice
    # A negative offset is wrapped exactly once; anything still negative
    # afterwards is handled by the clamp below.
    begin = offset + num_rows if offset < 0 else offset
    # The end is computed from the wrapped (but not yet clamped) start.
    stop = begin + (num_rows if count is None else count)
    return [
        max(min(begin, num_rows), 0),
        max(min(stop, num_rows), 0),
    ]
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Datatype utilities."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import pylibcudf as plc
|
|
9
|
+
from pylibcudf.traits import (
|
|
10
|
+
is_floating_point,
|
|
11
|
+
is_integral_not_bool,
|
|
12
|
+
is_nested,
|
|
13
|
+
is_numeric_not_bool,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"can_cast",
|
|
18
|
+
"is_order_preserving_cast",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
    """
    Determine whether a cast between two datatypes is supported by cudf-polars.

    Nested-to-nested casts are rejected up front. Otherwise a cast is
    accepted when any one of these holds, checked in this order:

    - the source and target datatypes compare equal;
    - neither side is ``EMPTY``, both are fixed width, and
      ``plc.unary.is_supported_cast`` accepts the pair;
    - string to a non-bool numeric type, or the reverse;
    - non-bool integral (other than ``UINT64``) to timestamp;
    - timestamp to non-bool integral.

    Parameters
    ----------
    from_ : pylibcudf.DataType
        Source datatype

    to : pylibcudf.DataType
        Target datatype

    Returns
    -------
    bool
        True if the cast is supported, False otherwise.
    """
    dst_empty = to.id() == plc.TypeId.EMPTY
    src_empty = from_.id() == plc.TypeId.EMPTY

    if is_nested(from_) and is_nested(to):
        return False

    # Identity cast.
    if from_ == to:
        return True

    # Fixed-width to fixed-width, deferring to pylibcudf for the final say.
    if (
        not dst_empty
        and not src_empty
        and plc.traits.is_fixed_width(to)
        and plc.traits.is_fixed_width(from_)
        and plc.unary.is_supported_cast(from_, to)
    ):
        return True

    # String -> numeric.
    if (
        from_.id() == plc.TypeId.STRING
        and not dst_empty
        and is_numeric_not_bool(to)
    ):
        return True

    # Numeric -> string.
    if (
        to.id() == plc.TypeId.STRING
        and not src_empty
        and is_numeric_not_bool(from_)
    ):
        return True

    # Integer -> timestamp.
    if (
        plc.traits.is_integral_not_bool(from_)
        and from_.id() != plc.TypeId.UINT64  # not overflow safe
        and not dst_empty
        and plc.traits.is_timestamp(to)
    ):
        return True

    # Timestamp -> integer is the last rule, so its outcome is the answer.
    return (
        plc.traits.is_integral_not_bool(to)
        and not dst_empty
        and plc.traits.is_timestamp(from_)
    )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def is_order_preserving_cast(from_: plc.DataType, to: plc.DataType) -> bool:
    """
    Determine if a cast would preserve the order of the source data.

    Parameters
    ----------
    from_
        Source datatype
    to
        Target datatype

    Returns
    -------
    True if the cast is order-preserving, False otherwise
    """
    size_of = plc.types.size_of
    is_unsigned = plc.traits.is_unsigned

    # Same type id on both sides: nothing changes.
    if from_.id() == to.id():
        return True

    if is_integral_not_bool(from_) and is_integral_not_bool(to):
        if is_unsigned(from_) != is_unsigned(to):
            # Signedness differs. Only unsigned -> strictly wider signed
            # qualifies; signed -> unsigned never does because of
            # negative values.
            if is_unsigned(from_) and size_of(to) > size_of(from_):
                return True
        elif size_of(to) >= size_of(from_):
            # Same signedness and the target is at least as wide.
            return True
    elif (
        is_floating_point(from_)
        and is_floating_point(to)
        and size_of(to) >= size_of(from_)
    ):
        # Float to an equally wide or wider float.
        return True

    # Everything else: only integer <-> float conversions are accepted.
    int_to_float = is_integral_not_bool(from_) and is_floating_point(to)
    return int_to_float or (
        is_floating_point(from_) and is_integral_not_bool(to)
    )
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Sorting utilities."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
import pylibcudf as plc
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import Sequence
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def sort_order(
|
|
17
|
+
descending: Sequence[bool], *, nulls_last: Sequence[bool], num_keys: int
|
|
18
|
+
) -> tuple[list[plc.types.Order], list[plc.types.NullOrder]]:
|
|
19
|
+
"""
|
|
20
|
+
Produce sort order arguments.
|
|
21
|
+
|
|
22
|
+
Parameters
|
|
23
|
+
----------
|
|
24
|
+
descending
|
|
25
|
+
List indicating order for each column
|
|
26
|
+
nulls_last
|
|
27
|
+
Should nulls sort last or first?
|
|
28
|
+
num_keys
|
|
29
|
+
Number of sort keys
|
|
30
|
+
|
|
31
|
+
Returns
|
|
32
|
+
-------
|
|
33
|
+
tuple of column_order and null_precedence
|
|
34
|
+
suitable for passing to sort routines
|
|
35
|
+
"""
|
|
36
|
+
# Mimicking polars broadcast handling of descending
|
|
37
|
+
if num_keys > (n := len(descending)) and n == 1:
|
|
38
|
+
descending = [descending[0]] * num_keys
|
|
39
|
+
if num_keys > (n := len(nulls_last)) and n == 1:
|
|
40
|
+
nulls_last = [nulls_last[0]] * num_keys
|
|
41
|
+
column_order = [
|
|
42
|
+
plc.types.Order.DESCENDING if d else plc.types.Order.ASCENDING
|
|
43
|
+
for d in descending
|
|
44
|
+
]
|
|
45
|
+
null_precedence = []
|
|
46
|
+
if len(descending) != len(nulls_last) or len(descending) != num_keys:
|
|
47
|
+
raise ValueError("Mismatching length of arguments in sort_order")
|
|
48
|
+
for asc, null_last in zip(column_order, nulls_last, strict=True):
|
|
49
|
+
if (asc == plc.types.Order.ASCENDING) ^ (not null_last):
|
|
50
|
+
null_precedence.append(plc.types.NullOrder.AFTER)
|
|
51
|
+
elif (asc == plc.types.Order.ASCENDING) ^ null_last:
|
|
52
|
+
null_precedence.append(plc.types.NullOrder.BEFORE)
|
|
53
|
+
return column_order, null_precedence
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Timing utilities."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
__all__: list[str] = ["Timer"]
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Timer:
    """
    Collects per-node execution timings relative to the start of a query.

    Parameters
    ----------
    query_start
        Duration in nanoseconds since the query was started on the
        Polars side
    """

    def __init__(self, query_start: int):
        # Each entry is (start offset, end offset, node name), with both
        # offsets measured from ``query_start``.
        self.timings: list[tuple[int, int, str]] = []
        self.query_start = query_start

    def store(self, start: int, end: int, name: str) -> None:
        """
        Record the timing of a single node.

        Parameters
        ----------
        start
            Start of the execution for this node (use time.monotonic_ns).
        end
            End of the execution for this node.
        name
            The name for this node.
        """
        origin = self.query_start
        record = (start - origin, end - origin, name)
        self.timings.append(record)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Version utilities so that cudf_polars supports a range of polars versions."""
|
|
5
|
+
|
|
6
|
+
# ruff: noqa: SIM300
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from packaging.version import parse
|
|
10
|
+
|
|
11
|
+
from polars import __version__
|
|
12
|
+
|
|
13
|
+
# Version of the installed polars package, parsed once at import time.
POLARS_VERSION = parse(__version__)

# Oldest polars release that _ensure_polars_version accepts.
POLARS_LOWER_BOUND = parse("1.28")

# Compatibility flags: each one is True when the installed polars is
# strictly older than the release named in the flag.
POLARS_VERSION_LT_129 = POLARS_VERSION < parse("1.29")
POLARS_VERSION_LT_130 = POLARS_VERSION < parse("1.30")
POLARS_VERSION_LT_131 = POLARS_VERSION < parse("1.31")
POLARS_VERSION_LT_132 = POLARS_VERSION < parse("1.32")

# Patch-level flags within the 1.32 series.
POLARS_VERSION_LT_1321 = POLARS_VERSION < parse("1.32.1")
POLARS_VERSION_LT_1323 = POLARS_VERSION < parse("1.32.3")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _ensure_polars_version() -> None:
    """
    Check that the installed polars is not older than the lower bound.

    Raises
    ------
    ImportError
        If ``POLARS_VERSION`` is less than ``POLARS_LOWER_BOUND``.
    """
    too_old = POLARS_VERSION < POLARS_LOWER_BOUND
    if not too_old:
        return
    raise ImportError(
        f"cudf_polars requires py-polars v{POLARS_LOWER_BOUND} or greater."
    )  # pragma: no cover
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cudf-polars-cu13
|
|
3
|
+
Version: 25.10.0
|
|
4
|
+
Summary: Executor for polars using cudf
|
|
5
|
+
Author: NVIDIA Corporation
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/rapidsai/cudf
|
|
8
|
+
Classifier: Intended Audience :: Developers
|
|
9
|
+
Classifier: Topic :: Database
|
|
10
|
+
Classifier: Topic :: Scientific/Engineering
|
|
11
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
+
Classifier: Programming Language :: Python
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: nvidia-ml-py>=12
|
|
21
|
+
Requires-Dist: packaging
|
|
22
|
+
Requires-Dist: polars<1.33,>=1.28
|
|
23
|
+
Requires-Dist: pylibcudf-cu13==25.10.*
|
|
24
|
+
Requires-Dist: typing-extensions; python_version < "3.11"
|
|
25
|
+
Provides-Extra: test
|
|
26
|
+
Requires-Dist: dask-cuda==25.10.*; extra == "test"
|
|
27
|
+
Requires-Dist: numpy<3.0a0,>=1.23; extra == "test"
|
|
28
|
+
Requires-Dist: pytest; extra == "test"
|
|
29
|
+
Requires-Dist: pytest-cov; extra == "test"
|
|
30
|
+
Requires-Dist: pytest-httpserver; extra == "test"
|
|
31
|
+
Requires-Dist: pytest-xdist; extra == "test"
|
|
32
|
+
Requires-Dist: rich; extra == "test"
|
|
33
|
+
Provides-Extra: experimental
|
|
34
|
+
Requires-Dist: nvidia-ml-py>=12; extra == "experimental"
|
|
35
|
+
Requires-Dist: rapids-dask-dependency==25.10.*; extra == "experimental"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# <div align="left"><img src="img/rapids_logo.png" width="90px"/> cuDF - GPU DataFrames</div>
|
|
39
|
+
|
|
40
|
+
## 📢 cuDF can now be used as a no-code-change accelerator for pandas! To learn more, see [here](https://rapids.ai/cudf-pandas/)!
|
|
41
|
+
|
|
42
|
+
cuDF (pronounced "KOO-dee-eff") is a GPU DataFrame library
|
|
43
|
+
for loading, joining, aggregating, filtering, and otherwise
|
|
44
|
+
manipulating data. cuDF leverages
|
|
45
|
+
[libcudf](https://docs.rapids.ai/api/libcudf/stable/), a
|
|
46
|
+
blazing-fast C++/CUDA dataframe library and the [Apache
|
|
47
|
+
Arrow](https://arrow.apache.org/) columnar format to provide a
|
|
48
|
+
GPU-accelerated pandas API.
|
|
49
|
+
|
|
50
|
+
You can import `cudf` directly and use it like `pandas`:
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
import cudf
|
|
54
|
+
|
|
55
|
+
tips_df = cudf.read_csv("https://github.com/plotly/datasets/raw/master/tips.csv")
|
|
56
|
+
tips_df["tip_percentage"] = tips_df["tip"] / tips_df["total_bill"] * 100
|
|
57
|
+
|
|
58
|
+
# display average tip by dining party size
|
|
59
|
+
print(tips_df.groupby("size").tip_percentage.mean())
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Or, you can use cuDF as a no-code-change accelerator for pandas, using
|
|
63
|
+
[`cudf.pandas`](https://docs.rapids.ai/api/cudf/stable/cudf_pandas).
|
|
64
|
+
`cudf.pandas` supports 100% of the pandas API, utilizing cuDF for
|
|
65
|
+
supported operations and falling back to pandas when needed:
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
%load_ext cudf.pandas # pandas operations now use the GPU!
|
|
69
|
+
|
|
70
|
+
import pandas as pd
|
|
71
|
+
|
|
72
|
+
tips_df = pd.read_csv("https://github.com/plotly/datasets/raw/master/tips.csv")
|
|
73
|
+
tips_df["tip_percentage"] = tips_df["tip"] / tips_df["total_bill"] * 100
|
|
74
|
+
|
|
75
|
+
# display average tip by dining party size
|
|
76
|
+
print(tips_df.groupby("size").tip_percentage.mean())
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Resources
|
|
80
|
+
|
|
81
|
+
- [Try cudf.pandas now](https://nvda.ws/rapids-cudf): Explore `cudf.pandas` on a free GPU enabled instance on Google Colab!
|
|
82
|
+
- [Install](https://docs.rapids.ai/install): Instructions for installing cuDF and other [RAPIDS](https://rapids.ai) libraries.
|
|
83
|
+
- [cudf (Python) documentation](https://docs.rapids.ai/api/cudf/stable/)
|
|
84
|
+
- [libcudf (C++/CUDA) documentation](https://docs.rapids.ai/api/libcudf/stable/)
|
|
85
|
+
- [RAPIDS Community](https://rapids.ai/learn-more/#get-involved): Get help, contribute, and collaborate.
|
|
86
|
+
|
|
87
|
+
See the [RAPIDS install page](https://docs.rapids.ai/install) for
|
|
88
|
+
the most up-to-date information and commands for installing cuDF
|
|
89
|
+
and other RAPIDS packages.
|
|
90
|
+
|
|
91
|
+
## Installation
|
|
92
|
+
|
|
93
|
+
### CUDA/GPU requirements
|
|
94
|
+
|
|
95
|
+
* CUDA 12.0+ with a compatible NVIDIA driver
|
|
96
|
+
* Volta architecture or better (Compute Capability >=7.0)
|
|
97
|
+
|
|
98
|
+
### Pip
|
|
99
|
+
|
|
100
|
+
cuDF can be installed via `pip` from the NVIDIA Python Package Index.
|
|
101
|
+
Be sure to select the appropriate cuDF package depending
|
|
102
|
+
on the major version of CUDA available in your environment:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
# CUDA 13
|
|
106
|
+
pip install cudf-cu13
|
|
107
|
+
|
|
108
|
+
# CUDA 12
|
|
109
|
+
pip install cudf-cu12
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Conda
|
|
113
|
+
|
|
114
|
+
cuDF can be installed with conda (via [miniforge](https://github.com/conda-forge/miniforge)) from the `rapidsai` channel:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# CUDA 13
|
|
118
|
+
conda install -c rapidsai -c conda-forge cudf=25.10 cuda-version=13.0
|
|
119
|
+
|
|
120
|
+
# CUDA 12
|
|
121
|
+
conda install -c rapidsai -c conda-forge cudf=25.10 cuda-version=12.9
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
|
|
125
|
+
of our latest development branch.
|
|
126
|
+
|
|
127
|
+
Note: cuDF is supported only on Linux, and with Python versions 3.10 and later.
|
|
128
|
+
|
|
129
|
+
See the [RAPIDS installation guide](https://docs.rapids.ai/install) for more OS and version info.
|
|
130
|
+
|
|
131
|
+
## Build/Install from Source
|
|
132
|
+
See build [instructions](CONTRIBUTING.md#setting-up-your-build-environment).
|
|
133
|
+
|
|
134
|
+
## Contributing
|
|
135
|
+
|
|
136
|
+
Please see our [guide for contributing to cuDF](CONTRIBUTING.md).
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
cudf_polars/GIT_COMMIT,sha256=dSIC7QF9D2u7uNLtZDtGWJYqrCMXIqV9uYhOHjtkmLU,41
|
|
2
|
+
cudf_polars/VERSION,sha256=qFUmAhDerTzlTeG7G1N8kkXh-gyZzDU9R0ARLQtoP_Y,8
|
|
3
|
+
cudf_polars/__init__.py,sha256=fSTx5nmqajdwp7qvP4PnYL6wZN9-k1fKB43NkcZlHwk,740
|
|
4
|
+
cudf_polars/_version.py,sha256=kj5Ir4dxZRR-k2k8mWUDJHiGpE8_ZcTNzt_kMZxcFRA,528
|
|
5
|
+
cudf_polars/callback.py,sha256=r8hf3BbpXaKtBjQkxIt_XMP9IVj6UjtdSIvJXR3r_NA,9994
|
|
6
|
+
cudf_polars/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
cudf_polars/containers/__init__.py,sha256=EIiTQKXBTwCmbsUNotYImSi3wq31pX55hYFwojygcyI,518
|
|
8
|
+
cudf_polars/containers/column.py,sha256=jklTNU2gMazs84wybZIlojY0XXVdRZYNT4X4rPL31nM,15233
|
|
9
|
+
cudf_polars/containers/dataframe.py,sha256=geASmznODV_7pFcxqcQqEzdDpN5dLDR1_vmdXHFI1og,11665
|
|
10
|
+
cudf_polars/containers/datatype.py,sha256=uNsVCieVWR7fgpiSmXu25Z5mfECSIxBGrweZf9EQu7E,4680
|
|
11
|
+
cudf_polars/dsl/__init__.py,sha256=bYwYnqmqINMgwnkJ22EnXMlHviLolPaMgQ8QqoZL3YE,244
|
|
12
|
+
cudf_polars/dsl/expr.py,sha256=fLimKmIxdEkVwie90QR3ajjxA7le5zI-iu1VZW1ZN8c,1952
|
|
13
|
+
cudf_polars/dsl/ir.py,sha256=vfPHOOPYaz7KTJfkshN-5mVMkrVqYKFGle5IfQhxFLw,93272
|
|
14
|
+
cudf_polars/dsl/nodebase.py,sha256=QbZHK9aUbdiE-Mp_NkkiuNvCnD8E3xzd9-GYKR8UqcM,4777
|
|
15
|
+
cudf_polars/dsl/to_ast.py,sha256=We0idh-0ckSz9nIZrGkeMg75XbnijRW2_DHVXZ9-a34,13595
|
|
16
|
+
cudf_polars/dsl/tracing.py,sha256=xPTyXNQ64PSuV4_t5z6_GGJ1V_m4sFxxHiYEDp64Ofk,383
|
|
17
|
+
cudf_polars/dsl/translate.py,sha256=t_bHoIS6Ny3gQcMqWAgBUT0AmWTM0LSCQnhYG36Km2s,31687
|
|
18
|
+
cudf_polars/dsl/traversal.py,sha256=dzOHVaRj0wTYQd5h-JnjQrj4DffEfq1gdENYcDk5Eis,5729
|
|
19
|
+
cudf_polars/dsl/expressions/__init__.py,sha256=uj1a4BzrDVAjOgP6FKKtnvR5elF4ksUj1RhkLTZoS1k,224
|
|
20
|
+
cudf_polars/dsl/expressions/aggregation.py,sha256=XxC1d4cbQIvL6o6REQ2FlyxcdUbVyMl-gN4RDFHnLSQ,7950
|
|
21
|
+
cudf_polars/dsl/expressions/base.py,sha256=Fq_MCz2LOMm6eekLJQ1jwEsAvYuouFXzrhIpxIRs4ak,7976
|
|
22
|
+
cudf_polars/dsl/expressions/binaryop.py,sha256=7Psj8BKnotNGopQw5W9hM6cP8fbng-OllxrkgYrvLzs,5070
|
|
23
|
+
cudf_polars/dsl/expressions/boolean.py,sha256=Gpx0cn2L1Wr4q5OqGT2s7lYsN_VlG_IYPtKD-BE6Cc8,12550
|
|
24
|
+
cudf_polars/dsl/expressions/datetime.py,sha256=MEGtYOYl2ftbNzVd9NBv9DMJ2dUxZwGg5z69xzjZP18,10102
|
|
25
|
+
cudf_polars/dsl/expressions/literal.py,sha256=CpfWj5XFqBNKqu3jaWgP13_HfSg6maXiKDjmY6pdoUE,3192
|
|
26
|
+
cudf_polars/dsl/expressions/rolling.py,sha256=NiR8FfUwQMdmqFIrMM-hBcZ_JEKn9a1X9UZF4DPKM0E,23037
|
|
27
|
+
cudf_polars/dsl/expressions/selection.py,sha256=RfGj0RbKairCNibfQCUtwbFiS4xv9fRoznycEKxE3ww,2520
|
|
28
|
+
cudf_polars/dsl/expressions/slicing.py,sha256=xaI-XzZvPzyLDwG0yZcIPII56OMEJDxem2piV4LBGI0,1185
|
|
29
|
+
cudf_polars/dsl/expressions/sorting.py,sha256=6XO0JktGGUJujADXrZoSBeJGDk80vSOCzboB7jOlL5Q,2789
|
|
30
|
+
cudf_polars/dsl/expressions/string.py,sha256=LXUd0IYiblmlNHQ9zTpV5i0m770TY0k9vBcLyPI9c0k,38164
|
|
31
|
+
cudf_polars/dsl/expressions/struct.py,sha256=DC426pMVQrebvAEi9NCzKhzFxPFhUglvUlvOb866TLQ,4658
|
|
32
|
+
cudf_polars/dsl/expressions/ternary.py,sha256=J_85frSq5Hh2ERSXOIZlwiwFTEp9WASh2hPiCkbkbqM,1415
|
|
33
|
+
cudf_polars/dsl/expressions/unary.py,sha256=sW4t9pSbRMMUlD6wJ9Vq4khi4qhBelVfqmtj_gl1Oj8,20283
|
|
34
|
+
cudf_polars/dsl/utils/__init__.py,sha256=JL26nlMAbcdL8ZE4iXRrMOEVSTEZU1P5y9WvxTEDdnY,199
|
|
35
|
+
cudf_polars/dsl/utils/aggregations.py,sha256=Vozij-WaR8UwOq28WSEMNmigokduydXSknL0krKj-6Y,17522
|
|
36
|
+
cudf_polars/dsl/utils/groupby.py,sha256=PhkzM62N8b9qjJs8910IewnTbn_Qx2OiMPXgqMo1yDI,2621
|
|
37
|
+
cudf_polars/dsl/utils/naming.py,sha256=ydp_BYYAt3mG7JHfi9Snp3dDNzdQZD6F2sAMEmT4OYA,737
|
|
38
|
+
cudf_polars/dsl/utils/replace.py,sha256=8ns_TpbG1Hh8ZJejRyGA6KCu5t-TvUaM009AO8J98vc,1612
|
|
39
|
+
cudf_polars/dsl/utils/reshape.py,sha256=Q13_0tIjgtMocGRFciPa1GcMxc2ClqqZf1mujl7w1kw,2397
|
|
40
|
+
cudf_polars/dsl/utils/rolling.py,sha256=ioqNHIzEip9vd7XHHZvUmHL3RYPwOD6qYsPHUDmlhM8,3618
|
|
41
|
+
cudf_polars/dsl/utils/windows.py,sha256=ysRZfl9wm2z-QXTRO09tT5gy1vwvoV_8_8iBsE9FZeA,5388
|
|
42
|
+
cudf_polars/experimental/__init__.py,sha256=S2oI2K__woyPQAAlFMOo6RTMtdfIZxmzzAO92VtJgP4,256
|
|
43
|
+
cudf_polars/experimental/base.py,sha256=9_bEWrbizmR9n4I55oqlORrGxBwc4kaGtn4EXSo_uu0,11582
|
|
44
|
+
cudf_polars/experimental/dask_registers.py,sha256=bGU6nEh-rQd6lMPaEhJUdkVrkCFSjknb8IwB0EeMnrs,7780
|
|
45
|
+
cudf_polars/experimental/dispatch.py,sha256=i1Q0J5M9rLMi1lp_MxjGmvAfjKEGda0B3c5kvTtz3uM,3942
|
|
46
|
+
cudf_polars/experimental/distinct.py,sha256=ZyQ2SEVftdRAbtVOdJ89TvbK8uDPpam1FG6VKj86kAY,6978
|
|
47
|
+
cudf_polars/experimental/explain.py,sha256=-NPFj7jplbobu7jGTOnv8e9VDOs9BHuPxjilvZ19ryI,4803
|
|
48
|
+
cudf_polars/experimental/expressions.py,sha256=3NXUUepYLdqzrhaFuhu_Ya5nc89Dxhsmv0liIxAyQlQ,18412
|
|
49
|
+
cudf_polars/experimental/groupby.py,sha256=uoN759pB3yrvNRFwc_jq4bNtsTmDrtPjifMABhfutwY,11222
|
|
50
|
+
cudf_polars/experimental/io.py,sha256=RMH5dDrO3TsPK9QXkq5ibcHT_pRh64_m9RmIxiYtawE,31667
|
|
51
|
+
cudf_polars/experimental/join.py,sha256=bfxbAl34Ql-FcMYCbn8oYAW9bROhFpf3d8yIjEVtcIY,12790
|
|
52
|
+
cudf_polars/experimental/parallel.py,sha256=NsI_X-LKUNgpjoD9EFjWHXRiDvIy_onUJvzDCZAtr5U,12803
|
|
53
|
+
cudf_polars/experimental/repartition.py,sha256=o1qtstaB_dBaYjkmsmfnveZ6T66A9XGwzFEBUucfyrk,2124
|
|
54
|
+
cudf_polars/experimental/scheduler.py,sha256=ieL7bdxTqlmd8MO37JCaCoqhyDRZNTLnPFUme3hv6SQ,4203
|
|
55
|
+
cudf_polars/experimental/select.py,sha256=5r4zAa2iupN_VJDLQ-PKWLyfzcgeJkTpVI5HXlTtULs,6106
|
|
56
|
+
cudf_polars/experimental/shuffle.py,sha256=pfMxjVufnYf_1GXApBRSng-_O1Gl33NP0KQAZhKpu1E,11077
|
|
57
|
+
cudf_polars/experimental/sort.py,sha256=6EGk1rAzw58Jtd5CZ9pHvesK-yA1h-SmDnE8ZFUlGcw,20830
|
|
58
|
+
cudf_polars/experimental/spilling.py,sha256=OVpH9PHYNJcYL-PAB0CvoAil_nJW0VepLvcIrrAUdlc,4255
|
|
59
|
+
cudf_polars/experimental/statistics.py,sha256=GMurzuADNEyWaKh43BHTzb30inTKkdALJrNx3GfMa3o,29147
|
|
60
|
+
cudf_polars/experimental/utils.py,sha256=e17n8NojqVQ33UhKXkXS1MgFopyINgCFDjRSIMBV9Mw,5632
|
|
61
|
+
cudf_polars/experimental/benchmarks/__init__.py,sha256=XiT8hL6V9Ns_SSXDXkzoSWlXIo6aFLDXUHLLWch23Ok,149
|
|
62
|
+
cudf_polars/experimental/benchmarks/pdsds.py,sha256=e565fV2a6tzhKfQ4pCqTMoxJboDx3O8hV2_NHjkRaow,6889
|
|
63
|
+
cudf_polars/experimental/benchmarks/pdsh.py,sha256=2KTQvP4ordyo0qy1WDgagpHoYYRrQz1PkDHnhNpo-_s,31151
|
|
64
|
+
cudf_polars/experimental/benchmarks/utils.py,sha256=eC8mcqUEh1fu07SCIvr9ZVbiHkzt6aqBZBKWL8y7Wc4,28196
|
|
65
|
+
cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py,sha256=pkjkRg2qJCMbhBpD9cIxcjsgMOZXXliWJPZIgZpcUQA,151
|
|
66
|
+
cudf_polars/experimental/benchmarks/pdsds_queries/q1.py,sha256=NTvgxMJUB9xH2llo6_SWO7JQNwxEoK9nQ-mnRCsYf9Y,3100
|
|
67
|
+
cudf_polars/experimental/benchmarks/pdsds_queries/q10.py,sha256=SBDDIf-BfoPTqHCi4jIpgLJXkA99UcZ-NhAPhE1D2hA,7797
|
|
68
|
+
cudf_polars/experimental/benchmarks/pdsds_queries/q2.py,sha256=hns2Hz1Eu8YBl4YDweINv__BME3HTt5A_TDppjXP0aw,9088
|
|
69
|
+
cudf_polars/experimental/benchmarks/pdsds_queries/q3.py,sha256=GhdN9WbYVv97aIc29i3w_tl1T7rtotwY5SZBhZK6gf4,2150
|
|
70
|
+
cudf_polars/experimental/benchmarks/pdsds_queries/q4.py,sha256=6E8lOk5lS_R4VeN81Dof1ijzKbMsQ3zuqinU0nQZVWg,13475
|
|
71
|
+
cudf_polars/experimental/benchmarks/pdsds_queries/q5.py,sha256=neKCu04M9SRDQHa6Nc51NbJ3gFY3yJkM05qE1YHUevU,19800
|
|
72
|
+
cudf_polars/experimental/benchmarks/pdsds_queries/q6.py,sha256=7zdkXHpnY3xwa8oj2c3dtPgW_pAmr4kfhATdFQiq9vk,3226
|
|
73
|
+
cudf_polars/experimental/benchmarks/pdsds_queries/q7.py,sha256=gWAuGaDQbdCn6CofbqPAah3X4uVBVYdsfIIqRpZppFc,2789
|
|
74
|
+
cudf_polars/experimental/benchmarks/pdsds_queries/q8.py,sha256=62qZn1RFhS3T-GL9bBY2cIR3kbVxgoYdY9n4en309EU,9546
|
|
75
|
+
cudf_polars/experimental/benchmarks/pdsds_queries/q9.py,sha256=_MyLgknIVUK1U1x12bsM9Lrhs-ZMKuSOwMc9yu0ddYY,4723
|
|
76
|
+
cudf_polars/testing/__init__.py,sha256=0MnlTjkTEqkSpL5GdMhQf4uXOaQwNrzgEJCZKa5FnL4,219
|
|
77
|
+
cudf_polars/testing/asserts.py,sha256=he6BcpNuPyHOBVNiXAAcr-7HdjKl7n0PQfCjdyPxWoA,15250
|
|
78
|
+
cudf_polars/testing/io.py,sha256=mNHsddu-c0Gs0SVVmBOYpXIDFjMiuVrmArllqIEu-2w,3832
|
|
79
|
+
cudf_polars/testing/plugin.py,sha256=aAut5jHckE2JhxBfVmtEBfGsfxcNtFMBph12pYSfpw0,23343
|
|
80
|
+
cudf_polars/typing/__init__.py,sha256=Q1iVabv-7etFo01UxT6cz-zgZa_9_WYA8D8QHnjZuTg,5022
|
|
81
|
+
cudf_polars/utils/__init__.py,sha256=urdV5MUIneU8Dn6pt1db5GkDG0oY4NsFD0Uhl3j98l8,195
|
|
82
|
+
cudf_polars/utils/config.py,sha256=LTS-gc90wWVFuDFOYqhZZ6mK68W8z6L0G4YVOZRGCIs,28814
|
|
83
|
+
cudf_polars/utils/conversion.py,sha256=k_apLbSR-MiYYlQBGrzYOInuvcbfSi-il-o9nkovdXQ,1042
|
|
84
|
+
cudf_polars/utils/dtypes.py,sha256=yktzqBLfbv-zve1-iS_XsGZD1R6GXgXV_grZ8m7KidM,3358
|
|
85
|
+
cudf_polars/utils/sorting.py,sha256=Mqb_KLsYnKU8p1dDan2mtlIQl65RqwM78OlUi-_Jj0k,1725
|
|
86
|
+
cudf_polars/utils/timer.py,sha256=KqcXqOcbovsj6KDCwaxl70baQXjuod43rABrpQkE78M,1005
|
|
87
|
+
cudf_polars/utils/versions.py,sha256=UxJc6S0Sss8hIUs2ZqviKH-2FXwEBmoRKQW3BZlpydY,959
|
|
88
|
+
cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE,sha256=4YCpjWCbYMkMQFW47JXsorZLOaP957HwmP6oHW2_ngM,11348
|
|
89
|
+
cudf_polars_cu13-25.10.0.dist-info/METADATA,sha256=CHrSC8yWGA3bG70IlulJshqH90qpQW6-OzlRW_sH83g,4889
|
|
90
|
+
cudf_polars_cu13-25.10.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
91
|
+
cudf_polars_cu13-25.10.0.dist-info/top_level.txt,sha256=w2bOa7MpuyapYgZh480Znh4UzX7rSWlFcYR1Yo6QIPs,12
|
|
92
|
+
cudf_polars_cu13-25.10.0.dist-info/RECORD,,
|