cudf-polars-cu13 25.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. cudf_polars/GIT_COMMIT +1 -0
  2. cudf_polars/VERSION +1 -0
  3. cudf_polars/__init__.py +28 -0
  4. cudf_polars/_version.py +21 -0
  5. cudf_polars/callback.py +318 -0
  6. cudf_polars/containers/__init__.py +13 -0
  7. cudf_polars/containers/column.py +495 -0
  8. cudf_polars/containers/dataframe.py +361 -0
  9. cudf_polars/containers/datatype.py +137 -0
  10. cudf_polars/dsl/__init__.py +8 -0
  11. cudf_polars/dsl/expr.py +66 -0
  12. cudf_polars/dsl/expressions/__init__.py +8 -0
  13. cudf_polars/dsl/expressions/aggregation.py +226 -0
  14. cudf_polars/dsl/expressions/base.py +272 -0
  15. cudf_polars/dsl/expressions/binaryop.py +120 -0
  16. cudf_polars/dsl/expressions/boolean.py +326 -0
  17. cudf_polars/dsl/expressions/datetime.py +271 -0
  18. cudf_polars/dsl/expressions/literal.py +97 -0
  19. cudf_polars/dsl/expressions/rolling.py +643 -0
  20. cudf_polars/dsl/expressions/selection.py +74 -0
  21. cudf_polars/dsl/expressions/slicing.py +46 -0
  22. cudf_polars/dsl/expressions/sorting.py +85 -0
  23. cudf_polars/dsl/expressions/string.py +1002 -0
  24. cudf_polars/dsl/expressions/struct.py +137 -0
  25. cudf_polars/dsl/expressions/ternary.py +49 -0
  26. cudf_polars/dsl/expressions/unary.py +517 -0
  27. cudf_polars/dsl/ir.py +2607 -0
  28. cudf_polars/dsl/nodebase.py +164 -0
  29. cudf_polars/dsl/to_ast.py +359 -0
  30. cudf_polars/dsl/tracing.py +16 -0
  31. cudf_polars/dsl/translate.py +939 -0
  32. cudf_polars/dsl/traversal.py +224 -0
  33. cudf_polars/dsl/utils/__init__.py +8 -0
  34. cudf_polars/dsl/utils/aggregations.py +481 -0
  35. cudf_polars/dsl/utils/groupby.py +98 -0
  36. cudf_polars/dsl/utils/naming.py +34 -0
  37. cudf_polars/dsl/utils/replace.py +61 -0
  38. cudf_polars/dsl/utils/reshape.py +74 -0
  39. cudf_polars/dsl/utils/rolling.py +121 -0
  40. cudf_polars/dsl/utils/windows.py +192 -0
  41. cudf_polars/experimental/__init__.py +8 -0
  42. cudf_polars/experimental/base.py +386 -0
  43. cudf_polars/experimental/benchmarks/__init__.py +4 -0
  44. cudf_polars/experimental/benchmarks/pdsds.py +220 -0
  45. cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
  46. cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
  47. cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
  48. cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
  49. cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
  50. cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
  51. cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
  52. cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
  53. cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
  54. cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
  55. cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
  56. cudf_polars/experimental/benchmarks/pdsh.py +814 -0
  57. cudf_polars/experimental/benchmarks/utils.py +832 -0
  58. cudf_polars/experimental/dask_registers.py +200 -0
  59. cudf_polars/experimental/dispatch.py +156 -0
  60. cudf_polars/experimental/distinct.py +197 -0
  61. cudf_polars/experimental/explain.py +157 -0
  62. cudf_polars/experimental/expressions.py +590 -0
  63. cudf_polars/experimental/groupby.py +327 -0
  64. cudf_polars/experimental/io.py +943 -0
  65. cudf_polars/experimental/join.py +391 -0
  66. cudf_polars/experimental/parallel.py +423 -0
  67. cudf_polars/experimental/repartition.py +69 -0
  68. cudf_polars/experimental/scheduler.py +155 -0
  69. cudf_polars/experimental/select.py +188 -0
  70. cudf_polars/experimental/shuffle.py +354 -0
  71. cudf_polars/experimental/sort.py +609 -0
  72. cudf_polars/experimental/spilling.py +151 -0
  73. cudf_polars/experimental/statistics.py +795 -0
  74. cudf_polars/experimental/utils.py +169 -0
  75. cudf_polars/py.typed +0 -0
  76. cudf_polars/testing/__init__.py +8 -0
  77. cudf_polars/testing/asserts.py +448 -0
  78. cudf_polars/testing/io.py +122 -0
  79. cudf_polars/testing/plugin.py +236 -0
  80. cudf_polars/typing/__init__.py +219 -0
  81. cudf_polars/utils/__init__.py +8 -0
  82. cudf_polars/utils/config.py +741 -0
  83. cudf_polars/utils/conversion.py +40 -0
  84. cudf_polars/utils/dtypes.py +118 -0
  85. cudf_polars/utils/sorting.py +53 -0
  86. cudf_polars/utils/timer.py +39 -0
  87. cudf_polars/utils/versions.py +27 -0
  88. cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
  89. cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
  90. cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
  91. cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
  92. cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,74 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ # TODO: remove need for this
4
+ # ruff: noqa: D101
5
+ """DSL nodes for selection operations."""
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING
10
+
11
+ import pylibcudf as plc
12
+
13
+ from cudf_polars.containers import Column
14
+ from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
15
+
16
+ if TYPE_CHECKING:
17
+ from cudf_polars.containers import DataFrame, DataType
18
+
19
+ __all__ = ["Filter", "Gather"]
20
+
21
+
22
class Gather(Expr):
    """
    Expression node that takes rows of a column by integer position.

    The first child evaluates to the values, the second child to the
    indices used as the gather map.
    """

    __slots__ = ()
    _non_child = ("dtype",)

    def __init__(self, dtype: DataType, values: Expr, indices: Expr) -> None:
        self.dtype = dtype
        self.children = (values, indices)
        self.is_pointwise = False

    def do_evaluate(
        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
        """
        Evaluate this expression given a dataframe for context.

        Parameters
        ----------
        df
            DataFrame against which both children are evaluated.
        context
            Execution context forwarded unchanged to the children.

        Returns
        -------
        Column of gathered values; null indices produce null rows.

        Raises
        ------
        ValueError
            If any valid index lies outside ``[-n, n)`` where ``n`` is the
            number of rows in the evaluated values column.
        """
        values, indices = (
            child.evaluate(df, context=context) for child in self.children
        )
        # Bounds are relative to the column being gathered from. The values
        # child need not have as many rows as ``df`` (for example when it is
        # itself a filter or a reduction), so the frame's row count is not a
        # safe limit once bounds checking is disabled below.
        n = values.obj.size()
        lo, hi = plc.reduce.minmax(indices.obj)
        lo = lo.to_py()
        hi = hi.to_py()
        # With no valid indices (empty or all-null gather map) there is no
        # minimum/maximum to report and both come back as None; in that case
        # there is nothing to bounds-check.
        if lo is not None and hi is not None and (hi >= n or lo < -n):
            raise ValueError("gather indices are out of bounds")
        if indices.null_count:
            # Map null indices to ``n``, which is one past the end of the
            # values, and let NULLIFY turn those rows into nulls.
            bounds_policy = plc.copying.OutOfBoundsPolicy.NULLIFY
            obj = plc.replace.replace_nulls(
                indices.obj,
                plc.Scalar.from_py(n, dtype=indices.obj.type()),
            )
        else:
            # Every index was validated above, so the gather itself can skip
            # its own bounds check.
            bounds_policy = plc.copying.OutOfBoundsPolicy.DONT_CHECK
            obj = indices.obj
        table = plc.copying.gather(plc.Table([values.obj]), obj, bounds_policy)
        return Column(table.columns()[0], dtype=self.dtype)
55
+
56
+
57
class Filter(Expr):
    """
    Expression node that keeps the rows of a column selected by a mask.

    The first child evaluates to the values, the second child to the mask
    handed to ``apply_boolean_mask``.
    """

    __slots__ = ()
    _non_child = ("dtype",)

    def __init__(self, dtype: DataType, values: Expr, indices: Expr) -> None:
        self.dtype = dtype
        # NOTE: the second parameter is named ``indices`` but is evaluated
        # and used as the mask in ``do_evaluate``; the name is kept so that
        # existing keyword callers continue to work.
        self.children = (values, indices)
        self.is_pointwise = False

    def do_evaluate(
        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
        """
        Evaluate this expression given a dataframe for context.

        Parameters
        ----------
        df
            DataFrame against which both children are evaluated.
        context
            Execution context forwarded unchanged to the children.

        Returns
        -------
        Column holding the rows of the values selected by the mask, carrying
        over the sortedness metadata of the unfiltered values.
        """
        values, mask = (child.evaluate(df, context=context) for child in self.children)
        table = plc.stream_compaction.apply_boolean_mask(
            plc.Table([values.obj]), mask.obj
        )
        # The result copies the sortedness flags of the input column.
        return Column(table.columns()[0], dtype=self.dtype).sorted_like(values)
@@ -0,0 +1,46 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ # TODO: remove need for this
4
+ # ruff: noqa: D101
5
+ """Slicing DSL nodes."""
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING
10
+
11
+ from cudf_polars.dsl.expressions.base import (
12
+ ExecutionContext,
13
+ Expr,
14
+ )
15
+
16
+ if TYPE_CHECKING:
17
+ from cudf_polars.containers import Column, DataFrame, DataType
18
+
19
+
20
+ __all__ = ["Slice"]
21
+
22
+
23
class Slice(Expr):
    """
    Expression node that slices the column produced by its single child.

    ``offset`` and ``length`` are stored on the node and passed as a pair to
    ``Column.slice`` at evaluation time.
    """

    __slots__ = ("length", "offset")
    _non_child = ("dtype", "offset", "length")

    def __init__(
        self,
        dtype: DataType,
        offset: int,
        length: int | None,
        column: Expr,
    ) -> None:
        self.dtype = dtype
        self.offset = offset
        self.length = length
        self.children = (column,)
        self.is_pointwise = False

    def do_evaluate(
        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
        """Evaluate the child against ``df`` and return the requested slice."""
        (expr,) = self.children
        bounds = (self.offset, self.length)
        return expr.evaluate(df, context=context).slice(bounds)
@@ -0,0 +1,85 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ # TODO: remove need for this
4
+ # ruff: noqa: D101
5
+ """Sorting DSL nodes."""
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING
10
+
11
+ import pylibcudf as plc
12
+
13
+ from cudf_polars.containers import Column
14
+ from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
15
+ from cudf_polars.utils import sorting
16
+
17
+ if TYPE_CHECKING:
18
+ from cudf_polars.containers import DataFrame, DataType
19
+
20
+ __all__ = ["Sort", "SortBy"]
21
+
22
+
23
class Sort(Expr):
    """
    Expression node that sorts the column produced by its single child.

    ``options`` is the triple ``(stable, nulls_last, descending)``.
    """

    __slots__ = ("options",)
    _non_child = ("dtype", "options")

    def __init__(
        self, dtype: DataType, options: tuple[bool, bool, bool], column: Expr
    ) -> None:
        self.dtype = dtype
        self.options = options
        self.children = (column,)
        self.is_pointwise = False

    def do_evaluate(
        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
        """Evaluate the child against ``df`` and return it sorted."""
        (expr,) = self.children
        unsorted = expr.evaluate(df, context=context)
        stable, nulls_last, descending = self.options
        # A single key, so each option is wrapped in a one-element list.
        orders, null_orders = sorting.sort_order(
            [descending], nulls_last=[nulls_last], num_keys=1
        )
        if stable:
            sorter = plc.sorting.stable_sort
        else:
            sorter = plc.sorting.sort
        (result,) = sorter(plc.Table([unsorted.obj]), orders, null_orders).columns()
        # Record the ordering on the result so later consumers can see it.
        return Column(
            result,
            is_sorted=plc.types.Sorted.YES,
            order=orders[0],
            null_order=null_orders[0],
            dtype=self.dtype,
        )
54
+
55
+
56
class SortBy(Expr):
    """
    Expression node that reorders one column by the sort order of others.

    The first child evaluates to the values; every remaining child is a sort
    key. ``options`` is the triple ``(stable, nulls_last, descending)`` where
    the last two entries are passed to ``sorting.sort_order`` together with
    the number of keys.
    """

    __slots__ = ("options",)
    _non_child = ("dtype", "options")

    def __init__(
        self,
        dtype: DataType,
        # The per-key flags are variable-length tuples: the number of keys is
        # only fixed by how many ``by`` expressions are supplied.
        options: tuple[bool, tuple[bool, ...], tuple[bool, ...]],
        column: Expr,
        *by: Expr,
    ) -> None:
        self.dtype = dtype
        self.options = options
        self.children = (column, *by)
        self.is_pointwise = False

    def do_evaluate(
        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
        """
        Evaluate this expression given a dataframe for context.

        Parameters
        ----------
        df
            DataFrame against which all children are evaluated.
        context
            Execution context forwarded unchanged to the children.

        Returns
        -------
        Column of the values reordered according to the key columns.
        """
        column, *by = (child.evaluate(df, context=context) for child in self.children)
        (stable, nulls_last, descending) = self.options
        order, null_order = sorting.sort_order(
            descending, nulls_last=nulls_last, num_keys=len(by)
        )
        do_sort = plc.sorting.stable_sort_by_key if stable else plc.sorting.sort_by_key
        table = do_sort(
            plc.Table([column.obj]), plc.Table([c.obj for c in by]), order, null_order
        )
        # No sortedness metadata is attached: the values follow the order of
        # the keys, not their own.
        return Column(table.columns()[0], dtype=self.dtype)