cudf-polars-cu13 25.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. cudf_polars/GIT_COMMIT +1 -0
  2. cudf_polars/VERSION +1 -0
  3. cudf_polars/__init__.py +28 -0
  4. cudf_polars/_version.py +21 -0
  5. cudf_polars/callback.py +318 -0
  6. cudf_polars/containers/__init__.py +13 -0
  7. cudf_polars/containers/column.py +495 -0
  8. cudf_polars/containers/dataframe.py +361 -0
  9. cudf_polars/containers/datatype.py +137 -0
  10. cudf_polars/dsl/__init__.py +8 -0
  11. cudf_polars/dsl/expr.py +66 -0
  12. cudf_polars/dsl/expressions/__init__.py +8 -0
  13. cudf_polars/dsl/expressions/aggregation.py +226 -0
  14. cudf_polars/dsl/expressions/base.py +272 -0
  15. cudf_polars/dsl/expressions/binaryop.py +120 -0
  16. cudf_polars/dsl/expressions/boolean.py +326 -0
  17. cudf_polars/dsl/expressions/datetime.py +271 -0
  18. cudf_polars/dsl/expressions/literal.py +97 -0
  19. cudf_polars/dsl/expressions/rolling.py +643 -0
  20. cudf_polars/dsl/expressions/selection.py +74 -0
  21. cudf_polars/dsl/expressions/slicing.py +46 -0
  22. cudf_polars/dsl/expressions/sorting.py +85 -0
  23. cudf_polars/dsl/expressions/string.py +1002 -0
  24. cudf_polars/dsl/expressions/struct.py +137 -0
  25. cudf_polars/dsl/expressions/ternary.py +49 -0
  26. cudf_polars/dsl/expressions/unary.py +517 -0
  27. cudf_polars/dsl/ir.py +2607 -0
  28. cudf_polars/dsl/nodebase.py +164 -0
  29. cudf_polars/dsl/to_ast.py +359 -0
  30. cudf_polars/dsl/tracing.py +16 -0
  31. cudf_polars/dsl/translate.py +939 -0
  32. cudf_polars/dsl/traversal.py +224 -0
  33. cudf_polars/dsl/utils/__init__.py +8 -0
  34. cudf_polars/dsl/utils/aggregations.py +481 -0
  35. cudf_polars/dsl/utils/groupby.py +98 -0
  36. cudf_polars/dsl/utils/naming.py +34 -0
  37. cudf_polars/dsl/utils/replace.py +61 -0
  38. cudf_polars/dsl/utils/reshape.py +74 -0
  39. cudf_polars/dsl/utils/rolling.py +121 -0
  40. cudf_polars/dsl/utils/windows.py +192 -0
  41. cudf_polars/experimental/__init__.py +8 -0
  42. cudf_polars/experimental/base.py +386 -0
  43. cudf_polars/experimental/benchmarks/__init__.py +4 -0
  44. cudf_polars/experimental/benchmarks/pdsds.py +220 -0
  45. cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
  46. cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
  47. cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
  48. cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
  49. cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
  50. cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
  51. cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
  52. cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
  53. cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
  54. cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
  55. cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
  56. cudf_polars/experimental/benchmarks/pdsh.py +814 -0
  57. cudf_polars/experimental/benchmarks/utils.py +832 -0
  58. cudf_polars/experimental/dask_registers.py +200 -0
  59. cudf_polars/experimental/dispatch.py +156 -0
  60. cudf_polars/experimental/distinct.py +197 -0
  61. cudf_polars/experimental/explain.py +157 -0
  62. cudf_polars/experimental/expressions.py +590 -0
  63. cudf_polars/experimental/groupby.py +327 -0
  64. cudf_polars/experimental/io.py +943 -0
  65. cudf_polars/experimental/join.py +391 -0
  66. cudf_polars/experimental/parallel.py +423 -0
  67. cudf_polars/experimental/repartition.py +69 -0
  68. cudf_polars/experimental/scheduler.py +155 -0
  69. cudf_polars/experimental/select.py +188 -0
  70. cudf_polars/experimental/shuffle.py +354 -0
  71. cudf_polars/experimental/sort.py +609 -0
  72. cudf_polars/experimental/spilling.py +151 -0
  73. cudf_polars/experimental/statistics.py +795 -0
  74. cudf_polars/experimental/utils.py +169 -0
  75. cudf_polars/py.typed +0 -0
  76. cudf_polars/testing/__init__.py +8 -0
  77. cudf_polars/testing/asserts.py +448 -0
  78. cudf_polars/testing/io.py +122 -0
  79. cudf_polars/testing/plugin.py +236 -0
  80. cudf_polars/typing/__init__.py +219 -0
  81. cudf_polars/utils/__init__.py +8 -0
  82. cudf_polars/utils/config.py +741 -0
  83. cudf_polars/utils/conversion.py +40 -0
  84. cudf_polars/utils/dtypes.py +118 -0
  85. cudf_polars/utils/sorting.py +53 -0
  86. cudf_polars/utils/timer.py +39 -0
  87. cudf_polars/utils/versions.py +27 -0
  88. cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
  89. cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
  90. cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
  91. cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
  92. cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,151 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Spilling in multi-partition Dask execution using RAPIDSMPF."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ from dask.sizeof import sizeof
10
+ from distributed import get_worker
11
+ from rapidsmpf.buffer.buffer import MemoryType
12
+ from rapidsmpf.integrations.dask.core import get_worker_context
13
+ from rapidsmpf.integrations.dask.spilling import SpillableWrapper
14
+
15
+ from cudf_polars.containers import DataFrame
16
+
17
+ if TYPE_CHECKING:
18
+ from collections.abc import Callable, MutableMapping
19
+ from typing import Any
20
+
21
+ from cudf_polars.utils.config import ConfigOptions
22
+
23
+
24
def wrap_arg(obj: Any) -> Any:
    """
    Wrap a DataFrame in a SpillableWrapper; pass anything else through.

    Parameters
    ----------
    obj
        Candidate object. Only DataFrame instances are wrapped.

    Returns
    -------
    ``SpillableWrapper(on_device=obj)`` when obj is a DataFrame,
    otherwise obj itself, unchanged.
    """
    return SpillableWrapper(on_device=obj) if isinstance(obj, DataFrame) else obj
40
+
41
+
42
def unwrap_arg(obj: Any) -> Any:
    """
    Retrieve the object held by a SpillableWrapper.

    Parameters
    ----------
    obj
        Candidate object. Only SpillableWrapper instances are unwrapped.

    Returns
    -------
    The result of ``obj.unspill()`` if obj is a SpillableWrapper,
    otherwise obj itself, unchanged.
    """
    if not isinstance(obj, SpillableWrapper):
        return obj
    return obj.unspill()
58
+
59
+
60
def wrap_func_spillable(
    func: Callable,
    *,
    make_func_output_spillable: bool,
    target_partition_size: int,
) -> Callable:
    """
    Build a task function that accepts and optionally returns spillable data.

    The returned callable first asks the worker's spill manager for
    headroom, then unwraps every positional argument with ``unwrap_arg``,
    calls ``func``, and finally (optionally) passes the result through
    ``wrap_arg``.

    Parameters
    ----------
    func
        The function to be wrapped.
    make_func_output_spillable
        Whether the result of ``func`` is passed through ``wrap_arg``
        before being returned.
    target_partition_size
        Headroom (in bytes) requested when none of the arguments is a
        SpillableWrapper, i.e. when the task is assumed to be an IO task.

    Returns
    -------
    A callable taking the same positional arguments as ``func``.
    """

    def wrapper(*args: Any) -> Any:
        # Work out how much headroom to request before running the task.
        saw_spillable = False
        needed = 0
        for candidate in args:
            if not isinstance(candidate, SpillableWrapper):
                continue
            if candidate.mem_type() == MemoryType.HOST:
                # Only inputs currently held on the host contribute.
                needed += sizeof(candidate._on_host)
            saw_spillable = True
        if not saw_spillable:
            # No spillable inputs: likely an IO task, so assume it needs
            # a full target partition.
            needed = target_partition_size
        if needed > 128_000_000:  # Not worth the effort for smaller data
            worker_ctx = get_worker_context(get_worker())
            with worker_ctx.lock:
                worker_ctx.br.spill_manager.spill_to_make_headroom(headroom=needed)

        unwrapped = (unwrap_arg(candidate) for candidate in args)
        result: Any = func(*unwrapped)
        return wrap_arg(result) if make_func_output_spillable else result

    return wrapper
106
+
107
+
108
def wrap_dataframe_in_spillable(
    graph: MutableMapping[Any, Any],
    ignore_key: str | tuple[str, int],
    config_options: ConfigOptions,
) -> MutableMapping[Any, Any]:
    """
    Rewrite a task graph so that its functions handle spillable DataFrames.

    Every callable element of every task tuple is replaced by the result
    of ``wrap_func_spillable``; non-callable elements are kept as they are.

    Only flat task graphs are supported, i.e. each DataFrame must be found
    in the outermost level. Currently, this is true for all cudf-polars
    task graphs.

    Parameters
    ----------
    graph
        Task graph. Every value must be a tuple.
    ignore_key
        Key whose task output is NOT made spillable, typically the key of
        the output node.
    config_options
        GPUEngine configuration options. The executor must be
        ``"streaming"``.

    Returns
    -------
    A new dict with the same keys as ``graph`` and rewritten task tuples.
    The input graph is not modified.
    """
    executor = config_options.executor
    assert executor.name == "streaming", (
        "'in-memory' executor not supported in 'wrap_dataframe_in_spillable'"
    )
    target_partition_size = executor.target_partition_size

    wrapped_graph = {}
    for key, task in graph.items():
        assert isinstance(task, tuple)
        rewritten = []
        for element in task:
            if callable(element):
                # The output of the ignored key is left unwrapped.
                replacement = wrap_func_spillable(
                    element,
                    make_func_output_spillable=key != ignore_key,
                    target_partition_size=target_partition_size,
                )
            else:
                replacement = element
            rewritten.append(replacement)
        wrapped_graph[key] = tuple(rewritten)
    return wrapped_graph