cudf-polars-cu13 25.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. cudf_polars/GIT_COMMIT +1 -0
  2. cudf_polars/VERSION +1 -0
  3. cudf_polars/__init__.py +28 -0
  4. cudf_polars/_version.py +21 -0
  5. cudf_polars/callback.py +318 -0
  6. cudf_polars/containers/__init__.py +13 -0
  7. cudf_polars/containers/column.py +495 -0
  8. cudf_polars/containers/dataframe.py +361 -0
  9. cudf_polars/containers/datatype.py +137 -0
  10. cudf_polars/dsl/__init__.py +8 -0
  11. cudf_polars/dsl/expr.py +66 -0
  12. cudf_polars/dsl/expressions/__init__.py +8 -0
  13. cudf_polars/dsl/expressions/aggregation.py +226 -0
  14. cudf_polars/dsl/expressions/base.py +272 -0
  15. cudf_polars/dsl/expressions/binaryop.py +120 -0
  16. cudf_polars/dsl/expressions/boolean.py +326 -0
  17. cudf_polars/dsl/expressions/datetime.py +271 -0
  18. cudf_polars/dsl/expressions/literal.py +97 -0
  19. cudf_polars/dsl/expressions/rolling.py +643 -0
  20. cudf_polars/dsl/expressions/selection.py +74 -0
  21. cudf_polars/dsl/expressions/slicing.py +46 -0
  22. cudf_polars/dsl/expressions/sorting.py +85 -0
  23. cudf_polars/dsl/expressions/string.py +1002 -0
  24. cudf_polars/dsl/expressions/struct.py +137 -0
  25. cudf_polars/dsl/expressions/ternary.py +49 -0
  26. cudf_polars/dsl/expressions/unary.py +517 -0
  27. cudf_polars/dsl/ir.py +2607 -0
  28. cudf_polars/dsl/nodebase.py +164 -0
  29. cudf_polars/dsl/to_ast.py +359 -0
  30. cudf_polars/dsl/tracing.py +16 -0
  31. cudf_polars/dsl/translate.py +939 -0
  32. cudf_polars/dsl/traversal.py +224 -0
  33. cudf_polars/dsl/utils/__init__.py +8 -0
  34. cudf_polars/dsl/utils/aggregations.py +481 -0
  35. cudf_polars/dsl/utils/groupby.py +98 -0
  36. cudf_polars/dsl/utils/naming.py +34 -0
  37. cudf_polars/dsl/utils/replace.py +61 -0
  38. cudf_polars/dsl/utils/reshape.py +74 -0
  39. cudf_polars/dsl/utils/rolling.py +121 -0
  40. cudf_polars/dsl/utils/windows.py +192 -0
  41. cudf_polars/experimental/__init__.py +8 -0
  42. cudf_polars/experimental/base.py +386 -0
  43. cudf_polars/experimental/benchmarks/__init__.py +4 -0
  44. cudf_polars/experimental/benchmarks/pdsds.py +220 -0
  45. cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
  46. cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
  47. cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
  48. cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
  49. cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
  50. cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
  51. cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
  52. cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
  53. cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
  54. cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
  55. cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
  56. cudf_polars/experimental/benchmarks/pdsh.py +814 -0
  57. cudf_polars/experimental/benchmarks/utils.py +832 -0
  58. cudf_polars/experimental/dask_registers.py +200 -0
  59. cudf_polars/experimental/dispatch.py +156 -0
  60. cudf_polars/experimental/distinct.py +197 -0
  61. cudf_polars/experimental/explain.py +157 -0
  62. cudf_polars/experimental/expressions.py +590 -0
  63. cudf_polars/experimental/groupby.py +327 -0
  64. cudf_polars/experimental/io.py +943 -0
  65. cudf_polars/experimental/join.py +391 -0
  66. cudf_polars/experimental/parallel.py +423 -0
  67. cudf_polars/experimental/repartition.py +69 -0
  68. cudf_polars/experimental/scheduler.py +155 -0
  69. cudf_polars/experimental/select.py +188 -0
  70. cudf_polars/experimental/shuffle.py +354 -0
  71. cudf_polars/experimental/sort.py +609 -0
  72. cudf_polars/experimental/spilling.py +151 -0
  73. cudf_polars/experimental/statistics.py +795 -0
  74. cudf_polars/experimental/utils.py +169 -0
  75. cudf_polars/py.typed +0 -0
  76. cudf_polars/testing/__init__.py +8 -0
  77. cudf_polars/testing/asserts.py +448 -0
  78. cudf_polars/testing/io.py +122 -0
  79. cudf_polars/testing/plugin.py +236 -0
  80. cudf_polars/typing/__init__.py +219 -0
  81. cudf_polars/utils/__init__.py +8 -0
  82. cudf_polars/utils/config.py +741 -0
  83. cudf_polars/utils/conversion.py +40 -0
  84. cudf_polars/utils/dtypes.py +118 -0
  85. cudf_polars/utils/sorting.py +53 -0
  86. cudf_polars/utils/timer.py +39 -0
  87. cudf_polars/utils/versions.py +27 -0
  88. cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
  89. cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
  90. cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
  91. cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
  92. cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,361 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """A dataframe, with some properties."""
5
+
6
+ from __future__ import annotations
7
+
8
+ from functools import cached_property
9
+ from typing import TYPE_CHECKING, cast
10
+
11
+ import polars as pl
12
+
13
+ import pylibcudf as plc
14
+
15
+ from cudf_polars.containers import Column, DataType
16
+ from cudf_polars.utils import conversion
17
+
18
+ if TYPE_CHECKING:
19
+ from collections.abc import Iterable, Mapping, Sequence, Set
20
+
21
+ from typing_extensions import Any, CapsuleType, Self
22
+
23
+ from cudf_polars.typing import ColumnOptions, DataFrameHeader, PolarsDataType, Slice
24
+
25
+
26
+ __all__: list[str] = ["DataFrame"]
27
+
28
+
29
def _create_polars_column_metadata(
    name: str, dtype: PolarsDataType
) -> plc.interop.ColumnMetadata:
    """Create ColumnMetadata preserving dtype attributes not supported by libcudf."""
    # libcudf types carry no timezone, decimal precision, or field names;
    # capture those from the polars dtype so they survive the round trip.
    children: list[plc.interop.ColumnMetadata] = []
    tz = ""
    prec: int | None = None

    if isinstance(dtype, pl.Datetime):
        if dtype.time_zone:
            tz = dtype.time_zone
    elif isinstance(dtype, pl.Decimal):
        prec = dtype.precision
    elif isinstance(dtype, pl.Struct):
        # Recurse so nested struct fields keep their names and metadata too.
        children = [
            _create_polars_column_metadata(field.name, field.dtype)
            for field in dtype.fields
        ]

    return plc.interop.ColumnMetadata(
        name=name,
        timezone=tz,
        precision=prec,
        children_meta=children,
    )
53
+
54
+
55
+ # This is also defined in pylibcudf.interop
56
+ class _ObjectWithArrowMetadata:
57
+ def __init__(
58
+ self, obj: plc.Table | plc.Column, metadata: list[plc.interop.ColumnMetadata]
59
+ ) -> None:
60
+ self.obj = obj
61
+ self.metadata = metadata
62
+
63
+ def __arrow_c_array__(
64
+ self, requested_schema: None = None
65
+ ) -> tuple[CapsuleType, CapsuleType]:
66
+ return self.obj._to_schema(self.metadata), self.obj._to_host_array()
67
+
68
+
69
# Pacify the type checker. DataFrame init asserts that all the columns
# have a string name, so let's narrow the type.
class NamedColumn(Column):
    # Narrowed annotation only: ``name`` is always a ``str`` (never None)
    # on instances cast to this type; DataFrame.__init__ enforces that
    # invariant at runtime before casting.
    name: str
73
+
74
+
75
class DataFrame:
    """A representation of a dataframe."""

    # Name -> Column mapping; insertion order matches ``columns``.
    column_map: dict[str, Column]
    # pylibcudf table view sharing the columns' device buffers (zero copy).
    table: plc.Table
    # Columns narrowed to NamedColumn: __init__ guarantees every name is a str.
    columns: list[NamedColumn]
    # Dtype of each column, in column order.
    dtypes: list[DataType]

    def __init__(self, columns: Iterable[Column]) -> None:
        columns = list(columns)
        if any(c.name is None for c in columns):
            raise ValueError("All columns must have a name")
        self.columns = [cast(NamedColumn, c) for c in columns]
        self.dtypes = [c.dtype for c in self.columns]
        self.column_map = {c.name: c for c in self.columns}
        self.table = plc.Table([c.obj for c in self.columns])

    def copy(self) -> Self:
        """Return a shallow copy of self."""
        return type(self)(c.copy() for c in self.columns)

    def to_polars(self) -> pl.DataFrame:
        """Convert to a polars DataFrame."""
        # If the arrow table has empty names, from_arrow produces
        # column_$i. But here we know there is only one such column
        # (by construction) and it should have an empty name.
        # https://github.com/pola-rs/polars/issues/11632
        # To guarantee we produce correct names, we therefore
        # serialise with names we control and rename with that map.
        name_map = {f"column_{i}": name for i, name in enumerate(self.column_map)}
        metadata = [
            _create_polars_column_metadata(name, dtype.polars)
            for name, dtype in zip(name_map, self.dtypes, strict=True)
        ]
        table_with_metadata = _ObjectWithArrowMetadata(self.table, metadata)
        df = pl.DataFrame(table_with_metadata)
        # Re-attach sortedness flags so polars can exploit pre-sorted columns.
        return df.rename(name_map).with_columns(
            pl.col(c.name).set_sorted(descending=c.order == plc.types.Order.DESCENDING)
            if c.is_sorted
            else pl.col(c.name)
            for c in self.columns
        )

    @cached_property
    def column_names_set(self) -> frozenset[str]:
        """Return the column names as a set."""
        return frozenset(self.column_map)

    @cached_property
    def column_names(self) -> list[str]:
        """Return a list of the column names."""
        return list(self.column_map)

    @cached_property
    def num_columns(self) -> int:
        """Number of columns."""
        return len(self.column_map)

    @cached_property
    def num_rows(self) -> int:
        """Number of rows."""
        # A zero-column frame reports 0 rows rather than querying the table.
        return self.table.num_rows() if self.column_map else 0

    @classmethod
    def from_polars(cls, df: pl.DataFrame) -> Self:
        """
        Create from a polars dataframe.

        Parameters
        ----------
        df
            Polars dataframe to convert

        Returns
        -------
        New dataframe representing the input.
        """
        plc_table = plc.Table.from_arrow(df)
        # Pair each device column with its host counterpart so polars-side
        # metadata (dtype, sortedness) can be carried over.
        return cls(
            Column(d_col, name=name, dtype=DataType(h_col.dtype)).copy_metadata(h_col)
            for d_col, h_col, name in zip(
                plc_table.columns(), df.iter_columns(), df.columns, strict=True
            )
        )

    @classmethod
    def from_table(
        cls, table: plc.Table, names: Sequence[str], dtypes: Sequence[DataType]
    ) -> Self:
        """
        Create from a pylibcudf table.

        Parameters
        ----------
        table
            Pylibcudf table to obtain columns from
        names
            Names for the columns
        dtypes
            Dtypes for the columns

        Returns
        -------
        New dataframe sharing data with the input table.

        Raises
        ------
        ValueError
            If the number of provided names does not match the
            number of columns in the table.
        """
        if table.num_columns() != len(names):
            raise ValueError("Mismatching name and table length.")
        return cls(
            Column(c, name=name, dtype=dtype)
            for c, name, dtype in zip(table.columns(), names, dtypes, strict=True)
        )

    @classmethod
    def deserialize(
        cls, header: DataFrameHeader, frames: tuple[memoryview, plc.gpumemoryview]
    ) -> Self:
        """
        Create a DataFrame from a serialized representation returned by `.serialize()`.

        Parameters
        ----------
        header
            The (unpickled) metadata required to reconstruct the object.
        frames
            Two-tuple of frames (a memoryview and a gpumemoryview).

        Returns
        -------
        DataFrame
            The deserialized DataFrame.
        """
        packed_metadata, packed_gpu_data = frames
        table = plc.contiguous_split.unpack_from_memoryviews(
            packed_metadata, packed_gpu_data
        )
        # Column metadata (name, dtype, sortedness) travels in the header;
        # the buffers only carry the table payload.
        return cls(
            Column(c, **Column.deserialize_ctor_kwargs(kw))
            for c, kw in zip(table.columns(), header["columns_kwargs"], strict=True)
        )

    def serialize(
        self,
    ) -> tuple[DataFrameHeader, tuple[memoryview, plc.gpumemoryview]]:
        """
        Serialize the table into header and frames.

        Follows the Dask serialization scheme with a picklable header (dict) and
        a tuple of frames (in this case a contiguous host and device buffer).

        To enable dask support, dask serializers must be registered

        >>> from cudf_polars.experimental.dask_serialize import register
        >>> register()

        Returns
        -------
        header
            A dict containing any picklable metadata required to reconstruct the object.
        frames
            Two-tuple of frames suitable for passing to `plc.contiguous_split.unpack_from_memoryviews`
        """
        # NOTE(review): the doctest above imports
        # cudf_polars.experimental.dask_serialize; the packaged module list
        # only shows experimental/dask_registers — confirm the module name.
        packed = plc.contiguous_split.pack(self.table)

        # Keyword arguments for `Column.__init__`.
        columns_kwargs: list[ColumnOptions] = [
            col.serialize_ctor_kwargs() for col in self.columns
        ]
        header: DataFrameHeader = {
            "columns_kwargs": columns_kwargs,
            "frame_count": 2,
        }
        return header, packed.release()

    def sorted_like(
        self, like: DataFrame, /, *, subset: Set[str] | None = None
    ) -> Self:
        """
        Return a shallow copy with sortedness copied from like.

        Parameters
        ----------
        like
            The dataframe to copy from
        subset
            Optional subset of column names for which to copy sortedness
            metadata. Defaults to all columns.

        Returns
        -------
        Shallow copy of self with metadata set.

        Raises
        ------
        ValueError
            If there is a name mismatch between self and like.
        """
        if like.column_names != self.column_names:
            raise ValueError("Can only copy from identically named frame")
        subset = self.column_names_set if subset is None else subset
        return type(self)(
            c.sorted_like(other) if c.name in subset else c
            for c, other in zip(self.columns, like.columns, strict=True)
        )

    def with_columns(
        self, columns: Iterable[Column], *, replace_only: bool = False
    ) -> Self:
        """
        Return a new dataframe with extra columns.

        Parameters
        ----------
        columns
            Columns to add
        replace_only
            If true, then only replacements are allowed (matching by name).

        Returns
        -------
        New dataframe

        Raises
        ------
        ValueError
            If ``replace_only`` is true and a column name does not
            already exist in the frame.

        Notes
        -----
        If column names overlap, newer names replace older ones, and
        appear in the same order as the original frame.
        """
        new = {c.name: c for c in columns}
        if replace_only and not self.column_names_set.issuperset(new.keys()):
            raise ValueError("Cannot replace with non-existing names")
        # dict union keeps existing positions for replaced names and
        # appends genuinely new columns at the end.
        return type(self)((self.column_map | new).values())

    def discard_columns(self, names: Set[str]) -> Self:
        """Drop columns by name."""
        return type(self)(column for column in self.columns if column.name not in names)

    def select(self, names: Sequence[str] | Mapping[str, Any]) -> Self:
        """Select columns by name returning DataFrame."""
        try:
            return type(self)(self.column_map[name] for name in names)
        except KeyError as e:
            raise ValueError("Can't select missing names") from e

    def rename_columns(self, mapping: Mapping[str, str]) -> Self:
        """Rename some columns."""
        return type(self)(c.rename(mapping.get(c.name, c.name)) for c in self.columns)

    def select_columns(self, names: Set[str]) -> list[Column]:
        """Select columns by name."""
        return [c for c in self.columns if c.name in names]

    def filter(self, mask: Column) -> Self:
        """Return a filtered table given a mask."""
        table = plc.stream_compaction.apply_boolean_mask(self.table, mask.obj)
        # Filtering preserves relative order, so sortedness flags carry over.
        return (
            type(self)
            .from_table(table, self.column_names, self.dtypes)
            .sorted_like(self)
        )

    def slice(self, zlice: Slice | None) -> Self:
        """
        Slice a dataframe.

        Parameters
        ----------
        zlice
            optional, tuple of start and length, negative values of start
            treated as for python indexing. If not provided, returns self.

        Returns
        -------
        New dataframe (if zlice is not None) otherwise self (if it is)
        """
        if zlice is None:
            return self
        (table,) = plc.copying.slice(
            self.table, conversion.from_polars_slice(zlice, num_rows=self.num_rows)
        )
        # Slicing preserves relative order, so sortedness flags carry over.
        return (
            type(self)
            .from_table(table, self.column_names, self.dtypes)
            .sorted_like(self)
        )
@@ -0,0 +1,137 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """A datatype, preserving polars metadata."""
5
+
6
+ from __future__ import annotations
7
+
8
+ from functools import cache
9
+
10
+ from typing_extensions import assert_never
11
+
12
+ import polars as pl
13
+
14
+ import pylibcudf as plc
15
+
16
+ __all__ = ["DataType"]
17
+
18
+
19
@cache
def _from_polars(dtype: pl.DataType) -> plc.DataType:
    """
    Convert a polars datatype to a pylibcudf one.

    Parameters
    ----------
    dtype
        Polars dtype to convert

    Returns
    -------
    Matching pylibcudf DataType object.

    Raises
    ------
    NotImplementedError
        For unsupported conversions.
    """
    if isinstance(dtype, pl.Boolean):
        return plc.DataType(plc.TypeId.BOOL8)
    elif isinstance(dtype, pl.Int8):
        return plc.DataType(plc.TypeId.INT8)
    elif isinstance(dtype, pl.Int16):
        return plc.DataType(plc.TypeId.INT16)
    elif isinstance(dtype, pl.Int32):
        return plc.DataType(plc.TypeId.INT32)
    elif isinstance(dtype, pl.Int64):
        return plc.DataType(plc.TypeId.INT64)
    # Fixed: this branch was a bare `if`, silently breaking the elif chain
    # (harmless only because every earlier branch returns, but fragile).
    elif isinstance(dtype, pl.UInt8):
        return plc.DataType(plc.TypeId.UINT8)
    elif isinstance(dtype, pl.UInt16):
        return plc.DataType(plc.TypeId.UINT16)
    elif isinstance(dtype, pl.UInt32):
        return plc.DataType(plc.TypeId.UINT32)
    elif isinstance(dtype, pl.UInt64):
        return plc.DataType(plc.TypeId.UINT64)
    elif isinstance(dtype, pl.Float32):
        return plc.DataType(plc.TypeId.FLOAT32)
    elif isinstance(dtype, pl.Float64):
        return plc.DataType(plc.TypeId.FLOAT64)
    elif isinstance(dtype, pl.Date):
        return plc.DataType(plc.TypeId.TIMESTAMP_DAYS)
    elif isinstance(dtype, pl.Time):
        raise NotImplementedError("Time of day dtype not implemented")
    elif isinstance(dtype, pl.Datetime):
        if dtype.time_unit == "ms":
            return plc.DataType(plc.TypeId.TIMESTAMP_MILLISECONDS)
        elif dtype.time_unit == "us":
            return plc.DataType(plc.TypeId.TIMESTAMP_MICROSECONDS)
        elif dtype.time_unit == "ns":
            return plc.DataType(plc.TypeId.TIMESTAMP_NANOSECONDS)
        # Static exhaustiveness check over the time_unit literal.
        assert dtype.time_unit is not None  # pragma: no cover
        assert_never(dtype.time_unit)
    elif isinstance(dtype, pl.Duration):
        if dtype.time_unit == "ms":
            return plc.DataType(plc.TypeId.DURATION_MILLISECONDS)
        elif dtype.time_unit == "us":
            return plc.DataType(plc.TypeId.DURATION_MICROSECONDS)
        elif dtype.time_unit == "ns":
            return plc.DataType(plc.TypeId.DURATION_NANOSECONDS)
        # Static exhaustiveness check over the time_unit literal.
        assert dtype.time_unit is not None  # pragma: no cover
        assert_never(dtype.time_unit)
    elif isinstance(dtype, pl.String):
        return plc.DataType(plc.TypeId.STRING)
    elif isinstance(dtype, pl.Decimal):
        # libcudf uses negative scale relative to polars' convention.
        return plc.DataType(plc.TypeId.DECIMAL128, scale=-dtype.scale)
    elif isinstance(dtype, pl.Null):
        # TODO: Hopefully
        return plc.DataType(plc.TypeId.EMPTY)
    elif isinstance(dtype, pl.List):
        # Recurse to catch unsupported inner types
        _ = _from_polars(dtype.inner)
        return plc.DataType(plc.TypeId.LIST)
    elif isinstance(dtype, pl.Struct):
        # Recurse to catch unsupported field types
        for field in dtype.fields:
            _ = _from_polars(field.dtype)
        return plc.DataType(plc.TypeId.STRUCT)
    else:
        raise NotImplementedError(f"{dtype=} conversion not supported")
100
+
101
+
102
+ class DataType:
103
+ """A datatype, preserving polars metadata."""
104
+
105
+ polars: pl.datatypes.DataType
106
+ plc: plc.DataType
107
+
108
+ def __init__(self, polars_dtype: pl.DataType) -> None:
109
+ self.polars = polars_dtype
110
+ self.plc = _from_polars(polars_dtype)
111
+
112
+ def id(self) -> plc.TypeId:
113
+ """The pylibcudf.TypeId of this DataType."""
114
+ return self.plc.id()
115
+
116
+ @property
117
+ def children(self) -> list[DataType]:
118
+ """The children types of this DataType."""
119
+ if self.plc.id() == plc.TypeId.STRUCT:
120
+ return [DataType(field.dtype) for field in self.polars.fields]
121
+ elif self.plc.id() == plc.TypeId.LIST:
122
+ return [DataType(self.polars.inner)]
123
+ return []
124
+
125
+ def __eq__(self, other: object) -> bool:
126
+ """Equality of DataTypes."""
127
+ if not isinstance(other, DataType):
128
+ return False
129
+ return self.polars == other.polars
130
+
131
+ def __hash__(self) -> int:
132
+ """Hash of the DataType."""
133
+ return hash(self.polars)
134
+
135
+ def __repr__(self) -> str:
136
+ """Representation of the DataType."""
137
+ return f"<DataType(polars={self.polars}, plc={self.id()!r})>"
@@ -0,0 +1,8 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """The domain-specific language (DSL) for the polars executor."""
5
+
6
+ from __future__ import annotations
7
+
8
# Nothing is re-exported at package level; import submodules directly.
__all__: list[str] = []
@@ -0,0 +1,66 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ # TODO: remove need for this
4
+ # ruff: noqa: D101
5
+ """
6
+ DSL nodes for the polars expression language.
7
+
8
+ An expression node is a function, `DataFrame -> Column`.
9
+
10
+ The evaluation context is provided by a LogicalPlan node, and can
11
+ affect the evaluation rule as well as providing the dataframe input.
12
+ In particular, the interpretation of the expression language in a
13
+ `GroupBy` node is groupwise, rather than whole frame.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from cudf_polars.dsl.expressions.aggregation import Agg
19
+ from cudf_polars.dsl.expressions.base import (
20
+ AggInfo,
21
+ Col,
22
+ ColRef,
23
+ ErrorExpr,
24
+ Expr,
25
+ NamedExpr,
26
+ )
27
+ from cudf_polars.dsl.expressions.binaryop import BinOp
28
+ from cudf_polars.dsl.expressions.boolean import BooleanFunction
29
+ from cudf_polars.dsl.expressions.datetime import TemporalFunction
30
+ from cudf_polars.dsl.expressions.literal import Literal, LiteralColumn
31
+ from cudf_polars.dsl.expressions.rolling import GroupedRollingWindow, RollingWindow
32
+ from cudf_polars.dsl.expressions.selection import Filter, Gather
33
+ from cudf_polars.dsl.expressions.slicing import Slice
34
+ from cudf_polars.dsl.expressions.sorting import Sort, SortBy
35
+ from cudf_polars.dsl.expressions.string import StringFunction
36
+ from cudf_polars.dsl.expressions.struct import StructFunction
37
+ from cudf_polars.dsl.expressions.ternary import Ternary
38
+ from cudf_polars.dsl.expressions.unary import Cast, Len, UnaryFunction
39
+
40
+ __all__ = [
41
+ "Agg",
42
+ "AggInfo",
43
+ "BinOp",
44
+ "BooleanFunction",
45
+ "Cast",
46
+ "Col",
47
+ "ColRef",
48
+ "ErrorExpr",
49
+ "Expr",
50
+ "Filter",
51
+ "Gather",
52
+ "GroupedRollingWindow",
53
+ "Len",
54
+ "Literal",
55
+ "LiteralColumn",
56
+ "NamedExpr",
57
+ "RollingWindow",
58
+ "Slice",
59
+ "Sort",
60
+ "SortBy",
61
+ "StringFunction",
62
+ "StructFunction",
63
+ "TemporalFunction",
64
+ "Ternary",
65
+ "UnaryFunction",
66
+ ]
@@ -0,0 +1,8 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Implementations of various expressions."""
5
+
6
+ from __future__ import annotations
7
+
8
# Nothing is re-exported at package level; import submodules directly.
__all__: list[str] = []