cudf-polars-cu12 25.8.0__py3-none-any.whl → 25.10.0__py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
- cudf_polars/GIT_COMMIT +1 -0
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +1 -10
- cudf_polars/containers/column.py +29 -0
- cudf_polars/containers/dataframe.py +15 -6
- cudf_polars/containers/datatype.py +2 -0
- cudf_polars/dsl/expressions/base.py +4 -0
- cudf_polars/dsl/expressions/boolean.py +41 -20
- cudf_polars/dsl/expressions/datetime.py +1 -0
- cudf_polars/dsl/expressions/rolling.py +487 -7
- cudf_polars/dsl/expressions/string.py +275 -21
- cudf_polars/dsl/expressions/struct.py +3 -4
- cudf_polars/dsl/expressions/unary.py +148 -32
- cudf_polars/dsl/ir.py +244 -61
- cudf_polars/dsl/translate.py +68 -21
- cudf_polars/dsl/utils/aggregations.py +201 -21
- cudf_polars/dsl/utils/groupby.py +6 -1
- cudf_polars/dsl/utils/rolling.py +7 -1
- cudf_polars/dsl/utils/windows.py +7 -1
- cudf_polars/experimental/base.py +258 -25
- cudf_polars/experimental/benchmarks/pdsds.py +8 -4
- cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
- cudf_polars/experimental/benchmarks/pdsh.py +2 -0
- cudf_polars/experimental/benchmarks/utils.py +140 -33
- cudf_polars/experimental/dispatch.py +58 -1
- cudf_polars/experimental/distinct.py +3 -0
- cudf_polars/experimental/explain.py +33 -3
- cudf_polars/experimental/expressions.py +45 -2
- cudf_polars/experimental/groupby.py +3 -0
- cudf_polars/experimental/io.py +80 -39
- cudf_polars/experimental/join.py +5 -3
- cudf_polars/experimental/parallel.py +39 -6
- cudf_polars/experimental/select.py +32 -5
- cudf_polars/experimental/shuffle.py +82 -30
- cudf_polars/experimental/sort.py +575 -11
- cudf_polars/experimental/statistics.py +795 -0
- cudf_polars/experimental/utils.py +65 -8
- cudf_polars/testing/asserts.py +23 -12
- cudf_polars/testing/io.py +52 -2
- cudf_polars/testing/plugin.py +14 -23
- cudf_polars/utils/config.py +154 -18
- cudf_polars/utils/dtypes.py +17 -4
- cudf_polars/utils/versions.py +3 -1
- {cudf_polars_cu12-25.8.0.dist-info → cudf_polars_cu12-25.10.0.dist-info}/METADATA +18 -7
- cudf_polars_cu12-25.10.0.dist-info/RECORD +92 -0
- cudf_polars_cu12-25.8.0.dist-info/RECORD +0 -81
- {cudf_polars_cu12-25.8.0.dist-info → cudf_polars_cu12-25.10.0.dist-info}/WHEEL +0 -0
- {cudf_polars_cu12-25.8.0.dist-info → cudf_polars_cu12-25.10.0.dist-info}/licenses/LICENSE +0 -0
- {cudf_polars_cu12-25.8.0.dist-info → cudf_polars_cu12-25.10.0.dist-info}/top_level.txt +0 -0
cudf_polars/GIT_COMMIT
ADDED
@@ -0,0 +1 @@
+f4e35ca02118eada383e7417273c6cb1857ec66e
cudf_polars/VERSION
CHANGED
@@ -1 +1 @@
-25.08.00
+25.10.00
cudf_polars/callback.py
CHANGED
@@ -40,14 +40,6 @@ if TYPE_CHECKING:
 __all__: list[str] = ["execute_with_cudf"]
 
 
-_SUPPORTED_PREFETCHES = {
-    "column_view::get_data",
-    "mutable_column_view::get_data",
-    "gather",
-    "hash_join",
-}
-
-
 @cache
 def default_memory_resource(
     device: int,
@@ -80,8 +72,7 @@ def default_memory_resource(
         # Leaving a 20% headroom to avoid OOM errors.
        free_memory, _ = rmm.mr.available_device_memory()
        free_memory = int(round(float(free_memory) * 0.80 / 256) * 256)
-        for key in _SUPPORTED_PREFETCHES:
-            pylibcudf.experimental.enable_prefetching(key)
+        pylibcudf.prefetch.enable()
         mr = rmm.mr.PrefetchResourceAdaptor(
             rmm.mr.PoolMemoryResource(
                 rmm.mr.ManagedMemoryResource(),
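For context, this change replaces the per-function prefetch registrations with a single global `pylibcudf.prefetch.enable()` call. Below is a minimal sketch of the prefetching managed-memory setup this code path assembles; the `initial_pool_size` keyword and the `set_current_device_resource` call are illustrative additions, not lines from the diff.

    # Sketch only: prefetch + managed-memory resource stack, with the 20%-headroom
    # pool sizing shown above (rounded to a multiple of 256 bytes).
    import pylibcudf
    import rmm

    free_memory, _ = rmm.mr.available_device_memory()
    pool_size = int(round(float(free_memory) * 0.80 / 256) * 256)

    pylibcudf.prefetch.enable()  # replaces the old per-function enable_prefetching() calls
    mr = rmm.mr.PrefetchResourceAdaptor(
        rmm.mr.PoolMemoryResource(
            rmm.mr.ManagedMemoryResource(),
            initial_pool_size=pool_size,  # illustrative; not taken from the diff
        )
    )
    rmm.mr.set_current_device_resource(mr)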
cudf_polars/containers/column.py
CHANGED
@@ -293,6 +293,35 @@ class Column:
             or self.obj.type().id() == plc.TypeId.STRING
         ):
             return Column(self._handle_string_cast(plc_dtype), dtype=dtype)
+        elif plc.traits.is_integral_not_bool(
+            self.obj.type()
+        ) and plc.traits.is_timestamp(plc_dtype):
+            upcasted = plc.unary.cast(self.obj, plc.DataType(plc.TypeId.INT64))
+            result = plc.column.Column(
+                plc_dtype,
+                upcasted.size(),
+                upcasted.data(),
+                upcasted.null_mask(),
+                upcasted.null_count(),
+                upcasted.offset(),
+                upcasted.children(),
+            )
+            return Column(result, dtype=dtype).sorted_like(self)
+        elif plc.traits.is_integral_not_bool(plc_dtype) and plc.traits.is_timestamp(
+            self.obj.type()
+        ):
+            result = plc.column.Column(
+                plc.DataType(plc.TypeId.INT64),
+                self.obj.size(),
+                self.obj.data(),
+                self.obj.null_mask(),
+                self.obj.null_count(),
+                self.obj.offset(),
+                self.obj.children(),
+            )
+            return Column(plc.unary.cast(result, plc_dtype), dtype=dtype).sorted_like(
+                self
+            )
         else:
             result = Column(plc.unary.cast(self.obj, plc_dtype), dtype=dtype)
             if is_order_preserving_cast(self.obj.type(), plc_dtype):
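The two new branches reinterpret the INT64 buffer directly when casting between integers and timestamps, rather than routing everything through `plc.unary.cast`. A hedged, user-facing illustration of the behaviour this enables follows; the column names are invented and `engine="gpu"` assumes a working cudf-polars install.

    import polars as pl

    q = (
        pl.LazyFrame({"epoch_us": [0, 1_000_000, None]})
        .with_columns(ts=pl.col("epoch_us").cast(pl.Datetime("us")))  # int -> timestamp
        .with_columns(back=pl.col("ts").cast(pl.Int64))               # timestamp -> int
    )
    print(q.collect(engine="gpu"))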
cudf_polars/containers/dataframe.py
CHANGED
@@ -29,24 +29,33 @@ __all__: list[str] = ["DataFrame"]
 def _create_polars_column_metadata(
     name: str, dtype: PolarsDataType
 ) -> plc.interop.ColumnMetadata:
-    """Create ColumnMetadata preserving
+    """Create ColumnMetadata preserving dtype attributes not supported by libcudf."""
+    children_meta = []
+    timezone = ""
+    precision: int | None = None
+
     if isinstance(dtype, pl.Struct):
         children_meta = [
             _create_polars_column_metadata(field.name, field.dtype)
             for field in dtype.fields
         ]
-
-
-
+    elif isinstance(dtype, pl.Datetime):
+        timezone = dtype.time_zone or timezone
+    elif isinstance(dtype, pl.Decimal):
+        precision = dtype.precision
+
     return plc.interop.ColumnMetadata(
-        name=name,
+        name=name,
+        timezone=timezone,
+        precision=precision,
+        children_meta=children_meta,
     )
 
 
 # This is also defined in pylibcudf.interop
 class _ObjectWithArrowMetadata:
     def __init__(
-        self, obj: plc.Table, metadata: list[plc.interop.ColumnMetadata]
+        self, obj: plc.Table | plc.Column, metadata: list[plc.interop.ColumnMetadata]
     ) -> None:
         self.obj = obj
         self.metadata = metadata
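The updated helper now threads a timezone and a decimal precision into libcudf's Arrow interop metadata. A hypothetical illustration of the metadata it would produce for a timezone-aware Datetime column is shown below; the column name and dtype are made up, and the keyword arguments simply follow the new call above.

    import polars as pl
    import pylibcudf as plc

    dtype = pl.Datetime("us", time_zone="UTC")
    meta = plc.interop.ColumnMetadata(
        name="ts",
        timezone=dtype.time_zone or "",  # "UTC"
        precision=None,                  # only set for pl.Decimal
        children_meta=[],                # populated recursively for pl.Struct
    )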
cudf_polars/containers/datatype.py
CHANGED
@@ -81,6 +81,8 @@ def _from_polars(dtype: pl.DataType) -> plc.DataType:
         assert_never(dtype.time_unit)
     elif isinstance(dtype, pl.String):
         return plc.DataType(plc.TypeId.STRING)
+    elif isinstance(dtype, pl.Decimal):
+        return plc.DataType(plc.TypeId.DECIMAL128, scale=-dtype.scale)
     elif isinstance(dtype, pl.Null):
         # TODO: Hopefully
         return plc.DataType(plc.TypeId.EMPTY)
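The new `pl.Decimal` branch negates the scale because libcudf's fixed-point types store a power-of-ten exponent (value = unscaled mantissa × 10^scale, negative for fractional digits), while polars' `Decimal.scale` counts digits after the decimal point. A small illustration of the convention (pure Python, arbitrary numbers):

    import polars as pl

    dtype = pl.Decimal(precision=10, scale=2)
    libcudf_scale = -dtype.scale           # -2, as passed to plc.DataType above
    unscaled = 12345                       # integer mantissa stored by libcudf
    value = unscaled / 10 ** dtype.scale   # 123.45, i.e. unscaled * 10**libcudf_scale
    print(libcudf_scale, value)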
cudf_polars/dsl/expressions/base.py
CHANGED
@@ -31,6 +31,10 @@ class ExecutionContext(IntEnum):
     FRAME = enum.auto()
     GROUPBY = enum.auto()
     ROLLING = enum.auto()
+    # Follows GROUPBY semantics but useful
+    # to differentiate from GROUPBY so we can
+    # implement agg/per-row ops independently
+    WINDOW = enum.auto()
 
 
 class Expr(Node["Expr"]):
cudf_polars/dsl/expressions/boolean.py
CHANGED
@@ -38,6 +38,7 @@ class BooleanFunction(Expr):
         Any = auto()
         AnyHorizontal = auto()
         IsBetween = auto()
+        IsClose = auto()
         IsDuplicated = auto()
         IsFinite = auto()
         IsFirstDistinct = auto()
@@ -85,6 +86,12 @@ class BooleanFunction(Expr):
             BooleanFunction.Name.IsLastDistinct,
             BooleanFunction.Name.IsUnique,
         )
+        if self.name in {
+            BooleanFunction.Name.IsClose,
+        }:
+            raise NotImplementedError(
+                f"Boolean function {self.name}"
+            )  # pragma: no cover
 
     @staticmethod
     def _distinct(
@@ -146,13 +153,18 @@ class BooleanFunction(Expr):
         ):
             # Avoid evaluating the child if the dtype tells us it's unnecessary.
             (child,) = self.children
+            needles = child.evaluate(df, context=context)
+            is_float = needles.obj.type().id() in (
+                plc.TypeId.FLOAT32,
+                plc.TypeId.FLOAT64,
+            )
             is_finite = self.name is BooleanFunction.Name.IsFinite
-            if
-
-
-                plc.Column.from_scalar(value, df.num_rows), dtype=self.dtype
+            if not is_float:
+                base = plc.Column.from_scalar(
+                    plc.Scalar.from_py(py_val=is_finite), needles.size
                 )
-
+                out = base.with_mask(needles.obj.null_mask(), needles.null_count)
+                return Column(out, dtype=self.dtype)
             to_search = [-float("inf"), float("inf")]
             if is_finite:
                 # NaN is neither finite not infinite
@@ -164,7 +176,10 @@ class BooleanFunction(Expr):
             result = plc.search.contains(haystack, needles.obj)
             if is_finite:
                 result = plc.unary.unary_operation(result, plc.unary.UnaryOperator.NOT)
-            return Column(
+            return Column(
+                result.with_mask(needles.obj.null_mask(), needles.null_count),
+                dtype=self.dtype,
+            )
         columns = [child.evaluate(df, context=context) for child in self.children]
         # Kleene logic for Any (OR) and All (AND) if ignore_nulls is
         # False
@@ -199,22 +214,28 @@ class BooleanFunction(Expr):
         elif self.name is BooleanFunction.Name.IsNotNull:
             (column,) = columns
             return Column(plc.unary.is_valid(column.obj), dtype=self.dtype)
-        elif self.name is BooleanFunction.Name.IsNan:
+        elif self.name in (BooleanFunction.Name.IsNan, BooleanFunction.Name.IsNotNan):
             (column,) = columns
-            return Column(
-                plc.unary.is_nan(column.obj).with_mask(
-                    column.obj.null_mask(), column.null_count
-                ),
-                dtype=self.dtype,
-            )
-        elif self.name is BooleanFunction.Name.IsNotNan:
-            (column,) = columns
-            return Column(
-                plc.unary.is_not_nan(column.obj).with_mask(
-                    column.obj.null_mask(), column.null_count
-                ),
-                dtype=self.dtype,
+            is_float = column.obj.type().id() in (
+                plc.TypeId.FLOAT32,
+                plc.TypeId.FLOAT64,
             )
+            if is_float:
+                op = (
+                    plc.unary.is_nan
+                    if self.name is BooleanFunction.Name.IsNan
+                    else plc.unary.is_not_nan
+                )
+                base = op(column.obj)
+            else:
+                base = plc.Column.from_scalar(
+                    plc.Scalar.from_py(
+                        py_val=self.name is not BooleanFunction.Name.IsNan
+                    ),
+                    column.size,
+                )
+            out = base.with_mask(column.obj.null_mask(), column.null_count)
+            return Column(out, dtype=self.dtype)
         elif self.name is BooleanFunction.Name.IsFirstDistinct:
             (column,) = columns
             return self._distinct(