cudf-polars-cu12 24.12.0__py3-none-any.whl → 25.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. cudf_polars/VERSION +1 -1
  2. cudf_polars/__init__.py +1 -1
  3. cudf_polars/callback.py +28 -3
  4. cudf_polars/containers/__init__.py +1 -1
  5. cudf_polars/dsl/expr.py +16 -16
  6. cudf_polars/dsl/expressions/aggregation.py +21 -4
  7. cudf_polars/dsl/expressions/base.py +7 -2
  8. cudf_polars/dsl/expressions/binaryop.py +1 -0
  9. cudf_polars/dsl/expressions/boolean.py +65 -22
  10. cudf_polars/dsl/expressions/datetime.py +82 -20
  11. cudf_polars/dsl/expressions/literal.py +2 -0
  12. cudf_polars/dsl/expressions/rolling.py +3 -1
  13. cudf_polars/dsl/expressions/selection.py +3 -1
  14. cudf_polars/dsl/expressions/sorting.py +2 -0
  15. cudf_polars/dsl/expressions/string.py +118 -39
  16. cudf_polars/dsl/expressions/ternary.py +1 -0
  17. cudf_polars/dsl/expressions/unary.py +11 -1
  18. cudf_polars/dsl/ir.py +173 -122
  19. cudf_polars/dsl/to_ast.py +4 -6
  20. cudf_polars/dsl/translate.py +53 -21
  21. cudf_polars/dsl/traversal.py +10 -10
  22. cudf_polars/experimental/base.py +43 -0
  23. cudf_polars/experimental/dispatch.py +84 -0
  24. cudf_polars/experimental/io.py +325 -0
  25. cudf_polars/experimental/parallel.py +253 -0
  26. cudf_polars/experimental/select.py +36 -0
  27. cudf_polars/testing/asserts.py +14 -5
  28. cudf_polars/testing/plugin.py +60 -4
  29. cudf_polars/typing/__init__.py +5 -5
  30. cudf_polars/utils/dtypes.py +9 -7
  31. cudf_polars/utils/versions.py +4 -7
  32. {cudf_polars_cu12-24.12.0.dist-info → cudf_polars_cu12-25.2.0.dist-info}/METADATA +6 -6
  33. cudf_polars_cu12-25.2.0.dist-info/RECORD +48 -0
  34. {cudf_polars_cu12-24.12.0.dist-info → cudf_polars_cu12-25.2.0.dist-info}/WHEEL +1 -1
  35. cudf_polars_cu12-24.12.0.dist-info/RECORD +0 -43
  36. {cudf_polars_cu12-24.12.0.dist-info → cudf_polars_cu12-25.2.0.dist-info}/LICENSE +0 -0
  37. {cudf_polars_cu12-24.12.0.dist-info → cudf_polars_cu12-25.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,253 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Multi-partition Dask execution."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import itertools
8
+ import operator
9
+ from functools import reduce
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ import cudf_polars.experimental.io
13
+ import cudf_polars.experimental.select # noqa: F401
14
+ from cudf_polars.dsl.ir import IR, Cache, Filter, HStack, Projection, Select, Union
15
+ from cudf_polars.dsl.traversal import CachingVisitor, traversal
16
+ from cudf_polars.experimental.base import PartitionInfo, _concat, get_key_name
17
+ from cudf_polars.experimental.dispatch import (
18
+ generate_ir_tasks,
19
+ lower_ir_node,
20
+ )
21
+
22
+ if TYPE_CHECKING:
23
+ from collections.abc import MutableMapping
24
+
25
+ from cudf_polars.containers import DataFrame
26
+ from cudf_polars.experimental.dispatch import LowerIRTransformer
27
+
28
+
29
+ @lower_ir_node.register(IR)
30
+ def _(ir: IR, rec: LowerIRTransformer) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
31
+ # Default logic - Requires single partition
32
+
33
+ if len(ir.children) == 0:
34
+ # Default leaf node has single partition
35
+ return ir, {
36
+ ir: PartitionInfo(count=1)
37
+ } # pragma: no cover; Missed by pylibcudf executor
38
+
39
+ # Lower children
40
+ children, _partition_info = zip(*(rec(c) for c in ir.children), strict=True)
41
+ partition_info = reduce(operator.or_, _partition_info)
42
+
43
+ # Check that child partitioning is supported
44
+ if any(partition_info[c].count > 1 for c in children):
45
+ raise NotImplementedError(
46
+ f"Class {type(ir)} does not support multiple partitions."
47
+ ) # pragma: no cover
48
+
49
+ # Return reconstructed node and partition-info dict
50
+ partition = PartitionInfo(count=1)
51
+ new_node = ir.reconstruct(children)
52
+ partition_info[new_node] = partition
53
+ return new_node, partition_info
54
+
55
+
56
+ def lower_ir_graph(ir: IR) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
57
+ """
58
+ Rewrite an IR graph and extract partitioning information.
59
+
60
+ Parameters
61
+ ----------
62
+ ir
63
+ Root of the graph to rewrite.
64
+
65
+ Returns
66
+ -------
67
+ new_ir, partition_info
68
+ The rewritten graph, and a mapping from unique nodes
69
+ in the new graph to associated partitioning information.
70
+
71
+ Notes
72
+ -----
73
+ This function traverses the unique nodes of the graph with
74
+ root `ir`, and applies :func:`lower_ir_node` to each node.
75
+
76
+ See Also
77
+ --------
78
+ lower_ir_node
79
+ """
80
+ mapper = CachingVisitor(lower_ir_node)
81
+ return mapper(ir)
82
+
83
+
84
+ def task_graph(
85
+ ir: IR, partition_info: MutableMapping[IR, PartitionInfo]
86
+ ) -> tuple[MutableMapping[Any, Any], str | tuple[str, int]]:
87
+ """
88
+ Construct a task graph for evaluation of an IR graph.
89
+
90
+ Parameters
91
+ ----------
92
+ ir
93
+ Root of the graph to rewrite.
94
+ partition_info
95
+ A mapping from all unique IR nodes to the
96
+ associated partitioning information.
97
+
98
+ Returns
99
+ -------
100
+ graph
101
+ A Dask-compatible task graph for the entire
102
+ IR graph with root `ir`.
103
+
104
+ Notes
105
+ -----
106
+ This function traverses the unique nodes of the
107
+ graph with root `ir`, and extracts the tasks for
108
+ each node with :func:`generate_ir_tasks`.
109
+
110
+ See Also
111
+ --------
112
+ generate_ir_tasks
113
+ """
114
+ graph = reduce(
115
+ operator.or_,
116
+ (generate_ir_tasks(node, partition_info) for node in traversal([ir])),
117
+ )
118
+
119
+ key_name = get_key_name(ir)
120
+ partition_count = partition_info[ir].count
121
+ if partition_count > 1:
122
+ graph[key_name] = (_concat, list(partition_info[ir].keys(ir)))
123
+ return graph, key_name
124
+ else:
125
+ return graph, (key_name, 0)
126
+
127
+
128
+ def evaluate_dask(ir: IR) -> DataFrame:
129
+ """Evaluate an IR graph with Dask."""
130
+ from dask import get
131
+
132
+ ir, partition_info = lower_ir_graph(ir)
133
+
134
+ graph, key = task_graph(ir, partition_info)
135
+ return get(graph, key)
136
+
137
+
138
+ @generate_ir_tasks.register(IR)
139
+ def _(
140
+ ir: IR, partition_info: MutableMapping[IR, PartitionInfo]
141
+ ) -> MutableMapping[Any, Any]:
142
+ # Single-partition default behavior.
143
+ # This is used by `generate_ir_tasks` for all unregistered IR sub-types.
144
+ if partition_info[ir].count > 1:
145
+ raise NotImplementedError(
146
+ f"Failed to generate multiple output tasks for {ir}."
147
+ ) # pragma: no cover
148
+
149
+ child_names = []
150
+ for child in ir.children:
151
+ child_names.append(get_key_name(child))
152
+ if partition_info[child].count > 1:
153
+ raise NotImplementedError(
154
+ f"Failed to generate tasks for {ir} with child {child}."
155
+ ) # pragma: no cover
156
+
157
+ key_name = get_key_name(ir)
158
+ return {
159
+ (key_name, 0): (
160
+ ir.do_evaluate,
161
+ *ir._non_child_args,
162
+ *((child_name, 0) for child_name in child_names),
163
+ )
164
+ }
165
+
166
+
167
+ @lower_ir_node.register(Union)
168
+ def _(
169
+ ir: Union, rec: LowerIRTransformer
170
+ ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
171
+ # Lower children
172
+ children, _partition_info = zip(*(rec(c) for c in ir.children), strict=True)
173
+ partition_info = reduce(operator.or_, _partition_info)
174
+
175
+ # Check zlice
176
+ if ir.zlice is not None: # pragma: no cover
177
+ if any(p[c].count > 1 for p, c in zip(children, _partition_info, strict=False)):
178
+ raise NotImplementedError("zlice is not supported for multiple partitions.")
179
+ new_node = ir.reconstruct(children)
180
+ partition_info[new_node] = PartitionInfo(count=1)
181
+ return new_node, partition_info
182
+
183
+ # Partition count is the sum of all child partitions
184
+ count = sum(partition_info[c].count for c in children)
185
+
186
+ # Return reconstructed node and partition-info dict
187
+ new_node = ir.reconstruct(children)
188
+ partition_info[new_node] = PartitionInfo(count=count)
189
+ return new_node, partition_info
190
+
191
+
192
+ @generate_ir_tasks.register(Union)
193
+ def _(
194
+ ir: Union, partition_info: MutableMapping[IR, PartitionInfo]
195
+ ) -> MutableMapping[Any, Any]:
196
+ key_name = get_key_name(ir)
197
+ partition = itertools.count()
198
+ return {
199
+ (key_name, next(partition)): child_key
200
+ for child in ir.children
201
+ for child_key in partition_info[child].keys(child)
202
+ }
203
+
204
+
205
+ def _lower_ir_pwise(
206
+ ir: IR, rec: LowerIRTransformer
207
+ ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
208
+ # Lower a partition-wise (i.e. embarrassingly-parallel) IR node
209
+
210
+ # Lower children
211
+ children, _partition_info = zip(*(rec(c) for c in ir.children), strict=True)
212
+ partition_info = reduce(operator.or_, _partition_info)
213
+ counts = {partition_info[c].count for c in children}
214
+
215
+ # Check that child partitioning is supported
216
+ if len(counts) > 1:
217
+ raise NotImplementedError(
218
+ f"Class {type(ir)} does not support unbalanced partitions."
219
+ ) # pragma: no cover
220
+
221
+ # Return reconstructed node and partition-info dict
222
+ partition = PartitionInfo(count=max(counts))
223
+ new_node = ir.reconstruct(children)
224
+ partition_info[new_node] = partition
225
+ return new_node, partition_info
226
+
227
+
228
+ lower_ir_node.register(Projection, _lower_ir_pwise)
229
+ lower_ir_node.register(Cache, _lower_ir_pwise)
230
+ lower_ir_node.register(Filter, _lower_ir_pwise)
231
+ lower_ir_node.register(HStack, _lower_ir_pwise)
232
+
233
+
234
+ def _generate_ir_tasks_pwise(
235
+ ir: IR, partition_info: MutableMapping[IR, PartitionInfo]
236
+ ) -> MutableMapping[Any, Any]:
237
+ # Generate partition-wise (i.e. embarrassingly-parallel) tasks
238
+ child_names = [get_key_name(c) for c in ir.children]
239
+ return {
240
+ key: (
241
+ ir.do_evaluate,
242
+ *ir._non_child_args,
243
+ *[(child_name, i) for child_name in child_names],
244
+ )
245
+ for i, key in enumerate(partition_info[ir].keys(ir))
246
+ }
247
+
248
+
249
+ generate_ir_tasks.register(Projection, _generate_ir_tasks_pwise)
250
+ generate_ir_tasks.register(Cache, _generate_ir_tasks_pwise)
251
+ generate_ir_tasks.register(Filter, _generate_ir_tasks_pwise)
252
+ generate_ir_tasks.register(HStack, _generate_ir_tasks_pwise)
253
+ generate_ir_tasks.register(Select, _generate_ir_tasks_pwise)
@@ -0,0 +1,36 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Parallel Select Logic."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import TYPE_CHECKING
8
+
9
+ from cudf_polars.dsl.ir import Select
10
+ from cudf_polars.dsl.traversal import traversal
11
+ from cudf_polars.experimental.dispatch import lower_ir_node
12
+
13
+ if TYPE_CHECKING:
14
+ from collections.abc import MutableMapping
15
+
16
+ from cudf_polars.dsl.ir import IR
17
+ from cudf_polars.experimental.base import PartitionInfo
18
+ from cudf_polars.experimental.parallel import LowerIRTransformer
19
+
20
+
21
+ @lower_ir_node.register(Select)
22
+ def _(
23
+ ir: Select, rec: LowerIRTransformer
24
+ ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
25
+ child, partition_info = rec(ir.children[0])
26
+ pi = partition_info[child]
27
+ if pi.count > 1 and not all(
28
+ expr.is_pointwise for expr in traversal([e.value for e in ir.exprs])
29
+ ):
30
+ # TODO: Handle non-pointwise expressions.
31
+ raise NotImplementedError(
32
+ f"Selection {ir} does not support multiple partitions."
33
+ )
34
+ new_node = ir.reconstruct([child])
35
+ partition_info[new_node] = pi
36
+ return new_node, partition_info
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  """Device-aware assertions."""
@@ -20,6 +20,11 @@ if TYPE_CHECKING:
20
20
  __all__: list[str] = ["assert_gpu_result_equal", "assert_ir_translation_raises"]
21
21
 
22
22
 
23
+ # Will be overridden by `conftest.py` with the value from the `--executor`
24
+ # command-line argument
25
+ Executor = None
26
+
27
+
23
28
  def assert_gpu_result_equal(
24
29
  lazydf: pl.LazyFrame,
25
30
  *,
@@ -34,6 +39,7 @@ def assert_gpu_result_equal(
34
39
  rtol: float = 1e-05,
35
40
  atol: float = 1e-08,
36
41
  categorical_as_str: bool = False,
42
+ executor: str | None = None,
37
43
  ) -> None:
38
44
  """
39
45
  Assert that collection of a lazyframe on GPU produces correct results.
@@ -71,6 +77,9 @@ def assert_gpu_result_equal(
71
77
  Absolute tolerance for float comparisons
72
78
  categorical_as_str
73
79
  Decat categoricals to strings before comparing
80
+ executor
81
+ The executor configuration to pass to `GPUEngine`. If not specified,
82
+ uses the module level `Executor` attribute.
74
83
 
75
84
  Raises
76
85
  ------
@@ -80,7 +89,7 @@ def assert_gpu_result_equal(
80
89
  If GPU collection failed in some way.
81
90
  """
82
91
  if engine is None:
83
- engine = GPUEngine(raise_on_fail=True)
92
+ engine = GPUEngine(raise_on_fail=True, executor=executor or Executor)
84
93
 
85
94
  final_polars_collect_kwargs, final_cudf_collect_kwargs = _process_kwargs(
86
95
  collect_kwargs, polars_collect_kwargs, cudf_collect_kwargs
@@ -126,9 +135,9 @@ def assert_ir_translation_raises(q: pl.LazyFrame, *exceptions: type[Exception])
126
135
  translator.translate_ir()
127
136
  if errors := translator.errors:
128
137
  for err in errors:
129
- assert any(
130
- isinstance(err, err_type) for err_type in exceptions
131
- ), f"Translation DID NOT RAISE {exceptions}"
138
+ assert any(isinstance(err, err_type) for err_type in exceptions), (
139
+ f"Translation DID NOT RAISE {exceptions}"
140
+ )
132
141
  return
133
142
  else:
134
143
  raise AssertionError(f"Translation DID NOT RAISE {exceptions}")
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  """Plugin for running polars test suite setting GPU engine as default."""
@@ -44,7 +44,7 @@ def pytest_configure(config: pytest.Config) -> None:
44
44
  )
45
45
 
46
46
 
47
- EXPECTED_FAILURES: Mapping[str, str] = {
47
+ EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = {
48
48
  "tests/unit/io/test_csv.py::test_compressed_csv": "Need to determine if file is compressed",
49
49
  "tests/unit/io/test_csv.py::test_read_csv_only_loads_selected_columns": "Memory usage won't be correct due to GPU",
50
50
  "tests/unit/io/test_delta.py::test_scan_delta_version": "Need to expose hive partitioning",
@@ -122,7 +122,15 @@ EXPECTED_FAILURES: Mapping[str, str] = {
122
122
  "tests/unit/io/test_scan.py::test_scan_with_row_index_filter_and_limit[single-parquet-async]": "Debug output on stderr doesn't match",
123
123
  "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_parquet-write_parquet]": "Need to add include_file_path to IR",
124
124
  "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_csv-write_csv]": "Need to add include_file_path to IR",
125
+ "tests/unit/io/test_scan.py::test_scan_include_file_paths[False-scan_parquet-write_parquet]": "Debug output on stderr doesn't match",
126
+ "tests/unit/io/test_scan.py::test_scan_include_file_paths[False-scan_csv-write_csv]": "Debug output on stderr doesn't match",
127
+ "tests/unit/io/test_scan.py::test_scan_include_file_paths[False-scan_ndjson-write_ndjson]": "Debug output on stderr doesn't match",
125
128
  "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_ndjson-write_ndjson]": "Need to add include_file_path to IR",
129
+ "tests/unit/io/test_write.py::test_write_async[read_parquet-write_parquet]": "Need to add include_file_path to IR",
130
+ "tests/unit/io/test_write.py::test_write_async[<lambda>-write_csv]": "Need to add include_file_path to IR",
131
+ "tests/unit/io/test_write.py::test_write_async[read_parquet-<lambda>]": "Need to add include_file_path to IR",
132
+ "tests/unit/io/test_write.py::test_write_async[<lambda>-<lambda>0]": "Need to add include_file_path to IR",
133
+ "tests/unit/io/test_write.py::test_write_async[<lambda>-<lambda>2]": "Need to add include_file_path to IR",
126
134
  "tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[gpu]": "Expect this to pass because cudf-polars is installed",
127
135
  "tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[engine1]": "Expect this to pass because cudf-polars is installed",
128
136
  "tests/unit/lazyframe/test_lazyframe.py::test_round[dtype1-123.55-1-123.6]": "Rounding midpoints is handled incorrectly",
@@ -140,6 +148,22 @@ EXPECTED_FAILURES: Mapping[str, str] = {
140
148
  "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func1-none]": "cudf-polars doesn't nullify division by zero",
141
149
  "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func2-none]": "cudf-polars doesn't nullify division by zero",
142
150
  "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func3-none]": "cudf-polars doesn't nullify division by zero",
151
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
152
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
153
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
154
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
155
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
156
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
157
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
158
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
159
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
160
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
161
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
162
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
163
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
164
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
165
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
166
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
143
167
  "tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values",
144
168
  "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input7-expected7-Float32-Float32]": "Mismatching dtypes, needs cudf#15852",
145
169
  "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input10-expected10-Date-output_dtype10]": "Unsupported groupby-agg for a particular dtype",
@@ -155,6 +179,7 @@ EXPECTED_FAILURES: Mapping[str, str] = {
155
179
  "tests/unit/operations/test_group_by.py::test_group_by_median_by_dtype[input15-expected15-input_dtype15-output_dtype15]": "Unsupported groupby-agg for a particular dtype",
156
180
  "tests/unit/operations/test_group_by.py::test_group_by_median_by_dtype[input16-expected16-input_dtype16-output_dtype16]": "Unsupported groupby-agg for a particular dtype",
157
181
  "tests/unit/operations/test_group_by.py::test_group_by_binary_agg_with_literal": "Incorrect broadcasting of literals in groupby-agg",
182
+ "tests/unit/operations/test_group_by.py::test_group_by_lit_series": "Incorrect broadcasting of literals in groupby-agg",
158
183
  "tests/unit/operations/test_group_by.py::test_aggregated_scalar_elementwise_15602": "Unsupported boolean function/dtype combination in groupby-agg",
159
184
  "tests/unit/operations/test_group_by.py::test_schemas[data1-expr1-expected_select1-expected_gb1]": "Mismatching dtypes, needs cudf#15852",
160
185
  "tests/unit/operations/test_join.py::test_cross_join_slice_pushdown": "Need to implement slice pushdown for cross joins",
@@ -174,6 +199,24 @@ EXPECTED_FAILURES: Mapping[str, str] = {
174
199
  }
175
200
 
176
201
 
202
+ TESTS_TO_SKIP: Mapping[str, str] = {
203
+ # On Ubuntu 20.04, the tzdata package contains a bunch of symlinks
204
+ # for obsolete timezone names. However, the chrono_tz package that
205
+ # polars uses doesn't read /usr/share/zoneinfo, instead packaging
206
+ # the current zoneinfo database from IANA. Consequently, when this
207
+ # hypothesis-generated test runs and generates timezones from the
208
+ # available zoneinfo-reported timezones, we can get an error from
209
+ # polars that the requested timezone is unknown.
210
+ # Since this is random, just skip it, rather than xfailing.
211
+ "tests/unit/lazyframe/test_serde.py::test_lf_serde_roundtrip_binary": "chrono_tz doesn't have all tzdata symlink names",
212
+ # The test may segfault with the legacy streaming engine. We should
213
+ # remove this skip when all polars tests use the new streaming engine.
214
+ "tests/unit/streaming/test_streaming_group_by.py::test_streaming_group_by_literal[1]": "May segfault w/the legacy streaming engine",
215
+ # Fails in CI, but passes locally
216
+ "tests/unit/streaming/test_streaming.py::test_streaming_streamable_functions": "RuntimeError: polars_python::sql::PySQLContext is unsendable, but is being dropped on another thread",
217
+ }
218
+
219
+
177
220
  def pytest_collection_modifyitems(
178
221
  session: pytest.Session, config: pytest.Config, items: list[pytest.Item]
179
222
  ) -> None:
@@ -182,5 +225,18 @@ def pytest_collection_modifyitems(
182
225
  # Don't xfail tests if running without fallback
183
226
  return
184
227
  for item in items:
185
- if item.nodeid in EXPECTED_FAILURES:
186
- item.add_marker(pytest.mark.xfail(reason=EXPECTED_FAILURES[item.nodeid]))
228
+ if item.nodeid in TESTS_TO_SKIP:
229
+ item.add_marker(pytest.mark.skip(reason=TESTS_TO_SKIP[item.nodeid]))
230
+ elif item.nodeid in EXPECTED_FAILURES:
231
+ if isinstance(EXPECTED_FAILURES[item.nodeid], tuple):
232
+ # the second entry in the tuple is the condition to xfail on
233
+ item.add_marker(
234
+ pytest.mark.xfail(
235
+ condition=EXPECTED_FAILURES[item.nodeid][1],
236
+ reason=EXPECTED_FAILURES[item.nodeid][0],
237
+ ),
238
+ )
239
+ else:
240
+ item.add_marker(
241
+ pytest.mark.xfail(reason=EXPECTED_FAILURES[item.nodeid])
242
+ )
@@ -21,13 +21,13 @@ if TYPE_CHECKING:
21
21
  from cudf_polars.dsl import expr, ir, nodebase
22
22
 
23
23
  __all__: list[str] = [
24
- "PolarsIR",
25
- "PolarsExpr",
26
- "NodeTraverser",
27
- "OptimizationArgs",
28
- "GenericTransformer",
29
24
  "ExprTransformer",
25
+ "GenericTransformer",
30
26
  "IRTransformer",
27
+ "NodeTraverser",
28
+ "OptimizationArgs",
29
+ "PolarsExpr",
30
+ "PolarsIR",
31
31
  ]
32
32
 
33
33
  PolarsIR: TypeAlias = Union[
@@ -19,9 +19,9 @@ from pylibcudf.traits import (
19
19
  )
20
20
 
21
21
  __all__ = [
22
- "from_polars",
23
- "downcast_arrow_lists",
24
22
  "can_cast",
23
+ "downcast_arrow_lists",
24
+ "from_polars",
25
25
  "is_order_preserving_cast",
26
26
  ]
27
27
  import pylibcudf as plc
@@ -75,11 +75,13 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
75
75
  return (
76
76
  (
77
77
  from_ == to
78
- or not has_empty
79
- and (
80
- plc.traits.is_fixed_width(to)
81
- and plc.traits.is_fixed_width(from_)
82
- and plc.unary.is_supported_cast(from_, to)
78
+ or (
79
+ not has_empty
80
+ and (
81
+ plc.traits.is_fixed_width(to)
82
+ and plc.traits.is_fixed_width(from_)
83
+ and plc.unary.is_supported_cast(from_, to)
84
+ )
83
85
  )
84
86
  )
85
87
  or (from_.id() == plc.TypeId.STRING and is_numeric_not_bool(to))
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  """Version utilities so that cudf_polars supports a range of polars versions."""
@@ -12,14 +12,11 @@ from polars import __version__
12
12
 
13
13
  POLARS_VERSION = parse(__version__)
14
14
 
15
- POLARS_VERSION_LT_111 = POLARS_VERSION < parse("1.11")
16
- POLARS_VERSION_LT_112 = POLARS_VERSION < parse("1.12")
17
- POLARS_VERSION_GT_112 = POLARS_VERSION > parse("1.12")
18
- POLARS_VERSION_LT_113 = POLARS_VERSION < parse("1.13")
15
+ POLARS_VERSION_LT_120 = POLARS_VERSION < parse("1.20")
19
16
 
20
17
 
21
18
  def _ensure_polars_version():
22
- if POLARS_VERSION_LT_111:
19
+ if POLARS_VERSION_LT_120:
23
20
  raise ImportError(
24
- "cudf_polars requires py-polars v1.11 or greater."
21
+ "cudf_polars requires py-polars v1.20 or greater."
25
22
  ) # pragma: no cover
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: cudf-polars-cu12
3
- Version: 24.12.0
3
+ Version: 25.2.0
4
4
  Summary: Executor for polars using cudf
5
5
  Author: NVIDIA Corporation
6
6
  License: Apache 2.0
@@ -16,15 +16,15 @@ Classifier: Programming Language :: Python :: 3.12
16
16
  Requires-Python: >=3.10
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: polars<1.15,>=1.11
20
- Requires-Dist: pylibcudf-cu12==24.12.*
19
+ Requires-Dist: polars<1.22,>=1.20
20
+ Requires-Dist: pylibcudf-cu12==25.2.*
21
21
  Provides-Extra: test
22
22
  Requires-Dist: numpy<3.0a0,>=1.23; extra == "test"
23
23
  Requires-Dist: pytest-cov; extra == "test"
24
24
  Requires-Dist: pytest-xdist; extra == "test"
25
25
  Requires-Dist: pytest<8; extra == "test"
26
26
  Provides-Extra: experimental
27
- Requires-Dist: rapids-dask-dependency==24.12.*; extra == "experimental"
27
+ Requires-Dist: rapids-dask-dependency==25.2.*; extra == "experimental"
28
28
 
29
29
  # <div align="left"><img src="img/rapids_logo.png" width="90px"/>&nbsp;cuDF - GPU DataFrames</div>
30
30
 
@@ -111,7 +111,7 @@ cuDF can be installed with conda (via [miniforge](https://github.com/conda-forge
111
111
 
112
112
  ```bash
113
113
  conda install -c rapidsai -c conda-forge -c nvidia \
114
- cudf=24.12 python=3.12 cuda-version=12.5
114
+ cudf=25.02 python=3.12 cuda-version=12.8
115
115
  ```
116
116
 
117
117
  We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
@@ -0,0 +1,48 @@
1
+ cudf_polars/VERSION,sha256=gWnOsR7j8lHNsXJO_balY3FJzbDTto6xlQk1ItvppEY,8
2
+ cudf_polars/__init__.py,sha256=fSTx5nmqajdwp7qvP4PnYL6wZN9-k1fKB43NkcZlHwk,740
3
+ cudf_polars/_version.py,sha256=kj5Ir4dxZRR-k2k8mWUDJHiGpE8_ZcTNzt_kMZxcFRA,528
4
+ cudf_polars/callback.py,sha256=u8CyAFgb9f8fL4eOBqGRQWb0BuoX6tIDaig4gJeAlLw,9023
5
+ cudf_polars/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ cudf_polars/containers/__init__.py,sha256=He8qCk9m37k7KOWC6s4QuL8LYF11KhP2vluH9s8EeYI,339
7
+ cudf_polars/containers/column.py,sha256=O4Sjnjs2gUoo50oaL2OZ45BFePjG1oo3yO163KIKY0E,8371
8
+ cudf_polars/containers/dataframe.py,sha256=Szzv4jc8cxux5MVII9mmEFwZdYn6jCNcVJ9EKSNLpb4,10637
9
+ cudf_polars/dsl/__init__.py,sha256=bYwYnqmqINMgwnkJ22EnXMlHviLolPaMgQ8QqoZL3YE,244
10
+ cudf_polars/dsl/expr.py,sha256=L6nmP4zsBITrzvTfR_QvO1NLnbTMlB-KUeFvXbyG26A,1795
11
+ cudf_polars/dsl/ir.py,sha256=6o7VLhfnNeP50YQzL-Jf9qxa55isjb6zmY2Fyntd2ik,64874
12
+ cudf_polars/dsl/nodebase.py,sha256=_ffrQJcsE0_Nwa6K8LG0N5DmgHHEFBnTE1oz8Ugx4N4,4352
13
+ cudf_polars/dsl/to_ast.py,sha256=q_lLko1AI9fYStzqEzASDqlRZrjBnoMfNu6MHAaI3d0,12309
14
+ cudf_polars/dsl/translate.py,sha256=Ex5uVL1sVpDAZrqfrC_xUVnR5SQ51n5v9-KnL5Q7pdU,24915
15
+ cudf_polars/dsl/traversal.py,sha256=dVH37YoLx-wo61lUofaOM77eji99yIaC62PSb3QYKHM,4610
16
+ cudf_polars/dsl/expressions/__init__.py,sha256=uj1a4BzrDVAjOgP6FKKtnvR5elF4ksUj1RhkLTZoS1k,224
17
+ cudf_polars/dsl/expressions/aggregation.py,sha256=BdKCdKj4DNxkYjmRNLER-7zYCo-PfXVY3ZFqPPoPeNM,8722
18
+ cudf_polars/dsl/expressions/base.py,sha256=a7GRXNRUeheR4haHLGtgC8j796D9p8yolF5tKKxT5do,9038
19
+ cudf_polars/dsl/expressions/binaryop.py,sha256=EU3YF0d5OQMMwIuZVsQgtHaWyJ1Qc09HBLuSpMma_xg,5640
20
+ cudf_polars/dsl/expressions/boolean.py,sha256=GCJhinhgGpKKO1RsBAWEwFdyQiFIvDDrWsnSMSoAEWU,11756
21
+ cudf_polars/dsl/expressions/datetime.py,sha256=H6Nmz_cHFthzlb8bpD5gDJSMv0BWcdTvPm-OTcXTPoQ,6861
22
+ cudf_polars/dsl/expressions/literal.py,sha256=BTNamYPm6fHxuMlH7Knr7pCWbqYZURf6DmDRZHPeklc,2864
23
+ cudf_polars/dsl/expressions/rolling.py,sha256=zeGjO-FqnQF7buxRrWdtuJe9x_CpCPjUhL6nJtk8jgU,1177
24
+ cudf_polars/dsl/expressions/selection.py,sha256=OK3dT9aP_-hpHzKhjVbPQ-uUe7YCcXelpMUZP-vxNOk,2900
25
+ cudf_polars/dsl/expressions/sorting.py,sha256=djIyJ_FXeJOSKGRIF7uKwa9uefgCVwLMaf4pQNMraUU,3000
26
+ cudf_polars/dsl/expressions/string.py,sha256=px-6_J46XXuS3JcM3zvV3O9QQCGhDy5t8ntkQl4TJeg,14220
27
+ cudf_polars/dsl/expressions/ternary.py,sha256=iF7g9XvZNXwNqFDjgKMGR-uWat6G3vJEZudesnQ4KUs,1499
28
+ cudf_polars/dsl/expressions/unary.py,sha256=H24itZGMoDv63y57pBSsnCPOJ5IrEuZxM5gNNQtfwh4,12788
29
+ cudf_polars/experimental/__init__.py,sha256=S2oI2K__woyPQAAlFMOo6RTMtdfIZxmzzAO92VtJgP4,256
30
+ cudf_polars/experimental/base.py,sha256=mW8A3DBK4S7VpD7k4UjGynWp6kDmV8uWQvj0EsWYQhg,1149
31
+ cudf_polars/experimental/dask_serialize.py,sha256=9-qTd04I9nRA-ijyv8daGLjXo5W6YbVnY8Z1ugyinCY,2029
32
+ cudf_polars/experimental/dispatch.py,sha256=pw6KyC_irRcQgKb8S0o1KKvRHQPFOikyv8kfMQtgxjc,2094
33
+ cudf_polars/experimental/io.py,sha256=ejOPjn5WEK7KiPoo4N_KRMTT82qROhFi12wRpKt9Bw8,11431
34
+ cudf_polars/experimental/parallel.py,sha256=M6vECqlpaOVpbm_MSw84EUxC7TgwO8YHrfEwzZgF-Bw,7981
35
+ cudf_polars/experimental/select.py,sha256=TMU-7DKv8e-F0Crqn811HDiBAHzyZxE68kTnYmuZYjk,1196
36
+ cudf_polars/testing/__init__.py,sha256=0MnlTjkTEqkSpL5GdMhQf4uXOaQwNrzgEJCZKa5FnL4,219
37
+ cudf_polars/testing/asserts.py,sha256=-D9ZNojBZk75Dz8c8IZUMF_sPOJc9bIsX2cybqX2zK8,7883
38
+ cudf_polars/testing/plugin.py,sha256=a6gxdQoC1Hgx1wSjm_nrwvs5W4AUdhpaH0qC8Of8jW4,24557
39
+ cudf_polars/typing/__init__.py,sha256=7A2xD8w62xG-jrx9ujxWkQnEGTt8WN-s_S_VT8pNP7M,3555
40
+ cudf_polars/utils/__init__.py,sha256=urdV5MUIneU8Dn6pt1db5GkDG0oY4NsFD0Uhl3j98l8,195
41
+ cudf_polars/utils/dtypes.py,sha256=6ygA_y-wTdv0WelNnWGZDnzgA2GKHfkhQ_hfP-jr8mk,6570
42
+ cudf_polars/utils/sorting.py,sha256=Mqb_KLsYnKU8p1dDan2mtlIQl65RqwM78OlUi-_Jj0k,1725
43
+ cudf_polars/utils/versions.py,sha256=4txZKAD5Ql-fEeW9HAomHgePOnNN1pj1e-XXPE0HSAQ,605
44
+ cudf_polars_cu12-25.2.0.dist-info/LICENSE,sha256=4YCpjWCbYMkMQFW47JXsorZLOaP957HwmP6oHW2_ngM,11348
45
+ cudf_polars_cu12-25.2.0.dist-info/METADATA,sha256=_NnRcF4Gzeq_9vIMkV2VUW1MNh8krORxnyNKCxbR-TU,4545
46
+ cudf_polars_cu12-25.2.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
47
+ cudf_polars_cu12-25.2.0.dist-info/top_level.txt,sha256=w2bOa7MpuyapYgZh480Znh4UzX7rSWlFcYR1Yo6QIPs,12
48
+ cudf_polars_cu12-25.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.6.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5