cudf-polars-cu13 25.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/GIT_COMMIT +1 -0
- cudf_polars/VERSION +1 -0
- cudf_polars/__init__.py +28 -0
- cudf_polars/_version.py +21 -0
- cudf_polars/callback.py +318 -0
- cudf_polars/containers/__init__.py +13 -0
- cudf_polars/containers/column.py +495 -0
- cudf_polars/containers/dataframe.py +361 -0
- cudf_polars/containers/datatype.py +137 -0
- cudf_polars/dsl/__init__.py +8 -0
- cudf_polars/dsl/expr.py +66 -0
- cudf_polars/dsl/expressions/__init__.py +8 -0
- cudf_polars/dsl/expressions/aggregation.py +226 -0
- cudf_polars/dsl/expressions/base.py +272 -0
- cudf_polars/dsl/expressions/binaryop.py +120 -0
- cudf_polars/dsl/expressions/boolean.py +326 -0
- cudf_polars/dsl/expressions/datetime.py +271 -0
- cudf_polars/dsl/expressions/literal.py +97 -0
- cudf_polars/dsl/expressions/rolling.py +643 -0
- cudf_polars/dsl/expressions/selection.py +74 -0
- cudf_polars/dsl/expressions/slicing.py +46 -0
- cudf_polars/dsl/expressions/sorting.py +85 -0
- cudf_polars/dsl/expressions/string.py +1002 -0
- cudf_polars/dsl/expressions/struct.py +137 -0
- cudf_polars/dsl/expressions/ternary.py +49 -0
- cudf_polars/dsl/expressions/unary.py +517 -0
- cudf_polars/dsl/ir.py +2607 -0
- cudf_polars/dsl/nodebase.py +164 -0
- cudf_polars/dsl/to_ast.py +359 -0
- cudf_polars/dsl/tracing.py +16 -0
- cudf_polars/dsl/translate.py +939 -0
- cudf_polars/dsl/traversal.py +224 -0
- cudf_polars/dsl/utils/__init__.py +8 -0
- cudf_polars/dsl/utils/aggregations.py +481 -0
- cudf_polars/dsl/utils/groupby.py +98 -0
- cudf_polars/dsl/utils/naming.py +34 -0
- cudf_polars/dsl/utils/replace.py +61 -0
- cudf_polars/dsl/utils/reshape.py +74 -0
- cudf_polars/dsl/utils/rolling.py +121 -0
- cudf_polars/dsl/utils/windows.py +192 -0
- cudf_polars/experimental/__init__.py +8 -0
- cudf_polars/experimental/base.py +386 -0
- cudf_polars/experimental/benchmarks/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds.py +220 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
- cudf_polars/experimental/benchmarks/pdsh.py +814 -0
- cudf_polars/experimental/benchmarks/utils.py +832 -0
- cudf_polars/experimental/dask_registers.py +200 -0
- cudf_polars/experimental/dispatch.py +156 -0
- cudf_polars/experimental/distinct.py +197 -0
- cudf_polars/experimental/explain.py +157 -0
- cudf_polars/experimental/expressions.py +590 -0
- cudf_polars/experimental/groupby.py +327 -0
- cudf_polars/experimental/io.py +943 -0
- cudf_polars/experimental/join.py +391 -0
- cudf_polars/experimental/parallel.py +423 -0
- cudf_polars/experimental/repartition.py +69 -0
- cudf_polars/experimental/scheduler.py +155 -0
- cudf_polars/experimental/select.py +188 -0
- cudf_polars/experimental/shuffle.py +354 -0
- cudf_polars/experimental/sort.py +609 -0
- cudf_polars/experimental/spilling.py +151 -0
- cudf_polars/experimental/statistics.py +795 -0
- cudf_polars/experimental/utils.py +169 -0
- cudf_polars/py.typed +0 -0
- cudf_polars/testing/__init__.py +8 -0
- cudf_polars/testing/asserts.py +448 -0
- cudf_polars/testing/io.py +122 -0
- cudf_polars/testing/plugin.py +236 -0
- cudf_polars/typing/__init__.py +219 -0
- cudf_polars/utils/__init__.py +8 -0
- cudf_polars/utils/config.py +741 -0
- cudf_polars/utils/conversion.py +40 -0
- cudf_polars/utils/dtypes.py +118 -0
- cudf_polars/utils/sorting.py +53 -0
- cudf_polars/utils/timer.py +39 -0
- cudf_polars/utils/versions.py +27 -0
- cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
- cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
- cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
- cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
- cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Multi-partition evaluation."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import itertools
|
|
8
|
+
import operator
|
|
9
|
+
from functools import partial, reduce
|
|
10
|
+
from typing import TYPE_CHECKING, Any
|
|
11
|
+
|
|
12
|
+
import cudf_polars.experimental.distinct
|
|
13
|
+
import cudf_polars.experimental.groupby
|
|
14
|
+
import cudf_polars.experimental.io
|
|
15
|
+
import cudf_polars.experimental.join
|
|
16
|
+
import cudf_polars.experimental.select
|
|
17
|
+
import cudf_polars.experimental.shuffle
|
|
18
|
+
import cudf_polars.experimental.sort # noqa: F401
|
|
19
|
+
from cudf_polars.dsl.ir import (
|
|
20
|
+
IR,
|
|
21
|
+
Cache,
|
|
22
|
+
Filter,
|
|
23
|
+
HConcat,
|
|
24
|
+
HStack,
|
|
25
|
+
MapFunction,
|
|
26
|
+
Projection,
|
|
27
|
+
Slice,
|
|
28
|
+
Union,
|
|
29
|
+
)
|
|
30
|
+
from cudf_polars.dsl.traversal import CachingVisitor, traversal
|
|
31
|
+
from cudf_polars.experimental.base import PartitionInfo, get_key_name
|
|
32
|
+
from cudf_polars.experimental.dispatch import (
|
|
33
|
+
generate_ir_tasks,
|
|
34
|
+
lower_ir_node,
|
|
35
|
+
)
|
|
36
|
+
from cudf_polars.experimental.io import _clear_source_info_cache
|
|
37
|
+
from cudf_polars.experimental.repartition import Repartition
|
|
38
|
+
from cudf_polars.experimental.statistics import collect_statistics
|
|
39
|
+
from cudf_polars.experimental.utils import _concat, _contains_over, _lower_ir_fallback
|
|
40
|
+
|
|
41
|
+
if TYPE_CHECKING:
|
|
42
|
+
from collections.abc import MutableMapping
|
|
43
|
+
from typing import Any
|
|
44
|
+
|
|
45
|
+
from cudf_polars.containers import DataFrame
|
|
46
|
+
from cudf_polars.experimental.dispatch import LowerIRTransformer, State
|
|
47
|
+
from cudf_polars.utils.config import ConfigOptions
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@lower_ir_node.register(IR)
def _(
    ir: IR, rec: LowerIRTransformer
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:  # pragma: no cover
    """Default lowering for IR types without a specialized handler."""
    # An unrecognized IR node is assumed to require a single partition,
    # so collapse everything beneath it and warn the user.
    reason = f"Class {type(ir)} does not support multiple partitions."
    return _lower_ir_fallback(ir, rec, msg=reason)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def lower_ir_graph(
    ir: IR, config_options: ConfigOptions
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    """
    Rewrite an IR graph and extract partitioning information.

    Parameters
    ----------
    ir
        Root of the graph to rewrite.
    config_options
        GPUEngine configuration options.

    Returns
    -------
    new_ir, partition_info
        The rewritten graph, and a mapping from unique nodes
        in the new graph to associated partitioning information.

    Notes
    -----
    Each unique node reachable from ``ir`` is visited once (results
    are memoized by :class:`CachingVisitor`) and rewritten with
    :func:`lower_ir_node`.

    See Also
    --------
    lower_ir_node
    """
    # Pre-compute statistics once; they are shared by every
    # lower_ir_node invocation through the visitor state.
    stats = collect_statistics(ir, config_options)
    state: State = {"config_options": config_options, "stats": stats}
    rewriter: LowerIRTransformer = CachingVisitor(lower_ir_node, state=state)
    return rewriter(ir)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def task_graph(
    ir: IR,
    partition_info: MutableMapping[IR, PartitionInfo],
    config_options: ConfigOptions,
) -> tuple[MutableMapping[Any, Any], str | tuple[str, int]]:
    """
    Construct a task graph for evaluation of an IR graph.

    Parameters
    ----------
    ir
        Root of the graph to evaluate.
    partition_info
        A mapping from all unique IR nodes to the
        associated partitioning information.
    config_options
        GPUEngine configuration options.

    Returns
    -------
    graph, key
        A Dask-compatible task graph for the entire IR graph with
        root ``ir``, and the output key to extract from it.

    Notes
    -----
    Tasks for each unique node are produced by
    :func:`generate_ir_tasks`, and the merged graph is passed through
    :func:`post_process_task_graph` for executor-specific processing.

    See Also
    --------
    generate_ir_tasks
    """
    # Merge the per-node task dictionaries into a single graph.
    graph: MutableMapping[Any, Any] = {}
    for node in traversal([ir]):
        graph.update(generate_ir_tasks(node, partition_info))

    root_name = get_key_name(ir)

    key: str | tuple[str, int]
    if partition_info[ir].count > 1:
        # Multiple output partitions: add a final concatenation task
        # and use its (string) name as the output key.
        graph[root_name] = (_concat, *partition_info[ir].keys(ir))
        key = root_name
    else:
        # Single partition: the output key is the partition-0 tuple.
        key = (root_name, 0)

    return post_process_task_graph(graph, key, config_options), key
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# The true type signature for get_scheduler() needs an overload. Not worth it.
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def get_scheduler(config_options: ConfigOptions) -> Any:
    """
    Get appropriate task scheduler.

    Parameters
    ----------
    config_options
        GPUEngine configuration options. Must use the "streaming"
        executor.

    Returns
    -------
    A scheduler callable with a ``(graph, key)`` signature: either
    ``distributed.Client.get`` or the local synchronous scheduler.

    Raises
    ------
    ValueError
        If the configured scheduler option is not supported.
    """
    # NOTE: The original assert message referenced 'generate_ir_tasks';
    # it now correctly names this function.
    assert config_options.executor.name == "streaming", (
        "'in-memory' executor not supported in 'get_scheduler'"
    )

    scheduler = config_options.executor.scheduler

    if (
        scheduler == "distributed"
    ):  # pragma: no cover; block depends on executor type and Distributed cluster
        from distributed import get_client

        from cudf_polars.experimental.dask_registers import DaskRegisterManager

        # Ensure (de)serialization hooks are registered on the client
        # and on every worker before handing back the distributed get.
        client = get_client()
        DaskRegisterManager.register_once()
        DaskRegisterManager.run_on_cluster(client)
        return client.get
    elif scheduler == "synchronous":
        from cudf_polars.experimental.scheduler import synchronous_scheduler

        return synchronous_scheduler
    else:  # pragma: no cover
        raise ValueError(f"{scheduler} not a supported scheduler option.")
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def post_process_task_graph(
    graph: MutableMapping[Any, Any],
    key: str | tuple[str, int],
    config_options: ConfigOptions,
) -> MutableMapping[Any, Any]:
    """
    Post-process the task graph.

    Parameters
    ----------
    graph
        Task graph to post-process.
    key
        Output key for the graph.
    config_options
        GPUEngine configuration options.

    Returns
    -------
    graph
        A Dask-compatible task graph.
    """
    assert config_options.executor.name == "streaming", (
        "'in-memory' executor not supported in 'post_process_task_graph'"
    )

    # Without rapidsmpf spilling, the graph needs no further processing.
    if not config_options.executor.rapidsmpf_spill:
        return graph

    # pragma: no cover
    from cudf_polars.experimental.spilling import wrap_dataframe_in_spillable

    # Wrap intermediate results so they can be spilled from device
    # memory; the final output (``key``) is left unwrapped.
    return wrap_dataframe_in_spillable(
        graph, ignore_key=key, config_options=config_options
    )
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def evaluate_streaming(
    ir: IR,
    config_options: ConfigOptions,
) -> DataFrame:
    """
    Evaluate an IR graph with partitioning.

    Parameters
    ----------
    ir
        Logical plan to evaluate.
    config_options
        GPUEngine configuration options.

    Returns
    -------
    A cudf-polars DataFrame object.
    """
    # Clear source info cache in case data was overwritten
    _clear_source_info_cache()

    # Lower the plan, build the task graph, and hand it to the
    # configured scheduler for execution.
    lowered, partition_info = lower_ir_graph(ir, config_options)
    graph, key = task_graph(lowered, partition_info, config_options)
    scheduler = get_scheduler(config_options)
    return scheduler(graph, key)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
@generate_ir_tasks.register(IR)
def _(
    ir: IR, partition_info: MutableMapping[IR, PartitionInfo]
) -> MutableMapping[Any, Any]:
    """Default task generation: treat the node as pointwise."""
    # Pair each child's key name with a flag marking whether that child
    # is a broadcast (single-partition) input. Broadcast children always
    # contribute their partition 0; others contribute partition i.
    child_info = [
        (get_key_name(c), partition_info[c].count == 1) for c in ir.children
    ]

    graph: MutableMapping[Any, Any] = {}
    for i, key in enumerate(partition_info[ir].keys(ir)):
        graph[key] = (
            ir.do_evaluate,
            *ir._non_child_args,
            *((name, 0 if bcast else i) for name, bcast in child_info),
        )
    return graph
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
@lower_ir_node.register(Union)
def _(
    ir: Union, rec: LowerIRTransformer
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    """Lower a Union: its partitions are the concatenation of its children's."""
    # A Union carrying a slice is rewritten as an explicit Slice of an
    # unsliced Union, then re-lowered.
    if ir.zlice is not None:
        unsliced = Union(ir.schema, None, *ir.children)
        return rec(Slice(ir.schema, *ir.zlice, unsliced))

    # Lower all children and merge their partition-info mappings.
    lowered = [rec(c) for c in ir.children]
    children = tuple(child for child, _ in lowered)
    partition_info = reduce(operator.or_, (info for _, info in lowered))

    # The Union's partition count is the sum over its children.
    total = sum(partition_info[c].count for c in children)

    new_node = ir.reconstruct(children)
    partition_info[new_node] = PartitionInfo(count=total)
    return new_node, partition_info
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
@generate_ir_tasks.register(Union)
def _(
    ir: Union, partition_info: MutableMapping[IR, PartitionInfo]
) -> MutableMapping[Any, Any]:
    """Union tasks are pure aliases of the child partitions, in order."""
    name = get_key_name(ir)
    # Chain every child's partition keys and number them sequentially.
    child_keys = (
        child_key
        for child in ir.children
        for child_key in partition_info[child].keys(child)
    )
    return {(name, i): child_key for i, child_key in enumerate(child_keys)}
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
@lower_ir_node.register(MapFunction)
def _(
    ir: MapFunction, rec: LowerIRTransformer
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    """Lower a MapFunction node."""
    # Only "rename" and "explode" are known to be pointwise; everything
    # else collapses to a single partition.
    if ir.name not in ("rename", "explode"):
        return _lower_ir_fallback(
            ir, rec, msg=f"{ir.name} is not supported for multiple partitions."
        )
    return _lower_ir_pwise(ir, rec)
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _lower_ir_pwise(
    ir: IR, rec: LowerIRTransformer, *, preserve_partitioning: bool = False
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    """
    Lower a partition-wise (embarrassingly-parallel) IR node.

    Parameters
    ----------
    ir
        Node to lower.
    rec
        Recursive lowering transformer.
    preserve_partitioning
        When True and the node has a single child, reuse the child's
        PartitionInfo object for the new node; otherwise only the
        partition count is carried over.
    """
    # Lower all children and merge their partition-info mappings.
    lowered = [rec(c) for c in ir.children]
    children = tuple(child for child, _ in lowered)
    partition_info = reduce(operator.or_, (info for _, info in lowered))

    # All children must agree on the partition count.
    counts = {partition_info[c].count for c in children}
    if len(counts) > 1:  # pragma: no cover
        return _lower_ir_fallback(
            ir,
            rec,
            msg=f"Class {type(ir)} does not support children with mismatched partition counts.",
        )

    # Preserve the child's full partition info when requested and
    # unambiguous; otherwise carry only the count.
    partition = (
        partition_info[children[0]]
        if preserve_partitioning and len(children) == 1
        else PartitionInfo(count=max(counts))
    )

    new_node = ir.reconstruct(children)
    partition_info[new_node] = partition
    return new_node, partition_info
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
# Simple IR types that are lowered with the generic partition-wise helper.
# Projection reuses the child's full PartitionInfo; Cache and HConcat only
# carry over the partition count.
_lower_ir_pwise_preserve = partial(_lower_ir_pwise, preserve_partitioning=True)
lower_ir_node.register(Projection, _lower_ir_pwise_preserve)
lower_ir_node.register(Cache, _lower_ir_pwise)
lower_ir_node.register(HConcat, _lower_ir_pwise)
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
@lower_ir_node.register(Filter)
def _(
    ir: Filter, rec: LowerIRTransformer
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    """Lower a Filter node; falls back for masks we cannot partition."""
    child, partition_info = rec(ir.children[0])
    multi_partition = partition_info[child].count > 1

    # A mask containing .over(...) cannot be evaluated per-partition.
    if multi_partition and _contains_over([ir.mask.value]):
        return _lower_ir_fallback(
            ir.reconstruct([child]),
            rec,
            msg=(
                "over(...) inside filter is not supported for multiple partitions; "
                "falling back to in-memory evaluation."
            ),
        )

    # Non-pointwise mask expressions are also unsupported across partitions.
    if multi_partition and not all(
        expr.is_pointwise for expr in traversal([ir.mask.value])
    ):
        # TODO: Use expression decomposition to lower Filter
        # See: https://github.com/rapidsai/cudf/issues/20076
        return _lower_ir_fallback(
            ir, rec, msg="This filter is not supported for multiple partitions."
        )

    # Pointwise mask: the filter applies partition-by-partition and the
    # child's partitioning is preserved.
    new_node = ir.reconstruct([child])
    partition_info[new_node] = partition_info[child]
    return new_node, partition_info
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
@lower_ir_node.register(Slice)
def _(
    ir: Slice, rec: LowerIRTransformer
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    """Lower a Slice node; only head-style slices (offset == 0) scale out."""
    if ir.offset != 0:
        # General slices need global row positions; fall back.
        return _lower_ir_fallback(
            ir, rec, msg="This slice not supported for multiple partitions."
        )

    # Taking the first N rows. Partition sizes are unknown up front, so
    # first apply the slice partition-wise (each partition keeps at most
    # N rows), then reduce and slice once more.
    new_node, partition_info = _lower_ir_pwise(ir, rec)
    if partition_info[new_node].count > 1:
        # Collapse the pre-sliced partitions down to one...
        combined = Repartition(new_node.schema, new_node)
        partition_info[combined] = PartitionInfo(count=1)
        # ...and slice the combined result.
        new_node = ir.reconstruct([combined])
        partition_info[new_node] = PartitionInfo(count=1)
    return new_node, partition_info
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
@lower_ir_node.register(HStack)
def _(
    ir: HStack, rec: LowerIRTransformer
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    """Lower an HStack node; requires all new columns to be pointwise."""
    column_exprs = [e.value for e in ir.columns]
    if not all(expr.is_pointwise for expr in traversal(column_exprs)):
        # TODO: Avoid fallback if/when possible
        return _lower_ir_fallback(
            ir, rec, msg="This HStack not supported for multiple partitions."
        )

    # Pointwise columns: evaluate per-partition, preserving the child's
    # partitioning.
    child, partition_info = rec(ir.children[0])
    new_node = ir.reconstruct([child])
    partition_info[new_node] = partition_info[child]
    return new_node, partition_info
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Repartitioning Logic."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import itertools
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
|
+
|
|
10
|
+
from cudf_polars.dsl.ir import IR
|
|
11
|
+
from cudf_polars.experimental.base import get_key_name
|
|
12
|
+
from cudf_polars.experimental.dispatch import generate_ir_tasks
|
|
13
|
+
from cudf_polars.experimental.utils import _concat
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from collections.abc import MutableMapping
|
|
17
|
+
|
|
18
|
+
from cudf_polars.experimental.parallel import PartitionInfo
|
|
19
|
+
from cudf_polars.typing import Schema
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Repartition(IR):
    """
    Repartition a DataFrame.

    Notes
    -----
    Repartitioning means that we are not modifying any
    data, nor are we reordering or shuffling rows. We
    are only changing the overall partition count. For
    now, we only support an N -> [1...N] repartitioning
    (inclusive). The output partition count is tracked
    separately using PartitionInfo.
    """

    # No attributes beyond those of the IR base class.
    __slots__ = ()
    # ``schema`` is the only non-child constructor argument.
    _non_child = ("schema",)

    def __init__(self, schema: Schema, df: IR):
        """
        Parameters
        ----------
        schema
            Output schema (unchanged from the child).
        df
            Child IR node to repartition.
        """
        self.schema = schema
        # Evaluation needs no extra arguments; partition counts live in
        # the PartitionInfo mapping, not on the node itself.
        self._non_child_args = ()
        self.children = (df,)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@generate_ir_tasks.register(Repartition)
def _(
    ir: Repartition, partition_info: MutableMapping[IR, PartitionInfo]
) -> MutableMapping[Any, Any]:
    """Map N input partitions onto a smaller number of concatenated outputs."""
    (child,) = ir.children
    count_in = partition_info[child].count
    count_out = partition_info[ir].count

    # Only repartitioning to the same number or fewer partitions is
    # supported (for now).
    if count_out > count_in:  # pragma: no cover
        raise NotImplementedError(
            f"Repartition {count_in} -> {count_out} not supported."
        )

    # Each output partition concatenates a contiguous run of input
    # partitions; the remainder is spread evenly over the first outputs.
    quotient, remainder = divmod(count_in, count_out)
    sizes = (quotient + (i < remainder) for i in range(count_out))
    offsets = [0, *itertools.accumulate(sizes)]

    name = get_key_name(ir)
    child_keys = tuple(partition_info[child].keys(child))
    return {
        (name, i): (_concat, *child_keys[start:stop])
        for i, (start, stop) in enumerate(zip(offsets, offsets[1:]))
    }
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Synchronous task scheduler."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from collections import Counter
|
|
8
|
+
from collections.abc import MutableMapping
|
|
9
|
+
from itertools import chain
|
|
10
|
+
from typing import TYPE_CHECKING, Any, TypeVar
|
|
11
|
+
|
|
12
|
+
from typing_extensions import Unpack
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from collections.abc import Mapping
|
|
16
|
+
from typing import TypeAlias
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
Key: TypeAlias = str | tuple[str, Unpack[tuple[int, ...]]]
|
|
20
|
+
Graph: TypeAlias = MutableMapping[Key, Any]
|
|
21
|
+
T_ = TypeVar("T_")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# NOTE: This is a slimmed-down version of the single-threaded
|
|
25
|
+
# (synchronous) scheduler in `dask.core`.
|
|
26
|
+
#
|
|
27
|
+
# Key Differences:
|
|
28
|
+
# * We do not allow a task to contain a list of key names.
|
|
29
|
+
# Keys must be distinct elements of the task.
|
|
30
|
+
# * We do not support nested tasks.
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def istask(x: Any) -> bool:
    """Return True when ``x`` is a task: a non-empty tuple whose head is callable."""
    if not isinstance(x, tuple) or not x:
        return False
    return callable(x[0])
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def is_hashable(x: Any) -> bool:
    """
    Check if x is hashable.

    Parameters
    ----------
    x
        Object to test.

    Returns
    -------
    True if ``hash(x)`` succeeds, False otherwise.
    """
    try:
        hash(x)
    except Exception:
        # Unhashable objects raise TypeError, but a broken custom
        # __hash__ may raise anything. Catch Exception (not
        # BaseException) so KeyboardInterrupt/SystemExit still
        # propagate instead of being swallowed as "not hashable".
        return False
    else:
        return True
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _execute_task(arg: Any, cache: Mapping) -> Any:
    """Execute a compute task."""
    # A task tuple: resolve each argument recursively, then apply the
    # callable at position 0.
    if istask(arg):
        func = arg[0]
        resolved_args = (_execute_task(a, cache) for a in arg[1:])
        return func(*resolved_args)
    # A hashable value may be a key naming an earlier result; substitute
    # the cached value if present, otherwise pass the literal through.
    if is_hashable(arg):
        return cache.get(arg, arg)
    # Unhashable literal: pass through unchanged.
    return arg
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def required_keys(key: Key, graph: Graph) -> list[Key]:
    """
    Return the dependencies to extract a key from the graph.

    Parameters
    ----------
    key
        Root key we want to extract.
    graph
        The full task graph.

    Returns
    -------
    List of other keys needed to extract ``key``.
    """
    value = graph[key]
    # A task's dependencies are its arguments; a non-task value may
    # itself be an alias for another key.
    candidates = value[1:] if istask(value) else [value]
    # Only hashable candidates that name other graph entries count.
    return [k for k in candidates if is_hashable(k) and k in graph]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def toposort(graph: Graph, dependencies: Mapping[Key, list[Key]]) -> list[Key]:
    """Return a list of task keys sorted in topological order."""
    # Iterative post-order DFS: a key is emitted only after all of its
    # dependencies have been emitted. Based on Tarjan's algorithm for
    # strongly-connected components
    # (https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
    order: list[Key] = []
    done: set[Key] = set()

    for root in graph:
        if root in done:
            continue
        stack = [root]
        while stack:
            # Peek (don't pop): the node stays until its subtree is done.
            node = stack[-1]
            if node in done:  # pragma: no cover
                # Reached again via another path after completion.
                stack.pop()
                continue

            pending = set(dependencies[node]) - done
            if pending:
                # Descend into unfinished dependencies first.
                stack.extend(pending)
            else:
                # All dependencies emitted; emit this node.
                order.append(node)
                done.add(node)
                stack.pop()

    return order
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def synchronous_scheduler(
    graph: Graph,
    key: Key,
    *,
    cache: MutableMapping | None = None,
) -> Any:
    """
    Execute the task graph for a given key.

    Parameters
    ----------
    graph
        The task graph to execute.
    key
        The final output key to extract from the graph.
    cache
        Intermediate-data cache.

    Returns
    -------
    Executed task-graph result for ``key``.
    """
    if key not in graph:  # pragma: no cover
        raise KeyError(f"{key} is not a key in the graph")
    cache = {} if cache is None else cache

    # Map every key to its in-graph dependencies, and count how many
    # consumers each key has so intermediates can be freed eagerly.
    dependencies = {k: required_keys(k, graph) for k in graph}
    consumers = Counter(chain.from_iterable(dependencies.values()))

    # Run tasks in dependency order, caching each result.
    for task_key in toposort(graph, dependencies):
        cache[task_key] = _execute_task(graph[task_key], cache)
        # Release inputs that have no remaining consumers (but never
        # the requested output).
        for dep in dependencies[task_key]:
            consumers[dep] -= 1
            if consumers[dep] == 0 and dep != key:
                del cache[dep]

    return cache[key]
|