lsst-pipe-base 29.2025.1000-py3-none-any.whl → 29.2025.1200-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. lsst/pipe/base/_datasetQueryConstraints.py +1 -1
  2. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +642 -357
  3. lsst/pipe/base/connections.py +179 -2
  4. lsst/pipe/base/pipeline_graph/visualization/_mermaid.py +157 -24
  5. lsst/pipe/base/prerequisite_helpers.py +1 -1
  6. lsst/pipe/base/quantum_graph_builder.py +91 -60
  7. lsst/pipe/base/quantum_graph_skeleton.py +20 -0
  8. lsst/pipe/base/quantum_provenance_graph.py +790 -421
  9. lsst/pipe/base/tests/mocks/_data_id_match.py +4 -0
  10. lsst/pipe/base/version.py +1 -1
  11. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/METADATA +5 -2
  12. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/RECORD +20 -20
  13. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/WHEEL +1 -1
  14. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/entry_points.txt +0 -0
  15. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info/licenses}/COPYRIGHT +0 -0
  16. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info/licenses}/LICENSE +0 -0
  17. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info/licenses}/bsd_license.txt +0 -0
  18. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info/licenses}/gpl-v3.0.txt +0 -0
  19. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/top_level.txt +0 -0
  20. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/zip-safe +0 -0
@@ -35,6 +35,7 @@ __all__ = [
     "InputQuantizedConnection",
     "OutputQuantizedConnection",
     "PipelineTaskConnections",
+    "QuantaAdjuster",
    "QuantizedConnection",
     "ScalarError",
     "ScalarError",
@@ -45,8 +46,8 @@ import dataclasses
 import itertools
 import string
 import warnings
-from collections import UserDict
-from collections.abc import Collection, Generator, Iterable, Mapping, Sequence, Set
+from collections import UserDict, defaultdict
+from collections.abc import Collection, Generator, Iterable, Iterator, Mapping, Sequence, Set
 from dataclasses import dataclass
 from types import MappingProxyType, SimpleNamespace
 from typing import TYPE_CHECKING, Any
@@ -58,6 +59,8 @@ from .connectionTypes import BaseConnection, BaseInput, Output, PrerequisiteInpu
 
 if TYPE_CHECKING:
     from .config import PipelineTaskConfig
+    from .pipeline_graph import PipelineGraph, TaskNode
+    from .quantum_graph_skeleton import QuantumGraphSkeleton
 
 
 class ScalarError(TypeError):
@@ -999,6 +1002,25 @@ class PipelineTaskConnections(metaclass=PipelineTaskConnectionsMetaclass):
         """
         return ()
 
+    def adjust_all_quanta(self, adjuster: QuantaAdjuster) -> None:
+        """Customize the set of quanta predicted for this task during quantum
+        graph generation.
+
+        Parameters
+        ----------
+        adjuster : `QuantaAdjuster`
+            A helper object that implementations can use to modify the
+            under-construction quantum graph.
+
+        Notes
+        -----
+        This hook is called before `adjustQuantum`, which is where built-in
+        checks for `NoWorkFound` cases and missing prerequisites are handled.
+        This means that the set of preliminary quanta seen by this method could
+        include some that would normally be dropped later.
+        """
+        pass
+
 
 def iterConnections(
     connections: PipelineTaskConnections, connectionType: str | Iterable[str]
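
Since adjust_all_quanta is the new entry point for whole-task quantum adjustment, a minimal sketch of an override may help. It assumes a hypothetical "exposures" input connection and uses only the QuantaAdjuster methods defined in the next hunk:

    from lsst.pipe.base.connections import PipelineTaskConnections, QuantaAdjuster

    class ExampleConnections(PipelineTaskConnections, dimensions=("visit",)):
        # ... connection declarations omitted; assume an "exposures" input ...

        def adjust_all_quanta(self, adjuster: QuantaAdjuster) -> None:
            # Snapshot the data IDs first: remove_quantum mutates the graph
            # being iterated over.
            for data_id in list(adjuster.iter_data_ids()):
                if len(adjuster.get_inputs(data_id)["exposures"]) < 2:
                    adjuster.remove_quantum(data_id)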
@@ -1130,3 +1152,158 @@ class AdjustQuantumHelper:
             self.outputs_adjusted = True
         else:
             self.outputs_adjusted = False
+
+
+class QuantaAdjuster:
+    """A helper class for the `PipelineTaskConnections.adjust_all_quanta` hook.
+
+    Parameters
+    ----------
+    task_label : `str`
+        Label of the task whose quanta are being adjusted.
+    pipeline_graph : `pipeline_graph.PipelineGraph`
+        Pipeline graph the quantum graph is being built from.
+    skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
+        Under-construction quantum graph that will be modified in place.
+    """
+
+    def __init__(self, task_label: str, pipeline_graph: PipelineGraph, skeleton: QuantumGraphSkeleton):
+        self._task_node = pipeline_graph.tasks[task_label]
+        self._pipeline_graph = pipeline_graph
+        self._skeleton = skeleton
+        self._n_removed = 0
+
+    @property
+    def task_label(self) -> str:
+        """The label this task has been configured with."""
+        return self._task_node.label
+
+    @property
+    def task_node(self) -> TaskNode:
+        """The node for this task in the pipeline graph."""
+        return self._task_node
+
+    def iter_data_ids(self) -> Iterator[DataCoordinate]:
+        """Iterate over the data IDs of all quanta for this task.
+
+        Returns
+        -------
+        data_ids : `~collections.abc.Iterator` [ \
+                `~lsst.daf.butler.DataCoordinate` ]
+            Data IDs.  These are minimal data IDs without dimension records or
+            implied values; use `expand_quantum_data_id` to get a full data ID
+            when needed.
+        """
+        for key in self._skeleton.get_quanta(self._task_node.label):
+            yield DataCoordinate.from_required_values(self._task_node.dimensions, key.data_id_values)
+
+    def remove_quantum(self, data_id: DataCoordinate) -> None:
+        """Remove a quantum from the graph.
+
+        Parameters
+        ----------
+        data_id : `~lsst.daf.butler.DataCoordinate`
+            Data ID of the quantum to remove.  All outputs will be removed as
+            well.
+        """
+        from .quantum_graph_skeleton import QuantumKey
+
+        self._skeleton.remove_quantum_node(
+            QuantumKey(self._task_node.label, data_id.required_values), remove_outputs=True
+        )
+        self._n_removed += 1
+
+    def get_inputs(self, quantum_data_id: DataCoordinate) -> dict[str, list[DataCoordinate]]:
+        """Return the data IDs of all regular inputs to a quantum.
+
+        Parameters
+        ----------
+        quantum_data_id : `~lsst.daf.butler.DataCoordinate`
+            Data ID of the quantum to get the inputs of.
+
+        Returns
+        -------
+        inputs : `dict` [ `str`, `list` [ `~lsst.daf.butler.DataCoordinate` ] ]
+            Data IDs of inputs, keyed by the connection name (the internal task
+            name, not the dataset type name).  This only contains regular
+            inputs, not init-inputs or prerequisite inputs.
+
+        Notes
+        -----
+        If two connections have the same dataset type, the current
+        implementation assumes the set of datasets is the same for the two
+        connections.  This limitation may be removed in the future.
+        """
+        from .quantum_graph_skeleton import DatasetKey, QuantumKey
+
+        by_dataset_type_name: defaultdict[str, list[DataCoordinate]] = defaultdict(list)
+        quantum_key = QuantumKey(self._task_node.label, quantum_data_id.required_values)
+        for dataset_key in self._skeleton.iter_inputs_of(quantum_key):
+            if not isinstance(dataset_key, DatasetKey):
+                continue
+            dataset_type_node = self._pipeline_graph.dataset_types[dataset_key.parent_dataset_type_name]
+            by_dataset_type_name[dataset_key.parent_dataset_type_name].append(
+                DataCoordinate.from_required_values(dataset_type_node.dimensions, dataset_key.data_id_values)
+            )
+        return {
+            edge.connection_name: by_dataset_type_name[edge.parent_dataset_type_name]
+            for edge in self._task_node.iter_all_inputs()
+        }
+
+    def add_input(
+        self, quantum_data_id: DataCoordinate, connection_name: str, dataset_data_id: DataCoordinate
+    ) -> None:
+        """Add a new input to a quantum.
+
+        Parameters
+        ----------
+        quantum_data_id : `~lsst.daf.butler.DataCoordinate`
+            Data ID of the quantum to add an input to.
+        connection_name : `str`
+            Name of the connection (the task-internal name, not the butler
+            dataset type name).
+        dataset_data_id : `~lsst.daf.butler.DataCoordinate`
+            Data ID of the input dataset.  Must already exist in the graph
+            as an input to a different quantum of this task, and must be a
+            regular input, not a prerequisite input or init-input.
+
+        Notes
+        -----
+        If two connections have the same dataset type, the current
+        implementation assumes the set of datasets is the same for the two
+        connections.  This limitation may be removed in the future.
+        """
+        from .quantum_graph_skeleton import DatasetKey, QuantumKey
+
+        quantum_key = QuantumKey(self._task_node.label, quantum_data_id.required_values)
+        read_edge = self._task_node.inputs[connection_name]
+        dataset_key = DatasetKey(read_edge.parent_dataset_type_name, dataset_data_id.required_values)
+        if dataset_key not in self._skeleton:
+            raise LookupError(
+                f"Dataset {read_edge.parent_dataset_type_name}@{dataset_data_id} is not already in the graph."
+            )
+        self._skeleton.add_input_edge(quantum_key, dataset_key)
+
+    def expand_quantum_data_id(self, data_id: DataCoordinate) -> DataCoordinate:
+        """Expand a quantum data ID to include implied values and dimension
+        records.
+
+        Parameters
+        ----------
+        data_id : `~lsst.daf.butler.DataCoordinate`
+            A data ID of a quantum already in the graph.
+
+        Returns
+        -------
+        expanded_data_id : `~lsst.daf.butler.DataCoordinate`
+            The same data ID, with implied values included and dimension
+            records attached.
+        """
+        from .quantum_graph_skeleton import QuantumKey
+
+        return self._skeleton.get_data_id(QuantumKey(self._task_node.label, data_id.required_values))
+
+    @property
+    def n_removed(self) -> int:
+        """The number of quanta that have been removed by this helper."""
+        return self._n_removed
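
Beyond dropping quanta, the same hook can rewire inputs. A hedged sketch of an adjust_all_quanta body that copies one quantum's inputs onto its neighbor; the "template" connection name and the consecutive-pair logic are invented for illustration:

    def adjust_all_quanta(self, adjuster: QuantaAdjuster) -> None:
        data_ids = list(adjuster.iter_data_ids())
        for donor, receiver in zip(data_ids, data_ids[1:]):
            # add_input only accepts datasets already present in the graph as
            # regular inputs of another quantum of this same task.
            for dataset_data_id in adjuster.get_inputs(donor)["template"]:
                adjuster.add_input(receiver, "template", dataset_data_id)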
@@ -32,6 +32,7 @@ import html
 import os
 import sys
 from collections.abc import Mapping
+from io import BufferedIOBase, BytesIO, StringIO, TextIOBase
 from typing import Any, TextIO
 
 from .._nodes import NodeType
@@ -40,6 +41,14 @@ from ._formatting import NodeKey, format_dimensions, format_task_class
 from ._options import NodeAttributeOptions
 from ._show import parse_display_args
 
+try:
+    from mermaid import Mermaid  # type: ignore
+    from mermaid.graph import Graph  # type: ignore
+
+    MERMAID_AVAILABLE = True
+except ImportError:
+    MERMAID_AVAILABLE = False
+
 # Configuration constants for label formatting and overflow handling.
 _LABEL_PX_SIZE = 18
 _LABEL_MAX_LINES_SOFT = 10
@@ -49,7 +58,11 @@ _OVERFLOW_MAX_LINES = 20
 
 def show_mermaid(
     pipeline_graph: PipelineGraph,
-    stream: TextIO = sys.stdout,
+    stream: TextIO | BytesIO = sys.stdout,
+    output_format: str = "mmd",
+    width: int | None = None,
+    height: int | None = None,
+    scale: float | None = None,
     **kwargs: Any,
 ) -> None:
     """Write a Mermaid flowchart representation of the pipeline graph to a
@@ -65,9 +78,20 @@
     ----------
     pipeline_graph : `PipelineGraph`
         The pipeline graph to visualize.
-    stream : `TextIO`, optional
+    stream : `TextIO` or `BytesIO`, optional
         The output stream where Mermaid code is written. Defaults to
         `sys.stdout`.
+    output_format : `str`, optional
+        Defines the output format. 'mmd' (default) generates the Mermaid
+        definition as text, while 'svg' and 'png' produce rendered images
+        written as binary streams.
+    width : `int`, optional
+        The width of the rendered image in pixels.
+    height : `int`, optional
+        The height of the rendered image in pixels.
+    scale : `float`, optional
+        The scale factor for the rendered image. Must be a float between 1
+        and 3, and one of ``height`` or ``width`` must be provided.
     **kwargs : Any
         Additional arguments passed to `parse_display_args` to control aspects
         such as displaying dimensions, storage classes, or full task class
@@ -85,27 +109,61 @@
     - If a node's label is too long, overflow nodes are created to hold extra
       lines.
     """
+    # Generate Mermaid source code in-memory.
+    mermaid_source = _generate_mermaid_source(pipeline_graph, **kwargs)
+
+    if output_format == "mmd":
+        if isinstance(stream, TextIOBase):
+            # Write Mermaid source as a string.
+            stream.write(mermaid_source)
+        else:
+            raise TypeError(f"Expected a text stream, but got {type(stream)}.")
+    else:
+        if isinstance(stream, BufferedIOBase):
+            # Render Mermaid source as an image and write to binary stream.
+            _render_mermaid_image(
+                mermaid_source, stream, output_format, width=width, height=height, scale=scale
+            )
+        else:
+            raise ValueError(f"Expected a binary stream, but got {type(stream)}.")
+
+
+def _generate_mermaid_source(pipeline_graph: PipelineGraph, **kwargs: Any) -> str:
+    """Generate the Mermaid source code from the pipeline graph.
+
+    Parameters
+    ----------
+    pipeline_graph : `PipelineGraph`
+        The pipeline graph to visualize.
+    **kwargs : Any
+        Additional arguments passed to `parse_display_args` for rendering.
+
+    Returns
+    -------
+    str
+        The Mermaid source code as a string.
+    """
+    # A buffer to collect Mermaid source code.
+    buffer = StringIO()
+
     # Parse display arguments to determine what to show.
     xgraph, options = parse_display_args(pipeline_graph, **kwargs)
 
     # Begin the Mermaid code block.
-    print("flowchart TD", file=stream)
+    buffer.write("flowchart TD\n")
 
     # Define Mermaid classes for node styling.
-    print(
+    buffer.write(
         f"classDef task fill:#B1F2EF,color:#000,stroke:#000,stroke-width:3px,"
-        f"font-family:Monospace,font-size:{_LABEL_PX_SIZE}px,text-align:left;",
-        file=stream,
+        f"font-family:Monospace,font-size:{_LABEL_PX_SIZE}px,text-align:left;\n"
     )
-    print(
+    buffer.write(
         f"classDef dsType fill:#F5F5F5,color:#000,stroke:#00BABC,stroke-width:3px,"
-        f"font-family:Monospace,font-size:{_LABEL_PX_SIZE}px,text-align:left,rx:8,ry:8;",
-        file=stream,
+        f"font-family:Monospace,font-size:{_LABEL_PX_SIZE}px,text-align:left,rx:8,ry:8;\n"
     )
-    print(
+    buffer.write(
         f"classDef taskInit fill:#F4DEFA,color:#000,stroke:#000,stroke-width:3px,"
-        f"font-family:Monospace,font-size:{_LABEL_PX_SIZE}px,text-align:left;",
-        file=stream,
+        f"font-family:Monospace,font-size:{_LABEL_PX_SIZE}px,text-align:left;\n"
     )
 
     # `overflow_ref` tracks the reference numbers for overflow nodes.
@@ -116,30 +174,27 @@
     for node_key, node_data in xgraph.nodes.items():
         match node_key.node_type:
             case NodeType.TASK | NodeType.TASK_INIT:
-                # Render a task or task-init node.
-                _render_task_node(node_key, node_data, options, stream)
+                _render_task_node(node_key, node_data, options, buffer)
             case NodeType.DATASET_TYPE:
-                # Render a dataset-type node with possible overflow handling.
                 overflow_ref, node_overflow_ids = _render_dataset_type_node(
-                    node_key, node_data, options, stream, overflow_ref
+                    node_key, node_data, options, buffer, overflow_ref
                 )
-                if node_overflow_ids:
-                    overflow_ids += node_overflow_ids
+                overflow_ids += node_overflow_ids if node_overflow_ids else []
             case _:
                 raise AssertionError(f"Unexpected node type: {node_key.node_type}")
 
-    # Collect edges for printing and track which ones are prerequisite
-    # so we can apply dashed styling after printing them.
+    # Collect edges for adding to the Mermaid code and track which ones are
+    # prerequisite so we can apply dashed styling to them later.
     edges = []
     for _, (from_node, to_node, *_rest) in enumerate(xgraph.edges):
         is_prereq = xgraph.nodes[from_node].get("is_prerequisite", False)
         edges.append((from_node.node_id, to_node.node_id, is_prereq))
 
-    # Print all edges
+    # Render all edges.
     for _, (f, t, p) in enumerate(edges):
-        _render_edge(f, t, p, stream)
+        _render_edge(f, t, p, buffer)
 
-    # After printing all edges, apply linkStyle to prerequisite edges to make
+    # After rendering all edges, apply linkStyle to prerequisite edges to make
     # them dashed:
 
     # First, gather indices of prerequisite edges.
@@ -147,7 +202,85 @@
 
     # Then apply dashed styling to all prerequisite edges in one line.
     if prereq_indices:
-        print(f"linkStyle {','.join(prereq_indices)} stroke-dasharray:5;", file=stream)
+        buffer.write(f"linkStyle {','.join(prereq_indices)} stroke-dasharray:5;\n")
+
+    # Return Mermaid source as a string.
+    return buffer.getvalue()
+
+
+def _render_mermaid_image(
+    mermaid_source: str,
+    binary_stream: BytesIO,
+    output_format: str,
+    width: int | None = None,
+    height: int | None = None,
+    scale: float | None = None,
+) -> None:
+    """Render a Mermaid diagram as an image and write the output to a binary
+    stream.
+
+    Parameters
+    ----------
+    mermaid_source : `str`
+        The Mermaid diagram source code.
+    binary_stream : `BytesIO`
+        The binary stream where the output content will be written.
+    output_format : `str`
+        The desired output format for the image. Supported image formats are
+        'svg' and 'png'.
+    width : `int`, optional
+        The width of the rendered image in pixels.
+    height : `int`, optional
+        The height of the rendered image in pixels.
+    scale : `float`, optional
+        The scale factor for the rendered image. Must be a float between 1 and
+        3, and one of height or width must be provided.
+
+    Raises
+    ------
+    ImportError
+        If `mermaid-py` is not installed.
+    ValueError
+        If the requested ``output_format`` is not supported.
+    RuntimeError
+        If the rendering process fails.
+    """
+    if output_format.lower() not in {"svg", "png"}:
+        raise ValueError(f"Unsupported format: {output_format}. Use 'svg' or 'png'.")
+
+    if not MERMAID_AVAILABLE:
+        raise ImportError("The `mermaid-py` package is required for rendering images but is not installed.")
+
+    # Generate the Mermaid graph object.
+    graph = Graph(title="Mermaid Diagram", script=mermaid_source)
+    diagram = Mermaid(graph, width=width, height=height, scale=scale)
+
+    # Determine the response type based on the output format.
+    if output_format.lower() == "svg":
+        response_type = "svg_response"
+    else:
+        response_type = "img_response"
+
+    # Fetch the rendered content and write it to the stream.
+    try:
+        content = getattr(diagram, response_type).content
+
+        # Check whether the response is actually an image or an HTML error page.
+        if content.startswith(b"<!DOCTYPE html>") or b"<title>" in content[:200]:
+            error_msg = content.decode(errors="ignore")[:1000]
+            if "524" in error_msg or "timeout" in error_msg.lower():
+                raise RuntimeError(
+                    f"Mermaid rendering service (mermaid.ink) timed out while generating {response_type}. "
+                    "This may be due to server overload. Try again later or use a local rendering option."
+                )
+            raise RuntimeError(
+                f"Unexpected error from Mermaid API while generating {response_type}. Response:\n{error_msg}"
+            )
+
+        # Write the content to the binary stream if it is a valid image.
+        binary_stream.write(content)
+    except AttributeError as exc:
+        raise RuntimeError(f"Failed to generate {response_type} content") from exc
 
 
 def _render_task_node(
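
Taken together, these hunks turn show_mermaid into a front end that either writes the generated Mermaid source or hands it to the optional mermaid-py renderer. A hedged usage sketch (the pipeline_graph value is a stand-in for a real PipelineGraph, and the import path is assumed from the file layout):

    from io import BytesIO

    from lsst.pipe.base.pipeline_graph.visualization import show_mermaid

    show_mermaid(pipeline_graph)  # Mermaid definition text to sys.stdout, as before.

    # Binary formats need the mermaid-py dependency and a binary stream;
    # scale requires width or height to be given.
    buffer = BytesIO()
    show_mermaid(pipeline_graph, stream=buffer, output_format="png", width=1600, scale=2)
    with open("pipeline.png", "wb") as f:
        f.write(buffer.getvalue())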
@@ -340,7 +340,7 @@ class PrerequisiteFinder:
         where_terms: list[str] = []
         bind: dict[str, list[int]] = {}
         for name in self.dataset_skypix:
-            where_terms.append(f"{name} IN ({name}_pixels)")
+            where_terms.append(f"{name} IN (:{name}_pixels)")
             pixels: list[int] = []
             for begin, end in skypix_bounds[name]:
                 pixels.extend(range(begin, end))
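
This last fix is a single character: the skypix IN-list is now written with the leading colon that marks a bind parameter, so it pairs with the bind mapping built just below it instead of being interpolated as a bare name. A small string-level illustration, with an invented skypix system name:

    name = "htm7"
    skypix_bounds = {"htm7": [(100, 103), (200, 202)]}
    pixels: list[int] = []
    for begin, end in skypix_bounds[name]:
        pixels.extend(range(begin, end))
    where_term = f"{name} IN (:{name}_pixels)"  # "htm7 IN (:htm7_pixels)"
    bind = {f"{name}_pixels": pixels}           # {"htm7_pixels": [100, 101, 102, 200, 201]}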