lsst-pipe-base 30.0.0rc2__py3-none-any.whl → 30.0.1__py3-none-any.whl
- lsst/pipe/base/_instrument.py +31 -20
- lsst/pipe/base/_quantumContext.py +3 -3
- lsst/pipe/base/_status.py +43 -10
- lsst/pipe/base/_task_metadata.py +2 -2
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +8 -3
- lsst/pipe/base/automatic_connection_constants.py +20 -1
- lsst/pipe/base/cli/cmd/__init__.py +18 -2
- lsst/pipe/base/cli/cmd/commands.py +149 -4
- lsst/pipe/base/connectionTypes.py +72 -160
- lsst/pipe/base/connections.py +6 -9
- lsst/pipe/base/execution_reports.py +0 -5
- lsst/pipe/base/graph/graph.py +11 -10
- lsst/pipe/base/graph/quantumNode.py +4 -4
- lsst/pipe/base/graph_walker.py +8 -10
- lsst/pipe/base/log_capture.py +40 -80
- lsst/pipe/base/log_on_close.py +76 -0
- lsst/pipe/base/mp_graph_executor.py +51 -15
- lsst/pipe/base/pipeline.py +5 -6
- lsst/pipe/base/pipelineIR.py +2 -8
- lsst/pipe/base/pipelineTask.py +5 -7
- lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
- lsst/pipe/base/pipeline_graph/_edges.py +32 -22
- lsst/pipe/base/pipeline_graph/_mapping_views.py +4 -7
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +14 -7
- lsst/pipe/base/pipeline_graph/expressions.py +2 -2
- lsst/pipe/base/pipeline_graph/io.py +7 -10
- lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
- lsst/pipe/base/pipeline_graph/visualization/_layout.py +16 -18
- lsst/pipe/base/pipeline_graph/visualization/_merge.py +4 -7
- lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
- lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
- lsst/pipe/base/prerequisite_helpers.py +2 -1
- lsst/pipe/base/quantum_graph/_common.py +19 -20
- lsst/pipe/base/quantum_graph/_multiblock.py +37 -31
- lsst/pipe/base/quantum_graph/_predicted.py +113 -15
- lsst/pipe/base/quantum_graph/_provenance.py +1136 -45
- lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +204 -289
- lsst/pipe/base/quantum_graph/aggregator/_config.py +87 -9
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +13 -12
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +49 -235
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +6 -116
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +29 -39
- lsst/pipe/base/quantum_graph/aggregator/_workers.py +303 -0
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +34 -351
- lsst/pipe/base/quantum_graph/formatter.py +171 -0
- lsst/pipe/base/quantum_graph/ingest_graph.py +413 -0
- lsst/pipe/base/quantum_graph/visualization.py +5 -1
- lsst/pipe/base/quantum_graph_builder.py +33 -9
- lsst/pipe/base/quantum_graph_executor.py +116 -13
- lsst/pipe/base/quantum_graph_skeleton.py +31 -35
- lsst/pipe/base/quantum_provenance_graph.py +29 -12
- lsst/pipe/base/separable_pipeline_executor.py +19 -3
- lsst/pipe/base/single_quantum_executor.py +67 -42
- lsst/pipe/base/struct.py +4 -0
- lsst/pipe/base/testUtils.py +3 -3
- lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/METADATA +3 -3
- lsst_pipe_base-30.0.1.dist-info/RECORD +129 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/WHEEL +1 -1
- lsst_pipe_base-30.0.0rc2.dist-info/RECORD +0 -125
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/zip-safe +0 -0
lsst/pipe/base/log_capture.py
CHANGED
```diff
@@ -31,17 +31,15 @@ __all__ = ["LogCapture"]
 
 import dataclasses
 import logging
-import os
-import shutil
-import tempfile
 import uuid
 from collections.abc import Iterator
-from contextlib import contextmanager, suppress
+from contextlib import contextmanager
 from logging import FileHandler
 
 import pydantic
 
-from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
+from lsst.daf.butler import Butler, LimitedButler, Quantum
+from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest
 from lsst.daf.butler.logging import (
     ButlerLogRecord,
     ButlerLogRecordHandler,
@@ -105,7 +103,7 @@ class _ExecutionLogRecordsExtra(pydantic.BaseModel):
 
         Parameters
         ----------
-        log_records : `ButlerLogRecords`
+        log_records : `lsst.daf.butler.ButlerLogRecords`
             Logs from a past attempt to run a quantum.
         """
         previous = self.model_validate(log_records.extra)
@@ -165,7 +163,9 @@ class LogCapture:
         return cls(butler, butler)
 
     @contextmanager
-    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[_LogCaptureContext]:
+    def capture_logging(
+        self, task_node: TaskNode, /, quantum: Quantum, records: ButlerLogRecords | None = None
+    ) -> Iterator[_LogCaptureContext]:
         """Configure logging system to capture logs for execution of this task.
 
         Parameters
@@ -174,6 +174,9 @@ class LogCapture:
             The task definition.
         quantum : `~lsst.daf.butler.Quantum`
             Single Quantum instance.
+        records : `lsst.daf.butler.logging.ButlerLogRecords`, optional
+            Log record container to append to and save. If provided, streaming
+            mode is disabled (since we'll be saving logs in memory anyway).
 
         Notes
         -----
@@ -205,44 +208,40 @@ class LogCapture:
 
         # Add a handler to the root logger to capture execution log output.
         if log_dataset_name is not None:
+            try:
+                [ref] = quantum.outputs[log_dataset_name]
+            except LookupError as exc:
+                raise InvalidQuantumError(
+                    f"Quantum outputs is missing log output dataset type {log_dataset_name};"
+                    " this could happen due to inconsistent options between QuantumGraph generation"
+                    " and execution"
+                ) from exc
             # Either accumulate into ButlerLogRecords or stream JSON records to
             # file and ingest that (ingest is possible only with full butler).
-            if self.stream_json_logs and self.full_butler is not None:
-                ...
-                # Ensure that the logs are stored in butler.
-                logging.getLogger().removeHandler(log_handler_file)
-                log_handler_file.close()
-                if ctx.extra:
-                    with open(log_file, "a") as log_stream:
-                        ButlerLogRecords.write_streaming_extra(
-                            log_stream,
-                            ctx.extra.model_dump_json(exclude_unset=True, exclude_defaults=True),
-                        )
-                if ctx.store:
-                    self._ingest_log_records(quantum, log_dataset_name, log_file)
-                shutil.rmtree(tmpdir, ignore_errors=True)
+            if self.stream_json_logs and self.full_butler is not None and records is None:
+                with TemporaryForIngest(self.full_butler, ref) as temporary:
+                    log_handler_file = FileHandler(temporary.ospath)
+                    log_handler_file.setFormatter(JsonLogFormatter())
+                    logging.getLogger().addHandler(log_handler_file)
+
+                    try:
+                        with ButlerMDC.set_mdc(mdc):
+                            yield ctx
+                    finally:
+                        # Ensure that the logs are stored in butler.
+                        logging.getLogger().removeHandler(log_handler_file)
+                        log_handler_file.close()
+                        if ctx.extra:
+                            with open(temporary.ospath, "a") as log_stream:
+                                ButlerLogRecords.write_streaming_extra(
+                                    log_stream,
+                                    ctx.extra.model_dump_json(exclude_unset=True, exclude_defaults=True),
+                                )
+                        if ctx.store:
+                            temporary.ingest()
 
             else:
-                log_handler_memory = ButlerLogRecordHandler()
+                log_handler_memory = ButlerLogRecordHandler(records)
                 logging.getLogger().addHandler(log_handler_memory)
 
                 try:
@@ -261,7 +260,6 @@ class LogCapture:
                     logging.getLogger().removeHandler(log_handler_memory)
                     if ctx.store:
                         self._store_log_records(quantum, log_dataset_name, log_handler_memory)
-                    log_handler_memory.records.clear()
 
         else:
             with ButlerMDC.set_mdc(mdc):
@@ -281,41 +279,3 @@ class LogCapture:
             ) from exc
 
         self.butler.put(log_handler.records, ref)
-
-    def _ingest_log_records(self, quantum: Quantum, dataset_type: str, filename: str) -> None:
-        # If we are logging to an external file we must always try to
-        # close it.
-        assert self.full_butler is not None, "Expected to have full butler for ingest"
-        ingested = False
-        try:
-            # DatasetRef has to be in the Quantum outputs, can lookup by name.
-            try:
-                [ref] = quantum.outputs[dataset_type]
-            except LookupError as exc:
-                raise InvalidQuantumError(
-                    f"Quantum outputs is missing log output dataset type {dataset_type};"
-                    " this could happen due to inconsistent options between QuantumGraph generation"
-                    " and execution"
-                ) from exc
-
-            # Need to ingest this file directly into butler.
-            dataset = FileDataset(path=filename, refs=ref)
-            try:
-                self.full_butler.ingest(dataset, transfer="move")
-                ingested = True
-            except NotImplementedError:
-                # Some datastores can't receive files (e.g. in-memory datastore
-                # when testing), we store empty list for those just to have a
-                # dataset. Alternative is to read the file as a
-                # ButlerLogRecords object and put it.
-                _LOG.info(
-                    "Log records could not be stored in this butler because the"
-                    " datastore can not ingest files, empty record list is stored instead."
-                )
-                records = ButlerLogRecords.from_records([])
-                self.full_butler.put(records, ref)
-        finally:
-            # remove file if it is not ingested
-            if not ingested:
-                with suppress(OSError):
-                    os.remove(filename)
```
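The memory branch above now accepts a caller-provided container: `ButlerLogRecordHandler(records)` appends into `records`, so the captured logs outlive the handler (the `MPGraphExecutor` changes below pass a per-quantum `task_logs` container through `log_records=`). A minimal stand-alone sketch of the same pattern using only the stock `logging` module; `ListHandler` is a toy analogue, not part of this package:

```python
import logging


class ListHandler(logging.Handler):
    """Toy analogue of ButlerLogRecordHandler: append records into a
    caller-provided container instead of a private one."""

    def __init__(self, records: list[logging.LogRecord] | None = None) -> None:
        super().__init__()
        self.records = records if records is not None else []

    def emit(self, record: logging.LogRecord) -> None:
        self.records.append(record)


shared: list[logging.LogRecord] = []
handler = ListHandler(shared)
root = logging.getLogger()
root.addHandler(handler)
try:
    logging.warning("captured while the handler is installed")
finally:
    root.removeHandler(handler)

assert len(shared) == 1  # the caller still owns the records afterwards
```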
lsst/pipe/base/log_on_close.py
ADDED

```diff
@@ -0,0 +1,76 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("LogOnClose",)
+
+from collections.abc import Callable, Iterator
+from contextlib import AbstractContextManager, contextmanager
+
+from lsst.utils.logging import VERBOSE
+
+
+class LogOnClose:
+    """A factory for context manager wrappers that emit a log message when
+    they are closed.
+
+    Parameters
+    ----------
+    log_func : `~collections.abc.Callable` [ `int`, `str` ]
+        Callable that takes an integer log level and a string message and emits
+        a log message. Note that placeholder formatting is not supported.
+    """
+
+    def __init__(self, log_func: Callable[[int, str], None]):
+        self.log_func = log_func
+
+    def wrap[T](
+        self,
+        cm: AbstractContextManager[T],
+        msg: str,
+        level: int = VERBOSE,
+    ) -> AbstractContextManager[T]:
+        """Wrap a context manager to log when it is exited.
+
+        Parameters
+        ----------
+        cm : `contextlib.AbstractContextManager`
+            Context manager to wrap.
+        msg : `str`
+            Log message.
+        level : `int`, optional
+            Log level.
+        """
+
+        @contextmanager
+        def wrapper() -> Iterator[T]:
+            with cm as result:
+                yield result
+            self.log_func(level, msg)
+
+        return wrapper()
```
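A minimal usage sketch for the new `LogOnClose` factory above (the file name and message are placeholders; the `wrap[T]` syntax requires Python 3.12+):

```python
import logging

from lsst.pipe.base.log_on_close import LogOnClose

logging.basicConfig(level=logging.INFO)
_LOG = logging.getLogger("demo")

# Logger.log already has the (level, message) signature LogOnClose expects.
log_on_close = LogOnClose(_LOG.log)

# Wrap any context manager; the message is emitted only after the wrapped
# manager exits without raising.
with log_on_close.wrap(open("example.txt", "w"), "closed example.txt", logging.INFO) as f:
    f.write("hello\n")
```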
lsst/pipe/base/mp_graph_executor.py
CHANGED

```diff
@@ -39,20 +39,24 @@ import sys
 import threading
 import time
 import uuid
+from contextlib import ExitStack
 from typing import Literal, cast
 
 import networkx
 
 from lsst.daf.butler import DataCoordinate, Quantum
 from lsst.daf.butler.cli.cliLog import CliLog
+from lsst.daf.butler.logging import ButlerLogRecords
 from lsst.utils.threads import disable_implicit_threading
 
 from ._status import InvalidQuantumError, RepeatableQuantumError
+from ._task_metadata import TaskMetadata
 from .execution_graph_fixup import ExecutionGraphFixup
 from .graph import QuantumGraph
 from .graph_walker import GraphWalker
+from .log_on_close import LogOnClose
 from .pipeline_graph import TaskNode
-from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo
+from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo, ProvenanceQuantumGraphWriter
 from .quantum_graph_executor import QuantumExecutor, QuantumGraphExecutor
 from .quantum_reports import ExecutionStatus, QuantumReport, Report
 
@@ -515,7 +519,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
             start_method = "spawn"
         self._start_method = start_method
 
-    def execute(self, graph: QuantumGraph | PredictedQuantumGraph) -> None:
+    def execute(
+        self, graph: QuantumGraph | PredictedQuantumGraph, *, provenance_graph_file: str | None = None
+    ) -> None:
         # Docstring inherited from QuantumGraphExecutor.execute
         old_graph: QuantumGraph | None = None
         if isinstance(graph, QuantumGraph):
@@ -525,14 +531,31 @@ class MPGraphExecutor(QuantumGraphExecutor):
             new_graph = graph
         xgraph = self._make_xgraph(new_graph, old_graph)
         self._report = Report(qgraphSummary=new_graph._make_summary())
-        try:
-            if self._num_proc > 1:
-                self._execute_quanta_mp(xgraph, self._report)
-            else:
-                self._execute_quanta_in_process(xgraph, self._report)
-        except Exception as exc:
-            self._report.set_exception(exc)
-            raise
+        with ExitStack() as exit_stack:
+            provenance_writer: ProvenanceQuantumGraphWriter | None = None
+            if provenance_graph_file is not None:
+                if provenance_graph_file is not None and self._num_proc > 1:
+                    raise NotImplementedError(
+                        "Provenance writing is not implemented for multiprocess execution."
+                    )
+                provenance_writer = ProvenanceQuantumGraphWriter(
+                    provenance_graph_file,
+                    exit_stack=exit_stack,
+                    log_on_close=LogOnClose(_LOG.log),
+                    predicted=new_graph,
+                )
+            try:
+                if self._num_proc > 1:
+                    self._execute_quanta_mp(xgraph, self._report)
+                else:
+                    self._execute_quanta_in_process(xgraph, self._report, provenance_writer)
+            except Exception as exc:
+                self._report.set_exception(exc)
+                raise
+            if provenance_writer is not None:
+                provenance_writer.write_overall_inputs()
+                provenance_writer.write_packages()
+                provenance_writer.write_init_outputs(assume_existence=True)
 
     def _make_xgraph(
         self, new_graph: PredictedQuantumGraph, old_graph: QuantumGraph | None
@@ -576,7 +599,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
             raise MPGraphExecutorError("Updated execution graph has dependency cycle.")
         return xgraph
 
-    def _execute_quanta_in_process(self, xgraph: networkx.DiGraph, report: Report) -> None:
+    def _execute_quanta_in_process(
+        self, xgraph: networkx.DiGraph, report: Report, provenance_writer: ProvenanceQuantumGraphWriter | None
+    ) -> None:
         """Execute all Quanta in current process.
 
         Parameters
@@ -589,6 +614,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
            `.quantum_graph.PredictedQuantumGraph.quantum_only_xgraph`.
         report : `Report`
             Object for reporting execution status.
+        provenance_writer : `.quantum_graph.ProvenanceQuantumGraphWriter` or \
+            `None`
+            Object for recording provenance.
         """
 
         def tiebreaker_sort_key(quantum_id: uuid.UUID) -> tuple:
@@ -606,16 +634,19 @@ class MPGraphExecutor(QuantumGraphExecutor):
 
             _LOG.debug("Executing %s (%s@%s)", quantum_id, task_node.label, data_id)
             fail_exit_code: int | None = None
+            task_metadata: TaskMetadata | None = None
+            task_logs = ButlerLogRecords([])
             try:
                 # For some exception types we want to exit immediately with
                 # exception-specific exit code, but we still want to start
                 # debugger before exiting if debugging is enabled.
                 try:
-                    ...
-                        task_node, quantum, quantum_id=quantum_id
+                    execution_result = self._quantum_executor.execute(
+                        task_node, quantum, quantum_id=quantum_id, log_records=task_logs
                     )
-                    if ...
-                        report.quantaReports.append(...
+                    if execution_result.report:
+                        report.quantaReports.append(execution_result.report)
+                    task_metadata = execution_result.task_metadata
                     success_count += 1
                     walker.finish(quantum_id)
                 except RepeatableQuantumError as exc:
@@ -701,6 +732,11 @@ class MPGraphExecutor(QuantumGraphExecutor):
                 )
                 failed_count += 1
 
+            if provenance_writer is not None:
+                provenance_writer.write_quantum_provenance(
+                    quantum_id, metadata=task_metadata, logs=task_logs
+                )
+
             _LOG.info(
                 "Executed %d quanta successfully, %d failed and %d remain out of total %d quanta.",
                 success_count,
```
lsst/pipe/base/pipeline.py
CHANGED
```diff
@@ -54,13 +54,12 @@ from lsst.utils.introspection import get_full_type_name
 
 from . import automatic_connection_constants as acc
 from . import pipeline_graph, pipelineIR
-from ._instrument import Instrument as ...
+from ._instrument import Instrument as Instrument
 from .config import PipelineTaskConfig
 from .connections import PipelineTaskConnections
 from .pipelineTask import PipelineTask
 
 if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
-    from lsst.obs.base import Instrument
     from lsst.pex.config import Config
 
 # ----------------------------------
@@ -496,7 +495,7 @@ class Pipeline:
         Returns
         -------
         pipeline: `Pipeline`
-            ...
+            The new pipeline.
         """
         return cls.fromIR(copy.deepcopy(pipeline._pipelineIR))
 
@@ -606,7 +605,7 @@ class Pipeline:
 
     @property
    def subsets(self) -> MappingProxyType[str, set]:
-        """Returns a `MappingProxyType` where the keys are the labels of
+        """Returns a `types.MappingProxyType` where the keys are the labels of
         labeled subsets in the `Pipeline` and the values are the set of task
         labels contained within that subset.
         """
@@ -702,7 +701,7 @@ class Pipeline:
         """
         instrument_class_name = self._pipelineIR.instrument
         if instrument_class_name is not None:
-            instrument_class = cast(...
+            instrument_class = cast(Instrument, doImportType(instrument_class_name))
             if instrument_class is not None:
                 return DataCoordinate.standardize(instrument=instrument_class.getName(), universe=universe)
         return DataCoordinate.make_empty(universe)
@@ -893,7 +892,7 @@ class Pipeline:
             raise NameError(f"Label {label} does not appear in this pipeline")
         taskClass: type[PipelineTask] = doImportType(taskIR.klass)
         config = taskClass.ConfigClass()
-        instrument: ...
+        instrument: Instrument | None = None
         if (instrumentName := self._pipelineIR.instrument) is not None:
             instrument_cls: type = doImportType(instrumentName)
             instrument = instrument_cls()
```
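For reference, the `types.MappingProxyType` returned by `Pipeline.subsets` above is a read-only, live view of an underlying mapping; a small sketch with made-up subset labels:

```python
from types import MappingProxyType

_subsets: dict[str, set] = {"step1": {"isr", "calibrate"}}
view = MappingProxyType(_subsets)

print(view["step1"])     # the set of task labels in that subset
_subsets["step2"] = {"coadd"}
print("step2" in view)   # True: the proxy tracks the underlying dict
# view["step3"] = set()  # would raise TypeError: the proxy is read-only
```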
lsst/pipe/base/pipelineIR.py
CHANGED
```diff
@@ -220,12 +220,6 @@ class LabeledSubset:
 class ParametersIR:
     """Intermediate representation of parameters that are global to a pipeline.
 
-    Attributes
-    ----------
-    mapping : `dict` [`str`, `str`]
-        A mutable mapping of identifiers as keys, and shared configuration
-        as values.
-
     Notes
     -----
     These parameters are specified under a top level key named ``parameters``
@@ -343,7 +337,7 @@ class ConfigIR:
         )
         return new_config
 
-    def maybe_merge(self, other_config: ConfigIR) -> Generator[ConfigIR, None, None]:
+    def maybe_merge(self, other_config: ConfigIR) -> Generator[ConfigIR]:
         """Merge another instance of a `ConfigIR` into this instance if
         possible. This function returns a generator that is either self
         if the configs were merged, or self, and other_config if that could
@@ -706,7 +700,7 @@ class PipelineIR:
 
         Parameters
         ----------
-        loaded_yaml: `dict`
+        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced
            by a yaml reader which parses a pipeline definition document
        """
```
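The `maybe_merge` fix above adopts the single-parameter `Generator[ConfigIR]` spelling (since Python 3.13 the send and return type parameters of `Generator` default to `None`). The contract the docstring describes — yield one merged object, or both inputs unchanged — can be sketched with plain dicts; this toy function is not the real `ConfigIR.maybe_merge`:

```python
from collections.abc import Generator


def maybe_merge(a: dict, b: dict) -> Generator[dict]:
    """Yield a single merged dict when the keys don't collide; otherwise
    yield both inputs unchanged."""
    if a.keys() & b.keys():
        yield a
        yield b
    else:
        yield {**a, **b}


print(list(maybe_merge({"x": 1}, {"y": 2})))  # [{'x': 1, 'y': 2}]
print(list(maybe_merge({"x": 1}, {"x": 2})))  # [{'x': 1}, {'x': 2}]
```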
lsst/pipe/base/pipelineTask.py
CHANGED
```diff
@@ -55,7 +55,7 @@ class PipelineTask(Task):
     resulting data is also stored in a data butler.
 
     PipelineTask inherits from a `~lsst.pipe.base.Task` and uses the same
-    configuration mechanism based on ...
+    configuration mechanism based on `lsst.pex.config`. `PipelineTask`
     classes also have a `PipelineTaskConnections` class associated with their
     config which defines all of the IO a `PipelineTask` will need to do.
     PipelineTask sub-class typically implements `run()` method which receives
@@ -75,12 +75,6 @@ class PipelineTask(Task):
     PipelineTask base class constructor, but may support other signatures as
     well.
 
-    Attributes
-    ----------
-    canMultiprocess : bool, True by default (class attribute)
-        This class attribute is checked by execution framework, sub-classes
-        can set it to ``False`` in case task does not support multiprocessing.
-
     Parameters
     ----------
     config : `~lsst.pex.config.Config`, optional
@@ -102,7 +96,11 @@ class PipelineTask(Task):
     """
 
     ConfigClass: ClassVar[type[PipelineTaskConfig]]
+
     canMultiprocess: ClassVar[bool] = True
+    """Whether this task can be run by an executor that uses subprocesses for
+    parallelism.
+    """
 
     def __init__(
         self,
```
lsst/pipe/base/pipeline_graph/_dataset_types.py
CHANGED

```diff
@@ -106,8 +106,8 @@ class DatasetTypeNode:
         The internal networkx graph.
     get_registered : `~collections.abc.Callable` or `None`
         Callable that takes a dataset type name and returns the
-        ...
-        not registered.
+        `~lsst.daf.butler.DatasetType` registered in the data repository,
+        or `None` if it is not registered.
     dimensions : `lsst.daf.butler.DimensionUniverse`
         Definitions of all dimensions.
     previous : `DatasetTypeNode` or `None`
```
lsst/pipe/base/pipeline_graph/_edges.py
CHANGED

```diff
@@ -30,7 +30,7 @@ __all__ = ("Edge", "ReadEdge", "WriteEdge")
 
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Mapping, Sequence
-from typing import Any, ClassVar, Self, TypeVar
+from typing import Any, ClassVar, Self
 
 from lsst.daf.butler import DatasetRef, DatasetType, DimensionUniverse, StorageClassFactory
 from lsst.daf.butler.registry import MissingDatasetTypeError
@@ -40,8 +40,6 @@ from ..connectionTypes import BaseConnection
 from ._exceptions import ConnectionTypeConsistencyError, IncompatibleDatasetTypeError
 from ._nodes import NodeKey, NodeType
 
-_S = TypeVar("_S", bound="Edge")
-
 
 @immutable
 class Edge(ABC):
@@ -172,7 +170,7 @@ class Edge(ABC):
         """
         return self.parent_dataset_type_name
 
-    def diff(self: _S, other: _S, connection_type: str = "connection") -> list[str]:
+    def diff[S: Edge](self: S, other: S, connection_type: str = "connection") -> list[str]:
         """Compare this edge to another one from a possibly-different
         configuration of the same task label.
 
@@ -480,11 +478,11 @@ class ReadEdge(Edge):
         Parameters
         ----------
         current : `lsst.daf.butler.DatasetType` or `None`
-            The current graph-wide ...
-            be the registry's definition of the parent dataset ...
-            exists. If not, it will be the dataset type ...
-            task in the graph that writes it, if there is ...
-            such task, this will be `None`.
+            The current graph-wide `~lsst.daf.butler.DatasetType`, or `None`.
+            This will always be the registry's definition of the parent dataset
+            type, if one exists. If not, it will be the dataset type
+            definition from the task in the graph that writes it, if there is
+            one. If there is no such task, this will be `None`.
         is_initial_query_constraint : `bool`
             Whether this dataset type is currently marked as a constraint on
             the initial data ID query in QuantumGraph generation.
@@ -496,7 +494,7 @@ class ReadEdge(Edge):
         producer : `str` or `None`
             The label of the task that produces this dataset type in the
             pipeline, or `None` if it is an overall input.
-        consumers : ...
+        consumers : `~collections.abc.Sequence` [ `str` ]
             Labels for other consuming tasks that have already participated in
             this dataset type's resolution.
         is_registered : `bool`
@@ -512,7 +510,7 @@ class ReadEdge(Edge):
 
         Returns
         -------
-        dataset_type : ...
+        dataset_type : `~lsst.daf.butler.DatasetType`
             The updated graph-wide dataset type. If ``current`` was provided,
             this must be equal to it.
         is_initial_query_constraint : `bool`
@@ -659,13 +657,25 @@ class ReadEdge(Edge):
                     # compatible), since neither connection should take
                     # precedence.
                     if dataset_type != current:
-                        ...
+                        if visualization_only and dataset_type.dimensions == current.dimensions:
+                            # Make a visualization-only ambiguous storage class
+                            # "name".
+                            all_storage_classes = set(current.storageClass_name.split("/"))
+                            all_storage_classes.update(dataset_type.storageClass_name.split("/"))
+                            current = DatasetType(
+                                current.name,
+                                current.dimensions,
+                                "/".join(sorted(all_storage_classes)),
+                            )
+                        else:
+                            raise MissingDatasetTypeError(
+                                f"Definitions differ for input dataset type "
+                                f"{self.parent_dataset_type_name!r}; task {self.task_label!r} has "
+                                f"{dataset_type}, but the definition from {report_current_origin()} is "
+                                f"{current}. If the storage classes are compatible but different, "
+                                "registering the dataset type in the data repository in advance will avoid "
+                                "this error."
+                            )
                     elif not visualization_only and not dataset_type.is_compatible_with(current):
                         raise IncompatibleDatasetTypeError(
                             f"Incompatible definition for input dataset type {self.parent_dataset_type_name!r}; "
@@ -788,15 +798,15 @@ class WriteEdge(Edge):
         Parameters
         ----------
         current : `lsst.daf.butler.DatasetType` or `None`
-            The current graph-wide ...
-            be the registry's definition of the parent dataset ...
-            exists.
+            The current graph-wide `~lsst.daf.butler.DatasetType`, or `None`.
+            This will always be the registry's definition of the parent dataset
+            type, if one exists.
         universe : `lsst.daf.butler.DimensionUniverse`
             Object that holds all dimension definitions.
 
         Returns
         -------
-        dataset_type : ...
+        dataset_type : `~lsst.daf.butler.DatasetType`
             A dataset type compatible with this edge. If ``current`` was
             provided, this must be equal to it.
```
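The visualization-only branch in the `ReadEdge` hunk above builds an "ambiguous" storage class name by unioning the slash-separated components from both definitions; a quick illustration with made-up storage class names:

```python
current_name = "ArrowAstropy"
incoming_name = "DataFrame/ArrowTable"  # may itself already be a merged name

all_storage_classes = set(current_name.split("/"))
all_storage_classes.update(incoming_name.split("/"))
print("/".join(sorted(all_storage_classes)))  # ArrowAstropy/ArrowTable/DataFrame
```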
lsst/pipe/base/pipeline_graph/_mapping_views.py
CHANGED

```diff
@@ -27,7 +27,7 @@
 from __future__ import annotations
 
 from collections.abc import Iterable, Iterator, Mapping, Sequence
-from typing import Any, ClassVar, TypeVar, cast, overload
+from typing import Any, ClassVar, cast, overload
 
 import networkx
 
@@ -36,11 +36,8 @@ from ._exceptions import UnresolvedGraphError
 from ._nodes import NodeKey, NodeType
 from ._tasks import TaskInitNode, TaskNode
 
-_N = TypeVar("_N", covariant=True)
-_T = TypeVar("_T")
 
-
-class MappingView(Mapping[str, _N]):
+class MappingView[N](Mapping[str, N]):
     """Base class for mapping views into nodes of certain types in a
     `PipelineGraph`.
 
@@ -74,7 +71,7 @@ class MappingView(Mapping[str, _N]):
             self._keys = self._make_keys(self._parent_xgraph)
         return iter(self._keys)
 
-    def __getitem__(self, key: str) -> _N:
+    def __getitem__(self, key: str) -> N:
         return self._parent_xgraph.nodes[NodeKey(self._NODE_TYPE, key)]["instance"]
 
     def __len__(self) -> int:
@@ -230,7 +227,7 @@ class DatasetTypeMappingView(MappingView[DatasetTypeNode]):
     def get_if_resolved(self, key: str) -> DatasetTypeNode | None: ...  # pragma: nocover
 
     @overload
-    def get_if_resolved(self, key: str, default: _T) -> DatasetTypeNode | _T: ...  # pragma: nocover
+    def get_if_resolved[T](self, key: str, default: T) -> DatasetTypeNode | T: ...  # pragma: nocover
 
     def get_if_resolved(self, key: str, default: Any = None) -> DatasetTypeNode | Any:
         """Get a node or return a default if it has not been resolved.
```