PyPI - lsst-ctrl-mpexec - Versions diffs - 29.2025.3400__py3-none-any.whl → 29.2025.3600__py3-none-any.whl - Mend

lsst-ctrl-mpexec 29.2025.3400__py3-none-any.whl → 29.2025.3600__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

lsst/ctrl/mpexec/cli/script/run.py CHANGED Viewed

@@ -25,53 +25,67 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
-import logging
-from types import SimpleNamespace
+from __future__ import annotations
-from lsst.pipe.base import TaskFactory
+from collections.abc import Iterable
+from typing import TYPE_CHECKING, Literal
+import astropy.units as u
+import lsst.utils.timer
+from lsst.pipe.base import ExecutionResources, QuantumGraph, TaskFactory
+from lsst.pipe.base.mp_graph_executor import MPGraphExecutor
+from lsst.pipe.base.single_quantum_executor import SingleQuantumExecutor
+from lsst.resources import ResourcePath, ResourcePathExpression
+from lsst.utils.doImport import doImportType
+from lsst.utils.iteration import ensure_iterable
+from lsst.utils.logging import getLogger
 from lsst.utils.threads import disable_implicit_threading
-from ... import CmdLineFwk
-_log = logging.getLogger(__name__)
-def run(  # type: ignore
-    pdb,
-    graph_fixup,
-    init_only,
-    no_versions,
-    processes,
-    start_method,
-    profile,
-    qgraphObj,
-    register_dataset_types,
-    skip_init_writes,
-    timeout,
-    butler_config,
-    input,
-    output,
-    output_run,
-    extend_run,
-    replace_run,
-    prune_replaced,
-    data_query,
-    skip_existing_in,
-    skip_existing,
-    debug,
-    fail_fast,
-    clobber_outputs,
-    summary,
-    mock,
-    unmocked_dataset_types,
-    mock_failure,
-    enable_implicit_threading,
+from ..butler_factory import ButlerFactory
+from ..utils import MP_TIMEOUT
+if TYPE_CHECKING:
+    from lsst.pipe.base.execution_graph_fixup import ExecutionGraphFixup
+_LOG = getLogger(__name__)
+def run(
+    qg: QuantumGraph,
+    *,
+    task_factory: TaskFactory | None = None,
+    pdb: str | None,
+    graph_fixup: str,
+    init_only: bool,
+    no_versions: bool,
+    processes: int,
+    start_method: Literal["spawn", "forkserver"] | None,
+    profile: str,
+    register_dataset_types: bool,
+    skip_init_writes: bool,
+    timeout: int | None,
+    butler_config: ResourcePathExpression,
+    input: Iterable[str] | str,
+    output: str | None,
+    output_run: str | None,
+    extend_run: bool,
+    replace_run: bool,
+    prune_replaced: str | None,
+    data_query: str | None,
+    skip_existing_in: Iterable[str] | None,
+    skip_existing: bool,
+    debug: bool,
+    fail_fast: bool,
+    clobber_outputs: bool,
+    summary: ResourcePathExpression | None,
+    enable_implicit_threading: bool,
     cores_per_quantum: int,
-    memory_per_quantum: str,
-    rebase,
+    memory_per_quantum: str | None,
+    rebase: bool,
     raise_on_partial_outputs: bool,
-    **kwargs,
-):
+    **kwargs: object,
+) -> None:
     """Implement the command line interface `pipetask run` subcommand.
     Should only be called by command line tools and unit test code that test
@@ -79,8 +93,13 @@ def run(  # type: ignore
     Parameters
     ----------
-    pdb : `bool`
-        Drop into pdb on exception or not.
+    qg : `lsst.pipe.base.QuantumGraph`
+        A QuantumGraph generated by a previous subcommand.
+    task_factory : `lsst.pipe.base.TaskFactory`, optional
+        A custom task factory to use.
+    pdb : `str`, optional
+        Debugger to import and use (via the ``post_mortem`` function) in the
+        event of an exception.
     graph_fixup : `str`
         The name of the class or factory method which makes an instance used
         for execution graph fixup.
@@ -96,8 +115,6 @@ def run(  # type: ignore
         one for current platform.
     profile : `str`
         File name to dump cProfile information to.
-    qgraphObj : `lsst.pipe.base.QuantumGraph`
-        A QuantumGraph generated by a previous subcommand.
     register_dataset_types : `bool`
         If true, register DatasetTypes that do not already exist in the
         Registry.
@@ -106,21 +123,18 @@ def run(  # type: ignore
         schemas).
     timeout : `int`
         Timeout for multiprocessing; maximum wall time (sec).
-    butler_config : `str`, `dict`, or `lsst.daf.butler.Config`
-        If `str`, `butler_config` is the path location of the gen3
-        butler/registry config file. If `dict`, `butler_config` is key value
-        pairs used to init or update the `lsst.daf.butler.Config` instance. If
-        `Config`, it is the object used to configure a Butler.
-    input : `list` [ `str` ]
+    butler_config : convertible to `lsst.resources.ResourcePath`
+        Path to butler repository configuration.
+    input : `~collections.abc.Iterable` [ `str` ] or `None`
         List of names of the input collection(s).
-    output : `str`
+    output : `str` or `None`
         Name of the output CHAINED collection. This may either be an existing
         CHAINED collection to use as both input and output (if `input` is
         `None`), or a new CHAINED collection created to include all inputs
         (if `input` is not `None`). In both cases, the collection's children
         will start with an output RUN collection that directly holds all new
         datasets (see `output_run`).
-    output_run : `str`
+    output_run : `str` or `None`
         Name of the new output RUN collection. If not provided then `output`
         must be provided and a new RUN collection will be created by appending
         a timestamp to the value passed with `output`. If this collection
@@ -136,13 +150,14 @@ def run(  # type: ignore
         development, but it does not delete the datasets associated with the
         replaced run unless `prune-replaced` is also True. Requires `output`,
         and `extend_run` must be `None`.
-    prune_replaced : "unstore", "purge", or `None`
+    prune_replaced : `str` or `None`
         If not `None`, delete the datasets in the collection replaced by
         `replace_run`, either just from the datastore ("unstore") or by
-        removing them and the RUN completely ("purge"). Requires `replace_run`.
+        removing them and the RUN completely ("purge"). Requires
+        ``replace_run`` to be `True`.
     data_query : `str`
         User query selection expression.
-    skip_existing_in : `list` [ `str` ]
+    skip_existing_in : `~collections.abc.Iterable` [ `str` ] or `None`
         Accepts list of collections, if all Quantum outputs already exist in
         the specified list of collections then that Quantum will be excluded
         from the QuantumGraph.
@@ -160,14 +175,6 @@ def run(  # type: ignore
         given.
     summary : `str`
         File path to store job report in JSON format.
-    mock : `bool`, optional
-        If `True` then run mock pipeline instead of real one.  Ignored if an
-        existing QuantumGraph is provided.
-    unmocked_dataset_types : `collections.abc.Sequence` [ `str` ]
-        List of overall-input dataset types that should not be mocked.
-        Ignored if an existing QuantumGraph is provided.
-    mock_failure : `~collections.abc.Sequence`, optional
-        List of quanta that should raise exceptions.
     enable_implicit_threading : `bool`, optional
         If `True`, do not disable implicit threading by third-party libraries.
         Implicit threading is always disabled during actual quantum execution
@@ -183,58 +190,163 @@ def run(  # type: ignore
         the ``inputs``.
     raise_on_partial_outputs : `bool`
         Consider partial outputs an error instead of a success.
-    **kwargs : `dict` [`str`, `str`]
+    **kwargs : `object`
         Ignored; click commands may accept options for more than one script
         function and pass all the option kwargs to each of the script functions
         which ignore these unused kwargs.
     """
     # Fork option still exists for compatibility but we use spawn instead.
-    if start_method == "fork":
-        start_method = "spawn"
-        _log.warning("Option --start-method=fork is unsafe and no longer supported, will use spawn instead.")
+    if start_method == "fork":  # type: ignore[comparison-overlap]
+        start_method = "spawn"  # type: ignore[unreachable]
+        _LOG.warning("Option --start-method=fork is unsafe and no longer supported, using spawn instead.")
     if not enable_implicit_threading:
         disable_implicit_threading()
-    args = SimpleNamespace(
-        pdb=pdb,
-        graph_fixup=graph_fixup,
-        init_only=init_only,
-        no_versions=no_versions,
-        processes=processes,
-        start_method=start_method,
-        profile=profile,
-        skip_init_writes=skip_init_writes,
-        timeout=timeout,
-        register_dataset_types=register_dataset_types,
-        butler_config=butler_config,
-        input=input,
+    skip_existing_in = tuple(skip_existing_in) if skip_existing_in is not None else ()
+    if data_query is None:
+        data_query = ""
+    inputs = list(ensure_iterable(input)) if input else []
+    del input
+    enable_lsst_debug = debug
+    del debug
+    # If we have no output run specified, use the one from the graph rather
+    # than letting a new timestamped run be created.
+    if not output_run and qg.metadata and (output_run := qg.metadata.get("output_run")):
+        output_run = output_run
+    # Check that output run defined on command line is consistent with
+    # quantum graph.
+    if output_run and qg.metadata:
+        graph_output_run = qg.metadata.get("output_run", output_run)
+        if graph_output_run != output_run:
+            raise ValueError(
+                f"Output run defined on command line ({output_run}) has to be "
+                f"identical to graph metadata ({graph_output_run}). "
+                "To update graph metadata run `pipetask update-graph-run` command."
+            )
+    # Make sure that --extend-run always enables --skip-existing,
+    # clobbering should be disabled if --extend-run is not specified.
+    if extend_run:
+        skip_existing = True
+    else:
+        clobber_outputs = False
+    # Make butler instance. QuantumGraph should have an output run defined,
+    # but we ignore it here and let command line decide actual output run.
+    butler = ButlerFactory.make_write_butler(
+        butler_config,
+        qg.pipeline_graph,
         output=output,
         output_run=output_run,
+        inputs=inputs,
         extend_run=extend_run,
+        rebase=rebase,
         replace_run=replace_run,
         prune_replaced=prune_replaced,
-        data_query=data_query,
+    )
+    assert butler.run is not None, "Guaranteed by make_write_butler."
+    if skip_existing:
+        skip_existing_in += (butler.run,)
+    # Enable lsstDebug debugging. Note that this is done once in the
+    # main process before PreExecInit and it is also repeated before
+    # running each task in SingleQuantumExecutor (which may not be
+    # needed if `multiprocessing` always uses fork start method).
+    if enable_lsst_debug:
+        try:
+            _LOG.debug("Will try to import debug.py")
+            import debug  # type: ignore  # noqa: F401
+        except ImportError:
+            _LOG.warning("No 'debug' module found.")
+    # Save all InitOutputs, configs, etc.
+    if register_dataset_types:
+        qg.pipeline_graph.register_dataset_types(butler, include_packages=not no_versions)
+    if not skip_init_writes:
+        qg.write_init_outputs(butler, skip_existing=skip_existing)
+        qg.write_configs(butler, compare_existing=extend_run)
+        if not no_versions:
+            qg.write_packages(butler, compare_existing=extend_run)
+    if init_only:
+        return
+    if task_factory is None:
+        task_factory = TaskFactory()
+    resources = ExecutionResources(
+        num_cores=cores_per_quantum, max_mem=memory_per_quantum, default_mem_units=u.MB
+    )
+    quantum_executor = SingleQuantumExecutor(
+        butler=butler,
+        task_factory=task_factory,
         skip_existing_in=skip_existing_in,
-        skip_existing=skip_existing,
-        enableLsstDebug=debug,
-        fail_fast=fail_fast,
         clobber_outputs=clobber_outputs,
-        summary=summary,
-        # Mock options only used by qgraph.
-        enable_implicit_threading=enable_implicit_threading,
-        cores_per_quantum=cores_per_quantum,
-        memory_per_quantum=memory_per_quantum,
-        rebase=rebase,
+        enable_lsst_debug=enable_lsst_debug,
+        resources=resources,
         raise_on_partial_outputs=raise_on_partial_outputs,
     )
-    f = CmdLineFwk()
-    taskFactory = TaskFactory()
+    if timeout is None:
+        timeout = MP_TIMEOUT
+    executor = MPGraphExecutor(
+        num_proc=processes,
+        timeout=timeout,
+        start_method=start_method,
+        quantum_executor=quantum_executor,
+        fail_fast=fail_fast,
+        pdb=pdb,
+        execution_graph_fixup=_import_graph_fixup(graph_fixup),
+    )
+    # Have to reset connection pool to avoid sharing connections with
+    # forked processes.
+    butler.registry.resetConnectionPool()
+    try:
+        with lsst.utils.timer.profile(profile, _LOG):
+            executor.execute(qg)
+    finally:
+        if summary:
+            report = executor.getReport()
+            if report:
+                with ResourcePath(summary).open("w") as out:
+                    # Do not save fields that are not set.
+                    out.write(report.model_dump_json(exclude_none=True, indent=2))
-    # If we have no output run specified, use the one from the graph rather
-    # than letting a new timestamped run be created.
-    if not args.output_run and qgraphObj.metadata and (output_run := qgraphObj.metadata.get("output_run")):
-        args.output_run = output_run
-    f.runPipeline(qgraphObj, taskFactory, args)
+def _import_graph_fixup(graph_fixup: str) -> ExecutionGraphFixup | None:
+    """Import/instantiate graph fixup object.
+    Parameters
+    ----------
+    graph_fixup : `str`
+        Graph fixup command-line argument.
+    Returns
+    -------
+    fixup : `ExecutionGraphFixup` or `None`
+        Object that imposes additional ordering constraints on the graph.
+    Raises
+    ------
+    ValueError
+        Raised if import fails, method call raises exception, or returned
+        instance has unexpected type.
+    """
+    from lsst.pipe.base.execution_graph_fixup import ExecutionGraphFixup
+    if graph_fixup:
+        try:
+            factory = doImportType(graph_fixup)
+        except Exception as exc:
+            raise ValueError("Failed to import graph fixup class/method") from exc
+        try:
+            fixup = factory()
+        except Exception as exc:
+            raise ValueError("Failed to make instance of graph fixup") from exc
+        if not isinstance(fixup, ExecutionGraphFixup):
+            raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
+        return fixup
+    return None

lsst/ctrl/mpexec/cli/script/run_qbb.py CHANGED Viewed

@@ -25,36 +25,59 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
-import logging
-from types import SimpleNamespace
+from __future__ import annotations
-from lsst.pipe.base import TaskFactory
+import pickle
+import uuid
+from collections.abc import Mapping
+from typing import Literal
+import astropy.units as u
+import lsst.utils.timer
+from lsst.daf.butler import (
+    DatasetType,
+    DimensionConfig,
+    DimensionUniverse,
+    LimitedButler,
+    Quantum,
+    QuantumBackedButler,
+)
+from lsst.pipe.base import BuildId, ExecutionResources, QuantumGraph, TaskFactory
+from lsst.pipe.base.mp_graph_executor import MPGraphExecutor
+from lsst.pipe.base.single_quantum_executor import SingleQuantumExecutor
+from lsst.resources import ResourcePath, ResourcePathExpression
+from lsst.utils.logging import VERBOSE, getLogger
 from lsst.utils.threads import disable_implicit_threading
-from ... import CmdLineFwk
+from ..butler_factory import ButlerFactory
+from ..utils import MP_TIMEOUT, summarize_quantum_graph
-_log = logging.getLogger(__name__)
+_LOG = getLogger(__name__)
 def run_qbb(
-    butler_config: str,
-    qgraph: str,
+    *,
+    task_factory: TaskFactory | None = None,
+    butler_config: ResourcePathExpression,
+    qgraph: ResourcePathExpression,
     config_search_path: list[str] | None,
     qgraph_id: str | None,
-    qgraph_node_id: list[int] | None,
+    qgraph_node_id: list[str | uuid.UUID] | None,
     processes: int,
     pdb: str | None,
     profile: str | None,
     debug: bool,
-    start_method: str | None,
+    start_method: Literal["spawn", "forkserver"] | None,
     timeout: int | None,
     fail_fast: bool,
-    summary: str | None,
+    summary: ResourcePathExpression | None,
     enable_implicit_threading: bool,
     cores_per_quantum: int,
     memory_per_quantum: str,
     raise_on_partial_outputs: bool,
     no_existing_outputs: bool,
+    **kwargs: object,
 ) -> None:
     """Implement the command line interface ``pipetask run-qbb`` subcommand.
@@ -63,6 +86,8 @@ def run_qbb(
     Parameters
     ----------
+    task_factory : `lsst.pipe.base.TaskFactory`, optional
+        A custom task factory to use.
     butler_config : `str`
         The path location of the gen3 butler/registry config file.
     qgraph : `str`
@@ -109,36 +134,146 @@ def run_qbb(
     no_existing_outputs : `bool`
         Whether to assume that no predicted outputs for these quanta already
         exist in the output run collection.
+    **kwargs : `object`
+        Ignored; click commands may accept options for more than one script
+        function and pass all the option kwargs to each of the script functions
+        which ignore these unused kwargs.
     """
     # Fork option still exists for compatibility but we use spawn instead.
-    if start_method == "fork":
-        start_method = "spawn"
-        _log.warning("Option --start-method=fork is unsafe and no longer supported, will use spawn instead.")
+    if start_method == "fork":  # type: ignore[comparison-overlap]
+        start_method = "spawn"  # type: ignore[unreachable]
+        _LOG.warning("Option --start-method=fork is unsafe and no longer supported, using spawn instead.")
     if not enable_implicit_threading:
         disable_implicit_threading()
-    args = SimpleNamespace(
+    # Load quantum graph.
+    nodes = qgraph_node_id or None
+    with lsst.utils.timer.time_this(
+        _LOG,
+        msg=f"Reading {str(len(nodes)) if nodes is not None else 'all'} quanta.",
+        level=VERBOSE,
+    ) as qg_read_time:
+        qg = QuantumGraph.loadUri(
+            qgraph, nodes=nodes, graphID=BuildId(qgraph_id) if qgraph_id is not None else None
+        )
+    job_metadata = {"qg_read_time": qg_read_time.duration, "qg_size": len(qg)}
+    if qg.metadata is None:
+        raise ValueError("QuantumGraph is missing metadata, cannot continue.")
+    summarize_quantum_graph(qg)
+    dataset_types = {dstype.name: dstype for dstype in qg.registryDatasetTypes()}
+    # Ensure that QBB uses shared datastore cache.
+    ButlerFactory.define_datastore_cache()
+    _butler_factory = _QBBFactory(
         butler_config=butler_config,
-        qgraph=qgraph,
+        dimensions=qg.universe,
+        dataset_types=dataset_types,
         config_search_path=config_search_path,
-        qgraph_id=qgraph_id,
-        qgraph_node_id=qgraph_node_id,
-        processes=processes,
-        pdb=pdb,
-        profile=profile,
-        enableLsstDebug=debug,
-        start_method=start_method,
+    )
+    # make special quantum executor
+    resources = ExecutionResources(
+        num_cores=cores_per_quantum, max_mem=memory_per_quantum, default_mem_units=u.MB
+    )
+    quantumExecutor = SingleQuantumExecutor(
+        butler=None,
+        task_factory=task_factory,
+        enable_lsst_debug=debug,
+        limited_butler_factory=_butler_factory,
+        resources=resources,
+        assume_no_existing_outputs=no_existing_outputs,
+        skip_existing=True,
+        clobber_outputs=True,
+        raise_on_partial_outputs=raise_on_partial_outputs,
+        job_metadata=job_metadata,
+    )
+    timeout = MP_TIMEOUT if timeout is None else timeout
+    executor = MPGraphExecutor(
+        num_proc=processes,
         timeout=timeout,
+        start_method=start_method,
+        quantum_executor=quantumExecutor,
         fail_fast=fail_fast,
-        summary=summary,
-        enable_implicit_threading=enable_implicit_threading,
-        cores_per_quantum=cores_per_quantum,
-        memory_per_quantum=memory_per_quantum,
-        raise_on_partial_outputs=raise_on_partial_outputs,
-        no_existing_outputs=no_existing_outputs,
+        pdb=pdb,
     )
+    try:
+        with lsst.utils.timer.profile(profile, _LOG):
+            executor.execute(qg)
+    finally:
+        if summary:
+            report = executor.getReport()
+            if report:
+                with ResourcePath(summary).open("w") as out:
+                    # Do not save fields that are not set.
+                    out.write(report.model_dump_json(exclude_none=True, indent=2))
+class _QBBFactory:
+    """Class which is a callable for making QBB instances.
+    This class is also responsible for reconstructing correct dimension
+    universe after unpickling. When pickling multiple things that require
+    dimension universe, this class must be unpickled first. The logic in
+    MPGraphExecutor ensures that SingleQuantumExecutor is unpickled first in
+    the subprocess, which causes unpickling of this class.
+    """
+    def __init__(
+        self,
+        butler_config: ResourcePathExpression,
+        dimensions: DimensionUniverse,
+        dataset_types: Mapping[str, DatasetType],
+        config_search_path: list[str] | None,
+    ):
+        self.butler_config = butler_config
+        self.dimensions = dimensions
+        self.dataset_types = dataset_types
+        self.config_search_path = config_search_path
+    def __call__(self, quantum: Quantum) -> LimitedButler:
+        """Return freshly initialized `~lsst.daf.butler.QuantumBackedButler`.
+        Factory method to create QuantumBackedButler instances.
+        """
+        return QuantumBackedButler.initialize(
+            config=self.butler_config,
+            quantum=quantum,
+            dimensions=self.dimensions,
+            dataset_types=self.dataset_types,
+        )
+    @classmethod
+    def _unpickle(
+        cls,
+        butler_config: ResourcePathExpression,
+        dimensions_config: DimensionConfig | None,
+        dataset_types_pickle: bytes,
+        config_search_path: list[str] | None,
+    ) -> _QBBFactory:
+        universe = DimensionUniverse(dimensions_config)
+        dataset_types = pickle.loads(dataset_types_pickle)
+        return _QBBFactory(butler_config, universe, dataset_types, config_search_path)
-    f = CmdLineFwk()
-    task_factory = TaskFactory()
-    f.runGraphQBB(task_factory, args)
+    def __reduce__(self) -> tuple:
+        # If dimension universe is not default one, we need to dump/restore
+        # its config.
+        config = self.dimensions.dimensionConfig
+        default = DimensionConfig()
+        # Only send configuration to other side if it is non-default, default
+        # will be instantiated from config=None.
+        if (config["namespace"], config["version"]) != (default["namespace"], default["version"]):
+            dimension_config = config
+        else:
+            dimension_config = None
+        # Dataset types need to be unpickled only after universe is made.
+        dataset_types_pickle = pickle.dumps(self.dataset_types)
+        return (
+            self._unpickle,
+            (self.butler_config, dimension_config, dataset_types_pickle, self.config_search_path),
+        )

lsst-ctrl-mpexec 29.2025.3400__py3-none-any.whl → 29.2025.3600__py3-none-any.whl

lsst-ctrl-mpexec 29.2025.3400__py3-none-any.whl → 29.2025.3600__py3-none-any.whl