lsst-pipe-base 29.2025.1000__py3-none-any.whl → 29.2025.1200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. lsst/pipe/base/_datasetQueryConstraints.py +1 -1
  2. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +642 -357
  3. lsst/pipe/base/connections.py +179 -2
  4. lsst/pipe/base/pipeline_graph/visualization/_mermaid.py +157 -24
  5. lsst/pipe/base/prerequisite_helpers.py +1 -1
  6. lsst/pipe/base/quantum_graph_builder.py +91 -60
  7. lsst/pipe/base/quantum_graph_skeleton.py +20 -0
  8. lsst/pipe/base/quantum_provenance_graph.py +790 -421
  9. lsst/pipe/base/tests/mocks/_data_id_match.py +4 -0
  10. lsst/pipe/base/version.py +1 -1
  11. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/METADATA +5 -2
  12. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/RECORD +20 -20
  13. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/WHEEL +1 -1
  14. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/entry_points.txt +0 -0
  15. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info/licenses}/COPYRIGHT +0 -0
  16. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info/licenses}/LICENSE +0 -0
  17. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info/licenses}/bsd_license.txt +0 -0
  18. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info/licenses}/gpl-v3.0.txt +0 -0
  19. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/top_level.txt +0 -0
  20. {lsst_pipe_base-29.2025.1000.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/zip-safe +0 -0
@@ -62,7 +62,7 @@ from lsst.utils.timer import timeMethod
  from . import automatic_connection_constants as acc
  from ._status import NoWorkFound
  from ._task_metadata import TaskMetadata
- from .connections import AdjustQuantumHelper
+ from .connections import AdjustQuantumHelper, QuantaAdjuster
  from .graph import QuantumGraph
  from .pipeline_graph import PipelineGraph, TaskNode
  from .prerequisite_helpers import PrerequisiteInfo, SkyPixBoundsBuilder, TimespanBuilder
@@ -475,7 +475,6 @@ class QuantumGraphBuilder(ABC):
  # Loop over all quanta for this task, remembering the ones we've
  # gotten rid of.
  skipped_quanta = []
- no_work_quanta = []
  for quantum_key in skeleton.get_quanta(task_node.label):
  if self._skip_quantum_if_metadata_exists(task_node, quantum_key, skeleton):
  skipped_quanta.append(quantum_key)
@@ -483,17 +482,26 @@ class QuantumGraphBuilder(ABC):
  quantum_data_id = skeleton[quantum_key]["data_id"]
  skypix_bounds_builder = task_prerequisite_info.bounds.make_skypix_bounds_builder(quantum_data_id)
  timespan_builder = task_prerequisite_info.bounds.make_timespan_builder(quantum_data_id)
- adjusted_outputs = self._gather_quantum_outputs(
- task_node, quantum_key, skeleton, skypix_bounds_builder, timespan_builder
- )
- adjusted_inputs = self._gather_quantum_inputs(
- task_node,
+ self._update_quantum_for_adjust(
  quantum_key,
  skeleton,
  task_prerequisite_info,
  skypix_bounds_builder,
  timespan_builder,
  )
+ for skipped_quantum in skipped_quanta:
+ skeleton.remove_quantum_node(skipped_quantum, remove_outputs=False)
+ # Give the task a chance to adjust all quanta together. This
+ # operates directly on the skeleton (via the 'adjuster', which
+ # is just an interface adapter).
+ adjuster = QuantaAdjuster(task_node.label, self._pipeline_graph, skeleton)
+ task_node.get_connections().adjust_all_quanta(adjuster)
+ # Loop over all quanta again, remembering those we get rid of in other
+ # ways.
+ no_work_quanta = []
+ for quantum_key in skeleton.get_quanta(task_node.label):
+ adjusted_outputs = self._adapt_quantum_outputs(task_node, quantum_key, skeleton)
+ adjusted_inputs = self._adapt_quantum_inputs(task_node, quantum_key, skeleton)
  # Give the task's Connections class an opportunity to remove
  # some inputs, or complain if they are unacceptable. This will
  # raise if one of the check conditions is not met, which is the
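A note on the change above: adjust_all_quanta is a new per-task hook, called once after existing outputs and prerequisites have been resolved and previously-succeeded quanta removed, and before the usual per-quantum adjustQuantum pass. The QuantaAdjuster it receives is defined in the connections.py changes (+179 lines, not shown in this excerpt). The snippet below is only a hedged sketch of how a task's connections class might override the hook; iter_data_ids() and remove_quantum() are hypothetical placeholder names, not the real adapter API.

    from lsst.pipe.base import PipelineTaskConnections

    class ExampleConnections(PipelineTaskConnections, dimensions=("visit", "detector")):
        """Illustrative connections class; not part of lsst-pipe-base."""

        def adjust_all_quanta(self, adjuster):
            # Unlike adjustQuantum, this hook sees every candidate quantum for
            # the task at once, so it can apply whole-graph criteria.
            for data_id in list(adjuster.iter_data_ids()):    # hypothetical method
                if data_id["detector"] % 2 == 1:              # arbitrary illustrative criterion
                    adjuster.remove_quantum(data_id)          # hypothetical method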
@@ -552,8 +560,6 @@ class QuantumGraphBuilder(ABC):
  skeleton[quantum_key]["outputs"] = helper.outputs
  for no_work_quantum in no_work_quanta:
  skeleton.remove_quantum_node(no_work_quantum, remove_outputs=True)
- for skipped_quantum in skipped_quanta:
- skeleton.remove_quantum_node(skipped_quantum, remove_outputs=False)
  remaining_quanta = skeleton.get_quanta(task_node.label)
  self._resolve_task_init(task_node, skeleton, bool(skipped_quanta))
  message_terms = []
@@ -561,6 +567,8 @@ class QuantumGraphBuilder(ABC):
  message_terms.append(f"{len(no_work_quanta)} had no work to do")
  if skipped_quanta:
  message_terms.append(f"{len(skipped_quanta)} previously succeeded")
+ if adjuster.n_removed:
+ message_terms.append(f"{adjuster.n_removed} removed by adjust_all_quanta")
  message_parenthetical = f" ({', '.join(message_terms)})" if message_terms else ""
  if remaining_quanta:
  self.log.info(
@@ -634,40 +642,32 @@ class QuantumGraphBuilder(ABC):
  return False

  @final
- def _gather_quantum_outputs(
+ def _update_quantum_for_adjust(
  self,
- task_node: TaskNode,
  quantum_key: QuantumKey,
  skeleton: QuantumGraphSkeleton,
+ task_prerequisite_info: PrerequisiteInfo,
  skypix_bounds_builder: SkyPixBoundsBuilder,
  timespan_builder: TimespanBuilder,
- ) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
- """Collect outputs or generate datasets for a preliminary quantum and
- put them in the form used by `~lsst.daf.butler.Quantum` and
- `~PipelineTaskConnections.adjustQuantum`.
+ ) -> None:
+ """Update the quantum node in the skeleton by finding remaining
+ prerequisite inputs and dropping regular inputs that we now know will
+ not be produced.

  Parameters
  ----------
- task_node : `pipeline_graph.TaskNode`
- Node for this task in the pipeline graph.
  quantum_key : `QuantumKey`
  Identifier for this quantum in the graph.
  skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
  Preliminary quantum graph, to be modified in-place.
+ task_prerequisite_info : `~prerequisite_helpers.PrerequisiteInfo`
+ Information about the prerequisite inputs to this task.
  skypix_bounds_builder : `~prerequisite_helpers.SkyPixBoundsBuilder`
  An object that accumulates the appropriate spatial bounds for a
  quantum.
  timespan_builder : `~prerequisite_helpers.TimespanBuilder`
  An object that accumulates the appropriate timespan for a quantum.

- Returns
- -------
- outputs : `~lsst.daf.butler.NamedKeyDict` [ \
- `~lsst.daf.butler.DatasetType`, `list` [ \
- `~lsst.daf.butler.DatasetRef` ] ]
- All outputs to the task, using the storage class and components
- defined by the task's own connections.
-
  Notes
  -----
  This first looks for outputs already present in the `output_run` (i.e.
@@ -680,8 +680,7 @@ class QuantumGraphBuilder(ABC):
  UUID is generated. In all cases the dataset node in the skeleton is
  associated with a `DatasetRef`.
  """
- outputs_by_type: dict[str, list[DatasetRef]] = {}
- dataset_key: DatasetKey
+ dataset_key: DatasetKey | PrerequisiteDatasetKey
  for dataset_key in skeleton.iter_outputs_of(quantum_key):
  dataset_data_id = skeleton[dataset_key]["data_id"]
  dataset_type_node = self._pipeline_graph.dataset_types[dataset_key.parent_dataset_type_name]
@@ -699,6 +698,66 @@ class QuantumGraphBuilder(ABC):
  skypix_bounds_builder.handle_dataset(dataset_key.parent_dataset_type_name, dataset_data_id)
  timespan_builder.handle_dataset(dataset_key.parent_dataset_type_name, dataset_data_id)
  skeleton.set_dataset_ref(ref, dataset_key)
+ quantum_data_id = skeleton[quantum_key]["data_id"]
+ # Process inputs already present in the skeleton - this should include
+ # all regular inputs (including intermediates) and may include some
+ # prerequisites.
+ for dataset_key in list(skeleton.iter_inputs_of(quantum_key)):
+ if (ref := skeleton.get_dataset_ref(dataset_key)) is None:
+ # If the dataset ref hasn't been set either as an existing
+ # input or as an output of an already-processed upstream
+ # quantum, it's not going to be produced; remove it.
+ skeleton.remove_dataset_nodes([dataset_key])
+ continue
+ skypix_bounds_builder.handle_dataset(dataset_key.parent_dataset_type_name, ref.dataId)
+ timespan_builder.handle_dataset(dataset_key.parent_dataset_type_name, ref.dataId)
+ # Query for any prerequisites not handled by process_subgraph. Note
+ # that these were not already in the skeleton graph, so we add them
+ # now.
+ skypix_bounds = skypix_bounds_builder.finish()
+ timespan = timespan_builder.finish()
+ for finder in task_prerequisite_info.finders.values():
+ dataset_keys = []
+ for ref in finder.find(
+ self.butler, self.input_collections, quantum_data_id, skypix_bounds, timespan
+ ):
+ dataset_key = skeleton.add_prerequisite_node(ref)
+ dataset_keys.append(dataset_key)
+ skeleton.add_input_edges(quantum_key, dataset_keys)
+
+ @final
+ def _adapt_quantum_outputs(
+ self,
+ task_node: TaskNode,
+ quantum_key: QuantumKey,
+ skeleton: QuantumGraphSkeleton,
+ ) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
+ """Adapt outputs for a preliminary quantum and put them into the form
+ used by `~lsst.daf.butler.Quantum` and
+ `~PipelineTaskConnections.adjustQuantum`.
+
+ Parameters
+ ----------
+ task_node : `pipeline_graph.TaskNode`
+ Node for this task in the pipeline graph.
+ quantum_key : `QuantumKey`
+ Identifier for this quantum in the graph.
+ skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
+ Preliminary quantum graph, to be modified in-place.
+
+ Returns
+ -------
+ outputs : `~lsst.daf.butler.NamedKeyDict` [ \
+ `~lsst.daf.butler.DatasetType`, `list` [ \
+ `~lsst.daf.butler.DatasetRef` ] ]
+ All outputs to the task, using the storage class and components
+ defined by the task's own connections.
+ """
+ outputs_by_type: dict[str, list[DatasetRef]] = {}
+ dataset_key: DatasetKey
+ for dataset_key in skeleton.iter_outputs_of(quantum_key):
+ ref = skeleton.get_dataset_ref(dataset_key)
+ assert ref is not None, "Should have been added (or the node removed) in a previous pass."
  outputs_by_type.setdefault(dataset_key.parent_dataset_type_name, []).append(ref)
  adapted_outputs: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
  for write_edge in task_node.iter_all_outputs():
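For readers unfamiliar with NamedKeyDict, the context lines closing this hunk regroup the string-keyed outputs_by_type mapping into a NamedKeyDict keyed by DatasetType, which can be looked up by either the DatasetType object or its name. A self-contained sketch of that grouping pattern (the refs argument is a placeholder; nothing here is specific to this diff):

    from lsst.daf.butler import DatasetRef, DatasetType, NamedKeyDict

    def group_refs_by_type(refs: list[DatasetRef]) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
        # NamedKeyDict keys expose a .name attribute, so callers can look
        # entries up by DatasetType or by dataset type name; that is how the
        # builder bridges the skeleton's string keys and the DatasetType-keyed
        # mappings expected by Quantum and adjustQuantum.
        grouped: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            if ref.datasetType not in grouped:
                grouped[ref.datasetType] = []
            grouped[ref.datasetType].append(ref)
        return grouped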
@@ -711,17 +770,14 @@ class QuantumGraphBuilder(ABC):
  return adapted_outputs

  @final
- def _gather_quantum_inputs(
+ def _adapt_quantum_inputs(
  self,
  task_node: TaskNode,
  quantum_key: QuantumKey,
  skeleton: QuantumGraphSkeleton,
- task_prerequisite_info: PrerequisiteInfo,
- skypix_bounds_builder: SkyPixBoundsBuilder,
- timespan_builder: TimespanBuilder,
  ) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
- """Collect input datasets for a preliminary quantum and put them in the
- form used by `~lsst.daf.butler.Quantum` and
+ """Adapt input datasets for a preliminary quantum into the form used by
+ `~lsst.daf.butler.Quantum` and
  `~PipelineTaskConnections.adjustQuantum`.

  Parameters
@@ -752,37 +808,12 @@ class QuantumGraphBuilder(ABC):
  with a `DatasetRef`, and queries for prerequisite input nodes that do
  not exist.
  """
- quantum_data_id = skeleton[quantum_key]["data_id"]
  inputs_by_type: dict[str, set[DatasetRef]] = {}
  dataset_key: DatasetKey | PrerequisiteDatasetKey
- # Process inputs already present in the skeleton - this should include
- # all regular inputs (including intermediates) and may include some
- # prerequisites.
  for dataset_key in list(skeleton.iter_inputs_of(quantum_key)):
- if (ref := skeleton.get_dataset_ref(dataset_key)) is None:
- # If the dataset ref hasn't been set either as an existing
- # input or as an output of an already-processed upstream
- # quantum, it's not going to be produced; remove it.
- skeleton.remove_dataset_nodes([dataset_key])
- continue
+ ref = skeleton.get_dataset_ref(dataset_key)
+ assert ref is not None, "Should have been added (or the node removed) in a previous pass."
  inputs_by_type.setdefault(dataset_key.parent_dataset_type_name, set()).add(ref)
- skypix_bounds_builder.handle_dataset(dataset_key.parent_dataset_type_name, ref.dataId)
- timespan_builder.handle_dataset(dataset_key.parent_dataset_type_name, ref.dataId)
- # Query for any prerequisites not handled by process_subgraph. Note
- # that these were not already in the skeleton graph, so we add them
- # now.
- skypix_bounds = skypix_bounds_builder.finish()
- timespan = timespan_builder.finish()
- for finder in task_prerequisite_info.finders.values():
- inputs_for_type = inputs_by_type.setdefault(finder.dataset_type_node.name, set())
- dataset_keys = []
- for ref in finder.find(
- self.butler, self.input_collections, quantum_data_id, skypix_bounds, timespan
- ):
- dataset_key = skeleton.add_prerequisite_node(ref)
- dataset_keys.append(dataset_key)
- inputs_for_type.add(ref)
- skeleton.add_input_edges(quantum_key, dataset_keys)
  adapted_inputs: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
  for read_edge in task_node.iter_all_inputs():
  dataset_type_node = self._pipeline_graph.dataset_types[read_edge.parent_dataset_type_name]
@@ -669,3 +669,23 @@ class QuantumGraphSkeleton:
  output_in_the_way: DatasetRef | None
  if (output_in_the_way := state.get("output_in_the_way")) is not None:
  state["output_in_the_way"] = output_in_the_way.expanded(data_id)
+
+ def get_data_id(self, key: Key) -> DataCoordinate:
+ """Return the full data ID for a quantum or dataset, if available.
+
+ Parameters
+ ----------
+ key : `Key`
+ Identifier for the graph node.
+
+ Returns
+ -------
+ data_id : `DataCoordinate`
+ Expanded data ID for the node, if one is available.
+
+ Raises
+ ------
+ KeyError
+ Raised if this node does not have an expanded data ID.
+ """
+ return self._xgraph.nodes[key]["data_id"]
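The new QuantumGraphSkeleton.get_data_id accessor simply reads the node's stored attribute, so it raises KeyError when the node has no expanded data ID attached yet. A small usage sketch, assuming a skeleton and node key already in hand (describe_node is illustrative and not part of the package):

    from lsst.pipe.base.quantum_graph_skeleton import QuantumGraphSkeleton

    def describe_node(skeleton: QuantumGraphSkeleton, key) -> str:
        # 'key' is any quantum or dataset key already present in the skeleton.
        try:
            data_id = skeleton.get_data_id(key)
        except KeyError:
            return f"{key}: no expanded data ID attached yet"
        return f"{key}: {data_id}"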