lsst-pipe-base 29.2025.3000__py3-none-any.whl → 29.2025.3200__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/__init__.py +0 -1
- lsst/pipe/base/_datasetQueryConstraints.py +1 -1
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +10 -46
- lsst/pipe/base/caching_limited_butler.py +8 -4
- lsst/pipe/base/connectionTypes.py +19 -19
- lsst/pipe/base/connections.py +2 -2
- lsst/pipe/base/exec_fixup_data_id.py +131 -0
- lsst/pipe/base/execution_graph_fixup.py +69 -0
- lsst/pipe/base/graph/graphSummary.py +4 -4
- lsst/pipe/base/log_capture.py +227 -0
- lsst/pipe/base/mp_graph_executor.py +786 -0
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +40 -10
- lsst/pipe/base/pipeline_graph/_tasks.py +106 -0
- lsst/pipe/base/pipeline_graph/io.py +1 -1
- lsst/pipe/base/quantum_graph_builder.py +85 -58
- lsst/pipe/base/quantum_graph_executor.py +125 -0
- lsst/pipe/base/quantum_graph_skeleton.py +60 -1
- lsst/pipe/base/quantum_reports.py +334 -0
- lsst/pipe/base/script/transfer_from_graph.py +4 -1
- lsst/pipe/base/separable_pipeline_executor.py +296 -0
- lsst/pipe/base/simple_pipeline_executor.py +674 -0
- lsst/pipe/base/single_quantum_executor.py +635 -0
- lsst/pipe/base/taskFactory.py +18 -12
- lsst/pipe/base/tests/in_memory_limited_butler.py +223 -0
- lsst/pipe/base/tests/mocks/__init__.py +1 -0
- lsst/pipe/base/tests/mocks/_in_memory_repo.py +357 -0
- lsst/pipe/base/tests/mocks/_pipeline_task.py +19 -2
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/RECORD +38 -28
- lsst/pipe/base/executionButlerBuilder.py +0 -493
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/zip-safe +0 -0
|
@@ -150,8 +150,10 @@ class PipelineGraph:
|
|
|
150
150
|
self._description = value
|
|
151
151
|
|
|
152
152
|
@property
|
|
153
|
-
def universe(self) -> DimensionUniverse
|
|
153
|
+
def universe(self) -> DimensionUniverse:
|
|
154
154
|
"""Definitions for all butler dimensions."""
|
|
155
|
+
if self._universe is None:
|
|
156
|
+
raise UnresolvedGraphError("Pipeline graph is not resolved.")
|
|
155
157
|
return self._universe
|
|
156
158
|
|
|
157
159
|
@property
|
|
@@ -159,7 +161,7 @@ class PipelineGraph:
|
|
|
159
161
|
"""Data ID that represents a constraint on all quanta generated from
|
|
160
162
|
this pipeline.
|
|
161
163
|
|
|
162
|
-
This is may not be available unless
|
|
164
|
+
This is may not be available unless the graph is resolved.
|
|
163
165
|
"""
|
|
164
166
|
return DataCoordinate.standardize(self._raw_data_id, universe=self.universe)
|
|
165
167
|
|
|
@@ -305,7 +307,7 @@ class PipelineGraph:
|
|
|
305
307
|
for k, v in self._task_subsets.items()
|
|
306
308
|
},
|
|
307
309
|
description=self._description,
|
|
308
|
-
universe=self.
|
|
310
|
+
universe=self._universe,
|
|
309
311
|
data_id=self._raw_data_id,
|
|
310
312
|
step_definitions=step_definitions,
|
|
311
313
|
)
|
|
@@ -774,7 +776,7 @@ class PipelineGraph:
|
|
|
774
776
|
key=NodeKey(NodeType.TASK, label),
|
|
775
777
|
init_key=NodeKey(NodeType.TASK_INIT, label),
|
|
776
778
|
data=_TaskNodeImportedData.configure(label, task_class, config, connections),
|
|
777
|
-
universe=self.
|
|
779
|
+
universe=self._universe,
|
|
778
780
|
)
|
|
779
781
|
self.add_task_nodes([task_node])
|
|
780
782
|
return task_node
|
|
@@ -1571,9 +1573,9 @@ class PipelineGraph:
|
|
|
1571
1573
|
|
|
1572
1574
|
Returns
|
|
1573
1575
|
-------
|
|
1574
|
-
groups : `dict` [
|
|
1575
|
-
A dictionary of groups keyed by
|
|
1576
|
-
value is a tuple of:
|
|
1576
|
+
groups : `dict` [ `~lsst.daf.butler.DimensionGroup`, `tuple` ]
|
|
1577
|
+
A dictionary of groups keyed by `~lsst.daf.butler.DimensionGroup`,
|
|
1578
|
+
in which each value is a tuple of:
|
|
1577
1579
|
|
|
1578
1580
|
- a `dict` of `TaskNode` instances, keyed by task label
|
|
1579
1581
|
- a `dict` of `DatasetTypeNode` instances, keyed by
|
|
@@ -1605,6 +1607,25 @@ class PipelineGraph:
|
|
|
1605
1607
|
group[1][dataset_type_node.name] = dataset_type_node
|
|
1606
1608
|
return result
|
|
1607
1609
|
|
|
1610
|
+
def get_all_dimensions(self, prerequisites: bool = True) -> DimensionGroup:
|
|
1611
|
+
"""Return all dimensions used in this graph's tasks and dataset types.
|
|
1612
|
+
|
|
1613
|
+
Parameters
|
|
1614
|
+
----------
|
|
1615
|
+
prerequisites : `bool`, optional
|
|
1616
|
+
If `False`, do not include the dimensions that are only used by
|
|
1617
|
+
prerequisite input dataset types.
|
|
1618
|
+
|
|
1619
|
+
Returns
|
|
1620
|
+
-------
|
|
1621
|
+
dimensions : `~lsst.daf.butler.DimensionGroup`.
|
|
1622
|
+
All dimensions in this pipeline.
|
|
1623
|
+
"""
|
|
1624
|
+
return DimensionGroup.union(
|
|
1625
|
+
*self.group_by_dimensions(prerequisites=prerequisites).keys(),
|
|
1626
|
+
universe=self.universe,
|
|
1627
|
+
)
|
|
1628
|
+
|
|
1608
1629
|
def split_independent(self) -> Iterable[PipelineGraph]:
|
|
1609
1630
|
"""Iterate over independent subgraphs that together comprise this
|
|
1610
1631
|
pipeline graph.
|
|
@@ -1668,11 +1689,13 @@ class PipelineGraph:
|
|
|
1668
1689
|
not considered part of the pipeline graph in other respects, but it
|
|
1669
1690
|
does get written with other provenance datasets.
|
|
1670
1691
|
"""
|
|
1671
|
-
if self.
|
|
1692
|
+
if self._universe is None:
|
|
1672
1693
|
raise UnresolvedGraphError(
|
|
1673
1694
|
"PipelineGraph must be resolved in order to get the packages dataset type."
|
|
1674
1695
|
)
|
|
1675
|
-
return DatasetType(
|
|
1696
|
+
return DatasetType(
|
|
1697
|
+
PACKAGES_INIT_OUTPUT_NAME, self._universe.empty, PACKAGES_INIT_OUTPUT_STORAGE_CLASS
|
|
1698
|
+
)
|
|
1676
1699
|
|
|
1677
1700
|
def register_dataset_types(self, butler: Butler, include_packages: bool = True) -> None:
|
|
1678
1701
|
"""Register all dataset types in a data repository.
|
|
@@ -1767,6 +1790,7 @@ class PipelineGraph:
|
|
|
1767
1790
|
self,
|
|
1768
1791
|
get_init_input: Callable[[DatasetType], Any] | None = None,
|
|
1769
1792
|
init_outputs: list[tuple[Any, DatasetType]] | None = None,
|
|
1793
|
+
labels: Iterable[str] | None = None,
|
|
1770
1794
|
) -> list[PipelineTask]:
|
|
1771
1795
|
"""Instantiate all tasks in the pipeline.
|
|
1772
1796
|
|
|
@@ -1785,6 +1809,9 @@ class PipelineGraph:
|
|
|
1785
1809
|
correspond to the storage class of the output connection, which
|
|
1786
1810
|
may not be the same as the storage class on the graph's dataset
|
|
1787
1811
|
type node.
|
|
1812
|
+
labels : `~collections.abc.Iterable` [ `str` ], optional
|
|
1813
|
+
The labels of tasks to instantiate. If not provided, all tasks in
|
|
1814
|
+
the graph will be instantiated.
|
|
1788
1815
|
|
|
1789
1816
|
Returns
|
|
1790
1817
|
-------
|
|
@@ -1793,10 +1820,13 @@ class PipelineGraph:
|
|
|
1793
1820
|
"""
|
|
1794
1821
|
if not self.is_fully_resolved:
|
|
1795
1822
|
raise UnresolvedGraphError("Pipeline graph must be fully resolved before instantiating tasks.")
|
|
1796
|
-
empty_data_id = DataCoordinate.make_empty(
|
|
1823
|
+
empty_data_id = DataCoordinate.make_empty(self.universe)
|
|
1824
|
+
labels = set(labels) if labels is not None else self.tasks.keys()
|
|
1797
1825
|
handles: dict[str, InMemoryDatasetHandle] = {}
|
|
1798
1826
|
tasks: list[PipelineTask] = []
|
|
1799
1827
|
for task_node in self.tasks.values():
|
|
1828
|
+
if task_node.label not in labels:
|
|
1829
|
+
continue
|
|
1800
1830
|
task_init_inputs: dict[str, Any] = {}
|
|
1801
1831
|
for read_edge in task_node.init.inputs.values():
|
|
1802
1832
|
if (handle := handles.get(read_edge.dataset_type_name)) is not None:
|
|
@@ -360,6 +360,57 @@ class TaskInitNode:
|
|
|
360
360
|
yield from self.outputs.values()
|
|
361
361
|
yield self.config_output
|
|
362
362
|
|
|
363
|
+
def get_input_edge(self, connection_name: str) -> ReadEdge:
|
|
364
|
+
"""Look up an input edge by connection name.
|
|
365
|
+
|
|
366
|
+
Parameters
|
|
367
|
+
----------
|
|
368
|
+
connection_name : `str`
|
|
369
|
+
Name of the connection.
|
|
370
|
+
|
|
371
|
+
Returns
|
|
372
|
+
-------
|
|
373
|
+
edge : `ReadEdge`
|
|
374
|
+
Input edge.
|
|
375
|
+
"""
|
|
376
|
+
return self.inputs[connection_name]
|
|
377
|
+
|
|
378
|
+
def get_output_edge(self, connection_name: str) -> WriteEdge:
|
|
379
|
+
"""Look up an output edge by connection name.
|
|
380
|
+
|
|
381
|
+
Parameters
|
|
382
|
+
----------
|
|
383
|
+
connection_name : `str`
|
|
384
|
+
Name of the connection.
|
|
385
|
+
|
|
386
|
+
Returns
|
|
387
|
+
-------
|
|
388
|
+
edge : `WriteEdge`
|
|
389
|
+
Output edge.
|
|
390
|
+
"""
|
|
391
|
+
if connection_name == acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME:
|
|
392
|
+
return self.config_output
|
|
393
|
+
return self.outputs[connection_name]
|
|
394
|
+
|
|
395
|
+
def get_edge(self, connection_name: str) -> Edge:
|
|
396
|
+
"""Look up an edge by connection name.
|
|
397
|
+
|
|
398
|
+
Parameters
|
|
399
|
+
----------
|
|
400
|
+
connection_name : `str`
|
|
401
|
+
Name of the connection.
|
|
402
|
+
|
|
403
|
+
Returns
|
|
404
|
+
-------
|
|
405
|
+
edge : `Edge`
|
|
406
|
+
Edge.
|
|
407
|
+
"""
|
|
408
|
+
try:
|
|
409
|
+
return self.get_input_edge(connection_name)
|
|
410
|
+
except KeyError:
|
|
411
|
+
pass
|
|
412
|
+
return self.get_output_edge(connection_name)
|
|
413
|
+
|
|
363
414
|
def diff_edges(self, other: TaskInitNode) -> list[str]:
|
|
364
415
|
"""Compare the edges of this task initialization node to those from the
|
|
365
416
|
same task label in a different pipeline.
|
|
@@ -742,6 +793,61 @@ class TaskNode:
|
|
|
742
793
|
if self.log_output is not None:
|
|
743
794
|
yield self.log_output
|
|
744
795
|
|
|
796
|
+
def get_input_edge(self, connection_name: str) -> ReadEdge:
|
|
797
|
+
"""Look up an input edge by connection name.
|
|
798
|
+
|
|
799
|
+
Parameters
|
|
800
|
+
----------
|
|
801
|
+
connection_name : `str`
|
|
802
|
+
Name of the connection.
|
|
803
|
+
|
|
804
|
+
Returns
|
|
805
|
+
-------
|
|
806
|
+
edge : `ReadEdge`
|
|
807
|
+
Input edge.
|
|
808
|
+
"""
|
|
809
|
+
return self.inputs[connection_name]
|
|
810
|
+
|
|
811
|
+
def get_output_edge(self, connection_name: str) -> WriteEdge:
|
|
812
|
+
"""Look up an output edge by connection name.
|
|
813
|
+
|
|
814
|
+
Parameters
|
|
815
|
+
----------
|
|
816
|
+
connection_name : `str`
|
|
817
|
+
Name of the connection.
|
|
818
|
+
|
|
819
|
+
Returns
|
|
820
|
+
-------
|
|
821
|
+
edge : `WriteEdge`
|
|
822
|
+
Output edge.
|
|
823
|
+
"""
|
|
824
|
+
if connection_name == acc.METADATA_OUTPUT_CONNECTION_NAME:
|
|
825
|
+
return self.metadata_output
|
|
826
|
+
if connection_name == acc.LOG_OUTPUT_CONNECTION_NAME:
|
|
827
|
+
if self.log_output is None:
|
|
828
|
+
raise KeyError(connection_name)
|
|
829
|
+
return self.log_output
|
|
830
|
+
return self.outputs[connection_name]
|
|
831
|
+
|
|
832
|
+
def get_edge(self, connection_name: str) -> Edge:
|
|
833
|
+
"""Look up an edge by connection name.
|
|
834
|
+
|
|
835
|
+
Parameters
|
|
836
|
+
----------
|
|
837
|
+
connection_name : `str`
|
|
838
|
+
Name of the connection.
|
|
839
|
+
|
|
840
|
+
Returns
|
|
841
|
+
-------
|
|
842
|
+
edge : `Edge`
|
|
843
|
+
Edge.
|
|
844
|
+
"""
|
|
845
|
+
try:
|
|
846
|
+
return self.get_input_edge(connection_name)
|
|
847
|
+
except KeyError:
|
|
848
|
+
pass
|
|
849
|
+
return self.get_output_edge(connection_name)
|
|
850
|
+
|
|
745
851
|
def diff_edges(self, other: TaskNode) -> list[str]:
|
|
746
852
|
"""Compare the edges of this task node to those from the same task
|
|
747
853
|
label in a different pipeline.
|
|
@@ -713,7 +713,7 @@ class SerializedPipelineGraph(pydantic.BaseModel):
|
|
|
713
713
|
},
|
|
714
714
|
step_labels=list(target.steps),
|
|
715
715
|
steps_verified=target.steps.verified,
|
|
716
|
-
dimensions=target.
|
|
716
|
+
dimensions=target._universe.dimensionConfig.toDict() if target._universe is not None else None,
|
|
717
717
|
data_id=target._raw_data_id,
|
|
718
718
|
)
|
|
719
719
|
if target._sorted_keys:
|
|
@@ -55,6 +55,7 @@ from lsst.daf.butler import (
|
|
|
55
55
|
NamedKeyMapping,
|
|
56
56
|
Quantum,
|
|
57
57
|
)
|
|
58
|
+
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
|
|
58
59
|
from lsst.daf.butler.registry import MissingCollectionError, MissingDatasetTypeError
|
|
59
60
|
from lsst.utils.logging import LsstLogAdapter, getLogger
|
|
60
61
|
from lsst.utils.timer import timeMethod
|
|
@@ -103,13 +104,13 @@ class InitInputMissingError(QuantumGraphBuilderError):
|
|
|
103
104
|
|
|
104
105
|
|
|
105
106
|
class QuantumGraphBuilder(ABC):
|
|
106
|
-
"""An abstract base class for building
|
|
107
|
+
"""An abstract base class for building `.QuantumGraph` objects from a
|
|
107
108
|
pipeline.
|
|
108
109
|
|
|
109
110
|
Parameters
|
|
110
111
|
----------
|
|
111
112
|
pipeline_graph : `.pipeline_graph.PipelineGraph`
|
|
112
|
-
Pipeline to build a
|
|
113
|
+
Pipeline to build a `.QuantumGraph` from, as a graph. Will be resolved
|
|
113
114
|
in-place with the given butler (any existing resolution is ignored).
|
|
114
115
|
butler : `lsst.daf.butler.Butler`
|
|
115
116
|
Client for the data repository. Should be read-only.
|
|
@@ -139,7 +140,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
139
140
|
The `build` method splits the pipeline graph into independent subgraphs,
|
|
140
141
|
then calls the abstract method `process_subgraph` on each, to allow
|
|
141
142
|
concrete implementations to populate the rough graph structure (the
|
|
142
|
-
|
|
143
|
+
`~.quantum_graph_skeleton.QuantumGraphSkeleton` class), including searching
|
|
143
144
|
for existing datasets. The `build` method then:
|
|
144
145
|
|
|
145
146
|
- assembles `lsst.daf.butler.Quantum` instances from all data IDs in the
|
|
@@ -321,7 +322,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
321
322
|
|
|
322
323
|
Returns
|
|
323
324
|
-------
|
|
324
|
-
quantum_graph :
|
|
325
|
+
quantum_graph : `.QuantumGraph`
|
|
325
326
|
DAG describing processing to be performed.
|
|
326
327
|
|
|
327
328
|
Notes
|
|
@@ -373,7 +374,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
373
374
|
@abstractmethod
|
|
374
375
|
def process_subgraph(self, subgraph: PipelineGraph) -> QuantumGraphSkeleton:
|
|
375
376
|
"""Build the rough structure for an independent subset of the
|
|
376
|
-
|
|
377
|
+
`.QuantumGraph` and query for relevant existing datasets.
|
|
377
378
|
|
|
378
379
|
Parameters
|
|
379
380
|
----------
|
|
@@ -384,39 +385,38 @@ class QuantumGraphBuilder(ABC):
|
|
|
384
385
|
|
|
385
386
|
Returns
|
|
386
387
|
-------
|
|
387
|
-
skeleton :
|
|
388
|
+
skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
388
389
|
Class representing an initial quantum graph. See
|
|
389
|
-
|
|
390
|
+
`.quantum_graph_skeleton.QuantumGraphSkeleton` docs for details.
|
|
390
391
|
After this is returned, the object may be modified in-place in
|
|
391
392
|
unspecified ways.
|
|
392
393
|
|
|
393
394
|
Notes
|
|
394
395
|
-----
|
|
395
|
-
The
|
|
396
|
-
`DatasetRef` objects with nodes for existing datasets.
|
|
397
|
-
particular:
|
|
396
|
+
The `.quantum_graph_skeleton.QuantumGraphSkeleton` should associate
|
|
397
|
+
`lsst.daf.butler.DatasetRef` objects with nodes for existing datasets.
|
|
398
|
+
In particular:
|
|
398
399
|
|
|
399
|
-
-
|
|
400
|
-
used to associate existing datasets with all overall-input dataset
|
|
400
|
+
- `.quantum_graph_skeleton.QuantumGraphSkeleton.set_dataset_ref` must
|
|
401
|
+
be used to associate existing datasets with all overall-input dataset
|
|
401
402
|
nodes in the skeleton by querying `input_collections`. This includes
|
|
402
403
|
all standard input nodes and any prerequisite nodes added by the
|
|
403
404
|
method (prerequisite nodes may also be left out entirely, as the base
|
|
404
405
|
class can add them later, albeit possibly less efficiently).
|
|
405
|
-
-
|
|
406
|
+
- `.quantum_graph_skeleton.QuantumGraphSkeleton.set_output_for_skip`
|
|
406
407
|
must be used to associate existing datasets with output dataset nodes
|
|
407
408
|
by querying `skip_existing_in`.
|
|
408
|
-
-
|
|
409
|
+
- `.quantum_graph_skeleton.QuantumGraphSkeleton.add_output_in_the_way`
|
|
409
410
|
must be used to associated existing outputs with output dataset nodes
|
|
410
|
-
by querying `output_run` if `output_run_exists` is `True`.
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
be skipped.
|
|
411
|
+
by querying `output_run` if `output_run_exists` is `True`. Note that
|
|
412
|
+
the presence of such datasets is not automatically an error, even if
|
|
413
|
+
`clobber` is `False`, as these may be quanta that will be skipped.
|
|
414
414
|
|
|
415
|
-
`DatasetRef` objects for existing datasets with empty
|
|
416
|
-
of the above categories may be found in the
|
|
417
|
-
attribute, as these are queried for prior
|
|
418
|
-
class, but associating them with graph nodes
|
|
419
|
-
responsibility.
|
|
415
|
+
`lsst.daf.butler.DatasetRef` objects for existing datasets with empty
|
|
416
|
+
data IDs in all of the above categories may be found in the
|
|
417
|
+
`empty_dimensions_datasets` attribute, as these are queried for prior
|
|
418
|
+
to this call by the base class, but associating them with graph nodes
|
|
419
|
+
is still this method's responsibility.
|
|
420
420
|
|
|
421
421
|
Dataset types should never be components and should always use the
|
|
422
422
|
"common" storage class definition in `pipeline_graph.DatasetTypeNode`
|
|
@@ -435,16 +435,17 @@ class QuantumGraphBuilder(ABC):
|
|
|
435
435
|
----------
|
|
436
436
|
task_node : `pipeline_graph.TaskNode`
|
|
437
437
|
Node for this task in the pipeline graph.
|
|
438
|
-
skeleton :
|
|
438
|
+
skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
439
439
|
Preliminary quantum graph, to be modified in-place.
|
|
440
440
|
|
|
441
441
|
Notes
|
|
442
442
|
-----
|
|
443
443
|
This method modifies ``skeleton`` in-place in several ways:
|
|
444
444
|
|
|
445
|
-
- It associates a `DatasetRef` with all output datasets
|
|
446
|
-
dataset nodes that do not have a
|
|
447
|
-
|
|
445
|
+
- It associates a `lsst.daf.butler.DatasetRef` with all output datasets
|
|
446
|
+
and drops input dataset nodes that do not have a
|
|
447
|
+
`lsst.daf.butler.DatasetRef` already. This ensures producing and
|
|
448
|
+
consuming tasks start from the same `lsst.daf.butler.DatasetRef`.
|
|
448
449
|
- It adds "inputs", "outputs", and "init_inputs" attributes to the
|
|
449
450
|
quantum nodes, holding the same `NamedValueMapping` objects needed to
|
|
450
451
|
construct an actual `Quantum` instances.
|
|
@@ -596,7 +597,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
596
597
|
Node for this task in the pipeline graph.
|
|
597
598
|
quantum_key : `QuantumKey`
|
|
598
599
|
Identifier for this quantum in the graph.
|
|
599
|
-
skeleton :
|
|
600
|
+
skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
600
601
|
Preliminary quantum graph, to be modified in-place.
|
|
601
602
|
|
|
602
603
|
Returns
|
|
@@ -611,9 +612,10 @@ class QuantumGraphBuilder(ABC):
|
|
|
611
612
|
`skip_existing_in` collections, the quantum will be skipped. This
|
|
612
613
|
causes the quantum node to be removed from the graph. Dataset nodes
|
|
613
614
|
that were previously the outputs of this quantum will be associated
|
|
614
|
-
with `DatasetRef` objects that were found in
|
|
615
|
-
will be removed if there is no such dataset
|
|
616
|
-
in `output_run` will be removed from the
|
|
615
|
+
with `lsst.daf.butler.DatasetRef` objects that were found in
|
|
616
|
+
``skip_existing_in``, or will be removed if there is no such dataset
|
|
617
|
+
there. Any output dataset in `output_run` will be removed from the
|
|
618
|
+
"output in the way" category.
|
|
617
619
|
"""
|
|
618
620
|
metadata_dataset_key = DatasetKey(
|
|
619
621
|
task_node.metadata_output.parent_dataset_type_name, quantum_key.data_id_values
|
|
@@ -659,7 +661,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
659
661
|
----------
|
|
660
662
|
quantum_key : `QuantumKey`
|
|
661
663
|
Identifier for this quantum in the graph.
|
|
662
|
-
skeleton :
|
|
664
|
+
skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
663
665
|
Preliminary quantum graph, to be modified in-place.
|
|
664
666
|
task_prerequisite_info : `~prerequisite_helpers.PrerequisiteInfo`
|
|
665
667
|
Information about the prerequisite inputs to this task.
|
|
@@ -679,7 +681,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
679
681
|
the original there). If `clobber` is `False`, `RuntimeError` is
|
|
680
682
|
raised. If there is no output already present, a new one with a random
|
|
681
683
|
UUID is generated. In all cases the dataset node in the skeleton is
|
|
682
|
-
associated with a `DatasetRef`.
|
|
684
|
+
associated with a `lsst.daf.butler.DatasetRef`.
|
|
683
685
|
"""
|
|
684
686
|
dataset_key: DatasetKey | PrerequisiteDatasetKey
|
|
685
687
|
for dataset_key in skeleton.iter_outputs_of(quantum_key):
|
|
@@ -743,7 +745,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
743
745
|
Node for this task in the pipeline graph.
|
|
744
746
|
quantum_key : `QuantumKey`
|
|
745
747
|
Identifier for this quantum in the graph.
|
|
746
|
-
skeleton :
|
|
748
|
+
skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
747
749
|
Preliminary quantum graph, to be modified in-place.
|
|
748
750
|
|
|
749
751
|
Returns
|
|
@@ -787,7 +789,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
787
789
|
Node for this task in the pipeline graph.
|
|
788
790
|
quantum_key : `QuantumKey`
|
|
789
791
|
Identifier for this quantum in the graph.
|
|
790
|
-
skeleton :
|
|
792
|
+
skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
791
793
|
Preliminary quantum graph, to be modified in-place.
|
|
792
794
|
skypix_bounds_builder : `~prerequisite_helpers.SkyPixBoundsBuilder`
|
|
793
795
|
An object that accumulates the appropriate spatial bounds for a
|
|
@@ -806,8 +808,8 @@ class QuantumGraphBuilder(ABC):
|
|
|
806
808
|
Notes
|
|
807
809
|
-----
|
|
808
810
|
This method trims input dataset nodes that are not already associated
|
|
809
|
-
with a `DatasetRef`, and queries for prerequisite input
|
|
810
|
-
not exist.
|
|
811
|
+
with a `lsst.daf.butler.DatasetRef`, and queries for prerequisite input
|
|
812
|
+
nodes that do not exist.
|
|
811
813
|
"""
|
|
812
814
|
inputs_by_type: dict[str, set[DatasetRef]] = {}
|
|
813
815
|
dataset_key: DatasetKey | PrerequisiteDatasetKey
|
|
@@ -987,7 +989,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
987
989
|
|
|
988
990
|
Parameters
|
|
989
991
|
----------
|
|
990
|
-
skeleton :
|
|
992
|
+
skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
991
993
|
Preliminary quantum graph to update in place.
|
|
992
994
|
|
|
993
995
|
Notes
|
|
@@ -999,31 +1001,47 @@ class QuantumGraphBuilder(ABC):
|
|
|
999
1001
|
"""
|
|
1000
1002
|
overall_inputs = skeleton.extract_overall_inputs()
|
|
1001
1003
|
exported_records = self.butler._datastore.export_records(overall_inputs.values())
|
|
1002
|
-
for
|
|
1003
|
-
|
|
1004
|
-
|
|
1004
|
+
for task_label in self._pipeline_graph.tasks:
|
|
1005
|
+
if not skeleton.has_task(task_label):
|
|
1006
|
+
continue
|
|
1007
|
+
task_init_key = skeleton.get_task_init_node(task_label)
|
|
1008
|
+
init_input_ids = {
|
|
1005
1009
|
ref.id
|
|
1006
|
-
for dataset_key in skeleton.iter_inputs_of(
|
|
1010
|
+
for dataset_key in skeleton.iter_inputs_of(task_init_key)
|
|
1007
1011
|
if (ref := overall_inputs.get(dataset_key)) is not None
|
|
1008
1012
|
}
|
|
1009
|
-
|
|
1013
|
+
init_records = {}
|
|
1014
|
+
if init_input_ids:
|
|
1010
1015
|
for datastore_name, records in exported_records.items():
|
|
1011
|
-
matching_records = records.subset(
|
|
1016
|
+
matching_records = records.subset(init_input_ids)
|
|
1012
1017
|
if matching_records is not None:
|
|
1013
|
-
|
|
1014
|
-
skeleton[
|
|
1018
|
+
init_records[datastore_name] = matching_records
|
|
1019
|
+
skeleton[task_init_key]["datastore_records"] = init_records
|
|
1020
|
+
for quantum_key in skeleton.get_quanta(task_label):
|
|
1021
|
+
quantum_records = {}
|
|
1022
|
+
input_ids = {
|
|
1023
|
+
ref.id
|
|
1024
|
+
for dataset_key in skeleton.iter_inputs_of(quantum_key)
|
|
1025
|
+
if (ref := overall_inputs.get(dataset_key)) is not None
|
|
1026
|
+
}
|
|
1027
|
+
if input_ids:
|
|
1028
|
+
for datastore_name, records in exported_records.items():
|
|
1029
|
+
matching_records = records.subset(input_ids)
|
|
1030
|
+
if matching_records is not None:
|
|
1031
|
+
quantum_records[datastore_name] = matching_records
|
|
1032
|
+
skeleton[quantum_key]["datastore_records"] = quantum_records
|
|
1015
1033
|
|
|
1016
1034
|
@final
|
|
1017
1035
|
@timeMethod
|
|
1018
1036
|
def _construct_quantum_graph(
|
|
1019
1037
|
self, skeleton: QuantumGraphSkeleton, metadata: Mapping[str, Any]
|
|
1020
1038
|
) -> QuantumGraph:
|
|
1021
|
-
"""Construct a
|
|
1022
|
-
fully-processed
|
|
1039
|
+
"""Construct a `.QuantumGraph` object from the contents of a
|
|
1040
|
+
fully-processed `.quantum_graph_skeleton.QuantumGraphSkeleton`.
|
|
1023
1041
|
|
|
1024
1042
|
Parameters
|
|
1025
1043
|
----------
|
|
1026
|
-
skeleton :
|
|
1044
|
+
skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
1027
1045
|
Preliminary quantum graph. Must have "init_inputs", "inputs", and
|
|
1028
1046
|
"outputs" attributes on all quantum nodes, as added by
|
|
1029
1047
|
`_resolve_task_quanta`, as well as a "datastore_records" attribute
|
|
@@ -1033,7 +1051,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
1033
1051
|
|
|
1034
1052
|
Returns
|
|
1035
1053
|
-------
|
|
1036
|
-
quantum_graph :
|
|
1054
|
+
quantum_graph : `.QuantumGraph`
|
|
1037
1055
|
DAG describing processing to be performed.
|
|
1038
1056
|
"""
|
|
1039
1057
|
quanta: dict[TaskDef, set[Quantum]] = {}
|
|
@@ -1044,20 +1062,29 @@ class QuantumGraphBuilder(ABC):
|
|
|
1044
1062
|
continue
|
|
1045
1063
|
task_node = self._pipeline_graph.tasks[task_def.label]
|
|
1046
1064
|
task_init_key = skeleton.get_task_init_node(task_def.label)
|
|
1047
|
-
|
|
1048
|
-
|
|
1065
|
+
task_init_state = skeleton[task_init_key]
|
|
1066
|
+
init_datastore_records: dict[str, DatastoreRecordData] = task_init_state.get(
|
|
1067
|
+
"datastore_records", {}
|
|
1068
|
+
)
|
|
1069
|
+
init_inputs[task_def] = task_init_state["inputs"].values()
|
|
1070
|
+
init_outputs[task_def] = task_init_state["outputs"].values()
|
|
1049
1071
|
quanta_for_task: set[Quantum] = set()
|
|
1050
1072
|
for quantum_key in skeleton.get_quanta(task_node.label):
|
|
1051
|
-
|
|
1073
|
+
quantum_state = skeleton[quantum_key]
|
|
1074
|
+
quantum_datastore_records: dict[str, DatastoreRecordData] = quantum_state.get(
|
|
1075
|
+
"datastore_records", {}
|
|
1076
|
+
)
|
|
1052
1077
|
quanta_for_task.add(
|
|
1053
1078
|
Quantum(
|
|
1054
1079
|
taskName=task_node.task_class_name,
|
|
1055
1080
|
taskClass=task_node.task_class,
|
|
1056
|
-
dataId=
|
|
1057
|
-
initInputs=
|
|
1058
|
-
inputs=
|
|
1059
|
-
outputs=
|
|
1060
|
-
datastore_records=
|
|
1081
|
+
dataId=quantum_state["data_id"],
|
|
1082
|
+
initInputs=quantum_state["init_inputs"],
|
|
1083
|
+
inputs=quantum_state["inputs"],
|
|
1084
|
+
outputs=quantum_state["outputs"],
|
|
1085
|
+
datastore_records=DatastoreRecordData.merge_mappings(
|
|
1086
|
+
quantum_datastore_records, init_datastore_records
|
|
1087
|
+
),
|
|
1061
1088
|
)
|
|
1062
1089
|
)
|
|
1063
1090
|
quanta[task_def] = quanta_for_task
|