lsst-pipe-base 30.2026.300-py3-none-any.whl → 30.2026.400-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- lsst/pipe/base/_instrument.py +4 -7
- lsst/pipe/base/_status.py +29 -10
- lsst/pipe/base/automatic_connection_constants.py +9 -1
- lsst/pipe/base/cli/cmd/__init__.py +16 -2
- lsst/pipe/base/cli/cmd/commands.py +42 -4
- lsst/pipe/base/connectionTypes.py +72 -160
- lsst/pipe/base/connections.py +3 -6
- lsst/pipe/base/execution_reports.py +0 -5
- lsst/pipe/base/pipeline.py +3 -4
- lsst/pipe/base/pipelineIR.py +0 -6
- lsst/pipe/base/pipelineTask.py +5 -7
- lsst/pipe/base/pipeline_graph/_edges.py +19 -7
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +8 -0
- lsst/pipe/base/quantum_graph/_common.py +3 -1
- lsst/pipe/base/quantum_graph/_predicted.py +7 -0
- lsst/pipe/base/quantum_graph/_provenance.py +87 -37
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +9 -1
- lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +14 -6
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -3
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +14 -13
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +2 -2
- lsst/pipe/base/quantum_graph/formatter.py +70 -0
- lsst/pipe/base/quantum_graph/ingest_graph.py +356 -0
- lsst/pipe/base/quantum_provenance_graph.py +17 -2
- lsst/pipe/base/separable_pipeline_executor.py +5 -6
- lsst/pipe/base/single_quantum_executor.py +6 -6
- lsst/pipe/base/struct.py +4 -0
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/METADATA +2 -1
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/RECORD +40 -39
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/WHEEL +1 -1
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/zip-safe +0 -0
lsst/pipe/base/pipeline_graph/_pipeline_graph.py

```diff
@@ -897,6 +897,10 @@ class PipelineGraph:
             New config objects or overrides to apply to copies of the current
             config objects, with task labels as the keywords.
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         ValueError
```
```diff
@@ -1755,6 +1759,10 @@ class PipelineGraph:
             not considered part of the pipeline graph in other respects, but it
             does get written with other provenance datasets).
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         lsst.daf.butler.MissingDatasetTypeError
```
lsst/pipe/base/quantum_graph/_common.py

```diff
@@ -453,8 +453,10 @@ class BaseQuantumGraphWriter:
         cdict_data: bytes | None = None,
         zstd_level: int = 10,
     ) -> Iterator[Self]:
-        uri = ResourcePath(uri)
+        uri = ResourcePath(uri, forceDirectory=False)
         address_writer = AddressWriter()
+        if uri.isLocal:
+            os.makedirs(uri.dirname().ospath, exist_ok=True)
         cdict = zstandard.ZstdCompressionDict(cdict_data) if cdict_data is not None else None
         compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
         with uri.open(mode="wb") as stream:
```
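The writer now normalizes the output URI as a file and pre-creates missing parent directories for local paths. A minimal sketch of the new behavior, using a hypothetical local path:

```python
import os

from lsst.resources import ResourcePath

# Hypothetical local output path; the directory "run1" may not exist yet.
uri = ResourcePath("/tmp/qg-demo/run1/graph.qg", forceDirectory=False)
if uri.isLocal:
    # Before this change the writer would fail with FileNotFoundError
    # when the parent directory was missing.
    os.makedirs(uri.dirname().ospath, exist_ok=True)
with uri.open(mode="wb") as stream:
    stream.write(b"\x00")  # placeholder payload
```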
lsst/pipe/base/quantum_graph/_predicted.py

```diff
@@ -110,6 +110,13 @@ if TYPE_CHECKING:
     from ..config import PipelineTaskConfig
     from ..graph import QgraphSummary, QuantumGraph
 
+# Sphinx needs imports for type annotations of base class members.
+if "sphinx" in sys.modules:
+    import zipfile  # noqa: F401
+
+    from ._multiblock import AddressReader, Decompressor  # noqa: F401
+
+
 _LOG = logging.getLogger(__name__)
 
 
```
lsst/pipe/base/quantum_graph/_provenance.py

```diff
@@ -94,6 +94,13 @@ from ._predicted import (
     PredictedQuantumGraphComponents,
 )
 
+# Sphinx needs imports for type annotations of base class members.
+if "sphinx" in sys.modules:
+    import zipfile  # noqa: F401
+
+    from ._multiblock import AddressReader, Decompressor  # noqa: F401
+
+
 _T = TypeVar("_T")
 
 LoopWrapper: TypeAlias = Callable[[Iterable[_T]], Iterable[_T]]
```
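The same guard is added to both `_predicted.py` and `_provenance.py`: imports normally confined to `TYPE_CHECKING` are repeated at runtime when Sphinx is the importer, so autodoc can resolve annotations on inherited members. A runnable sketch of the pattern, using only the stdlib `zipfile` import from the diff:

```python
import sys
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Static analysis sees this import; it costs nothing at runtime.
    import zipfile

if "sphinx" in sys.modules:
    # Sphinx resolves annotations at runtime, so repeat the import only
    # during documentation builds.
    import zipfile  # noqa: F401

def read_member(zf: "zipfile.ZipFile", name: str) -> bytes:
    """Hypothetical helper whose annotation autodoc must resolve."""
    with zf.open(name) as f:
        return f.read()
```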
```diff
@@ -186,6 +193,12 @@ class ProvenanceQuantumInfo(QuantumInfo):
     failure.
     """
 
+    metadata_id: uuid.UUID
+    """ID of this quantum's metadata dataset."""
+
+    log_id: uuid.UUID
+    """ID of this quantum's log dataset."""
+
 
 class ProvenanceInitQuantumInfo(TypedDict):
     """A typed dictionary that annotates the attributes of the NetworkX graph
```
```diff
@@ -212,6 +225,9 @@ class ProvenanceInitQuantumInfo(TypedDict):
     pipeline_node: TaskInitNode
    """Node in the pipeline graph for this task's init-only step."""
 
+    config_id: uuid.UUID
+    """ID of this task's config dataset."""
+
 
 class ProvenanceDatasetModel(PredictedDatasetModel):
     """Data model for the datasets in a provenance quantum graph file."""
```
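Together with `metadata_id` and `log_id` above, the new `config_id` field means each quantum node carries the IDs of its special outputs once the quantum has been read. A hedged sketch of the lookup (access goes through the private networkx view named in the diff; any public accessor may differ):

```python
import uuid

def special_output_ids(graph, quantum_id: uuid.UUID) -> tuple[uuid.UUID, uuid.UUID]:
    """Return (metadata_id, log_id) for a quantum that has already been read.

    ``graph`` is assumed to be a ProvenanceQuantumGraph populated via
    ProvenanceQuantumGraphReader.read_quanta; the keys only exist after the
    quantum itself has been read.
    """
    info = graph._quantum_only_xgraph.nodes[quantum_id]  # private attribute per the diff
    return info["metadata_id"], info["log_id"]
```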
```diff
@@ -646,6 +662,8 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
             resource_usage=last_attempt.resource_usage,
             attempts=self.attempts,
         )
+        graph._quanta_by_task_label[self.task_label][data_id] = self.quantum_id
+        graph._quantum_only_xgraph.add_node(self.quantum_id, **graph._bipartite_xgraph.nodes[self.quantum_id])
         for connection_name, dataset_ids in self.inputs.items():
             read_edge = task_node.get_input_edge(connection_name)
             for dataset_id in dataset_ids:
```
```diff
@@ -655,6 +673,30 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
                 ).append(read_edge)
         for connection_name, dataset_ids in self.outputs.items():
             write_edge = task_node.get_output_edge(connection_name)
+            if connection_name == acc.METADATA_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.add_node(
+                    dataset_ids[0],
+                    data_id=data_id,
+                    dataset_type_name=write_edge.dataset_type_name,
+                    pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                    run=graph.header.output_run,
+                    produced=last_attempt.status.has_metadata,
+                )
+                graph._datasets_by_type[write_edge.dataset_type_name][data_id] = dataset_ids[0]
+                graph._bipartite_xgraph.nodes[self.quantum_id]["metadata_id"] = dataset_ids[0]
+                graph._quantum_only_xgraph.nodes[self.quantum_id]["metadata_id"] = dataset_ids[0]
+            if connection_name == acc.LOG_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.add_node(
+                    dataset_ids[0],
+                    data_id=data_id,
+                    dataset_type_name=write_edge.dataset_type_name,
+                    pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                    run=graph.header.output_run,
+                    produced=last_attempt.status.has_log,
+                )
+                graph._datasets_by_type[write_edge.dataset_type_name][data_id] = dataset_ids[0]
+                graph._bipartite_xgraph.nodes[self.quantum_id]["log_id"] = dataset_ids[0]
+                graph._quantum_only_xgraph.nodes[self.quantum_id]["log_id"] = dataset_ids[0]
             for dataset_id in dataset_ids:
                 graph._bipartite_xgraph.add_edge(
                     self.quantum_id,
```
```diff
@@ -663,8 +705,6 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
                     # There can only be one pipeline edge for an output.
                     pipeline_edges=[write_edge],
                 )
-        graph._quanta_by_task_label[self.task_label][data_id] = self.quantum_id
-        graph._quantum_only_xgraph.add_node(self.quantum_id, **graph._bipartite_xgraph.nodes[self.quantum_id])
         for dataset_id in graph._bipartite_xgraph.predecessors(self.quantum_id):
             for upstream_quantum_id in graph._bipartite_xgraph.predecessors(dataset_id):
                 graph._quantum_only_xgraph.add_edge(upstream_quantum_id, self.quantum_id)
```
```diff
@@ -803,6 +843,15 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
                 ).append(read_edge)
         for connection_name, dataset_id in self.outputs.items():
             write_edge = task_init_node.get_output_edge(connection_name)
+            graph._bipartite_xgraph.add_node(
+                dataset_id,
+                data_id=empty_data_id,
+                dataset_type_name=write_edge.dataset_type_name,
+                pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                run=graph.header.output_run,
+                produced=True,
+            )
+            graph._datasets_by_type[write_edge.dataset_type_name][empty_data_id] = dataset_id
             graph._bipartite_xgraph.add_edge(
                 self.quantum_id,
                 dataset_id,
```
```diff
@@ -810,6 +859,8 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
                 # There can only be one pipeline edge for an output.
                 pipeline_edges=[write_edge],
             )
+            if write_edge.connection_name == acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.nodes[self.quantum_id]["config_id"] = dataset_id
         graph._init_quanta[self.task_label] = self.quantum_id
 
 # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
```
```diff
@@ -994,6 +1045,8 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         types in the pipeline graph are included, even if none of their
         datasets were loaded (i.e. nested mappings may be empty).
 
+        Reading a quantum also populates its log and metadata datasets.
+
         The returned object may be an internal dictionary; as the type
         annotation indicates, it should not be modified in place.
         """
```
```diff
@@ -1032,7 +1085,8 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         `ProvenanceQuantumGraphReader.read_quanta`) or datasets (via
         `ProvenanceQuantumGraphReader.read_datasets`) will load those nodes
         with full attributes and edges to adjacent nodes with no attributes.
-        Loading quanta necessary to populate edge attributes.
+        Loading quanta is necessary to populate edge attributes.
+        Reading a quantum also populates its log and metadata datasets.
 
         Node attributes are described by the
         `ProvenanceQuantumInfo`, `ProvenanceInitQuantumInfo`, and
```
```diff
@@ -1104,10 +1158,6 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         """Construct an `astropy.table.Table` with counts for each exception
         type raised by each task.
 
-        At present this only includes information from partial-outputs-error
-        successes, since exception information for failures is not tracked.
-        This may change in the future.
-
         Returns
         -------
         table : `astropy.table.Table`
```
```diff
@@ -1294,19 +1344,19 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
                     # also have other outstanding reference holders).
                     continue
                 node._add_to_graph(self.graph)
-
-
-
-
-
-
-
-
-
-
-
-
-
+        else:
+            with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
+                for node_id_or_index in nodes:
+                    address_row = self.address_reader.find(node_id_or_index)
+                    if "pipeline_node" in self.graph._bipartite_xgraph.nodes.get(address_row.key, {}):
+                        # Use the old node to reduce memory usage (since it
+                        # might also have other outstanding reference holders).
+                        continue
+                    node = mb_reader.read_model(
+                        address_row.addresses[address_index], model_type, self.decompressor
+                    )
+                    if node is not None:
+                        node._add_to_graph(self.graph)
 
     def fetch_logs(self, nodes: Iterable[uuid.UUID]) -> dict[uuid.UUID, list[ButlerLogRecords | None]]:
         """Fetch log datasets.
```
```diff
@@ -1588,7 +1638,7 @@ class ProvenanceQuantumGraphWriter:
         """
         predicted_quantum = self._predicted_quanta[quantum_id]
         provenance_models = ProvenanceQuantumScanModels.from_metadata_and_logs(
-            predicted_quantum, metadata, logs,
+            predicted_quantum, metadata, logs, incomplete=False
         )
         scan_data = provenance_models.to_scan_data(predicted_quantum, compressor=self.compressor)
         self.write_scan_data(scan_data)
```
```diff
@@ -1665,8 +1715,8 @@ class ProvenanceQuantumScanStatus(enum.Enum):
     enough (according to `ScannerTimeConfigDict.retry_timeout`) that it's time
     to stop trying for now.
 
-    This state means
-
+    This state means `ProvenanceQuantumScanModels.from_metadata_and_logs` must
+    be run again with ``incomplete=False``.
     """
 
     SUCCESSFUL = enum.auto()
```
```diff
@@ -1721,7 +1771,7 @@ class ProvenanceQuantumScanModels:
         metadata: TaskMetadata | None,
         logs: ButlerLogRecords | None,
         *,
-
+        incomplete: bool = False,
     ) -> ProvenanceQuantumScanModels:
         """Construct provenance information from task metadata and logs.
 
```
```diff
@@ -1733,8 +1783,8 @@ class ProvenanceQuantumScanModels:
             Task metadata.
         logs : `lsst.daf.butler.logging.ButlerLogRecords` or `None`
             Task logs.
-
-            If `
+        incomplete : `bool`, optional
+            If `True`, treat execution failures as possibly-incomplete quanta
             and do not fully process them; instead just set the status to
             `ProvenanceQuantumScanStatus.ABANDONED` and return.
 
```
```diff
@@ -1752,8 +1802,8 @@ class ProvenanceQuantumScanModels:
         """
         self = ProvenanceQuantumScanModels(predicted.quantum_id)
         last_attempt = ProvenanceQuantumAttemptModel()
-        self._process_logs(predicted, logs, last_attempt,
-        self._process_metadata(predicted, metadata, last_attempt,
+        self._process_logs(predicted, logs, last_attempt, incomplete=incomplete)
+        self._process_metadata(predicted, metadata, last_attempt, incomplete=incomplete)
         if self.status is ProvenanceQuantumScanStatus.ABANDONED:
             return self
         self._reconcile_attempts(last_attempt)
```
```diff
@@ -1766,15 +1816,15 @@ class ProvenanceQuantumScanModels:
         logs: ButlerLogRecords | None,
         last_attempt: ProvenanceQuantumAttemptModel,
         *,
-
+        incomplete: bool,
     ) -> None:
         (predicted_log_dataset,) = predicted.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
         if logs is None:
             self.output_existence[predicted_log_dataset.dataset_id] = False
-            if
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
+            if incomplete:
                 self.status = ProvenanceQuantumScanStatus.ABANDONED
+            else:
+                self.status = ProvenanceQuantumScanStatus.FAILED
         else:
             # Set the attempt's run status to FAILED, since the default is
             # UNKNOWN (i.e. logs *and* metadata are missing) and we now know
```
```diff
@@ -1832,15 +1882,15 @@ class ProvenanceQuantumScanModels:
         metadata: TaskMetadata | None,
         last_attempt: ProvenanceQuantumAttemptModel,
         *,
-
+        incomplete: bool,
     ) -> None:
         (predicted_metadata_dataset,) = predicted.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
         if metadata is None:
             self.output_existence[predicted_metadata_dataset.dataset_id] = False
-            if
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
+            if incomplete:
                 self.status = ProvenanceQuantumScanStatus.ABANDONED
+            else:
+                self.status = ProvenanceQuantumScanStatus.FAILED
         else:
             self.status = ProvenanceQuantumScanStatus.SUCCESSFUL
             self.output_existence[predicted_metadata_dataset.dataset_id] = True
```
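Both `_process_logs` and `_process_metadata` now branch the same way on the new flag: while the run may still be retried (`incomplete=True`), a missing log or metadata output marks the quantum ABANDONED rather than FAILED. A distilled, runnable sketch of the decision (the enum is a stand-in for `ProvenanceQuantumScanStatus`):

```python
from enum import Enum, auto

class ScanStatus(Enum):  # stand-in for ProvenanceQuantumScanStatus
    SUCCESSFUL = auto()
    FAILED = auto()
    ABANDONED = auto()

def status_for_missing_output(incomplete: bool) -> ScanStatus:
    # Mirrors the branch added to both methods: a missing output is only
    # final (FAILED) once the caller asserts the run is complete.
    return ScanStatus.ABANDONED if incomplete else ScanStatus.FAILED

assert status_for_missing_output(incomplete=True) is ScanStatus.ABANDONED
assert status_for_missing_output(incomplete=False) is ScanStatus.FAILED
```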
```diff
@@ -1875,7 +1925,7 @@ class ProvenanceQuantumScanModels:
                 # But we found the metadata! Either that hard error happened
                 # at a very unlucky time (in between those two writes), or
                 # something even weirder happened.
-                self.attempts[-1].status = QuantumAttemptStatus.
+                self.attempts[-1].status = QuantumAttemptStatus.ABORTED_SUCCESS
             else:
                 self.attempts[-1].status = QuantumAttemptStatus.FAILED
             if len(self.metadata.attempts) < len(self.attempts):
```
lsst/pipe/base/quantum_graph/aggregator/_communicators.py

```diff
@@ -318,6 +318,12 @@ Report: TypeAlias = (
 )
 
 
+def _disable_resources_parallelism() -> None:
+    os.environ["LSST_RESOURCES_NUM_WORKERS"] = "1"
+    os.environ.pop("LSST_RESOURCES_EXECUTOR", None)
+    os.environ["LSST_S3_USE_THREADS"] = "False"
+
+
 class SupervisorCommunicator:
     """A helper object that lets the supervisor direct the other workers.
 
```
```diff
@@ -364,7 +370,7 @@ class SupervisorCommunicator:
         # starts its shutdown.
         self._write_requests: (
             Queue[ProvenanceQuantumScanData | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None
-        ) = context.make_queue() if config.
+        ) = context.make_queue() if config.is_writing_provenance else None
         # All other workers use this queue to send many different kinds of
         # reports the supervisor. The supervisor waits for a _DONE sentinal
         # from each worker before it finishes its shutdown.
```
```diff
@@ -433,6 +439,7 @@ class SupervisorCommunicator:
         self._expect_empty_queue(self._compression_dict)
 
     def __enter__(self) -> Self:
+        _disable_resources_parallelism()
         self.progress.__enter__()
         # We make the low-level logger in __enter__ instead of __init__ only
         # because that's the pattern used by true workers (where it matters).
```
```diff
@@ -581,6 +588,7 @@ class WorkerCommunicator:
         self._cancel_event = supervisor._cancel_event
 
     def __enter__(self) -> Self:
+        _disable_resources_parallelism()
         self.log = make_worker_log(self.name, self.config)
         self.log.verbose("%s has PID %s (parent is %s).", self.name, os.getpid(), os.getppid())
         self._exit_stack = ExitStack().__enter__()
```
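Both communicator types call the new helper first thing in `__enter__`, presumably so every aggregator process pins `lsst.resources` to serial I/O before its own worker pools spawn. A runnable sketch of the idea (the `Worker` class is illustrative):

```python
import os

def _disable_resources_parallelism() -> None:
    # Values mirror the diff: one transfer worker, no custom executor,
    # serial S3 I/O.
    os.environ["LSST_RESOURCES_NUM_WORKERS"] = "1"
    os.environ.pop("LSST_RESOURCES_EXECUTOR", None)
    os.environ["LSST_S3_USE_THREADS"] = "False"

class Worker:
    """Illustrative stand-in for the communicator classes in the diff."""

    def __enter__(self) -> "Worker":
        _disable_resources_parallelism()  # set env before any I/O starts
        return self

    def __exit__(self, *exc: object) -> None:
        return None

with Worker():
    assert os.environ["LSST_RESOURCES_NUM_WORKERS"] == "1"
```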
lsst/pipe/base/quantum_graph/aggregator/_config.py

```diff
@@ -29,6 +29,8 @@ from __future__ import annotations
 
 __all__ = ("AggregatorConfig",)
 
+import sys
+from typing import TYPE_CHECKING, Any
 
 import pydantic
 
```
```diff
@@ -60,11 +62,13 @@ class AggregatorConfig(pydantic.BaseModel):
     n_processes: int = 1
     """Number of processes the scanner should use."""
 
-
-    """If `True`,
-
-
-
+    incomplete: bool = False
+    """If `True`, do not expect the graph to have been executed to completion
+    yet, and only ingest the outputs of successful quanta.
+
+    This disables writing the provenance quantum graph, since this is likely to
+    be wasted effort that just complicates a follow-up run with
+    ``incomplete=False`` later.
     """
 
     defensive_ingest: bool = False
```
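The new field interacts with `output_path` through the `is_writing_provenance` property added further down this file. A runnable stand-in reproducing just those pieces (field defaults follow the diff; the real model has many more fields):

```python
import pydantic

class AggregatorConfigSketch(pydantic.BaseModel):
    """Stand-in reproducing only the fields behind is_writing_provenance."""

    output_path: str | None = None  # assumed default; the field itself is real
    incomplete: bool = False
    promise_ingest_graph: bool = False

    @property
    def is_writing_provenance(self) -> bool:
        # Verbatim logic from the property added in this release.
        return self.output_path is not None and not self.incomplete

cfg = AggregatorConfigSketch(output_path="provenance.qg", incomplete=True)
assert not cfg.is_writing_provenance  # incomplete runs skip provenance output
```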
```diff
@@ -95,11 +99,10 @@ class AggregatorConfig(pydantic.BaseModel):
     """
 
     dry_run: bool = False
-    """If `True`, do not actually perform any
-    ingests.
+    """If `True`, do not actually perform any central butler ingests.
 
-    Most log messages concerning
-
+    Most log messages concerning ingests will still be emitted in order to
+    provide a better emulation of a real run.
     """
 
     interactive_status: bool = False
```
```diff
@@ -137,3 +140,69 @@ class AggregatorConfig(pydantic.BaseModel):
     """Enable support for storage classes by created by the
     lsst.pipe.base.tests.mocks package.
     """
+
+    promise_ingest_graph: bool = False
+    """If `True`, the aggregator will assume that `~.ingest_graph.ingest_graph`
+    will be run later to ingest metadata/log/config datasets, and will not
+    ingest them itself. This means that if `~.ingest_graph.ingest_graph` is
+    not run, those files will be abandoned in the butler storage root without
+    being present in the butler database, but it will speed up both processes.
+
+    It is *usually* safe to build a quantum graph for downstream processing
+    before or while running `~.ingest_graph.ingest_graph`, because
+    metadata/log/config datasets are rarely used as inputs. To check, use
+    ``pipetask build ... --show inputs`` to show the overall-inputs to the
+    graph and scan for these dataset types.
+    """
+
+    @property
+    def is_writing_provenance(self) -> bool:
+        """Whether the aggregator is configured to write the provenance quantum
+        graph.
+        """
+        return self.output_path is not None and not self.incomplete
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
```
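A hedged sketch of the two-phase flow that `promise_ingest_graph` enables. `aggregate_graph`'s parameters are taken from the `_supervisor.py` hunks further down; the import locations and the exact entry point of the new `ingest_graph` module are assumptions:

```python
# Assumed imports; the public re-export points may differ.
from lsst.pipe.base.quantum_graph.aggregator._config import AggregatorConfig
from lsst.pipe.base.quantum_graph.aggregator._supervisor import aggregate_graph

# Phase 1: scan and ingest regular outputs now; metadata/log/config files
# stay in the butler storage root but out of the registry for the moment.
# (Any additional required configuration is elided.)
config = AggregatorConfig(promise_ingest_graph=True)
aggregate_graph("predicted.qg", "/repo/butler.yaml", config)

# Phase 2, possibly while downstream graph building is already underway:
# the new lsst.pipe.base.quantum_graph.ingest_graph module ingests the
# promised metadata/log/config datasets; see that module for its entry point.
```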
lsst/pipe/base/quantum_graph/aggregator/_ingester.py

```diff
@@ -43,7 +43,7 @@ from lsst.daf.butler.registry import ConflictingDefinitionError
 
 from ...pipeline_graph import TaskImportMode
 from .._common import DatastoreName
-from .._predicted import
+from .._predicted import PredictedQuantumGraphComponents, PredictedQuantumGraphReader
 from ._communicators import IngesterCommunicator
 
 
```
```diff
@@ -170,7 +170,7 @@ class Ingester(AbstractContextManager):
         for ingest_request in self.comms.poll():
             self.n_producers_pending += 1
             self.comms.log.debug(f"Got ingest request for producer {ingest_request.producer_id}.")
-            self.
+            self.update_outputs_pending(refs=ingest_request.refs, records=ingest_request.records)
             if self.n_datasets_pending > self.comms.config.ingest_batch_size:
                 self.ingest()
         self.comms.log.info("All ingest requests received.")
```
```diff
@@ -266,31 +266,32 @@ class Ingester(AbstractContextManager):
         else:
             del self.records_pending[datastore_name]
 
-    def
-        self,
+    def update_outputs_pending(
+        self,
+        refs: list[DatasetRef],
+        records: dict[DatastoreName, DatastoreRecordData],
     ) -> None:
         """Add an ingest request to the pending-ingest data structures.
 
         Parameters
         ----------
-
-            Registry information about
+        refs : `list` [ `lsst.daf.butler.DatasetRef` ]
+            Registry information about regular quantum-output datasets.
         records : `dict` [ `str`, \
             `lsst.daf.butler.datastore.record_data.DatastoreRecordData` ]
             Datastore information about the datasets.
         """
-        n_given = len(
+        n_given = len(refs)
         if self.already_ingested is not None:
-
-            kept = {
+            refs = [ref for ref in refs if ref.id not in self.already_ingested]
+            kept = {ref.id for ref in refs}
             self.n_datasets_skipped += n_given - len(kept)
             records = {
                 datastore_name: filtered_records
                 for datastore_name, original_records in records.items()
                 if (filtered_records := original_records.subset(kept)) is not None
             }
-        for
-            ref = self.predicted.make_dataset_ref(dataset)
+        for ref in refs:
             self.refs_pending[ref.datasetType.dimensions].append(ref)
         for datastore_name, datastore_records in records.items():
             if (existing_records := self.records_pending.get(datastore_name)) is not None:
```
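The rewritten dedup logic now works directly on `DatasetRef` objects. A distilled, runnable sketch with simple stand-ins for the butler types:

```python
import uuid
from dataclasses import dataclass

@dataclass
class Ref:
    """Stand-in for lsst.daf.butler.DatasetRef (only the id is needed here)."""

    id: uuid.UUID

def filter_already_ingested(
    refs: list[Ref], already_ingested: set[uuid.UUID]
) -> tuple[list[Ref], set[uuid.UUID]]:
    # Mirrors update_outputs_pending: keep only refs not yet ingested and
    # collect the surviving IDs so datastore records can be subset to match.
    kept_refs = [ref for ref in refs if ref.id not in already_ingested]
    kept_ids = {ref.id for ref in kept_refs}
    return kept_refs, kept_ids

a, b = uuid.uuid4(), uuid.uuid4()
kept_refs, kept_ids = filter_already_ingested([Ref(a), Ref(b)], {a})
assert kept_ids == {b} and len(kept_refs) == 1
```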
lsst/pipe/base/quantum_graph/aggregator/_scanner.py

```diff
@@ -223,7 +223,7 @@ class Scanner(AbstractContextManager):
         logs = self._read_log(predicted_quantum)
         metadata = self._read_metadata(predicted_quantum)
         result = ProvenanceQuantumScanModels.from_metadata_and_logs(
-            predicted_quantum, metadata, logs,
+            predicted_quantum, metadata, logs, incomplete=self.comms.config.incomplete
         )
         if result.status is ProvenanceQuantumScanStatus.ABANDONED:
             self.comms.log.debug("Abandoning scan for failed quantum %s.", quantum_id)
```
```diff
@@ -233,7 +233,7 @@ class Scanner(AbstractContextManager):
             if predicted_output.dataset_id not in result.output_existence:
                 result.output_existence[predicted_output.dataset_id] = self.scan_dataset(predicted_output)
         to_ingest = self._make_ingest_request(predicted_quantum, result)
-        if self.comms.config.
+        if self.comms.config.is_writing_provenance:
             to_write = result.to_scan_data(predicted_quantum, compressor=self.compressor)
             self.comms.request_write(to_write)
         self.comms.request_ingest(to_ingest)
```
```diff
@@ -261,15 +261,23 @@ class Scanner(AbstractContextManager):
         predicted_outputs_by_id = {
             d.dataset_id: d for d in itertools.chain.from_iterable(predicted_quantum.outputs.values())
         }
-        to_ingest_predicted: list[PredictedDatasetModel] = []
         to_ingest_refs: list[DatasetRef] = []
+        to_ignore: set[uuid.UUID] = set()
+        if self.comms.config.promise_ingest_graph:
+            if result.status is ProvenanceQuantumScanStatus.INIT:
+                if predicted_quantum.task_label:  # i.e. not the 'packages' producer
+                    to_ignore.add(
+                        predicted_quantum.outputs[acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME][0].dataset_id
+                    )
+            else:
+                to_ignore.add(predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME][0].dataset_id)
+                to_ignore.add(predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME][0].dataset_id)
         for dataset_id, was_produced in result.output_existence.items():
-            if was_produced:
+            if was_produced and dataset_id not in to_ignore:
                 predicted_output = predicted_outputs_by_id[dataset_id]
-                to_ingest_predicted.append(predicted_output)
                 to_ingest_refs.append(self.reader.components.make_dataset_ref(predicted_output))
         to_ingest_records = self.qbb._datastore.export_predicted_records(to_ingest_refs)
-        return IngestRequest(result.quantum_id,
+        return IngestRequest(result.quantum_id, to_ingest_refs, to_ingest_records)
 
     def _read_metadata(self, predicted_quantum: PredictedQuantumDatasetsModel) -> TaskMetadata | None:
         """Attempt to read the metadata dataset for a quantum.
```
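Distilling the new `to_ignore` logic: under `promise_ingest_graph`, init quanta withhold their config dataset (except the label-less 'packages' producer, which withholds nothing) and regular quanta withhold metadata and logs. A runnable sketch (connection names are stand-ins for the `acc` constants):

```python
def promised_connections(is_init: bool, has_task_label: bool) -> set[str]:
    # Connection names below are stand-ins for the acc.* constants.
    if is_init:
        # The label-less init producer (the 'packages' quantum) withholds
        # nothing; every other init quantum withholds its config dataset.
        return {"config"} if has_task_label else set()
    # Regular quanta withhold their metadata and log outputs.
    return {"metadata", "log"}

assert promised_connections(is_init=True, has_task_label=True) == {"config"}
assert promised_connections(is_init=False, has_task_label=True) == {"metadata", "log"}
```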
lsst/pipe/base/quantum_graph/aggregator/_structs.py

```diff
@@ -32,10 +32,10 @@ __all__ = ("IngestRequest", "ScanReport")
 import dataclasses
 import uuid
 
+from lsst.daf.butler import DatasetRef
 from lsst.daf.butler.datastore.record_data import DatastoreRecordData
 
 from .._common import DatastoreName
-from .._predicted import PredictedDatasetModel
 from .._provenance import ProvenanceQuantumScanStatus
 
 
```
```diff
@@ -57,11 +57,11 @@ class IngestRequest:
     producer_id: uuid.UUID
     """ID of the quantum that produced these datasets."""
 
-
+    refs: list[DatasetRef]
     """Registry information about the datasets."""
 
     records: dict[DatastoreName, DatastoreRecordData]
     """Datastore information about the datasets."""
 
     def __bool__(self) -> bool:
-        return bool(self.
+        return bool(self.refs or self.records)
```
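`IngestRequest` now carries fully-formed `DatasetRef` objects instead of predicted-dataset models, and stays falsy when empty so no-op requests can be skipped. A runnable stand-in:

```python
import dataclasses
import uuid

@dataclasses.dataclass
class IngestRequestSketch:
    """Stand-in for IngestRequest with simplified field types."""

    producer_id: uuid.UUID
    refs: list     # list[DatasetRef] in the real class
    records: dict  # dict[DatastoreName, DatastoreRecordData] in the real class

    def __bool__(self) -> bool:
        # Verbatim logic from the diff: a request with nothing to ingest
        # is falsy, so callers can skip queueing it.
        return bool(self.refs or self.records)

empty = IngestRequestSketch(uuid.uuid4(), [], {})
assert not empty
```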
lsst/pipe/base/quantum_graph/aggregator/_supervisor.py

```diff
@@ -117,6 +117,18 @@ class Supervisor:
             self.comms.request_scan(ready_set.pop())
         for scan_return in self.comms.poll():
             self.handle_report(scan_return)
+        if self.comms.config.incomplete:
+            quantum_or_quanta = "quanta" if self.n_abandoned != 1 else "quantum"
+            self.comms.progress.log.info(
+                "%d %s incomplete/failed abandoned; re-run with incomplete=False to finish.",
+                self.n_abandoned,
+                quantum_or_quanta,
+            )
+        self.comms.progress.log.info(
+            "Scanning complete after %0.1fs; waiting for workers to finish.",
+            self.comms.progress.elapsed_time,
+        )
+        self.comms.wait_for_workers_to_finish()
 
     def handle_report(self, scan_report: ScanReport) -> None:
         """Handle a report from a scanner.
```
```diff
@@ -134,7 +146,7 @@
             self.comms.log.debug("Scan complete for %s: quantum failed.", scan_report.quantum_id)
             blocked_quanta = self.walker.fail(scan_report.quantum_id)
             for blocked_quantum_id in blocked_quanta:
-                if self.comms.config.
+                if self.comms.config.is_writing_provenance:
                     self.comms.request_write(
                         ProvenanceQuantumScanData(
                             blocked_quantum_id, status=ProvenanceQuantumScanStatus.BLOCKED
```
|
|
|
172
184
|
writer: Worker | None = None
|
|
173
185
|
with SupervisorCommunicator(log, config.n_processes, ctx, config) as comms:
|
|
174
186
|
comms.progress.log.verbose("Starting workers.")
|
|
175
|
-
if config.
|
|
187
|
+
if config.is_writing_provenance:
|
|
176
188
|
writer_comms = WriterCommunicator(comms)
|
|
177
189
|
writer = ctx.make_worker(
|
|
178
190
|
target=Writer.run,
|
|
@@ -198,17 +210,6 @@ def aggregate_graph(predicted_path: str, butler_path: str, config: AggregatorCon
|
|
|
198
210
|
ingester.start()
|
|
199
211
|
supervisor = Supervisor(predicted_path, comms)
|
|
200
212
|
supervisor.loop()
|
|
201
|
-
log.info(
|
|
202
|
-
"Scanning complete after %0.1fs; waiting for workers to finish.",
|
|
203
|
-
comms.progress.elapsed_time,
|
|
204
|
-
)
|
|
205
|
-
comms.wait_for_workers_to_finish()
|
|
206
|
-
if supervisor.n_abandoned:
|
|
207
|
-
raise RuntimeError(
|
|
208
|
-
f"{supervisor.n_abandoned} {'quanta' if supervisor.n_abandoned > 1 else 'quantum'} "
|
|
209
|
-
"abandoned because they did not succeed. Re-run with assume_complete=True after all retry "
|
|
210
|
-
"attempts have been exhausted."
|
|
211
|
-
)
|
|
212
213
|
for w in scanners:
|
|
213
214
|
w.join()
|
|
214
215
|
ingester.join()
|