lsst-pipe-base 29.2025.4100-py3-none-any.whl → 29.2025.4300-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_status.py +1 -1
- lsst/pipe/base/cli/cmd/__init__.py +2 -2
- lsst/pipe/base/cli/cmd/commands.py +116 -1
- lsst/pipe/base/graph_walker.py +8 -4
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +30 -5
- lsst/pipe/base/quantum_graph/__init__.py +1 -0
- lsst/pipe/base/quantum_graph/_common.py +2 -1
- lsst/pipe/base/quantum_graph/_multiblock.py +41 -7
- lsst/pipe/base/quantum_graph/_predicted.py +62 -5
- lsst/pipe/base/quantum_graph/_provenance.py +1209 -0
- lsst/pipe/base/quantum_graph/aggregator/__init__.py +143 -0
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +981 -0
- lsst/pipe/base/quantum_graph/aggregator/_config.py +139 -0
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +312 -0
- lsst/pipe/base/quantum_graph/aggregator/_progress.py +208 -0
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +371 -0
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +167 -0
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +225 -0
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +593 -0
- lsst/pipe/base/resource_usage.py +183 -0
- lsst/pipe/base/simple_pipeline_executor.py +4 -1
- lsst/pipe/base/tests/util.py +31 -0
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/RECORD +33 -22
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_graph/aggregator/_supervisor.py

@@ -0,0 +1,225 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("aggregate_graph",)
+
+import dataclasses
+import uuid
+
+import astropy.units as u
+import networkx
+
+from lsst.utils.logging import getLogger
+from lsst.utils.usage import get_peak_mem_usage
+
+from ...graph_walker import GraphWalker
+from ...pipeline_graph import TaskImportMode
+from .._predicted import PredictedQuantumGraphComponents, PredictedQuantumGraphReader
+from ._communicators import (
+    IngesterCommunicator,
+    ScannerCommunicator,
+    SpawnProcessContext,
+    SupervisorCommunicator,
+    ThreadingContext,
+    Worker,
+    WriterCommunicator,
+)
+from ._config import AggregatorConfig
+from ._ingester import Ingester
+from ._scanner import Scanner
+from ._structs import ScanReport, ScanResult, ScanStatus
+from ._writer import Writer
+
+
+@dataclasses.dataclass
+class Supervisor:
+    """The main process/thread for the provenance aggregator."""
+
+    predicted_path: str
+    """Path to the predicted quantum graph."""
+
+    comms: SupervisorCommunicator
+    """Communicator object for the supervisor."""
+
+    predicted: PredictedQuantumGraphComponents = dataclasses.field(init=False)
+    """Components of the predicted quantum graph."""
+
+    walker: GraphWalker[uuid.UUID] = dataclasses.field(init=False)
+    """Iterator that traverses the quantum graph."""
+
+    n_abandoned: int = 0
+    """Number of quanta we abandoned because they did not complete in time and
+    we could not assume they had failed.
+    """
+
+    def __post_init__(self) -> None:
+        self.comms.progress.log.info("Reading predicted quantum graph.")
+        with PredictedQuantumGraphReader.open(
+            self.predicted_path, import_mode=TaskImportMode.DO_NOT_IMPORT
+        ) as reader:
+            reader.read_thin_graph()
+            reader.read_init_quanta()
+            self.predicted = reader.components
+        self.comms.progress.log.info("Analyzing predicted graph.")
+        uuid_by_index = {
+            quantum_index: quantum_id for quantum_id, quantum_index in self.predicted.quantum_indices.items()
+        }
+        xgraph = networkx.DiGraph(
+            [(uuid_by_index[a], uuid_by_index[b]) for a, b in self.predicted.thin_graph.edges]
+        )
+        # Make sure all quanta are in the graph, even if they don't have any
+        # quantum-only edges.
+        xgraph.add_nodes_from(uuid_by_index.values())
+        # Add init quanta as nodes without edges, because the scanner should
+        # only be run after init outputs are all written and hence we don't
+        # care when we process them.
+        for init_quantum in self.predicted.init_quanta.root[1:]: # skip 'packages' producer
+            xgraph.add_node(init_quantum.quantum_id)
+        self.walker = GraphWalker(xgraph)
+
+    def loop(self) -> None:
+        """Scan the outputs of the quantum graph to gather provenance and
+        ingest outputs.
+        """
+        self.comms.progress.set_n_quanta(
+            self.predicted.header.n_quanta + len(self.predicted.init_quanta.root)
+        )
+        ready_set: set[uuid.UUID] = set()
+        for ready_quanta in self.walker:
+            self.comms.log.debug("Sending %d new quanta to scan queue.", len(ready_quanta))
+            ready_set.update(ready_quanta)
+            while ready_set:
+                self.comms.request_scan(ready_set.pop())
+            for scan_return in self.comms.poll():
+                self.handle_report(scan_return)
+
+    def handle_report(self, scan_report: ScanReport) -> None:
+        """Handle a report from a scanner.
+
+        Parameters
+        ----------
+        scan_report : `ScanReport`
+            Information about the scan.
+        """
+        match scan_report.status:
+            case ScanStatus.SUCCESSFUL | ScanStatus.INIT:
+                self.comms.log.debug("Scan complete for %s: quantum succeeded.", scan_report.quantum_id)
+                self.walker.finish(scan_report.quantum_id)
+            case ScanStatus.FAILED:
+                self.comms.log.debug("Scan complete for %s: quantum failed.", scan_report.quantum_id)
+                blocked_quanta = self.walker.fail(scan_report.quantum_id)
+                for blocked_quantum_id in blocked_quanta:
+                    if self.comms.config.output_path is not None:
+                        self.comms.request_write(ScanResult(blocked_quantum_id, status=ScanStatus.BLOCKED))
+                    self.comms.progress.report_scan()
+                self.comms.progress.report_ingests(len(blocked_quanta))
+            case ScanStatus.ABANDONED:
+                self.comms.log.debug("Abandoning scan for %s: quantum has not succeeded (yet).")
+                self.walker.fail(scan_report.quantum_id)
+                self.n_abandoned += 1
+            case unexpected:
+                raise AssertionError(
+                    f"Unexpected status {unexpected!r} in scanner loop for {scan_report.quantum_id}."
+                )
+        self.comms.progress.report_scan()
+
+
+def aggregate_graph(predicted_path: str, butler_path: str, config: AggregatorConfig) -> None:
+    """Run the graph aggregator tool.
+
+    Parameters
+    ----------
+    predicted_path : `str`
+        Path to the predicted quantum graph.
+    butler_path : `str`
+        Path or alias to the central butler repository.
+    config: `AggregatorConfig`
+        Configuration for the aggregator.
+    """
+    log = getLogger("lsst.pipe.base.quantum_graph.aggregator")
+    ctx = ThreadingContext() if config.n_processes == 1 else SpawnProcessContext()
+    scanners: list[Worker] = []
+    ingester: Worker
+    writer: Worker | None = None
+    with SupervisorCommunicator(log, config.n_processes, ctx, config) as comms:
+        comms.progress.log.verbose("Starting workers.")
+        if config.output_path is not None:
+            writer_comms = WriterCommunicator(comms)
+            writer = ctx.make_worker(
+                target=Writer.run,
+                args=(predicted_path, writer_comms),
+                name=writer_comms.name,
+            )
+            writer.start()
+        for scanner_id in range(config.n_processes):
+            scanner_comms = ScannerCommunicator(comms, scanner_id)
+            worker = ctx.make_worker(
+                target=Scanner.run,
+                args=(predicted_path, butler_path, scanner_comms),
+                name=scanner_comms.name,
+            )
+            worker.start()
+            scanners.append(worker)
+        ingester_comms = IngesterCommunicator(comms)
+        ingester = ctx.make_worker(
+            target=Ingester.run,
+            args=(predicted_path, butler_path, ingester_comms),
+            name=ingester_comms.name,
+        )
+        ingester.start()
+        supervisor = Supervisor(predicted_path, comms)
+        supervisor.loop()
+        log.info(
+            "Scanning complete after %0.1fs; waiting for workers to finish.",
+            comms.progress.elapsed_time,
+        )
+        comms.wait_for_workers_to_finish()
+    if supervisor.n_abandoned:
+        raise RuntimeError(
+            f"{supervisor.n_abandoned} {'quanta' if supervisor.n_abandoned > 1 else 'quantum'} "
+            "abandoned because they did not succeed. Re-run with assume_complete=True after all retry "
+            "attempts have been exhausted."
+        )
+    for w in scanners:
+        w.join()
+    ingester.join()
+    if writer is not None and writer.is_alive():
+        log.info("Waiting for writer process to close (garbage collecting can be very slow).")
+        writer.join()
+    # We can't get memory usage for children until they've joined.
+    parent_mem, child_mem = get_peak_mem_usage()
+    # This is actually an upper bound on the peak (since the peaks could be
+    # at different times), but since we expect memory usage to be more smooth
+    # than spiky that's fine.
+    total_mem: u.Quantity = parent_mem + child_mem
+    log.info(
+        "All aggregation tasks complete after %0.1fs; peak memory usage ≤ %0.1f MB.",
+        comms.progress.elapsed_time,
+        total_mem.to(u.MB).value,
+    )
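The hunk above appears to be the new `aggregator/_supervisor.py` module (the only +225-line addition in the file list). It adds `aggregate_graph`, the aggregator's entry point: a `Supervisor` reads the predicted quantum graph, walks it with `GraphWalker`, and fans scanning, ingestion, and optional provenance writing out to worker threads or spawned processes. The sketch below shows how that entry point might be called; it assumes the `aggregator` package re-exports `aggregate_graph` and `AggregatorConfig`, and that `AggregatorConfig` accepts `n_processes`, `output_path`, and `assume_complete` as keyword arguments. Only their use as attributes (and one error message) is visible in this diff, not the constructor signature, and the paths are placeholders.

```python
# Hypothetical usage sketch; the import path and AggregatorConfig constructor
# arguments are assumptions inferred from this diff, not confirmed API.
from lsst.pipe.base.quantum_graph.aggregator import AggregatorConfig, aggregate_graph

config = AggregatorConfig(
    n_processes=4,  # n_processes == 1 selects ThreadingContext instead of SpawnProcessContext
    output_path="provenance_graph.out",  # None skips starting the Writer worker
    assume_complete=False,  # per the error message, set True only after all retries are exhausted
)

# Scans executed quanta, ingests their outputs into the central butler, and
# (because output_path is set) writes the aggregated provenance graph.
aggregate_graph(
    predicted_path="predicted_graph.in",
    butler_path="/repo/main",
    config=config,
)
```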