lsst-pipe-base 30.0.0rc2__py3-none-any.whl → 30.0.1__py3-none-any.whl
- lsst/pipe/base/_instrument.py +31 -20
- lsst/pipe/base/_quantumContext.py +3 -3
- lsst/pipe/base/_status.py +43 -10
- lsst/pipe/base/_task_metadata.py +2 -2
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +8 -3
- lsst/pipe/base/automatic_connection_constants.py +20 -1
- lsst/pipe/base/cli/cmd/__init__.py +18 -2
- lsst/pipe/base/cli/cmd/commands.py +149 -4
- lsst/pipe/base/connectionTypes.py +72 -160
- lsst/pipe/base/connections.py +6 -9
- lsst/pipe/base/execution_reports.py +0 -5
- lsst/pipe/base/graph/graph.py +11 -10
- lsst/pipe/base/graph/quantumNode.py +4 -4
- lsst/pipe/base/graph_walker.py +8 -10
- lsst/pipe/base/log_capture.py +40 -80
- lsst/pipe/base/log_on_close.py +76 -0
- lsst/pipe/base/mp_graph_executor.py +51 -15
- lsst/pipe/base/pipeline.py +5 -6
- lsst/pipe/base/pipelineIR.py +2 -8
- lsst/pipe/base/pipelineTask.py +5 -7
- lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
- lsst/pipe/base/pipeline_graph/_edges.py +32 -22
- lsst/pipe/base/pipeline_graph/_mapping_views.py +4 -7
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +14 -7
- lsst/pipe/base/pipeline_graph/expressions.py +2 -2
- lsst/pipe/base/pipeline_graph/io.py +7 -10
- lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
- lsst/pipe/base/pipeline_graph/visualization/_layout.py +16 -18
- lsst/pipe/base/pipeline_graph/visualization/_merge.py +4 -7
- lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
- lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
- lsst/pipe/base/prerequisite_helpers.py +2 -1
- lsst/pipe/base/quantum_graph/_common.py +19 -20
- lsst/pipe/base/quantum_graph/_multiblock.py +37 -31
- lsst/pipe/base/quantum_graph/_predicted.py +113 -15
- lsst/pipe/base/quantum_graph/_provenance.py +1136 -45
- lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +204 -289
- lsst/pipe/base/quantum_graph/aggregator/_config.py +87 -9
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +13 -12
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +49 -235
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +6 -116
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +29 -39
- lsst/pipe/base/quantum_graph/aggregator/_workers.py +303 -0
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +34 -351
- lsst/pipe/base/quantum_graph/formatter.py +171 -0
- lsst/pipe/base/quantum_graph/ingest_graph.py +413 -0
- lsst/pipe/base/quantum_graph/visualization.py +5 -1
- lsst/pipe/base/quantum_graph_builder.py +33 -9
- lsst/pipe/base/quantum_graph_executor.py +116 -13
- lsst/pipe/base/quantum_graph_skeleton.py +31 -35
- lsst/pipe/base/quantum_provenance_graph.py +29 -12
- lsst/pipe/base/separable_pipeline_executor.py +19 -3
- lsst/pipe/base/single_quantum_executor.py +67 -42
- lsst/pipe/base/struct.py +4 -0
- lsst/pipe/base/testUtils.py +3 -3
- lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/METADATA +3 -3
- lsst_pipe_base-30.0.1.dist-info/RECORD +129 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/WHEEL +1 -1
- lsst_pipe_base-30.0.0rc2.dist-info/RECORD +0 -125
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/zip-safe +0 -0
lsst/pipe/base/log_capture.py
CHANGED
```diff
@@ -31,17 +31,15 @@ __all__ = ["LogCapture"]
 
 import dataclasses
 import logging
-import os
-import shutil
-import tempfile
 import uuid
 from collections.abc import Iterator
-from contextlib import contextmanager, suppress
+from contextlib import contextmanager
 from logging import FileHandler
 
 import pydantic
 
-from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
+from lsst.daf.butler import Butler, LimitedButler, Quantum
+from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest
 from lsst.daf.butler.logging import (
     ButlerLogRecord,
     ButlerLogRecordHandler,
@@ -105,7 +103,7 @@ class _ExecutionLogRecordsExtra(pydantic.BaseModel):
 
         Parameters
         ----------
-        log_records : `ButlerLogRecords`
+        log_records : `lsst.daf.butler.ButlerLogRecords`
             Logs from a past attempt to run a quantum.
         """
         previous = self.model_validate(log_records.extra)
@@ -165,7 +163,9 @@ class LogCapture:
         return cls(butler, butler)
 
     @contextmanager
-    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[_LogCaptureContext]:
+    def capture_logging(
+        self, task_node: TaskNode, /, quantum: Quantum, records: ButlerLogRecords | None = None
+    ) -> Iterator[_LogCaptureContext]:
         """Configure logging system to capture logs for execution of this task.
 
         Parameters
@@ -174,6 +174,9 @@ class LogCapture:
             The task definition.
         quantum : `~lsst.daf.butler.Quantum`
             Single Quantum instance.
+        records : `lsst.daf.butler.logging.ButlerLogRecords`, optional
+            Log record container to append to and save. If provided, streaming
+            mode is disabled (since we'll be saving logs in memory anyway).
 
         Notes
         -----
@@ -205,44 +208,40 @@ class LogCapture:
 
         # Add a handler to the root logger to capture execution log output.
         if log_dataset_name is not None:
+            try:
+                [ref] = quantum.outputs[log_dataset_name]
+            except LookupError as exc:
+                raise InvalidQuantumError(
+                    f"Quantum outputs is missing log output dataset type {log_dataset_name};"
+                    " this could happen due to inconsistent options between QuantumGraph generation"
+                    " and execution"
+                ) from exc
             # Either accumulate into ButlerLogRecords or stream JSON records to
             # file and ingest that (ingest is possible only with full butler).
-            if self.stream_json_logs and self.full_butler is not None:
-                ...
-                # Ensure that the logs are stored in butler.
-                logging.getLogger().removeHandler(log_handler_file)
-                log_handler_file.close()
-                if ctx.extra:
-                    with open(log_file, "a") as log_stream:
-                        ButlerLogRecords.write_streaming_extra(
-                            log_stream,
-                            ctx.extra.model_dump_json(exclude_unset=True, exclude_defaults=True),
-                        )
-                if ctx.store:
-                    self._ingest_log_records(quantum, log_dataset_name, log_file)
-                shutil.rmtree(tmpdir, ignore_errors=True)
+            if self.stream_json_logs and self.full_butler is not None and records is None:
+                with TemporaryForIngest(self.full_butler, ref) as temporary:
+                    log_handler_file = FileHandler(temporary.ospath)
+                    log_handler_file.setFormatter(JsonLogFormatter())
+                    logging.getLogger().addHandler(log_handler_file)
+
+                    try:
+                        with ButlerMDC.set_mdc(mdc):
+                            yield ctx
+                    finally:
+                        # Ensure that the logs are stored in butler.
+                        logging.getLogger().removeHandler(log_handler_file)
+                        log_handler_file.close()
+                        if ctx.extra:
+                            with open(temporary.ospath, "a") as log_stream:
+                                ButlerLogRecords.write_streaming_extra(
+                                    log_stream,
+                                    ctx.extra.model_dump_json(exclude_unset=True, exclude_defaults=True),
+                                )
+                        if ctx.store:
+                            temporary.ingest()
 
             else:
-                log_handler_memory = ButlerLogRecordHandler()
+                log_handler_memory = ButlerLogRecordHandler(records)
                 logging.getLogger().addHandler(log_handler_memory)
 
                 try:
@@ -261,7 +260,6 @@ class LogCapture:
                     logging.getLogger().removeHandler(log_handler_memory)
                     if ctx.store:
                         self._store_log_records(quantum, log_dataset_name, log_handler_memory)
-                    log_handler_memory.records.clear()
 
         else:
             with ButlerMDC.set_mdc(mdc):
@@ -281,41 +279,3 @@ class LogCapture:
             ) from exc
 
         self.butler.put(log_handler.records, ref)
-
-    def _ingest_log_records(self, quantum: Quantum, dataset_type: str, filename: str) -> None:
-        # If we are logging to an external file we must always try to
-        # close it.
-        assert self.full_butler is not None, "Expected to have full butler for ingest"
-        ingested = False
-        try:
-            # DatasetRef has to be in the Quantum outputs, can lookup by name.
-            try:
-                [ref] = quantum.outputs[dataset_type]
-            except LookupError as exc:
-                raise InvalidQuantumError(
-                    f"Quantum outputs is missing log output dataset type {dataset_type};"
-                    " this could happen due to inconsistent options between QuantumGraph generation"
-                    " and execution"
-                ) from exc
-
-            # Need to ingest this file directly into butler.
-            dataset = FileDataset(path=filename, refs=ref)
-            try:
-                self.full_butler.ingest(dataset, transfer="move")
-                ingested = True
-            except NotImplementedError:
-                # Some datastores can't receive files (e.g. in-memory datastore
-                # when testing), we store empty list for those just to have a
-                # dataset. Alternative is to read the file as a
-                # ButlerLogRecords object and put it.
-                _LOG.info(
-                    "Log records could not be stored in this butler because the"
-                    " datastore can not ingest files, empty record list is stored instead."
-                )
-                records = ButlerLogRecords.from_records([])
-                self.full_butler.put(records, ref)
-        finally:
-            # remove file if it is not ingested
-            if not ingested:
-                with suppress(OSError):
-                    os.remove(filename)
```
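The memory branch above now accepts a caller-provided container: `ButlerLogRecordHandler(records)` appends into `records`, so the captured logs outlive the handler (the `MPGraphExecutor` changes below pass a per-quantum `task_logs` container through `log_records=`). A minimal stand-alone sketch of the same pattern using only the stock `logging` module; `ListHandler` is a toy analogue, not part of this package:

```python
import logging


class ListHandler(logging.Handler):
    """Toy analogue of ButlerLogRecordHandler: append records into a
    caller-provided container instead of a private one."""

    def __init__(self, records: list[logging.LogRecord] | None = None) -> None:
        super().__init__()
        self.records = records if records is not None else []

    def emit(self, record: logging.LogRecord) -> None:
        self.records.append(record)


shared: list[logging.LogRecord] = []
handler = ListHandler(shared)
root = logging.getLogger()
root.addHandler(handler)
try:
    logging.warning("captured while the handler is installed")
finally:
    root.removeHandler(handler)

assert len(shared) == 1  # the caller still owns the records afterwards
```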
lsst/pipe/base/log_on_close.py
ADDED

```diff
@@ -0,0 +1,76 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("LogOnClose",)
+
+from collections.abc import Callable, Iterator
+from contextlib import AbstractContextManager, contextmanager
+
+from lsst.utils.logging import VERBOSE
+
+
+class LogOnClose:
+    """A factory for context manager wrappers that emit a log message when
+    they are closed.
+
+    Parameters
+    ----------
+    log_func : `~collections.abc.Callable` [ `int`, `str` ]
+        Callable that takes an integer log level and a string message and emits
+        a log message. Note that placeholder formatting is not supported.
+    """
+
+    def __init__(self, log_func: Callable[[int, str], None]):
+        self.log_func = log_func
+
+    def wrap[T](
+        self,
+        cm: AbstractContextManager[T],
+        msg: str,
+        level: int = VERBOSE,
+    ) -> AbstractContextManager[T]:
+        """Wrap a context manager to log when it is exited.
+
+        Parameters
+        ----------
+        cm : `contextlib.AbstractContextManager`
+            Context manager to wrap.
+        msg : `str`
+            Log message.
+        level : `int`, optional
+            Log level.
+        """
+
+        @contextmanager
+        def wrapper() -> Iterator[T]:
+            with cm as result:
+                yield result
+            self.log_func(level, msg)
+
+        return wrapper()
```
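A minimal usage sketch for the new `LogOnClose` factory above (the file name and message are placeholders; the `wrap[T]` syntax requires Python 3.12+):

```python
import logging

from lsst.pipe.base.log_on_close import LogOnClose

logging.basicConfig(level=logging.INFO)
_LOG = logging.getLogger("demo")

# Logger.log already has the (level, message) signature LogOnClose expects.
log_on_close = LogOnClose(_LOG.log)

# Wrap any context manager; the message is emitted only after the wrapped
# manager exits without raising.
with log_on_close.wrap(open("example.txt", "w"), "closed example.txt", logging.INFO) as f:
    f.write("hello\n")
```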
lsst/pipe/base/mp_graph_executor.py
CHANGED

```diff
@@ -39,20 +39,24 @@ import sys
 import threading
 import time
 import uuid
+from contextlib import ExitStack
 from typing import Literal, cast
 
 import networkx
 
 from lsst.daf.butler import DataCoordinate, Quantum
 from lsst.daf.butler.cli.cliLog import CliLog
+from lsst.daf.butler.logging import ButlerLogRecords
 from lsst.utils.threads import disable_implicit_threading
 
 from ._status import InvalidQuantumError, RepeatableQuantumError
+from ._task_metadata import TaskMetadata
 from .execution_graph_fixup import ExecutionGraphFixup
 from .graph import QuantumGraph
 from .graph_walker import GraphWalker
+from .log_on_close import LogOnClose
 from .pipeline_graph import TaskNode
-from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo
+from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo, ProvenanceQuantumGraphWriter
 from .quantum_graph_executor import QuantumExecutor, QuantumGraphExecutor
 from .quantum_reports import ExecutionStatus, QuantumReport, Report
 
@@ -515,7 +519,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
             start_method = "spawn"
         self._start_method = start_method
 
-    def execute(self, graph: QuantumGraph | PredictedQuantumGraph) -> None:
+    def execute(
+        self, graph: QuantumGraph | PredictedQuantumGraph, *, provenance_graph_file: str | None = None
+    ) -> None:
         # Docstring inherited from QuantumGraphExecutor.execute
         old_graph: QuantumGraph | None = None
         if isinstance(graph, QuantumGraph):
@@ -525,14 +531,31 @@ class MPGraphExecutor(QuantumGraphExecutor):
             new_graph = graph
         xgraph = self._make_xgraph(new_graph, old_graph)
         self._report = Report(qgraphSummary=new_graph._make_summary())
-        try:
-            if self._num_proc > 1:
-                self._execute_quanta_mp(xgraph, self._report)
-            else:
-                self._execute_quanta_in_process(xgraph, self._report)
-        except Exception as exc:
-            self._report.set_exception(exc)
-            raise
+        with ExitStack() as exit_stack:
+            provenance_writer: ProvenanceQuantumGraphWriter | None = None
+            if provenance_graph_file is not None:
+                if provenance_graph_file is not None and self._num_proc > 1:
+                    raise NotImplementedError(
+                        "Provenance writing is not implemented for multiprocess execution."
+                    )
+                provenance_writer = ProvenanceQuantumGraphWriter(
+                    provenance_graph_file,
+                    exit_stack=exit_stack,
+                    log_on_close=LogOnClose(_LOG.log),
+                    predicted=new_graph,
+                )
+            try:
+                if self._num_proc > 1:
+                    self._execute_quanta_mp(xgraph, self._report)
+                else:
+                    self._execute_quanta_in_process(xgraph, self._report, provenance_writer)
+            except Exception as exc:
+                self._report.set_exception(exc)
+                raise
+            if provenance_writer is not None:
+                provenance_writer.write_overall_inputs()
+                provenance_writer.write_packages()
+                provenance_writer.write_init_outputs(assume_existence=True)
 
     def _make_xgraph(
         self, new_graph: PredictedQuantumGraph, old_graph: QuantumGraph | None
@@ -576,7 +599,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
             raise MPGraphExecutorError("Updated execution graph has dependency cycle.")
         return xgraph
 
-    def _execute_quanta_in_process(self, xgraph: networkx.DiGraph, report: Report) -> None:
+    def _execute_quanta_in_process(
+        self, xgraph: networkx.DiGraph, report: Report, provenance_writer: ProvenanceQuantumGraphWriter | None
+    ) -> None:
         """Execute all Quanta in current process.
 
         Parameters
@@ -589,6 +614,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
            `.quantum_graph.PredictedQuantumGraph.quantum_only_xgraph`.
         report : `Report`
             Object for reporting execution status.
+        provenance_writer : `.quantum_graph.ProvenanceQuantumGraphWriter` or \
+            `None`
+            Object for recording provenance.
         """
 
         def tiebreaker_sort_key(quantum_id: uuid.UUID) -> tuple:
@@ -606,16 +634,19 @@ class MPGraphExecutor(QuantumGraphExecutor):
 
             _LOG.debug("Executing %s (%s@%s)", quantum_id, task_node.label, data_id)
             fail_exit_code: int | None = None
+            task_metadata: TaskMetadata | None = None
+            task_logs = ButlerLogRecords([])
             try:
                 # For some exception types we want to exit immediately with
                 # exception-specific exit code, but we still want to start
                 # debugger before exiting if debugging is enabled.
                 try:
-                    ...
-                        task_node, quantum, quantum_id=quantum_id
+                    execution_result = self._quantum_executor.execute(
+                        task_node, quantum, quantum_id=quantum_id, log_records=task_logs
                     )
-                    if ...
-                        report.quantaReports.append(...
+                    if execution_result.report:
+                        report.quantaReports.append(execution_result.report)
+                    task_metadata = execution_result.task_metadata
                     success_count += 1
                     walker.finish(quantum_id)
                 except RepeatableQuantumError as exc:
@@ -701,6 +732,11 @@ class MPGraphExecutor(QuantumGraphExecutor):
                 )
                 failed_count += 1
 
+            if provenance_writer is not None:
+                provenance_writer.write_quantum_provenance(
+                    quantum_id, metadata=task_metadata, logs=task_logs
+                )
+
             _LOG.info(
                 "Executed %d quanta successfully, %d failed and %d remain out of total %d quanta.",
                 success_count,
```
lsst/pipe/base/pipeline.py
CHANGED
```diff
@@ -54,13 +54,12 @@ from lsst.utils.introspection import get_full_type_name
 
 from . import automatic_connection_constants as acc
 from . import pipeline_graph, pipelineIR
-from ._instrument import Instrument as ...
+from ._instrument import Instrument as Instrument
 from .config import PipelineTaskConfig
 from .connections import PipelineTaskConnections
 from .pipelineTask import PipelineTask
 
 if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
-    from lsst.obs.base import Instrument
     from lsst.pex.config import Config
 
 # ----------------------------------
@@ -496,7 +495,7 @@ class Pipeline:
         Returns
         -------
         pipeline: `Pipeline`
-            ...
+            The new pipeline.
         """
         return cls.fromIR(copy.deepcopy(pipeline._pipelineIR))
 
@@ -606,7 +605,7 @@ class Pipeline:
 
     @property
    def subsets(self) -> MappingProxyType[str, set]:
-        """Returns a `MappingProxyType` where the keys are the labels of
+        """Returns a `types.MappingProxyType` where the keys are the labels of
         labeled subsets in the `Pipeline` and the values are the set of task
         labels contained within that subset.
         """
@@ -702,7 +701,7 @@ class Pipeline:
         """
         instrument_class_name = self._pipelineIR.instrument
         if instrument_class_name is not None:
-            instrument_class = cast(...
+            instrument_class = cast(Instrument, doImportType(instrument_class_name))
             if instrument_class is not None:
                 return DataCoordinate.standardize(instrument=instrument_class.getName(), universe=universe)
         return DataCoordinate.make_empty(universe)
@@ -893,7 +892,7 @@ class Pipeline:
             raise NameError(f"Label {label} does not appear in this pipeline")
         taskClass: type[PipelineTask] = doImportType(taskIR.klass)
         config = taskClass.ConfigClass()
-        instrument: ...
+        instrument: Instrument | None = None
         if (instrumentName := self._pipelineIR.instrument) is not None:
             instrument_cls: type = doImportType(instrumentName)
             instrument = instrument_cls()
```
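For reference, the `types.MappingProxyType` returned by `Pipeline.subsets` above is a read-only, live view of an underlying mapping; a small sketch with made-up subset labels:

```python
from types import MappingProxyType

_subsets: dict[str, set] = {"step1": {"isr", "calibrate"}}
view = MappingProxyType(_subsets)

print(view["step1"])     # the set of task labels in that subset
_subsets["step2"] = {"coadd"}
print("step2" in view)   # True: the proxy tracks the underlying dict
# view["step3"] = set()  # would raise TypeError: the proxy is read-only
```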
lsst/pipe/base/pipelineIR.py
CHANGED
```diff
@@ -220,12 +220,6 @@ class LabeledSubset:
 class ParametersIR:
     """Intermediate representation of parameters that are global to a pipeline.
 
-    Attributes
-    ----------
-    mapping : `dict` [`str`, `str`]
-        A mutable mapping of identifiers as keys, and shared configuration
-        as values.
-
     Notes
     -----
     These parameters are specified under a top level key named ``parameters``
@@ -343,7 +337,7 @@ class ConfigIR:
         )
         return new_config
 
-    def maybe_merge(self, other_config: ConfigIR) -> Generator[ConfigIR, None, None]:
+    def maybe_merge(self, other_config: ConfigIR) -> Generator[ConfigIR]:
         """Merge another instance of a `ConfigIR` into this instance if
         possible. This function returns a generator that is either self
         if the configs were merged, or self, and other_config if that could
@@ -706,7 +700,7 @@ class PipelineIR:
 
         Parameters
         ----------
-        loaded_yaml: `dict`
+        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced
            by a yaml reader which parses a pipeline definition document
        """
```
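The `maybe_merge` fix above adopts the single-parameter `Generator[ConfigIR]` spelling (since Python 3.13 the send and return type parameters of `Generator` default to `None`). The contract the docstring describes — yield one merged object, or both inputs unchanged — can be sketched with plain dicts; this toy function is not the real `ConfigIR.maybe_merge`:

```python
from collections.abc import Generator


def maybe_merge(a: dict, b: dict) -> Generator[dict]:
    """Yield a single merged dict when the keys don't collide; otherwise
    yield both inputs unchanged."""
    if a.keys() & b.keys():
        yield a
        yield b
    else:
        yield {**a, **b}


print(list(maybe_merge({"x": 1}, {"y": 2})))  # [{'x': 1, 'y': 2}]
print(list(maybe_merge({"x": 1}, {"x": 2})))  # [{'x': 1}, {'x': 2}]
```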
lsst/pipe/base/pipelineTask.py
CHANGED
```diff
@@ -55,7 +55,7 @@ class PipelineTask(Task):
     resulting data is also stored in a data butler.
 
     PipelineTask inherits from a `~lsst.pipe.base.Task` and uses the same
-    configuration mechanism based on ...
+    configuration mechanism based on `lsst.pex.config`. `PipelineTask`
     classes also have a `PipelineTaskConnections` class associated with their
     config which defines all of the IO a `PipelineTask` will need to do.
     PipelineTask sub-class typically implements `run()` method which receives
@@ -75,12 +75,6 @@ class PipelineTask(Task):
     PipelineTask base class constructor, but may support other signatures as
     well.
 
-    Attributes
-    ----------
-    canMultiprocess : bool, True by default (class attribute)
-        This class attribute is checked by execution framework, sub-classes
-        can set it to ``False`` in case task does not support multiprocessing.
-
     Parameters
     ----------
     config : `~lsst.pex.config.Config`, optional
@@ -102,7 +96,11 @@ class PipelineTask(Task):
     """
 
     ConfigClass: ClassVar[type[PipelineTaskConfig]]
+
     canMultiprocess: ClassVar[bool] = True
+    """Whether this task can be run by an executor that uses subprocesses for
+    parallelism.
+    """
 
     def __init__(
         self,
```
lsst/pipe/base/pipeline_graph/_dataset_types.py
CHANGED

```diff
@@ -106,8 +106,8 @@ class DatasetTypeNode:
         The internal networkx graph.
     get_registered : `~collections.abc.Callable` or `None`
         Callable that takes a dataset type name and returns the
-        ...
-        not registered.
+        `~lsst.daf.butler.DatasetType` registered in the data repository,
+        or `None` if it is not registered.
     dimensions : `lsst.daf.butler.DimensionUniverse`
         Definitions of all dimensions.
     previous : `DatasetTypeNode` or `None`
```
lsst/pipe/base/pipeline_graph/_edges.py
CHANGED

```diff
@@ -30,7 +30,7 @@ __all__ = ("Edge", "ReadEdge", "WriteEdge")
 
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Mapping, Sequence
-from typing import Any, ClassVar, Self, TypeVar
+from typing import Any, ClassVar, Self
 
 from lsst.daf.butler import DatasetRef, DatasetType, DimensionUniverse, StorageClassFactory
 from lsst.daf.butler.registry import MissingDatasetTypeError
@@ -40,8 +40,6 @@ from ..connectionTypes import BaseConnection
 from ._exceptions import ConnectionTypeConsistencyError, IncompatibleDatasetTypeError
 from ._nodes import NodeKey, NodeType
 
-_S = TypeVar("_S", bound="Edge")
-
 
 @immutable
 class Edge(ABC):
@@ -172,7 +170,7 @@ class Edge(ABC):
         """
         return self.parent_dataset_type_name
 
-    def diff(self: _S, other: _S, connection_type: str = "connection") -> list[str]:
+    def diff[S: Edge](self: S, other: S, connection_type: str = "connection") -> list[str]:
         """Compare this edge to another one from a possibly-different
         configuration of the same task label.
 
@@ -480,11 +478,11 @@ class ReadEdge(Edge):
         Parameters
         ----------
         current : `lsst.daf.butler.DatasetType` or `None`
-            The current graph-wide ...
-            be the registry's definition of the parent dataset ...
-            exists. If not, it will be the dataset type ...
-            task in the graph that writes it, if there is ...
-            such task, this will be `None`.
+            The current graph-wide `~lsst.daf.butler.DatasetType`, or `None`.
+            This will always be the registry's definition of the parent dataset
+            type, if one exists. If not, it will be the dataset type
+            definition from the task in the graph that writes it, if there is
+            one. If there is no such task, this will be `None`.
         is_initial_query_constraint : `bool`
             Whether this dataset type is currently marked as a constraint on
             the initial data ID query in QuantumGraph generation.
@@ -496,7 +494,7 @@ class ReadEdge(Edge):
         producer : `str` or `None`
             The label of the task that produces this dataset type in the
             pipeline, or `None` if it is an overall input.
-        consumers : ...
+        consumers : `~collections.abc.Sequence` [ `str` ]
             Labels for other consuming tasks that have already participated in
             this dataset type's resolution.
         is_registered : `bool`
@@ -512,7 +510,7 @@ class ReadEdge(Edge):
 
         Returns
         -------
-        dataset_type : ...
+        dataset_type : `~lsst.daf.butler.DatasetType`
             The updated graph-wide dataset type. If ``current`` was provided,
             this must be equal to it.
         is_initial_query_constraint : `bool`
@@ -659,13 +657,25 @@ class ReadEdge(Edge):
                     # compatible), since neither connection should take
                     # precedence.
                     if dataset_type != current:
-                        ...
+                        if visualization_only and dataset_type.dimensions == current.dimensions:
+                            # Make a visualization-only ambiguous storage class
+                            # "name".
+                            all_storage_classes = set(current.storageClass_name.split("/"))
+                            all_storage_classes.update(dataset_type.storageClass_name.split("/"))
+                            current = DatasetType(
+                                current.name,
+                                current.dimensions,
+                                "/".join(sorted(all_storage_classes)),
+                            )
+                        else:
+                            raise MissingDatasetTypeError(
+                                f"Definitions differ for input dataset type "
+                                f"{self.parent_dataset_type_name!r}; task {self.task_label!r} has "
+                                f"{dataset_type}, but the definition from {report_current_origin()} is "
+                                f"{current}. If the storage classes are compatible but different, "
+                                "registering the dataset type in the data repository in advance will avoid "
+                                "this error."
+                            )
                     elif not visualization_only and not dataset_type.is_compatible_with(current):
                         raise IncompatibleDatasetTypeError(
                             f"Incompatible definition for input dataset type {self.parent_dataset_type_name!r}; "
@@ -788,15 +798,15 @@ class WriteEdge(Edge):
         Parameters
         ----------
         current : `lsst.daf.butler.DatasetType` or `None`
-            The current graph-wide ...
-            be the registry's definition of the parent dataset ...
-            exists.
+            The current graph-wide `~lsst.daf.butler.DatasetType`, or `None`.
+            This will always be the registry's definition of the parent dataset
+            type, if one exists.
         universe : `lsst.daf.butler.DimensionUniverse`
             Object that holds all dimension definitions.
 
         Returns
         -------
-        dataset_type : ...
+        dataset_type : `~lsst.daf.butler.DatasetType`
             A dataset type compatible with this edge. If ``current`` was
             provided, this must be equal to it.
```
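The visualization-only branch in the `ReadEdge` hunk above builds an "ambiguous" storage class name by unioning the slash-separated components from both definitions; a quick illustration with made-up storage class names:

```python
current_name = "ArrowAstropy"
incoming_name = "DataFrame/ArrowTable"  # may itself already be a merged name

all_storage_classes = set(current_name.split("/"))
all_storage_classes.update(incoming_name.split("/"))
print("/".join(sorted(all_storage_classes)))  # ArrowAstropy/ArrowTable/DataFrame
```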
lsst/pipe/base/pipeline_graph/_mapping_views.py
CHANGED

```diff
@@ -27,7 +27,7 @@
 from __future__ import annotations
 
 from collections.abc import Iterable, Iterator, Mapping, Sequence
-from typing import Any, ClassVar, TypeVar, cast, overload
+from typing import Any, ClassVar, cast, overload
 
 import networkx
 
@@ -36,11 +36,8 @@ from ._exceptions import UnresolvedGraphError
 from ._nodes import NodeKey, NodeType
 from ._tasks import TaskInitNode, TaskNode
 
-_N = TypeVar("_N", covariant=True)
-_T = TypeVar("_T")
 
-
-class MappingView(Mapping[str, _N]):
+class MappingView[N](Mapping[str, N]):
     """Base class for mapping views into nodes of certain types in a
     `PipelineGraph`.
 
@@ -74,7 +71,7 @@ class MappingView(Mapping[str, _N]):
             self._keys = self._make_keys(self._parent_xgraph)
         return iter(self._keys)
 
-    def __getitem__(self, key: str) -> _N:
+    def __getitem__(self, key: str) -> N:
         return self._parent_xgraph.nodes[NodeKey(self._NODE_TYPE, key)]["instance"]
 
     def __len__(self) -> int:
@@ -230,7 +227,7 @@ class DatasetTypeMappingView(MappingView[DatasetTypeNode]):
     def get_if_resolved(self, key: str) -> DatasetTypeNode | None: ...  # pragma: nocover
 
     @overload
-    def get_if_resolved(self, key: str, default: _T) -> DatasetTypeNode | _T: ...  # pragma: nocover
+    def get_if_resolved[T](self, key: str, default: T) -> DatasetTypeNode | T: ...  # pragma: nocover
 
     def get_if_resolved(self, key: str, default: Any = None) -> DatasetTypeNode | Any:
         """Get a node or return a default if it has not been resolved.
```