lsst-pipe-base 29.2025.1100-py3-none-any.whl → 29.2025.1200-py3-none-any.whl
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +4 -0
- lsst/pipe/base/connections.py +179 -2
- lsst/pipe/base/pipeline_graph/visualization/_mermaid.py +10 -4
- lsst/pipe/base/quantum_graph_builder.py +91 -60
- lsst/pipe/base/quantum_graph_skeleton.py +20 -0
- lsst/pipe/base/quantum_provenance_graph.py +790 -421
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.1100.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/METADATA +4 -3
- {lsst_pipe_base-29.2025.1100.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/RECORD +17 -17
- {lsst_pipe_base-29.2025.1100.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/WHEEL +1 -1
- {lsst_pipe_base-29.2025.1100.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.1100.dist-info → lsst_pipe_base-29.2025.1200.dist-info/licenses}/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.1100.dist-info → lsst_pipe_base-29.2025.1200.dist-info/licenses}/LICENSE +0 -0
- {lsst_pipe_base-29.2025.1100.dist-info → lsst_pipe_base-29.2025.1200.dist-info/licenses}/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.1100.dist-info → lsst_pipe_base-29.2025.1200.dist-info/licenses}/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.1100.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.1100.dist-info → lsst_pipe_base-29.2025.1200.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_provenance_graph.py

@@ -39,25 +39,42 @@ __all__ = (
     "QuantumProvenanceGraph",
 )
 
+import concurrent.futures
 import dataclasses
+import datetime
 import itertools
 import logging
 import textwrap
+import threading
 import uuid
-from collections.abc import Iterator, Mapping, Sequence, Set
+from collections.abc import Callable, Iterator, Mapping, Sequence, Set
 from enum import Enum
-from typing import TYPE_CHECKING, ClassVar, Literal, TypedDict, cast
+from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypedDict, cast
 
 import astropy.table
 import networkx
 import pydantic
 
-from lsst.daf.butler import
+from lsst.daf.butler import (
+    Butler,
+    ButlerConfig,
+    ButlerLogRecords,
+    DataCoordinate,
+    DataIdValue,
+    DatasetId,
+    DatasetRef,
+    DatasetType,
+    DimensionUniverse,
+    LimitedButler,
+    MissingDatasetTypeError,
+    QuantumBackedButler,
+)
 from lsst.resources import ResourcePathExpression
-from lsst.utils.logging import getLogger
+from lsst.utils.logging import PeriodicLogger, getLogger
 
 from ._status import QuantumSuccessCaveats
-from .
+from .automatic_connection_constants import LOG_OUTPUT_TEMPLATE, METADATA_OUTPUT_TEMPLATE
+from .graph import QuantumGraph, QuantumNode
 
 if TYPE_CHECKING:
     from ._task_metadata import TaskMetadata
@@ -178,7 +195,7 @@ class ExceptionInfo(pydantic.BaseModel):
     """Additional metadata included in the exception."""
 
     @classmethod
-    def
+    def _from_metadata(cls, md: TaskMetadata) -> ExceptionInfo:
         """Construct from task metadata.
 
         Parameters
@@ -476,7 +493,7 @@ class UnsuccessfulQuantumSummary(pydantic.BaseModel):
     """
 
     @classmethod
-    def
+    def _from_info(cls, info: QuantumInfo) -> UnsuccessfulQuantumSummary:
         """Summarize all relevant information from the `QuantumInfo` in an
         `UnsuccessfulQuantumSummary`; return an `UnsuccessfulQuantumSummary`.
 
@@ -595,7 +612,12 @@ class TaskSummary(pydantic.BaseModel):
     this module) associated with the particular issue identified.
     """
 
-    def
+    def _add_quantum_info(
+        self,
+        info: QuantumInfo,
+        log_getter: Callable[[DatasetRef], ButlerLogRecords] | None,
+        executor: concurrent.futures.Executor,
+    ) -> concurrent.futures.Future[None] | None:
         """Add a `QuantumInfo` to a `TaskSummary`.
 
         Unpack the `QuantumInfo` object, sorting quanta of each status into
@@ -607,12 +629,19 @@ class TaskSummary(pydantic.BaseModel):
         ----------
         info : `QuantumInfo`
             The `QuantumInfo` object to add to the `TaskSummary`.
-[6 deleted lines omitted]
+        log_getter : `~collections.abc.Callable` or `None`
+            A callable that can be passed a `~lsst.daf.butler.DatasetRef` for
+            a log dataset to retrieve those logs, or `None` to not load any
+            logs.
+        executor : `concurrent.futures.Executor`
+            A possibly-parallel executor that should be used to schedule
+            log dataset reads.
+
+        Returns
+        -------
+        future : `concurrent.futures.Future` or `None`
+            A future that represents a parallelized log read and summary
+            update.
         """
         try:
             final_run, final_quantum_run = QuantumRun.find_final(info)
@@ -637,35 +666,45 @@ class TaskSummary(pydantic.BaseModel):
                         exception=final_quantum_run.exception,
                     )
                 )
+                return None
             case QuantumInfoStatus.WONKY:
-                self.wonky_quanta.append(UnsuccessfulQuantumSummary.
+                self.wonky_quanta.append(UnsuccessfulQuantumSummary._from_info(info))
+                return None
             case QuantumInfoStatus.BLOCKED:
                 self.n_blocked += 1
+                return None
             case QuantumInfoStatus.FAILED:
-                failed_quantum_summary = UnsuccessfulQuantumSummary.
-[16 deleted lines omitted]
+                failed_quantum_summary = UnsuccessfulQuantumSummary._from_info(info)
+                future: concurrent.futures.Future[None] | None = None
+                if log_getter:
+
+                    def callback() -> None:
+                        for quantum_run in info["runs"].values():
+                            try:
+                                log = log_getter(quantum_run.log_ref)
+                            except LookupError:
+                                failed_quantum_summary.messages.append(
+                                    f"Logs not ingested for {quantum_run.log_ref!r}"
+                                )
+                            except FileNotFoundError:
+                                failed_quantum_summary.messages.append(
+                                    f"Logs missing or corrupt for {quantum_run.log_ref!r}"
+                                )
+                            else:
+                                failed_quantum_summary.messages.extend(
+                                    [record.message for record in log if record.levelno >= logging.ERROR]
+                                )
+
+                    future = executor.submit(callback)
                 self.failed_quanta.append(failed_quantum_summary)
+                return future
             case QuantumInfoStatus.UNKNOWN:
                 self.n_unknown += 1
+                return None
             case unrecognized_state:
                 raise AssertionError(f"Unrecognized quantum status {unrecognized_state!r}")
 
-    def
+    def _add_data_id_group(self, other_summary: TaskSummary) -> None:
         """Add information from a `TaskSummary` over one dataquery-identified
         group to another, as part of aggregating `Summary` reports.
 
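The FAILED branch above no longer reads logs inline with a butler; it packages the read into a closure, submits it to the caller's executor, and returns the resulting future so the caller can drain them later. The following minimal, self-contained sketch shows that submit-then-drain pattern; all names here are illustrative stand-ins, not part of the package:

```python
import concurrent.futures


def error_lines(log: list[str]) -> list[str]:
    # Stand-in for loading a log dataset and filtering it; the real code
    # calls the butler and keeps records with levelno >= logging.ERROR.
    return [line for line in log if line.startswith("ERROR")]


messages: list[str] = []
fake_logs = [["ERROR boom", "INFO ok"], ["INFO fine"], ["ERROR crash"]]

with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    # Submit one callback per failed quantum; each mutates shared state,
    # mirroring how callback() extends failed_quantum_summary.messages.
    futures = [
        executor.submit(lambda log=log: messages.extend(error_lines(log)))
        for log in fake_logs
    ]
    # Drain the futures and re-raise any worker exception, just as
    # to_summary() does with future.exception().
    for future in concurrent.futures.as_completed(futures):
        if (err := future.exception()) is not None:
            raise err

print(messages)  # ['ERROR boom', 'ERROR crash'] (order may vary)
```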
@@ -712,7 +751,7 @@ class CursedDatasetSummary(pydantic.BaseModel):
     """
 
     @classmethod
-    def
+    def _from_info(cls, info: DatasetInfo, producer_info: QuantumInfo) -> CursedDatasetSummary:
         """Summarize all relevant information from the `DatasetInfo` in an
         `CursedDatasetSummary`; return a `CursedDatasetSummary`.
 
@@ -797,7 +836,7 @@ class DatasetTypeSummary(pydantic.BaseModel):
     """A list of all unsuccessful datasets by their name and data_id.
     """
 
-    def
+    def _add_dataset_info(self, info: DatasetInfo, producer_info: QuantumInfo) -> None:
         """Add a `DatasetInfo` to a `DatasetTypeSummary`.
 
         Unpack the `DatasetInfo` object, sorting datasets of each status into
@@ -822,13 +861,13 @@ class DatasetTypeSummary(pydantic.BaseModel):
             case DatasetInfoStatus.UNSUCCESSFUL:
                 self.unsuccessful_datasets.append(dict(info["data_id"].mapping))
             case DatasetInfoStatus.CURSED:
-                self.cursed_datasets.append(CursedDatasetSummary.
+                self.cursed_datasets.append(CursedDatasetSummary._from_info(info, producer_info))
             case DatasetInfoStatus.PREDICTED_ONLY:
                 self.n_predicted_only += 1
             case unrecognized_state:
                 raise AssertionError(f"Unrecognized dataset status {unrecognized_state!r}")
 
-    def
+    def _add_data_id_group(self, other_summary: DatasetTypeSummary) -> None:
         """Add information from a `DatasetTypeSummary` over one
         dataquery-identified group to another, as part of aggregating `Summary`
         reports.
@@ -889,10 +928,10 @@ class Summary(pydantic.BaseModel):
         for summary in summaries:
             for label, task_summary in summary.tasks.items():
                 result_task_summary = result.tasks.setdefault(label, TaskSummary())
-                result_task_summary.
+                result_task_summary._add_data_id_group(task_summary)
             for dataset_type, dataset_type_summary in summary.datasets.items():
                 result_dataset_summary = result.datasets.setdefault(dataset_type, DatasetTypeSummary())
-                result_dataset_summary.
+                result_dataset_summary._add_data_id_group(dataset_type_summary)
         return result
 
     def pprint(self, brief: bool = False, datasets: bool = True) -> None:
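The aggregation loop above folds per-data-query-group summaries into one result, creating empty `TaskSummary`/`DatasetTypeSummary` entries on demand via `setdefault` and then merging into them. A toy sketch of the same merge pattern with plain dicts standing in for the pydantic models (names here are hypothetical):

```python
# Each dict maps task label -> number of expected quanta for one
# data-query-identified group.
groups = [{"isr": 10}, {"isr": 12, "calibrate": 12}]

result: dict[str, int] = {}
for group in groups:
    for label, n_expected in group.items():
        # setdefault plays the role of result.tasks.setdefault(label,
        # TaskSummary()); the addition plays the role of _add_data_id_group().
        result[label] = result.setdefault(label, 0) + n_expected

print(result)  # {'isr': 22, 'calibrate': 12}
```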
@@ -1126,19 +1165,53 @@ class QuantumProvenanceGraph:
     """A set of already-run, merged quantum graphs with provenance
     information.
 
-[13 deleted lines omitted]
+    Parameters
+    ----------
+    butler : `lsst.daf.butler.Butler`
+        The Butler used for this report. This should match the Butler used
+        for the run associated with the executed quantum graph.
+    qgraphs : `~collections.abc.Sequence` [`QuantumGraph` |\
+        `~lsst.utils.resources.ResourcePathExpression`]
+        A list of either quantum graph objects or their uri's, to be used
+        to assemble the `QuantumProvenanceGraph`.
+    collections : `~collections.abc.Sequence` [`str`] | `None`
+        Collections to use in `lsst.daf.butler.query_datasets` when testing
+        which datasets are available at a high level.
+    where : `str`
+        A "where" string to use to constrain the datasets; should be provided
+        if ``collections`` includes many datasets that are not in any graphs,
+        to select just those that might be (e.g. when sharding over dimensions
+        and using a final collection that spans multiple shards).
+    curse_failed_logs : `bool`
+        Mark log datasets as CURSED if they are visible in the final output
+        collection. Note that a campaign-level collection must be used here for
+        `collections` if `curse_failed_logs` is `True`.
+    read_caveats : `str` or `None`, optional
+        Whether to read metadata files to get flags that describe qualified
+        successes. If `None`, no metadata files will be read and all
+        ``caveats`` fields will be `None`. If "exhaustive", all metadata files
+        will be read. If "lazy", only metadata files where at least one
+        predicted output is missing will be read.
+    use_qbb : `bool`, optional
+        If `True`, use a quantum-backed butler when reading metadata files.
+        Note that some butler database queries are still run even if this is
+        `True`; this does not avoid database access entirely.
+    n_cores : `int`, optional
+        Number of threads to use for parallelization.
+    """
+
+    def __init__(
+        self,
+        butler: Butler | None = None,
+        qgraphs: Sequence[QuantumGraph | ResourcePathExpression] = (),
+        *,
+        collections: Sequence[str] | None = None,
+        where: str = "",
+        curse_failed_logs: bool = False,
+        read_caveats: Literal["lazy", "exhaustive"] | None = "lazy",
+        use_qbb: bool = True,
+        n_cores: int = 1,
+    ) -> None:
         # The graph we annotate as we step through all the graphs associated
         # with the processing to create the `QuantumProvenanceGraph`.
         self._xgraph = networkx.DiGraph()
@@ -1150,6 +1223,24 @@ class QuantumProvenanceGraph:
         # Bool representing whether the graph has been finalized. This is set
         # to True when resolve_duplicates completes.
         self._finalized: bool = False
+        # In order to both parallelize metadata/log reads and potentially use
+        # QBB to do it, we in general need one butler for each output_run and
+        # thread combination. This dict is keyed by the former, and the
+        # wrapper type used for the value handles the latter.
+        self._butler_wrappers: dict[str, _ThreadLocalButlerWrapper] = {}
+        if butler is not None:
+            self.assemble_quantum_provenance_graph(
+                butler,
+                qgraphs,
+                collections=collections,
+                where=where,
+                curse_failed_logs=curse_failed_logs,
+                read_caveats=read_caveats,
+                use_qbb=use_qbb,
+                n_cores=n_cores,
+            )
+        elif qgraphs:
+            raise TypeError("'butler' must be provided if `qgraphs` is.")
 
     @property
     def quanta(self) -> Mapping[str, Set[QuantumKey]]:
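Taken together, the new class docstring and `__init__` make the one-shot construction path explicit: passing a butler and a sequence of quantum graphs runs `assemble_quantum_provenance_graph` immediately. A hedged usage sketch based only on the signatures shown in this diff; the repository path and `.qgraph` file names are placeholders:

```python
from lsst.daf.butler import Butler

from lsst.pipe.base.quantum_provenance_graph import QuantumProvenanceGraph

# "repo" and the .qgraph paths stand in for a real data repository and the
# saved quantum graphs of successive processing attempts, which must be
# passed in chronological order.
butler = Butler("repo")
qpg = QuantumProvenanceGraph(
    butler,
    ["attempt1.qgraph", "attempt2.qgraph"],
    read_caveats="lazy",  # only read metadata when a predicted output is missing
    use_qbb=True,         # quantum-backed butler for metadata reads
    n_cores=4,            # thread pool size for metadata/log reads
)
summary = qpg.to_summary(do_store_logs=True, n_cores=4)
summary.pprint(brief=True)
```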
@@ -1195,245 +1286,562 @@ class QuantumProvenanceGraph:
         """
         return self._xgraph.nodes[key]
 
-    def
+    def to_summary(
+        self, butler: Butler | None = None, do_store_logs: bool = True, n_cores: int = 1
+    ) -> Summary:
+        """Summarize the `QuantumProvenanceGraph`.
+
+        Parameters
+        ----------
+        butler : `lsst.daf.butler.Butler`, optional
+            Ignored; accepted for backwards compatibility.
+        do_store_logs : `bool`
+            Store the logs in the summary dictionary.
+        n_cores : `int`, optional
+            Number of threads to use for parallelization.
+
+        Returns
+        -------
+        result : `Summary`
+            A struct containing counts of quanta and datasets in each of
+            the overall states defined in `QuantumInfo` and `DatasetInfo`,
+            as well as diagnostic information and error messages for failed
+            quanta and strange edge cases, and a list of recovered quanta.
+        """
+        status_log = PeriodicLogger(_LOG)
+        if not self._finalized:
+            raise RuntimeError(
+                """resolve_duplicates must be called to finalize the
+                QuantumProvenanceGraph before making a summary."""
+            )
+        result = Summary()
+        futures: list[concurrent.futures.Future[None]] = []
+        _LOG.verbose("Summarizing %s tasks.", len(self._quanta.keys()))
+        with concurrent.futures.ThreadPoolExecutor(n_cores) as executor:
+            for m, (task_label, quanta) in enumerate(self._quanta.items()):
+                task_summary = TaskSummary()
+                task_summary.n_expected = len(quanta)
+                for n, quantum_key in enumerate(quanta):
+                    quantum_info = self.get_quantum_info(quantum_key)
+                    future = task_summary._add_quantum_info(
+                        quantum_info,
+                        log_getter=self._butler_get if do_store_logs else None,
+                        executor=executor,
+                    )
+                    if future is not None:
+                        futures.append(future)
+                    status_log.log(
+                        "Summarized %s of %s quanta of task %s of %s.",
+                        n + 1,
+                        len(quanta),
+                        m + 1,
+                        len(self._quanta.keys()),
+                    )
+                result.tasks[task_label] = task_summary
+            for n, future in enumerate(concurrent.futures.as_completed(futures)):
+                if (err := future.exception()) is not None:
+                    raise err
+                status_log.log("Loaded messages from %s of %s log datasets.", n + 1, len(futures))
+        _LOG.verbose("Summarizing %s dataset types.", len(self._datasets.keys()))
+        for m, (dataset_type_name, datasets) in enumerate(self._datasets.items()):
+            dataset_type_summary = DatasetTypeSummary(producer="")
+            dataset_type_summary.n_expected = len(datasets)
+            for n, dataset_key in enumerate(datasets):
+                dataset_info = self.get_dataset_info(dataset_key)
+                producer_key = self.get_producer_of(dataset_key)
+                producer_info = self.get_quantum_info(producer_key)
+                # Not ideal, but hard to get out of the graph at the moment.
+                # Change after DM-40441
+                dataset_type_summary.producer = producer_key.task_label
+                dataset_type_summary._add_dataset_info(dataset_info, producer_info)
+                status_log.log(
+                    "Summarized %s of %s datasets of type %s of %s.",
+                    n + 1,
+                    len(datasets),
+                    m + 1,
+                    len(self._datasets.keys()),
+                )
+            result.datasets[dataset_type_name] = dataset_type_summary
+        return result
+
+    def iter_outputs_of(self, quantum_key: QuantumKey) -> Iterator[DatasetKey]:
+        """Iterate through the outputs of a quantum, yielding the keys of
+        all of the datasets produced by the quantum.
+
+        Parameters
+        ----------
+        quantum_key : `QuantumKey`
+            The key for the quantum whose outputs are needed.
+        """
+        yield from self._xgraph.successors(quantum_key)
+
+    def get_producer_of(self, dataset_key: DatasetKey) -> QuantumKey:
+        """Unpack the predecessor (producer quantum) of a given dataset key
+        from a graph.
+
+        Parameters
+        ----------
+        dataset_key : `DatasetKey`
+            The key for the dataset whose producer quantum is needed.
+
+        Returns
+        -------
+        result : `QuantumKey`
+            The key for the quantum which produced the dataset.
+        """
+        (result,) = self._xgraph.predecessors(dataset_key)
+        return result
+
+    def iter_downstream(
+        self, key: QuantumKey | DatasetKey
+    ) -> Iterator[tuple[QuantumKey, QuantumInfo] | tuple[DatasetKey, DatasetInfo]]:
+        """Iterate over the quanta and datasets that are downstream of a
+        quantum or dataset.
+
+        Parameters
+        ----------
+        key : `QuantumKey` or `DatasetKey`
+            Starting node.
+
+        Returns
+        -------
+        iter : `~collections.abc.Iterator` [ `tuple` ]
+            An iterator over pairs of (`QuantumKey`, `QuantumInfo`) or
+            (`DatasetKey`, `DatasetInfo`).
+        """
+        for key in networkx.dag.descendants(self._xgraph, key):
+            yield (key, self._xgraph.nodes[key])  # type: ignore
+
+    def assemble_quantum_provenance_graph(
+        self,
+        butler: Butler,
+        qgraphs: Sequence[QuantumGraph | ResourcePathExpression],
+        collections: Sequence[str] | None = None,
+        where: str = "",
+        curse_failed_logs: bool = False,
+        read_caveats: Literal["lazy", "exhaustive"] | None = "lazy",
+        use_qbb: bool = True,
+        n_cores: int = 1,
+    ) -> None:
+        """Assemble the quantum provenance graph from a list of all graphs
+        corresponding to processing attempts.
+
+        Parameters
+        ----------
+        butler : `lsst.daf.butler.Butler`
+            The Butler used for this report. This should match the Butler used
+            for the run associated with the executed quantum graph.
+        qgraphs : `~collections.abc.Sequence` [`QuantumGraph` |\
+            `~lsst.utils.resources.ResourcePathExpression`]
+            A list of either quantum graph objects or their uri's, to be used
+            to assemble the `QuantumProvenanceGraph`.
+        collections : `~collections.abc.Sequence` [`str`] | `None`
+            Collections to use in `lsst.daf.butler.query_datasets` when testing
+            which datasets are available at a high level.
+        where : `str`
+            A "where" string to use to constrain the datasets; should be
+            provided if ``collections`` includes many datasets that are not in
+            any graphs, to select just those that might be (e.g. when sharding
+            over dimensions and using a final collection that spans multiple
+            shards).
+        curse_failed_logs : `bool`
+            Mark log datasets as CURSED if they are visible in the final
+            output collection. Note that a campaign-level collection must be
+            used here for `collections` if `curse_failed_logs` is `True`.
+        read_caveats : `str` or `None`, optional
+            Whether to read metadata files to get flags that describe qualified
+            successes. If `None`, no metadata files will be read and all
+            ``caveats`` fields will be `None`. If "exhaustive", all
+            metadata files will be read. If "lazy", only metadata files where
+            at least one predicted output is missing will be read.
+        use_qbb : `bool`, optional
+            If `True`, use a quantum-backed butler when reading metadata files.
+            Note that some butler database queries are still run even if this
+            is `True`; this does not avoid database access entirely.
+        n_cores : `int`, optional
+            Number of threads to use for parallelization.
+        """
+        if read_caveats not in ("lazy", "exhaustive", None):
+            raise TypeError(
+                f"Invalid option {read_caveats!r} for read_caveats; should be 'lazy', 'exhaustive', or None."
+            )
+        output_runs = []
+        last_time: datetime.datetime | None = None
+        for graph in qgraphs:
+            if not isinstance(graph, QuantumGraph):
+                _LOG.verbose("Loading quantum graph %r.", graph)
+                qgraph = QuantumGraph.loadUri(graph)
+            else:
+                qgraph = graph
+            assert qgraph.metadata is not None, "Saved QGs always have metadata."
+            self._add_new_graph(butler, qgraph, read_caveats=read_caveats, use_qbb=use_qbb, n_cores=n_cores)
+            output_runs.append(qgraph.metadata["output_run"])
+            if last_time is not None and last_time > qgraph.metadata["time"]:
+                raise RuntimeError("Quantum graphs must be passed in chronological order.")
+            last_time = qgraph.metadata["time"]
+        if not collections:
+            # We reverse the order of the associated output runs because the
+            # query in _resolve_duplicates must be done most-recent first.
+            collections = list(reversed(output_runs))
+            assert not curse_failed_logs, (
+                "curse_failed_logs option must be used with one campaign-level collection."
+            )
+        self._resolve_duplicates(butler, collections, where, curse_failed_logs)
+
+    def _add_new_graph(
         self,
         butler: Butler,
-        qgraph: QuantumGraph
+        qgraph: QuantumGraph,
         read_caveats: Literal["lazy", "exhaustive"] | None,
+        use_qbb: bool = True,
+        n_cores: int = 1,
     ) -> None:
         """Add a new quantum graph to the `QuantumProvenanceGraph`.
 
-        Notes
-        -----
-        The algorithm: step through the quantum graph. Annotate a
-        `networkx.DiGraph` (`QuantumProvenanceGraph._xgraph`) with all of the
-        relevant information: quanta, dataset types and their associated run
-        collections (these unique quanta- and dataset type-run
-        collection combinations are encapsulated in the classes
-        `DatasetRun` and `QuantumRun`). For each new quantum, annotate
-        the status of the `QuantumRun` by inspecting the graph. If a
-        DatasetType was produced, annotate this in the run by setting
-        `DatasetRun.produced = True`. If a quantum is given BLOCKED
-        or FAILED status, annotate all their successors in the graph
-        as BLOCKED. For each new quantum, use the transition between
-        the current and last `QuantumRun.status` to determine the status
-        to assign to the overall `QuantumInfo`. For example, if a
-        previous run associated with a quantum had the status FAILED,
-        and the status from the new graph reads SUCCESSFUL, we can
-        mark the overall quantum status as SUCCESSFUL and list the data_id
-        as RECOVERED.
-
         Parameters
         ----------
         butler : `lsst.daf.butler.Butler`
             The Butler used for this report. This should match the Butler
             used for the run associated with the executed quantum graph.
-        qgraph : `QuantumGraph`
-
-            location of said quantum graph.
+        qgraph : `QuantumGraph`
+            The quantum graph object to add.
         read_caveats : `str` or `None`
             Whether to read metadata files to get flags that describe qualified
             successes. If `None`, no metadata files will be read and all
             ``caveats`` fields will be `None`. If "exhaustive", all
             metadata files will be read. If "lazy", only metadata files where
             at least one predicted output is missing will be read.
+        use_qbb : `bool`, optional
+            If `True`, use a quantum-backed butler when reading metadata files.
+            Note that some butler database queries are still run even if this
+            is `True`; this does not avoid database access entirely.
+        n_cores : `int`, optional
+            Number of threads to use for parallelization.
         """
-
-        if not isinstance(qgraph, QuantumGraph):
-            qgraph = QuantumGraph.loadUri(qgraph)
-        assert qgraph.metadata is not None, "Saved QGs always have metadata."
+        status_log = PeriodicLogger(_LOG)
         output_run = qgraph.metadata["output_run"]
+        # Add QuantumRun and DatasetRun (and nodes/edges, as needed) to the
+        # QPG for all quanta in the QG.
+        _LOG.verbose("Adding output run to provenance graph.")
         new_quanta: list[QuantumKey] = []
-        for node in qgraph:
-[13 deleted lines omitted]
-            quantum_info.setdefault("recovered", False)
-            new_quanta.append(quantum_key)
-            self._quanta.setdefault(quantum_key.task_label, set()).add(quantum_key)
-            metadata_ref = node.quantum.outputs[f"{node.taskDef.label}_metadata"][0]
-            log_ref = node.quantum.outputs[f"{node.taskDef.label}_log"][0]
-            # associate run collections with specific quanta. this is important
-            # if the same quanta are processed in multiple runs as in recovery
-            # workflows.
-            quantum_runs = quantum_info.setdefault("runs", {})
-            # the `QuantumRun` here is the specific quantum-run collection
-            # combination.
-            quantum_runs[output_run] = QuantumRun(id=node.nodeId, metadata_ref=metadata_ref, log_ref=log_ref)
-            # For each of the outputs of the quanta (datasets) make a key to
-            # refer to the dataset.
-            for ref in itertools.chain.from_iterable(node.quantum.outputs.values()):
-                dataset_key = DatasetKey(ref.datasetType.name, ref.dataId.required_values)
-                # add datasets to the nodes of the graph, with edges on the
-                # quanta.
-                self._xgraph.add_edge(quantum_key, dataset_key)
-                # use the dataset key to make a `DatasetInfo` object for
-                # the dataset and set defaults for its values.
-                dataset_info = self.get_dataset_info(dataset_key)
-                dataset_info.setdefault("data_id", ref.dataId)
-                dataset_info.setdefault("status", DatasetInfoStatus.PREDICTED_ONLY)
-                dataset_info.setdefault("messages", [])
-                self._datasets.setdefault(dataset_key.dataset_type_name, set()).add(dataset_key)
-                dataset_runs = dataset_info.setdefault("runs", {})
-                # make a `DatasetRun` for the specific dataset-run
-                # collection combination.
-                dataset_runs[output_run] = DatasetRun(id=ref.id)
-                # save metadata and logs for easier status interpretation later
-                if dataset_key.dataset_type_name.endswith("_metadata"):
-                    quantum_info["metadata"] = dataset_key
-                    quantum_runs[output_run].metadata_ref = ref
-                if dataset_key.dataset_type_name.endswith("_log"):
-                    quantum_info["log"] = dataset_key
-                    quantum_runs[output_run].log_ref = ref
-            for ref in itertools.chain.from_iterable(node.quantum.inputs.values()):
-                dataset_key = DatasetKey(ref.datasetType.nameAndComponent()[0], ref.dataId.required_values)
-                if dataset_key in self._xgraph:
-                    # add another edge if the input datasetType and quantum are
-                    # in the graph
-                    self._xgraph.add_edge(dataset_key, quantum_key)
-        for dataset_type_name in self._datasets:
-            for ref in butler.registry.queryDatasets(dataset_type_name, collections=output_run):
-                # find the datasets in the butler
+        for n, node in enumerate(qgraph):
+            new_quanta.append(self._add_new_quantum(node, output_run))
+            status_log.log("Added nodes for %s of %s quanta.", n + 1, len(qgraph))
+        # Query for datasets in the output run to see which ones were actually
+        # produced.
+        _LOG.verbose("Querying for existence for %s dataset types.", len(self._datasets.keys()))
+        for m, dataset_type_name in enumerate(self._datasets):
+            try:
+                refs = butler.query_datasets(
+                    dataset_type_name, collections=output_run, explain=False, limit=None
+                )
+            except MissingDatasetTypeError:
+                continue
+            for n, ref in enumerate(refs):
                 dataset_key = DatasetKey(ref.datasetType.name, ref.dataId.required_values)
                 dataset_info = self.get_dataset_info(dataset_key)
                 dataset_run = dataset_info["runs"][output_run]  # dataset run (singular)
-                # if the dataset is in the output run collection, we produced
-                # it!
                 dataset_run.produced = True
-[23 deleted lines omitted]
-                        read_caveats
-                        and not all(
-                            self.get_dataset_info(dataset_key)["runs"][output_run].produced
-                            for dataset_key in self._xgraph.successors(quantum_key)
-                        )
+                status_log.log(
+                    "Updated status for %s of %s datasets of %s of %s types.",
+                    n + 1,
+                    len(refs),
+                    m + 1,
+                    len(self._datasets.keys()),
+                )
+        if use_qbb:
+            _LOG.verbose("Using quantum-backed butler for metadata loads.")
+            self._butler_wrappers[output_run] = _ThreadLocalButlerWrapper.wrap_qbb(butler, qgraph)
+        else:
+            _LOG.verbose("Using full butler for metadata loads.")
+            self._butler_wrappers[output_run] = _ThreadLocalButlerWrapper.wrap_full(butler)
+
+        _LOG.verbose("Setting quantum status from dataset existence.")
+        # Update quantum status information based on which datasets were
+        # produced.
+        blocked: set[DatasetKey] = set()  # the outputs of failed or blocked quanta in this run.
+        with concurrent.futures.ThreadPoolExecutor(n_cores) as executor:
+            futures: list[concurrent.futures.Future[None]] = []
+            for n, quantum_key in enumerate(new_quanta):
+                if (
+                    self._update_run_status(quantum_key, output_run, blocked) == QuantumRunStatus.SUCCESSFUL
+                    and read_caveats is not None
                 ):
-[15 deleted lines omitted]
+                    self._update_caveats(quantum_key, output_run, read_caveats, executor, futures)
+                self._update_info_status(quantum_key, output_run)
+                status_log.log("Updated status for %s of %s quanta.", n + 1, len(new_quanta))
+            for n, future in enumerate(concurrent.futures.as_completed(futures)):
+                if (err := future.exception()) is not None:
+                    raise err
+                status_log.log("Added exception/caveat information for %s of %s quanta.", n + 1, len(futures))
+
+    def _add_new_quantum(self, node: QuantumNode, output_run: str) -> QuantumKey:
+        """Add a quantum from a new quantum graph to the provenance graph.
+
+        Parameters
+        ----------
+        node : `QuantumNode`
+            Node in the quantum graph.
+        output_run : `str`
+            Output run collection.
+
+        Returns
+        -------
+        quantum_key : `QuantumKey`
+            Key for the new or existing node in the provenance graph.
+
+        Notes
+        -----
+        This method adds new quantum and dataset nodes to the provenance graph
+        if they don't already exist, while adding new `QuantumRun` and
+        `DatasetRun` objects to both new and existing nodes. All status
+        information on those nodes is set to initial, default values that
+        generally reflect quanta that have not been attempted to be run.
+        """
+        # make a key to refer to the quantum and add it to the quantum
+        # provenance graph.
+        quantum_key = QuantumKey(
+            node.taskDef.label, cast(DataCoordinate, node.quantum.dataId).required_values
+        )
+        self._xgraph.add_node(quantum_key)
+        # use the key to get a `QuantumInfo` object for the quantum
+        # and set defaults for its values.
+        quantum_info = self.get_quantum_info(quantum_key)
+        quantum_info.setdefault("messages", [])
+        quantum_info.setdefault("runs", {})
+        quantum_info.setdefault("data_id", cast(DataCoordinate, node.quantum.dataId))
+        quantum_info.setdefault("status", QuantumInfoStatus.UNKNOWN)
+        quantum_info.setdefault("recovered", False)
+        self._quanta.setdefault(quantum_key.task_label, set()).add(quantum_key)
+        metadata_ref = node.quantum.outputs[f"{node.taskDef.label}_metadata"][0]
+        log_ref = node.quantum.outputs[f"{node.taskDef.label}_log"][0]
+        # associate run collections with specific quanta. this is important
+        # if the same quanta are processed in multiple runs as in recovery
+        # workflows.
+        quantum_runs = quantum_info.setdefault("runs", {})
+        # the `QuantumRun` here is the specific quantum-run collection
+        # combination.
+        quantum_runs[output_run] = QuantumRun(id=node.nodeId, metadata_ref=metadata_ref, log_ref=log_ref)
+        # For each of the outputs of the quanta (datasets) make a key to
+        # refer to the dataset.
+        for ref in itertools.chain.from_iterable(node.quantum.outputs.values()):
+            dataset_key = DatasetKey(ref.datasetType.name, ref.dataId.required_values)
+            # add datasets to the nodes of the graph, with edges on the
+            # quanta.
+            self._xgraph.add_edge(quantum_key, dataset_key)
+            # use the dataset key to make a `DatasetInfo` object for
+            # the dataset and set defaults for its values.
+            dataset_info = self.get_dataset_info(dataset_key)
+            dataset_info.setdefault("data_id", ref.dataId)
+            dataset_info.setdefault("status", DatasetInfoStatus.PREDICTED_ONLY)
+            dataset_info.setdefault("messages", [])
+            self._datasets.setdefault(dataset_key.dataset_type_name, set()).add(dataset_key)
+            dataset_runs = dataset_info.setdefault("runs", {})
+            # make a `DatasetRun` for the specific dataset-run
+            # collection combination.
+            dataset_runs[output_run] = DatasetRun(id=ref.id)
+            # save metadata and logs for easier status interpretation later
+            if dataset_key.dataset_type_name.endswith("_metadata"):
+                quantum_info["metadata"] = dataset_key
+                quantum_runs[output_run].metadata_ref = ref
+            if dataset_key.dataset_type_name.endswith("_log"):
+                quantum_info["log"] = dataset_key
+                quantum_runs[output_run].log_ref = ref
+        for ref in itertools.chain.from_iterable(node.quantum.inputs.values()):
+            dataset_key = DatasetKey(ref.datasetType.nameAndComponent()[0], ref.dataId.required_values)
+            if dataset_key in self._xgraph:
+                # add another edge if the input datasetType and quantum are
+                # in the graph
+                self._xgraph.add_edge(dataset_key, quantum_key)
+        return quantum_key
+
+    def _update_run_status(
+        self, quantum_key: QuantumKey, output_run: str, blocked: set[DatasetKey]
+    ) -> QuantumRunStatus:
+        """Update the status of this quantum in its own output run, using
+        information in the graph about which of its output datasets exist.
+
+        Parameters
+        ----------
+        quantum_key : `QuantumKey`
+            Key for the node in the provenance graph.
+        output_run : `str`
+            Output run collection.
+        blocked : `set` [ `DatasetKey` ]
+            A set of output datasets (for all quanta, not just this one) that
+            were blocked by failures. Will be modified in place.
+
+        Returns
+        -------
+        run_status : `QuantumRunStatus`
+            Run-specific status for this quantum.
+        """
+        quantum_info = self.get_quantum_info(quantum_key)
+        quantum_run = quantum_info["runs"][output_run]
+        metadata_key = quantum_info["metadata"]
+        log_key = quantum_info["log"]
+        metadata_dataset_run = self.get_dataset_info(metadata_key)["runs"][output_run]
+        log_dataset_run = self.get_dataset_info(log_key)["runs"][output_run]
+        # if we do have metadata, we know that the task finished.
+        if metadata_dataset_run.produced:
+            # if we also have logs, this is a success.
+            if log_dataset_run.produced:
+                quantum_run.status = QuantumRunStatus.SUCCESSFUL
             else:
-                # if we have
-                #
-                #
-[8 deleted lines omitted]
+                # if we have metadata and no logs, this is a very rare
+                # case. either the task ran successfully and the datastore
+                # died immediately afterwards, or some supporting
+                # infrastructure for transferring the logs to the datastore
+                # failed.
+                quantum_run.status = QuantumRunStatus.LOGS_MISSING
+
+        # missing metadata means that the task did not finish.
+        else:
+            # if we have logs and no metadata, the task not finishing is
+            # a failure in the task itself. This includes all payload
+            # errors and some other problems.
+            if log_dataset_run.produced:
+                quantum_run.status = QuantumRunStatus.FAILED
+                # if a quantum fails, all its successor datasets are
+                # blocked.
+                blocked.update(self._xgraph.successors(quantum_key))
+            # if we are missing metadata and logs, either the task was not
+            # started, or a hard external environmental error prevented
+            # it from writing logs or metadata.
+            else:
+                # if none of this quantum's inputs were blocked, the
+                # metadata must just be missing.
+                if blocked.isdisjoint(self._xgraph.predecessors(quantum_key)):
+                    # None of this quantum's inputs were blocked.
+                    quantum_run.status = QuantumRunStatus.METADATA_MISSING
+                # otherwise we can assume from no metadata and no logs
+                # that the task was blocked by an upstream failure.
                 else:
-[35 deleted lines omitted]
-                    quantum_info["
-[27 deleted lines omitted]
-                    #
-[3 deleted lines omitted]
+                    quantum_run.status = QuantumRunStatus.BLOCKED
+                    blocked.update(self._xgraph.successors(quantum_key))
+        return quantum_run.status
+
+    def _update_info_status(self, quantum_key: QuantumKey, output_run: str) -> QuantumInfoStatus:
+        """Update the status of this quantum across all runs with the status
+        for its latest run.
+
+        Parameters
+        ----------
+        quantum_key : `QuantumKey`
+            Key for the node in the provenance graph.
+        output_run : `str`
+            Output run collection.
+
+        Returns
+        -------
+        info_status : `QuantumRunStatus`
+            Run-specific status for this quantum.
+        """
+        # Now we can start using state transitions to mark overall status.
+        quantum_info = self.get_quantum_info(quantum_key)
+        quantum_run = quantum_info["runs"][output_run]
+        last_status = quantum_info["status"]
+        new_status: QuantumInfoStatus
+        match last_status, quantum_run.status:
+            # A quantum can never escape a WONKY state.
+            case (QuantumInfoStatus.WONKY, _):
+                new_status = QuantumInfoStatus.WONKY
+            # Any transition to a success (excluding from WONKY) is
+            # a success; any transition from a failed state is also a
+            # recovery.
+            case (_, QuantumRunStatus.SUCCESSFUL):
+                new_status = QuantumInfoStatus.SUCCESSFUL
+                if last_status != QuantumInfoStatus.SUCCESSFUL and last_status != QuantumInfoStatus.UNKNOWN:
+                    quantum_info["recovered"] = True
+            # Missing logs are one of the categories of wonky quanta. They
+            # interfere with our ability to discern quantum status and are
+            # signs of weird things afoot in processing. Add a message
+            # noting why this quantum is being marked as wonky to be stored
+            # in its `UnsuccessfulQuantumInfo`.
+            case (_, QuantumRunStatus.LOGS_MISSING):
+                new_status = QuantumInfoStatus.WONKY
+                quantum_info["messages"].append(f"Logs missing for run {output_run!r}.")
+            # Leaving a successful state is another category of wonky
+            # quanta. If a previous success fails on a subsequent run,
+            # a human should inspect why. Add a message noting why this
+            # quantum is being marked as wonky to be stored in its
+            # `UnsuccessfulQuantumInfo`.
+            case (QuantumInfoStatus.SUCCESSFUL, _):
+                new_status = QuantumInfoStatus.WONKY
+                quantum_info["messages"].append(
+                    f"Status went from successful in run {list(quantum_info['runs'].values())[-1]!r} "
+                    f"to {quantum_run.status!r} in {output_run!r}."
+                )
+            # If a quantum status is unknown and it moves to blocked, we
+            # know for sure that it is a blocked quantum.
+            case (QuantumInfoStatus.UNKNOWN, QuantumRunStatus.BLOCKED):
+                new_status = QuantumInfoStatus.BLOCKED
+            # A transition into blocked does not change the overall quantum
+            # status for a failure.
+            case (_, QuantumRunStatus.BLOCKED):
+                new_status = last_status
+            # If a quantum transitions from any state into missing
+            # metadata, we don't have enough information to diagnose its
+            # state.
+            case (_, QuantumRunStatus.METADATA_MISSING):
+                new_status = QuantumInfoStatus.UNKNOWN
+            # Any transition into failure is a failed state.
+            case (_, QuantumRunStatus.FAILED):
+                new_status = QuantumInfoStatus.FAILED
+        # Update `QuantumInfo.status` for this quantum.
+        quantum_info["status"] = new_status
+        return new_status
+
+    def _update_caveats(
+        self,
+        quantum_key: QuantumKey,
+        output_run: str,
+        read_caveats: Literal["lazy", "exhaustive"],
+        executor: concurrent.futures.Executor,
+        futures: list[concurrent.futures.Future[None]],
+    ) -> None:
+        """Read quantum success caveats and exception information from task
+        metadata.
+
+        Parameters
+        ----------
+        quantum_key : `QuantumKey`
+            Key for the node in the provenance graph.
+        output_run : `str`
+            Output run collection.
+        read_caveats : `str`
+            Whether to read metadata files to get flags that describe qualified
+            successes. If "exhaustive", all metadata files will be read. If
+            "lazy", only metadata files where at least one predicted output is
+            missing will be read.
+        butler : `lsst.daf.butler.Butler`
+            The Butler used for this report. This should match the Butler
+            used for the run associated with the executed quantum graph.
+        """
+        if read_caveats == "lazy" and all(
+            self.get_dataset_info(dataset_key)["runs"][output_run].produced
+            for dataset_key in self._xgraph.successors(quantum_key)
+        ):
+            return
+        quantum_info = self.get_quantum_info(quantum_key)
+        quantum_run = quantum_info["runs"][output_run]
+
+        def read_metadata() -> None:
+            md = self._butler_get(quantum_run.metadata_ref, storageClass="TaskMetadata")
+            try:
+                # Int conversion guards against spurious conversion to
+                # float that can apparently sometimes happen in
+                # TaskMetadata.
+                quantum_run.caveats = QuantumSuccessCaveats(int(md["quantum"]["caveats"]))
+            except LookupError:
+                pass
+            try:
+                quantum_run.exception = ExceptionInfo._from_metadata(md[quantum_key.task_label]["failure"])
+            except LookupError:
+                pass
+
+        futures.append(executor.submit(read_metadata))
+
+    def _resolve_duplicates(
         self,
         butler: Butler,
         collections: Sequence[str] | None = None,
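`_ThreadLocalButlerWrapper` is referenced throughout this hunk (`wrap_qbb`, `wrap_full`, and the `.butler` attribute used by `_butler_get`), but its definition is not part of the hunks shown here. Together with the new `threading` import and the comment about needing one butler per output-run-and-thread combination, the name suggests per-thread lazy construction, since butlers are not generally safe to share across threads. The following is a speculative sketch of that general pattern, not the package's actual implementation:

```python
import threading
from collections.abc import Callable
from typing import Generic, TypeVar

T = TypeVar("T")


class ThreadLocalWrapper(Generic[T]):
    """Construct one instance of a non-thread-safe resource per thread.

    A hypothetical stand-in for _ThreadLocalButlerWrapper; the factory
    captures whatever is needed (e.g. a butler config and a quantum graph)
    to build a fresh per-thread instance.
    """

    def __init__(self, factory: Callable[[], T]) -> None:
        self._factory = factory
        self._local = threading.local()

    @property
    def butler(self) -> T:
        # First access from each thread builds that thread's own instance.
        if not hasattr(self._local, "instance"):
            self._local.instance = self._factory()
        return self._local.instance
```

On this reading, `wrap_qbb` would capture the quantum graph in the factory so each worker thread builds its own quantum-backed butler, while `wrap_full` would build full butler clones.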
@@ -1450,7 +1858,7 @@ class QuantumProvenanceGraph:
         dataset, mark the producer quantum as WONKY.
 
         This method should be called after
-        `QuantumProvenanceGraph.
+        `QuantumProvenanceGraph._add_new_graph` has been called on every graph
         associated with the data processing.
 
         Parameters
@@ -1458,19 +1866,20 @@ class QuantumProvenanceGraph:
         butler : `lsst.daf.butler.Butler`
             The Butler used for this report. This should match the Butler used
             for the run associated with the executed quantum graph.
-
-
-
-            paring down the query would be useful.
-
+        collections : `~collections.abc.Sequence` [`str`] | `None`
+            Collections to use in `lsst.daf.butler.query_datasets` when testing
+            which datasets are available at a high level.
         where : `str`
-            A "where" string to use to constrain the
-
+            A "where" string to use to constrain the datasets; should be
+            provided if ``collections`` includes many datasets that are not in
+            any graphs, to select just those that might be (e.g. when sharding
+            over dimensions and using a final collection that spans multiple
+            shards).
         curse_failed_logs : `bool`
             Mark log datasets as CURSED if they are visible in the final
             output collection. Note that a campaign-level collection must be
             used here for `collections` if `curse_failed_logs` is `True`; if
-            `
+            `_resolve_duplicates` is run on a list of group-level collections
             then each will only show log datasets from their own failures as
             visible and datasets from others will be marked as cursed.
         """
@@ -1483,14 +1892,17 @@ class QuantumProvenanceGraph:
                 been added, or make a new graph with all constituent
                 attempts."""
             )
-
+        status_log = PeriodicLogger(_LOG)
+        _LOG.verbose("Querying for dataset visibility.")
+        for m, dataset_type_name in enumerate(self._datasets):
             # find datasets in a larger collection.
-
-
-
-
-
-
+            try:
+                refs = butler.query_datasets(
+                    dataset_type_name, collections=collections, where=where, limit=None, explain=False
+                )
+            except MissingDatasetTypeError:
+                continue
+            for n, ref in enumerate(refs):
                 dataset_key = DatasetKey(ref.datasetType.name, ref.dataId.required_values)
                 try:
                     dataset_info = self.get_dataset_info(dataset_key)
@@ -1500,9 +1912,16 @@ class QuantumProvenanceGraph:
                     continue
                 # queryable datasets are `visible`.
                 dataset_info["runs"][ref.run].visible = True
-
-
-
+                status_log.log(
+                    "Updated visibility for %s of %s datasets of type %s of %s.",
+                    n + 1,
+                    len(refs),
+                    m + 1,
+                    len(self._datasets.keys()),
+                )
+        _LOG.verbose("Updating task status from dataset visibility.")
+        for m, task_quanta in enumerate(self._quanta.values()):
+            for n, quantum_key in enumerate(task_quanta):
                 # runs associated with visible datasets.
                 visible_runs: set[str] = set()
                 quantum_info = self.get_quantum_info(quantum_key)
@@ -1570,168 +1989,118 @@ class QuantumProvenanceGraph:
                         + f"from {str(dataset_info['runs'])};"
                         + f"{str(dataset_info['status'])}"
                     )
+                status_log.log(
+                    "Updated task status from visibility for %s of %s quanta of task %s of %s.",
+                    n + 1,
+                    len(task_quanta),
+                    m + 1,
+                    len(self._quanta.keys()),
+                )
         # If we make it all the way through resolve_duplicates, set
         # self._finalized = True so that it cannot be run again.
         self._finalized = True
 
-    def assemble_quantum_provenance_graph(
-        self,
-        butler: Butler,
-        qgraphs: Sequence[QuantumGraph | ResourcePathExpression],
-        collections: Sequence[str] | None = None,
-        where: str = "",
-        curse_failed_logs: bool = False,
-        read_caveats: Literal["lazy", "exhaustive"] | None = "exhaustive",
-    ) -> None:
-        """Assemble the quantum provenance graph from a list of all graphs
-        corresponding to processing attempts.
+    def _butler_get(self, ref: DatasetRef, **kwargs: Any) -> Any:
+        return self._butler_wrappers[ref.run].butler.get(ref, **kwargs)
 
-        This method calls the private method `__add_new_graph` on each of the
-        constituent graphs, verifying that the graphs have been passed in
-        order. After `__add_new_graph` has been called on all graphs in the
-        `Sequence`, the method calls `__resolve_duplicates`.
 
-        Parameters
-        ----------
-        butler : `lsst.daf.butler.Butler`
-            The Butler used for this report. This should match the Butler used
-            for the run associated with the executed quantum graph.
-        qgraphs : `Sequence` [`QuantumGraph` | `ResourcePathExpression`]
-            A list of either quantum graph objects or their uri's, to be used
-            to assemble the `QuantumProvenanceGraph`.
-        collections : `Sequence` [`str`] | `None`
-            Collections to use in `lsst.daf.butler.registry.queryDatasets` if
-            paring down the query would be useful.
-        where : `str`
-            A "where" string to use to constrain the collections, if passed.
-        curse_failed_logs : `bool`
-            Mark log datasets as CURSED if they are visible in the final
-            output collection. Note that a campaign-level collection must be
-            used here for `collections` if `curse_failed_logs` is `True`; if
-            `__resolve_duplicates` is run on a list of group-level collections
-            then each will only show log datasets from their own failures as
-            visible and datasets from others will be marked as cursed.
-        read_caveats : `str` or `None`, optional
-            Whether to read metadata files to get flags that describe qualified
-            successes. If `None`, no metadata files will be read and all
-            ``caveats`` fields will be `None`. If "exhaustive", all
-            metadata files will be read. If "lazy", only metadata files where
-            at least one predicted output is missing will be read.
-        """
-        if read_caveats not in ("lazy", "exhaustive", None):
-            raise TypeError(
-                f"Invalid option {read_caveats!r} for read_caveats; should be 'lazy', 'exhaustive', or None."
-            )
-        output_runs = []
-        for graph in qgraphs:
-            qgraph = graph if isinstance(graph, QuantumGraph) else QuantumGraph.loadUri(graph)
-            assert qgraph.metadata is not None, "Saved QGs always have metadata."
-            self.__add_new_graph(butler, qgraph, read_caveats=read_caveats)
-            output_runs.append(qgraph.metadata["output_run"])
-        # If the user has not passed a `collections` variable
-        if not collections:
-            # We reverse the order of the associated output runs because the
-            # query in __resolve_duplicates must be done most recent-first.
-            collections = list(reversed(output_runs))
-            assert not curse_failed_logs, (
-                "curse_failed_logs option must be used with one campaign-level collection."
-            )
-        self.__resolve_duplicates(butler, collections, where, curse_failed_logs)
-
-    def to_summary(self, butler: Butler, do_store_logs: bool = True) -> Summary:
-        """Summarize the `QuantumProvenanceGraph`.
-
-        Parameters
-        ----------
-        butler : `lsst.daf.butler.Butler`
-            The Butler used for this report.
-        do_store_logs : `bool`
-            Store the logs in the summary dictionary.
-
-        Returns
-        -------
-        result : `Summary`
-            A struct containing counts of quanta and datasets in each of
-            the overall states defined in `QuantumInfo` and `DatasetInfo`,
-            as well as diagnostic information and error messages for failed
-            quanta and strange edge cases, and a list of recovered quanta.
-        """
-        if not self._finalized:
-            raise RuntimeError(
-                """resolve_duplicates must be called to finalize the
-                QuantumProvenanceGraph before making a summary."""
-            )
-        result = Summary()
-        for task_label, quanta in self._quanta.items():
-            task_summary = TaskSummary()
-            task_summary.n_expected = len(quanta)
-            for quantum_key in quanta:
-                quantum_info = self.get_quantum_info(quantum_key)
-                task_summary.add_quantum_info(quantum_info, butler, do_store_logs)
-            result.tasks[task_label] = task_summary
-
-        for dataset_type_name, datasets in self._datasets.items():
-            dataset_type_summary = DatasetTypeSummary(producer="")
-            dataset_type_summary.n_expected = len(datasets)
-            for dataset_key in datasets:
-                dataset_info = self.get_dataset_info(dataset_key)
-                producer_key = self.get_producer_of(dataset_key)
-                producer_info = self.get_quantum_info(producer_key)
-                # Not ideal, but hard to get out of the graph at the moment.
-                # Change after DM-40441
-                dataset_type_summary.producer = producer_key.task_label
-                dataset_type_summary.add_dataset_info(dataset_info, producer_info)
+class _ThreadLocalButlerWrapper:
+    """A wrapper for a thread-local limited butler.
 
-            result.datasets[dataset_type_name] = dataset_type_summary
-        return result
+    Parameters
+    ----------
+    factory : `~collections.abc.Callable`
+        A callable that takes no arguments and returns a limited butler.
+    """
 
-    def iter_outputs_of(self, quantum_key: QuantumKey) -> Iterator[DatasetKey]:
-        """Iterate through the outputs of a quantum, yielding all the datasets
-        produced by that quantum.
+    def __init__(self, factory: Callable[[], LimitedButler]):
+        self._factory = factory
+        self._thread_local = threading.local()
 
-        Parameters
-        ----------
-        quantum_key : `QuantumKey`
-            The key for the quantum whose outputs are needed.
-        """
-        yield from self._xgraph.successors(quantum_key)
-
-    def get_producer_of(self, dataset_key: DatasetKey) -> QuantumKey:
-        """Unpack the predecessor (producer quantum) of a given dataset key
-        from a graph.
+    @classmethod
+    def wrap_qbb(cls, full_butler: Butler, qg: QuantumGraph) -> _ThreadLocalButlerWrapper:
+        """Wrap a `~lsst.daf.butler.QuantumBackedButler` suitable for reading
+        log and metadata files.
 
         Parameters
         ----------
-        dataset_key : `DatasetKey`
-            The key for the dataset whose producing quantum is needed.
+        full_butler : `~lsst.daf.butler.Butler`
+            Full butler to draw datastore and dimension configuration from.
+        qg : `QuantumGraph`
+            Quantum graph.
 
         Returns
         -------
-        result : `QuantumKey`
-            The key for the quantum which produced the dataset.
+        wrapper : `_ThreadLocalButlerWrapper`
+            A wrapper that provides access to a thread-local QBB, constructing
+            it on first use.
         """
-        (result,) = self._xgraph.predecessors(dataset_key)
-        return result
+        dataset_ids = []
+        for task_label in qg.pipeline_graph.tasks.keys():
+            for quantum in qg.get_task_quanta(task_label).values():
+                dataset_ids.append(quantum.outputs[LOG_OUTPUT_TEMPLATE.format(label=task_label)][0].id)
+                dataset_ids.append(quantum.outputs[METADATA_OUTPUT_TEMPLATE.format(label=task_label)][0].id)
+        try:
+            butler_config = full_butler._config  # type: ignore[attr-defined]
+        except AttributeError:
+            raise RuntimeError("use_qbb=True requires a direct butler.") from None
+        factory = _QuantumBackedButlerFactory(
+            butler_config,
+            dataset_ids,
+            full_butler.dimensions,
+            dataset_types={dt.name: dt for dt in qg.registryDatasetTypes()},
+        )
+        return cls(factory)
 
-    def iter_downstream(
-        self, key: QuantumKey | DatasetKey
-    ) -> Iterator[tuple[QuantumKey, QuantumInfo] | tuple[DatasetKey, DatasetInfo]]:
-        """Iterate over the quanta and datasets that are downstream of a
-        quantum or dataset.
+    @classmethod
+    def wrap_full(cls, full_butler: Butler) -> _ThreadLocalButlerWrapper:
+        """Wrap a full `~lsst.daf.butler.Butler`.
 
         Parameters
         ----------
-        key : `QuantumKey` or `DatasetKey`
-
+        full_butler : `~lsst.daf.butler.Butler`
+            Full butler to clone when making thread-local copies.
 
         Returns
         -------
-
-
-
+        wrapper : `_ThreadLocalButlerWrapper`
+            A wrapper that provides access to a thread-local butler,
+            constructing it on first use.
         """
-
-
+        return cls(full_butler.clone)
+
+    @property
+    def butler(self) -> LimitedButler:
+        """The wrapped butler, constructed on first use within each thread."""
+        if (butler := getattr(self._thread_local, "butler", None)) is None:
+            self._thread_local.butler = self._factory()
+            butler = self._thread_local.butler
+        return butler
+
+
+@dataclasses.dataclass
+class _QuantumBackedButlerFactory:
+    """A factory for `~lsst.daf.butler.QuantumBackedButler`, for use by
+    `_ThreadLocalButlerWrapper`.
+    """
+
+    config: ButlerConfig
+    dataset_ids: list[DatasetId]
+    universe: DimensionUniverse
+    dataset_types: dict[str, DatasetType]
+
+    def __call__(self) -> QuantumBackedButler:
+        return QuantumBackedButler.from_predicted(
+            self.config,
+            predicted_inputs=self.dataset_ids,
+            predicted_outputs=[],
+            dimensions=self.universe,
+            # We don't need the datastore records in the QG because we're
+            # only going to read metadata and logs, and those are never
+            # overall inputs.
+            datastore_records={},
+            dataset_types=self.dataset_types,
+        )
 
 
 def _cli() -> None:
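Taken together, the additions in this hunk wire up multi-threaded reads of per-quantum log and metadata files (note the new concurrent.futures and threading imports in the header): butlers are not safe to share across threads, so _ThreadLocalButlerWrapper hands each worker thread its own lazily constructed instance, built either by cloning the full butler (wrap_full) or, via _QuantumBackedButlerFactory, as a registry-free QuantumBackedButler that only knows the predicted log/metadata dataset IDs (wrap_qbb). Below is a minimal sketch of the same thread-local-factory pattern; _FakeButler and ThreadLocalWrapper are hypothetical stand-ins for a real LimitedButler and the wrapper added above.

    # Illustrative sketch only; a real factory would return a LimitedButler.
    import threading
    from collections.abc import Callable
    from concurrent.futures import ThreadPoolExecutor


    class _FakeButler:
        def __init__(self) -> None:
            # A real butler carries non-thread-safe state (e.g. database
            # connections), which is why each thread needs its own instance.
            self.owner = threading.current_thread().name

        def get(self, ref: str) -> str:
            return f"{ref} read by {self.owner}"


    class ThreadLocalWrapper:
        def __init__(self, factory: Callable[[], _FakeButler]) -> None:
            self._factory = factory
            self._local = threading.local()

        @property
        def butler(self) -> _FakeButler:
            # Construct one butler per thread, on first use.
            if getattr(self._local, "butler", None) is None:
                self._local.butler = self._factory()
            return self._local.butler


    wrapper = ThreadLocalWrapper(_FakeButler)
    with ThreadPoolExecutor(max_workers=4) as pool:
        for line in pool.map(lambda i: wrapper.butler.get(f"log#{i}"), range(8)):
            print(line)

Deferring construction to the first .butler access means a thread that never reads anything never pays for a butler, and the dataclass factory keeps the construction recipe (config, predicted dataset IDs, dimension universe, dataset types) in one place so every thread builds an identical instance.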
|