lsst-pipe-base 30.2026.400__py3-none-any.whl → 30.2026.500__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +17 -5
- lsst/pipe/base/_quantumContext.py +3 -3
- lsst/pipe/base/_status.py +14 -0
- lsst/pipe/base/automatic_connection_constants.py +11 -0
- lsst/pipe/base/cli/cmd/__init__.py +2 -0
- lsst/pipe/base/cli/cmd/commands.py +108 -1
- lsst/pipe/base/graph/graph.py +9 -8
- lsst/pipe/base/log_capture.py +1 -1
- lsst/pipe/base/pipeline.py +2 -2
- lsst/pipe/base/pipelineIR.py +1 -1
- lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
- lsst/pipe/base/pipeline_graph/_edges.py +11 -11
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +2 -2
- lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
- lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
- lsst/pipe/base/prerequisite_helpers.py +2 -1
- lsst/pipe/base/quantum_graph/_multiblock.py +29 -13
- lsst/pipe/base/quantum_graph/_provenance.py +411 -19
- lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
- lsst/pipe/base/quantum_graph/formatter.py +4 -4
- lsst/pipe/base/quantum_graph/ingest_graph.py +61 -4
- lsst/pipe/base/quantum_graph_builder.py +1 -8
- lsst/pipe/base/quantum_graph_skeleton.py +29 -27
- lsst/pipe/base/quantum_provenance_graph.py +12 -10
- lsst/pipe/base/separable_pipeline_executor.py +1 -1
- lsst/pipe/base/single_quantum_executor.py +1 -1
- lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/METADATA +1 -1
- {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/RECORD +38 -38
- {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/WHEEL +1 -1
- {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/zip-safe +0 -0
|
@@ -38,13 +38,14 @@ __all__ = (
|
|
|
38
38
|
"ProvenanceQuantumGraphWriter",
|
|
39
39
|
"ProvenanceQuantumInfo",
|
|
40
40
|
"ProvenanceQuantumModel",
|
|
41
|
+
"ProvenanceQuantumReport",
|
|
41
42
|
"ProvenanceQuantumScanData",
|
|
42
43
|
"ProvenanceQuantumScanModels",
|
|
43
44
|
"ProvenanceQuantumScanStatus",
|
|
45
|
+
"ProvenanceReport",
|
|
44
46
|
"ProvenanceTaskMetadataModel",
|
|
45
47
|
)
|
|
46
48
|
|
|
47
|
-
|
|
48
49
|
import dataclasses
|
|
49
50
|
import enum
|
|
50
51
|
import itertools
|
|
@@ -60,9 +61,9 @@ import networkx
|
|
|
60
61
|
import numpy as np
|
|
61
62
|
import pydantic
|
|
62
63
|
|
|
63
|
-
from lsst.daf.butler import DataCoordinate
|
|
64
|
+
from lsst.daf.butler import Butler, DataCoordinate
|
|
64
65
|
from lsst.daf.butler.logging import ButlerLogRecord, ButlerLogRecords
|
|
65
|
-
from lsst.resources import ResourcePathExpression
|
|
66
|
+
from lsst.resources import ResourcePath, ResourcePathExpression
|
|
66
67
|
from lsst.utils.iteration import ensure_iterable
|
|
67
68
|
from lsst.utils.logging import LsstLogAdapter, getLogger
|
|
68
69
|
from lsst.utils.packages import Packages
|
|
@@ -559,6 +560,131 @@ class ProvenanceTaskMetadataModel(pydantic.BaseModel):
|
|
|
559
560
|
return super().model_validate_strings(*args, **kwargs)
|
|
560
561
|
|
|
561
562
|
|
|
563
|
+
class ProvenanceQuantumReport(pydantic.BaseModel):
    """A Pydantic model that is used to report information about a single
    (generally problematic) quantum.
    """

    # Unique ID for the quantum.
    quantum_id: uuid.UUID
    # Data ID of the quantum, as a plain mapping of dimension name to value.
    data_id: dict[str, int | str]
    # Attempt records copied from the provenance graph node.
    attempts: list[ProvenanceQuantumAttemptModel]

    @classmethod
    def from_info(cls, quantum_id: uuid.UUID, quantum_info: ProvenanceQuantumInfo) -> ProvenanceQuantumReport:
        """Construct from a provenance quantum graph node.

        Parameters
        ----------
        quantum_id : `uuid.UUID`
            Unique ID for the quantum.
        quantum_info : `ProvenanceQuantumInfo`
            Node attributes for this quantum.

        Returns
        -------
        report : `ProvenanceQuantumReport`
            Report holding the quantum's ID, data ID mapping, and attempts.
        """
        return cls(
            quantum_id=quantum_id,
            data_id=dict(quantum_info["data_id"].mapping),
            attempts=quantum_info["attempts"],
        )

    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
    # when we inherit those docstrings in our public classes.
    if "sphinx" in sys.modules and not TYPE_CHECKING:

        def copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.copy`."""
            return super().copy(*args, **kwargs)

        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump`."""
            return super().model_dump(*args, **kwargs)

        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump_json`."""
            # Must delegate to model_dump_json (not model_dump) so that the
            # override still returns a JSON string.
            return super().model_dump_json(*args, **kwargs)

        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_copy`."""
            return super().model_copy(*args, **kwargs)

        @classmethod
        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
            """See `pydantic.BaseModel.model_construct`."""
            return super().model_construct(*args, **kwargs)

        @classmethod
        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_json_schema`."""
            return super().model_json_schema(*args, **kwargs)

        @classmethod
        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_validate`."""
            return super().model_validate(*args, **kwargs)

        @classmethod
        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_validate_json`."""
            return super().model_validate_json(*args, **kwargs)

        @classmethod
        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_validate_strings`."""
            return super().model_validate_strings(*args, **kwargs)
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
class ProvenanceReport(pydantic.RootModel):
    """A Pydantic model that groups quantum information by task label, then
    status (as a string), and then exception type.
    """

    # Nested mapping: task label -> status name -> exception type name (or
    # `None` when there is no exception) -> per-quantum reports.
    root: dict[TaskLabel, dict[str, dict[str | None, list[ProvenanceQuantumReport]]]] = {}

    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
    # when we inherit those docstrings in our public classes.
    if "sphinx" in sys.modules and not TYPE_CHECKING:

        def copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.copy`."""
            return super().copy(*args, **kwargs)

        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump`."""
            return super().model_dump(*args, **kwargs)

        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump_json`."""
            # Must delegate to model_dump_json (not model_dump) so that the
            # override still returns a JSON string.
            return super().model_dump_json(*args, **kwargs)

        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_copy`."""
            return super().model_copy(*args, **kwargs)

        @classmethod
        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
            """See `pydantic.BaseModel.model_construct`."""
            return super().model_construct(*args, **kwargs)

        @classmethod
        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_json_schema`."""
            return super().model_json_schema(*args, **kwargs)

        @classmethod
        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_validate`."""
            return super().model_validate(*args, **kwargs)

        @classmethod
        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_validate_json`."""
            return super().model_validate_json(*args, **kwargs)

        @classmethod
        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_validate_strings`."""
            return super().model_validate_strings(*args, **kwargs)
|
|
686
|
+
|
|
687
|
+
|
|
562
688
|
class ProvenanceQuantumModel(pydantic.BaseModel):
|
|
563
689
|
"""Data model for the quanta in a provenance quantum graph file."""
|
|
564
690
|
|
|
@@ -1005,6 +1131,83 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
|
|
|
1005
1131
|
dataset_type_name: {} for dataset_type_name in self.pipeline_graph.dataset_types.keys()
|
|
1006
1132
|
}
|
|
1007
1133
|
|
|
1134
|
+
@classmethod
@contextmanager
def from_args(
    cls,
    repo_or_filename: str,
    /,
    collection: str | None = None,
    *,
    quanta: Iterable[uuid.UUID] | None = None,
    datasets: Iterable[uuid.UUID] | None = None,
    writeable: bool = False,
) -> Iterator[tuple[ProvenanceQuantumGraph, Butler | None]]:
    """Construct a `ProvenanceQuantumGraph` from CLI-friendly arguments for
    a file or butler-ingested graph dataset.

    Parameters
    ----------
    repo_or_filename : `str`
        Either a provenance quantum graph filename or a butler repository
        path or alias.
    collection : `str` or `None`, optional
        Collection to search; presence indicates that the first argument
        is a butler repository, not a filename.
    quanta : `~collections.abc.Iterable` [ `uuid.UUID` ] or `None`, optional
        IDs of the quanta to load, or `None` to load all.
    datasets : `~collections.abc.Iterable` [ `uuid.UUID` ] or `None`, optional
        IDs of the datasets to load, or `None` to load all.
    writeable : `bool`, optional
        Whether the butler should be constructed with write support.

    Returns
    -------
    context : `contextlib.AbstractContextManager`
        A context manager that yields a tuple of

        - the `ProvenanceQuantumGraph`
        - the `Butler` constructed (or `None`)

        when entered.
    """
    exit_stack = ExitStack()
    if collection is not None:
        # A collection was given, so the first argument must be a butler
        # repository holding an ingested provenance graph dataset.
        try:
            butler = exit_stack.enter_context(
                Butler.from_config(repo_or_filename, collections=[collection], writeable=writeable)
            )
        except Exception as err:
            err.add_note(
                f"Expected {repo_or_filename!r} to be a butler repository path or alias because a "
                f"collection ({collection}) was provided."
            )
            raise
        with exit_stack:
            graph = butler.get(
                acc.PROVENANCE_DATASET_TYPE_NAME, parameters={"quanta": quanta, "datasets": datasets}
            )
            yield graph, butler
    else:
        # No collection, so the first argument must be a graph file.
        try:
            reader = exit_stack.enter_context(ProvenanceQuantumGraphReader.open(repo_or_filename))
        except Exception as err:
            err.add_note(
                f"Expected a {repo_or_filename} to be a provenance quantum graph filename "
                f"because no collection was provided."
            )
            raise
        with exit_stack:
            # `None` means "load everything"; any other value (including an
            # empty selection) is forwarded to the reader as the explicit
            # set of IDs to load.
            if quanta is None:
                reader.read_quanta()
            else:
                reader.read_quanta(quanta)
            if datasets is None:
                reader.read_datasets()
            else:
                reader.read_datasets(datasets)
            yield reader.graph, None
|
|
1210
|
+
|
|
1008
1211
|
@property
|
|
1009
1212
|
def init_quanta(self) -> Mapping[TaskLabel, uuid.UUID]:
|
|
1010
1213
|
"""A mapping from task label to the ID of the special init quantum for
|
|
@@ -1101,10 +1304,16 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
|
|
|
1101
1304
|
"""
|
|
1102
1305
|
return self._bipartite_xgraph.copy(as_view=True)
|
|
1103
1306
|
|
|
1104
|
-
def make_quantum_table(self) -> astropy.table.Table:
|
|
1307
|
+
def make_quantum_table(self, drop_unused_columns: bool = True) -> astropy.table.Table:
|
|
1105
1308
|
"""Construct an `astropy.table.Table` with a tabular summary of the
|
|
1106
1309
|
quanta.
|
|
1107
1310
|
|
|
1311
|
+
Parameters
|
|
1312
|
+
----------
|
|
1313
|
+
drop_unused_columns : `bool`, optional
|
|
1314
|
+
Whether to drop columns for rare states that did not actually
|
|
1315
|
+
occur in this run.
|
|
1316
|
+
|
|
1108
1317
|
Returns
|
|
1109
1318
|
-------
|
|
1110
1319
|
table : `astropy.table.Table`
|
|
@@ -1140,19 +1349,25 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
|
|
|
1140
1349
|
caveats = f"{code.concise()}({count})" # type: ignore[union-attr]
|
|
1141
1350
|
else:
|
|
1142
1351
|
caveats = ""
|
|
1143
|
-
|
|
1352
|
+
row: dict[str, Any] = {
|
|
1353
|
+
"Task": task_label,
|
|
1354
|
+
"Caveats": caveats,
|
|
1355
|
+
}
|
|
1356
|
+
for status in QuantumAttemptStatus:
|
|
1357
|
+
row[status.title] = status_counts.get(status, 0)
|
|
1358
|
+
row.update(
|
|
1144
1359
|
{
|
|
1145
|
-
"Task": task_label,
|
|
1146
|
-
"Unknown": status_counts.get(QuantumAttemptStatus.UNKNOWN, 0),
|
|
1147
|
-
"Successful": status_counts.get(QuantumAttemptStatus.SUCCESSFUL, 0),
|
|
1148
|
-
"Caveats": caveats,
|
|
1149
|
-
"Blocked": status_counts.get(QuantumAttemptStatus.BLOCKED, 0),
|
|
1150
|
-
"Failed": status_counts.get(QuantumAttemptStatus.FAILED, 0),
|
|
1151
1360
|
"TOTAL": len(quanta_for_task),
|
|
1152
1361
|
"EXPECTED": self.header.n_task_quanta[task_label],
|
|
1153
1362
|
}
|
|
1154
1363
|
)
|
|
1155
|
-
|
|
1364
|
+
rows.append(row)
|
|
1365
|
+
table = astropy.table.Table(rows)
|
|
1366
|
+
if drop_unused_columns:
|
|
1367
|
+
for status in QuantumAttemptStatus:
|
|
1368
|
+
if status.is_rare and not table[status.title].any():
|
|
1369
|
+
del table[status.title]
|
|
1370
|
+
return table
|
|
1156
1371
|
|
|
1157
1372
|
def make_exception_table(self) -> astropy.table.Table:
|
|
1158
1373
|
"""Construct an `astropy.table.Table` with counts for each exception
|
|
@@ -1165,13 +1380,25 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
|
|
|
1165
1380
|
"""
|
|
1166
1381
|
rows = []
|
|
1167
1382
|
for task_label, quanta_for_task in self.quanta_by_task.items():
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1383
|
+
success_counts = Counter[str]()
|
|
1384
|
+
failed_counts = Counter[str]()
|
|
1385
|
+
for quantum_id in quanta_for_task.values():
|
|
1386
|
+
quantum_info: ProvenanceQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
|
|
1387
|
+
exc_info = quantum_info["exception"]
|
|
1388
|
+
if exc_info is not None:
|
|
1389
|
+
if quantum_info["status"] is QuantumAttemptStatus.SUCCESSFUL:
|
|
1390
|
+
success_counts[exc_info.type_name] += 1
|
|
1391
|
+
else:
|
|
1392
|
+
failed_counts[exc_info.type_name] += 1
|
|
1393
|
+
for type_name in sorted(success_counts.keys() | failed_counts.keys()):
|
|
1394
|
+
rows.append(
|
|
1395
|
+
{
|
|
1396
|
+
"Task": task_label,
|
|
1397
|
+
"Exception": type_name,
|
|
1398
|
+
"Successes": success_counts.get(type_name, 0),
|
|
1399
|
+
"Failures": failed_counts.get(type_name, 0),
|
|
1400
|
+
}
|
|
1401
|
+
)
|
|
1175
1402
|
return astropy.table.Table(rows)
|
|
1176
1403
|
|
|
1177
1404
|
def make_task_resource_usage_table(
|
|
@@ -1214,6 +1441,171 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
|
|
|
1214
1441
|
array = np.array(rows, dtype=row_dtype)
|
|
1215
1442
|
return astropy.table.Table(array, units=QuantumResourceUsage.get_units())
|
|
1216
1443
|
|
|
1444
|
+
def make_status_report(
    self,
    states: Iterable[QuantumAttemptStatus] = (
        QuantumAttemptStatus.FAILED,
        QuantumAttemptStatus.ABORTED,
        QuantumAttemptStatus.ABORTED_SUCCESS,
    ),
    *,
    also: QuantumAttemptStatus | Iterable[QuantumAttemptStatus] = (),
    with_caveats: QuantumSuccessCaveats | None = QuantumSuccessCaveats.PARTIAL_OUTPUTS_ERROR,
    data_id_table_dir: ResourcePathExpression | None = None,
) -> ProvenanceReport:
    """Make a JSON- or YAML-friendly report of all quanta with the given
    states.

    Parameters
    ----------
    states : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
            `..QuantumAttemptStatus`, optional
        A quantum is included if it has any of these states.  Defaults to
        states that clearly represent problems.
    also : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
            `..QuantumAttemptStatus`, optional
        Additional states to consider; unioned with ``states``.  This is
        provided so users can easily request additional states while also
        getting the defaults.
    with_caveats : `..QuantumSuccessCaveats` or `None`, optional
        If `..QuantumAttemptStatus.SUCCESSFUL` is in ``states``, only
        include quanta with these caveat flags.  May be set to `None`
        to report on all successful quanta.
    data_id_table_dir : convertible to `~lsst.resources.ResourcePath`, \
            optional
        If provided, a directory to write data ID tables (in ECSV format)
        with all of the data IDs with the given states, for use with the
        ``--data-id-tables`` argument to the quantum graph builder.
        Subdirectories for each task and status will be created within this
        directory, with one file for each exception type (or ``UNKNOWN``
        when there is no exception).

    Returns
    -------
    report : `ProvenanceReport`
        A Pydantic model that groups quanta by task label, status, and
        exception type.
    """
    states = set(ensure_iterable(states))
    states.update(ensure_iterable(also))
    result = ProvenanceReport(root={})
    if data_id_table_dir is not None:
        data_id_table_dir = ResourcePath(data_id_table_dir)
    for task_label, quanta_for_task in self.quanta_by_task.items():
        # Both mappings are keyed by status name, then exception type name
        # (`None` when the quantum has no exception).
        reports_for_task: dict[str, dict[str | None, list[ProvenanceQuantumReport]]] = {}
        table_rows_for_task: dict[str, dict[str | None, list[tuple[int | str, ...]]]] = {}
        for quantum_id in quanta_for_task.values():
            quantum_info: ProvenanceQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
            quantum_status = quantum_info["status"]
            if quantum_status not in states:
                continue
            # Successful quanta are only reported when they carry at least
            # one of the requested caveat flags (unless with_caveats is
            # None, which disables this filter).
            if (
                quantum_status is QuantumAttemptStatus.SUCCESSFUL
                and with_caveats is not None
                and (quantum_info["caveats"] is None or not (quantum_info["caveats"] & with_caveats))
            ):
                continue
            key1 = quantum_status.name
            exc_info = quantum_info["exception"]
            key2 = exc_info.type_name if exc_info is not None else None
            reports_for_task.setdefault(key1, {}).setdefault(key2, []).append(
                ProvenanceQuantumReport.from_info(quantum_id, quantum_info)
            )
            if data_id_table_dir is not None:
                table_rows_for_task.setdefault(key1, {}).setdefault(key2, []).append(
                    quantum_info["data_id"].required_values
                )
        if reports_for_task:
            result.root[task_label] = reports_for_task
        if table_rows_for_task:
            assert data_id_table_dir is not None, "table_rows_for_task should be empty"
            for status_name, table_rows_for_status in table_rows_for_task.items():
                dir_for_task_and_status = data_id_table_dir.join(task_label, forceDirectory=True).join(
                    status_name, forceDirectory=True
                )
                # Directories are only created explicitly for local paths.
                if dir_for_task_and_status.isLocal:
                    dir_for_task_and_status.mkdir()
                for exc_name, data_id_rows in table_rows_for_status.items():
                    table = astropy.table.Table(
                        rows=data_id_rows,
                        names=list(self.pipeline_graph.tasks[task_label].dimensions.required),
                    )
                    filename = f"{exc_name}.ecsv" if exc_name is not None else "UNKNOWN.ecsv"
                    with dir_for_task_and_status.join(filename).open("w") as stream:
                        table.write(stream, format="ecsv")
    return result
|
|
1537
|
+
|
|
1538
|
+
def make_many_reports(
    self,
    states: Iterable[QuantumAttemptStatus] = (
        QuantumAttemptStatus.FAILED,
        QuantumAttemptStatus.ABORTED,
        QuantumAttemptStatus.ABORTED_SUCCESS,
    ),
    *,
    status_report_file: ResourcePathExpression | None = None,
    print_quantum_table: bool = False,
    print_exception_table: bool = False,
    also: QuantumAttemptStatus | Iterable[QuantumAttemptStatus] = (),
    with_caveats: QuantumSuccessCaveats | None = None,
    data_id_table_dir: ResourcePathExpression | None = None,
) -> None:
    """Write multiple reports.

    Parameters
    ----------
    states : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
            `..QuantumAttemptStatus`, optional
        A quantum is included in the status report and data ID tables if it
        has any of these states.  Defaults to states that clearly represent
        problems.
    status_report_file : convertible to `~lsst.resources.ResourcePath`, \
            optional
        Filename for the JSON status report (see `make_status_report`).
    print_quantum_table : `bool`, optional
        If `True`, print a quantum summary table (counts only) to STDOUT.
    print_exception_table : `bool`, optional
        If `True`, print an exception-type summary table (counts only) to
        STDOUT.
    also : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
            `..QuantumAttemptStatus`, optional
        Additional states to consider in the status report and data ID
        tables; unioned with ``states``.  This is provided so users can
        easily request additional states while also getting the defaults.
    with_caveats : `..QuantumSuccessCaveats` or `None`, optional
        Forwarded unchanged to `make_status_report`, where it restricts
        successful quanta in the status report and data ID tables to those
        with these caveat flags.  `None` (the default here) reports on all
        successful quanta.  This method does not modify ``states``, so
        `QuantumAttemptStatus.SUCCESSFUL` must be requested via ``states``
        or ``also`` for successful quanta to appear at all.
    data_id_table_dir : convertible to `~lsst.resources.ResourcePath`, \
            optional
        If provided, a directory to write data ID tables (in ECSV format)
        with all of the data IDs with the given states, for use with the
        ``--data-id-tables`` argument to the quantum graph builder.
        Subdirectories for each task and status will be created within this
        directory, with one file for each exception type (or ``UNKNOWN``
        when there is no exception).
    """
    # The status report is built once and serves both the JSON file and the
    # data ID tables (which are written as a side effect of building it).
    if status_report_file is not None or data_id_table_dir is not None:
        status_report = self.make_status_report(
            states, also=also, with_caveats=with_caveats, data_id_table_dir=data_id_table_dir
        )
        if status_report_file is not None:
            status_report_file = ResourcePath(status_report_file)
            # Parent directories are only created explicitly for local paths.
            if status_report_file.isLocal:
                status_report_file.dirname().mkdir()
            with status_report_file.open("w") as stream:
                stream.write(status_report.model_dump_json(indent=2))
    if print_quantum_table:
        quantum_table = self.make_quantum_table()
        quantum_table.pprint_all()
        print("")
    if print_exception_table:
        exception_table = self.make_exception_table()
        exception_table.pprint_all()
        print("")
|
|
1608
|
+
|
|
1217
1609
|
|
|
1218
1610
|
@dataclasses.dataclass
|
|
1219
1611
|
class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
|
|
@@ -60,13 +60,13 @@ class _ProvenanceFormatterParameters(pydantic.BaseModel):
|
|
|
60
60
|
|
|
61
61
|
@pydantic.field_validator("quanta", mode="before")
|
|
62
62
|
@classmethod
|
|
63
|
-
def quanta_to_list(cls, v: Any) -> list[uuid.UUID]:
|
|
64
|
-
return list(v)
|
|
63
|
+
def quanta_to_list(cls, v: Any) -> list[uuid.UUID] | None:
|
|
64
|
+
return list(v) if v is not None else None
|
|
65
65
|
|
|
66
66
|
@pydantic.field_validator("datasets", mode="before")
|
|
67
67
|
@classmethod
|
|
68
|
-
def datasets_to_list(cls, v: Any) -> list[uuid.UUID]:
|
|
69
|
-
return list(v)
|
|
68
|
+
def datasets_to_list(cls, v: Any) -> list[uuid.UUID] | None:
|
|
69
|
+
return list(v) if v is not None else None
|
|
70
70
|
|
|
71
71
|
@property
|
|
72
72
|
def nodes(self) -> list[uuid.UUID]:
|
|
@@ -62,6 +62,7 @@ __all__ = ("ingest_graph",)
|
|
|
62
62
|
|
|
63
63
|
import dataclasses
|
|
64
64
|
import itertools
|
|
65
|
+
import os
|
|
65
66
|
import uuid
|
|
66
67
|
from collections.abc import Iterator
|
|
67
68
|
from contextlib import contextmanager
|
|
@@ -107,10 +108,13 @@ def ingest_graph(
|
|
|
107
108
|
butler_config : `str`
|
|
108
109
|
Path or alias for the butler repository, or a butler repository config
|
|
109
110
|
object.
|
|
110
|
-
uri :
|
|
111
|
+
uri : `lsst.resources.ResourcePathExpression` or `None`, optional
|
|
111
112
|
Location of the provenance quantum graph to ingest. `None` indicates
|
|
112
113
|
that the quantum graph has already been ingested, but other ingests
|
|
113
114
|
and/or deletions failed and need to be resumed.
|
|
115
|
+
transfer : `str` or `None`, optional
|
|
116
|
+
Transfer mode to use when ingesting graph. Matches those supported
|
|
117
|
+
by `lsst.resources.ResourcePath.transfer_from`.
|
|
114
118
|
batch_size : `int`, optional
|
|
115
119
|
Number of datasets to process in each transaction.
|
|
116
120
|
output_run : `str`, optional
|
|
@@ -120,8 +124,9 @@ def ingest_graph(
|
|
|
120
124
|
|
|
121
125
|
Notes
|
|
122
126
|
-----
|
|
123
|
-
After this operation,
|
|
124
|
-
`~lsst.daf.butler.CollectionType.RUN` collection
|
|
127
|
+
After this operation, any further processing done in the
|
|
128
|
+
`~lsst.daf.butler.CollectionType.RUN` collection will not be included in
|
|
129
|
+
the provenance.
|
|
125
130
|
|
|
126
131
|
If this process is interrupted, it can pick up where it left off if run
|
|
127
132
|
again (at the cost of some duplicate work to figure out how much progress
|
|
@@ -134,6 +139,32 @@ def ingest_graph(
|
|
|
134
139
|
helper.forget_ingested_datasets(batch_size=batch_size)
|
|
135
140
|
helper.ingest_graph_dataset(uri, transfer=transfer)
|
|
136
141
|
helper.clean_and_reingest_datasets(batch_size=batch_size)
|
|
142
|
+
if helper.directories_to_delete:
|
|
143
|
+
_LOG.info(
|
|
144
|
+
"Deleting %d directories after checking that they are empty.",
|
|
145
|
+
len(helper.directories_to_delete),
|
|
146
|
+
)
|
|
147
|
+
n_deleted: int = 0
|
|
148
|
+
for top in sorted(helper.directories_to_delete):
|
|
149
|
+
nonempty: set[str] = set()
|
|
150
|
+
for root, dirnames, filenames in os.walk(top, topdown=False):
|
|
151
|
+
if filenames:
|
|
152
|
+
nonempty.add(root)
|
|
153
|
+
for dirname in dirnames:
|
|
154
|
+
dirpath = os.path.join(root, dirname)
|
|
155
|
+
if dirpath in nonempty:
|
|
156
|
+
nonempty.add(root)
|
|
157
|
+
else:
|
|
158
|
+
os.rmdir(dirpath)
|
|
159
|
+
if nonempty:
|
|
160
|
+
_LOG.warning(
|
|
161
|
+
"Directory %r was not deleted because it unexpectedly still had files in it.",
|
|
162
|
+
top,
|
|
163
|
+
)
|
|
164
|
+
else:
|
|
165
|
+
os.rmdir(root)
|
|
166
|
+
n_deleted += 1
|
|
167
|
+
_LOG.info("Deleted %d directories.", n_deleted)
|
|
137
168
|
|
|
138
169
|
|
|
139
170
|
@dataclasses.dataclass
|
|
@@ -144,6 +175,7 @@ class _GraphIngester:
|
|
|
144
175
|
graph_already_ingested: bool
|
|
145
176
|
n_datasets: int
|
|
146
177
|
datasets_already_ingested: set[uuid.UUID] = dataclasses.field(default_factory=set)
|
|
178
|
+
directories_to_delete: set[str] = dataclasses.field(default_factory=set)
|
|
147
179
|
|
|
148
180
|
@property
|
|
149
181
|
def output_run(self) -> str:
|
|
@@ -308,7 +340,7 @@ class _GraphIngester:
|
|
|
308
340
|
if not to_process:
|
|
309
341
|
return 0
|
|
310
342
|
_LOG.verbose(
|
|
311
|
-
"Deleting and
|
|
343
|
+
"Deleting and re-ingesting a %d-dataset batch; %d/%d complete.",
|
|
312
344
|
len(to_process),
|
|
313
345
|
n_current,
|
|
314
346
|
self.n_datasets,
|
|
@@ -331,6 +363,20 @@ class _GraphIngester:
|
|
|
331
363
|
raise status.exception
|
|
332
364
|
file_dataset = FileDataset(refs=expanded_refs, path=direct_uri, formatter=ProvenanceFormatter)
|
|
333
365
|
self.butler.ingest(file_dataset, transfer=None)
|
|
366
|
+
if len(original_uris) == len(expanded_refs):
|
|
367
|
+
for uri, ref in zip(original_uris, expanded_refs):
|
|
368
|
+
if uri.isLocal:
|
|
369
|
+
if (
|
|
370
|
+
parent_dir := self.find_dataset_type_directory(uri.ospath, ref.datasetType.name)
|
|
371
|
+
) is not None:
|
|
372
|
+
self.directories_to_delete.add(parent_dir)
|
|
373
|
+
elif any(uri.isLocal for uri in original_uris):
|
|
374
|
+
_LOG.warning(
|
|
375
|
+
"Not attempting to delete empty metadata/log/config directories because the number "
|
|
376
|
+
"of paths (%s) did not match the number of datasets (%s).",
|
|
377
|
+
len(original_uris),
|
|
378
|
+
len(expanded_refs),
|
|
379
|
+
)
|
|
334
380
|
n = len(to_process)
|
|
335
381
|
to_process.clear()
|
|
336
382
|
return n
|
|
@@ -354,3 +400,14 @@ class _GraphIngester:
|
|
|
354
400
|
datastore_records={},
|
|
355
401
|
dataset_types=dataset_types,
|
|
356
402
|
)
|
|
403
|
+
|
|
404
|
+
def find_dataset_type_directory(self, ospath: str, dataset_type: str) -> str | None:
    """Find the shallowest parent directory whose name contains a dataset
    type name.

    Parameters
    ----------
    ospath : `str`
        Local filesystem path of a dataset file.
    dataset_type : `str`
        Name of the dataset type to look for in the directory components.

    Returns
    -------
    directory : `str` or `None`
        Path up to and including the first directory component that
        contains ``dataset_type``, or `None` if no component does.
    """
    parts = os.path.dirname(ospath).split(os.path.sep)
    for depth, part in enumerate(parts, start=1):
        # If the full dataset type name is in a single directory path
        # component, we guess that directory can only have datasets of
        # that type.
        if dataset_type in part:
            return os.path.sep.join(parts[:depth])
    return None
|
|
@@ -380,8 +380,6 @@ class QuantumGraphBuilder(ABC):
|
|
|
380
380
|
|
|
381
381
|
Parameters
|
|
382
382
|
----------
|
|
383
|
-
metadata : `~collections.abc.Mapping`, optional
|
|
384
|
-
Flexible metadata to add to the quantum graph.
|
|
385
383
|
attach_datastore_records : `bool`, optional
|
|
386
384
|
Whether to include datastore records in the graph. Required for
|
|
387
385
|
`lsst.daf.butler.QuantumBackedButler` execution.
|
|
@@ -887,11 +885,6 @@ class QuantumGraphBuilder(ABC):
|
|
|
887
885
|
Identifier for this quantum in the graph.
|
|
888
886
|
skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
889
887
|
Preliminary quantum graph, to be modified in-place.
|
|
890
|
-
skypix_bounds_builder : `~prerequisite_helpers.SkyPixBoundsBuilder`
|
|
891
|
-
An object that accumulates the appropriate spatial bounds for a
|
|
892
|
-
quantum.
|
|
893
|
-
timespan_builder : `~prerequisite_helpers.TimespanBuilder`
|
|
894
|
-
An object that accumulates the appropriate timespan for a quantum.
|
|
895
888
|
|
|
896
889
|
Returns
|
|
897
890
|
-------
|
|
@@ -1144,7 +1137,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
1144
1137
|
"outputs" attributes on all quantum nodes, as added by
|
|
1145
1138
|
`_resolve_task_quanta`, as well as a "datastore_records" attribute
|
|
1146
1139
|
as added by `_attach_datastore_records`.
|
|
1147
|
-
metadata :
|
|
1140
|
+
metadata : `~collections.abc.Mapping`
|
|
1148
1141
|
Flexible metadata to add to the graph.
|
|
1149
1142
|
|
|
1150
1143
|
Returns
|