lsst-pipe-base 30.2026.400__py3-none-any.whl → 30.2026.500__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. lsst/pipe/base/_instrument.py +17 -5
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +14 -0
  4. lsst/pipe/base/automatic_connection_constants.py +11 -0
  5. lsst/pipe/base/cli/cmd/__init__.py +2 -0
  6. lsst/pipe/base/cli/cmd/commands.py +108 -1
  7. lsst/pipe/base/graph/graph.py +9 -8
  8. lsst/pipe/base/log_capture.py +1 -1
  9. lsst/pipe/base/pipeline.py +2 -2
  10. lsst/pipe/base/pipelineIR.py +1 -1
  11. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  12. lsst/pipe/base/pipeline_graph/_edges.py +11 -11
  13. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +2 -2
  14. lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
  15. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
  16. lsst/pipe/base/prerequisite_helpers.py +2 -1
  17. lsst/pipe/base/quantum_graph/_multiblock.py +29 -13
  18. lsst/pipe/base/quantum_graph/_provenance.py +411 -19
  19. lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
  20. lsst/pipe/base/quantum_graph/formatter.py +4 -4
  21. lsst/pipe/base/quantum_graph/ingest_graph.py +61 -4
  22. lsst/pipe/base/quantum_graph_builder.py +1 -8
  23. lsst/pipe/base/quantum_graph_skeleton.py +29 -27
  24. lsst/pipe/base/quantum_provenance_graph.py +12 -10
  25. lsst/pipe/base/separable_pipeline_executor.py +1 -1
  26. lsst/pipe/base/single_quantum_executor.py +1 -1
  27. lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
  28. lsst/pipe/base/version.py +1 -1
  29. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/METADATA +1 -1
  30. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/RECORD +38 -38
  31. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/WHEEL +1 -1
  32. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/entry_points.txt +0 -0
  33. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/COPYRIGHT +0 -0
  34. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/LICENSE +0 -0
  35. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/bsd_license.txt +0 -0
  36. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/gpl-v3.0.txt +0 -0
  37. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/top_level.txt +0 -0
  38. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/zip-safe +0 -0
@@ -38,13 +38,14 @@ __all__ = (
38
38
  "ProvenanceQuantumGraphWriter",
39
39
  "ProvenanceQuantumInfo",
40
40
  "ProvenanceQuantumModel",
41
+ "ProvenanceQuantumReport",
41
42
  "ProvenanceQuantumScanData",
42
43
  "ProvenanceQuantumScanModels",
43
44
  "ProvenanceQuantumScanStatus",
45
+ "ProvenanceReport",
44
46
  "ProvenanceTaskMetadataModel",
45
47
  )
46
48
 
47
-
48
49
  import dataclasses
49
50
  import enum
50
51
  import itertools
@@ -60,9 +61,9 @@ import networkx
60
61
  import numpy as np
61
62
  import pydantic
62
63
 
63
- from lsst.daf.butler import DataCoordinate
64
+ from lsst.daf.butler import Butler, DataCoordinate
64
65
  from lsst.daf.butler.logging import ButlerLogRecord, ButlerLogRecords
65
- from lsst.resources import ResourcePathExpression
66
+ from lsst.resources import ResourcePath, ResourcePathExpression
66
67
  from lsst.utils.iteration import ensure_iterable
67
68
  from lsst.utils.logging import LsstLogAdapter, getLogger
68
69
  from lsst.utils.packages import Packages
@@ -559,6 +560,131 @@ class ProvenanceTaskMetadataModel(pydantic.BaseModel):
559
560
  return super().model_validate_strings(*args, **kwargs)
560
561
 
561
562
 
563
+ class ProvenanceQuantumReport(pydantic.BaseModel):
564
+ """A Pydantic model used to report information about a single
565
+ (generally problematic) quantum.
566
+ """
567
+
568
+ quantum_id: uuid.UUID
569
+ data_id: dict[str, int | str]
570
+ attempts: list[ProvenanceQuantumAttemptModel]
571
+
572
+ @classmethod
573
+ def from_info(cls, quantum_id: uuid.UUID, quantum_info: ProvenanceQuantumInfo) -> ProvenanceQuantumReport:
574
+ """Construct from a provenance quantum graph node.
575
+
576
+ Parameters
577
+ ----------
578
+ quantum_id : `uuid.UUID`
579
+ Unique ID for the quantum.
580
+ quantum_info : `ProvenanceQuantumInfo`
581
+ Node attributes for this quantum.
582
+ """
583
+ return cls(
584
+ quantum_id=quantum_id,
585
+ data_id=dict(quantum_info["data_id"].mapping),
586
+ attempts=quantum_info["attempts"],
587
+ )
588
+
589
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
590
+ # when we inherit those docstrings in our public classes.
591
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
592
+
593
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
594
+ """See `pydantic.BaseModel.copy`."""
595
+ return super().copy(*args, **kwargs)
596
+
597
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
598
+ """See `pydantic.BaseModel.model_dump`."""
599
+ return super().model_dump(*args, **kwargs)
600
+
601
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
602
+ """See `pydantic.BaseModel.model_dump_json`."""
603
+ return super().model_dump(*args, **kwargs)
604
+
605
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
606
+ """See `pydantic.BaseModel.model_copy`."""
607
+ return super().model_copy(*args, **kwargs)
608
+
609
+ @classmethod
610
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
611
+ """See `pydantic.BaseModel.model_construct`."""
612
+ return super().model_construct(*args, **kwargs)
613
+
614
+ @classmethod
615
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
616
+ """See `pydantic.BaseModel.model_json_schema`."""
617
+ return super().model_json_schema(*args, **kwargs)
618
+
619
+ @classmethod
620
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
621
+ """See `pydantic.BaseModel.model_validate`."""
622
+ return super().model_validate(*args, **kwargs)
623
+
624
+ @classmethod
625
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
626
+ """See `pydantic.BaseModel.model_validate_json`."""
627
+ return super().model_validate_json(*args, **kwargs)
628
+
629
+ @classmethod
630
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
631
+ """See `pydantic.BaseModel.model_validate_strings`."""
632
+ return super().model_validate_strings(*args, **kwargs)
633
+
634
+
635
+ class ProvenanceReport(pydantic.RootModel):
636
+ """A Pydantic model that groups quantum information by task label, then
637
+ status (as a string), and then exception type.
638
+ """
639
+
640
+ root: dict[TaskLabel, dict[str, dict[str | None, list[ProvenanceQuantumReport]]]] = {}
641
+
642
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
643
+ # when we inherit those docstrings in our public classes.
644
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
645
+
646
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
647
+ """See `pydantic.BaseModel.copy`."""
648
+ return super().copy(*args, **kwargs)
649
+
650
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
651
+ """See `pydantic.BaseModel.model_dump`."""
652
+ return super().model_dump(*args, **kwargs)
653
+
654
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
655
+ """See `pydantic.BaseModel.model_dump_json`."""
656
+ return super().model_dump(*args, **kwargs)
657
+
658
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
659
+ """See `pydantic.BaseModel.model_copy`."""
660
+ return super().model_copy(*args, **kwargs)
661
+
662
+ @classmethod
663
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
664
+ """See `pydantic.BaseModel.model_construct`."""
665
+ return super().model_construct(*args, **kwargs)
666
+
667
+ @classmethod
668
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
669
+ """See `pydantic.BaseModel.model_json_schema`."""
670
+ return super().model_json_schema(*args, **kwargs)
671
+
672
+ @classmethod
673
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
674
+ """See `pydantic.BaseModel.model_validate`."""
675
+ return super().model_validate(*args, **kwargs)
676
+
677
+ @classmethod
678
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
679
+ """See `pydantic.BaseModel.model_validate_json`."""
680
+ return super().model_validate_json(*args, **kwargs)
681
+
682
+ @classmethod
683
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
684
+ """See `pydantic.BaseModel.model_validate_strings`."""
685
+ return super().model_validate_strings(*args, **kwargs)
686
+
687
+
562
688
  class ProvenanceQuantumModel(pydantic.BaseModel):
563
689
  """Data model for the quanta in a provenance quantum graph file."""
564
690
 
@@ -1005,6 +1131,83 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
1005
1131
  dataset_type_name: {} for dataset_type_name in self.pipeline_graph.dataset_types.keys()
1006
1132
  }
1007
1133
 
1134
+ @classmethod
1135
+ @contextmanager
1136
+ def from_args(
1137
+ cls,
1138
+ repo_or_filename: str,
1139
+ /,
1140
+ collection: str | None = None,
1141
+ *,
1142
+ quanta: Iterable[uuid.UUID] | None = None,
1143
+ datasets: Iterable[uuid.UUID] | None = None,
1144
+ writeable: bool = False,
1145
+ ) -> Iterator[tuple[ProvenanceQuantumGraph, Butler | None]]:
1146
+ """Construct a `ProvenanceQuantumGraph` from CLI-friendly arguments for
1147
+ a file or butler-ingested graph dataset.
1148
+
1149
+ Parameters
1150
+ ----------
1151
+ repo_or_filename : `str`
1152
+ Either a provenance quantum graph filename or a butler repository
1153
+ path or alias.
1154
+ collection : `str` or `None`, optional
1155
+ Collection to search; presence indicates that the first argument
1156
+ is a butler repository, not a filename.
1157
+ quanta : `~collections.abc.Iterable` [ `uuid.UUID` ] or `None`, optional
1158
+ IDs of the quanta to load, or `None` to load all.
1159
+ datasets : `~collections.abc.Iterable` [ `uuid.UUID` ] or `None`, optional
1160
+ IDs of the datasets to load, or `None` to load all.
1161
+ writeable : `bool`, optional
1162
+ Whether the butler should be constructed with write support.
1163
+
1164
+ Returns
1165
+ -------
1166
+ context : `contextlib.AbstractContextManager`
1167
+ A context manager that yields a tuple of
1168
+
1169
+ - the `ProvenanceQuantumGraph`
1170
+ - the `Butler` constructed (or `None`)
1171
+
1172
+ when entered.
1173
+ """
1174
+ exit_stack = ExitStack()
1175
+ if collection is not None:
1176
+ try:
1177
+ butler = exit_stack.enter_context(
1178
+ Butler.from_config(repo_or_filename, collections=[collection], writeable=writeable)
1179
+ )
1180
+ except Exception as err:
1181
+ err.add_note(
1182
+ f"Expected {repo_or_filename!r} to be a butler repository path or alias because a "
1183
+ f"collection ({collection}) was provided."
1184
+ )
1185
+ raise
1186
+ with exit_stack:
1187
+ graph = butler.get(
1188
+ acc.PROVENANCE_DATASET_TYPE_NAME, parameters={"quanta": quanta, "datasets": datasets}
1189
+ )
1190
+ yield graph, butler
1191
+ else:
1192
+ try:
1193
+ reader = exit_stack.enter_context(ProvenanceQuantumGraphReader.open(repo_or_filename))
1194
+ except Exception as err:
1195
+ err.add_note(
1196
+ f"Expected a {repo_or_filename} to be a provenance quantum graph filename "
1197
+ f"because no collection was provided."
1198
+ )
1199
+ raise
1200
+ with exit_stack:
1201
+ if quanta is None:
1202
+ reader.read_quanta()
1203
+ elif not quanta:
1204
+ reader.read_quanta(quanta)
1205
+ if datasets is None:
1206
+ reader.read_datasets()
1207
+ elif not datasets:
1208
+ reader.read_datasets(datasets)
1209
+ yield reader.graph, None
1210
+
1008
1211
  @property
1009
1212
  def init_quanta(self) -> Mapping[TaskLabel, uuid.UUID]:
1010
1213
  """A mapping from task label to the ID of the special init quantum for
@@ -1101,10 +1304,16 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
1101
1304
  """
1102
1305
  return self._bipartite_xgraph.copy(as_view=True)
1103
1306
 
1104
- def make_quantum_table(self) -> astropy.table.Table:
1307
+ def make_quantum_table(self, drop_unused_columns: bool = True) -> astropy.table.Table:
1105
1308
  """Construct an `astropy.table.Table` with a tabular summary of the
1106
1309
  quanta.
1107
1310
 
1311
+ Parameters
1312
+ ----------
1313
+ drop_unused_columns : `bool`, optional
1314
+ Whether to drop columns for rare states that did not actually
1315
+ occur in this run.
1316
+
1108
1317
  Returns
1109
1318
  -------
1110
1319
  table : `astropy.table.Table`
@@ -1140,19 +1349,25 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
1140
1349
  caveats = f"{code.concise()}({count})" # type: ignore[union-attr]
1141
1350
  else:
1142
1351
  caveats = ""
1143
- rows.append(
1352
+ row: dict[str, Any] = {
1353
+ "Task": task_label,
1354
+ "Caveats": caveats,
1355
+ }
1356
+ for status in QuantumAttemptStatus:
1357
+ row[status.title] = status_counts.get(status, 0)
1358
+ row.update(
1144
1359
  {
1145
- "Task": task_label,
1146
- "Unknown": status_counts.get(QuantumAttemptStatus.UNKNOWN, 0),
1147
- "Successful": status_counts.get(QuantumAttemptStatus.SUCCESSFUL, 0),
1148
- "Caveats": caveats,
1149
- "Blocked": status_counts.get(QuantumAttemptStatus.BLOCKED, 0),
1150
- "Failed": status_counts.get(QuantumAttemptStatus.FAILED, 0),
1151
1360
  "TOTAL": len(quanta_for_task),
1152
1361
  "EXPECTED": self.header.n_task_quanta[task_label],
1153
1362
  }
1154
1363
  )
1155
- return astropy.table.Table(rows)
1364
+ rows.append(row)
1365
+ table = astropy.table.Table(rows)
1366
+ if drop_unused_columns:
1367
+ for status in QuantumAttemptStatus:
1368
+ if status.is_rare and not table[status.title].any():
1369
+ del table[status.title]
1370
+ return table
1156
1371
 
1157
1372
  def make_exception_table(self) -> astropy.table.Table:
1158
1373
  """Construct an `astropy.table.Table` with counts for each exception
@@ -1165,13 +1380,25 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
1165
1380
  """
1166
1381
  rows = []
1167
1382
  for task_label, quanta_for_task in self.quanta_by_task.items():
1168
- counts_by_type = Counter(
1169
- exc_info.type_name
1170
- for q in quanta_for_task.values()
1171
- if (exc_info := self._quantum_only_xgraph.nodes[q]["exception"]) is not None
1172
- )
1173
- for type_name, count in counts_by_type.items():
1174
- rows.append({"Task": task_label, "Exception": type_name, "Count": count})
1383
+ success_counts = Counter[str]()
1384
+ failed_counts = Counter[str]()
1385
+ for quantum_id in quanta_for_task.values():
1386
+ quantum_info: ProvenanceQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
1387
+ exc_info = quantum_info["exception"]
1388
+ if exc_info is not None:
1389
+ if quantum_info["status"] is QuantumAttemptStatus.SUCCESSFUL:
1390
+ success_counts[exc_info.type_name] += 1
1391
+ else:
1392
+ failed_counts[exc_info.type_name] += 1
1393
+ for type_name in sorted(success_counts.keys() | failed_counts.keys()):
1394
+ rows.append(
1395
+ {
1396
+ "Task": task_label,
1397
+ "Exception": type_name,
1398
+ "Successes": success_counts.get(type_name, 0),
1399
+ "Failures": failed_counts.get(type_name, 0),
1400
+ }
1401
+ )
1175
1402
  return astropy.table.Table(rows)
1176
1403
 
1177
1404
  def make_task_resource_usage_table(
@@ -1214,6 +1441,171 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
1214
1441
  array = np.array(rows, dtype=row_dtype)
1215
1442
  return astropy.table.Table(array, units=QuantumResourceUsage.get_units())
1216
1443
 
1444
+ def make_status_report(
1445
+ self,
1446
+ states: Iterable[QuantumAttemptStatus] = (
1447
+ QuantumAttemptStatus.FAILED,
1448
+ QuantumAttemptStatus.ABORTED,
1449
+ QuantumAttemptStatus.ABORTED_SUCCESS,
1450
+ ),
1451
+ *,
1452
+ also: QuantumAttemptStatus | Iterable[QuantumAttemptStatus] = (),
1453
+ with_caveats: QuantumSuccessCaveats | None = QuantumSuccessCaveats.PARTIAL_OUTPUTS_ERROR,
1454
+ data_id_table_dir: ResourcePathExpression | None = None,
1455
+ ) -> ProvenanceReport:
1456
+ """Make a JSON- or YAML-friendly report of all quanta with the given
1457
+ states.
1458
+
1459
+ Parameters
1460
+ ----------
1461
+ states : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
1462
+ `..QuantumAttemptStatus`, optional
1463
+ A quantum is included if it has any of these states. Defaults to
1464
+ states that clearly represent problems.
1465
+ also : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
1466
+ `..QuantumAttemptStatus`, optional
1467
+ Additional states to consider; unioned with ``states``. This is
1468
+ provided so users can easily request additional states while also
1469
+ getting the defaults.
1470
+ with_caveats : `..QuantumSuccessCaveats` or `None`, optional
1471
+ If `..QuantumAttemptStatus.SUCCESSFUL` is in ``states``, only
1472
+ include quanta with these caveat flags. May be set to `None`
1473
+ to report on all successful quanta.
1474
+ data_id_table_dir : convertible to `~lsst.resources.ResourcePath`, \
1475
+ optional
1476
+ If provided, a directory to write data ID tables (in ECSV format)
1477
+ with all of the data IDs with the given states, for use with the
1478
+ ``--data-id-tables`` argument to the quantum graph builder.
1479
+ Subdirectories for each task and status will be created within this
1480
+ directory, with one file for each exception type (or ``UNKNOWN``
1481
+ when there is no exception).
1482
+
1483
+ Returns
1484
+ -------
1485
+ report : `ProvenanceReport`
1486
+ A Pydantic model that groups quanta by task label and exception
1487
+ type.
1488
+ """
1489
+ states = set(ensure_iterable(states))
1490
+ states.update(ensure_iterable(also))
1491
+ result = ProvenanceReport(root={})
1492
+ if data_id_table_dir is not None:
1493
+ data_id_table_dir = ResourcePath(data_id_table_dir)
1494
+ for task_label, quanta_for_task in self.quanta_by_task.items():
1495
+ reports_for_task: dict[str, dict[str | None, list[ProvenanceQuantumReport]]] = {}
1496
+ table_rows_for_task: dict[str, dict[str | None, list[tuple[int | str, ...]]]] = {}
1497
+ for quantum_id in quanta_for_task.values():
1498
+ quantum_info: ProvenanceQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
1499
+ quantum_status = quantum_info["status"]
1500
+ if quantum_status not in states:
1501
+ continue
1502
+ if (
1503
+ quantum_status is QuantumAttemptStatus.SUCCESSFUL
1504
+ and with_caveats is not None
1505
+ and (quantum_info["caveats"] is None or not (quantum_info["caveats"] & with_caveats))
1506
+ ):
1507
+ continue
1508
+ key1 = quantum_status.name
1509
+ exc_info = quantum_info["exception"]
1510
+ key2 = exc_info.type_name if exc_info is not None else None
1511
+ reports_for_task.setdefault(key1, {}).setdefault(key2, []).append(
1512
+ ProvenanceQuantumReport.from_info(quantum_id, quantum_info)
1513
+ )
1514
+ if data_id_table_dir:
1515
+ table_rows_for_task.setdefault(key1, {}).setdefault(key2, []).append(
1516
+ quantum_info["data_id"].required_values
1517
+ )
1518
+ if reports_for_task:
1519
+ result.root[task_label] = reports_for_task
1520
+ if table_rows_for_task:
1521
+ assert data_id_table_dir is not None, "table_rows_for_task should be empty"
1522
+ for status_name, table_rows_for_status in table_rows_for_task.items():
1523
+ dir_for_task_and_status = data_id_table_dir.join(task_label, forceDirectory=True).join(
1524
+ status_name, forceDirectory=True
1525
+ )
1526
+ if dir_for_task_and_status.isLocal:
1527
+ dir_for_task_and_status.mkdir()
1528
+ for exc_name, data_id_rows in table_rows_for_status.items():
1529
+ table = astropy.table.Table(
1530
+ rows=data_id_rows,
1531
+ names=list(self.pipeline_graph.tasks[task_label].dimensions.required),
1532
+ )
1533
+ filename = f"{exc_name}.ecsv" if exc_name is not None else "UNKNOWN.ecsv"
1534
+ with dir_for_task_and_status.join(filename).open("w") as stream:
1535
+ table.write(stream, format="ecsv")
1536
+ return result
1537
+
1538
+ def make_many_reports(
1539
+ self,
1540
+ states: Iterable[QuantumAttemptStatus] = (
1541
+ QuantumAttemptStatus.FAILED,
1542
+ QuantumAttemptStatus.ABORTED,
1543
+ QuantumAttemptStatus.ABORTED_SUCCESS,
1544
+ ),
1545
+ *,
1546
+ status_report_file: ResourcePathExpression | None = None,
1547
+ print_quantum_table: bool = False,
1548
+ print_exception_table: bool = False,
1549
+ also: QuantumAttemptStatus | Iterable[QuantumAttemptStatus] = (),
1550
+ with_caveats: QuantumSuccessCaveats | None = None,
1551
+ data_id_table_dir: ResourcePathExpression | None = None,
1552
+ ) -> None:
1553
+ """Write multiple reports.
1554
+
1555
+ Parameters
1556
+ ----------
1557
+ states : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
1558
+ `..QuantumAttemptStatus`, optional
1559
+ A quantum is included in the status report and data ID tables if it
1560
+ has any of these states. Defaults to states that clearly represent
1561
+ problems.
1562
+ status_report_file : convertible to `~lsst.resources.ResourcePath`, \
1563
+ optional
1564
+ Filename for the JSON status report (see `make_status_report`).
1565
+ print_quantum_table : `bool`, optional
1566
+ If `True`, print a quantum summary table (counts only) to STDOUT.
1567
+ print_exception_table : `bool`, optional
1568
+ If `True`, print an exception-type summary table (counts only) to
1569
+ STDOUT.
1570
+ also : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
1571
+ `..QuantumAttemptStatus`, optional
1572
+ Additional states to consider in the status report and data ID
1573
+ tables; unioned with ``states``. This is provided so users can
1574
+ easily request additional states while also getting the defaults.
1575
+ with_caveats : `..QuantumSuccessCaveats` or `None`, optional
1576
+ Only include quanta with these caveat flags in the status report
1577
+ and data ID tables. May be set to `None` to report on all
1578
+ successful quanta (an empty sequence reports on only quanta with no
1579
+ caveats). If provided, `QuantumAttemptStatus.SUCCESSFUL` is
1580
+ automatically included in ``states``.
1581
+ data_id_table_dir : convertible to `~lsst.resources.ResourcePath`, \
1582
+ optional
1583
+ If provided, a directory to write data ID tables (in ECSV format)
1584
+ with all of the data IDs with the given states, for use with the
1585
+ ``--data-id-tables`` argument to the quantum graph builder.
1586
+ Subdirectories for each task and status will be created within this
1587
+ directory, with one file for each exception type (or ``UNKNOWN``
1588
+ when there is no exception).
1589
+ """
1590
+ if status_report_file is not None or data_id_table_dir is not None:
1591
+ status_report = self.make_status_report(
1592
+ states, also=also, with_caveats=with_caveats, data_id_table_dir=data_id_table_dir
1593
+ )
1594
+ if status_report_file is not None:
1595
+ status_report_file = ResourcePath(status_report_file)
1596
+ if status_report_file.isLocal:
1597
+ status_report_file.dirname().mkdir()
1598
+ with ResourcePath(status_report_file).open("w") as stream:
1599
+ stream.write(status_report.model_dump_json(indent=2))
1600
+ if print_quantum_table:
1601
+ quantum_table = self.make_quantum_table()
1602
+ quantum_table.pprint_all()
1603
+ print("")
1604
+ if print_exception_table:
1605
+ exception_table = self.make_exception_table()
1606
+ exception_table.pprint_all()
1607
+ print("")
1608
+
1217
1609
 
1218
1610
  @dataclasses.dataclass
1219
1611
  class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
@@ -140,4 +140,3 @@ from ._communicators import FatalWorkerError
140
140
  # - having the worker logs go to separate files is actually very nice, and it's
141
141
  # more efficient if they just do that themselves, and that's not something
142
142
  # our logging CLI can actually do, AFAICT.
143
-
@@ -60,13 +60,13 @@ class _ProvenanceFormatterParameters(pydantic.BaseModel):
60
60
 
61
61
  @pydantic.field_validator("quanta", mode="before")
62
62
  @classmethod
63
- def quanta_to_list(cls, v: Any) -> list[uuid.UUID]:
64
- return list(v)
63
+ def quanta_to_list(cls, v: Any) -> list[uuid.UUID] | None:
64
+ return list(v) if v is not None else None
65
65
 
66
66
  @pydantic.field_validator("datasets", mode="before")
67
67
  @classmethod
68
- def datasets_to_list(cls, v: Any) -> list[uuid.UUID]:
69
- return list(v)
68
+ def datasets_to_list(cls, v: Any) -> list[uuid.UUID] | None:
69
+ return list(v) if v is not None else None
70
70
 
71
71
  @property
72
72
  def nodes(self) -> list[uuid.UUID]:
@@ -62,6 +62,7 @@ __all__ = ("ingest_graph",)
62
62
 
63
63
  import dataclasses
64
64
  import itertools
65
+ import os
65
66
  import uuid
66
67
  from collections.abc import Iterator
67
68
  from contextlib import contextmanager
@@ -107,10 +108,13 @@ def ingest_graph(
107
108
  butler_config : `str`
108
109
  Path or alias for the butler repository, or a butler repository config
109
110
  object.
110
- uri : convertible to `lsst.resources.ResourcePath` or `None`, optional
111
+ uri : `lsst.resources.ResourcePathExpression` or `None`, optional
111
112
  Location of the provenance quantum graph to ingest. `None` indicates
112
113
  that the quantum graph has already been ingested, but other ingests
113
114
  and/or deletions failed and need to be resumed.
115
+ transfer : `str` or `None`, optional
116
+ Transfer mode to use when ingesting graph. Matches those supported
117
+ by `lsst.resources.ResourcePath.transfer_from`.
114
118
  batch_size : `int`, optional
115
119
  Number of datasets to process in each transaction.
116
120
  output_run : `str`, optional
@@ -120,8 +124,9 @@ def ingest_graph(
120
124
 
121
125
  Notes
122
126
  -----
123
- After this operation, no further processing may be done in the
124
- `~lsst.daf.butler.CollectionType.RUN` collection.
127
+ After this operation, any further processing done in the
128
+ `~lsst.daf.butler.CollectionType.RUN` collection will not be included in
129
+ the provenance.
125
130
 
126
131
  If this process is interrupted, it can pick up where it left off if run
127
132
  again (at the cost of some duplicate work to figure out how much progress
@@ -134,6 +139,32 @@ def ingest_graph(
134
139
  helper.forget_ingested_datasets(batch_size=batch_size)
135
140
  helper.ingest_graph_dataset(uri, transfer=transfer)
136
141
  helper.clean_and_reingest_datasets(batch_size=batch_size)
142
+ if helper.directories_to_delete:
143
+ _LOG.info(
144
+ "Deleting %d directories after checking that they are empty.",
145
+ len(helper.directories_to_delete),
146
+ )
147
+ n_deleted: int = 0
148
+ for top in sorted(helper.directories_to_delete):
149
+ nonempty: set[str] = set()
150
+ for root, dirnames, filenames in os.walk(top, topdown=False):
151
+ if filenames:
152
+ nonempty.add(root)
153
+ for dirname in dirnames:
154
+ dirpath = os.path.join(root, dirname)
155
+ if dirpath in nonempty:
156
+ nonempty.add(root)
157
+ else:
158
+ os.rmdir(dirpath)
159
+ if nonempty:
160
+ _LOG.warning(
161
+ "Directory %r was not deleted because it unexpectedly still had files in it.",
162
+ top,
163
+ )
164
+ else:
165
+ os.rmdir(root)
166
+ n_deleted += 1
167
+ _LOG.info("Deleted %d directories.", n_deleted)
137
168
 
138
169
 
139
170
  @dataclasses.dataclass
@@ -144,6 +175,7 @@ class _GraphIngester:
144
175
  graph_already_ingested: bool
145
176
  n_datasets: int
146
177
  datasets_already_ingested: set[uuid.UUID] = dataclasses.field(default_factory=set)
178
+ directories_to_delete: set[str] = dataclasses.field(default_factory=set)
147
179
 
148
180
  @property
149
181
  def output_run(self) -> str:
@@ -308,7 +340,7 @@ class _GraphIngester:
308
340
  if not to_process:
309
341
  return 0
310
342
  _LOG.verbose(
311
- "Deleting and deleting a %d-dataset batch; %d/%d complete.",
343
+ "Deleting and re-ingesting a %d-dataset batch; %d/%d complete.",
312
344
  len(to_process),
313
345
  n_current,
314
346
  self.n_datasets,
@@ -331,6 +363,20 @@ class _GraphIngester:
331
363
  raise status.exception
332
364
  file_dataset = FileDataset(refs=expanded_refs, path=direct_uri, formatter=ProvenanceFormatter)
333
365
  self.butler.ingest(file_dataset, transfer=None)
366
+ if len(original_uris) == len(expanded_refs):
367
+ for uri, ref in zip(original_uris, expanded_refs):
368
+ if uri.isLocal:
369
+ if (
370
+ parent_dir := self.find_dataset_type_directory(uri.ospath, ref.datasetType.name)
371
+ ) is not None:
372
+ self.directories_to_delete.add(parent_dir)
373
+ elif any(uri.isLocal for uri in original_uris):
374
+ _LOG.warning(
375
+ "Not attempting to delete empty metadata/log/config directories because the number "
376
+ "of paths (%s) did not match the number of datasets (%s).",
377
+ len(original_uris),
378
+ len(expanded_refs),
379
+ )
334
380
  n = len(to_process)
335
381
  to_process.clear()
336
382
  return n
@@ -354,3 +400,14 @@ class _GraphIngester:
354
400
  datastore_records={},
355
401
  dataset_types=dataset_types,
356
402
  )
403
+
404
+ def find_dataset_type_directory(self, ospath: str, dataset_type: str) -> str | None:
405
+ dir_components: list[str] = []
406
+ for component in os.path.dirname(ospath).split(os.path.sep):
407
+ dir_components.append(component)
408
+ # If the full dataset type name is in a single directory path
409
+ # component, we guess that directory can only have datasets of
410
+ # that type.
411
+ if dataset_type in component:
412
+ return os.path.sep.join(dir_components)
413
+ return None
@@ -380,8 +380,6 @@ class QuantumGraphBuilder(ABC):
380
380
 
381
381
  Parameters
382
382
  ----------
383
- metadata : `~collections.abc.Mapping`, optional
384
- Flexible metadata to add to the quantum graph.
385
383
  attach_datastore_records : `bool`, optional
386
384
  Whether to include datastore records in the graph. Required for
387
385
  `lsst.daf.butler.QuantumBackedButler` execution.
@@ -887,11 +885,6 @@ class QuantumGraphBuilder(ABC):
887
885
  Identifier for this quantum in the graph.
888
886
  skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
889
887
  Preliminary quantum graph, to be modified in-place.
890
- skypix_bounds_builder : `~prerequisite_helpers.SkyPixBoundsBuilder`
891
- An object that accumulates the appropriate spatial bounds for a
892
- quantum.
893
- timespan_builder : `~prerequisite_helpers.TimespanBuilder`
894
- An object that accumulates the appropriate timespan for a quantum.
895
888
 
896
889
  Returns
897
890
  -------
@@ -1144,7 +1137,7 @@ class QuantumGraphBuilder(ABC):
1144
1137
  "outputs" attributes on all quantum nodes, as added by
1145
1138
  `_resolve_task_quanta`, as well as a "datastore_records" attribute
1146
1139
  as added by `_attach_datastore_records`.
1147
- metadata : `Mapping`
1140
+ metadata : `~collections.abc.Mapping`
1148
1141
  Flexible metadata to add to the graph.
1149
1142
 
1150
1143
  Returns