lsst-pipe-base 30.0.0rc3__py3-none-any.whl → 30.2025.5100__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry.
Files changed (28)
  1. lsst/pipe/base/_instrument.py +5 -6
  2. lsst/pipe/base/log_capture.py +79 -39
  3. lsst/pipe/base/mp_graph_executor.py +15 -51
  4. lsst/pipe/base/quantum_graph/_common.py +3 -4
  5. lsst/pipe/base/quantum_graph/_multiblock.py +16 -6
  6. lsst/pipe/base/quantum_graph/_predicted.py +10 -104
  7. lsst/pipe/base/quantum_graph/_provenance.py +6 -657
  8. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +50 -18
  9. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +229 -35
  10. lsst/pipe/base/quantum_graph/aggregator/_structs.py +113 -3
  11. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +5 -10
  12. lsst/pipe/base/quantum_graph/aggregator/_writer.py +348 -31
  13. lsst/pipe/base/quantum_graph_executor.py +13 -116
  14. lsst/pipe/base/separable_pipeline_executor.py +2 -18
  15. lsst/pipe/base/single_quantum_executor.py +35 -53
  16. lsst/pipe/base/version.py +1 -1
  17. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/METADATA +1 -1
  18. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/RECORD +26 -28
  19. lsst/pipe/base/log_on_close.py +0 -79
  20. lsst/pipe/base/quantum_graph/formatter.py +0 -101
  21. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/WHEEL +0 -0
  22. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/entry_points.txt +0 -0
  23. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/COPYRIGHT +0 -0
  24. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/LICENSE +0 -0
  25. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/bsd_license.txt +0 -0
  26. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/gpl-v3.0.txt +0 -0
  27. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/top_level.txt +0 -0
  28. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/zip-safe +0 -0
@@ -35,25 +35,19 @@ __all__ = (
     "ProvenanceLogRecordsModel",
     "ProvenanceQuantumGraph",
     "ProvenanceQuantumGraphReader",
-    "ProvenanceQuantumGraphWriter",
    "ProvenanceQuantumInfo",
    "ProvenanceQuantumModel",
-    "ProvenanceQuantumScanData",
-    "ProvenanceQuantumScanModels",
-    "ProvenanceQuantumScanStatus",
    "ProvenanceTaskMetadataModel",
 )
 
 
 import dataclasses
-import enum
-import itertools
 import sys
 import uuid
 from collections import Counter
-from collections.abc import Callable, Iterable, Iterator, Mapping
-from contextlib import ExitStack, contextmanager
-from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, TypeVar
+from collections.abc import Iterable, Iterator, Mapping
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, Any, TypedDict, TypeVar
 
 import astropy.table
 import networkx
@@ -63,21 +57,15 @@ import pydantic
 from lsst.daf.butler import DataCoordinate
 from lsst.daf.butler.logging import ButlerLogRecord, ButlerLogRecords
 from lsst.resources import ResourcePathExpression
-from lsst.utils.iteration import ensure_iterable
-from lsst.utils.logging import LsstLogAdapter, getLogger
 from lsst.utils.packages import Packages
 
-from .. import automatic_connection_constants as acc
 from .._status import ExceptionInfo, QuantumAttemptStatus, QuantumSuccessCaveats
 from .._task_metadata import TaskMetadata
-from ..log_capture import _ExecutionLogRecordsExtra
-from ..log_on_close import LogOnClose
 from ..pipeline_graph import PipelineGraph, TaskImportMode, TaskInitNode
 from ..resource_usage import QuantumResourceUsage
 from ._common import (
     BaseQuantumGraph,
     BaseQuantumGraphReader,
-    BaseQuantumGraphWriter,
     ConnectionName,
     DataCoordinateValues,
     DatasetInfo,
@@ -86,19 +74,8 @@ from ._common import (
     QuantumInfo,
     TaskLabel,
 )
-from ._multiblock import Compressor, MultiblockReader, MultiblockWriter
-from ._predicted import (
-    PredictedDatasetModel,
-    PredictedQuantumDatasetsModel,
-    PredictedQuantumGraph,
-    PredictedQuantumGraphComponents,
-)
-
-_T = TypeVar("_T")
-
-LoopWrapper: TypeAlias = Callable[[Iterable[_T]], Iterable[_T]]
-
-_LOG = getLogger(__file__)
+from ._multiblock import MultiblockReader
+from ._predicted import PredictedDatasetModel, PredictedQuantumDatasetsModel
 
 DATASET_ADDRESS_INDEX = 0
 QUANTUM_ADDRESS_INDEX = 1
@@ -110,9 +87,7 @@ QUANTUM_MB_NAME = "quanta"
 LOG_MB_NAME = "logs"
 METADATA_MB_NAME = "metadata"
 
-
-def pass_through(arg: _T) -> _T:
-    return arg
+_I = TypeVar("_I", bound=uuid.UUID | int)
 
 
 class ProvenanceDatasetInfo(DatasetInfo):
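
Note: the removed `pass_through` helper above was the identity default for the `LoopWrapper` alias deleted in the previous hunk (`Callable[[Iterable[_T]], Iterable[_T]]`): a callable that takes an iterable and returns an equivalent one, which callers could swap in to add progress reporting or cancellation checks to potentially-large loops. A minimal sketch of a wrapper satisfying that contract; `progress_wrapper` is illustrative only, not part of lsst-pipe-base:

    from collections.abc import Iterable, Iterator
    from typing import TypeVar

    _T = TypeVar("_T")

    def progress_wrapper(items: Iterable[_T]) -> Iterator[_T]:
        """Yield items unchanged, reporting progress every 1000 items."""
        for i, item in enumerate(items):
            if i and i % 1000 == 0:
                print(f"processed {i} items")  # or poll a cancellation flag here
            yield item

Any such wrapper is a valid `loop_wrapper` argument because it returns an iterable equivalent to its input.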
@@ -1377,629 +1352,3 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
         """Fetch package version information."""
         data = self._read_single_block_raw("packages")
         return Packages.fromBytes(data, format="json")
-
-
-class ProvenanceQuantumGraphWriter:
-    """A struct of low-level writer objects for the main components of a
-    provenance quantum graph.
-
-    Parameters
-    ----------
-    output_path : `str`
-        Path to write the graph to.
-    exit_stack : `contextlib.ExitStack`
-        Object that can be used to manage multiple context managers.
-    log_on_close : `LogOnClose`
-        Factory for context managers that log when closed.
-    predicted : `.PredictedQuantumGraphComponents` or `.PredictedQuantumGraph`
-        Components of the predicted graph.
-    zstd_level : `int`, optional
-        Compression level.
-    cdict_data : `bytes` or `None`, optional
-        Bytes representation of the compression dictionary used by the
-        compressor.
-    loop_wrapper : `~collections.abc.Callable`, optional
-        A callable that takes an iterable and returns an equivalent one, to be
-        used in all potentially-large loops. This can be used to add progress
-        reporting or check for cancelation signals.
-    log : `LsstLogAdapter`, optional
-        Logger to use for debug messages.
-    """
-
-    def __init__(
-        self,
-        output_path: str,
-        *,
-        exit_stack: ExitStack,
-        log_on_close: LogOnClose,
-        predicted: PredictedQuantumGraphComponents | PredictedQuantumGraph,
-        zstd_level: int = 10,
-        cdict_data: bytes | None = None,
-        loop_wrapper: LoopWrapper = pass_through,
-        log: LsstLogAdapter | None = None,
-    ) -> None:
-        header = predicted.header.model_copy()
-        header.graph_type = "provenance"
-        if log is None:
-            log = _LOG
-        self.log = log
-        self._base_writer = exit_stack.enter_context(
-            log_on_close.wrap(
-                BaseQuantumGraphWriter.open(
-                    output_path,
-                    header,
-                    predicted.pipeline_graph,
-                    address_filename="nodes",
-                    zstd_level=zstd_level,
-                    cdict_data=cdict_data,
-                ),
-                "Finishing writing provenance quantum graph.",
-            )
-        )
-        self._base_writer.address_writer.addresses = [{}, {}, {}, {}]
-        self._log_writer = exit_stack.enter_context(
-            log_on_close.wrap(
-                MultiblockWriter.open_in_zip(
-                    self._base_writer.zf, LOG_MB_NAME, header.int_size, use_tempfile=True
-                ),
-                "Copying logs into zip archive.",
-            ),
-        )
-        self._base_writer.address_writer.addresses[LOG_ADDRESS_INDEX] = self._log_writer.addresses
-        self._metadata_writer = exit_stack.enter_context(
-            log_on_close.wrap(
-                MultiblockWriter.open_in_zip(
-                    self._base_writer.zf, METADATA_MB_NAME, header.int_size, use_tempfile=True
-                ),
-                "Copying metadata into zip archive.",
-            )
-        )
-        self._base_writer.address_writer.addresses[METADATA_ADDRESS_INDEX] = self._metadata_writer.addresses
-        self._dataset_writer = exit_stack.enter_context(
-            log_on_close.wrap(
-                MultiblockWriter.open_in_zip(
-                    self._base_writer.zf, DATASET_MB_NAME, header.int_size, use_tempfile=True
-                ),
-                "Copying dataset provenance into zip archive.",
-            )
-        )
-        self._base_writer.address_writer.addresses[DATASET_ADDRESS_INDEX] = self._dataset_writer.addresses
-        self._quantum_writer = exit_stack.enter_context(
-            log_on_close.wrap(
-                MultiblockWriter.open_in_zip(
-                    self._base_writer.zf, QUANTUM_MB_NAME, header.int_size, use_tempfile=True
-                ),
-                "Copying quantum provenance into zip archive.",
-            )
-        )
-        self._base_writer.address_writer.addresses[QUANTUM_ADDRESS_INDEX] = self._quantum_writer.addresses
-        self._init_predicted_quanta(predicted)
-        self._populate_xgraph_and_inputs(loop_wrapper)
-        self._existing_init_outputs: set[uuid.UUID] = set()
-
-    def _init_predicted_quanta(
-        self, predicted: PredictedQuantumGraph | PredictedQuantumGraphComponents
-    ) -> None:
-        self._predicted_init_quanta: list[PredictedQuantumDatasetsModel] = []
-        self._predicted_quanta: dict[uuid.UUID, PredictedQuantumDatasetsModel] = {}
-        if isinstance(predicted, PredictedQuantumGraph):
-            self._predicted_init_quanta.extend(predicted._init_quanta.values())
-            self._predicted_quanta.update(predicted._quantum_datasets)
-        else:
-            self._predicted_init_quanta.extend(predicted.init_quanta.root)
-            self._predicted_quanta.update(predicted.quantum_datasets)
-        self._predicted_quanta.update({q.quantum_id: q for q in self._predicted_init_quanta})
-
-    def _populate_xgraph_and_inputs(self, loop_wrapper: LoopWrapper = pass_through) -> None:
-        self._xgraph = networkx.DiGraph()
-        self._overall_inputs: dict[uuid.UUID, PredictedDatasetModel] = {}
-        output_dataset_ids: set[uuid.UUID] = set()
-        for predicted_quantum in loop_wrapper(self._predicted_quanta.values()):
-            if not predicted_quantum.task_label:
-                # Skip the 'packages' producer quantum.
-                continue
-            output_dataset_ids.update(predicted_quantum.iter_output_dataset_ids())
-        for predicted_quantum in loop_wrapper(self._predicted_quanta.values()):
-            if not predicted_quantum.task_label:
-                # Skip the 'packages' producer quantum.
-                continue
-            for predicted_input in itertools.chain.from_iterable(predicted_quantum.inputs.values()):
-                self._xgraph.add_edge(predicted_input.dataset_id, predicted_quantum.quantum_id)
-                if predicted_input.dataset_id not in output_dataset_ids:
-                    self._overall_inputs.setdefault(predicted_input.dataset_id, predicted_input)
-            for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
-                self._xgraph.add_edge(predicted_quantum.quantum_id, predicted_output.dataset_id)
-
-    @property
-    def compressor(self) -> Compressor:
-        """Object that should be used to compress all JSON blocks."""
-        return self._base_writer.compressor
-
-    def write_packages(self) -> None:
-        """Write package version information to the provenance graph."""
-        packages = Packages.fromSystem(include_all=True)
-        data = packages.toBytes("json")
-        self._base_writer.write_single_block("packages", data)
-
-    def write_overall_inputs(self, loop_wrapper: LoopWrapper = pass_through) -> None:
-        """Write provenance for overall-input datasets.
-
-        Parameters
-        ----------
-        loop_wrapper : `~collections.abc.Callable`, optional
-            A callable that takes an iterable and returns an equivalent one, to
-            be used in all potentially-large loops. This can be used to add
-            progress reporting or check for cancelation signals.
-        """
-        for predicted_input in loop_wrapper(self._overall_inputs.values()):
-            if predicted_input.dataset_id not in self._dataset_writer.addresses:
-                self._dataset_writer.write_model(
-                    predicted_input.dataset_id,
-                    ProvenanceDatasetModel.from_predicted(
-                        predicted_input,
-                        producer=None,
-                        consumers=self._xgraph.successors(predicted_input.dataset_id),
-                    ),
-                    self.compressor,
-                )
-        del self._overall_inputs
-
-    def write_init_outputs(self, assume_existence: bool = True) -> None:
-        """Write provenance for init-output datasets and init-quanta.
-
-        Parameters
-        ----------
-        assume_existence : `bool`, optional
-            If `True`, just assume all init-outputs exist.
-        """
-        init_quanta = ProvenanceInitQuantaModel()
-        for predicted_init_quantum in self._predicted_init_quanta:
-            if not predicted_init_quantum.task_label:
-                # Skip the 'packages' producer quantum.
-                continue
-            for predicted_output in itertools.chain.from_iterable(predicted_init_quantum.outputs.values()):
-                provenance_output = ProvenanceDatasetModel.from_predicted(
-                    predicted_output,
-                    producer=predicted_init_quantum.quantum_id,
-                    consumers=self._xgraph.successors(predicted_output.dataset_id),
-                )
-                provenance_output.produced = assume_existence or (
-                    provenance_output.dataset_id in self._existing_init_outputs
-                )
-                self._dataset_writer.write_model(
-                    provenance_output.dataset_id, provenance_output, self.compressor
-                )
-            init_quanta.root.append(ProvenanceInitQuantumModel.from_predicted(predicted_init_quantum))
-        self._base_writer.write_single_model("init_quanta", init_quanta)
-
-    def write_quantum_provenance(
-        self, quantum_id: uuid.UUID, metadata: TaskMetadata | None, logs: ButlerLogRecords | None
-    ) -> None:
-        """Gather and write provenance for a quantum.
-
-        Parameters
-        ----------
-        quantum_id : `uuid.UUID`
-            Unique ID for the quantum.
-        metadata : `..TaskMetadata` or `None`
-            Task metadata.
-        logs : `lsst.daf.butler.logging.ButlerLogRecords` or `None`
-            Task logs.
-        """
-        predicted_quantum = self._predicted_quanta[quantum_id]
-        provenance_models = ProvenanceQuantumScanModels.from_metadata_and_logs(
-            predicted_quantum, metadata, logs, assume_complete=True
-        )
-        scan_data = provenance_models.to_scan_data(predicted_quantum, compressor=self.compressor)
-        self.write_scan_data(scan_data)
-
-    def write_scan_data(self, scan_data: ProvenanceQuantumScanData) -> None:
-        """Write the output of a quantum provenance scan to disk.
-
-        Parameters
-        ----------
-        scan_data : `ProvenanceQuantumScanData`
-            Result of a quantum provenance scan.
-        """
-        if scan_data.status is ProvenanceQuantumScanStatus.INIT:
-            self.log.debug("Handling init-output scan for %s.", scan_data.quantum_id)
-            self._existing_init_outputs.update(scan_data.existing_outputs)
-            return
-        self.log.debug("Handling quantum scan for %s.", scan_data.quantum_id)
-        # We shouldn't need this predicted quantum after this method runs; pop
-        # it from the dict in the hopes that'll free up some memory when we're
-        # done.
-        predicted_quantum = self._predicted_quanta.pop(scan_data.quantum_id)
-        outputs: dict[uuid.UUID, bytes] = {}
-        for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
-            provenance_output = ProvenanceDatasetModel.from_predicted(
-                predicted_output,
-                producer=predicted_quantum.quantum_id,
-                consumers=self._xgraph.successors(predicted_output.dataset_id),
-            )
-            provenance_output.produced = provenance_output.dataset_id in scan_data.existing_outputs
-            outputs[provenance_output.dataset_id] = self.compressor.compress(
-                provenance_output.model_dump_json().encode()
-            )
-        if not scan_data.quantum:
-            scan_data.quantum = (
-                ProvenanceQuantumModel.from_predicted(predicted_quantum).model_dump_json().encode()
-            )
-            if scan_data.is_compressed:
-                scan_data.quantum = self.compressor.compress(scan_data.quantum)
-        if not scan_data.is_compressed:
-            scan_data.quantum = self.compressor.compress(scan_data.quantum)
-            if scan_data.metadata:
-                scan_data.metadata = self.compressor.compress(scan_data.metadata)
-            if scan_data.logs:
-                scan_data.logs = self.compressor.compress(scan_data.logs)
-        self.log.debug("Writing quantum %s.", scan_data.quantum_id)
-        self._quantum_writer.write_bytes(scan_data.quantum_id, scan_data.quantum)
-        for dataset_id, dataset_data in outputs.items():
-            self._dataset_writer.write_bytes(dataset_id, dataset_data)
-        if scan_data.metadata:
-            (metadata_output,) = predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
-            address = self._metadata_writer.write_bytes(scan_data.quantum_id, scan_data.metadata)
-            self._metadata_writer.addresses[metadata_output.dataset_id] = address
-        if scan_data.logs:
-            (log_output,) = predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
-            address = self._log_writer.write_bytes(scan_data.quantum_id, scan_data.logs)
-            self._log_writer.addresses[log_output.dataset_id] = address
-
-
-class ProvenanceQuantumScanStatus(enum.Enum):
-    """Status enum for quantum scanning.
-
-    Note that this records the status for the *scanning*, which is distinct
-    from the status of the quantum's execution.
-    """
-
-    INCOMPLETE = enum.auto()
-    """The quantum is not necessarily done running, and cannot be scanned
-    conclusively yet.
-    """
-
-    ABANDONED = enum.auto()
-    """The quantum's execution appears to have failed and we cannot rule out
-    the possibility that it could be recovered, but we've also waited long
-    enough (according to `ScannerTimeConfigDict.retry_timeout`) that it's time
-    to stop trying for now.
-
-    This state means a later run with `ScannerConfig.assume_complete` is
-    required.
-    """
-
-    SUCCESSFUL = enum.auto()
-    """The quantum was conclusively scanned and was executed successfully,
-    unblocking scans for downstream quanta.
-    """
-
-    FAILED = enum.auto()
-    """The quantum was conclusively scanned and failed execution, blocking
-    scans for downstream quanta.
-    """
-
-    BLOCKED = enum.auto()
-    """A quantum upstream of this one failed."""
-
-    INIT = enum.auto()
-    """Init quanta need special handling, because they don't have logs and
-    metadata.
-    """
-
-
-@dataclasses.dataclass
-class ProvenanceQuantumScanModels:
-    """A struct that represents provenance information for a single quantum."""
-
-    quantum_id: uuid.UUID
-    """Unique ID for the quantum."""
-
-    status: ProvenanceQuantumScanStatus = ProvenanceQuantumScanStatus.INCOMPLETE
-    """Combined status for the scan and the execution of the quantum."""
-
-    attempts: list[ProvenanceQuantumAttemptModel] = dataclasses.field(default_factory=list)
-    """Provenance information about each attempt to run the quantum."""
-
-    output_existence: dict[uuid.UUID, bool] = dataclasses.field(default_factory=dict)
-    """Unique IDs of the output datasets mapped to whether they were actually
-    produced.
-    """
-
-    metadata: ProvenanceTaskMetadataModel = dataclasses.field(default_factory=ProvenanceTaskMetadataModel)
-    """Task metadata information for each attempt.
-    """
-
-    logs: ProvenanceLogRecordsModel = dataclasses.field(default_factory=ProvenanceLogRecordsModel)
-    """Log records for each attempt.
-    """
-
-    @classmethod
-    def from_metadata_and_logs(
-        cls,
-        predicted: PredictedQuantumDatasetsModel,
-        metadata: TaskMetadata | None,
-        logs: ButlerLogRecords | None,
-        *,
-        assume_complete: bool = True,
-    ) -> ProvenanceQuantumScanModels:
-        """Construct provenance information from task metadata and logs.
-
-        Parameters
-        ----------
-        predicted : `PredictedQuantumDatasetsModel`
-            Information about the predicted quantum.
-        metadata : `..TaskMetadata` or `None`
-            Task metadata.
-        logs : `lsst.daf.butler.logging.ButlerLogRecords` or `None`
-            Task logs.
-        assume_complete : `bool`, optional
-            If `False`, treat execution failures as possibly-incomplete quanta
-            and do not fully process them; instead just set the status to
-            `ProvenanceQuantumScanStatus.ABANDONED` and return.
-
-        Returns
-        -------
-        scan_models : `ProvenanceQuantumScanModels`
-            Struct of models that describe quantum provenance.
-
-        Notes
-        -----
-        This method does not necessarily fully populate the `output_existence`
-        field; it does what it can given the information in the metadata and
-        logs, but the caller is responsible for filling in the existence status
-        for any predicted outputs that are not present at all in that `dict`.
-        """
-        self = ProvenanceQuantumScanModels(predicted.quantum_id)
-        last_attempt = ProvenanceQuantumAttemptModel()
-        self._process_logs(predicted, logs, last_attempt, assume_complete=assume_complete)
-        self._process_metadata(predicted, metadata, last_attempt, assume_complete=assume_complete)
-        if self.status is ProvenanceQuantumScanStatus.ABANDONED:
-            return self
-        self._reconcile_attempts(last_attempt)
-        self._extract_output_existence(predicted)
-        return self
-
-    def _process_logs(
-        self,
-        predicted: PredictedQuantumDatasetsModel,
-        logs: ButlerLogRecords | None,
-        last_attempt: ProvenanceQuantumAttemptModel,
-        *,
-        assume_complete: bool,
-    ) -> None:
-        (predicted_log_dataset,) = predicted.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
-        if logs is None:
-            self.output_existence[predicted_log_dataset.dataset_id] = False
-            if assume_complete:
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
-                self.status = ProvenanceQuantumScanStatus.ABANDONED
-        else:
-            # Set the attempt's run status to FAILED, since the default is
-            # UNKNOWN (i.e. logs *and* metadata are missing) and we now know
-            # the logs exist. This will usually get replaced by SUCCESSFUL
-            # when we look for metadata next.
-            last_attempt.status = QuantumAttemptStatus.FAILED
-            self.output_existence[predicted_log_dataset.dataset_id] = True
-            if logs.extra:
-                log_extra = _ExecutionLogRecordsExtra.model_validate(logs.extra)
-                self._extract_from_log_extra(log_extra, last_attempt=last_attempt)
-            self.logs.attempts.append(list(logs))
-
-    def _extract_from_log_extra(
-        self,
-        log_extra: _ExecutionLogRecordsExtra,
-        last_attempt: ProvenanceQuantumAttemptModel | None,
-    ) -> None:
-        for previous_attempt_log_extra in log_extra.previous_attempts:
-            self._extract_from_log_extra(
-                previous_attempt_log_extra,
-                last_attempt=None,
-            )
-        quantum_attempt: ProvenanceQuantumAttemptModel
-        if last_attempt is None:
-            # This is not the last attempt, so it must be a failure.
-            quantum_attempt = ProvenanceQuantumAttemptModel(
-                attempt=len(self.attempts), status=QuantumAttemptStatus.FAILED
-            )
-            # We also need to get the logs from this extra provenance, since
-            # they won't be the main section of the log records.
-            self.logs.attempts.append(log_extra.logs)
-            # The special last attempt is only appended after we attempt to
-            # read metadata later, but we have to append this one now.
-            self.attempts.append(quantum_attempt)
-        else:
-            assert not log_extra.logs, "Logs for the last attempt should not be stored in the extra JSON."
-            quantum_attempt = last_attempt
-        if log_extra.exception is not None or log_extra.metadata is not None or last_attempt is None:
-            # We won't be getting a separate metadata dataset, so anything we
-            # might get from the metadata has to come from this extra
-            # provenance in the logs.
-            quantum_attempt.exception = log_extra.exception
-            if log_extra.metadata is not None:
-                quantum_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(log_extra.metadata)
-                self.metadata.attempts.append(log_extra.metadata)
-            else:
-                self.metadata.attempts.append(None)
-        # Regardless of whether this is the last attempt or not, we can only
-        # get the previous_process_quanta from the log extra.
-        quantum_attempt.previous_process_quanta.extend(log_extra.previous_process_quanta)
-
-    def _process_metadata(
-        self,
-        predicted: PredictedQuantumDatasetsModel,
-        metadata: TaskMetadata | None,
-        last_attempt: ProvenanceQuantumAttemptModel,
-        *,
-        assume_complete: bool,
-    ) -> None:
-        (predicted_metadata_dataset,) = predicted.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
-        if metadata is None:
-            self.output_existence[predicted_metadata_dataset.dataset_id] = False
-            if assume_complete:
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
-                self.status = ProvenanceQuantumScanStatus.ABANDONED
-        else:
-            self.status = ProvenanceQuantumScanStatus.SUCCESSFUL
-            self.output_existence[predicted_metadata_dataset.dataset_id] = True
-            last_attempt.status = QuantumAttemptStatus.SUCCESSFUL
-            try:
-                # Int conversion guards against spurious conversion to
-                # float that can apparently sometimes happen in
-                # TaskMetadata.
-                last_attempt.caveats = QuantumSuccessCaveats(int(metadata["quantum"]["caveats"]))
-            except LookupError:
-                pass
-            try:
-                last_attempt.exception = ExceptionInfo._from_metadata(
-                    metadata[predicted.task_label]["failure"]
-                )
-            except LookupError:
-                pass
-            last_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(metadata)
-            self.metadata.attempts.append(metadata)
-
-    def _reconcile_attempts(self, last_attempt: ProvenanceQuantumAttemptModel) -> None:
-        last_attempt.attempt = len(self.attempts)
-        self.attempts.append(last_attempt)
-        assert self.status is not ProvenanceQuantumScanStatus.INCOMPLETE
-        assert self.status is not ProvenanceQuantumScanStatus.ABANDONED
-        if len(self.logs.attempts) < len(self.attempts):
-            # Logs were not found for this attempt; must have been a hard error
-            # that kept the `finally` block from running or otherwise
-            # interrupted the writing of the logs.
-            self.logs.attempts.append(None)
-            if self.status is ProvenanceQuantumScanStatus.SUCCESSFUL:
-                # But we found the metadata! Either that hard error happened
-                # at a very unlucky time (in between those two writes), or
-                # something even weirder happened.
-                self.attempts[-1].status = QuantumAttemptStatus.LOGS_MISSING
-            else:
-                self.attempts[-1].status = QuantumAttemptStatus.FAILED
-        if len(self.metadata.attempts) < len(self.attempts):
-            # Metadata missing usually just means a failure. In any case, the
-            # status will already be correct, either because it was set to a
-            # failure when we read the logs, or left at UNKNOWN if there were
-            # no logs. Note that scanners never process BLOCKED quanta at all.
-            self.metadata.attempts.append(None)
-        assert len(self.logs.attempts) == len(self.attempts) or len(self.metadata.attempts) == len(
-            self.attempts
-        ), (
-            "The only way we can add more than one quantum attempt is by "
-            "extracting info stored with the logs, and that always appends "
-            "a log attempt and a metadata attempt, so this must be a bug in "
-            "this class."
-        )
-
-    def _extract_output_existence(self, predicted: PredictedQuantumDatasetsModel) -> None:
-        try:
-            outputs_put = self.metadata.attempts[-1]["quantum"].getArray("outputs")  # type: ignore[index]
-        except (
-            IndexError,  # metadata.attempts is empty
-            TypeError,  # metadata.attempts[-1] is None
-            LookupError,  # no 'quantum' entry in metadata or 'outputs' in that
-        ):
-            pass
-        else:
-            for id_str in ensure_iterable(outputs_put):
-                self.output_existence[uuid.UUID(id_str)] = True
-            # If the metadata told us what it wrote, anything not in that
-            # list was not written.
-            for predicted_output in itertools.chain.from_iterable(predicted.outputs.values()):
-                self.output_existence.setdefault(predicted_output.dataset_id, False)
-
-    def to_scan_data(
-        self: ProvenanceQuantumScanModels,
-        predicted_quantum: PredictedQuantumDatasetsModel,
-        compressor: Compressor | None = None,
-    ) -> ProvenanceQuantumScanData:
-        """Convert these models to JSON data.
-
-        Parameters
-        ----------
-        predicted_quantum : `PredictedQuantumDatasetsModel`
-            Information about the predicted quantum.
-        compressor : `Compressor`, optional
-            Object that can compress bytes.
-
-        Returns
-        -------
-        scan_data : `ProvenanceQuantumScanData`
-            Scan information ready for serialization.
-        """
-        quantum: ProvenanceInitQuantumModel | ProvenanceQuantumModel
-        if self.status is ProvenanceQuantumScanStatus.INIT:
-            quantum = ProvenanceInitQuantumModel.from_predicted(predicted_quantum)
-        else:
-            quantum = ProvenanceQuantumModel.from_predicted(predicted_quantum)
-            quantum.attempts = self.attempts
-            for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
-                if predicted_output.dataset_id not in self.output_existence:
-                    raise RuntimeError(
-                        "Logic bug in provenance gathering or execution invariants: "
-                        f"no existence information for output {predicted_output.dataset_id} "
-                        f"({predicted_output.dataset_type_name}@{predicted_output.data_coordinate})."
-                    )
-        data = ProvenanceQuantumScanData(
-            self.quantum_id,
-            self.status,
-            existing_outputs={
-                dataset_id for dataset_id, was_produced in self.output_existence.items() if was_produced
-            },
-            quantum=quantum.model_dump_json().encode(),
-            logs=self.logs.model_dump_json().encode() if self.logs.attempts else b"",
-            metadata=self.metadata.model_dump_json().encode() if self.metadata.attempts else b"",
-        )
-        if compressor is not None:
-            data.compress(compressor)
-        return data
-
-
-@dataclasses.dataclass
-class ProvenanceQuantumScanData:
-    """A struct that represents ready-for-serialization provenance information
-    for a single quantum.
-    """
-
-    quantum_id: uuid.UUID
-    """Unique ID for the quantum."""
-
-    status: ProvenanceQuantumScanStatus
-    """Combined status for the scan and the execution of the quantum."""
-
-    existing_outputs: set[uuid.UUID] = dataclasses.field(default_factory=set)
-    """Unique IDs of the output datasets that were actually written."""
-
-    quantum: bytes = b""
-    """Serialized quantum provenance model.
-
-    This may be empty for quanta that had no attempts.
-    """
-
-    metadata: bytes = b""
-    """Serialized task metadata."""
-
-    logs: bytes = b""
-    """Serialized logs."""
-
-    is_compressed: bool = False
-    """Whether the `quantum`, `metadata`, and `logs` attributes are
-    compressed.
-    """
-
-    def compress(self, compressor: Compressor) -> None:
-        """Compress the data in this struct if it has not been compressed
-        already.
-
-        Parameters
-        ----------
-        compressor : `Compressor`
-            Object with a ``compress`` method that takes and returns `bytes`.
-        """
-        if not self.is_compressed:
-            self.quantum = compressor.compress(self.quantum)
-            self.logs = compressor.compress(self.logs) if self.logs else b""
-            self.metadata = compressor.compress(self.metadata) if self.metadata else b""
-            self.is_compressed = True
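
Note: as the `Notes` section of `from_metadata_and_logs` says, the caller must fill in existence for any predicted outputs the metadata and logs are silent about, and `to_scan_data` raises `RuntimeError` if any output is left undetermined. A sketch of how a caller of the removed 30.0.0rc3 API would tie the pieces together, assuming `predicted`, `metadata`, `logs`, and a `ProvenanceQuantumGraphWriter` named `writer` are already in hand (judging by the file list above, this logic appears to move into the aggregator's `_scanner.py` and `_writer.py` in 30.2025.5100):

    import itertools

    # Build the provenance models from whatever metadata/logs were retrieved.
    models = ProvenanceQuantumScanModels.from_metadata_and_logs(
        predicted, metadata, logs, assume_complete=True
    )
    # The models only record existence for outputs visible in the metadata or
    # logs; default everything else to "not produced" before serializing.
    for predicted_output in itertools.chain.from_iterable(predicted.outputs.values()):
        models.output_existence.setdefault(predicted_output.dataset_id, False)
    scan_data = models.to_scan_data(predicted, compressor=writer.compressor)
    writer.write_scan_data(scan_data)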
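
Note: the `is_compressed` flag makes `ProvenanceQuantumScanData.compress` idempotent, which is what lets `write_scan_data` accept scan data whether or not a scanner already compressed it. A self-contained sketch of the same guard using the `zstandard` package directly; treating the package's `Compressor` (from `._multiblock`) as a zstd wrapper is an assumption based on the `zstd_level` and `cdict_data` parameters above:

    import dataclasses

    import zstandard

    @dataclasses.dataclass
    class _Blob:
        payload: bytes = b""
        is_compressed: bool = False

        def compress(self, compressor: zstandard.ZstdCompressor) -> None:
            # Guarded so a second call cannot double-compress the payload.
            if not self.is_compressed:
                self.payload = compressor.compress(self.payload)
                self.is_compressed = True

    blob = _Blob(b'{"quantum": {}}')
    zc = zstandard.ZstdCompressor(level=10)
    blob.compress(zc)
    blob.compress(zc)  # no-op: already compressed
    assert zstandard.ZstdDecompressor().decompress(blob.payload) == b'{"quantum": {}}'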