lsst-pipe-base 30.0.0rc2__py3-none-any.whl → 30.0.0rc3__py3-none-any.whl

This diff shows the changes between two package versions that were publicly released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
Files changed (30)
  1. lsst/pipe/base/_instrument.py +6 -5
  2. lsst/pipe/base/log_capture.py +39 -79
  3. lsst/pipe/base/log_on_close.py +79 -0
  4. lsst/pipe/base/mp_graph_executor.py +51 -15
  5. lsst/pipe/base/quantum_graph/_common.py +4 -3
  6. lsst/pipe/base/quantum_graph/_multiblock.py +6 -16
  7. lsst/pipe/base/quantum_graph/_predicted.py +106 -12
  8. lsst/pipe/base/quantum_graph/_provenance.py +657 -6
  9. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +18 -50
  10. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +35 -229
  11. lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -113
  12. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +10 -5
  13. lsst/pipe/base/quantum_graph/aggregator/_writer.py +31 -348
  14. lsst/pipe/base/quantum_graph/formatter.py +101 -0
  15. lsst/pipe/base/quantum_graph_builder.py +12 -1
  16. lsst/pipe/base/quantum_graph_executor.py +116 -13
  17. lsst/pipe/base/quantum_graph_skeleton.py +1 -7
  18. lsst/pipe/base/separable_pipeline_executor.py +18 -2
  19. lsst/pipe/base/single_quantum_executor.py +53 -35
  20. lsst/pipe/base/version.py +1 -1
  21. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.0rc3.dist-info}/METADATA +1 -1
  22. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.0rc3.dist-info}/RECORD +30 -28
  23. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.0rc3.dist-info}/WHEEL +0 -0
  24. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.0rc3.dist-info}/entry_points.txt +0 -0
  25. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.0rc3.dist-info}/licenses/COPYRIGHT +0 -0
  26. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.0rc3.dist-info}/licenses/LICENSE +0 -0
  27. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.0rc3.dist-info}/licenses/bsd_license.txt +0 -0
  28. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.0rc3.dist-info}/licenses/gpl-v3.0.txt +0 -0
  29. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.0rc3.dist-info}/top_level.txt +0 -0
  30. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.0rc3.dist-info}/zip-safe +0 -0
lsst/pipe/base/_instrument.py
@@ -31,7 +31,6 @@ __all__ = ("Instrument",)
 
 import contextlib
 import datetime
-import os.path
 from abc import ABCMeta, abstractmethod
 from collections.abc import Sequence
 from typing import TYPE_CHECKING, Any, Self, cast, final
@@ -39,6 +38,7 @@ from typing import TYPE_CHECKING, Any, Self, cast, final
 from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
 from lsst.daf.butler.registry import DataIdError
 from lsst.pex.config import Config, RegistryField
+from lsst.resources import ResourcePath, ResourcePathExpression
 from lsst.utils import doImportType
 from lsst.utils.introspection import get_full_type_name
 
@@ -65,7 +65,7 @@ class Instrument(metaclass=ABCMeta):
     the base class.
     """
 
-    configPaths: Sequence[str] = ()
+    configPaths: Sequence[ResourcePathExpression] = ()
     """Paths to config files to read for specific Tasks.
 
     The paths in this list should contain files of the form `task.py`, for
@@ -366,9 +366,10 @@ class Instrument(metaclass=ABCMeta):
             Config instance to which overrides should be applied.
         """
         for root in self.configPaths:
-            path = os.path.join(root, f"{name}.py")
-            if os.path.exists(path):
-                config.load(path)
+            resource = ResourcePath(root, forceDirectory=True, forceAbsolute=True)
+            uri = resource.join(f"{name}.py", forceDirectory=False)
+            if uri.exists():
+                config.load(uri)
 
     @staticmethod
     def formatCollectionTimestamp(timestamp: str | datetime.datetime) -> str:
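
The override lookup now goes through lsst.resources, so configPaths entries may be remote URIs as well as local directories. A minimal sketch of the new lookup against a local directory, assuming lsst.resources is installed; the 'isr' task label is a made-up example:

    import os
    import tempfile

    from lsst.resources import ResourcePath

    with tempfile.TemporaryDirectory() as root:
        # A per-task override file of the form "<task label>.py".
        with open(os.path.join(root, "isr.py"), "w") as f:
            f.write("# hypothetical overrides for a task labeled 'isr'\n")
        resource = ResourcePath(root, forceDirectory=True, forceAbsolute=True)
        uri = resource.join("isr.py", forceDirectory=False)
        print(uri.exists())  # True; Instrument would now call config.load(uri)
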
lsst/pipe/base/log_capture.py
@@ -31,17 +31,15 @@ __all__ = ["LogCapture"]
 
 import dataclasses
 import logging
-import os
-import shutil
-import tempfile
 import uuid
 from collections.abc import Iterator
-from contextlib import contextmanager, suppress
+from contextlib import contextmanager
 from logging import FileHandler
 
 import pydantic
 
-from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
+from lsst.daf.butler import Butler, LimitedButler, Quantum
+from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest
 from lsst.daf.butler.logging import (
     ButlerLogRecord,
     ButlerLogRecordHandler,
@@ -165,7 +163,9 @@ class LogCapture:
         return cls(butler, butler)
 
     @contextmanager
-    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[_LogCaptureContext]:
+    def capture_logging(
+        self, task_node: TaskNode, /, quantum: Quantum, records: ButlerLogRecords | None = None
+    ) -> Iterator[_LogCaptureContext]:
         """Configure logging system to capture logs for execution of this task.
         Parameters
         ----------
@@ -174,6 +174,9 @@ class LogCapture:
             The task definition.
         quantum : `~lsst.daf.butler.Quantum`
             Single Quantum instance.
+        records : `lsst.daf.butler.logging.ButlerLogRecords`, optional
+            Log record container to append to and save. If provided, streaming
+            mode is disabled (since we'll be saving logs in memory anyway).
 
         Notes
        -----
@@ -205,44 +208,40 @@ class LogCapture:
 
        # Add a handler to the root logger to capture execution log output.
        if log_dataset_name is not None:
+            try:
+                [ref] = quantum.outputs[log_dataset_name]
+            except LookupError as exc:
+                raise InvalidQuantumError(
+                    f"Quantum outputs is missing log output dataset type {log_dataset_name};"
+                    " this could happen due to inconsistent options between QuantumGraph generation"
+                    " and execution"
+                ) from exc
            # Either accumulate into ButlerLogRecords or stream JSON records to
            # file and ingest that (ingest is possible only with full butler).
-            if self.stream_json_logs and self.full_butler is not None:
-                # Create the log file in a temporary directory rather than
-                # creating a temporary file. This is necessary because
-                # temporary files are created with restrictive permissions
-                # and during file ingest these permissions persist in the
-                # datastore. Using a temp directory allows us to create
-                # a file with umask default permissions.
-                tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")
-
-                # Construct a file to receive the log records and "touch" it.
-                log_file = os.path.join(tmpdir, f"butler-log-{task_node.label}.json")
-                with open(log_file, "w"):
-                    pass
-                log_handler_file = FileHandler(log_file)
-                log_handler_file.setFormatter(JsonLogFormatter())
-                logging.getLogger().addHandler(log_handler_file)
-
-                try:
-                    with ButlerMDC.set_mdc(mdc):
-                        yield ctx
-                finally:
-                    # Ensure that the logs are stored in butler.
-                    logging.getLogger().removeHandler(log_handler_file)
-                    log_handler_file.close()
-                    if ctx.extra:
-                        with open(log_file, "a") as log_stream:
-                            ButlerLogRecords.write_streaming_extra(
-                                log_stream,
-                                ctx.extra.model_dump_json(exclude_unset=True, exclude_defaults=True),
-                            )
-                    if ctx.store:
-                        self._ingest_log_records(quantum, log_dataset_name, log_file)
-                shutil.rmtree(tmpdir, ignore_errors=True)
+            if self.stream_json_logs and self.full_butler is not None and records is None:
+                with TemporaryForIngest(self.full_butler, ref) as temporary:
+                    log_handler_file = FileHandler(temporary.ospath)
+                    log_handler_file.setFormatter(JsonLogFormatter())
+                    logging.getLogger().addHandler(log_handler_file)
+
+                    try:
+                        with ButlerMDC.set_mdc(mdc):
+                            yield ctx
+                    finally:
+                        # Ensure that the logs are stored in butler.
+                        logging.getLogger().removeHandler(log_handler_file)
+                        log_handler_file.close()
+                        if ctx.extra:
+                            with open(temporary.ospath, "a") as log_stream:
+                                ButlerLogRecords.write_streaming_extra(
+                                    log_stream,
+                                    ctx.extra.model_dump_json(exclude_unset=True, exclude_defaults=True),
+                                )
+                        if ctx.store:
+                            temporary.ingest()
 
            else:
-                log_handler_memory = ButlerLogRecordHandler()
+                log_handler_memory = ButlerLogRecordHandler(records)
                logging.getLogger().addHandler(log_handler_memory)
 
                try:
@@ -261,7 +260,6 @@ class LogCapture:
                    logging.getLogger().removeHandler(log_handler_memory)
                    if ctx.store:
                        self._store_log_records(quantum, log_dataset_name, log_handler_memory)
-                    log_handler_memory.records.clear()
 
        else:
            with ButlerMDC.set_mdc(mdc):
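
Both the streaming and in-memory branches follow the same discipline: attach a handler to the root logger, yield to the task body, and always detach in a finally block. A stdlib-only sketch of that pattern (the names here are illustrative, not the module's own):

    import logging
    from collections.abc import Iterator
    from contextlib import contextmanager


    @contextmanager
    def capture_to(handler: logging.Handler) -> Iterator[logging.Handler]:
        root = logging.getLogger()
        root.addHandler(handler)
        try:
            yield handler
        finally:
            # Always detach, even if the task body raised.
            root.removeHandler(handler)
            handler.close()
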
@@ -281,41 +279,3 @@ class LogCapture:
             ) from exc
 
         self.butler.put(log_handler.records, ref)
-
-    def _ingest_log_records(self, quantum: Quantum, dataset_type: str, filename: str) -> None:
-        # If we are logging to an external file we must always try to
-        # close it.
-        assert self.full_butler is not None, "Expected to have full butler for ingest"
-        ingested = False
-        try:
-            # DatasetRef has to be in the Quantum outputs, can lookup by name.
-            try:
-                [ref] = quantum.outputs[dataset_type]
-            except LookupError as exc:
-                raise InvalidQuantumError(
-                    f"Quantum outputs is missing log output dataset type {dataset_type};"
-                    " this could happen due to inconsistent options between QuantumGraph generation"
-                    " and execution"
-                ) from exc
-
-            # Need to ingest this file directly into butler.
-            dataset = FileDataset(path=filename, refs=ref)
-            try:
-                self.full_butler.ingest(dataset, transfer="move")
-                ingested = True
-            except NotImplementedError:
-                # Some datastores can't receive files (e.g. in-memory datastore
-                # when testing), we store empty list for those just to have a
-                # dataset. Alternative is to read the file as a
-                # ButlerLogRecords object and put it.
-                _LOG.info(
-                    "Log records could not be stored in this butler because the"
-                    " datastore can not ingest files, empty record list is stored instead."
-                )
-                records = ButlerLogRecords.from_records([])
-                self.full_butler.put(records, ref)
-        finally:
-            # remove file if it is not ingested
-            if not ingested:
-                with suppress(OSError):
-                    os.remove(filename)
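
TemporaryForIngest replaces the hand-rolled mkdtemp/ingest/rmtree sequence deleted above. Its import path is a private Rubin-specific module, so the sketch below uses only the members visible in this diff (.ospath and .ingest()) and should be read as illustrative, not as documented API:

    from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest


    def stream_then_ingest(full_butler, ref, lines, store=True):
        # `full_butler` is a Butler; `ref` is the DatasetRef for the log dataset.
        with TemporaryForIngest(full_butler, ref) as temporary:
            with open(temporary.ospath, "w") as stream:
                stream.writelines(lines)
            if store:
                # Moves the temporary file into the datastore; if ingest() is
                # never called, the context manager presumably cleans the file up.
                temporary.ingest()
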
lsst/pipe/base/log_on_close.py (new file)
@@ -0,0 +1,79 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("LogOnClose",)
+
+from collections.abc import Callable, Iterator
+from contextlib import AbstractContextManager, contextmanager
+from typing import TypeVar
+
+from lsst.utils.logging import VERBOSE
+
+_T = TypeVar("_T")
+
+
+class LogOnClose:
+    """A factory for context manager wrappers that emit a log message when
+    they are closed.
+
+    Parameters
+    ----------
+    log_func : `~collections.abc.Callable` [ `int`, `str` ]
+        Callable that takes an integer log level and a string message and emits
+        a log message. Note that placeholder formatting is not supported.
+    """
+
+    def __init__(self, log_func: Callable[[int, str], None]):
+        self.log_func = log_func
+
+    def wrap(
+        self,
+        cm: AbstractContextManager[_T],
+        msg: str,
+        level: int = VERBOSE,
+    ) -> AbstractContextManager[_T]:
+        """Wrap a context manager to log when it is exited.
+
+        Parameters
+        ----------
+        cm : `contextlib.AbstractContextManager`
+            Context manager to wrap.
+        msg : `str`
+            Log message.
+        level : `int`, optional
+            Log level.
+        """
+
+        @contextmanager
+        def wrapper() -> Iterator[_T]:
+            with cm as result:
+                yield result
+            self.log_func(level, msg)
+
+        return wrapper()
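
Since the whole file is new, usage is easiest to show directly. A small sketch, assuming the module imports as lsst.pipe.base.log_on_close (per the file list above) and that lsst.utils is installed for the VERBOSE default:

    import logging
    import tempfile

    from lsst.pipe.base.log_on_close import LogOnClose

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger("example")

    factory = LogOnClose(log.log)
    tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".txt")
    with factory.wrap(tmp, "temporary file closed", level=logging.INFO) as f:
        f.write("hello\n")
    # INFO:example:temporary file closed

Note that the message fires after the wrapped context manager has fully exited, which is exactly what the provenance writer below uses it for.
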
lsst/pipe/base/mp_graph_executor.py
@@ -39,20 +39,24 @@ import sys
 import threading
 import time
 import uuid
+from contextlib import ExitStack
 from typing import Literal, cast
 
 import networkx
 
 from lsst.daf.butler import DataCoordinate, Quantum
 from lsst.daf.butler.cli.cliLog import CliLog
+from lsst.daf.butler.logging import ButlerLogRecords
 from lsst.utils.threads import disable_implicit_threading
 
 from ._status import InvalidQuantumError, RepeatableQuantumError
+from ._task_metadata import TaskMetadata
 from .execution_graph_fixup import ExecutionGraphFixup
 from .graph import QuantumGraph
 from .graph_walker import GraphWalker
+from .log_on_close import LogOnClose
 from .pipeline_graph import TaskNode
-from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo
+from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo, ProvenanceQuantumGraphWriter
 from .quantum_graph_executor import QuantumExecutor, QuantumGraphExecutor
 from .quantum_reports import ExecutionStatus, QuantumReport, Report
 
@@ -515,7 +519,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
             start_method = "spawn"
         self._start_method = start_method
 
-    def execute(self, graph: QuantumGraph | PredictedQuantumGraph) -> None:
+    def execute(
+        self, graph: QuantumGraph | PredictedQuantumGraph, *, provenance_graph_file: str | None = None
+    ) -> None:
         # Docstring inherited from QuantumGraphExecutor.execute
         old_graph: QuantumGraph | None = None
         if isinstance(graph, QuantumGraph):
@@ -525,14 +531,31 @@ class MPGraphExecutor(QuantumGraphExecutor):
             new_graph = graph
         xgraph = self._make_xgraph(new_graph, old_graph)
         self._report = Report(qgraphSummary=new_graph._make_summary())
-        try:
-            if self._num_proc > 1:
-                self._execute_quanta_mp(xgraph, self._report)
-            else:
-                self._execute_quanta_in_process(xgraph, self._report)
-        except Exception as exc:
-            self._report.set_exception(exc)
-            raise
+        with ExitStack() as exit_stack:
+            provenance_writer: ProvenanceQuantumGraphWriter | None = None
+            if provenance_graph_file is not None:
+                if provenance_graph_file is not None and self._num_proc > 1:
+                    raise NotImplementedError(
+                        "Provenance writing is not implemented for multiprocess execution."
+                    )
+                provenance_writer = ProvenanceQuantumGraphWriter(
+                    provenance_graph_file,
+                    exit_stack=exit_stack,
+                    log_on_close=LogOnClose(_LOG.log),
+                    predicted=new_graph,
+                )
+            try:
+                if self._num_proc > 1:
+                    self._execute_quanta_mp(xgraph, self._report)
+                else:
+                    self._execute_quanta_in_process(xgraph, self._report, provenance_writer)
+            except Exception as exc:
+                self._report.set_exception(exc)
+                raise
+            if provenance_writer is not None:
+                provenance_writer.write_overall_inputs()
+                provenance_writer.write_packages()
+                provenance_writer.write_init_outputs(assume_existence=True)
 
     def _make_xgraph(
         self, new_graph: PredictedQuantumGraph, old_graph: QuantumGraph | None
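
execute() now owns an ExitStack and hands it to the provenance writer, so the writer's resources are released whenever the with block exits, including on the re-raised exception. A stdlib-only sketch of that ownership pattern (SketchWriter is a stand-in, not the real writer):

    from contextlib import ExitStack


    class SketchWriter:
        def __init__(self, path: str, exit_stack: ExitStack):
            self._stream = open(path, "wb")
            # Register cleanup with the caller's stack instead of being a
            # context manager itself.
            exit_stack.callback(self._stream.close)

        def write(self, data: bytes) -> None:
            self._stream.write(data)


    with ExitStack() as stack:
        writer = SketchWriter("/tmp/provenance.dat", stack)  # hypothetical path
        writer.write(b"...")  # raising here would still close the file
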
@@ -576,7 +599,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
             raise MPGraphExecutorError("Updated execution graph has dependency cycle.")
         return xgraph
 
-    def _execute_quanta_in_process(self, xgraph: networkx.DiGraph, report: Report) -> None:
+    def _execute_quanta_in_process(
+        self, xgraph: networkx.DiGraph, report: Report, provenance_writer: ProvenanceQuantumGraphWriter | None
+    ) -> None:
         """Execute all Quanta in current process.
 
         Parameters
@@ -589,6 +614,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
             `.quantum_graph.PredictedQuantumGraph.quantum_only_xgraph`.
         report : `Report`
             Object for reporting execution status.
+        provenance_writer : `.quantum_graph.ProvenanceQuantumGraphWriter` or \
+            `None`
+            Object for recording provenance.
         """
 
         def tiebreaker_sort_key(quantum_id: uuid.UUID) -> tuple:
@@ -606,16 +634,19 @@ class MPGraphExecutor(QuantumGraphExecutor):
 
             _LOG.debug("Executing %s (%s@%s)", quantum_id, task_node.label, data_id)
             fail_exit_code: int | None = None
+            task_metadata: TaskMetadata | None = None
+            task_logs = ButlerLogRecords([])
             try:
                 # For some exception types we want to exit immediately with
                 # exception-specific exit code, but we still want to start
                 # debugger before exiting if debugging is enabled.
                 try:
-                    _, quantum_report = self._quantum_executor.execute(
-                        task_node, quantum, quantum_id=quantum_id
+                    execution_result = self._quantum_executor.execute(
+                        task_node, quantum, quantum_id=quantum_id, log_records=task_logs
                     )
-                    if quantum_report:
-                        report.quantaReports.append(quantum_report)
+                    if execution_result.report:
+                        report.quantaReports.append(execution_result.report)
+                    task_metadata = execution_result.task_metadata
                     success_count += 1
                     walker.finish(quantum_id)
                 except RepeatableQuantumError as exc:
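
Each quantum now gets a fresh ButlerLogRecords([]) container that the executor threads through to log capture, which is what lets the loop hand the captured records to the provenance writer afterwards (see the next hunk). A sketch of filling such a container by hand; passing it to ButlerLogRecordHandler relies on the constructor change visible in the log_capture.py hunks above, so treat the signature as release-specific:

    import logging

    from lsst.daf.butler.logging import ButlerLogRecordHandler, ButlerLogRecords

    task_logs = ButlerLogRecords([])
    handler = ButlerLogRecordHandler(task_logs)  # appends into task_logs in place
    root = logging.getLogger()
    root.addHandler(handler)
    try:
        root.warning("captured for provenance")
    finally:
        root.removeHandler(handler)
    print(len(task_logs))  # 1
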
@@ -701,6 +732,11 @@ class MPGraphExecutor(QuantumGraphExecutor):
                     )
                     failed_count += 1
 
+            if provenance_writer is not None:
+                provenance_writer.write_quantum_provenance(
+                    quantum_id, metadata=task_metadata, logs=task_logs
+                )
+
             _LOG.info(
                 "Executed %d quanta successfully, %d failed and %d remain out of total %d quanta.",
                 success_count,
lsst/pipe/base/quantum_graph/_common.py
@@ -448,14 +448,15 @@ class BaseQuantumGraphWriter:
         uri: ResourcePathExpression,
         header: HeaderModel,
         pipeline_graph: PipelineGraph,
-        indices: dict[uuid.UUID, int],
         *,
         address_filename: str,
-        compressor: Compressor,
         cdict_data: bytes | None = None,
+        zstd_level: int = 10,
     ) -> Iterator[Self]:
         uri = ResourcePath(uri)
-        address_writer = AddressWriter(indices)
+        address_writer = AddressWriter()
+        cdict = zstandard.ZstdCompressionDict(cdict_data) if cdict_data is not None else None
+        compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
         with uri.open(mode="wb") as stream:
             with zipfile.ZipFile(stream, mode="w", compression=zipfile.ZIP_STORED) as zf:
                 self = cls(zf, compressor, address_writer, header.int_size)
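
open() now receives only the serialized dictionary bytes plus a level and builds the compressor itself, rather than being handed a ready-made Compressor. The round trip is plain zstandard API; a runnable sketch (the dictionary content here is arbitrary bytes, whereas the real caller trains one, as the _predicted.py hunks below show):

    import zstandard

    # Any bytes can serve as dictionary content for this illustration.
    cdict_data = b"quantum_id dataset_type_name data_coordinate run " * 64
    cdict = zstandard.ZstdCompressionDict(cdict_data)
    compressor = zstandard.ZstdCompressor(level=10, dict_data=cdict)
    blob = compressor.compress(b'{"dataset_type_name": "raw", "run": "r1"}')

    # Readers rebuild the same dictionary from the stored bytes.
    decompressor = zstandard.ZstdDecompressor(dict_data=cdict)
    assert decompressor.decompress(blob) == b'{"dataset_type_name": "raw", "run": "r1"}'
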
lsst/pipe/base/quantum_graph/_multiblock.py
@@ -205,13 +205,6 @@ class AddressRow:
 class AddressWriter:
     """A helper object for writing address files for multi-block files."""
 
-    indices: dict[uuid.UUID, int] = dataclasses.field(default_factory=dict)
-    """Mapping from UUID to internal integer ID.
-
-    The internal integer ID must always correspond to the index into the
-    sorted list of all UUIDs, but this `dict` need not be sorted itself.
-    """
-
     addresses: list[dict[uuid.UUID, Address]] = dataclasses.field(default_factory=list)
     """Addresses to store with each UUID.
 
@@ -229,18 +222,15 @@ class AddressWriter:
         int_size : `int`
             Number of bytes to use for all integers.
         """
-        for n, address_map in enumerate(self.addresses):
-            if not self.indices.keys() >= address_map.keys():
-                raise AssertionError(
-                    f"Logic bug in quantum graph I/O: address map {n} of {len(self.addresses)} has IDs "
-                    f"{address_map.keys() - self.indices.keys()} not in the index map."
-                )
+        indices: set[uuid.UUID] = set()
+        for address_map in self.addresses:
+            indices.update(address_map.keys())
         stream.write(int_size.to_bytes(1))
-        stream.write(len(self.indices).to_bytes(int_size))
+        stream.write(len(indices).to_bytes(int_size))
         stream.write(len(self.addresses).to_bytes(int_size))
         empty_address = Address()
-        for key in sorted(self.indices.keys(), key=attrgetter("int")):
-            row = AddressRow(key, self.indices[key], [m.get(key, empty_address) for m in self.addresses])
+        for n, key in enumerate(sorted(indices, key=attrgetter("int"))):
+            row = AddressRow(key, n, [m.get(key, empty_address) for m in self.addresses])
             _LOG.debug("Wrote address %s.", row)
             row.write(stream, int_size)
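
The writer no longer needs an up-front UUID-to-index mapping: an index is now defined as the position in the sorted order of all UUIDs that appear in any address map, so it can be derived at write time. The core of that derivation, with strings standing in for Address objects:

    import uuid
    from operator import attrgetter

    address_maps: list[dict[uuid.UUID, str]] = [
        {uuid.uuid4(): "addr-a"},
        {uuid.uuid4(): "addr-b"},
    ]

    ids: set[uuid.UUID] = set()
    for address_map in address_maps:
        ids.update(address_map.keys())

    # Index == position in UUID-sorted order, exactly as rows are written.
    for n, key in enumerate(sorted(ids, key=attrgetter("int"))):
        print(n, key)
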
lsst/pipe/base/quantum_graph/_predicted.py
@@ -66,6 +66,7 @@ from lsst.daf.butler import (
     DimensionDataExtractor,
     DimensionGroup,
     DimensionRecordSetDeserializer,
+    DimensionUniverse,
     LimitedButler,
     Quantum,
     QuantumBackedButler,
@@ -198,10 +199,10 @@ class _PredictedThinGraphModelV0(pydantic.BaseModel):
     def _upgraded(self, address_rows: Mapping[uuid.UUID, AddressRow]) -> PredictedThinGraphModel:
         """Convert to the v1+ model."""
         uuid_by_index = {v.index: k for k, v in address_rows.items()}
-        return PredictedThinGraphModel(
+        return PredictedThinGraphModel.model_construct(
             quanta={
                 task_label: [
-                    PredictedThinQuantumModel(
+                    PredictedThinQuantumModel.model_construct(
                         quantum_id=uuid_by_index[q.quantum_index], data_coordinate=q.data_coordinate
                     )
                     for q in quanta
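
model_construct is pydantic's no-validation constructor; it fits here because the v0 records were already validated when first parsed, so the upgrade is a pure re-shaping. A generic illustration:

    import pydantic


    class Point(pydantic.BaseModel):
        x: int
        y: int


    # Skips validators and coercion entirely; use it only for data that is
    # already known to be well formed, or invalid instances propagate silently.
    p = Point.model_construct(x=1, y=2)
    print(p)  # x=1 y=2
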
@@ -877,6 +878,49 @@ class PredictedQuantumGraph(BaseQuantumGraph):
             page_size=page_size,
         ).assemble()
 
+    @classmethod
+    def make_empty(
+        cls,
+        universe: DimensionUniverse,
+        *,
+        output_run: str,
+        inputs: Iterable[str] = (),
+        output: str | None = None,
+        add_packages: bool = True,
+    ) -> PredictedQuantumGraph:
+        """Make an empty quantum graph with no tasks.
+
+        Parameters
+        ----------
+        universe : `lsst.daf.butler.DimensionUniverse`
+            Definitions for all butler dimensions.
+        output_run : `str`
+            Output run collection.
+        inputs : `~collections.abc.Iterable` [`str`], optional
+            Iterable of input collection names.
+        output : `str` or `None`, optional
+            Output chained collection.
+        add_packages : `bool`, optional
+            Whether to add the special init quantum that writes the 'packages'
+            dataset. The default (`True`) is consistent with
+            `~..quantum_graph_builder.QuantumGraphBuilder` behavior when there
+            are no regular quanta generated.
+
+        Returns
+        -------
+        quantum_graph : `PredictedQuantumGraph`
+            An empty quantum graph.
+        """
+        return cls(
+            PredictedQuantumGraphComponents.make_empty(
+                universe,
+                output_run=output_run,
+                inputs=inputs,
+                output=output,
+                add_packages=add_packages,
+            )
+        )
+
     @property
     def quanta_by_task(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
         """A nested mapping of all quanta, keyed first by task name and then by
@@ -1541,6 +1585,63 @@ class PredictedQuantumGraphComponents:
     This does not include special "init" quanta.
     """
 
+    @classmethod
+    def make_empty(
+        cls,
+        universe: DimensionUniverse,
+        *,
+        output_run: str,
+        inputs: Iterable[str] = (),
+        output: str | None = None,
+        add_packages: bool = True,
+    ) -> PredictedQuantumGraphComponents:
+        """Make components for an empty quantum graph with no tasks.
+
+        Parameters
+        ----------
+        universe : `lsst.daf.butler.DimensionUniverse`
+            Definitions for all butler dimensions.
+        output_run : `str`
+            Output run collection.
+        inputs : `~collections.abc.Iterable` [`str`], optional
+            Iterable of input collection names.
+        output : `str` or `None`, optional
+            Output chained collection.
+        add_packages : `bool`, optional
+            Whether to add the special init quantum that writes the 'packages'
+            dataset. The default (`True`) is consistent with
+            `~..quantum_graph_builder.QuantumGraphBuilder` behavior when there
+            are no regular quanta generated.
+
+        Returns
+        -------
+        components : `PredictedQuantumGraphComponents`
+            Components that can be used to build or write an empty quantum
+            graph.
+        """
+        components = cls(pipeline_graph=PipelineGraph(universe=universe))
+        components.header.inputs = list(inputs)
+        components.header.output_run = output_run
+        components.header.output = output
+        if add_packages:
+            components.init_quanta.root = [
+                PredictedQuantumDatasetsModel.model_construct(
+                    quantum_id=generate_uuidv7(),
+                    task_label="",
+                    outputs={
+                        acc.PACKAGES_INIT_OUTPUT_NAME: [
+                            PredictedDatasetModel(
+                                dataset_id=generate_uuidv7(),
+                                dataset_type_name=acc.PACKAGES_INIT_OUTPUT_NAME,
+                                data_coordinate=[],
+                                run=output_run,
+                            )
+                        ]
+                    },
+                )
+            ]
+        return components
+
     def make_dataset_ref(self, predicted: PredictedDatasetModel) -> DatasetRef:
         """Make a `lsst.daf.butler.DatasetRef` from information in the
         predicted quantum graph.
@@ -1793,7 +1894,6 @@ class PredictedQuantumGraphComponents:
                 f"Unsupported extension {ext!r} for quantum graph; "
                 "expected '.qg' (or '.qgraph' to force the old format)."
             )
-        cdict: zstandard.ZstdCompressionDict | None = None
         cdict_data: bytes | None = None
         quantum_datasets_json: dict[uuid.UUID, bytes] = {}
         if len(self.quantum_datasets) < zstd_dict_n_inputs:
@@ -1807,26 +1907,20 @@ class PredictedQuantumGraphComponents:
                 for quantum_model in itertools.islice(self.quantum_datasets.values(), zstd_dict_n_inputs)
             }
             try:
-                cdict = zstandard.train_dictionary(
+                cdict_data = zstandard.train_dictionary(
                     zstd_dict_size,
                     list(quantum_datasets_json.values()),
                     level=zstd_level,
-                )
+                ).as_bytes()
             except zstandard.ZstdError as err:
                 warnings.warn(f"Not using a compression dictionary: {err}.")
-                cdict = None
-            else:
-                cdict_data = cdict.as_bytes()
-        compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
-        indices = {quantum_id: n for n, quantum_id in enumerate(sorted(self.quantum_datasets.keys()))}
         with BaseQuantumGraphWriter.open(
             uri,
             header=self.header,
             pipeline_graph=self.pipeline_graph,
-            indices=indices,
             address_filename="quanta",
-            compressor=compressor,
             cdict_data=cdict_data,
+            zstd_level=zstd_level,
         ) as writer:
             writer.write_single_model("thin_graph", self.thin_graph)
             if self.dimension_data is None:
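
The save path now serializes the trained dictionary to bytes immediately and passes those bytes to the writer, which rebuilds the compressor itself (see the _common.py hunk above). A sketch of the training step, assuming the zstandard package; training can fail on unsuitable samples, which is what the ZstdError fallback above turns into a warning and a dictionary-less compressor:

    import zstandard

    # Samples should resemble the JSON blobs being compressed; train_dictionary
    # raises zstandard.ZstdError when the samples are too few or too uniform.
    samples = [b'{"quantum_id": "%d", "outputs": ["a", "b"]}' % i for i in range(2048)]
    try:
        cdict_data = zstandard.train_dictionary(16384, samples, level=10).as_bytes()
    except zstandard.ZstdError:
        cdict_data = None  # fall back to compressing without a dictionary
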