lsst-pipe-base 30.2026.200-py3-none-any.whl → 30.2026.400-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +10 -12
- lsst/pipe/base/_status.py +29 -10
- lsst/pipe/base/automatic_connection_constants.py +9 -1
- lsst/pipe/base/cli/cmd/__init__.py +16 -2
- lsst/pipe/base/cli/cmd/commands.py +42 -4
- lsst/pipe/base/connectionTypes.py +72 -160
- lsst/pipe/base/connections.py +3 -6
- lsst/pipe/base/execution_reports.py +0 -5
- lsst/pipe/base/log_capture.py +8 -4
- lsst/pipe/base/log_on_close.py +79 -0
- lsst/pipe/base/mp_graph_executor.py +51 -15
- lsst/pipe/base/pipeline.py +3 -4
- lsst/pipe/base/pipelineIR.py +0 -6
- lsst/pipe/base/pipelineTask.py +5 -7
- lsst/pipe/base/pipeline_graph/_edges.py +19 -7
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +8 -0
- lsst/pipe/base/quantum_graph/_common.py +7 -4
- lsst/pipe/base/quantum_graph/_multiblock.py +6 -16
- lsst/pipe/base/quantum_graph/_predicted.py +111 -10
- lsst/pipe/base/quantum_graph/_provenance.py +727 -26
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +26 -50
- lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +48 -234
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +6 -116
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +24 -18
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +33 -350
- lsst/pipe/base/quantum_graph/formatter.py +171 -0
- lsst/pipe/base/quantum_graph/ingest_graph.py +356 -0
- lsst/pipe/base/quantum_graph_executor.py +116 -13
- lsst/pipe/base/quantum_provenance_graph.py +17 -2
- lsst/pipe/base/separable_pipeline_executor.py +18 -2
- lsst/pipe/base/single_quantum_executor.py +59 -41
- lsst/pipe/base/struct.py +4 -0
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/METADATA +2 -1
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/RECORD +45 -42
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/WHEEL +1 -1
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/zip-safe +0 -0
lsst/pipe/base/log_on_close.py
ADDED

@@ -0,0 +1,79 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("LogOnClose",)
+
+from collections.abc import Callable, Iterator
+from contextlib import AbstractContextManager, contextmanager
+from typing import TypeVar
+
+from lsst.utils.logging import VERBOSE
+
+_T = TypeVar("_T")
+
+
+class LogOnClose:
+    """A factory for context manager wrappers that emit a log message when
+    they are closed.
+
+    Parameters
+    ----------
+    log_func : `~collections.abc.Callable` [ `int`, `str` ]
+        Callable that takes an integer log level and a string message and emits
+        a log message. Note that placeholder formatting is not supported.
+    """
+
+    def __init__(self, log_func: Callable[[int, str], None]):
+        self.log_func = log_func
+
+    def wrap(
+        self,
+        cm: AbstractContextManager[_T],
+        msg: str,
+        level: int = VERBOSE,
+    ) -> AbstractContextManager[_T]:
+        """Wrap a context manager to log when it is exited.
+
+        Parameters
+        ----------
+        cm : `contextlib.AbstractContextManager`
+            Context manager to wrap.
+        msg : `str`
+            Log message.
+        level : `int`, optional
+            Log level.
+        """
+
+        @contextmanager
+        def wrapper() -> Iterator[_T]:
+            with cm as result:
+                yield result
+            self.log_func(level, msg)
+
+        return wrapper()
lsst/pipe/base/mp_graph_executor.py
CHANGED

@@ -39,20 +39,24 @@ import sys
 import threading
 import time
 import uuid
+from contextlib import ExitStack
 from typing import Literal, cast

 import networkx

 from lsst.daf.butler import DataCoordinate, Quantum
 from lsst.daf.butler.cli.cliLog import CliLog
+from lsst.daf.butler.logging import ButlerLogRecords
 from lsst.utils.threads import disable_implicit_threading

 from ._status import InvalidQuantumError, RepeatableQuantumError
+from ._task_metadata import TaskMetadata
 from .execution_graph_fixup import ExecutionGraphFixup
 from .graph import QuantumGraph
 from .graph_walker import GraphWalker
+from .log_on_close import LogOnClose
 from .pipeline_graph import TaskNode
-from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo
+from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo, ProvenanceQuantumGraphWriter
 from .quantum_graph_executor import QuantumExecutor, QuantumGraphExecutor
 from .quantum_reports import ExecutionStatus, QuantumReport, Report

@@ -515,7 +519,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
             start_method = "spawn"
         self._start_method = start_method

-    def execute(
+    def execute(
+        self, graph: QuantumGraph | PredictedQuantumGraph, *, provenance_graph_file: str | None = None
+    ) -> None:
         # Docstring inherited from QuantumGraphExecutor.execute
         old_graph: QuantumGraph | None = None
         if isinstance(graph, QuantumGraph):

@@ -525,14 +531,31 @@ class MPGraphExecutor(QuantumGraphExecutor):
         new_graph = graph
         xgraph = self._make_xgraph(new_graph, old_graph)
         self._report = Report(qgraphSummary=new_graph._make_summary())
-
-
-
-
-
-
-
-
+        with ExitStack() as exit_stack:
+            provenance_writer: ProvenanceQuantumGraphWriter | None = None
+            if provenance_graph_file is not None:
+                if provenance_graph_file is not None and self._num_proc > 1:
+                    raise NotImplementedError(
+                        "Provenance writing is not implemented for multiprocess execution."
+                    )
+                provenance_writer = ProvenanceQuantumGraphWriter(
+                    provenance_graph_file,
+                    exit_stack=exit_stack,
+                    log_on_close=LogOnClose(_LOG.log),
+                    predicted=new_graph,
+                )
+            try:
+                if self._num_proc > 1:
+                    self._execute_quanta_mp(xgraph, self._report)
+                else:
+                    self._execute_quanta_in_process(xgraph, self._report, provenance_writer)
+            except Exception as exc:
+                self._report.set_exception(exc)
+                raise
+            if provenance_writer is not None:
+                provenance_writer.write_overall_inputs()
+                provenance_writer.write_packages()
+                provenance_writer.write_init_outputs(assume_existence=True)

     def _make_xgraph(
         self, new_graph: PredictedQuantumGraph, old_graph: QuantumGraph | None

@@ -576,7 +599,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
             raise MPGraphExecutorError("Updated execution graph has dependency cycle.")
         return xgraph

-    def _execute_quanta_in_process(
+    def _execute_quanta_in_process(
+        self, xgraph: networkx.DiGraph, report: Report, provenance_writer: ProvenanceQuantumGraphWriter | None
+    ) -> None:
         """Execute all Quanta in current process.

         Parameters

@@ -589,6 +614,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
             `.quantum_graph.PredictedQuantumGraph.quantum_only_xgraph`.
         report : `Report`
             Object for reporting execution status.
+        provenance_writer : `.quantum_graph.ProvenanceQuantumGraphWriter` or \
+            `None`
+            Object for recording provenance.
         """

         def tiebreaker_sort_key(quantum_id: uuid.UUID) -> tuple:

@@ -606,16 +634,19 @@ class MPGraphExecutor(QuantumGraphExecutor):

             _LOG.debug("Executing %s (%s@%s)", quantum_id, task_node.label, data_id)
             fail_exit_code: int | None = None
+            task_metadata: TaskMetadata | None = None
+            task_logs = ButlerLogRecords([])
             try:
                 # For some exception types we want to exit immediately with
                 # exception-specific exit code, but we still want to start
                 # debugger before exiting if debugging is enabled.
                 try:
-
-                        task_node, quantum, quantum_id=quantum_id
+                    execution_result = self._quantum_executor.execute(
+                        task_node, quantum, quantum_id=quantum_id, log_records=task_logs
                     )
-                    if
-                    report.quantaReports.append(
+                    if execution_result.report:
+                        report.quantaReports.append(execution_result.report)
+                    task_metadata = execution_result.task_metadata
                     success_count += 1
                     walker.finish(quantum_id)
                 except RepeatableQuantumError as exc:

@@ -701,6 +732,11 @@ class MPGraphExecutor(QuantumGraphExecutor):
                 )
                 failed_count += 1

+            if provenance_writer is not None:
+                provenance_writer.write_quantum_provenance(
+                    quantum_id, metadata=task_metadata, logs=task_logs
+                )
+
         _LOG.info(
             "Executed %d quanta successfully, %d failed and %d remain out of total %d quanta.",
             success_count,
lsst/pipe/base/pipeline.py
CHANGED

@@ -54,13 +54,12 @@ from lsst.utils.introspection import get_full_type_name

 from . import automatic_connection_constants as acc
 from . import pipeline_graph, pipelineIR
-from ._instrument import Instrument as
+from ._instrument import Instrument as Instrument
 from .config import PipelineTaskConfig
 from .connections import PipelineTaskConnections
 from .pipelineTask import PipelineTask

 if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
-    from lsst.obs.base import Instrument
     from lsst.pex.config import Config

 # ----------------------------------

@@ -702,7 +701,7 @@ class Pipeline:
         """
         instrument_class_name = self._pipelineIR.instrument
         if instrument_class_name is not None:
-            instrument_class = cast(
+            instrument_class = cast(Instrument, doImportType(instrument_class_name))
             if instrument_class is not None:
                 return DataCoordinate.standardize(instrument=instrument_class.getName(), universe=universe)
         return DataCoordinate.make_empty(universe)

@@ -893,7 +892,7 @@ class Pipeline:
             raise NameError(f"Label {label} does not appear in this pipeline")
         taskClass: type[PipelineTask] = doImportType(taskIR.klass)
         config = taskClass.ConfigClass()
-        instrument:
+        instrument: Instrument | None = None
         if (instrumentName := self._pipelineIR.instrument) is not None:
             instrument_cls: type = doImportType(instrumentName)
             instrument = instrument_cls()
lsst/pipe/base/pipelineIR.py
CHANGED

@@ -220,12 +220,6 @@ class LabeledSubset:
 class ParametersIR:
     """Intermediate representation of parameters that are global to a pipeline.

-    Attributes
-    ----------
-    mapping : `dict` [`str`, `str`]
-        A mutable mapping of identifiers as keys, and shared configuration
-        as values.
-
     Notes
     -----
     These parameters are specified under a top level key named ``parameters``
lsst/pipe/base/pipelineTask.py
CHANGED

@@ -55,7 +55,7 @@ class PipelineTask(Task):
     resulting data is also stored in a data butler.

     PipelineTask inherits from a `~lsst.pipe.base.Task` and uses the same
-    configuration mechanism based on
+    configuration mechanism based on `lsst.pex.config`. `PipelineTask`
     classes also have a `PipelineTaskConnections` class associated with their
     config which defines all of the IO a `PipelineTask` will need to do.
     PipelineTask sub-class typically implements `run()` method which receives

@@ -75,12 +75,6 @@ class PipelineTask(Task):
     PipelineTask base class constructor, but may support other signatures as
     well.

-    Attributes
-    ----------
-    canMultiprocess : bool, True by default (class attribute)
-        This class attribute is checked by execution framework, sub-classes
-        can set it to ``False`` in case task does not support multiprocessing.
-
     Parameters
     ----------
     config : `~lsst.pex.config.Config`, optional

@@ -102,7 +96,11 @@ class PipelineTask(Task):
     """

     ConfigClass: ClassVar[type[PipelineTaskConfig]]
+
     canMultiprocess: ClassVar[bool] = True
+    """Whether this task can be run by an executor that uses subprocesses for
+    parallelism.
+    """

     def __init__(
         self,
lsst/pipe/base/pipeline_graph/_edges.py
CHANGED

@@ -659,13 +659,25 @@ class ReadEdge(Edge):
                 # compatible), since neither connection should take
                 # precedence.
                 if dataset_type != current:
-
-
-
-
-                        "
-
-
+                    if visualization_only and dataset_type.dimensions == current.dimensions:
+                        # Make a visualization-only ambiguous storage class
+                        # "name".
+                        all_storage_classes = set(current.storageClass_name.split("/"))
+                        all_storage_classes.update(dataset_type.storageClass_name.split("/"))
+                        current = DatasetType(
+                            current.name,
+                            current.dimensions,
+                            "/".join(sorted(all_storage_classes)),
+                        )
+                    else:
+                        raise MissingDatasetTypeError(
+                            f"Definitions differ for input dataset type "
+                            f"{self.parent_dataset_type_name!r}; task {self.task_label!r} has "
+                            f"{dataset_type}, but the definition from {report_current_origin()} is "
+                            f"{current}. If the storage classes are compatible but different, "
+                            "registering the dataset type in the data repository in advance will avoid "
+                            "this error."
+                        )
                 elif not visualization_only and not dataset_type.is_compatible_with(current):
                     raise IncompatibleDatasetTypeError(
                         f"Incompatible definition for input dataset type {self.parent_dataset_type_name!r}; "
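
In the visualization-only branch above, conflicting storage-class declarations are folded into a single "/"-joined, sorted pseudo-name instead of raising. The string manipulation in isolation (the storage-class names are illustrative):

    # One side may already hold a merged pseudo-name from an earlier conflict.
    current_name = "ArrowAstropy/DataFrame"
    incoming_name = "ArrowTable"

    all_storage_classes = set(current_name.split("/"))
    all_storage_classes.update(incoming_name.split("/"))

    merged = "/".join(sorted(all_storage_classes))
    print(merged)  # ArrowAstropy/ArrowTable/DataFrame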
lsst/pipe/base/pipeline_graph/_pipeline_graph.py
CHANGED

@@ -897,6 +897,10 @@ class PipelineGraph:
             New config objects or overrides to apply to copies of the current
             config objects, with task labels as the keywords.

+        Returns
+        -------
+        None
+
         Raises
         ------
         ValueError

@@ -1755,6 +1759,10 @@ class PipelineGraph:
         not considered part of the pipeline graph in other respects, but it
         does get written with other provenance datasets).

+        Returns
+        -------
+        None
+
         Raises
         ------
         lsst.daf.butler.MissingDatasetTypeError
lsst/pipe/base/quantum_graph/_common.py
CHANGED

@@ -448,14 +448,17 @@ class BaseQuantumGraphWriter:
         uri: ResourcePathExpression,
         header: HeaderModel,
         pipeline_graph: PipelineGraph,
-        indices: dict[uuid.UUID, int],
         *,
         address_filename: str,
-        compressor: Compressor,
         cdict_data: bytes | None = None,
+        zstd_level: int = 10,
     ) -> Iterator[Self]:
-        uri = ResourcePath(uri)
-        address_writer = AddressWriter(
+        uri = ResourcePath(uri, forceDirectory=False)
+        address_writer = AddressWriter()
+        if uri.isLocal:
+            os.makedirs(uri.dirname().ospath, exist_ok=True)
+        cdict = zstandard.ZstdCompressionDict(cdict_data) if cdict_data is not None else None
+        compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
         with uri.open(mode="wb") as stream:
             with zipfile.ZipFile(stream, mode="w", compression=zipfile.ZIP_STORED) as zf:
                 self = cls(zf, compressor, address_writer, header.int_size)
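
The writer compresses each member with zstandard itself and stores it in a `ZIP_STORED` archive, so the zip layer adds no second round of compression. A self-contained sketch of that layout (member name and payload are illustrative):

    import io
    import zipfile

    import zstandard

    cdict_data: bytes | None = None  # optional pre-trained dictionary bytes
    cdict = zstandard.ZstdCompressionDict(cdict_data) if cdict_data is not None else None
    compressor = zstandard.ZstdCompressor(level=10, dict_data=cdict)

    buffer = io.BytesIO()
    # ZIP_STORED: members are written verbatim; zstd is the only compression.
    with zipfile.ZipFile(buffer, mode="w", compression=zipfile.ZIP_STORED) as zf:
        zf.writestr("thin_graph", compressor.compress(b'{"nodes": []}'))

    print(f"wrote {len(buffer.getvalue())} bytes")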
lsst/pipe/base/quantum_graph/_multiblock.py
CHANGED

@@ -205,13 +205,6 @@ class AddressRow:
 class AddressWriter:
     """A helper object for writing address files for multi-block files."""

-    indices: dict[uuid.UUID, int] = dataclasses.field(default_factory=dict)
-    """Mapping from UUID to internal integer ID.
-
-    The internal integer ID must always correspond to the index into the
-    sorted list of all UUIDs, but this `dict` need not be sorted itself.
-    """
-
     addresses: list[dict[uuid.UUID, Address]] = dataclasses.field(default_factory=list)
     """Addresses to store with each UUID.

@@ -229,18 +222,15 @@ class AddressWriter:
         int_size : `int`
             Number of bytes to use for all integers.
         """
-
-
-
-                    f"Logic bug in quantum graph I/O: address map {n} of {len(self.addresses)} has IDs "
-                    f"{address_map.keys() - self.indices.keys()} not in the index map."
-                )
+        indices: set[uuid.UUID] = set()
+        for address_map in self.addresses:
+            indices.update(address_map.keys())
         stream.write(int_size.to_bytes(1))
-        stream.write(len(
+        stream.write(len(indices).to_bytes(int_size))
         stream.write(len(self.addresses).to_bytes(int_size))
         empty_address = Address()
-        for key in sorted(
-            row = AddressRow(key,
+        for n, key in enumerate(sorted(indices, key=attrgetter("int"))):
+            row = AddressRow(key, n, [m.get(key, empty_address) for m in self.addresses])
             _LOG.debug("Wrote address %s.", row)
             row.write(stream, int_size)
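
With the explicit `indices` mapping gone, each row's integer ID is now implicit: the union of UUID keys is sorted by `UUID.int` and enumerated. A minimal sketch of the resulting fixed-width table; the UUID-plus-index-plus-(offset, size) row body is an illustrative stand-in for the real `AddressRow` encoding:

    import io
    import uuid
    from operator import attrgetter

    int_size = 8  # bytes per integer, recorded in the table header
    # One address map per block kind; (offset, size) values are illustrative.
    addresses = [{uuid.uuid4(): (0, 100)}, {uuid.uuid4(): (100, 250)}]

    indices: set[uuid.UUID] = set()
    for address_map in addresses:
        indices.update(address_map.keys())

    stream = io.BytesIO()
    stream.write(int_size.to_bytes(1, "big"))               # header: integer width
    stream.write(len(indices).to_bytes(int_size, "big"))    # header: row count
    stream.write(len(addresses).to_bytes(int_size, "big"))  # header: maps per row
    empty = (0, 0)
    # Sorting by UUID.int defines each row's implicit integer ID n.
    for n, key in enumerate(sorted(indices, key=attrgetter("int"))):
        stream.write(key.bytes)
        stream.write(n.to_bytes(int_size, "big"))
        for offset, size in (m.get(key, empty) for m in addresses):
            stream.write(offset.to_bytes(int_size, "big"))
            stream.write(size.to_bytes(int_size, "big"))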
lsst/pipe/base/quantum_graph/_predicted.py
CHANGED

@@ -66,6 +66,7 @@ from lsst.daf.butler import (
     DimensionDataExtractor,
     DimensionGroup,
     DimensionRecordSetDeserializer,
+    DimensionUniverse,
     LimitedButler,
     Quantum,
     QuantumBackedButler,

@@ -109,6 +110,13 @@ if TYPE_CHECKING:
     from ..config import PipelineTaskConfig
     from ..graph import QgraphSummary, QuantumGraph

+# Sphinx needs imports for type annotations of base class members.
+if "sphinx" in sys.modules:
+    import zipfile  # noqa: F401
+
+    from ._multiblock import AddressReader, Decompressor  # noqa: F401
+
+
 _LOG = logging.getLogger(__name__)

@@ -877,6 +885,49 @@ class PredictedQuantumGraph(BaseQuantumGraph):
             page_size=page_size,
         ).assemble()

+    @classmethod
+    def make_empty(
+        cls,
+        universe: DimensionUniverse,
+        *,
+        output_run: str,
+        inputs: Iterable[str] = (),
+        output: str | None = None,
+        add_packages: bool = True,
+    ) -> PredictedQuantumGraph:
+        """Make an empty quantum graph with no tasks.
+
+        Parameters
+        ----------
+        universe : `lsst.daf.butler.DimensionUniverse`
+            Definitions for all butler dimensions.
+        output_run : `str`
+            Output run collection.
+        inputs : `~collections.abc.Iterable` [`str`], optional
+            Iterable of input collection names.
+        output : `str` or `None`, optional
+            Output chained collection.
+        add_packages : `bool`, optional
+            Whether to add the special init quantum that writes the 'packages'
+            dataset. The default (`True`) is consistent with
+            `~..quantum_graph_builder.QuantumGraphBuilder` behavior when there
+            are no regular quanta generated.
+
+        Returns
+        -------
+        quantum_graph : `PredictedQuantumGraph`
+            An empty quantum graph.
+        """
+        return cls(
+            PredictedQuantumGraphComponents.make_empty(
+                universe,
+                output_run=output_run,
+                inputs=inputs,
+                output=output,
+                add_packages=add_packages,
+            )
+        )
+
     @property
     def quanta_by_task(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
         """A nested mapping of all quanta, keyed first by task name and then by
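
A hedged usage sketch for the new classmethod, assuming an existing butler repository; the repository path and collection names are purely illustrative:

    from lsst.daf.butler import Butler
    from lsst.pipe.base.quantum_graph import PredictedQuantumGraph

    butler = Butler("/repo")  # illustrative repo path
    qg = PredictedQuantumGraph.make_empty(
        butler.dimensions,  # the repository's DimensionUniverse
        output_run="u/someone/empty-run",
        inputs=["some/input-collection"],
        output="u/someone/output-chain",
    )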
@@ -1541,6 +1592,63 @@ class PredictedQuantumGraphComponents:
     This does not include special "init" quanta.
     """

+    @classmethod
+    def make_empty(
+        cls,
+        universe: DimensionUniverse,
+        *,
+        output_run: str,
+        inputs: Iterable[str] = (),
+        output: str | None = None,
+        add_packages: bool = True,
+    ) -> PredictedQuantumGraphComponents:
+        """Make components for an empty quantum graph with no tasks.
+
+        Parameters
+        ----------
+        universe : `lsst.daf.butler.DimensionUniverse`
+            Definitions for all butler dimensions.
+        output_run : `str`
+            Output run collection.
+        inputs : `~collections.abc.Iterable` [`str`], optional
+            Iterable of input collection names.
+        output : `str` or `None`, optional
+            Output chained collection.
+        add_packages : `bool`, optional
+            Whether to add the special init quantum that writes the 'packages'
+            dataset. The default (`True`) is consistent with
+            `~..quantum_graph_builder.QuantumGraphBuilder` behavior when there
+            are no regular quanta generated.
+
+        Returns
+        -------
+        components : `PredictedQuantumGraphComponents`
+            Components that can be used to build or write an empty quantum
+            graph.
+        """
+        components = cls(pipeline_graph=PipelineGraph(universe=universe))
+        components.header.inputs = list(inputs)
+        components.header.output_run = output_run
+        components.header.output = output
+        if add_packages:
+            components.init_quanta.root = [
+                PredictedQuantumDatasetsModel.model_construct(
+                    quantum_id=generate_uuidv7(),
+                    task_label="",
+                    outputs={
+                        acc.PACKAGES_INIT_OUTPUT_NAME: [
+                            PredictedDatasetModel(
+                                dataset_id=generate_uuidv7(),
+                                dataset_type_name=acc.PACKAGES_INIT_OUTPUT_NAME,
+                                data_coordinate=[],
+                                run=output_run,
+                            )
+                        ]
+                    },
+                )
+            ]
+        return components
+
     def make_dataset_ref(self, predicted: PredictedDatasetModel) -> DatasetRef:
         """Make a `lsst.daf.butler.DatasetRef` from information in the
         predicted quantum graph.
@@ -1793,7 +1901,6 @@ class PredictedQuantumGraphComponents:
                 f"Unsupported extension {ext!r} for quantum graph; "
                 "expected '.qg' (or '.qgraph' to force the old format)."
             )
-        cdict: zstandard.ZstdCompressionDict | None = None
         cdict_data: bytes | None = None
         quantum_datasets_json: dict[uuid.UUID, bytes] = {}
         if len(self.quantum_datasets) < zstd_dict_n_inputs:

@@ -1807,26 +1914,20 @@ class PredictedQuantumGraphComponents:
                 for quantum_model in itertools.islice(self.quantum_datasets.values(), zstd_dict_n_inputs)
             }
             try:
-
+                cdict_data = zstandard.train_dictionary(
                     zstd_dict_size,
                     list(quantum_datasets_json.values()),
                     level=zstd_level,
-                )
+                ).as_bytes()
             except zstandard.ZstdError as err:
                 warnings.warn(f"Not using a compression dictionary: {err}.")
-                cdict = None
-            else:
-                cdict_data = cdict.as_bytes()
-        compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
-        indices = {quantum_id: n for n, quantum_id in enumerate(sorted(self.quantum_datasets.keys()))}
         with BaseQuantumGraphWriter.open(
             uri,
             header=self.header,
             pipeline_graph=self.pipeline_graph,
-            indices=indices,
             address_filename="quanta",
-            compressor=compressor,
             cdict_data=cdict_data,
+            zstd_level=zstd_level,
         ) as writer:
             writer.write_single_model("thin_graph", self.thin_graph)
             if self.dimension_data is None: