lsst-pipe-base 30.2026.200-py3-none-any.whl → 30.2026.400-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +10 -12
- lsst/pipe/base/_status.py +29 -10
- lsst/pipe/base/automatic_connection_constants.py +9 -1
- lsst/pipe/base/cli/cmd/__init__.py +16 -2
- lsst/pipe/base/cli/cmd/commands.py +42 -4
- lsst/pipe/base/connectionTypes.py +72 -160
- lsst/pipe/base/connections.py +3 -6
- lsst/pipe/base/execution_reports.py +0 -5
- lsst/pipe/base/log_capture.py +8 -4
- lsst/pipe/base/log_on_close.py +79 -0
- lsst/pipe/base/mp_graph_executor.py +51 -15
- lsst/pipe/base/pipeline.py +3 -4
- lsst/pipe/base/pipelineIR.py +0 -6
- lsst/pipe/base/pipelineTask.py +5 -7
- lsst/pipe/base/pipeline_graph/_edges.py +19 -7
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +8 -0
- lsst/pipe/base/quantum_graph/_common.py +7 -4
- lsst/pipe/base/quantum_graph/_multiblock.py +6 -16
- lsst/pipe/base/quantum_graph/_predicted.py +111 -10
- lsst/pipe/base/quantum_graph/_provenance.py +727 -26
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +26 -50
- lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +48 -234
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +6 -116
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +24 -18
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +33 -350
- lsst/pipe/base/quantum_graph/formatter.py +171 -0
- lsst/pipe/base/quantum_graph/ingest_graph.py +356 -0
- lsst/pipe/base/quantum_graph_executor.py +116 -13
- lsst/pipe/base/quantum_provenance_graph.py +17 -2
- lsst/pipe/base/separable_pipeline_executor.py +18 -2
- lsst/pipe/base/single_quantum_executor.py +59 -41
- lsst/pipe/base/struct.py +4 -0
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/METADATA +2 -1
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/RECORD +45 -42
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/WHEEL +1 -1
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/zip-safe +0 -0
lsst/pipe/base/_instrument.py
CHANGED
```diff
@@ -31,7 +31,6 @@ __all__ = ("Instrument",)
 
 import contextlib
 import datetime
-import os.path
 from abc import ABCMeta, abstractmethod
 from collections.abc import Sequence
 from typing import TYPE_CHECKING, Any, Self, cast, final
@@ -39,6 +38,7 @@ from typing import TYPE_CHECKING, Any, Self, cast, final
 from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
 from lsst.daf.butler.registry import DataIdError
 from lsst.pex.config import Config, RegistryField
+from lsst.resources import ResourcePath, ResourcePathExpression
 from lsst.utils import doImportType
 from lsst.utils.introspection import get_full_type_name
 
@@ -65,7 +65,7 @@ class Instrument(metaclass=ABCMeta):
     the base class.
     """
 
-    configPaths: Sequence[str] = ()
+    configPaths: Sequence[ResourcePathExpression] = ()
     """Paths to config files to read for specific Tasks.
 
     The paths in this list should contain files of the form `task.py`, for
@@ -109,6 +109,10 @@ class Instrument(metaclass=ABCMeta):
             If `True` (`False` is default), update existing records if they
             differ from the new ones.
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         lsst.daf.butler.registry.ConflictingDefinitionError
@@ -127,13 +131,6 @@ class Instrument(metaclass=ABCMeta):
         the level of individual dimension entries; new detectors and filters
         should be added, but changes to any existing record should not be.
         This can generally be achieved via a block like
-
-        .. code-block:: python
-
-            with registry.transaction():
-                registry.syncDimensionData("instrument", ...)
-                registry.syncDimensionData("detector", ...)
-                self.registerFilters(registry)
         """
         raise NotImplementedError()
 
@@ -366,9 +363,10 @@ class Instrument(metaclass=ABCMeta):
             Config instance to which overrides should be applied.
         """
         for root in self.configPaths:
-            path = os.path.join(root, f"{name}.py")
-            if os.path.exists(path):
-                config.load(path)
+            resource = ResourcePath(root, forceDirectory=True, forceAbsolute=True)
+            uri = resource.join(f"{name}.py", forceDirectory=False)
+            if uri.exists():
+                config.load(uri)
 
     @staticmethod
     def formatCollectionTimestamp(timestamp: str | datetime.datetime) -> str:
```
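The override lookup now goes through `lsst.resources.ResourcePath`, so `configPaths` entries may use any supported URI scheme (local paths, `s3://`, etc.) rather than only local filesystem paths joined with `os.path`. A minimal standalone sketch of the same lookup pattern; the override roots and task label here are hypothetical:

```python
from lsst.pex.config import Config
from lsst.resources import ResourcePath


def apply_overrides(config: Config, config_paths: list[str], name: str) -> None:
    """Load ``<root>/<name>.py`` from each root where it exists."""
    for root in config_paths:
        # Treat each root as a directory URI; relative local roots are
        # resolved against the current working directory.
        resource = ResourcePath(root, forceDirectory=True, forceAbsolute=True)
        uri = resource.join(f"{name}.py", forceDirectory=False)
        if uri.exists():
            # `Config.load` accepts the ResourcePath, as in the diff above.
            config.load(uri)
```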
lsst/pipe/base/_status.py
CHANGED
```diff
@@ -275,15 +275,23 @@ class ExceptionInfo(pydantic.BaseModel):
 class QuantumAttemptStatus(enum.Enum):
     """Enum summarizing an attempt to run a quantum."""
 
+    ABORTED = -4
+    """The quantum failed with a hard error that prevented both logs and
+    metadata from being written.
+
+    This state is only set if information from higher-level tooling (e.g. BPS)
+    is available to distinguish it from ``UNKNOWN``.
+    """
+
     UNKNOWN = -3
     """The status of this attempt is unknown.
 
-    This
-
-
+    This means no logs or metadata were written, and it at least could not be
+    determined whether the quantum was blocked by an upstream failure (if it
+    was definitely blocked, `BLOCKED` is set instead).
     """
 
-    ABORTED = -2
+    ABORTED_SUCCESS = -2
     """Task metadata was written for this attempt but logs were not.
 
     This is a rare condition that requires a hard failure (i.e. the kind that
@@ -292,20 +300,21 @@ class QuantumAttemptStatus(enum.Enum):
     """
 
     FAILED = -1
-    """Execution of the quantum failed.
+    """Execution of the quantum failed gracefully.
 
     This is always set if the task metadata dataset was not written but logs
     were, as is the case when a Python exception is caught and handled by the
-    execution system.
-
-
+    execution system.
+
+    This status guarantees that the task log dataset was produced but the
+    metadata dataset was not.
     """
 
     BLOCKED = 0
     """This quantum was not executed because an upstream quantum failed.
 
-    Upstream quanta with status `UNKNOWN` or `
-    `
+    Upstream quanta with status `UNKNOWN`, `FAILED`, or `ABORTED` are
+    considered blockers; `ABORTED_SUCCESS` is not.
     """
 
     SUCCESSFUL = 1
@@ -319,6 +328,16 @@ class QuantumAttemptStatus(enum.Enum):
     these "successes with caveats" are reported.
     """
 
+    @property
+    def has_metadata(self) -> bool:
+        """Whether the task metadata dataset was produced."""
+        return self is self.SUCCESSFUL or self is self.ABORTED_SUCCESS
+
+    @property
+    def has_log(self) -> bool:
+        """Whether the log dataset was produced."""
+        return self is self.SUCCESSFUL or self is self.FAILED
+
 
 class GetSetDictMetadataHolder(Protocol):
     """Protocol for objects that have a ``metadata`` attribute that satisfies
```
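The two new properties make the log/metadata guarantees of each terminal state queryable, so report code can branch on them instead of enumerating members. A short usage sketch (importing from the private `_status` module where the enum is defined; it may also be re-exported from `lsst.pipe.base`):

```python
from lsst.pipe.base._status import QuantumAttemptStatus

# FAILED guarantees a log dataset but no metadata; ABORTED_SUCCESS is the
# reverse; SUCCESSFUL has both; ABORTED, UNKNOWN, and BLOCKED have neither.
for status in QuantumAttemptStatus:
    print(f"{status.name:16s} log={status.has_log!s:5s} metadata={status.has_metadata}")

assert QuantumAttemptStatus.FAILED.has_log
assert not QuantumAttemptStatus.FAILED.has_metadata
assert QuantumAttemptStatus.ABORTED_SUCCESS.has_metadata
assert not QuantumAttemptStatus.ABORTED_SUCCESS.has_log
```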
lsst/pipe/base/automatic_connection_constants.py
CHANGED

```diff
@@ -26,7 +26,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 """Constants used to define the connections automatically added for each
-PipelineTask by the execution system.
+PipelineTask by the execution system, as well as other special dataset types.
 """
 
 from __future__ import annotations
@@ -43,6 +43,8 @@ __all__ = (
     "METADATA_OUTPUT_TEMPLATE",
     "PACKAGES_INIT_OUTPUT_NAME",
     "PACKAGES_INIT_OUTPUT_STORAGE_CLASS",
+    "PROVENANCE_DATASET_TYPE_NAME",
+    "PROVENANCE_STORAGE_CLASS",
 )
 
 
@@ -91,3 +93,9 @@ type names.
 METADATA_OUTPUT_STORAGE_CLASS: str = "TaskMetadata"
 """Name of the storage class for task metadata output datasets.
 """
+
+PROVENANCE_DATASET_TYPE_NAME: str = "run_provenance"
+"""Name of the dataset used to store per-RUN provenance."""
+
+PROVENANCE_STORAGE_CLASS: str = "ProvenanceQuantumGraph"
+"""Name of the storage class used to store provenance."""
```
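These constants pair with the new provenance machinery (`_provenance.py`, `formatter.py`, and `ingest_graph.py` in the file list above). A hedged sketch of registering the corresponding dataset type, assuming the per-RUN provenance dataset is dimensionless (one per RUN collection), which the constant's docstring suggests but the diff does not show; the repository path is hypothetical:

```python
from lsst.daf.butler import Butler, DatasetType
from lsst.pipe.base.automatic_connection_constants import (
    PROVENANCE_DATASET_TYPE_NAME,
    PROVENANCE_STORAGE_CLASS,
)

butler = Butler("/repo/main", writeable=True)  # hypothetical repo path
provenance_type = DatasetType(
    PROVENANCE_DATASET_TYPE_NAME,
    dimensions=(),  # assumption: one provenance dataset per RUN
    storageClass=PROVENANCE_STORAGE_CLASS,
    universe=butler.dimensions,
)
butler.registry.registerDatasetType(provenance_type)
```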
lsst/pipe/base/cli/cmd/__init__.py
CHANGED

```diff
@@ -25,6 +25,20 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 
-__all__ = [
+__all__ = [
+    "register_instrument",
+    "transfer_from_graph",
+    "zip_from_graph",
+    "retrieve_artifacts_for_quanta",
+    "aggregate_graph",
+    "ingest_graph",
+]
 
-from .commands import (
+from .commands import (
+    register_instrument,
+    retrieve_artifacts_for_quanta,
+    transfer_from_graph,
+    zip_from_graph,
+    aggregate_graph,
+    ingest_graph,
+)
```
lsst/pipe/base/cli/cmd/commands.py
CHANGED

```diff
@@ -161,7 +161,7 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
 
 @click.command(short_help="Scan for the outputs of an active or completed quantum graph.", cls=ButlerCommand)
 @click.argument("predicted_graph", required=True)
-@repo_argument(required=True, help="Path
+@repo_argument(required=True, help="Path or alias for the butler repository.")
 @click.option(
     "-o",
     "--output",
@@ -181,9 +181,9 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
     help="Number of processes to use.",
 )
 @click.option(
-    "--complete
-    "
-    default=_AGGREGATOR_DEFAULTS.
+    "--incomplete/--complete",
+    "incomplete",
+    default=_AGGREGATOR_DEFAULTS.incomplete,
     help="Whether execution has completed (and failures cannot be retried).",
 )
 @click.option(
@@ -249,6 +249,14 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
     default=_AGGREGATOR_DEFAULTS.mock_storage_classes,
     help="Enable support for storage classes created by the lsst.pipe.base.tests.mocks package.",
 )
+@click.option(
+    "--promise-ingest-graph/--no-promise-ingest-graph",
+    default=_AGGREGATOR_DEFAULTS.promise_ingest_graph,
+    help=(
+        "Promise to run 'butler ingest-graph' later, allowing aggregate-graph "
+        "to skip metadata/log/config ingestion for now."
+    ),
+)
 def aggregate_graph(predicted_graph: str, repo: str, **kwargs: Any) -> None:
     """Scan for quantum graph's outputs to gather provenance, ingest datasets
     into the central butler repository, and delete datasets that are no
@@ -268,3 +276,33 @@ def aggregate_graph(predicted_graph: str, repo: str, **kwargs: Any) -> None:
         # When this exception is raised, we'll have already logged the relevant
         # traceback from a separate worker.
         raise click.ClickException(str(err)) from None
+
+
+@click.command(
+    short_help="Ingest a provenance quantum graph into a butler, finalizing a RUN collection.",
+    cls=ButlerCommand,
+)
+@repo_argument(required=True, help="Path or alias for the butler repository.")
+@click.argument("provenance_graph", required=False)
+@transfer_option(default="move")
+@click.option("--batch-size", default=10000, help="How many datasets to process in each transaction.")
+@click.option(
+    "--output-run",
+    default=None,
+    help=(
+        "Name of the output RUN collection. Must be provided if the provenance graph is not"
+        " provided (so the graph can be found in the butler)."
+    ),
+)
+def ingest_graph(
+    *,
+    repo: str,
+    provenance_graph: str | None,
+    transfer: str | None,
+    batch_size: int,
+    output_run: str | None,
+) -> None:
+    """Ingest a provenance graph into a butler repository."""
+    from ...quantum_graph.ingest_graph import ingest_graph as ingest_graph_py
+
+    ingest_graph_py(repo, provenance_graph, transfer=transfer, batch_size=batch_size, output_run=output_run)
```
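The new `ingest-graph` command is a thin wrapper over the `ingest_graph` function added in `lsst/pipe/base/quantum_graph/ingest_graph.py` (+356 lines above). The equivalent Python call, mirroring the command's defaults; the repository path and graph filename are illustrative:

```python
from lsst.pipe.base.quantum_graph.ingest_graph import ingest_graph

# Finalize a RUN collection from a provenance graph file.  "move" (the
# command's default transfer mode) relocates the graph's artifacts into
# the repository; output_run=None lets it be read from the graph itself.
ingest_graph(
    "/repo/main",      # butler repository path or alias (illustrative)
    "provenance.qg",   # provenance graph file (illustrative name)
    transfer="move",
    batch_size=10000,
    output_run=None,
)
```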
lsst/pipe/base/connectionTypes.py
CHANGED

```diff
@@ -41,35 +41,36 @@ from lsst.utils.introspection import find_outside_stacklevel
 
 @dataclasses.dataclass(frozen=True)
 class BaseConnection:
-    """Base class used for declaring `PipelineTask` connections.
-
-    Attributes
-    ----------
-    name : `str`
-        The name used to identify the dataset type.
-    storageClass : `str`
-        The storage class used when (un)/persisting the dataset type.
-    multiple : `bool`
-        Indicates if this connection should expect to contain multiple objects
-        of the given dataset type. Tasks with more than one connection with
-        ``multiple=True`` with the same dimensions may want to implement
-        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
-        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum()` and
-        notify the execution system as early as possible of outputs that will
-        not be produced because the corresponding input is missing.
-    deprecated : `str`, optional
-        A description of why this connection is deprecated, including the
-        version after which it may be removed.
-
-        If not `None`, the string is appended to the docstring for this
-        connection and the corresponding config Field.
-    """
+    """Base class used for declaring `PipelineTask` connections."""
 
     name: str
+    """The name used to identify the dataset type."""
+
     storageClass: str
+    """The storage class used when (un)/persisting the dataset type."""
+
     doc: str = ""
+    """Documentation for this connection."""
+
     multiple: bool = False
+    """Indicates if this connection should expect to contain multiple objects
+    of the given dataset type.
+
+    Tasks with more than one connection with ``multiple=True`` with the same
+    dimensions may want to implement `.PipelineTaskConnections.adjustQuantum`
+    to ensure those datasets are consistent (i.e. zip-iterable) in
+    `PipelineTask.runQuantum()` and notify the execution system as early as
+    possible of outputs that will not be produced because the corresponding
+    input is missing.
+    """
+
     deprecated: str | None = dataclasses.field(default=None, kw_only=True)
+    """A description of why this connection is deprecated, including the
+    version after which it may be removed.
+
+    If not `None`, the string is appended to the docstring for this
+    connection and the corresponding config Field.
+    """
 
     _connection_type_set: ClassVar[str]
     _deprecation_context: str = ""
@@ -110,32 +111,15 @@ class BaseConnection:
 class DimensionedConnection(BaseConnection):
     """Class used for declaring PipelineTask connections that includes
     dimensions.
-
-    Attributes
-    ----------
-    name : `str`
-        The name used to identify the dataset type.
-    storageClass : `str`
-        The storage class used when (un)/persisting the dataset type.
-    multiple : `bool`
-        Indicates if this connection should expect to contain multiple objects
-        of the given dataset type. Tasks with more than one connection with
-        ``multiple=True`` with the same dimensions may want to implement
-        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
-        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
-        the execution system as early as possible of outputs that will not be
-        produced because the corresponding input is missing.
-    dimensions : iterable of `str`
-        The `lsst.daf.butler.Butler` `lsst.daf.butler.Registry` dimensions used
-        to identify the dataset type identified by the specified name.
-    isCalibration : `bool`, optional
-        `True` if this dataset type may be included in CALIBRATION-type
-        collections to associate it with a validity range, `False` (default)
-        otherwise.
     """
 
     dimensions: Iterable[str] = ()
+    """The keys of the butler data coordinates for this dataset type."""
+
     isCalibration: bool = False
+    """ `True` if this dataset type may be included in
+    `~lsst.daf.butler.CollectionType.CALIBRATION` collections to associate it
+    with a validity range, `False` (default) otherwise."""
 
     def __post_init__(self):
         super().__post_init__()
@@ -151,39 +135,6 @@ class DimensionedConnection(BaseConnection):
 class BaseInput(DimensionedConnection):
     """Class used for declaring PipelineTask input connections.
 
-    Attributes
-    ----------
-    name : `str`
-        The default name used to identify the dataset type.
-    storageClass : `str`
-        The storage class used when (un)/persisting the dataset type.
-    multiple : `bool`
-        Indicates if this connection should expect to contain multiple objects
-        of the given dataset type. Tasks with more than one connection with
-        ``multiple=True`` with the same dimensions may want to implement
-        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
-        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
-        the execution system as early as possible of outputs that will not be
-        produced because the corresponding input is missing.
-    dimensions : iterable of `str`
-        The `lsst.daf.butler.Butler` `lsst.daf.butler.Registry` dimensions used
-        to identify the dataset type identified by the specified name.
-    deferLoad : `bool`
-        Indicates that this dataset type will be loaded as a
-        `lsst.daf.butler.DeferredDatasetHandle`. PipelineTasks can use this
-        object to load the object at a later time.
-    minimum : `bool`
-        Minimum number of datasets required for this connection, per quantum.
-        This is checked in the base implementation of
-        `.PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if
-        the minimum is not met for `Input` connections (causing the quantum to
-        be pruned, skipped, or never created, depending on the context), and
-        `FileNotFoundError` for `PrerequisiteInput` connections (causing
-        QuantumGraph generation to fail). `PipelineTask` implementations may
-        provide custom `~.PipelineTaskConnections.adjustQuantum`
-        implementations for more fine-grained or configuration-driven
-        constraints, as long as they are compatible with this minium.
-
     Raises
     ------
     TypeError
@@ -194,7 +145,24 @@ class BaseInput(DimensionedConnection):
     """
 
     deferLoad: bool = False
+    """Whether this dataset type will be loaded as a
+    `lsst.daf.butler.DeferredDatasetHandle`. PipelineTasks can use this
+    object to load the object at a later time.
+    """
+
     minimum: int = 1
+    """Minimum number of datasets required for this connection, per quantum.
+
+    This is checked in the base implementation of
+    `.PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if the
+    minimum is not met for `Input` connections (causing the quantum to be
+    pruned, skipped, or never created, depending on the context), and
+    `FileNotFoundError` for `PrerequisiteInput` connections (causing
+    QuantumGraph generation to fail). `PipelineTask` implementations may
+    provide custom `~.PipelineTaskConnections.adjustQuantum` implementations
+    for more fine-grained or configuration-driven constraints, as long as they
+    are compatible with this minimum.
+    """
 
     def __post_init__(self) -> None:
         super().__post_init__()
@@ -206,56 +174,6 @@ class BaseInput(DimensionedConnection):
 class Input(BaseInput):
     """Class used for declaring PipelineTask input connections.
 
-    Attributes
-    ----------
-    name : `str`
-        The default name used to identify the dataset type.
-    storageClass : `str`
-        The storage class used when (un)/persisting the dataset type.
-    multiple : `bool`
-        Indicates if this connection should expect to contain multiple objects
-        of the given dataset type. Tasks with more than one connection with
-        ``multiple=True`` with the same dimensions may want to implement
-        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
-        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
-        the execution system as early as possible of outputs that will not be
-        produced because the corresponding input is missing.
-    dimensions : iterable of `str`
-        The `lsst.daf.butler.Butler` `lsst.daf.butler.Registry` dimensions used
-        to identify the dataset type identified by the specified name.
-    deferLoad : `bool`
-        Indicates that this dataset type will be loaded as a
-        `lsst.daf.butler.DeferredDatasetHandle`. PipelineTasks can use this
-        object to load the object at a later time.
-    minimum : `bool`
-        Minimum number of datasets required for this connection, per quantum.
-        This is checked in the base implementation of
-        `.PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if
-        the minimum is not met for `Input` connections (causing the quantum to
-        be pruned, skipped, or never created, depending on the context), and
-        `FileNotFoundError` for `PrerequisiteInput` connections (causing
-        QuantumGraph generation to fail). `PipelineTask` implementations may
-        provide custom `~.PipelineTaskConnections.adjustQuantum`
-        implementations for more fine-grained or configuration-driven
-        constraints, as long as they are compatible with this minium.
-    deferGraphConstraint : `bool`, optional
-        If `True`, do not include this dataset type's existence in the initial
-        query that starts the QuantumGraph generation process. This can be
-        used to make QuantumGraph generation faster by avoiding redundant
-        datasets, and in certain cases it can (along with careful attention to
-        which tasks are included in the same QuantumGraph) be used to work
-        around the QuantumGraph generation algorithm's inflexible handling of
-        spatial overlaps. This option has no effect when the connection is not
-        an overall input of the pipeline (or subset thereof) for which a graph
-        is being created, and it never affects the ordering of quanta.
-    deferBinding : `bool`, optional
-        If `True`, the dataset will not be automatically included in
-        the pipeline graph, ``deferGraphConstraint`` is implied.
-        The custom QuantumGraphBuilder is required to bind it and add a
-        corresponding edge to the pipeline graph.
-        This option allows to have the same dataset type as both
-        input and output of a quantum.
-
     Raises
     ------
     TypeError
@@ -266,8 +184,27 @@ class Input(BaseInput):
     """
 
     deferGraphConstraint: bool = False
+    """If `True`, do not include this dataset type's existence in the initial
+    query that starts the QuantumGraph generation process.
+
+    This can be used to make QuantumGraph generation faster by avoiding
+    redundant datasets, and in certain cases it can (along with careful
+    attention to which tasks are included in the same QuantumGraph) be used to
+    work around the QuantumGraph generation algorithm's inflexible handling of
+    spatial overlaps. This option has no effect when the connection is not an
+    overall input of the pipeline (or subset thereof) for which a graph is
+    being created, and it never affects the ordering of quanta.
+    """
 
     deferBinding: bool = False
+    """If `True`, the dataset will not be automatically included in the
+    pipeline graph (``deferGraphConstraint=True`` is implied).
+
+    A custom `~.quantum_graph_builder.QuantumGraphBuilder` is required to bind
+    it and add a corresponding edge to the pipeline graph. This option allows
+    the same dataset type to be used as both an input and an output of a
+    quantum.
+    """
 
     _connection_type_set: ClassVar[str] = "inputs"
 
@@ -276,38 +213,6 @@ class Input(BaseInput):
 class PrerequisiteInput(BaseInput):
     """Class used for declaring PipelineTask prerequisite connections.
 
-    Attributes
-    ----------
-    name : `str`
-        The default name used to identify the dataset type.
-    storageClass : `str`
-        The storage class used when (un)/persisting the dataset type.
-    multiple : `bool`
-        Indicates if this connection should expect to contain multiple objects
-        of the given dataset type. Tasks with more than one connection with
-        ``multiple=True`` with the same dimensions may want to implement
-        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
-        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
-        the execution system as early as possible of outputs that will not be
-        produced because the corresponding input is missing.
-    dimensions : iterable of `str`
-        The `lsst.daf.butler.Butler` `lsst.daf.butler.Registry` dimensions used
-        to identify the dataset type identified by the specified name.
-    minimum : `bool`
-        Minimum number of datasets required for this connection, per quantum.
-        This is checked in the base implementation of
-        `.PipelineTaskConnections.adjustQuantum`, which raises
-        `FileNotFoundError` (causing QuantumGraph generation to fail).
-        `PipelineTask` implementations may provide custom
-        `~.PipelineTaskConnections.adjustQuantum` implementations for more
-        fine-grained or configuration-driven constraints, as long as they are
-        compatible with this minium.
-    lookupFunction : `typing.Callable`, optional
-        An optional callable function that will look up PrerequisiteInputs
-        using the DatasetType, registry, quantum dataId, and input collections
-        passed to it. If no function is specified, the default temporal spatial
-        lookup will be used.
-
     Raises
     ------
     TypeError
@@ -342,6 +247,13 @@ class PrerequisiteInput(BaseInput):
     lookupFunction: (
         Callable[[DatasetType, Registry, DataCoordinate, Sequence[str]], Iterable[DatasetRef]] | None
    ) = None
+    """An optional callable function that will look up PrerequisiteInputs
+    using the DatasetType, registry, quantum dataId, and input collections
+    passed to it.
+
+    If no function is specified, the default temporal/spatial lookup will be
+    used.
+    """
 
     _connection_type_set: ClassVar[str] = "prerequisiteInputs"
 
```
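The docstring reshuffle moves each field's documentation onto the field itself without changing the dataclass API, so existing connection declarations are unaffected. For reference, a typical declaration exercising several of the documented fields (task and dataset type names are illustrative):

```python
import lsst.pipe.base.connectionTypes as cT
from lsst.pipe.base import PipelineTaskConnections


class ExampleConnections(
    PipelineTaskConnections, dimensions=("instrument", "visit", "detector")
):
    exposure = cT.Input(
        doc="Input exposure to process.",
        name="postISRCCD",
        storageClass="Exposure",
        dimensions=("instrument", "visit", "detector"),
    )
    camera = cT.PrerequisiteInput(
        doc="Camera geometry, looked up at graph-generation time.",
        name="camera",
        storageClass="Camera",
        dimensions=("instrument",),
        isCalibration=True,
        deferLoad=True,  # delivered as a DeferredDatasetHandle
    )
    output = cT.Output(
        doc="Processed exposure.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
```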
lsst/pipe/base/connections.py
CHANGED
```diff
@@ -495,15 +495,12 @@ class DeferredDatasetRef:
     """A wrapper class for `~lsst.daf.butler.DatasetRef` that indicates that a
     `PipelineTask` should receive a `~lsst.daf.butler.DeferredDatasetHandle`
     instead of an in-memory dataset.
-
-    Attributes
-    ----------
-    datasetRef : `lsst.daf.butler.DatasetRef`
-        The `lsst.daf.butler.DatasetRef` that will be eventually used to
-        resolve a dataset.
     """
 
     datasetRef: DatasetRef
+    """The `lsst.daf.butler.DatasetRef` that will be eventually used to
+    resolve a dataset.
+    """
 
     def __getattr__(self, name: str) -> Any:
         # make sure reduce is called on DeferredDatasetRef and not on
```
lsst/pipe/base/execution_reports.py
CHANGED

```diff
@@ -299,11 +299,6 @@ class QuantumGraphExecutionReport:
     produced DatasetTypes for each task. This report can be output as a
     dictionary or a yaml file.
 
-    Attributes
-    ----------
-    tasks : `dict`
-        A dictionary of TaskExecutionReports by task label.
-
     See Also
     --------
     TaskExecutionReport : A task report.
```
lsst/pipe/base/log_capture.py
CHANGED
```diff
@@ -163,7 +163,9 @@ class LogCapture:
         return cls(butler, butler)
 
     @contextmanager
-    def capture_logging(
+    def capture_logging(
+        self, task_node: TaskNode, /, quantum: Quantum, records: ButlerLogRecords | None = None
+    ) -> Iterator[_LogCaptureContext]:
         """Configure logging system to capture logs for execution of this task.
 
         Parameters
@@ -172,6 +174,9 @@ class LogCapture:
             The task definition.
         quantum : `~lsst.daf.butler.Quantum`
             Single Quantum instance.
+        records : `lsst.daf.butler.logging.ButlerLogRecords`, optional
+            Log record container to append to and save. If provided, streaming
+            mode is disabled (since we'll be saving logs in memory anyway).
 
         Notes
         -----
@@ -213,7 +218,7 @@ class LogCapture:
             ) from exc
         # Either accumulate into ButlerLogRecords or stream JSON records to
         # file and ingest that (ingest is possible only with full butler).
-        if self.stream_json_logs and self.full_butler is not None:
+        if self.stream_json_logs and self.full_butler is not None and records is None:
             with TemporaryForIngest(self.full_butler, ref) as temporary:
                 log_handler_file = FileHandler(temporary.ospath)
                 log_handler_file.setFormatter(JsonLogFormatter())
@@ -236,7 +241,7 @@ class LogCapture:
                     temporary.ingest()
 
         else:
-            log_handler_memory = ButlerLogRecordHandler()
+            log_handler_memory = ButlerLogRecordHandler(records)
             logging.getLogger().addHandler(log_handler_memory)
 
             try:
@@ -255,7 +260,6 @@ class LogCapture:
                 logging.getLogger().removeHandler(log_handler_memory)
                 if ctx.store:
                     self._store_log_records(quantum, log_dataset_name, log_handler_memory)
-                log_handler_memory.records.clear()
 
         else:
             with ButlerMDC.set_mdc(mdc):
```
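The new `records` argument lets the caller own the `ButlerLogRecords` container across the context manager, e.g. so a retry of the same quantum can append to the logs of the previous attempt before anything is stored; passing it also disables JSON streaming, as the docstring notes. A hedged sketch; the `from_full` constructor name and the executor-supplied `butler`, `task_node`, and `quantum` objects are assumptions:

```python
from lsst.daf.butler.logging import ButlerLogRecords
from lsst.pipe.base.log_capture import LogCapture

# Assumed classmethod, per the `return cls(butler, butler)` context above.
capture = LogCapture.from_full(butler)

# Caller-owned container: it survives the context, so a second attempt can
# keep appending to the same records before they are stored once.
records = ButlerLogRecords.from_records([])
with capture.capture_logging(task_node, quantum, records=records):
    ...  # execute the quantum; log records accumulate in `records`
```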