lsst-pipe-base 29.2025.3000__py3-none-any.whl → 29.2025.3200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. lsst/pipe/base/__init__.py +0 -1
  2. lsst/pipe/base/_datasetQueryConstraints.py +1 -1
  3. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +10 -46
  4. lsst/pipe/base/caching_limited_butler.py +8 -4
  5. lsst/pipe/base/connectionTypes.py +19 -19
  6. lsst/pipe/base/connections.py +2 -2
  7. lsst/pipe/base/exec_fixup_data_id.py +131 -0
  8. lsst/pipe/base/execution_graph_fixup.py +69 -0
  9. lsst/pipe/base/graph/graphSummary.py +4 -4
  10. lsst/pipe/base/log_capture.py +227 -0
  11. lsst/pipe/base/mp_graph_executor.py +786 -0
  12. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +40 -10
  13. lsst/pipe/base/pipeline_graph/_tasks.py +106 -0
  14. lsst/pipe/base/pipeline_graph/io.py +1 -1
  15. lsst/pipe/base/quantum_graph_builder.py +85 -58
  16. lsst/pipe/base/quantum_graph_executor.py +125 -0
  17. lsst/pipe/base/quantum_graph_skeleton.py +60 -1
  18. lsst/pipe/base/quantum_reports.py +334 -0
  19. lsst/pipe/base/script/transfer_from_graph.py +4 -1
  20. lsst/pipe/base/separable_pipeline_executor.py +296 -0
  21. lsst/pipe/base/simple_pipeline_executor.py +674 -0
  22. lsst/pipe/base/single_quantum_executor.py +635 -0
  23. lsst/pipe/base/taskFactory.py +18 -12
  24. lsst/pipe/base/tests/in_memory_limited_butler.py +223 -0
  25. lsst/pipe/base/tests/mocks/__init__.py +1 -0
  26. lsst/pipe/base/tests/mocks/_in_memory_repo.py +357 -0
  27. lsst/pipe/base/tests/mocks/_pipeline_task.py +19 -2
  28. lsst/pipe/base/version.py +1 -1
  29. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/METADATA +1 -1
  30. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/RECORD +38 -28
  31. lsst/pipe/base/executionButlerBuilder.py +0 -493
  32. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/WHEEL +0 -0
  33. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/entry_points.txt +0 -0
  34. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/COPYRIGHT +0 -0
  35. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/LICENSE +0 -0
  36. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/bsd_license.txt +0 -0
  37. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/gpl-v3.0.txt +0 -0
  38. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/top_level.txt +0 -0
  39. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/zip-safe +0 -0
@@ -0,0 +1,227 @@
1
+ # This file is part of pipe_base.
2
+ #
3
+ # Developed for the LSST Data Management System.
4
+ # This product includes software developed by the LSST Project
5
+ # (http://www.lsst.org).
6
+ # See the COPYRIGHT file at the top-level directory of this distribution
7
+ # for details of code ownership.
8
+ #
9
+ # This software is dual licensed under the GNU General Public License and also
10
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
11
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12
+ # respectively. If you choose the GPL option then the following text applies
13
+ # (but note that there is still no warranty even if you opt for BSD instead):
14
+ #
15
+ # This program is free software: you can redistribute it and/or modify
16
+ # it under the terms of the GNU General Public License as published by
17
+ # the Free Software Foundation, either version 3 of the License, or
18
+ # (at your option) any later version.
19
+ #
20
+ # This program is distributed in the hope that it will be useful,
21
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23
+ # GNU General Public License for more details.
24
+ #
25
+ # You should have received a copy of the GNU General Public License
26
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
27
+
28
+ from __future__ import annotations
29
+
30
+ __all__ = ["LogCapture"]
31
+
32
+ import logging
33
+ import os
34
+ import shutil
35
+ import tempfile
36
+ from collections.abc import Iterator
37
+ from contextlib import contextmanager, suppress
38
+ from logging import FileHandler
39
+
40
+ from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
41
+ from lsst.daf.butler.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter
42
+
43
+ from ._status import InvalidQuantumError
44
+ from .pipeline_graph import TaskNode
45
+
46
+ _LOG = logging.getLogger(__name__)
47
+
48
+
49
class _LogCaptureFlag:
    """Mutable switch controlling whether captured logs are saved to butler.

    An instance is yielded by `LogCapture.capture_logging`. Code running
    inside that context may set ``store`` to `False` on the instance to skip
    writing the captured log records when the context exits.
    """

    # Class-level default; assigning on an instance overrides it for that
    # instance only.
    store: bool = True
53
+
54
+
55
class LogCapture:
    """Class handling capture of logging messages and their export to butler.

    Parameters
    ----------
    butler : `~lsst.daf.butler.LimitedButler`
        Data butler with limited API.
    full_butler : `~lsst.daf.butler.Butler` or `None`
        Data butler with full API, or `None` if full Butler is not available.
        If not none, then this must be the same instance as ``butler``.
    """

    stream_json_logs: bool = True
    """If True each log record is written to a temporary file and ingested
    when quantum completes. If False the records are accumulated in memory
    and stored in butler on quantum completion. If full butler is not available
    then temporary file is not used."""

    def __init__(
        self,
        butler: LimitedButler,
        full_butler: Butler | None,
    ):
        self.butler = butler
        self.full_butler = full_butler

    @classmethod
    def from_limited(cls, butler: LimitedButler) -> LogCapture:
        """Construct an instance that only has a limited butler.

        Parameters
        ----------
        butler : `~lsst.daf.butler.LimitedButler`
            Data butler with limited API.

        Returns
        -------
        log_capture : `LogCapture`
            New instance whose ``full_butler`` is `None`.
        """
        return cls(butler, None)

    @classmethod
    def from_full(cls, butler: Butler) -> LogCapture:
        """Construct an instance from a full butler.

        Parameters
        ----------
        butler : `~lsst.daf.butler.Butler`
            Data butler with full API; it is used both as ``butler`` and as
            ``full_butler``.

        Returns
        -------
        log_capture : `LogCapture`
            New instance sharing ``butler`` for both attributes.
        """
        return cls(butler, butler)

    @contextmanager
    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[_LogCaptureFlag]:
        """Configure logging system to capture logs for execution of this task.

        Parameters
        ----------
        task_node : `~lsst.pipe.base.pipeline_graph.TaskNode`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.

        Yields
        ------
        ctx : `_LogCaptureFlag`
            Flag object; setting its ``store`` attribute to `False` inside
            the context disables saving of the captured records on exit.

        Raises
        ------
        InvalidQuantumError
            Raised on exit if the records are to be stored but the quantum
            has no output for the task's log dataset type.

        Notes
        -----
        Expected to be used as a context manager to ensure that logging
        records are inserted into the butler once the quantum has been
        executed:

        .. code-block:: py

            with self.capture_logging(task_node, quantum):
                # Run quantum and capture logs.

        This method can also setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.
        """
        # Include quantum dataId and task label into MDC.
        mdc = {"LABEL": task_node.label, "RUN": ""}
        if quantum.dataId:
            mdc["LABEL"] += f":{quantum.dataId}"
        if self.full_butler is not None:
            mdc["RUN"] = self.full_butler.run or ""
        ctx = _LogCaptureFlag()
        log_dataset_name = (
            task_node.log_output.dataset_type_name if task_node.log_output is not None else None
        )

        if log_dataset_name is None:
            # The task has no log output; only the MDC is set up.
            with ButlerMDC.set_mdc(mdc):
                yield ctx
            return

        # Add a handler to the root logger to capture execution log output.
        root_logger = logging.getLogger()

        # Either accumulate into ButlerLogRecords or stream JSON records to
        # file and ingest that (ingest is possible only with full butler).
        if self.stream_json_logs and self.full_butler is not None:
            # Create the log file in a temporary directory rather than
            # creating a temporary file. This is necessary because
            # temporary files are created with restrictive permissions
            # and during file ingest these permissions persist in the
            # datastore. Using a temp directory allows us to create
            # a file with umask default permissions.
            tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")
            try:
                # Construct a file to receive the log records and "touch" it.
                log_file = os.path.join(tmpdir, f"butler-log-{task_node.label}.json")
                with open(log_file, "w"):
                    pass
                log_handler_file = FileHandler(log_file)
                log_handler_file.setFormatter(JsonLogFormatter())
                root_logger.addHandler(log_handler_file)

                try:
                    with ButlerMDC.set_mdc(mdc):
                        yield ctx
                finally:
                    # Ensure that the logs are stored in butler.
                    root_logger.removeHandler(log_handler_file)
                    log_handler_file.close()
                    if ctx.store:
                        self._ingest_log_records(quantum, log_dataset_name, log_file)
            finally:
                # Always remove the temporary directory, including when the
                # setup above or the ingest fails; otherwise it would be
                # left behind on disk.
                shutil.rmtree(tmpdir, ignore_errors=True)
        else:
            log_handler_memory = ButlerLogRecordHandler()
            root_logger.addHandler(log_handler_memory)

            try:
                with ButlerMDC.set_mdc(mdc):
                    yield ctx
            finally:
                # Ensure that the logs are stored in butler.
                root_logger.removeHandler(log_handler_memory)
                try:
                    if ctx.store:
                        self._store_log_records(quantum, log_dataset_name, log_handler_memory)
                finally:
                    # Drop accumulated records even if storing them failed.
                    log_handler_memory.records.clear()

    @staticmethod
    def _get_log_ref(quantum: Quantum, dataset_type: str):  # noqa: ANN205
        """Return the single output dataset ref for the log dataset type.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum whose ``outputs`` mapping is searched.
        dataset_type : `str`
            Name of the log output dataset type.

        Returns
        -------
        ref
            The only entry of ``quantum.outputs[dataset_type]``.

        Raises
        ------
        InvalidQuantumError
            Raised if ``quantum.outputs`` has no entry for ``dataset_type``.
        """
        # DatasetRef has to be in the Quantum outputs, can lookup by name.
        try:
            [ref] = quantum.outputs[dataset_type]
        except LookupError as exc:
            raise InvalidQuantumError(
                f"Quantum outputs is missing log output dataset type {dataset_type};"
                " this could happen due to inconsistent options between QuantumGraph generation"
                " and execution"
            ) from exc
        return ref

    def _store_log_records(
        self, quantum: Quantum, dataset_type: str, log_handler: ButlerLogRecordHandler
    ) -> None:
        """Put records accumulated in memory into the butler.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum providing the output dataset ref for the log.
        dataset_type : `str`
            Name of the log output dataset type.
        log_handler : `~lsst.daf.butler.logging.ButlerLogRecordHandler`
            Handler whose ``records`` are written with ``self.butler.put``.

        Raises
        ------
        InvalidQuantumError
            Raised if the quantum has no output for ``dataset_type``.
        """
        ref = self._get_log_ref(quantum, dataset_type)
        self.butler.put(log_handler.records, ref)

    def _ingest_log_records(self, quantum: Quantum, dataset_type: str, filename: str) -> None:
        """Ingest a file of JSON log records into the full butler.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum providing the output dataset ref for the log.
        dataset_type : `str`
            Name of the log output dataset type.
        filename : `str`
            Path of the file holding the log records. It is ingested with
            ``transfer="move"``; if it is not ingested it is removed.

        Raises
        ------
        InvalidQuantumError
            Raised if the quantum has no output for ``dataset_type``.
        """
        assert self.full_butler is not None, "Expected to have full butler for ingest"
        ingested = False
        try:
            ref = self._get_log_ref(quantum, dataset_type)

            # Need to ingest this file directly into butler.
            dataset = FileDataset(path=filename, refs=ref)
            try:
                self.full_butler.ingest(dataset, transfer="move")
                ingested = True
            except NotImplementedError:
                # Some datastores can't receive files (e.g. in-memory datastore
                # when testing), we store empty list for those just to have a
                # dataset. Alternative is to read the file as a
                # ButlerLogRecords object and put it.
                _LOG.info(
                    "Log records could not be stored in this butler because the"
                    " datastore can not ingest files, empty record list is stored instead."
                )
                records = ButlerLogRecords.from_records([])
                self.full_butler.put(records, ref)
        finally:
            # Remove the file if it was not ingested; a failure to remove it
            # is deliberately ignored.
            if not ingested:
                with suppress(OSError):
                    os.remove(filename)