lsst-pipe-base 29.2025.4100-py3-none-any.whl → 29.2025.4300-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. lsst/pipe/base/_status.py +1 -1
  2. lsst/pipe/base/cli/cmd/__init__.py +2 -2
  3. lsst/pipe/base/cli/cmd/commands.py +116 -1
  4. lsst/pipe/base/graph_walker.py +8 -4
  5. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +30 -5
  6. lsst/pipe/base/quantum_graph/__init__.py +1 -0
  7. lsst/pipe/base/quantum_graph/_common.py +2 -1
  8. lsst/pipe/base/quantum_graph/_multiblock.py +41 -7
  9. lsst/pipe/base/quantum_graph/_predicted.py +62 -5
  10. lsst/pipe/base/quantum_graph/_provenance.py +1209 -0
  11. lsst/pipe/base/quantum_graph/aggregator/__init__.py +143 -0
  12. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +981 -0
  13. lsst/pipe/base/quantum_graph/aggregator/_config.py +139 -0
  14. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +312 -0
  15. lsst/pipe/base/quantum_graph/aggregator/_progress.py +208 -0
  16. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +371 -0
  17. lsst/pipe/base/quantum_graph/aggregator/_structs.py +167 -0
  18. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +225 -0
  19. lsst/pipe/base/quantum_graph/aggregator/_writer.py +593 -0
  20. lsst/pipe/base/resource_usage.py +183 -0
  21. lsst/pipe/base/simple_pipeline_executor.py +4 -1
  22. lsst/pipe/base/tests/util.py +31 -0
  23. lsst/pipe/base/version.py +1 -1
  24. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/METADATA +1 -1
  25. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/RECORD +33 -22
  26. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/WHEEL +0 -0
  27. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/entry_points.txt +0 -0
  28. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/COPYRIGHT +0 -0
  29. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/LICENSE +0 -0
  30. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/bsd_license.txt +0 -0
  31. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/gpl-v3.0.txt +0 -0
  32. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/top_level.txt +0 -0
  33. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_graph/aggregator/_config.py (new file)
@@ -0,0 +1,139 @@
+ # This file is part of pipe_base.
+ #
+ # Developed for the LSST Data Management System.
+ # This product includes software developed by the LSST Project
+ # (http://www.lsst.org).
+ # See the COPYRIGHT file at the top-level directory of this distribution
+ # for details of code ownership.
+ #
+ # This software is dual licensed under the GNU General Public License and also
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+ # respectively. If you choose the GPL option then the following text applies
+ # (but note that there is still no warranty even if you opt for BSD instead):
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ from __future__ import annotations
+
+ __all__ = ("AggregatorConfig",)
+
+
+ import pydantic
+
+
+ class AggregatorConfig(pydantic.BaseModel):
+     """Configuration for the provenance aggregator."""
+
+     output_path: str | None = None
+     """Path for the output provenance quantum graph file.
+
+     At present this option is intended only for debugging.
+     """
+
+     worker_log_dir: str | None = None
+     """Path to a directory (POSIX only) for parallel worker logs."""
+
+     worker_log_level: str = "VERBOSE"
+     """Log level for worker processes/threads.
+
+     Per-quantum messages only appear at ``DEBUG`` level.
+     """
+
+     worker_profile_dir: str | None = None
+     """Path to a directory (POSIX only) for parallel worker profiling dumps.
+
+     This option is ignored when `n_processes` is `1`.
+     """
+
+     n_processes: int = 1
+     """Number of processes the scanner should use."""
+
+     assume_complete: bool = True
+     """If `True`, the aggregator can assume all quanta have run to completion
+     (including any automatic retries). If `False`, only successes can be
+     considered final, and quanta that appear to have failed or to have not been
+     executed are ignored.
+     """
+
+     defensive_ingest: bool = False
+     """If `True`, guard against datasets having already been ingested into the
+     central butler repository.
+
+     Defensive ingest mode is automatically turned on (with a warning emitted)
+     if an ingest attempt fails due to a database constraint violation. Enabling
+     defensive mode up-front avoids this warning and is slightly more efficient
+     when it is already known that some datasets have already been ingested.
+
+     Defensive mode does not guard against race conditions from multiple ingest
+     processes running simultaneously, as it relies on a one-time query to
+     determine what is already present in the central repository.
+     """
+
+     ingest_batch_size: int = 10000
+     """Number of butler datasets that must accumulate to trigger an ingest."""
+
+     register_dataset_types: bool = True
+     """Whether to register output dataset types in the central butler
+     repository before starting ingest.
+     """
+
+     update_output_chain: bool = True
+     """Whether to prepend the output `~lsst.daf.butler.CollectionType.RUN` to
+     the output `~lsst.daf.butler.CollectionType.CHAINED` collection.
+     """
+
+     dry_run: bool = False
+     """If `True`, do not actually perform any deletions or central butler
+     ingests.
+
+     Most log messages concerning deletions and ingests will still be emitted in
+     order to provide a better emulation of a real run.
+     """
+
+     interactive_status: bool = False
+     """Whether to use an interactive status display with progress bars.
+
+     If this is `True`, the `tqdm` module must be available. If this is
+     `False`, a periodic logger will be used to display status at a fixed
+     interval instead (see `log_status_interval`).
+     """
+
+     log_status_interval: float | None = None
+     """Interval (in seconds) between periodic logger status updates."""
+
+     worker_sleep: float = 0.01
+     """Time (in seconds) a worker should wait when there are no requests from
+     the main aggregator process.
+     """
+
+     zstd_level: int = 10
+     """ZStandard compression level to use for all compressed-JSON blocks."""
+
+     zstd_dict_size: int = 32768
+     """Size (in bytes) of the ZStandard compression dictionary."""
+
+     zstd_dict_n_inputs: int = 512
+     """Number of samples of each type (see below) to include in ZStandard
+     compression dictionary training.
+
+     Training is run on a random subset of the `PredictedQuantumDatasetsModel`
+     objects in the predicted graph, as well as the first provenance quanta,
+     logs, and metadata blocks encountered.
+     """
+
+     mock_storage_classes: bool = False
+     """Enable support for storage classes created by the
+     lsst.pipe.base.tests.mocks package.
+     """
lsst/pipe/base/quantum_graph/aggregator/_ingester.py (new file)
@@ -0,0 +1,312 @@
+ # This file is part of pipe_base.
+ #
+ # Developed for the LSST Data Management System.
+ # This product includes software developed by the LSST Project
+ # (http://www.lsst.org).
+ # See the COPYRIGHT file at the top-level directory of this distribution
+ # for details of code ownership.
+ #
+ # This software is dual licensed under the GNU General Public License and also
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+ # respectively. If you choose the GPL option then the following text applies
+ # (but note that there is still no warranty even if you opt for BSD instead):
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ from __future__ import annotations
+
+ __all__ = ("Ingester",)
+
+ import dataclasses
+ import logging
+ import time
+ import uuid
+ from collections import defaultdict
+
+ from lsst.daf.butler import Butler, CollectionType, DatasetRef, DimensionGroup
+ from lsst.daf.butler.datastore.record_data import DatastoreRecordData
+ from lsst.daf.butler.registry import ConflictingDefinitionError
+
+ from ...pipeline_graph import TaskImportMode
+ from .._common import DatastoreName
+ from .._predicted import PredictedDatasetModel, PredictedQuantumGraphComponents, PredictedQuantumGraphReader
+ from ._communicators import IngesterCommunicator
+
+
+ @dataclasses.dataclass
+ class Ingester:
+     """A helper class for the provenance aggregator that handles ingestion into
+     the central butler repository.
+     """
+
+     predicted_path: str
+     """Path to the predicted quantum graph."""
+
+     butler_path: str
+     """Path or alias to the central butler repository."""
+
+     comms: IngesterCommunicator
+     """Communicator object for this worker."""
+
+     predicted: PredictedQuantumGraphComponents = dataclasses.field(init=False)
+     """Components of the predicted graph."""
+
+     butler: Butler = dataclasses.field(init=False)
+     """Client for the central butler repository."""
+
+     n_datasets_ingested: int = 0
+     """Total number of datasets ingested by this invocation."""
+
+     n_datasets_skipped: int = 0
+     """Total number of datasets skipped because they were already present."""
+
+     n_producers_pending: int = 0
+     """Number of quanta whose outputs are currently pending ingest."""
+
+     refs_pending: defaultdict[DimensionGroup, list[DatasetRef]] = dataclasses.field(
+         default_factory=lambda: defaultdict(list)
+     )
+     """Dataset references pending ingest, grouped by their dimensions."""
+
+     records_pending: dict[DatastoreName, DatastoreRecordData] = dataclasses.field(default_factory=dict)
+     """Datastore records pending ingest, grouped by datastore name."""
+
+     already_ingested: set[uuid.UUID] | None = None
+     """A set of all dataset IDs already present in the output RUN
+     collection.
+
+     If this is not `None`, the ingester is in defensive ingest mode, either
+     because it was configured to query for these dataset IDs up front, or
+     because a transaction failed due to a dataset already being present.
+     """
+
+     last_ingest_time: float = dataclasses.field(default_factory=time.time)
+     """POSIX timestamp at which the last ingest transaction concluded."""
+
+     def __post_init__(self) -> None:
+         self.comms.log.verbose("Reading from predicted quantum graph.")
+         with PredictedQuantumGraphReader.open(
+             self.predicted_path, import_mode=TaskImportMode.DO_NOT_IMPORT
+         ) as reader:
+             # We only need the header and pipeline graph.
+             self.predicted = reader.components
+         if self.comms.config.mock_storage_classes:
+             import lsst.pipe.base.tests.mocks  # noqa: F401
+         self.comms.log.verbose("Initializing butler.")
+         self.butler = Butler.from_config(self.butler_path, writeable=not self.comms.config.dry_run)
+
+     @property
+     def n_datasets_pending(self) -> int:
+         """The number of butler datasets currently pending."""
+         return sum(len(v) for v in self.refs_pending.values())
+
+     @staticmethod
+     def run(predicted_path: str, butler_path: str, comms: IngesterCommunicator) -> None:
+         """Run the ingester.
+
+         Parameters
+         ----------
+         predicted_path : `str`
+             Path to the predicted quantum graph.
+         butler_path : `str`
+             Path or alias to the central butler repository.
+         comms : `IngesterCommunicator`
+             Communicator for the ingester.
+
+         Notes
+         -----
+         This method is designed to run as the ``target`` in
+         `WorkerContext.make_worker`.
+         """
+         with comms:
+             ingester = Ingester(predicted_path, butler_path, comms)
+             ingester.loop()
+
+     def loop(self) -> None:
+         """Run the main loop for the ingester."""
+         self.comms.log.verbose("Registering collections and dataset types.")
+         if not self.comms.config.dry_run:
+             if self.comms.config.register_dataset_types:
+                 self.predicted.pipeline_graph.register_dataset_types(
+                     self.butler,
+                     include_inputs=False,
+                     include_packages=True,
+                     include_configs=True,
+                     include_logs=True,
+                 )
+             self.butler.collections.register(self.predicted.header.output_run)
+             # Updating the output chain cannot happen inside the caching
+             # context.
+             if self.comms.config.update_output_chain:
+                 self.update_output_chain()
+         with self.butler.registry.caching_context():
+             if self.comms.config.defensive_ingest:
+                 self.fetch_already_ingested()
+             self.comms.log.info("Startup completed in %ss.", time.time() - self.last_ingest_time)
+             self.last_ingest_time = time.time()
+             for ingest_request in self.comms.poll():
+                 self.n_producers_pending += 1
+                 self.comms.log.debug(f"Got ingest request for producer {ingest_request.producer_id}.")
+                 self.update_pending(ingest_request.datasets, ingest_request.records)
+                 if self.n_datasets_pending > self.comms.config.ingest_batch_size:
+                     self.ingest()
+             self.comms.log.info("All ingest requests received.")
+             # We use 'while' in case this fails with a conflict and we switch
+             # to defensive mode (should be at most two iterations).
+             ingest_start_time = time.time()
+             while self.n_datasets_pending:
+                 n_datasets = self.n_datasets_pending
+                 self.ingest()
+                 self.comms.log.verbose(
+                     "Gathered %d final datasets in %ss and ingested them in %ss.",
+                     n_datasets,
+                     ingest_start_time - self.last_ingest_time,
+                     time.time() - ingest_start_time,
+                 )
+             if self.n_producers_pending:
+                 # We can finish with returns pending if we filtered out all of
+                 # the datasets we started with as already existing.
+                 self.report()
+         self.comms.log_progress(
+             logging.INFO,
+             f"Ingested {self.n_datasets_ingested} dataset(s); "
+             f"skipped {self.n_datasets_skipped} already present.",
+         )
+
+     def ingest(self) -> None:
+         """Ingest all pending datasets and report success to the supervisor."""
+         ingest_start_time = time.time()
+         self.comms.log.verbose(
+             "Gathered %d datasets from %d quanta in %ss.",
+             self.n_datasets_pending,
+             self.n_producers_pending,
+             ingest_start_time - self.last_ingest_time,
+         )
+         try:
+             if not self.comms.config.dry_run:
+                 with self.butler.registry.transaction():
+                     for refs in self.refs_pending.values():
+                         self.butler.registry._importDatasets(refs, expand=False, assume_new=True)
+                     self.butler._datastore.import_records(self.records_pending)
+             self.last_ingest_time = time.time()
+             self.comms.log.verbose(
+                 "Ingested %d datasets from %d quanta in %ss.",
+                 self.n_datasets_pending,
+                 self.n_producers_pending,
+                 self.last_ingest_time - ingest_start_time,
+             )
+             self.n_datasets_ingested += self.n_datasets_pending
+         except ConflictingDefinitionError:
+             if self.already_ingested is None:
+                 self.comms.log_progress(
+                     logging.INFO,
+                     "Some outputs seem to have already been ingested; querying for existing datasets and "
+                     "switching to defensive ingest mode.",
+                 )
+                 self.fetch_already_ingested()
+                 # We just return instead of trying again immediately because we
+                 # might have just shrunk the number of pending datasets below
+                 # the batch threshold.
+                 return
+             else:
+                 raise
+         self.report()
+         self.records_pending.clear()
+         self.refs_pending.clear()
+
+     def report(self) -> None:
+         """Report a successful ingest to the supervisor."""
+         self.comms.report_ingest(self.n_producers_pending)
+         self.n_producers_pending = 0
+
+     def fetch_already_ingested(self) -> None:
+         """Query for the UUIDs of all datasets already present in the output
+         RUN collection, and filter the pending datasets accordingly.
+         """
+         self.comms.log.info("Fetching all UUIDs in output collection %r.", self.predicted.header.output_run)
+         self.already_ingested = set(
+             self.butler.registry._fetch_run_dataset_ids(self.predicted.header.output_run)
+         )
+         kept: set[uuid.UUID] = set()
+         for dimensions, refs in self.refs_pending.items():
+             filtered_refs: list[DatasetRef] = []
+             for ref in refs:
+                 if ref.id not in self.already_ingested:
+                     kept.add(ref.id)
+                     filtered_refs.append(ref)
+                 else:
+                     self.n_datasets_skipped += 1
+             self.refs_pending[dimensions] = filtered_refs
+         for datastore_name, datastore_records in list(self.records_pending.items()):
+             if (filtered_records := datastore_records.subset(kept)) is not None:
+                 self.records_pending[datastore_name] = filtered_records
+             else:
+                 del self.records_pending[datastore_name]
+
+     def update_pending(
+         self, datasets: list[PredictedDatasetModel], records: dict[DatastoreName, DatastoreRecordData]
+     ) -> None:
+         """Add an ingest request to the pending-ingest data structures.
+
+         Parameters
+         ----------
+         datasets : `list` [ `PredictedDatasetModel` ]
+             Registry information about the datasets.
+         records : `dict` [ `str`, \
+                 `lsst.daf.butler.datastore.record_data.DatastoreRecordData` ]
+             Datastore information about the datasets.
+         """
+         n_given = len(datasets)
+         if self.already_ingested is not None:
+             datasets = [d for d in datasets if d.dataset_id not in self.already_ingested]
+             kept = {d.dataset_id for d in datasets}
+             self.n_datasets_skipped += n_given - len(kept)
+             records = {
+                 datastore_name: filtered_records
+                 for datastore_name, original_records in records.items()
+                 if (filtered_records := original_records.subset(kept)) is not None
+             }
+         for dataset in datasets:
+             ref = self.predicted.make_dataset_ref(dataset)
+             self.refs_pending[ref.datasetType.dimensions].append(ref)
+         for datastore_name, datastore_records in records.items():
+             if (existing_records := self.records_pending.get(datastore_name)) is not None:
+                 existing_records.update(datastore_records)
+             else:
+                 self.records_pending[datastore_name] = datastore_records
+
+     def update_output_chain(self) -> None:
+         """Update the output CHAINED collection to include the output RUN
+         collection (and the inputs, if the output CHAINED collection does not
+         exist).
+
+         Notes
+         -----
+         This method cannot be called inside the registry caching context.
+         """
+         if self.predicted.header.output is None:
+             return
+         self.comms.log.info(
+             "Updating output collection %s to include %s.",
+             self.predicted.header.output,
+             self.predicted.header.output_run,
+         )
+         if self.butler.collections.register(self.predicted.header.output, CollectionType.CHAINED):
+             # Chain is new; need to add inputs, but we want to flatten them
+             # first.
+             if self.predicted.header.inputs:
+                 flattened = self.butler.collections.query(self.predicted.header.inputs, flatten_chains=True)
+                 self.butler.collections.extend_chain(self.predicted.header.output, flattened)
+         self.butler.collections.prepend_chain(self.predicted.header.output, self.predicted.header.output_run)
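
The defensive-ingest path in update_pending above reduces each incoming request to only the datasets not already present before they join the pending batch, counting the rest as skipped. A standalone sketch of that set-difference pattern, with a hypothetical FakeDataset standing in for PredictedDatasetModel:

    import uuid
    from dataclasses import dataclass

    @dataclass
    class FakeDataset:
        """Hypothetical stand-in for PredictedDatasetModel; only the ID matters here."""

        dataset_id: uuid.UUID

    def filter_new(
        datasets: list[FakeDataset], already_ingested: set[uuid.UUID] | None
    ) -> tuple[list[FakeDataset], int]:
        """Return (datasets to ingest, number skipped), mirroring update_pending."""
        if already_ingested is None:
            # Not in defensive mode: every dataset is assumed to be new.
            return datasets, 0
        kept = [d for d in datasets if d.dataset_id not in already_ingested]
        return kept, len(datasets) - len(kept)

    seen = {uuid.UUID(int=1)}
    batch = [FakeDataset(uuid.UUID(int=1)), FakeDataset(uuid.UUID(int=2))]
    to_ingest, n_skipped = filter_new(batch, seen)
    assert len(to_ingest) == 1 and n_skipped == 1
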
lsst/pipe/base/quantum_graph/aggregator/_progress.py (new file)
@@ -0,0 +1,208 @@
+ # This file is part of pipe_base.
+ #
+ # Developed for the LSST Data Management System.
+ # This product includes software developed by the LSST Project
+ # (http://www.lsst.org).
+ # See the COPYRIGHT file at the top-level directory of this distribution
+ # for details of code ownership.
+ #
+ # This software is dual licensed under the GNU General Public License and also
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+ # respectively. If you choose the GPL option then the following text applies
+ # (but note that there is still no warranty even if you opt for BSD instead):
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ from __future__ import annotations
+
+ __all__ = ("Progress", "make_worker_log")
+
+ import logging
+ import os
+ import time
+ from types import TracebackType
+ from typing import Self
+
+ from lsst.utils.logging import TRACE, VERBOSE, LsstLogAdapter, PeriodicLogger, getLogger
+
+ from ._config import AggregatorConfig
+
+
+ class Progress:
+     """A helper class for the provenance aggregator that handles reporting
+     progress to the user.
+
+     This includes both logging (including periodic logging) and optional
+     progress bars.
+
+     Parameters
+     ----------
+     log : `lsst.utils.logging.LsstLogAdapter`
+         LSST-customized logger.
+     config : `AggregatorConfig`
+         Configuration for the aggregator.
+
+     Notes
+     -----
+     This class is a context manager in order to manage the redirection of
+     logging when progress bars for interactive display are in use. The context
+     manager does nothing otherwise.
+     """
+
+     def __init__(self, log: LsstLogAdapter, config: AggregatorConfig):
+         self.start = time.time()
+         self.log = log
+         self.config = config
+         self._periodic_log = PeriodicLogger(self.log, config.log_status_interval)
+         self._n_scanned: int = 0
+         self._n_ingested: int = 0
+         self._n_written: int = 0
+         self._n_quanta: int | None = None
+         self.interactive = config.interactive_status
+
+     def __enter__(self) -> Self:
+         if self.interactive:
+             from tqdm.contrib.logging import logging_redirect_tqdm
+
+             self._logging_redirect = logging_redirect_tqdm()
+             self._logging_redirect.__enter__()
+         return self
+
+     def __exit__(
+         self,
+         exc_type: type[BaseException] | None,
+         exc_value: BaseException | None,
+         traceback: TracebackType | None,
+     ) -> bool | None:
+         if self.interactive:
+             self._logging_redirect.__exit__(exc_type, exc_value, traceback)
+         return None
+
+     def set_n_quanta(self, n_quanta: int) -> None:
+         """Set the total number of quanta.
+
+         Parameters
+         ----------
+         n_quanta : `int`
+             Total number of quanta, including special "init" quanta.
+
+         Notes
+         -----
+         This method must be called before any of the ``report_*`` methods.
+         """
+         self._n_quanta = n_quanta
+         if self.interactive:
+             from tqdm import tqdm
+
+             self._scan_progress = tqdm(desc="Scanning", total=n_quanta, leave=False, unit="quanta")
+             self._ingest_progress = tqdm(
+                 desc="Ingesting", total=n_quanta, leave=False, smoothing=0.1, unit="quanta"
+             )
+             if self.config.output_path is not None:
+                 self._write_progress = tqdm(desc="Writing", total=n_quanta, leave=False, unit="quanta")
+
+     @property
+     def elapsed_time(self) -> float:
+         """The time in seconds since the start of the aggregator."""
+         return time.time() - self.start
+
+     def _log_status(self) -> None:
+         """Invoke the periodic logger with the current status."""
+         self._periodic_log.log(
+             "%s quanta scanned, %s quantum outputs ingested, "
+             "%s provenance quanta written (of %s) after %0.1fs.",
+             self._n_scanned,
+             self._n_ingested,
+             self._n_written,
+             self._n_quanta,
+             self.elapsed_time,
+         )
+
+     def report_scan(self) -> None:
+         """Report that a quantum was scanned."""
+         self._n_scanned += 1
+         if self.interactive:
+             self._scan_progress.update(1)
+         else:
+             self._log_status()
+
+     def finish_scans(self) -> None:
+         """Report that all scanning is done."""
+         if self.interactive:
+             self._scan_progress.close()
+
+     def report_ingests(self, n_quanta: int) -> None:
+         """Report that ingests for multiple quanta were completed.
+
+         Parameters
+         ----------
+         n_quanta : `int`
+             Number of quanta whose outputs were ingested.
+         """
+         self._n_ingested += n_quanta
+         if self.interactive:
+             self._ingest_progress.update(n_quanta)
+         else:
+             self._log_status()
+
+     def finish_ingests(self) -> None:
+         """Report that all ingests are done."""
+         if self.interactive:
+             self._ingest_progress.close()
+
+     def report_write(self) -> None:
+         """Report that a quantum's provenance was written."""
+         self._n_written += 1
+         if self.interactive:
+             self._write_progress.update()
+         else:
+             self._log_status()
+
+     def finish_writes(self) -> None:
+         """Report that all writes are done."""
+         if self.interactive:
+             self._write_progress.close()
+
+
+ def make_worker_log(name: str, config: AggregatorConfig) -> LsstLogAdapter:
+     """Make a logger for a worker.
+
+     Parameters
+     ----------
+     name : `str`
+         Name of the worker, to be used as part of the name for the logger.
+     config : `AggregatorConfig`
+         Configuration for the aggregator.
+     """
+     base_log = logging.getLogger(f"lsst.pipe.base.quantum_graph.aggregator.{name}")
+     base_log.propagate = False
+     log = getLogger(logger=base_log)
+     if config.worker_log_dir is not None:
+         os.makedirs(config.worker_log_dir, exist_ok=True)
+         match config.worker_log_level.upper():
+             case "VERBOSE":
+                 log.setLevel(VERBOSE)
+             case "TRACE":
+                 log.setLevel(TRACE)
+             case std:
+                 log.setLevel(getattr(logging, std))
+         handler = logging.FileHandler(os.path.join(config.worker_log_dir, f"{name}.log"))
+         handler.setFormatter(
+             logging.Formatter("%(levelname)s %(asctime)s.%(msecs)03d %(message)s", "%Y-%m-%dT%H:%M:%S")
+         )
+         log.addHandler(handler)
+     else:
+         log.addHandler(logging.NullHandler())
+     return log
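
make_worker_log isolates each worker's output: propagation to the root logger is disabled, so records go only to a per-worker file (or are swallowed by a NullHandler when no log directory is configured). A condensed sketch of that pattern using only the standard library (make_isolated_log and its names are illustrative, not part of the package):

    import logging
    import os
    import tempfile

    def make_isolated_log(name: str, log_dir: str | None) -> logging.Logger:
        log = logging.getLogger(f"example.worker.{name}")
        log.propagate = False  # keep worker records out of the root logger
        if log_dir is not None:
            os.makedirs(log_dir, exist_ok=True)
            handler = logging.FileHandler(os.path.join(log_dir, f"{name}.log"))
            handler.setFormatter(logging.Formatter("%(levelname)s %(asctime)s %(message)s"))
            log.addHandler(handler)
        else:
            log.addHandler(logging.NullHandler())  # no directory: discard output
        return log

    log = make_isolated_log("scanner-0", tempfile.mkdtemp())
    log.setLevel(logging.INFO)
    log.info("worker started")  # appears only in scanner-0.log
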