lsst-pipe-base 29.2025.4100-py3-none-any.whl → 29.2025.4300-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_status.py +1 -1
- lsst/pipe/base/cli/cmd/__init__.py +2 -2
- lsst/pipe/base/cli/cmd/commands.py +116 -1
- lsst/pipe/base/graph_walker.py +8 -4
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +30 -5
- lsst/pipe/base/quantum_graph/__init__.py +1 -0
- lsst/pipe/base/quantum_graph/_common.py +2 -1
- lsst/pipe/base/quantum_graph/_multiblock.py +41 -7
- lsst/pipe/base/quantum_graph/_predicted.py +62 -5
- lsst/pipe/base/quantum_graph/_provenance.py +1209 -0
- lsst/pipe/base/quantum_graph/aggregator/__init__.py +143 -0
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +981 -0
- lsst/pipe/base/quantum_graph/aggregator/_config.py +139 -0
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +312 -0
- lsst/pipe/base/quantum_graph/aggregator/_progress.py +208 -0
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +371 -0
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +167 -0
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +225 -0
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +593 -0
- lsst/pipe/base/resource_usage.py +183 -0
- lsst/pipe/base/simple_pipeline_executor.py +4 -1
- lsst/pipe/base/tests/util.py +31 -0
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/RECORD +33 -22
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/zip-safe +0 -0
@@ -0,0 +1,981 @@
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "FatalWorkerError",
    "IngesterCommunicator",
    "ScannerCommunicator",
    "SpawnProcessContext",
    "SupervisorCommunicator",
    "ThreadingContext",
    "WorkerContext",
)

import cProfile
import dataclasses
import enum
import logging
import multiprocessing.context
import multiprocessing.synchronize
import os
import queue
import signal
import threading
import time
import uuid
from abc import ABC, abstractmethod
from collections.abc import Callable, Iterable, Iterator
from contextlib import AbstractContextManager, ExitStack, contextmanager
from traceback import format_exception
from types import TracebackType
from typing import Any, Literal, Self, TypeAlias, TypeVar, Union

from lsst.utils.logging import VERBOSE, LsstLogAdapter

from ._config import AggregatorConfig
from ._progress import Progress, make_worker_log
from ._structs import IngestRequest, ScanReport, ScanResult

_T = TypeVar("_T")

_TINY_TIMEOUT = 0.01

# multiprocessing.Queue is a type according to the standard library type stubs,
# but it's really a function at runtime. But since the Python <= 3.11 type
# alias syntax evaluates the real runtime objects, we need to use strings, and
# hence we need to use Union. With Python 3.12's 'type' statement this gets
# cleaner.
Queue: TypeAlias = Union["queue.Queue[_T]", "multiprocessing.Queue[_T]"]

Event: TypeAlias = threading.Event | multiprocessing.synchronize.Event

Worker: TypeAlias = threading.Thread | multiprocessing.context.SpawnProcess

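# Illustrative sketch, not part of the released file: under Python >= 3.12 the
# 'type' statement mentioned above is lazily evaluated, so the quoted names
# and Union become unnecessary:
#
#     type Queue[T] = queue.Queue[T] | multiprocessing.Queue[T]
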
class WorkerContext(ABC):
    """A simple abstract interface that can be implemented by both threading
    and multiprocessing.
    """

    @abstractmethod
    def make_queue(self) -> Queue[Any]:
        """Make an empty queue that can be used to pass objects between
        workers in this context.
        """
        raise NotImplementedError()

    @abstractmethod
    def make_event(self) -> Event:
        """Make an event that can be used to communicate a boolean state change
        to workers in this context.
        """
        raise NotImplementedError()

    @abstractmethod
    def make_worker(
        self, target: Callable[..., None], args: tuple[Any, ...], name: str | None = None
    ) -> Worker:
        """Make a worker that runs the given callable.

        Parameters
        ----------
        target : `~collections.abc.Callable`
            A callable to invoke on the worker.
        args : `tuple`
            Positional arguments to pass to the callable.
        name : `str`, optional
            Human-readable name for the worker.

        Returns
        -------
        worker : `threading.Thread` or `multiprocessing.Process`
            Process or thread. Will need to have its ``start`` method called
            to actually begin.
        """
        raise NotImplementedError()


class ThreadingContext(WorkerContext):
    """An implementation of `WorkerContext` backed by the `threading`
    module.
    """

    def make_queue(self) -> Queue[Any]:
        return queue.Queue()

    def make_event(self) -> Event:
        return threading.Event()

    def make_worker(
        self, target: Callable[..., None], args: tuple[Any, ...], name: str | None = None
    ) -> Worker:
        return threading.Thread(target=target, args=args, name=name)


class SpawnProcessContext(WorkerContext):
    """An implementation of `WorkerContext` backed by the `multiprocessing`
    module, with new processes started by spawning.
    """

    def __init__(self) -> None:
        self._ctx = multiprocessing.get_context("spawn")

    def make_queue(self) -> Queue[Any]:
        return self._ctx.Queue()

    def make_event(self) -> Event:
        return self._ctx.Event()

    def make_worker(
        self, target: Callable[..., None], args: tuple[Any, ...], name: str | None = None
    ) -> Worker:
        return self._ctx.Process(target=target, args=args, name=name)

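# Illustrative sketch, not part of the released file: the two contexts are
# interchangeable, so the aggregator can switch between threads and spawned
# processes in one place; `run_scanner` here is a hypothetical worker entry
# point:
#
#     context: WorkerContext = SpawnProcessContext() if n_processes > 1 else ThreadingContext()
#     worker = context.make_worker(target=run_scanner, args=(), name="scanner-000")
#     worker.start()
#     worker.join()
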
def _get_from_queue(q: Queue[_T], block: bool = False, timeout: float | None = None) -> _T | None:
    """Get an object from a queue and return `None` if it is empty.

    Parameters
    ----------
    q : `Queue`
        Queue to get an object from.
    block : `bool`
        Whether to block until an object is available.
    timeout : `float` or `None`, optional
        Maximum number of seconds to wait while blocking.

    Returns
    -------
    obj : `object` or `None`
        Object from the queue, or `None` if it was empty.
    """
    try:
        return q.get(block=block, timeout=timeout)
    except queue.Empty:
        return None

class FatalWorkerError(BaseException):
    """An exception raised by communicators when one worker (including the
    supervisor) has caught an exception in order to signal the others to shut
    down.
    """

class _Sentinel(enum.Enum):
    """Sentinel values used to indicate sequence points or worker shutdown
    conditions.
    """

    NO_MORE_SCAN_REQUESTS = enum.auto()
    """Sentinel sent from the supervisor to scanners to indicate that there are
    no more quanta left to be scanned.
    """

    NO_MORE_INGEST_REQUESTS = enum.auto()
    """Sentinel sent from scanners to the ingester to indicate that there will
    be no more ingest requests from a particular worker.
    """

    NO_MORE_WRITE_REQUESTS = enum.auto()
    """Sentinel sent from scanners and the supervisor to the writer to
    indicate that there will be no more write requests from a particular
    worker.
    """

    WRITE_REPORT = enum.auto()
    """Sentinel sent from the writer to the supervisor to report that a
    quantum's provenance was written.
    """

    SCANNER_DONE = enum.auto()
    """Sentinel sent from scanners to the supervisor to report that they are
    done and shutting down.
    """

    INGESTER_DONE = enum.auto()
    """Sentinel sent from the ingester to the supervisor to report that it is
    done and shutting down.
    """

    WRITER_DONE = enum.auto()
    """Sentinel sent from the writer to the supervisor to report that it is
    done and shutting down.
    """

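# Illustrative sketch, not part of the released file: the shutdown protocol
# described in the docstrings above, in miniature. A sender that fans out puts
# one sentinel per consumer (and each consumer takes exactly one), while a
# consumer with several producers counts one sentinel per producer before
# shutting down; `requests` and `handle` are hypothetical:
#
#     n_done = 0
#     while n_done < n_producers:
#         match requests.get(block=True):
#             case _Sentinel.NO_MORE_INGEST_REQUESTS:
#                 n_done += 1
#             case request:
#                 handle(request)
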
@dataclasses.dataclass
class _WorkerErrorMessage:
    """An internal struct used to pass information about an error that occurred
    on a worker back to the supervisor.

    As a rule, these are unexpected, unrecoverable exceptions.
    """

    worker: str
    """Name of the originating worker."""

    traceback: str
    """A logged exception traceback.

    Note that this is not a `BaseException` subclass that can actually be
    re-raised on the supervisor; it's just something we can log to make the
    right traceback appear on the screen. If something silences that printing
    in favor of its own exception management (pytest!) this information
    disappears.
    """

@dataclasses.dataclass
class _ScanRequest:
    """An internal struct passed from the supervisor to the scanners to request
    a quantum be scanned.
    """

    quantum_id: uuid.UUID
    """ID of the quantum to be scanned."""


@dataclasses.dataclass
class _IngestReport:
    """An internal struct passed from the ingester to the supervisor to report
    a completed ingest batch.
    """

    n_producers: int
    """Number of producing quanta whose datasets were ingested.

    We use quanta rather than datasets as the count here because the supervisor
    knows the total number of quanta in advance but not the total number of
    datasets to be ingested, so it's a lot easier to attach a denominator
    and/or progress bar to this number.
    """


@dataclasses.dataclass
class _ProgressLog:
    """A high-level log message sent from a worker to the supervisor.

    These are messages that should appear to come from the main
    'aggregate-graph' logger, not a worker-specific one.
    """

    message: str
    """Log message."""

    level: int
    """Log level."""


@dataclasses.dataclass
class _CompressionDictionary:
    """An internal struct used to send the compression dictionary from the
    writer to the scanners.
    """

    data: bytes
    """The `bytes` representation of a `zstandard.ZstdCompressionDict`.
    """

Report: TypeAlias = (
    ScanReport
    | _IngestReport
    | _WorkerErrorMessage
    | _ProgressLog
    | Literal[
        _Sentinel.WRITE_REPORT,
        _Sentinel.SCANNER_DONE,
        _Sentinel.INGESTER_DONE,
        _Sentinel.WRITER_DONE,
    ]
)

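# Summary annotation, not part of the released file: the queue topology, as
# gathered from the comments in SupervisorCommunicator.__init__ below.
# Supervisor -> scanners via _scan_requests; scanners -> ingester via
# _ingest_requests; scanners and supervisor -> writer via _write_requests;
# all workers -> supervisor via _reports; writer -> scanners via
# _compression_dict.
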
class SupervisorCommunicator:
    """A helper object that lets the supervisor direct the other workers.

    Parameters
    ----------
    log : `lsst.utils.logging.LsstLogAdapter`
        LSST-customized logger.
    n_scanners : `int`
        Number of scanner workers.
    context : `WorkerContext`
        Abstraction over threading vs. multiprocessing.
    config : `AggregatorConfig`
        Configuration for the aggregator.
    """

    def __init__(
        self,
        log: LsstLogAdapter,
        n_scanners: int,
        context: WorkerContext,
        config: AggregatorConfig,
    ) -> None:
        self.config = config
        self.progress = Progress(log, config)
        self.n_scanners = n_scanners
        # The supervisor sends scan requests to scanners on this queue.
        # When complete, the supervisor sends n_scanners sentinels and each
        # scanner is careful to only take one before it starts its shutdown.
        self._scan_requests: Queue[_ScanRequest | Literal[_Sentinel.NO_MORE_SCAN_REQUESTS]] = (
            context.make_queue()
        )
        # The scanners send ingest requests to the ingester on this queue. Each
        # scanner sends one sentinel when it is done, and the ingester is
        # careful to wait for n_scanners sentinels to arrive before it starts
        # its shutdown.
        self._ingest_requests: Queue[IngestRequest | Literal[_Sentinel.NO_MORE_INGEST_REQUESTS]] = (
            context.make_queue()
        )
        # The scanners send write requests to the writer on this queue (which
        # will be `None` if we're not writing). The supervisor also sends
        # write requests for blocked quanta (which we don't scan). Each
        # scanner and the supervisor send one sentinel when done, and the
        # writer waits for (n_scanners + 1) sentinels to arrive before it
        # starts its shutdown.
        self._write_requests: Queue[ScanResult | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None = (
            context.make_queue() if config.output_path is not None else None
        )
        # All other workers use this queue to send many different kinds of
        # reports to the supervisor. The supervisor waits for a _DONE sentinel
        # from each worker before it finishes its shutdown.
        self._reports: Queue[Report] = context.make_queue()
        # The writer sends the compression dictionary to the scanners on this
        # queue. It puts n_scanners copies on the queue, and each scanner only
        # takes one. The compression_dict queue has no sentinel because it is
        # only used at most once; the supervisor takes responsibility for
        # clearing it out when shutting down.
        self._compression_dict: Queue[_CompressionDictionary] = context.make_queue()
        # The supervisor sets this event when it receives an interrupt request
        # from an exception in the main process (usually KeyboardInterrupt).
        # Worker communicators check this in their polling loops and raise
        # FatalWorkerError when they see it set.
        self._cancel_event: Event = context.make_event()
        # Track what state we are in closing down, so we can start at the right
        # point if we're interrupted and __exit__ needs to clean up. Note that
        # we can't rely on a non-exception __exit__ to do any shutdown work
        # that might be slow, since a KeyboardInterrupt that occurs when
        # __exit__ is already running can't be caught inside __exit__.
        self._sent_no_more_scan_requests = False
        self._sent_no_more_write_requests = False
        self._n_scanners_done = 0
        self._ingester_done = False
        self._writer_done = self._write_requests is None

    def wait_for_workers_to_finish(self, already_failing: bool = False) -> None:
        if not self._sent_no_more_scan_requests:
            for _ in range(self.n_scanners):
                self._scan_requests.put(_Sentinel.NO_MORE_SCAN_REQUESTS, block=False)
            self._sent_no_more_scan_requests = True
        if not self._sent_no_more_write_requests and self._write_requests is not None:
            self._write_requests.put(_Sentinel.NO_MORE_WRITE_REQUESTS, block=False)
            self._sent_no_more_write_requests = True
        while not (self._ingester_done and self._writer_done and self._n_scanners_done == self.n_scanners):
            match self._handle_progress_reports(
                self._reports.get(block=True), already_failing=already_failing
            ):
                case None | ScanReport() | _IngestReport():
                    pass
                case _Sentinel.INGESTER_DONE:
                    self._ingester_done = True
                    self.progress.finish_ingests()
                case _Sentinel.SCANNER_DONE:
                    self._n_scanners_done += 1
                    self.progress.finish_scans()
                case _Sentinel.WRITER_DONE:
                    self._writer_done = True
                    self.progress.finish_writes()
                case unexpected:
                    raise AssertionError(f"Unexpected message {unexpected!r} to supervisor.")
            self.log.verbose(
                "Blocking on reports queue: ingester_done=%s, writer_done=%s, n_scanners_done=%s.",
                self._ingester_done,
                self._writer_done,
                self._n_scanners_done,
            )
        while _get_from_queue(self._compression_dict) is not None:
            self.log.verbose("Flushing compression dict queue.")
        self.log.verbose("Checking that all queues are empty.")
        self._expect_empty_queue(self._scan_requests)
        self._expect_empty_queue(self._ingest_requests)
        if self._write_requests is not None:
            self._expect_empty_queue(self._write_requests)
        self._expect_empty_queue(self._reports)
        self._expect_empty_queue(self._compression_dict)

    def __enter__(self) -> Self:
        self.progress.__enter__()
        # We make the low-level logger in __enter__ instead of __init__ only
        # because that's the pattern used by true workers (where it matters).
        self.log = make_worker_log("supervisor", self.config)
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        if exc_type is not None:
            if exc_type is not FatalWorkerError:
                self.progress.log.critical(f"Caught {exc_type.__name__}; attempting to shut down cleanly.")
            self._cancel_event.set()
        self.wait_for_workers_to_finish(already_failing=exc_type is not None)
        self.progress.__exit__(exc_type, exc_value, traceback)

    def request_scan(self, quantum_id: uuid.UUID) -> None:
        """Send a request to the scanners to scan the given quantum.

        Parameters
        ----------
        quantum_id : `uuid.UUID`
            ID of the quantum to scan.
        """
        self._scan_requests.put(_ScanRequest(quantum_id), block=False)

    def request_write(self, scan_result: ScanResult) -> None:
        """Send a request to the writer to write provenance for the given scan.

        Parameters
        ----------
        scan_result : `ScanResult`
            Information from scanning a quantum (or knowing you don't have to,
            in the case of blocked quanta).
        """
        assert self._write_requests is not None, "Writer should not be used if writing is disabled."
        self._write_requests.put(scan_result, block=False)

    def poll(self) -> Iterator[ScanReport]:
        """Poll for reports from workers while sending scan requests.

        Yields
        ------
        scan_report : `ScanReport`
            A report from a scanner that a quantum was scanned.

        Notes
        -----
        This iterator blocks until the first scan report is received, and then
        it continues until the report queue is empty.
        """
        block = True
        msg = _get_from_queue(self._reports, block=block)
        while msg is not None:
            match self._handle_progress_reports(msg):
                case ScanReport() as scan_report:
                    block = False
                    yield scan_report
                case None:
                    pass
                case unexpected:
                    raise AssertionError(f"Unexpected message {unexpected!r} to supervisor.")
            msg = _get_from_queue(self._reports, block=block)

    def _handle_progress_reports(
        self, report: Report, already_failing: bool = False
    ) -> (
        ScanReport
        | Literal[
            _Sentinel.SCANNER_DONE,
            _Sentinel.INGESTER_DONE,
            _Sentinel.WRITER_DONE,
        ]
        | None
    ):
        """Handle reports to the supervisor that can appear at any time, and
        are typically just updates to the progress we've made.

        This includes:

        - exceptions from workers (which raise `FatalWorkerError` here to
          trigger ``__exit__``);
        - ingest reports;
        - write reports;
        - progress logs.

        If one of these is handled, `None` is returned; otherwise the original
        report is returned.
        """
        match report:
            case _WorkerErrorMessage(traceback=traceback, worker=worker):
                self.progress.log.fatal("Exception raised on %s: \n%s", worker, traceback)
                if not already_failing:
                    raise FatalWorkerError()
            case _IngestReport(n_producers=n_producers):
                self.progress.report_ingests(n_producers)
            case _Sentinel.WRITE_REPORT:
                self.progress.report_write()
            case _ProgressLog(message=message, level=level):
                self.progress.log.log(level, "%s [after %0.1fs]", message, self.progress.elapsed_time)
            case _:
                return report
        return None

    @staticmethod
    def _expect_empty_queue(queue: Queue[Any]) -> None:
        """Assert that the given queue is empty."""
        if (msg := _get_from_queue(queue, block=False, timeout=0)) is not None:
            raise AssertionError(f"Queue is not empty; found {msg!r}.")

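# Illustrative sketch, not part of the released file: a hypothetical
# supervisor driving loop built on the API above, with `initial_quanta` and
# the graph bookkeeping elided:
#
#     with SupervisorCommunicator(log, n_scanners=4, context=ctx, config=cfg) as comm:
#         for quantum_id in initial_quanta:
#             comm.request_scan(quantum_id)
#         for scan_report in comm.poll():
#             ...  # record the report; request more scans and writes
#     # __exit__ runs wait_for_workers_to_finish() and checks all queues are empty.
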
class WorkerCommunicator:
    """A base class for non-supervisor workers.

    Parameters
    ----------
    supervisor : `SupervisorCommunicator`
        Communicator for the supervisor to grab queues and information from.
    name : `str`
        Human-readable name for this worker.

    Notes
    -----
    Each worker communicator is constructed in the main process and entered as
    a context manager on the actual worker process, so attributes that cannot
    be pickled are constructed in ``__enter__`` instead of ``__init__``.

    Worker communicators provide access to an `AggregatorConfig` and a logger
    to their workers. As context managers, they handle exceptions and ensure
    clean shutdowns, and since most workers need to use a lot of other context
    managers (for file reading and writing, mostly), they provide an `enter`
    method to keep every worker from also having to be a context manager just
    to hold a context manager instance attribute.

    Worker communicators can also be configured to record and dump profiling
    information.
    """

    def __init__(self, supervisor: SupervisorCommunicator, name: str):
        self.name = name
        self.config = supervisor.config
        self._reports = supervisor._reports
        self._cancel_event = supervisor._cancel_event

    def __enter__(self) -> Self:
        self.log = make_worker_log(self.name, self.config)
        self.log.verbose("%s has PID %s (parent is %s).", self.name, os.getpid(), os.getppid())
        self._exit_stack = ExitStack().__enter__()
        if self.config.n_processes > 1:
            # Multiprocessing: ignore interrupts so we can shut down cleanly.
            signal.signal(signal.SIGINT, signal.SIG_IGN)
        if self.config.worker_profile_dir is not None:
            # We use time.time because we're interested in wall-clock time,
            # not just CPU effort, since this is I/O-bound work.
            self._profiler = cProfile.Profile(timer=time.time)
            self._profiler.enable()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> bool | None:
        if self.config.worker_profile_dir is not None and self.config.n_processes > 1:
            self._profiler.disable()
            os.makedirs(self.config.worker_profile_dir, exist_ok=True)
            self._profiler.dump_stats(os.path.join(self.config.worker_profile_dir, f"{self.name}.profile"))
        if exc_value is not None:
            assert exc_type is not None, "Should be guaranteed by Python, but MyPy doesn't know that."
            if exc_type is not FatalWorkerError:
                self.log.warning("Error raised on this worker.", exc_info=(exc_type, exc_value, traceback))
                assert exc_type is not None and traceback is not None
                self._reports.put(
                    _WorkerErrorMessage(
                        self.name,
                        "".join(format_exception(exc_type, exc_value, traceback)),
                    ),
                    block=False,
                )
                self.log.debug("Error message sent to supervisor.")
            else:
                self.log.warning("Shutting down due to exception raised on another worker.")
        self._exit_stack.__exit__(exc_type, exc_value, traceback)
        return True

    def log_progress(self, level: int, message: str) -> None:
        """Send a high-level log message to the supervisor.

        Parameters
        ----------
        level : `int`
            Log level. Should be ``VERBOSE`` or higher.
        message : `str`
            Log message.
        """
        self._reports.put(_ProgressLog(message=message, level=level), block=False)

    def enter(
        self,
        cm: AbstractContextManager[_T],
        on_close: str | None = None,
        level: int = VERBOSE,
        is_progress_log: bool = False,
    ) -> _T:
        """Enter a context manager that will be exited when the communicator's
        context is exited.

        Parameters
        ----------
        cm : `contextlib.AbstractContextManager`
            A context manager to enter.
        on_close : `str`, optional
            A log message to emit (on the worker's logger) just before the
            given context manager is exited. This can be used to indicate
            what's going on when an ``__exit__`` implementation has a lot of
            work to do (e.g. moving a large file into a zip archive).
        level : `int`, optional
            Level for the ``on_close`` log message.
        is_progress_log : `bool`, optional
            If `True`, send the ``on_close`` message to the supervisor via
            `log_progress` as well as the worker's logger.
        """
        if on_close is None:
            return self._exit_stack.enter_context(cm)

        @contextmanager
        def wrapper() -> Iterator[_T]:
            with cm as result:
                yield result
                self.log.log(level, on_close)
                if is_progress_log:
                    self.log_progress(level, on_close)

        return self._exit_stack.enter_context(wrapper())

    def check_for_cancel(self) -> None:
        """Check for a cancel signal from the supervisor and raise
        `FatalWorkerError` if it is present.
        """
        if self._cancel_event.is_set():
            raise FatalWorkerError()

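# Illustrative sketch, not part of the released file: typical use of `enter`
# from inside a worker; `open_output_archive` is a hypothetical context
# manager whose __exit__ does slow work:
#
#     handle = self.enter(
#         open_output_archive(path),
#         on_close="Output archive finalized.",
#         level=logging.INFO,
#         is_progress_log=True,
#     )
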
class ScannerCommunicator(WorkerCommunicator):
    """A communicator for scanner workers.

    Parameters
    ----------
    supervisor : `SupervisorCommunicator`
        Communicator for the supervisor to grab queues and information from.
    scanner_id : `int`
        Integer ID for this scanner.
    """

    def __init__(self, supervisor: SupervisorCommunicator, scanner_id: int):
        super().__init__(supervisor, f"scanner-{scanner_id:03d}")
        self.scanner_id = scanner_id
        self._scan_requests = supervisor._scan_requests
        self._ingest_requests = supervisor._ingest_requests
        self._write_requests = supervisor._write_requests
        self._compression_dict = supervisor._compression_dict
        self._got_no_more_scan_requests: bool = False
        self._sent_no_more_ingest_requests: bool = False

    def report_scan(self, msg: ScanReport) -> None:
        """Report a completed scan to the supervisor.

        Parameters
        ----------
        msg : `ScanReport`
            Report to send.
        """
        self._reports.put(msg, block=False)

    def request_ingest(self, request: IngestRequest) -> None:
        """Ask the ingester to ingest a quantum's outputs.

        Parameters
        ----------
        request : `IngestRequest`
            Description of the datasets to ingest.

        Notes
        -----
        If this request has no datasets, this automatically reports the ingest
        as complete to the supervisor instead of sending it to the ingester.
        """
        if request:
            self._ingest_requests.put(request, block=False)
        else:
            self._reports.put(_IngestReport(1), block=False)

    def request_write(self, scan_result: ScanResult) -> None:
        """Ask the writer to write provenance for a quantum.

        Parameters
        ----------
        scan_result : `ScanResult`
            Result of scanning a quantum.
        """
        assert self._write_requests is not None, "Writer should not be used if writing is disabled."
        self._write_requests.put(scan_result, block=False)

    def get_compression_dict(self) -> bytes | None:
        """Attempt to get the compression dict from the writer.

        Returns
        -------
        data : `bytes` or `None`
            The `bytes` representation of the compression dictionary, or `None`
            if the compression dictionary is not yet available.

        Notes
        -----
        A scanner should only call this method before it actually has the
        compression dict.
        """
        if (cdict := _get_from_queue(self._compression_dict)) is not None:
            return cdict.data
        return None

    def poll(self) -> Iterator[uuid.UUID]:
        """Poll for scan requests to process.

        Yields
        ------
        quantum_id : `uuid.UUID`
            ID of a new quantum to scan.

        Notes
        -----
        This iterator ends when the supervisor reports that it is done
        traversing the graph.
        """
        while True:
            self.check_for_cancel()
            scan_request = _get_from_queue(self._scan_requests, block=True, timeout=self.config.worker_sleep)
            if scan_request is _Sentinel.NO_MORE_SCAN_REQUESTS:
                self._got_no_more_scan_requests = True
                return
            if scan_request is not None:
                yield scan_request.quantum_id

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> bool | None:
        result = super().__exit__(exc_type, exc_value, traceback)
        self._ingest_requests.put(_Sentinel.NO_MORE_INGEST_REQUESTS, block=False)
        if self._write_requests is not None:
            self._write_requests.put(_Sentinel.NO_MORE_WRITE_REQUESTS, block=False)
        while not self._got_no_more_scan_requests:
            self.log.debug("Clearing scan request queue (~%d remaining)", self._scan_requests.qsize())
            if (
                not self._got_no_more_scan_requests
                and self._scan_requests.get() is _Sentinel.NO_MORE_SCAN_REQUESTS
            ):
                self._got_no_more_scan_requests = True
        # We let the supervisor clear out the compression dict queue, because
        # a single scanner can't know if it ever got sent out or not.
        self.log.verbose("Sending done sentinel.")
        self._reports.put(_Sentinel.SCANNER_DONE, block=False)
        return result

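# Illustrative sketch, not part of the released file: the shape of a scanner
# worker built on this communicator (the real one lives in _scanner.py);
# `scan_one` is a hypothetical function producing the structs defined in
# _structs.py:
#
#     with ScannerCommunicator(supervisor, scanner_id) as comm:
#         for quantum_id in comm.poll():
#             report, ingest_request = scan_one(quantum_id)
#             comm.request_ingest(ingest_request)
#             comm.report_scan(report)
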
class IngesterCommunicator(WorkerCommunicator):
    """A communicator for the ingester worker.

    Parameters
    ----------
    supervisor : `SupervisorCommunicator`
        Communicator for the supervisor to grab queues and information from.
    """

    def __init__(self, supervisor: SupervisorCommunicator):
        super().__init__(supervisor, "ingester")
        self.n_scanners = supervisor.n_scanners
        self._ingest_requests = supervisor._ingest_requests
        self._n_requesters_done = 0

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> bool | None:
        result = super().__exit__(exc_type, exc_value, traceback)
        while self._n_requesters_done != self.n_scanners:
            self.log.debug(
                "Waiting for %d requesters to be done (currently %d).",
                self.n_scanners,
                self._n_requesters_done,
            )
            if self._ingest_requests.get(block=True) is _Sentinel.NO_MORE_INGEST_REQUESTS:
                self._n_requesters_done += 1
        self.log.verbose("Sending done sentinel.")
        self._reports.put(_Sentinel.INGESTER_DONE, block=False)
        return result

    def report_ingest(self, n_producers: int) -> None:
        """Report to the supervisor that an ingest batch was completed.

        Parameters
        ----------
        n_producers : `int`
            Number of producing quanta whose datasets were ingested.
        """
        self._reports.put(_IngestReport(n_producers), block=False)

    def poll(self) -> Iterator[IngestRequest]:
        """Poll for ingest requests from the scanner workers.

        Yields
        ------
        request : `IngestRequest`
            A request to ingest datasets produced by a single quantum.

        Notes
        -----
        This iterator ends when all scanners indicate that they are done making
        ingest requests.
        """
        while True:
            self.check_for_cancel()
            ingest_request = _get_from_queue(self._ingest_requests, block=True, timeout=_TINY_TIMEOUT)
            if ingest_request is _Sentinel.NO_MORE_INGEST_REQUESTS:
                self._n_requesters_done += 1
                if self._n_requesters_done == self.n_scanners:
                    return
                else:
                    continue
            if ingest_request is not None:
                yield ingest_request

class WriterCommunicator(WorkerCommunicator):
    """A communicator for the writer worker.

    Parameters
    ----------
    supervisor : `SupervisorCommunicator`
        Communicator for the supervisor to grab queues and information from.
    """

    def __init__(self, supervisor: SupervisorCommunicator):
        assert supervisor._write_requests is not None
        super().__init__(supervisor, "writer")
        self.n_scanners = supervisor.n_scanners
        self._write_requests = supervisor._write_requests
        self._compression_dict = supervisor._compression_dict
        self._n_requesters = supervisor.n_scanners + 1
        self._n_requesters_done = 0
        self._sent_compression_dict = False

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> bool | None:
        result = super().__exit__(exc_type, exc_value, traceback)
        if exc_type is None:
            self.log_progress(logging.INFO, "Provenance quantum graph written successfully.")
        while self._n_requesters_done != self._n_requesters:
            self.log.debug(
                "Waiting for %d requesters to be done (currently %d).",
                self._n_requesters,
                self._n_requesters_done,
            )
            if self._write_requests.get(block=True) is _Sentinel.NO_MORE_WRITE_REQUESTS:
                self._n_requesters_done += 1
        self.log.verbose("Sending done sentinel.")
        self._reports.put(_Sentinel.WRITER_DONE, block=False)
        return result

    def poll(self) -> Iterator[ScanResult]:
        """Poll for writer requests from the scanner workers and supervisor.

        Yields
        ------
        request : `ScanResult`
            The result of a quantum scan.

        Notes
        -----
        This iterator ends when all scanners and the supervisor indicate that
        they are done making write requests.
        """
        while True:
            self.check_for_cancel()
            write_request = _get_from_queue(self._write_requests, block=True, timeout=_TINY_TIMEOUT)
            if write_request is _Sentinel.NO_MORE_WRITE_REQUESTS:
                self._n_requesters_done += 1
                if self._n_requesters_done == self._n_requesters:
                    return
                else:
                    continue
            if write_request is not None:
                yield write_request

    def send_compression_dict(self, cdict_data: bytes) -> None:
        """Send the compression dictionary to the scanners.

        Parameters
        ----------
        cdict_data : `bytes`
            The `bytes` representation of the compression dictionary.
        """
        self.log.debug("Sending compression dictionary.")
        for _ in range(self.n_scanners):
            self._compression_dict.put(_CompressionDictionary(cdict_data), block=False)
        self._sent_compression_dict = True

    def report_write(self) -> None:
        """Report to the supervisor that provenance for a quantum was written
        to the graph.
        """
        self._reports.put(_Sentinel.WRITE_REPORT, block=False)

    def periodically_check_for_cancel(self, iterable: Iterable[_T], n: int = 100) -> Iterator[_T]:
        """Iterate while checking for a cancellation signal every ``n``
        iterations.

        Parameters
        ----------
        iterable : `~collections.abc.Iterable`
            Object to iterate over.
        n : `int`
            Check for cancellation every ``n`` iterations.

        Returns
        -------
        iterator : `~collections.abc.Iterator`
            Iterator.
        """
        i = 0
        for entry in iterable:
            yield entry
            i += 1
            if i % n == 0:
                self.check_for_cancel()
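
# Illustrative sketch, not part of the released file: usage of
# periodically_check_for_cancel inside the writer, with `rows` and `write_row`
# hypothetical:
#
#     for row in self.periodically_check_for_cancel(rows, n=1000):
#         write_row(row)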