lsst-pipe-base 30.0.1rc1__py3-none-any.whl → 30.2025.5200__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +20 -31
- lsst/pipe/base/_quantumContext.py +3 -3
- lsst/pipe/base/_status.py +10 -43
- lsst/pipe/base/_task_metadata.py +2 -2
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +3 -8
- lsst/pipe/base/automatic_connection_constants.py +1 -20
- lsst/pipe/base/cli/cmd/__init__.py +2 -18
- lsst/pipe/base/cli/cmd/commands.py +4 -149
- lsst/pipe/base/connectionTypes.py +160 -72
- lsst/pipe/base/connections.py +9 -6
- lsst/pipe/base/execution_reports.py +5 -0
- lsst/pipe/base/graph/graph.py +10 -11
- lsst/pipe/base/graph/quantumNode.py +4 -4
- lsst/pipe/base/graph_walker.py +10 -8
- lsst/pipe/base/log_capture.py +5 -9
- lsst/pipe/base/mp_graph_executor.py +15 -51
- lsst/pipe/base/pipeline.py +6 -5
- lsst/pipe/base/pipelineIR.py +8 -2
- lsst/pipe/base/pipelineTask.py +7 -5
- lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
- lsst/pipe/base/pipeline_graph/_edges.py +22 -32
- lsst/pipe/base/pipeline_graph/_mapping_views.py +7 -4
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +7 -14
- lsst/pipe/base/pipeline_graph/expressions.py +2 -2
- lsst/pipe/base/pipeline_graph/io.py +10 -7
- lsst/pipe/base/pipeline_graph/visualization/_dot.py +12 -13
- lsst/pipe/base/pipeline_graph/visualization/_layout.py +18 -16
- lsst/pipe/base/pipeline_graph/visualization/_merge.py +7 -4
- lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
- lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +0 -7
- lsst/pipe/base/prerequisite_helpers.py +1 -2
- lsst/pipe/base/quantum_graph/_common.py +20 -19
- lsst/pipe/base/quantum_graph/_multiblock.py +31 -37
- lsst/pipe/base/quantum_graph/_predicted.py +13 -111
- lsst/pipe/base/quantum_graph/_provenance.py +45 -1136
- lsst/pipe/base/quantum_graph/aggregator/__init__.py +1 -0
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +289 -204
- lsst/pipe/base/quantum_graph/aggregator/_config.py +9 -87
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -13
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +235 -49
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +116 -6
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +39 -29
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +351 -34
- lsst/pipe/base/quantum_graph/visualization.py +1 -5
- lsst/pipe/base/quantum_graph_builder.py +8 -21
- lsst/pipe/base/quantum_graph_executor.py +13 -116
- lsst/pipe/base/quantum_graph_skeleton.py +29 -31
- lsst/pipe/base/quantum_provenance_graph.py +12 -29
- lsst/pipe/base/separable_pipeline_executor.py +3 -19
- lsst/pipe/base/single_quantum_executor.py +42 -67
- lsst/pipe/base/struct.py +0 -4
- lsst/pipe/base/testUtils.py +3 -3
- lsst/pipe/base/tests/mocks/_storage_class.py +1 -2
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/METADATA +3 -3
- lsst_pipe_base-30.2025.5200.dist-info/RECORD +125 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/WHEEL +1 -1
- lsst/pipe/base/log_on_close.py +0 -76
- lsst/pipe/base/quantum_graph/aggregator/_workers.py +0 -303
- lsst/pipe/base/quantum_graph/formatter.py +0 -171
- lsst/pipe/base/quantum_graph/ingest_graph.py +0 -413
- lsst_pipe_base-30.0.1rc1.dist-info/RECORD +0 -129
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/zip-safe +0 -0
|
@@ -31,33 +31,154 @@ __all__ = (
|
|
|
31
31
|
"FatalWorkerError",
|
|
32
32
|
"IngesterCommunicator",
|
|
33
33
|
"ScannerCommunicator",
|
|
34
|
+
"SpawnProcessContext",
|
|
34
35
|
"SupervisorCommunicator",
|
|
36
|
+
"ThreadingContext",
|
|
37
|
+
"WorkerContext",
|
|
35
38
|
)
|
|
36
39
|
|
|
37
40
|
import cProfile
|
|
38
41
|
import dataclasses
|
|
39
42
|
import enum
|
|
40
43
|
import logging
|
|
44
|
+
import multiprocessing.context
|
|
45
|
+
import multiprocessing.synchronize
|
|
41
46
|
import os
|
|
47
|
+
import queue
|
|
42
48
|
import signal
|
|
49
|
+
import threading
|
|
43
50
|
import time
|
|
44
51
|
import uuid
|
|
45
|
-
from
|
|
46
|
-
from
|
|
52
|
+
from abc import ABC, abstractmethod
|
|
53
|
+
from collections.abc import Callable, Iterable, Iterator
|
|
54
|
+
from contextlib import AbstractContextManager, ExitStack, contextmanager
|
|
47
55
|
from traceback import format_exception
|
|
48
56
|
from types import TracebackType
|
|
49
|
-
from typing import Literal, Self,
|
|
57
|
+
from typing import Any, Literal, Self, TypeAlias, TypeVar, Union
|
|
50
58
|
|
|
51
|
-
from lsst.utils.logging import LsstLogAdapter
|
|
59
|
+
from lsst.utils.logging import VERBOSE, LsstLogAdapter
|
|
52
60
|
|
|
53
|
-
from .._provenance import ProvenanceQuantumScanData
|
|
54
61
|
from ._config import AggregatorConfig
|
|
55
62
|
from ._progress import ProgressManager, make_worker_log
|
|
56
|
-
from ._structs import IngestRequest, ScanReport
|
|
57
|
-
|
|
63
|
+
from ._structs import IngestRequest, ScanReport, WriteRequest
|
|
64
|
+
|
|
65
|
+
_T = TypeVar("_T")
|
|
58
66
|
|
|
59
67
|
_TINY_TIMEOUT = 0.01
|
|
60
68
|
|
|
69
|
+
# multiprocessing.Queue is a type according to the standard library type stubs,
|
|
70
|
+
# but it's really a function at runtime. But since the Python <= 3.11 type
|
|
71
|
+
# alias syntax uses the real runtime things we need to use strings, and hence
|
|
72
|
+
# we need to use Union. With Python 3.12's 'type' statement this gets cleaner.
|
|
73
|
+
Queue: TypeAlias = Union["queue.Queue[_T]", "multiprocessing.Queue[_T]"]
|
|
74
|
+
|
|
75
|
+
Event: TypeAlias = threading.Event | multiprocessing.synchronize.Event
|
|
76
|
+
|
|
77
|
+
Worker: TypeAlias = threading.Thread | multiprocessing.context.SpawnProcess
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class WorkerContext(ABC):
|
|
81
|
+
"""A simple abstract interface that can be implemented by both threading
|
|
82
|
+
and multiprocessing.
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
@abstractmethod
|
|
86
|
+
def make_queue(self) -> Queue[Any]:
|
|
87
|
+
"""Make an empty queue that can be used to pass objects between
|
|
88
|
+
workers in this context.
|
|
89
|
+
"""
|
|
90
|
+
raise NotImplementedError()
|
|
91
|
+
|
|
92
|
+
@abstractmethod
|
|
93
|
+
def make_event(self) -> Event:
|
|
94
|
+
"""Make an event that can be used to communicate a boolean state change
|
|
95
|
+
to workers in this context.
|
|
96
|
+
"""
|
|
97
|
+
raise NotImplementedError()
|
|
98
|
+
|
|
99
|
+
@abstractmethod
|
|
100
|
+
def make_worker(
|
|
101
|
+
self, target: Callable[..., None], args: tuple[Any, ...], name: str | None = None
|
|
102
|
+
) -> Worker:
|
|
103
|
+
"""Make a worker that runs the given callable.
|
|
104
|
+
|
|
105
|
+
Parameters
|
|
106
|
+
----------
|
|
107
|
+
target : `~collections.abc.Callable`
|
|
108
|
+
A callable to invoke on the worker.
|
|
109
|
+
args : `tuple`
|
|
110
|
+
Positional arguments to pass to the callable.
|
|
111
|
+
name : `str`, optional
|
|
112
|
+
Human-readable name for the worker.
|
|
113
|
+
|
|
114
|
+
Returns
|
|
115
|
+
-------
|
|
116
|
+
worker : `threading.Thread` or `multiprocessing.Process`
|
|
117
|
+
Process or thread. Will need to have its ``start`` method called
|
|
118
|
+
to actually begin.
|
|
119
|
+
"""
|
|
120
|
+
raise NotImplementedError()
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class ThreadingContext(WorkerContext):
|
|
124
|
+
"""An implementation of `WorkerContext` backed by the `threading`
|
|
125
|
+
module.
|
|
126
|
+
"""
|
|
127
|
+
|
|
128
|
+
def make_queue(self) -> Queue[Any]:
|
|
129
|
+
return queue.Queue()
|
|
130
|
+
|
|
131
|
+
def make_event(self) -> Event:
|
|
132
|
+
return threading.Event()
|
|
133
|
+
|
|
134
|
+
def make_worker(
|
|
135
|
+
self, target: Callable[..., None], args: tuple[Any, ...], name: str | None = None
|
|
136
|
+
) -> Worker:
|
|
137
|
+
return threading.Thread(target=target, args=args, name=name)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class SpawnProcessContext(WorkerContext):
|
|
141
|
+
"""An implementation of `WorkerContext` backed by the `multiprocessing`
|
|
142
|
+
module, with new processes started by spawning.
|
|
143
|
+
"""
|
|
144
|
+
|
|
145
|
+
def __init__(self) -> None:
|
|
146
|
+
self._ctx = multiprocessing.get_context("spawn")
|
|
147
|
+
|
|
148
|
+
def make_queue(self) -> Queue[Any]:
|
|
149
|
+
return self._ctx.Queue()
|
|
150
|
+
|
|
151
|
+
def make_event(self) -> Event:
|
|
152
|
+
return self._ctx.Event()
|
|
153
|
+
|
|
154
|
+
def make_worker(
|
|
155
|
+
self, target: Callable[..., None], args: tuple[Any, ...], name: str | None = None
|
|
156
|
+
) -> Worker:
|
|
157
|
+
return self._ctx.Process(target=target, args=args, name=name)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _get_from_queue(q: Queue[_T], block: bool = False, timeout: float | None = None) -> _T | None:
|
|
161
|
+
"""Get an object from a queue and return `None` if it is empty.
|
|
162
|
+
|
|
163
|
+
Parameters
|
|
164
|
+
----------
|
|
165
|
+
q : `Queue`
|
|
166
|
+
Queue to get an object from.
|
|
167
|
+
block : `bool`
|
|
168
|
+
Whether to block until an object is available.
|
|
169
|
+
timeout : `float` or `None`, optional
|
|
170
|
+
Maximum number of seconds to wait while blocking.
|
|
171
|
+
|
|
172
|
+
Returns
|
|
173
|
+
-------
|
|
174
|
+
obj : `object` or `None`
|
|
175
|
+
Object from the queue, or `None` if it was empty.
|
|
176
|
+
"""
|
|
177
|
+
try:
|
|
178
|
+
return q.get(block=block, timeout=timeout)
|
|
179
|
+
except queue.Empty:
|
|
180
|
+
return None
|
|
181
|
+
|
|
61
182
|
|
|
62
183
|
class FatalWorkerError(BaseException):
|
|
63
184
|
"""An exception raised by communicators when one worker (including the
|
|
@@ -66,12 +187,6 @@ class FatalWorkerError(BaseException):
|
|
|
66
187
|
"""
|
|
67
188
|
|
|
68
189
|
|
|
69
|
-
class _WorkerCommunicationError(Exception):
|
|
70
|
-
"""An exception raised by communicators when a worker has died unexpectedly
|
|
71
|
-
or become unresponsive.
|
|
72
|
-
"""
|
|
73
|
-
|
|
74
|
-
|
|
75
190
|
class _Sentinel(enum.Enum):
|
|
76
191
|
"""Sentinel values used to indicate sequence points or worker shutdown
|
|
77
192
|
conditions.
|
|
@@ -98,6 +213,21 @@ class _Sentinel(enum.Enum):
|
|
|
98
213
|
quantum's provenance was written.
|
|
99
214
|
"""
|
|
100
215
|
|
|
216
|
+
SCANNER_DONE = enum.auto()
|
|
217
|
+
"""Sentinel sent from scanners to the supervisor to report that they are
|
|
218
|
+
done and shutting down.
|
|
219
|
+
"""
|
|
220
|
+
|
|
221
|
+
INGESTER_DONE = enum.auto()
|
|
222
|
+
"""Sentinel sent from the ingester to the supervisor to report that it is
|
|
223
|
+
done and shutting down.
|
|
224
|
+
"""
|
|
225
|
+
|
|
226
|
+
WRITER_DONE = enum.auto()
|
|
227
|
+
"""Sentinel sent from the writer to the supervisor to report that it is
|
|
228
|
+
done and shutting down.
|
|
229
|
+
"""
|
|
230
|
+
|
|
101
231
|
|
|
102
232
|
@dataclasses.dataclass
|
|
103
233
|
class _WorkerErrorMessage:
|
|
@@ -147,16 +277,6 @@ class _IngestReport:
|
|
|
147
277
|
"""
|
|
148
278
|
|
|
149
279
|
|
|
150
|
-
@dataclasses.dataclass
|
|
151
|
-
class _WorkerDone:
|
|
152
|
-
"""An internal struct passed from a worker to the supervisor when it has
|
|
153
|
-
successfully completed all work.
|
|
154
|
-
"""
|
|
155
|
-
|
|
156
|
-
name: str
|
|
157
|
-
"""Name of the worker reporting completion."""
|
|
158
|
-
|
|
159
|
-
|
|
160
280
|
@dataclasses.dataclass
|
|
161
281
|
class _ProgressLog:
|
|
162
282
|
"""A high-level log message sent from a worker to the supervisor.
|
|
@@ -183,22 +303,20 @@ class _CompressionDictionary:
|
|
|
183
303
|
"""
|
|
184
304
|
|
|
185
305
|
|
|
186
|
-
|
|
306
|
+
Report: TypeAlias = (
|
|
187
307
|
ScanReport
|
|
188
308
|
| _IngestReport
|
|
189
309
|
| _WorkerErrorMessage
|
|
190
310
|
| _ProgressLog
|
|
191
|
-
|
|
|
192
|
-
|
|
311
|
+
| Literal[
|
|
312
|
+
_Sentinel.WRITE_REPORT,
|
|
313
|
+
_Sentinel.SCANNER_DONE,
|
|
314
|
+
_Sentinel.INGESTER_DONE,
|
|
315
|
+
_Sentinel.WRITER_DONE,
|
|
316
|
+
]
|
|
193
317
|
)
|
|
194
318
|
|
|
195
319
|
|
|
196
|
-
def _disable_resources_parallelism() -> None:
|
|
197
|
-
os.environ["LSST_RESOURCES_NUM_WORKERS"] = "1"
|
|
198
|
-
os.environ.pop("LSST_RESOURCES_EXECUTOR", None)
|
|
199
|
-
os.environ["LSST_S3_USE_THREADS"] = "False"
|
|
200
|
-
|
|
201
|
-
|
|
202
320
|
class SupervisorCommunicator:
|
|
203
321
|
"""A helper object that lets the supervisor direct the other workers.
|
|
204
322
|
|
|
@@ -208,7 +326,7 @@ class SupervisorCommunicator:
|
|
|
208
326
|
LSST-customized logger.
|
|
209
327
|
n_scanners : `int`
|
|
210
328
|
Number of scanner workers.
|
|
211
|
-
|
|
329
|
+
context : `WorkerContext`
|
|
212
330
|
Abstraction over threading vs. multiprocessing.
|
|
213
331
|
config : `AggregatorConfig`
|
|
214
332
|
Configuration for the aggregator.
|
|
@@ -218,7 +336,7 @@ class SupervisorCommunicator:
|
|
|
218
336
|
self,
|
|
219
337
|
log: LsstLogAdapter,
|
|
220
338
|
n_scanners: int,
|
|
221
|
-
|
|
339
|
+
context: WorkerContext,
|
|
222
340
|
config: AggregatorConfig,
|
|
223
341
|
) -> None:
|
|
224
342
|
self.config = config
|
|
@@ -228,14 +346,14 @@ class SupervisorCommunicator:
|
|
|
228
346
|
# When complete, the supervisor sends n_scanners sentinels and each
|
|
229
347
|
# scanner is careful to only take one before it starts its shutdown.
|
|
230
348
|
self._scan_requests: Queue[_ScanRequest | Literal[_Sentinel.NO_MORE_SCAN_REQUESTS]] = (
|
|
231
|
-
|
|
349
|
+
context.make_queue()
|
|
232
350
|
)
|
|
233
351
|
# The scanners send ingest requests to the ingester on this queue. Each
|
|
234
352
|
# scanner sends one sentinel when it is done, and the ingester is
|
|
235
353
|
# careful to wait for n_scanners sentinels to arrive before it starts
|
|
236
354
|
# its shutdown.
|
|
237
355
|
self._ingest_requests: Queue[IngestRequest | Literal[_Sentinel.NO_MORE_INGEST_REQUESTS]] = (
|
|
238
|
-
|
|
356
|
+
context.make_queue()
|
|
239
357
|
)
|
|
240
358
|
# The scanners send write requests to the writer on this queue (which
|
|
241
359
|
# will be `None` if we're not writing). The supervisor also sends
|
|
@@ -243,24 +361,24 @@ class SupervisorCommunicator:
|
|
|
243
361
|
# scanner and the supervisor send one sentinel when done, and the
|
|
244
362
|
# writer waits for (n_scanners + 1) sentinels to arrive before it
|
|
245
363
|
# starts its shutdown.
|
|
246
|
-
self._write_requests: (
|
|
247
|
-
|
|
248
|
-
)
|
|
364
|
+
self._write_requests: Queue[WriteRequest | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None = (
|
|
365
|
+
context.make_queue() if config.output_path is not None else None
|
|
366
|
+
)
|
|
249
367
|
# All other workers use this queue to send many different kinds of
|
|
250
368
|
# reports to the supervisor. The supervisor waits for a _DONE sentinel
|
|
251
369
|
# from each worker before it finishes its shutdown.
|
|
252
|
-
self._reports: Queue[Report] =
|
|
370
|
+
self._reports: Queue[Report] = context.make_queue()
|
|
253
371
|
# The writer sends the compression dictionary to the scanners on this
|
|
254
372
|
# queue. It puts n_scanners copies on the queue, and each scanner only
|
|
255
373
|
# takes one. The compression_dict queue has no sentinel because it is
|
|
256
374
|
# only used at most once; the supervisor takes responsibility for
|
|
257
375
|
# clearing it out when shutting down.
|
|
258
|
-
self._compression_dict: Queue[_CompressionDictionary] =
|
|
376
|
+
self._compression_dict: Queue[_CompressionDictionary] = context.make_queue()
|
|
259
377
|
# The supervisor sets this event when it receives an interrupt request
|
|
260
378
|
# from an exception in the main process (usually KeyboardInterrupt).
|
|
261
379
|
# Worker communicators check this in their polling loops and raise
|
|
262
380
|
# FatalWorkerError when they see it set.
|
|
263
|
-
self._cancel_event: Event =
|
|
381
|
+
self._cancel_event: Event = context.make_event()
|
|
264
382
|
# Track what state we are in closing down, so we can start at the right
|
|
265
383
|
# point if we're interrupted and __exit__ needs to clean up. Note that
|
|
266
384
|
# we can't rely on a non-exception __exit__ to do any shutdown work
|
|
@@ -269,77 +387,51 @@ class SupervisorCommunicator:
|
|
|
269
387
|
self._sent_no_more_scan_requests = False
|
|
270
388
|
self._sent_no_more_write_requests = False
|
|
271
389
|
self._n_scanners_done = 0
|
|
272
|
-
self.
|
|
390
|
+
self._ingester_done = False
|
|
391
|
+
self._writer_done = self._write_requests is None
|
|
273
392
|
|
|
274
|
-
def
|
|
275
|
-
# Orderly shutdown, including exceptions: let workers clear out the
|
|
276
|
-
# queues they're responsible for reading from.
|
|
393
|
+
def wait_for_workers_to_finish(self, already_failing: bool = False) -> None:
|
|
277
394
|
if not self._sent_no_more_scan_requests:
|
|
278
395
|
for _ in range(self.n_scanners):
|
|
279
|
-
self._scan_requests.put(_Sentinel.NO_MORE_SCAN_REQUESTS)
|
|
396
|
+
self._scan_requests.put(_Sentinel.NO_MORE_SCAN_REQUESTS, block=False)
|
|
280
397
|
self._sent_no_more_scan_requests = True
|
|
281
398
|
if not self._sent_no_more_write_requests and self._write_requests is not None:
|
|
282
|
-
self._write_requests.put(_Sentinel.NO_MORE_WRITE_REQUESTS)
|
|
399
|
+
self._write_requests.put(_Sentinel.NO_MORE_WRITE_REQUESTS, block=False)
|
|
283
400
|
self._sent_no_more_write_requests = True
|
|
284
|
-
while not
|
|
401
|
+
while not (self._ingester_done and self._writer_done and self._n_scanners_done == self.n_scanners):
|
|
285
402
|
match self._handle_progress_reports(
|
|
286
|
-
self.
|
|
403
|
+
self._reports.get(block=True), already_failing=already_failing
|
|
287
404
|
):
|
|
288
|
-
case None | ScanReport():
|
|
405
|
+
case None | ScanReport() | _IngestReport():
|
|
289
406
|
pass
|
|
290
|
-
case
|
|
291
|
-
self.
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
self.progress.scans.close()
|
|
407
|
+
case _Sentinel.INGESTER_DONE:
|
|
408
|
+
self._ingester_done = True
|
|
409
|
+
self.progress.quantum_ingests.close()
|
|
410
|
+
case _Sentinel.SCANNER_DONE:
|
|
411
|
+
self._n_scanners_done += 1
|
|
412
|
+
self.progress.scans.close()
|
|
413
|
+
case _Sentinel.WRITER_DONE:
|
|
414
|
+
self._writer_done = True
|
|
415
|
+
self.progress.writes.close()
|
|
300
416
|
case unexpected:
|
|
301
417
|
raise AssertionError(f"Unexpected message {unexpected!r} to supervisor.")
|
|
302
418
|
self.log.verbose(
|
|
303
|
-
"
|
|
304
|
-
|
|
419
|
+
"Blocking on reports queue: ingester_done=%s, writer_done=%s, n_scanners_done=%s.",
|
|
420
|
+
self._ingester_done,
|
|
421
|
+
self._writer_done,
|
|
422
|
+
self._n_scanners_done,
|
|
305
423
|
)
|
|
424
|
+
while _get_from_queue(self._compression_dict) is not None:
|
|
425
|
+
self.log.verbose("Flushing compression dict queue.")
|
|
306
426
|
self.log.verbose("Checking that all queues are empty.")
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
self._scan_requests.kill()
|
|
310
|
-
if self._ingest_requests.clear():
|
|
311
|
-
self.progress.log.warning("Ingest request queue was not empty at shutdown.")
|
|
312
|
-
self._ingest_requests.kill()
|
|
313
|
-
if self._write_requests is not None and self._write_requests.clear():
|
|
314
|
-
self.progress.log.warning("Write request queue was not empty at shutdown.")
|
|
315
|
-
self._write_requests.kill()
|
|
316
|
-
if self._reports.clear():
|
|
317
|
-
self.progress.log.warning("Reports queue was not empty at shutdown.")
|
|
318
|
-
self._reports.kill()
|
|
319
|
-
if self._compression_dict.clear():
|
|
320
|
-
self.progress.log.warning("Compression dictionary queue was not empty at shutdown.")
|
|
321
|
-
self._compression_dict.kill()
|
|
322
|
-
for worker in self.workers.values():
|
|
323
|
-
self.log.verbose("Waiting for %s to shut down.", worker.name)
|
|
324
|
-
worker.join()
|
|
325
|
-
|
|
326
|
-
def _terminate(self) -> None:
|
|
327
|
-
# Disorderly shutdown: we cannot assume any of the
|
|
328
|
-
# multiprocessing.Queue object work, and in fact they may hang
|
|
329
|
-
# if we try to do anything with them.
|
|
330
|
-
self._scan_requests.kill()
|
|
331
|
-
self._ingest_requests.kill()
|
|
427
|
+
self._expect_empty_queue(self._scan_requests)
|
|
428
|
+
self._expect_empty_queue(self._ingest_requests)
|
|
332
429
|
if self._write_requests is not None:
|
|
333
|
-
self._write_requests
|
|
334
|
-
self.
|
|
335
|
-
self.
|
|
336
|
-
for name, worker in self.workers.items():
|
|
337
|
-
if worker.is_alive():
|
|
338
|
-
self.progress.log.critical("Terminating worker %r.", name)
|
|
339
|
-
worker.kill()
|
|
430
|
+
self._expect_empty_queue(self._write_requests)
|
|
431
|
+
self._expect_empty_queue(self._reports)
|
|
432
|
+
self._expect_empty_queue(self._compression_dict)
|
|
340
433
|
|
|
341
434
|
def __enter__(self) -> Self:
|
|
342
|
-
_disable_resources_parallelism()
|
|
343
435
|
self.progress.__enter__()
|
|
344
436
|
# We make the low-level logger in __enter__ instead of __init__ only
|
|
345
437
|
# because that's the pattern used by true workers (where it matters).
|
|
@@ -353,23 +445,11 @@ class SupervisorCommunicator:
|
|
|
353
445
|
traceback: TracebackType | None,
|
|
354
446
|
) -> None:
|
|
355
447
|
if exc_type is not None:
|
|
356
|
-
self._cancel_event.set()
|
|
357
|
-
if exc_type is _WorkerCommunicationError:
|
|
358
|
-
self.progress.log.critical("Worker '%s' was terminated before it could finish.", exc_value)
|
|
359
|
-
self._terminate()
|
|
360
|
-
return None
|
|
361
448
|
if exc_type is not FatalWorkerError:
|
|
362
|
-
self.progress.log.critical("Caught
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
except _WorkerCommunicationError as err:
|
|
366
|
-
self.progress.log.critical(
|
|
367
|
-
"Worker '%s' was terminated before it could finish (after scanning).", err
|
|
368
|
-
)
|
|
369
|
-
self._terminate()
|
|
370
|
-
raise
|
|
449
|
+
self.progress.log.critical(f"Caught {exc_type.__name__}; attempting to shut down cleanly.")
|
|
450
|
+
self._cancel_event.set()
|
|
451
|
+
self.wait_for_workers_to_finish(already_failing=exc_type is not None)
|
|
371
452
|
self.progress.__exit__(exc_type, exc_value, traceback)
|
|
372
|
-
return None
|
|
373
453
|
|
|
374
454
|
def request_scan(self, quantum_id: uuid.UUID) -> None:
|
|
375
455
|
"""Send a request to the scanners to scan the given quantum.
|
|
@@ -379,19 +459,19 @@ class SupervisorCommunicator:
|
|
|
379
459
|
quantum_id : `uuid.UUID`
|
|
380
460
|
ID of the quantum to scan.
|
|
381
461
|
"""
|
|
382
|
-
self._scan_requests.put(_ScanRequest(quantum_id))
|
|
462
|
+
self._scan_requests.put(_ScanRequest(quantum_id), block=False)
|
|
383
463
|
|
|
384
|
-
def request_write(self, request:
|
|
464
|
+
def request_write(self, request: WriteRequest) -> None:
|
|
385
465
|
"""Send a request to the writer to write provenance for the given scan.
|
|
386
466
|
|
|
387
467
|
Parameters
|
|
388
468
|
----------
|
|
389
|
-
request : `
|
|
469
|
+
request : `WriteRequest`
|
|
390
470
|
Information from scanning a quantum (or knowing you don't have to,
|
|
391
471
|
in the case of blocked quanta).
|
|
392
472
|
"""
|
|
393
473
|
assert self._write_requests is not None, "Writer should not be used if writing is disabled."
|
|
394
|
-
self._write_requests.put(request)
|
|
474
|
+
self._write_requests.put(request, block=False)
|
|
395
475
|
|
|
396
476
|
def poll(self) -> Iterator[ScanReport]:
|
|
397
477
|
"""Poll for reports from workers while sending scan requests.
|
|
@@ -407,8 +487,9 @@ class SupervisorCommunicator:
|
|
|
407
487
|
it continues until the report queue is empty.
|
|
408
488
|
"""
|
|
409
489
|
block = True
|
|
410
|
-
|
|
411
|
-
|
|
490
|
+
msg = _get_from_queue(self._reports, block=block)
|
|
491
|
+
while msg is not None:
|
|
492
|
+
match self._handle_progress_reports(msg):
|
|
412
493
|
case ScanReport() as scan_report:
|
|
413
494
|
block = False
|
|
414
495
|
yield scan_report
|
|
@@ -416,40 +497,19 @@ class SupervisorCommunicator:
|
|
|
416
497
|
pass
|
|
417
498
|
case unexpected:
|
|
418
499
|
raise AssertionError(f"Unexpected message {unexpected!r} to supervisor.")
|
|
419
|
-
|
|
420
|
-
@overload
|
|
421
|
-
def _get_report(self, block: Literal[True]) -> Report: ...
|
|
422
|
-
|
|
423
|
-
@overload
|
|
424
|
-
def _get_report(self, block: bool) -> Report | None: ...
|
|
425
|
-
|
|
426
|
-
def _get_report(self, block: bool) -> Report | None:
|
|
427
|
-
"""Get a report from the reports queue, with timeout guards on
|
|
428
|
-
blocking requests.
|
|
429
|
-
|
|
430
|
-
This method may *return* WorkerCommunicatorError (rather than raise it)
|
|
431
|
-
when a serious error occurred communicating with a subprocess. This
|
|
432
|
-
is to avoid raising an exception in an __exit__ method (which calls
|
|
433
|
-
method).
|
|
434
|
-
"""
|
|
435
|
-
report = self._reports.get(block=block, timeout=self.config.worker_check_timeout)
|
|
436
|
-
while report is None and block:
|
|
437
|
-
# We hit the timeout; make sure all of the workers
|
|
438
|
-
# that should be alive actually are.
|
|
439
|
-
for name, worker in self.workers.items():
|
|
440
|
-
if not worker.successful and not worker.is_alive():
|
|
441
|
-
# Delete this worker from the list of workers so we don't
|
|
442
|
-
# hit this condition again when we try to handle the
|
|
443
|
-
# exception we raise.
|
|
444
|
-
raise _WorkerCommunicationError(name)
|
|
445
|
-
# If nothing is dead and we didn't hit the hang timeout, keep
|
|
446
|
-
# trying.
|
|
447
|
-
report = self._reports.get(block=block, timeout=self.config.worker_check_timeout)
|
|
448
|
-
return report
|
|
500
|
+
msg = _get_from_queue(self._reports, block=block)
|
|
449
501
|
|
|
450
502
|
def _handle_progress_reports(
|
|
451
503
|
self, report: Report, already_failing: bool = False
|
|
452
|
-
) ->
|
|
504
|
+
) -> (
|
|
505
|
+
ScanReport
|
|
506
|
+
| Literal[
|
|
507
|
+
_Sentinel.SCANNER_DONE,
|
|
508
|
+
_Sentinel.INGESTER_DONE,
|
|
509
|
+
_Sentinel.WRITER_DONE,
|
|
510
|
+
]
|
|
511
|
+
| None
|
|
512
|
+
):
|
|
453
513
|
"""Handle reports to the supervisor that can appear at any time, and
|
|
454
514
|
are typically just updates to the progress we've made.
|
|
455
515
|
|
|
@@ -479,9 +539,15 @@ class SupervisorCommunicator:
|
|
|
479
539
|
return report
|
|
480
540
|
return None
|
|
481
541
|
|
|
542
|
+
@staticmethod
|
|
543
|
+
def _expect_empty_queue(queue: Queue[Any]) -> None:
|
|
544
|
+
"""Assert that the given queue is empty."""
|
|
545
|
+
if (msg := _get_from_queue(queue, block=False, timeout=0)) is not None:
|
|
546
|
+
raise AssertionError(f"Queue is not empty; found {msg!r}.")
|
|
547
|
+
|
|
482
548
|
|
|
483
549
|
class WorkerCommunicator:
|
|
484
|
-
"""A base class for non-supervisor
|
|
550
|
+
"""A base class for non-supervisor workers.
|
|
485
551
|
|
|
486
552
|
Parameters
|
|
487
553
|
----------
|
|
@@ -493,8 +559,8 @@ class WorkerCommunicator:
|
|
|
493
559
|
Notes
|
|
494
560
|
-----
|
|
495
561
|
Each worker communicator is constructed in the main process and entered as
|
|
496
|
-
a context manager
|
|
497
|
-
|
|
562
|
+
a context manager on the actual worker process, so attributes that cannot
|
|
563
|
+
be pickled are constructed in ``__enter__`` instead of ``__init__``.
|
|
498
564
|
|
|
499
565
|
Worker communicators provide access to an `AggregatorConfig` and a logger
|
|
500
566
|
to their workers. As context managers, they handle exceptions and ensure
|
|
@@ -514,7 +580,6 @@ class WorkerCommunicator:
|
|
|
514
580
|
self._cancel_event = supervisor._cancel_event
|
|
515
581
|
|
|
516
582
|
def __enter__(self) -> Self:
|
|
517
|
-
_disable_resources_parallelism()
|
|
518
583
|
self.log = make_worker_log(self.name, self.config)
|
|
519
584
|
self.log.verbose("%s has PID %s (parent is %s).", self.name, os.getpid(), os.getppid())
|
|
520
585
|
self._exit_stack = ExitStack().__enter__()
|
|
@@ -547,7 +612,8 @@ class WorkerCommunicator:
|
|
|
547
612
|
_WorkerErrorMessage(
|
|
548
613
|
self.name,
|
|
549
614
|
"".join(format_exception(exc_type, exc_value, traceback)),
|
|
550
|
-
)
|
|
615
|
+
),
|
|
616
|
+
block=False,
|
|
551
617
|
)
|
|
552
618
|
self.log.debug("Error message sent to supervisor.")
|
|
553
619
|
else:
|
|
@@ -555,11 +621,6 @@ class WorkerCommunicator:
|
|
|
555
621
|
self._exit_stack.__exit__(exc_type, exc_value, traceback)
|
|
556
622
|
return True
|
|
557
623
|
|
|
558
|
-
@property
|
|
559
|
-
def exit_stack(self) -> ExitStack:
|
|
560
|
-
"""A `contextlib.ExitStack` tied to the communicator."""
|
|
561
|
-
return self._exit_stack
|
|
562
|
-
|
|
563
624
|
def log_progress(self, level: int, message: str) -> None:
|
|
564
625
|
"""Send a high-level log message to the supervisor.
|
|
565
626
|
|
|
@@ -570,7 +631,45 @@ class WorkerCommunicator:
|
|
|
570
631
|
message : `str`
|
|
571
632
|
Log message.
|
|
572
633
|
"""
|
|
573
|
-
self._reports.put(_ProgressLog(message=message, level=level))
|
|
634
|
+
self._reports.put(_ProgressLog(message=message, level=level), block=False)
|
|
635
|
+
|
|
636
|
+
def enter(
|
|
637
|
+
self,
|
|
638
|
+
cm: AbstractContextManager[_T],
|
|
639
|
+
on_close: str | None = None,
|
|
640
|
+
level: int = VERBOSE,
|
|
641
|
+
is_progress_log: bool = False,
|
|
642
|
+
) -> _T:
|
|
643
|
+
"""Enter a context manager that will be exited when the communicator's
|
|
644
|
+
context is exited.
|
|
645
|
+
|
|
646
|
+
Parameters
|
|
647
|
+
----------
|
|
648
|
+
cm : `contextlib.AbstractContextManager`
|
|
649
|
+
A context manager to enter.
|
|
650
|
+
on_close : `str`, optional
|
|
651
|
+
A log message to emit (on the worker's logger) just before the
|
|
652
|
+
given context manager is exited. This can be used to indicate
|
|
653
|
+
what's going on when an ``__exit__`` implementation has a lot of
|
|
654
|
+
work to do (e.g. moving a large file into a zip archive).
|
|
655
|
+
level : `int`, optional
|
|
656
|
+
Level for the ``on_close`` log message.
|
|
657
|
+
is_progress_log : `bool`, optional
|
|
658
|
+
If `True`, send the ``on_close`` message to the supervisor via
|
|
659
|
+
`log_progress` as well as the worker's logger.
|
|
660
|
+
"""
|
|
661
|
+
if on_close is None:
|
|
662
|
+
return self._exit_stack.enter_context(cm)
|
|
663
|
+
|
|
664
|
+
@contextmanager
|
|
665
|
+
def wrapper() -> Iterator[_T]:
|
|
666
|
+
with cm as result:
|
|
667
|
+
yield result
|
|
668
|
+
self.log.log(level, on_close)
|
|
669
|
+
if is_progress_log:
|
|
670
|
+
self.log_progress(level, on_close)
|
|
671
|
+
|
|
672
|
+
return self._exit_stack.enter_context(wrapper())
|
|
574
673
|
|
|
575
674
|
def check_for_cancel(self) -> None:
|
|
576
675
|
"""Check for a cancel signal from the supervisor and raise
|
|
@@ -592,7 +691,7 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
592
691
|
"""
|
|
593
692
|
|
|
594
693
|
def __init__(self, supervisor: SupervisorCommunicator, scanner_id: int):
|
|
595
|
-
super().__init__(supervisor,
|
|
694
|
+
super().__init__(supervisor, f"scanner-{scanner_id:03d}")
|
|
596
695
|
self.scanner_id = scanner_id
|
|
597
696
|
self._scan_requests = supervisor._scan_requests
|
|
598
697
|
self._ingest_requests = supervisor._ingest_requests
|
|
@@ -601,10 +700,6 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
601
700
|
self._got_no_more_scan_requests: bool = False
|
|
602
701
|
self._sent_no_more_ingest_requests: bool = False
|
|
603
702
|
|
|
604
|
-
@staticmethod
|
|
605
|
-
def get_worker_name(scanner_id: int) -> str:
|
|
606
|
-
return f"scanner-{scanner_id:03d}"
|
|
607
|
-
|
|
608
703
|
def report_scan(self, msg: ScanReport) -> None:
|
|
609
704
|
"""Report a completed scan to the supervisor.
|
|
610
705
|
|
|
@@ -613,7 +708,7 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
613
708
|
msg : `ScanReport`
|
|
614
709
|
Report to send.
|
|
615
710
|
"""
|
|
616
|
-
self._reports.put(msg)
|
|
711
|
+
self._reports.put(msg, block=False)
|
|
617
712
|
|
|
618
713
|
def request_ingest(self, request: IngestRequest) -> None:
|
|
619
714
|
"""Ask the ingester to ingest a quantum's outputs.
|
|
@@ -629,20 +724,20 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
629
724
|
as complete to the supervisor instead of sending it to the ingester.
|
|
630
725
|
"""
|
|
631
726
|
if request:
|
|
632
|
-
self._ingest_requests.put(request)
|
|
727
|
+
self._ingest_requests.put(request, block=False)
|
|
633
728
|
else:
|
|
634
|
-
self._reports.put(_IngestReport(1))
|
|
729
|
+
self._reports.put(_IngestReport(1), block=False)
|
|
635
730
|
|
|
636
|
-
def request_write(self, request:
|
|
731
|
+
def request_write(self, request: WriteRequest) -> None:
|
|
637
732
|
"""Ask the writer to write provenance for a quantum.
|
|
638
733
|
|
|
639
734
|
Parameters
|
|
640
735
|
----------
|
|
641
|
-
request : `
|
|
736
|
+
request : `WriteRequest`
|
|
642
737
|
Result of scanning a quantum.
|
|
643
738
|
"""
|
|
644
739
|
assert self._write_requests is not None, "Writer should not be used if writing is disabled."
|
|
645
|
-
self._write_requests.put(request)
|
|
740
|
+
self._write_requests.put(request, block=False)
|
|
646
741
|
|
|
647
742
|
def get_compression_dict(self) -> bytes | None:
|
|
648
743
|
"""Attempt to get the compression dict from the writer.
|
|
@@ -658,7 +753,7 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
658
753
|
A scanner should only call this method before it actually has the
|
|
659
754
|
compression dict.
|
|
660
755
|
"""
|
|
661
|
-
if (cdict := self._compression_dict
|
|
756
|
+
if (cdict := _get_from_queue(self._compression_dict)) is not None:
|
|
662
757
|
return cdict.data
|
|
663
758
|
return None
|
|
664
759
|
|
|
@@ -677,7 +772,7 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
677
772
|
"""
|
|
678
773
|
while True:
|
|
679
774
|
self.check_for_cancel()
|
|
680
|
-
scan_request = self._scan_requests
|
|
775
|
+
scan_request = _get_from_queue(self._scan_requests, block=True, timeout=self.config.worker_sleep)
|
|
681
776
|
if scan_request is _Sentinel.NO_MORE_SCAN_REQUESTS:
|
|
682
777
|
self._got_no_more_scan_requests = True
|
|
683
778
|
return
|
|
@@ -691,18 +786,20 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
691
786
|
traceback: TracebackType | None,
|
|
692
787
|
) -> bool | None:
|
|
693
788
|
result = super().__exit__(exc_type, exc_value, traceback)
|
|
694
|
-
self._ingest_requests.put(_Sentinel.NO_MORE_INGEST_REQUESTS)
|
|
789
|
+
self._ingest_requests.put(_Sentinel.NO_MORE_INGEST_REQUESTS, block=False)
|
|
695
790
|
if self._write_requests is not None:
|
|
696
|
-
self._write_requests.put(_Sentinel.NO_MORE_WRITE_REQUESTS)
|
|
791
|
+
self._write_requests.put(_Sentinel.NO_MORE_WRITE_REQUESTS, block=False)
|
|
697
792
|
while not self._got_no_more_scan_requests:
|
|
793
|
+
self.log.debug("Clearing scan request queue (~%d remaining)", self._scan_requests.qsize())
|
|
698
794
|
if (
|
|
699
795
|
not self._got_no_more_scan_requests
|
|
700
|
-
and self._scan_requests.get(
|
|
796
|
+
and self._scan_requests.get() is _Sentinel.NO_MORE_SCAN_REQUESTS
|
|
701
797
|
):
|
|
702
798
|
self._got_no_more_scan_requests = True
|
|
703
|
-
# We let the
|
|
704
|
-
|
|
705
|
-
self.
|
|
799
|
+
# We let the supervisor clear out the compression dict queue, because
|
|
800
|
+
# a single scanner can't know if it ever got sent out or not.
|
|
801
|
+
self.log.verbose("Sending done sentinal.")
|
|
802
|
+
self._reports.put(_Sentinel.SCANNER_DONE, block=False)
|
|
706
803
|
return result
|
|
707
804
|
|
|
708
805
|
|
|
@@ -716,15 +813,11 @@ class IngesterCommunicator(WorkerCommunicator):
|
|
|
716
813
|
"""
|
|
717
814
|
|
|
718
815
|
def __init__(self, supervisor: SupervisorCommunicator):
|
|
719
|
-
super().__init__(supervisor,
|
|
816
|
+
super().__init__(supervisor, "ingester")
|
|
720
817
|
self.n_scanners = supervisor.n_scanners
|
|
721
818
|
self._ingest_requests = supervisor._ingest_requests
|
|
722
819
|
self._n_requesters_done = 0
|
|
723
820
|
|
|
724
|
-
@staticmethod
|
|
725
|
-
def get_worker_name() -> str:
|
|
726
|
-
return "ingester"
|
|
727
|
-
|
|
728
821
|
def __exit__(
|
|
729
822
|
self,
|
|
730
823
|
exc_type: type[BaseException] | None,
|
|
@@ -740,8 +833,8 @@ class IngesterCommunicator(WorkerCommunicator):
|
|
|
740
833
|
)
|
|
741
834
|
if self._ingest_requests.get(block=True) is _Sentinel.NO_MORE_INGEST_REQUESTS:
|
|
742
835
|
self._n_requesters_done += 1
|
|
743
|
-
self.log.verbose("Sending
|
|
744
|
-
self._reports.put(
|
|
836
|
+
self.log.verbose("Sending done sentinal.")
|
|
837
|
+
self._reports.put(_Sentinel.INGESTER_DONE, block=False)
|
|
745
838
|
return result
|
|
746
839
|
|
|
747
840
|
def report_ingest(self, n_producers: int) -> None:
|
|
@@ -752,7 +845,7 @@ class IngesterCommunicator(WorkerCommunicator):
|
|
|
752
845
|
n_producers : `int`
|
|
753
846
|
Number of producing quanta whose datasets were ingested.
|
|
754
847
|
"""
|
|
755
|
-
self._reports.put(_IngestReport(n_producers))
|
|
848
|
+
self._reports.put(_IngestReport(n_producers), block=False)
|
|
756
849
|
|
|
757
850
|
def poll(self) -> Iterator[IngestRequest]:
|
|
758
851
|
"""Poll for ingest requests from the scanner workers.
|
|
@@ -769,7 +862,7 @@ class IngesterCommunicator(WorkerCommunicator):
|
|
|
769
862
|
"""
|
|
770
863
|
while True:
|
|
771
864
|
self.check_for_cancel()
|
|
772
|
-
ingest_request = self._ingest_requests
|
|
865
|
+
ingest_request = _get_from_queue(self._ingest_requests, block=True, timeout=_TINY_TIMEOUT)
|
|
773
866
|
if ingest_request is _Sentinel.NO_MORE_INGEST_REQUESTS:
|
|
774
867
|
self._n_requesters_done += 1
|
|
775
868
|
if self._n_requesters_done == self.n_scanners:
|
|
@@ -791,7 +884,7 @@ class WriterCommunicator(WorkerCommunicator):
|
|
|
791
884
|
|
|
792
885
|
def __init__(self, supervisor: SupervisorCommunicator):
|
|
793
886
|
assert supervisor._write_requests is not None
|
|
794
|
-
super().__init__(supervisor,
|
|
887
|
+
super().__init__(supervisor, "writer")
|
|
795
888
|
self.n_scanners = supervisor.n_scanners
|
|
796
889
|
self._write_requests = supervisor._write_requests
|
|
797
890
|
self._compression_dict = supervisor._compression_dict
|
|
@@ -799,10 +892,6 @@ class WriterCommunicator(WorkerCommunicator):
|
|
|
799
892
|
self._n_requesters_done = 0
|
|
800
893
|
self._sent_compression_dict = False
|
|
801
894
|
|
|
802
|
-
@staticmethod
|
|
803
|
-
def get_worker_name() -> str:
|
|
804
|
-
return "writer"
|
|
805
|
-
|
|
806
895
|
def __exit__(
|
|
807
896
|
self,
|
|
808
897
|
exc_type: type[BaseException] | None,
|
|
@@ -820,20 +909,16 @@ class WriterCommunicator(WorkerCommunicator):
|
|
|
820
909
|
)
|
|
821
910
|
if self._write_requests.get(block=True) is _Sentinel.NO_MORE_WRITE_REQUESTS:
|
|
822
911
|
self._n_requesters_done += 1
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
else:
|
|
826
|
-
self.log.verbose("Compression dictionary queue was already empty.")
|
|
827
|
-
self.log.verbose("Sending completion message.")
|
|
828
|
-
self._reports.put(_WorkerDone(self.name))
|
|
912
|
+
self.log.verbose("Sending done sentinal.")
|
|
913
|
+
self._reports.put(_Sentinel.WRITER_DONE, block=False)
|
|
829
914
|
return result
|
|
830
915
|
|
|
831
|
-
def poll(self) -> Iterator[
|
|
916
|
+
def poll(self) -> Iterator[WriteRequest]:
|
|
832
917
|
"""Poll for writer requests from the scanner workers and supervisor.
|
|
833
918
|
|
|
834
919
|
Yields
|
|
835
920
|
------
|
|
836
|
-
request : `
|
|
921
|
+
request : `WriteRequest`
|
|
837
922
|
The result of a quantum scan.
|
|
838
923
|
|
|
839
924
|
Notes
|
|
@@ -843,7 +928,7 @@ class WriterCommunicator(WorkerCommunicator):
|
|
|
843
928
|
"""
|
|
844
929
|
while True:
|
|
845
930
|
self.check_for_cancel()
|
|
846
|
-
write_request = self._write_requests
|
|
931
|
+
write_request = _get_from_queue(self._write_requests, block=True, timeout=_TINY_TIMEOUT)
|
|
847
932
|
if write_request is _Sentinel.NO_MORE_WRITE_REQUESTS:
|
|
848
933
|
self._n_requesters_done += 1
|
|
849
934
|
if self._n_requesters_done == self._n_requesters:
|
|
@@ -863,16 +948,16 @@ class WriterCommunicator(WorkerCommunicator):
|
|
|
863
948
|
"""
|
|
864
949
|
self.log.debug("Sending compression dictionary.")
|
|
865
950
|
for _ in range(self.n_scanners):
|
|
866
|
-
self._compression_dict.put(_CompressionDictionary(cdict_data))
|
|
951
|
+
self._compression_dict.put(_CompressionDictionary(cdict_data), block=False)
|
|
867
952
|
self._sent_compression_dict = True
|
|
868
953
|
|
|
869
954
|
def report_write(self) -> None:
|
|
870
955
|
"""Report to the supervisor that provenance for a quantum was written
|
|
871
956
|
to the graph.
|
|
872
957
|
"""
|
|
873
|
-
self._reports.put(_Sentinel.WRITE_REPORT)
|
|
958
|
+
self._reports.put(_Sentinel.WRITE_REPORT, block=False)
|
|
874
959
|
|
|
875
|
-
def periodically_check_for_cancel
|
|
960
|
+
def periodically_check_for_cancel(self, iterable: Iterable[_T], n: int = 100) -> Iterator[_T]:
|
|
876
961
|
"""Iterate while checking for a cancellation signal every ``n``
|
|
877
962
|
iterations.
|
|
878
963
|
|