lsst-pipe-base 30.0.0rc2__py3-none-any.whl → 30.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +31 -20
- lsst/pipe/base/_quantumContext.py +3 -3
- lsst/pipe/base/_status.py +43 -10
- lsst/pipe/base/_task_metadata.py +2 -2
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +8 -3
- lsst/pipe/base/automatic_connection_constants.py +20 -1
- lsst/pipe/base/cli/cmd/__init__.py +18 -2
- lsst/pipe/base/cli/cmd/commands.py +149 -4
- lsst/pipe/base/connectionTypes.py +72 -160
- lsst/pipe/base/connections.py +6 -9
- lsst/pipe/base/execution_reports.py +0 -5
- lsst/pipe/base/graph/graph.py +11 -10
- lsst/pipe/base/graph/quantumNode.py +4 -4
- lsst/pipe/base/graph_walker.py +8 -10
- lsst/pipe/base/log_capture.py +40 -80
- lsst/pipe/base/log_on_close.py +76 -0
- lsst/pipe/base/mp_graph_executor.py +51 -15
- lsst/pipe/base/pipeline.py +5 -6
- lsst/pipe/base/pipelineIR.py +2 -8
- lsst/pipe/base/pipelineTask.py +5 -7
- lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
- lsst/pipe/base/pipeline_graph/_edges.py +32 -22
- lsst/pipe/base/pipeline_graph/_mapping_views.py +4 -7
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +14 -7
- lsst/pipe/base/pipeline_graph/expressions.py +2 -2
- lsst/pipe/base/pipeline_graph/io.py +7 -10
- lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
- lsst/pipe/base/pipeline_graph/visualization/_layout.py +16 -18
- lsst/pipe/base/pipeline_graph/visualization/_merge.py +4 -7
- lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
- lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
- lsst/pipe/base/prerequisite_helpers.py +2 -1
- lsst/pipe/base/quantum_graph/_common.py +19 -20
- lsst/pipe/base/quantum_graph/_multiblock.py +37 -31
- lsst/pipe/base/quantum_graph/_predicted.py +113 -15
- lsst/pipe/base/quantum_graph/_provenance.py +1136 -45
- lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +204 -289
- lsst/pipe/base/quantum_graph/aggregator/_config.py +87 -9
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +13 -12
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +49 -235
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +6 -116
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +29 -39
- lsst/pipe/base/quantum_graph/aggregator/_workers.py +303 -0
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +34 -351
- lsst/pipe/base/quantum_graph/formatter.py +171 -0
- lsst/pipe/base/quantum_graph/ingest_graph.py +413 -0
- lsst/pipe/base/quantum_graph/visualization.py +5 -1
- lsst/pipe/base/quantum_graph_builder.py +33 -9
- lsst/pipe/base/quantum_graph_executor.py +116 -13
- lsst/pipe/base/quantum_graph_skeleton.py +31 -35
- lsst/pipe/base/quantum_provenance_graph.py +29 -12
- lsst/pipe/base/separable_pipeline_executor.py +19 -3
- lsst/pipe/base/single_quantum_executor.py +67 -42
- lsst/pipe/base/struct.py +4 -0
- lsst/pipe/base/testUtils.py +3 -3
- lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/METADATA +3 -3
- lsst_pipe_base-30.0.1.dist-info/RECORD +129 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/WHEEL +1 -1
- lsst_pipe_base-30.0.0rc2.dist-info/RECORD +0 -125
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/zip-safe +0 -0
|
@@ -31,154 +31,33 @@ __all__ = (
|
|
|
31
31
|
"FatalWorkerError",
|
|
32
32
|
"IngesterCommunicator",
|
|
33
33
|
"ScannerCommunicator",
|
|
34
|
-
"SpawnProcessContext",
|
|
35
34
|
"SupervisorCommunicator",
|
|
36
|
-
"ThreadingContext",
|
|
37
|
-
"WorkerContext",
|
|
38
35
|
)
|
|
39
36
|
|
|
40
37
|
import cProfile
|
|
41
38
|
import dataclasses
|
|
42
39
|
import enum
|
|
43
40
|
import logging
|
|
44
|
-
import multiprocessing.context
|
|
45
|
-
import multiprocessing.synchronize
|
|
46
41
|
import os
|
|
47
|
-
import queue
|
|
48
42
|
import signal
|
|
49
|
-
import threading
|
|
50
43
|
import time
|
|
51
44
|
import uuid
|
|
52
|
-
from abc import
|
|
53
|
-
from
|
|
54
|
-
from contextlib import AbstractContextManager, ExitStack, contextmanager
|
|
45
|
+
from collections.abc import Iterable, Iterator
|
|
46
|
+
from contextlib import ExitStack
|
|
55
47
|
from traceback import format_exception
|
|
56
48
|
from types import TracebackType
|
|
57
|
-
from typing import
|
|
49
|
+
from typing import Literal, Self, overload
|
|
58
50
|
|
|
59
|
-
from lsst.utils.logging import
|
|
51
|
+
from lsst.utils.logging import LsstLogAdapter
|
|
60
52
|
|
|
53
|
+
from .._provenance import ProvenanceQuantumScanData
|
|
61
54
|
from ._config import AggregatorConfig
|
|
62
55
|
from ._progress import ProgressManager, make_worker_log
|
|
63
|
-
from ._structs import IngestRequest, ScanReport
|
|
64
|
-
|
|
65
|
-
_T = TypeVar("_T")
|
|
56
|
+
from ._structs import IngestRequest, ScanReport
|
|
57
|
+
from ._workers import Event, Queue, Worker, WorkerFactory
|
|
66
58
|
|
|
67
59
|
_TINY_TIMEOUT = 0.01
|
|
68
60
|
|
|
69
|
-
# multiprocessing.Queue is a type according to the standard library type stubs,
|
|
70
|
-
# but it's really a function at runtime. But since the Python <= 3.11 type
|
|
71
|
-
# alias syntax uses the real runtime things we need to use strings, and hence
|
|
72
|
-
# we need to use Union. With Python 3.12's 'type' statement this gets cleaner.
|
|
73
|
-
Queue: TypeAlias = Union["queue.Queue[_T]", "multiprocessing.Queue[_T]"]
|
|
74
|
-
|
|
75
|
-
Event: TypeAlias = threading.Event | multiprocessing.synchronize.Event
|
|
76
|
-
|
|
77
|
-
Worker: TypeAlias = threading.Thread | multiprocessing.context.SpawnProcess
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
class WorkerContext(ABC):
|
|
81
|
-
"""A simple abstract interface that can be implemented by both threading
|
|
82
|
-
and multiprocessing.
|
|
83
|
-
"""
|
|
84
|
-
|
|
85
|
-
@abstractmethod
|
|
86
|
-
def make_queue(self) -> Queue[Any]:
|
|
87
|
-
"""Make an empty queue that can be used to pass objects between
|
|
88
|
-
workers in this context.
|
|
89
|
-
"""
|
|
90
|
-
raise NotImplementedError()
|
|
91
|
-
|
|
92
|
-
@abstractmethod
|
|
93
|
-
def make_event(self) -> Event:
|
|
94
|
-
"""Make an event that can be used to communicate a boolean state change
|
|
95
|
-
to workers in this context.
|
|
96
|
-
"""
|
|
97
|
-
raise NotImplementedError()
|
|
98
|
-
|
|
99
|
-
@abstractmethod
|
|
100
|
-
def make_worker(
|
|
101
|
-
self, target: Callable[..., None], args: tuple[Any, ...], name: str | None = None
|
|
102
|
-
) -> Worker:
|
|
103
|
-
"""Make a worker that runs the given callable.
|
|
104
|
-
|
|
105
|
-
Parameters
|
|
106
|
-
----------
|
|
107
|
-
target : `~collections.abc.Callable`
|
|
108
|
-
A callable to invoke on the worker.
|
|
109
|
-
args : `tuple`
|
|
110
|
-
Positional arguments to pass to the callable.
|
|
111
|
-
name : `str`, optional
|
|
112
|
-
Human-readable name for the worker.
|
|
113
|
-
|
|
114
|
-
Returns
|
|
115
|
-
-------
|
|
116
|
-
worker : `threading.Thread` or `multiprocessing.Process`
|
|
117
|
-
Process or thread. Will need to have its ``start`` method called
|
|
118
|
-
to actually begin.
|
|
119
|
-
"""
|
|
120
|
-
raise NotImplementedError()
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
class ThreadingContext(WorkerContext):
|
|
124
|
-
"""An implementation of `WorkerContext` backed by the `threading`
|
|
125
|
-
module.
|
|
126
|
-
"""
|
|
127
|
-
|
|
128
|
-
def make_queue(self) -> Queue[Any]:
|
|
129
|
-
return queue.Queue()
|
|
130
|
-
|
|
131
|
-
def make_event(self) -> Event:
|
|
132
|
-
return threading.Event()
|
|
133
|
-
|
|
134
|
-
def make_worker(
|
|
135
|
-
self, target: Callable[..., None], args: tuple[Any, ...], name: str | None = None
|
|
136
|
-
) -> Worker:
|
|
137
|
-
return threading.Thread(target=target, args=args, name=name)
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
class SpawnProcessContext(WorkerContext):
|
|
141
|
-
"""An implementation of `WorkerContext` backed by the `multiprocessing`
|
|
142
|
-
module, with new processes started by spawning.
|
|
143
|
-
"""
|
|
144
|
-
|
|
145
|
-
def __init__(self) -> None:
|
|
146
|
-
self._ctx = multiprocessing.get_context("spawn")
|
|
147
|
-
|
|
148
|
-
def make_queue(self) -> Queue[Any]:
|
|
149
|
-
return self._ctx.Queue()
|
|
150
|
-
|
|
151
|
-
def make_event(self) -> Event:
|
|
152
|
-
return self._ctx.Event()
|
|
153
|
-
|
|
154
|
-
def make_worker(
|
|
155
|
-
self, target: Callable[..., None], args: tuple[Any, ...], name: str | None = None
|
|
156
|
-
) -> Worker:
|
|
157
|
-
return self._ctx.Process(target=target, args=args, name=name)
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
def _get_from_queue(q: Queue[_T], block: bool = False, timeout: float | None = None) -> _T | None:
|
|
161
|
-
"""Get an object from a queue and return `None` if it is empty.
|
|
162
|
-
|
|
163
|
-
Parameters
|
|
164
|
-
----------
|
|
165
|
-
q : `Queue`
|
|
166
|
-
Queue to get an object from.
|
|
167
|
-
block : `bool`
|
|
168
|
-
Whether to block until an object is available.
|
|
169
|
-
timeout : `float` or `None`, optional
|
|
170
|
-
Maximum number of seconds to wait while blocking.
|
|
171
|
-
|
|
172
|
-
Returns
|
|
173
|
-
-------
|
|
174
|
-
obj : `object` or `None`
|
|
175
|
-
Object from the queue, or `None` if it was empty.
|
|
176
|
-
"""
|
|
177
|
-
try:
|
|
178
|
-
return q.get(block=block, timeout=timeout)
|
|
179
|
-
except queue.Empty:
|
|
180
|
-
return None
|
|
181
|
-
|
|
182
61
|
|
|
183
62
|
class FatalWorkerError(BaseException):
|
|
184
63
|
"""An exception raised by communicators when one worker (including the
|
|
@@ -187,6 +66,12 @@ class FatalWorkerError(BaseException):
|
|
|
187
66
|
"""
|
|
188
67
|
|
|
189
68
|
|
|
69
|
+
class _WorkerCommunicationError(Exception):
|
|
70
|
+
"""An exception raised by communicators when a worker has died unexpectedly
|
|
71
|
+
or become unresponsive.
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
|
|
190
75
|
class _Sentinel(enum.Enum):
|
|
191
76
|
"""Sentinel values used to indicate sequence points or worker shutdown
|
|
192
77
|
conditions.
|
|
@@ -213,21 +98,6 @@ class _Sentinel(enum.Enum):
|
|
|
213
98
|
quantum's provenance was written.
|
|
214
99
|
"""
|
|
215
100
|
|
|
216
|
-
SCANNER_DONE = enum.auto()
|
|
217
|
-
"""Sentinel sent from scanners to the supervisor to report that they are
|
|
218
|
-
done and shutting down.
|
|
219
|
-
"""
|
|
220
|
-
|
|
221
|
-
INGESTER_DONE = enum.auto()
|
|
222
|
-
"""Sentinel sent from the ingester to the supervisor to report that it is
|
|
223
|
-
done and shutting down.
|
|
224
|
-
"""
|
|
225
|
-
|
|
226
|
-
WRITER_DONE = enum.auto()
|
|
227
|
-
"""Sentinel sent from the writer to the supervisor to report that it is
|
|
228
|
-
done and shutting down.
|
|
229
|
-
"""
|
|
230
|
-
|
|
231
101
|
|
|
232
102
|
@dataclasses.dataclass
|
|
233
103
|
class _WorkerErrorMessage:
|
|
@@ -277,6 +147,16 @@ class _IngestReport:
|
|
|
277
147
|
"""
|
|
278
148
|
|
|
279
149
|
|
|
150
|
+
@dataclasses.dataclass
|
|
151
|
+
class _WorkerDone:
|
|
152
|
+
"""An internal struct passed from a worker to the supervisor when it has
|
|
153
|
+
successfully completed all work.
|
|
154
|
+
"""
|
|
155
|
+
|
|
156
|
+
name: str
|
|
157
|
+
"""Name of the worker reporting completion."""
|
|
158
|
+
|
|
159
|
+
|
|
280
160
|
@dataclasses.dataclass
|
|
281
161
|
class _ProgressLog:
|
|
282
162
|
"""A high-level log message sent from a worker to the supervisor.
|
|
@@ -303,20 +183,22 @@ class _CompressionDictionary:
|
|
|
303
183
|
"""
|
|
304
184
|
|
|
305
185
|
|
|
306
|
-
Report
|
|
186
|
+
type Report = (
|
|
307
187
|
ScanReport
|
|
308
188
|
| _IngestReport
|
|
309
189
|
| _WorkerErrorMessage
|
|
310
190
|
| _ProgressLog
|
|
311
|
-
|
|
|
312
|
-
|
|
313
|
-
_Sentinel.SCANNER_DONE,
|
|
314
|
-
_Sentinel.INGESTER_DONE,
|
|
315
|
-
_Sentinel.WRITER_DONE,
|
|
316
|
-
]
|
|
191
|
+
| _WorkerDone
|
|
192
|
+
| Literal[_Sentinel.WRITE_REPORT]
|
|
317
193
|
)
|
|
318
194
|
|
|
319
195
|
|
|
196
|
+
def _disable_resources_parallelism() -> None:
|
|
197
|
+
os.environ["LSST_RESOURCES_NUM_WORKERS"] = "1"
|
|
198
|
+
os.environ.pop("LSST_RESOURCES_EXECUTOR", None)
|
|
199
|
+
os.environ["LSST_S3_USE_THREADS"] = "False"
|
|
200
|
+
|
|
201
|
+
|
|
320
202
|
class SupervisorCommunicator:
|
|
321
203
|
"""A helper object that lets the supervisor direct the other workers.
|
|
322
204
|
|
|
@@ -326,7 +208,7 @@ class SupervisorCommunicator:
|
|
|
326
208
|
LSST-customized logger.
|
|
327
209
|
n_scanners : `int`
|
|
328
210
|
Number of scanner workers.
|
|
329
|
-
|
|
211
|
+
worker_factory : `WorkerFactory`
|
|
330
212
|
Abstraction over threading vs. multiprocessing.
|
|
331
213
|
config : `AggregatorConfig`
|
|
332
214
|
Configuration for the aggregator.
|
|
@@ -336,7 +218,7 @@ class SupervisorCommunicator:
|
|
|
336
218
|
self,
|
|
337
219
|
log: LsstLogAdapter,
|
|
338
220
|
n_scanners: int,
|
|
339
|
-
|
|
221
|
+
worker_factory: WorkerFactory,
|
|
340
222
|
config: AggregatorConfig,
|
|
341
223
|
) -> None:
|
|
342
224
|
self.config = config
|
|
@@ -346,14 +228,14 @@ class SupervisorCommunicator:
|
|
|
346
228
|
# When complete, the supervisor sends n_scanners sentinels and each
|
|
347
229
|
# scanner is careful to only take one before it starts its shutdown.
|
|
348
230
|
self._scan_requests: Queue[_ScanRequest | Literal[_Sentinel.NO_MORE_SCAN_REQUESTS]] = (
|
|
349
|
-
|
|
231
|
+
worker_factory.make_queue()
|
|
350
232
|
)
|
|
351
233
|
# The scanners send ingest requests to the ingester on this queue. Each
|
|
352
234
|
# scanner sends one sentinel when it is done, and the ingester is
|
|
353
235
|
# careful to wait for n_scanners sentinels to arrive before it starts
|
|
354
236
|
# its shutdown.
|
|
355
237
|
self._ingest_requests: Queue[IngestRequest | Literal[_Sentinel.NO_MORE_INGEST_REQUESTS]] = (
|
|
356
|
-
|
|
238
|
+
worker_factory.make_queue()
|
|
357
239
|
)
|
|
358
240
|
# The scanners send write requests to the writer on this queue (which
|
|
359
241
|
# will be `None` if we're not writing). The supervisor also sends
|
|
@@ -361,24 +243,24 @@ class SupervisorCommunicator:
|
|
|
361
243
|
# scanner and the supervisor send one sentinel when done, and the
|
|
362
244
|
# writer waits for (n_scanners + 1) sentinels to arrive before it
|
|
363
245
|
# starts its shutdown.
|
|
364
|
-
self._write_requests:
|
|
365
|
-
|
|
366
|
-
)
|
|
246
|
+
self._write_requests: (
|
|
247
|
+
Queue[ProvenanceQuantumScanData | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None
|
|
248
|
+
) = worker_factory.make_queue() if config.is_writing_provenance else None
|
|
367
249
|
# All other workers use this queue to send many different kinds of
|
|
368
250
|
# reports to the supervisor. The supervisor waits for a _DONE sentinel
|
|
369
251
|
# from each worker before it finishes its shutdown.
|
|
370
|
-
self._reports: Queue[Report] =
|
|
252
|
+
self._reports: Queue[Report] = worker_factory.make_queue()
|
|
371
253
|
# The writer sends the compression dictionary to the scanners on this
|
|
372
254
|
# queue. It puts n_scanners copies on the queue, and each scanner only
|
|
373
255
|
# takes one. The compression_dict queue has no sentinel because it is
|
|
374
256
|
# only used at most once; the supervisor takes responsibility for
|
|
375
257
|
# clearing it out when shutting down.
|
|
376
|
-
self._compression_dict: Queue[_CompressionDictionary] =
|
|
258
|
+
self._compression_dict: Queue[_CompressionDictionary] = worker_factory.make_queue()
|
|
377
259
|
# The supervisor sets this event when it receives an interrupt request
|
|
378
260
|
# from an exception in the main process (usually KeyboardInterrupt).
|
|
379
261
|
# Worker communicators check this in their polling loops and raise
|
|
380
262
|
# FatalWorkerError when they see it set.
|
|
381
|
-
self._cancel_event: Event =
|
|
263
|
+
self._cancel_event: Event = worker_factory.make_event()
|
|
382
264
|
# Track what state we are in closing down, so we can start at the right
|
|
383
265
|
# point if we're interrupted and __exit__ needs to clean up. Note that
|
|
384
266
|
# we can't rely on a non-exception __exit__ to do any shutdown work
|
|
@@ -387,51 +269,77 @@ class SupervisorCommunicator:
|
|
|
387
269
|
self._sent_no_more_scan_requests = False
|
|
388
270
|
self._sent_no_more_write_requests = False
|
|
389
271
|
self._n_scanners_done = 0
|
|
390
|
-
self.
|
|
391
|
-
self._writer_done = self._write_requests is None
|
|
272
|
+
self.workers: dict[str, Worker] = {}
|
|
392
273
|
|
|
393
|
-
def
|
|
274
|
+
def _wait_for_workers_to_finish(self, already_failing: bool = False) -> None:
|
|
275
|
+
# Orderly shutdown, including exceptions: let workers clear out the
|
|
276
|
+
# queues they're responsible for reading from.
|
|
394
277
|
if not self._sent_no_more_scan_requests:
|
|
395
278
|
for _ in range(self.n_scanners):
|
|
396
|
-
self._scan_requests.put(_Sentinel.NO_MORE_SCAN_REQUESTS
|
|
279
|
+
self._scan_requests.put(_Sentinel.NO_MORE_SCAN_REQUESTS)
|
|
397
280
|
self._sent_no_more_scan_requests = True
|
|
398
281
|
if not self._sent_no_more_write_requests and self._write_requests is not None:
|
|
399
|
-
self._write_requests.put(_Sentinel.NO_MORE_WRITE_REQUESTS
|
|
282
|
+
self._write_requests.put(_Sentinel.NO_MORE_WRITE_REQUESTS)
|
|
400
283
|
self._sent_no_more_write_requests = True
|
|
401
|
-
while not (
|
|
284
|
+
while not all(w.successful for w in self.workers.values()):
|
|
402
285
|
match self._handle_progress_reports(
|
|
403
|
-
self.
|
|
286
|
+
self._get_report(block=True), already_failing=already_failing
|
|
404
287
|
):
|
|
405
|
-
case None | ScanReport()
|
|
288
|
+
case None | ScanReport():
|
|
406
289
|
pass
|
|
407
|
-
case
|
|
408
|
-
self.
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
290
|
+
case _WorkerDone(name=worker_name):
|
|
291
|
+
self.workers[worker_name].successful = True
|
|
292
|
+
if worker_name == IngesterCommunicator.get_worker_name():
|
|
293
|
+
self.progress.quantum_ingests.close()
|
|
294
|
+
elif worker_name == WriterCommunicator.get_worker_name():
|
|
295
|
+
self.progress.writes.close()
|
|
296
|
+
else:
|
|
297
|
+
self._n_scanners_done += 1
|
|
298
|
+
if self._n_scanners_done == self.n_scanners:
|
|
299
|
+
self.progress.scans.close()
|
|
416
300
|
case unexpected:
|
|
417
301
|
raise AssertionError(f"Unexpected message {unexpected!r} to supervisor.")
|
|
418
302
|
self.log.verbose(
|
|
419
|
-
"
|
|
420
|
-
self.
|
|
421
|
-
self._writer_done,
|
|
422
|
-
self._n_scanners_done,
|
|
303
|
+
"Waiting for workers [%s] to report successful completion.",
|
|
304
|
+
", ".join(w.name for w in self.workers.values() if not w.successful),
|
|
423
305
|
)
|
|
424
|
-
while _get_from_queue(self._compression_dict) is not None:
|
|
425
|
-
self.log.verbose("Flushing compression dict queue.")
|
|
426
306
|
self.log.verbose("Checking that all queues are empty.")
|
|
427
|
-
self.
|
|
428
|
-
|
|
307
|
+
if self._scan_requests.clear():
|
|
308
|
+
self.progress.log.warning("Scan request queue was not empty at shutdown.")
|
|
309
|
+
self._scan_requests.kill()
|
|
310
|
+
if self._ingest_requests.clear():
|
|
311
|
+
self.progress.log.warning("Ingest request queue was not empty at shutdown.")
|
|
312
|
+
self._ingest_requests.kill()
|
|
313
|
+
if self._write_requests is not None and self._write_requests.clear():
|
|
314
|
+
self.progress.log.warning("Write request queue was not empty at shutdown.")
|
|
315
|
+
self._write_requests.kill()
|
|
316
|
+
if self._reports.clear():
|
|
317
|
+
self.progress.log.warning("Reports queue was not empty at shutdown.")
|
|
318
|
+
self._reports.kill()
|
|
319
|
+
if self._compression_dict.clear():
|
|
320
|
+
self.progress.log.warning("Compression dictionary queue was not empty at shutdown.")
|
|
321
|
+
self._compression_dict.kill()
|
|
322
|
+
for worker in self.workers.values():
|
|
323
|
+
self.log.verbose("Waiting for %s to shut down.", worker.name)
|
|
324
|
+
worker.join()
|
|
325
|
+
|
|
326
|
+
def _terminate(self) -> None:
|
|
327
|
+
# Disorderly shutdown: we cannot assume any of the
|
|
328
|
+
# multiprocessing.Queue objects work, and in fact they may hang
|
|
329
|
+
# if we try to do anything with them.
|
|
330
|
+
self._scan_requests.kill()
|
|
331
|
+
self._ingest_requests.kill()
|
|
429
332
|
if self._write_requests is not None:
|
|
430
|
-
self.
|
|
431
|
-
self.
|
|
432
|
-
self.
|
|
333
|
+
self._write_requests.kill()
|
|
334
|
+
self._compression_dict.kill()
|
|
335
|
+
self._reports.kill()
|
|
336
|
+
for name, worker in self.workers.items():
|
|
337
|
+
if worker.is_alive():
|
|
338
|
+
self.progress.log.critical("Terminating worker %r.", name)
|
|
339
|
+
worker.kill()
|
|
433
340
|
|
|
434
341
|
def __enter__(self) -> Self:
|
|
342
|
+
_disable_resources_parallelism()
|
|
435
343
|
self.progress.__enter__()
|
|
436
344
|
# We make the low-level logger in __enter__ instead of __init__ only
|
|
437
345
|
# because that's the pattern used by true workers (where it matters).
|
|
@@ -445,11 +353,23 @@ class SupervisorCommunicator:
|
|
|
445
353
|
traceback: TracebackType | None,
|
|
446
354
|
) -> None:
|
|
447
355
|
if exc_type is not None:
|
|
448
|
-
if exc_type is not FatalWorkerError:
|
|
449
|
-
self.progress.log.critical(f"Caught {exc_type.__name__}; attempting to shut down cleanly.")
|
|
450
356
|
self._cancel_event.set()
|
|
451
|
-
|
|
357
|
+
if exc_type is _WorkerCommunicationError:
|
|
358
|
+
self.progress.log.critical("Worker '%s' was terminated before it could finish.", exc_value)
|
|
359
|
+
self._terminate()
|
|
360
|
+
return None
|
|
361
|
+
if exc_type is not FatalWorkerError:
|
|
362
|
+
self.progress.log.critical("Caught %s; attempting to shut down cleanly.", exc_type)
|
|
363
|
+
try:
|
|
364
|
+
self._wait_for_workers_to_finish(already_failing=exc_type is not None)
|
|
365
|
+
except _WorkerCommunicationError as err:
|
|
366
|
+
self.progress.log.critical(
|
|
367
|
+
"Worker '%s' was terminated before it could finish (after scanning).", err
|
|
368
|
+
)
|
|
369
|
+
self._terminate()
|
|
370
|
+
raise
|
|
452
371
|
self.progress.__exit__(exc_type, exc_value, traceback)
|
|
372
|
+
return None
|
|
453
373
|
|
|
454
374
|
def request_scan(self, quantum_id: uuid.UUID) -> None:
|
|
455
375
|
"""Send a request to the scanners to scan the given quantum.
|
|
@@ -459,19 +379,19 @@ class SupervisorCommunicator:
|
|
|
459
379
|
quantum_id : `uuid.UUID`
|
|
460
380
|
ID of the quantum to scan.
|
|
461
381
|
"""
|
|
462
|
-
self._scan_requests.put(_ScanRequest(quantum_id)
|
|
382
|
+
self._scan_requests.put(_ScanRequest(quantum_id))
|
|
463
383
|
|
|
464
|
-
def request_write(self, request:
|
|
384
|
+
def request_write(self, request: ProvenanceQuantumScanData) -> None:
|
|
465
385
|
"""Send a request to the writer to write provenance for the given scan.
|
|
466
386
|
|
|
467
387
|
Parameters
|
|
468
388
|
----------
|
|
469
|
-
request : `
|
|
389
|
+
request : `ProvenanceQuantumScanData`
|
|
470
390
|
Information from scanning a quantum (or knowing you don't have to,
|
|
471
391
|
in the case of blocked quanta).
|
|
472
392
|
"""
|
|
473
393
|
assert self._write_requests is not None, "Writer should not be used if writing is disabled."
|
|
474
|
-
self._write_requests.put(request
|
|
394
|
+
self._write_requests.put(request)
|
|
475
395
|
|
|
476
396
|
def poll(self) -> Iterator[ScanReport]:
|
|
477
397
|
"""Poll for reports from workers while sending scan requests.
|
|
@@ -487,9 +407,8 @@ class SupervisorCommunicator:
|
|
|
487
407
|
it continues until the report queue is empty.
|
|
488
408
|
"""
|
|
489
409
|
block = True
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
match self._handle_progress_reports(msg):
|
|
410
|
+
while report := self._get_report(block=block):
|
|
411
|
+
match self._handle_progress_reports(report):
|
|
493
412
|
case ScanReport() as scan_report:
|
|
494
413
|
block = False
|
|
495
414
|
yield scan_report
|
|
@@ -497,19 +416,40 @@ class SupervisorCommunicator:
|
|
|
497
416
|
pass
|
|
498
417
|
case unexpected:
|
|
499
418
|
raise AssertionError(f"Unexpected message {unexpected!r} to supervisor.")
|
|
500
|
-
|
|
419
|
+
|
|
420
|
+
@overload
|
|
421
|
+
def _get_report(self, block: Literal[True]) -> Report: ...
|
|
422
|
+
|
|
423
|
+
@overload
|
|
424
|
+
def _get_report(self, block: bool) -> Report | None: ...
|
|
425
|
+
|
|
426
|
+
def _get_report(self, block: bool) -> Report | None:
|
|
427
|
+
"""Get a report from the reports queue, with timeout guards on
|
|
428
|
+
blocking requests.
|
|
429
|
+
|
|
430
|
+
This method raises `_WorkerCommunicationError` (it does not return it)
|
|
431
|
+
when a blocking request finds that a worker which has not reported
|
|
432
|
+
success is no longer alive. Callers running inside an ``__exit__``
|
|
433
|
+
method are expected to catch that exception themselves.
|
|
434
|
+
"""
|
|
435
|
+
report = self._reports.get(block=block, timeout=self.config.worker_check_timeout)
|
|
436
|
+
while report is None and block:
|
|
437
|
+
# We hit the timeout; make sure all of the workers
|
|
438
|
+
# that should be alive actually are.
|
|
439
|
+
for name, worker in self.workers.items():
|
|
440
|
+
if not worker.successful and not worker.is_alive():
|
|
441
|
+
# Delete this worker from the list of workers so we don't
|
|
442
|
+
# hit this condition again when we try to handle the
|
|
443
|
+
# exception we raise.
|
|
444
|
+
raise _WorkerCommunicationError(name)
|
|
445
|
+
# If nothing is dead and we didn't hit the hang timeout, keep
|
|
446
|
+
# trying.
|
|
447
|
+
report = self._reports.get(block=block, timeout=self.config.worker_check_timeout)
|
|
448
|
+
return report
|
|
501
449
|
|
|
502
450
|
def _handle_progress_reports(
|
|
503
451
|
self, report: Report, already_failing: bool = False
|
|
504
|
-
) ->
|
|
505
|
-
ScanReport
|
|
506
|
-
| Literal[
|
|
507
|
-
_Sentinel.SCANNER_DONE,
|
|
508
|
-
_Sentinel.INGESTER_DONE,
|
|
509
|
-
_Sentinel.WRITER_DONE,
|
|
510
|
-
]
|
|
511
|
-
| None
|
|
512
|
-
):
|
|
452
|
+
) -> ScanReport | _WorkerDone | None:
|
|
513
453
|
"""Handle reports to the supervisor that can appear at any time, and
|
|
514
454
|
are typically just updates to the progress we've made.
|
|
515
455
|
|
|
@@ -539,15 +479,9 @@ class SupervisorCommunicator:
|
|
|
539
479
|
return report
|
|
540
480
|
return None
|
|
541
481
|
|
|
542
|
-
@staticmethod
|
|
543
|
-
def _expect_empty_queue(queue: Queue[Any]) -> None:
|
|
544
|
-
"""Assert that the given queue is empty."""
|
|
545
|
-
if (msg := _get_from_queue(queue, block=False, timeout=0)) is not None:
|
|
546
|
-
raise AssertionError(f"Queue is not empty; found {msg!r}.")
|
|
547
|
-
|
|
548
482
|
|
|
549
483
|
class WorkerCommunicator:
|
|
550
|
-
"""A base class for non-supervisor
|
|
484
|
+
"""A base class for non-supervisor worker communicators.
|
|
551
485
|
|
|
552
486
|
Parameters
|
|
553
487
|
----------
|
|
@@ -559,8 +493,8 @@ class WorkerCommunicator:
|
|
|
559
493
|
Notes
|
|
560
494
|
-----
|
|
561
495
|
Each worker communicator is constructed in the main process and entered as
|
|
562
|
-
a context manager on the actual worker process, so attributes that
|
|
563
|
-
be pickled are constructed in ``__enter__`` instead of ``__init__``.
|
|
496
|
+
a context manager *only* on the actual worker process, so attributes that
|
|
497
|
+
cannot be pickled are constructed in ``__enter__`` instead of ``__init__``.
|
|
564
498
|
|
|
565
499
|
Worker communicators provide access to an `AggregatorConfig` and a logger
|
|
566
500
|
to their workers. As context managers, they handle exceptions and ensure
|
|
@@ -580,6 +514,7 @@ class WorkerCommunicator:
|
|
|
580
514
|
self._cancel_event = supervisor._cancel_event
|
|
581
515
|
|
|
582
516
|
def __enter__(self) -> Self:
|
|
517
|
+
_disable_resources_parallelism()
|
|
583
518
|
self.log = make_worker_log(self.name, self.config)
|
|
584
519
|
self.log.verbose("%s has PID %s (parent is %s).", self.name, os.getpid(), os.getppid())
|
|
585
520
|
self._exit_stack = ExitStack().__enter__()
|
|
@@ -612,8 +547,7 @@ class WorkerCommunicator:
|
|
|
612
547
|
_WorkerErrorMessage(
|
|
613
548
|
self.name,
|
|
614
549
|
"".join(format_exception(exc_type, exc_value, traceback)),
|
|
615
|
-
)
|
|
616
|
-
block=False,
|
|
550
|
+
)
|
|
617
551
|
)
|
|
618
552
|
self.log.debug("Error message sent to supervisor.")
|
|
619
553
|
else:
|
|
@@ -621,6 +555,11 @@ class WorkerCommunicator:
|
|
|
621
555
|
self._exit_stack.__exit__(exc_type, exc_value, traceback)
|
|
622
556
|
return True
|
|
623
557
|
|
|
558
|
+
@property
|
|
559
|
+
def exit_stack(self) -> ExitStack:
|
|
560
|
+
"""A `contextlib.ExitStack` tied to the communicator."""
|
|
561
|
+
return self._exit_stack
|
|
562
|
+
|
|
624
563
|
def log_progress(self, level: int, message: str) -> None:
|
|
625
564
|
"""Send a high-level log message to the supervisor.
|
|
626
565
|
|
|
@@ -631,45 +570,7 @@ class WorkerCommunicator:
|
|
|
631
570
|
message : `str`
|
|
632
571
|
Log message.
|
|
633
572
|
"""
|
|
634
|
-
self._reports.put(_ProgressLog(message=message, level=level)
|
|
635
|
-
|
|
636
|
-
def enter(
|
|
637
|
-
self,
|
|
638
|
-
cm: AbstractContextManager[_T],
|
|
639
|
-
on_close: str | None = None,
|
|
640
|
-
level: int = VERBOSE,
|
|
641
|
-
is_progress_log: bool = False,
|
|
642
|
-
) -> _T:
|
|
643
|
-
"""Enter a context manager that will be exited when the communicator's
|
|
644
|
-
context is exited.
|
|
645
|
-
|
|
646
|
-
Parameters
|
|
647
|
-
----------
|
|
648
|
-
cm : `contextlib.AbstractContextManager`
|
|
649
|
-
A context manager to enter.
|
|
650
|
-
on_close : `str`, optional
|
|
651
|
-
A log message to emit (on the worker's logger) just before the
|
|
652
|
-
given context manager is exited. This can be used to indicate
|
|
653
|
-
what's going on when an ``__exit__`` implementation has a lot of
|
|
654
|
-
work to do (e.g. moving a large file into a zip archive).
|
|
655
|
-
level : `int`, optional
|
|
656
|
-
Level for the ``on_close`` log message.
|
|
657
|
-
is_progress_log : `bool`, optional
|
|
658
|
-
If `True`, send the ``on_close`` message to the supervisor via
|
|
659
|
-
`log_progress` as well as the worker's logger.
|
|
660
|
-
"""
|
|
661
|
-
if on_close is None:
|
|
662
|
-
return self._exit_stack.enter_context(cm)
|
|
663
|
-
|
|
664
|
-
@contextmanager
|
|
665
|
-
def wrapper() -> Iterator[_T]:
|
|
666
|
-
with cm as result:
|
|
667
|
-
yield result
|
|
668
|
-
self.log.log(level, on_close)
|
|
669
|
-
if is_progress_log:
|
|
670
|
-
self.log_progress(level, on_close)
|
|
671
|
-
|
|
672
|
-
return self._exit_stack.enter_context(wrapper())
|
|
573
|
+
self._reports.put(_ProgressLog(message=message, level=level))
|
|
673
574
|
|
|
674
575
|
def check_for_cancel(self) -> None:
|
|
675
576
|
"""Check for a cancel signal from the supervisor and raise
|
|
@@ -691,7 +592,7 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
691
592
|
"""
|
|
692
593
|
|
|
693
594
|
def __init__(self, supervisor: SupervisorCommunicator, scanner_id: int):
|
|
694
|
-
super().__init__(supervisor,
|
|
595
|
+
super().__init__(supervisor, self.get_worker_name(scanner_id))
|
|
695
596
|
self.scanner_id = scanner_id
|
|
696
597
|
self._scan_requests = supervisor._scan_requests
|
|
697
598
|
self._ingest_requests = supervisor._ingest_requests
|
|
@@ -700,6 +601,10 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
700
601
|
self._got_no_more_scan_requests: bool = False
|
|
701
602
|
self._sent_no_more_ingest_requests: bool = False
|
|
702
603
|
|
|
604
|
+
@staticmethod
|
|
605
|
+
def get_worker_name(scanner_id: int) -> str:
|
|
606
|
+
return f"scanner-{scanner_id:03d}"
|
|
607
|
+
|
|
703
608
|
def report_scan(self, msg: ScanReport) -> None:
|
|
704
609
|
"""Report a completed scan to the supervisor.
|
|
705
610
|
|
|
@@ -708,7 +613,7 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
708
613
|
msg : `ScanReport`
|
|
709
614
|
Report to send.
|
|
710
615
|
"""
|
|
711
|
-
self._reports.put(msg
|
|
616
|
+
self._reports.put(msg)
|
|
712
617
|
|
|
713
618
|
def request_ingest(self, request: IngestRequest) -> None:
|
|
714
619
|
"""Ask the ingester to ingest a quantum's outputs.
|
|
@@ -724,20 +629,20 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
724
629
|
as complete to the supervisor instead of sending it to the ingester.
|
|
725
630
|
"""
|
|
726
631
|
if request:
|
|
727
|
-
self._ingest_requests.put(request
|
|
632
|
+
self._ingest_requests.put(request)
|
|
728
633
|
else:
|
|
729
|
-
self._reports.put(_IngestReport(1)
|
|
634
|
+
self._reports.put(_IngestReport(1))
|
|
730
635
|
|
|
731
|
-
def request_write(self, request:
|
|
636
|
+
def request_write(self, request: ProvenanceQuantumScanData) -> None:
|
|
732
637
|
"""Ask the writer to write provenance for a quantum.
|
|
733
638
|
|
|
734
639
|
Parameters
|
|
735
640
|
----------
|
|
736
|
-
request : `
|
|
641
|
+
request : `ProvenanceQuantumScanData`
|
|
737
642
|
Result of scanning a quantum.
|
|
738
643
|
"""
|
|
739
644
|
assert self._write_requests is not None, "Writer should not be used if writing is disabled."
|
|
740
|
-
self._write_requests.put(request
|
|
645
|
+
self._write_requests.put(request)
|
|
741
646
|
|
|
742
647
|
def get_compression_dict(self) -> bytes | None:
|
|
743
648
|
"""Attempt to get the compression dict from the writer.
|
|
@@ -753,7 +658,7 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
753
658
|
A scanner should only call this method before it actually has the
|
|
754
659
|
compression dict.
|
|
755
660
|
"""
|
|
756
|
-
if (cdict :=
|
|
661
|
+
if (cdict := self._compression_dict.get()) is not None:
|
|
757
662
|
return cdict.data
|
|
758
663
|
return None
|
|
759
664
|
|
|
@@ -772,7 +677,7 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
772
677
|
"""
|
|
773
678
|
while True:
|
|
774
679
|
self.check_for_cancel()
|
|
775
|
-
scan_request =
|
|
680
|
+
scan_request = self._scan_requests.get(block=True, timeout=self.config.worker_sleep)
|
|
776
681
|
if scan_request is _Sentinel.NO_MORE_SCAN_REQUESTS:
|
|
777
682
|
self._got_no_more_scan_requests = True
|
|
778
683
|
return
|
|
@@ -786,20 +691,18 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
786
691
|
traceback: TracebackType | None,
|
|
787
692
|
) -> bool | None:
|
|
788
693
|
result = super().__exit__(exc_type, exc_value, traceback)
|
|
789
|
-
self._ingest_requests.put(_Sentinel.NO_MORE_INGEST_REQUESTS
|
|
694
|
+
self._ingest_requests.put(_Sentinel.NO_MORE_INGEST_REQUESTS)
|
|
790
695
|
if self._write_requests is not None:
|
|
791
|
-
self._write_requests.put(_Sentinel.NO_MORE_WRITE_REQUESTS
|
|
696
|
+
self._write_requests.put(_Sentinel.NO_MORE_WRITE_REQUESTS)
|
|
792
697
|
while not self._got_no_more_scan_requests:
|
|
793
|
-
self.log.debug("Clearing scan request queue (~%d remaining)", self._scan_requests.qsize())
|
|
794
698
|
if (
|
|
795
699
|
not self._got_no_more_scan_requests
|
|
796
|
-
and self._scan_requests.get() is _Sentinel.NO_MORE_SCAN_REQUESTS
|
|
700
|
+
and self._scan_requests.get(block=True) is _Sentinel.NO_MORE_SCAN_REQUESTS
|
|
797
701
|
):
|
|
798
702
|
self._got_no_more_scan_requests = True
|
|
799
|
-
# We let the
|
|
800
|
-
|
|
801
|
-
self.
|
|
802
|
-
self._reports.put(_Sentinel.SCANNER_DONE, block=False)
|
|
703
|
+
# We let the writer clear out the compression dict queue.
|
|
704
|
+
self.log.verbose("Sending completion message.")
|
|
705
|
+
self._reports.put(_WorkerDone(self.name))
|
|
803
706
|
return result
|
|
804
707
|
|
|
805
708
|
|
|
@@ -813,11 +716,15 @@ class IngesterCommunicator(WorkerCommunicator):
|
|
|
813
716
|
"""
|
|
814
717
|
|
|
815
718
|
def __init__(self, supervisor: SupervisorCommunicator):
|
|
816
|
-
super().__init__(supervisor,
|
|
719
|
+
super().__init__(supervisor, self.get_worker_name())
|
|
817
720
|
self.n_scanners = supervisor.n_scanners
|
|
818
721
|
self._ingest_requests = supervisor._ingest_requests
|
|
819
722
|
self._n_requesters_done = 0
|
|
820
723
|
|
|
724
|
+
@staticmethod
|
|
725
|
+
def get_worker_name() -> str:
|
|
726
|
+
return "ingester"
|
|
727
|
+
|
|
821
728
|
def __exit__(
|
|
822
729
|
self,
|
|
823
730
|
exc_type: type[BaseException] | None,
|
|
@@ -833,8 +740,8 @@ class IngesterCommunicator(WorkerCommunicator):
|
|
|
833
740
|
)
|
|
834
741
|
if self._ingest_requests.get(block=True) is _Sentinel.NO_MORE_INGEST_REQUESTS:
|
|
835
742
|
self._n_requesters_done += 1
|
|
836
|
-
self.log.verbose("Sending
|
|
837
|
-
self._reports.put(
|
|
743
|
+
self.log.verbose("Sending completion message.")
|
|
744
|
+
self._reports.put(_WorkerDone(self.name))
|
|
838
745
|
return result
|
|
839
746
|
|
|
840
747
|
def report_ingest(self, n_producers: int) -> None:
|
|
@@ -845,7 +752,7 @@ class IngesterCommunicator(WorkerCommunicator):
|
|
|
845
752
|
n_producers : `int`
|
|
846
753
|
Number of producing quanta whose datasets were ingested.
|
|
847
754
|
"""
|
|
848
|
-
self._reports.put(_IngestReport(n_producers)
|
|
755
|
+
self._reports.put(_IngestReport(n_producers))
|
|
849
756
|
|
|
850
757
|
def poll(self) -> Iterator[IngestRequest]:
|
|
851
758
|
"""Poll for ingest requests from the scanner workers.
|
|
@@ -862,7 +769,7 @@ class IngesterCommunicator(WorkerCommunicator):
|
|
|
862
769
|
"""
|
|
863
770
|
while True:
|
|
864
771
|
self.check_for_cancel()
|
|
865
|
-
ingest_request =
|
|
772
|
+
ingest_request = self._ingest_requests.get(block=True, timeout=_TINY_TIMEOUT)
|
|
866
773
|
if ingest_request is _Sentinel.NO_MORE_INGEST_REQUESTS:
|
|
867
774
|
self._n_requesters_done += 1
|
|
868
775
|
if self._n_requesters_done == self.n_scanners:
|
|
@@ -884,7 +791,7 @@ class WriterCommunicator(WorkerCommunicator):
|
|
|
884
791
|
|
|
885
792
|
def __init__(self, supervisor: SupervisorCommunicator):
|
|
886
793
|
assert supervisor._write_requests is not None
|
|
887
|
-
super().__init__(supervisor,
|
|
794
|
+
super().__init__(supervisor, self.get_worker_name())
|
|
888
795
|
self.n_scanners = supervisor.n_scanners
|
|
889
796
|
self._write_requests = supervisor._write_requests
|
|
890
797
|
self._compression_dict = supervisor._compression_dict
|
|
@@ -892,6 +799,10 @@ class WriterCommunicator(WorkerCommunicator):
|
|
|
892
799
|
self._n_requesters_done = 0
|
|
893
800
|
self._sent_compression_dict = False
|
|
894
801
|
|
|
802
|
+
@staticmethod
|
|
803
|
+
def get_worker_name() -> str:
|
|
804
|
+
return "writer"
|
|
805
|
+
|
|
895
806
|
def __exit__(
|
|
896
807
|
self,
|
|
897
808
|
exc_type: type[BaseException] | None,
|
|
@@ -909,16 +820,20 @@ class WriterCommunicator(WorkerCommunicator):
|
|
|
909
820
|
)
|
|
910
821
|
if self._write_requests.get(block=True) is _Sentinel.NO_MORE_WRITE_REQUESTS:
|
|
911
822
|
self._n_requesters_done += 1
|
|
912
|
-
self.
|
|
913
|
-
|
|
823
|
+
if self._compression_dict.clear():
|
|
824
|
+
self.log.verbose("Cleared out compression dictionary queue.")
|
|
825
|
+
else:
|
|
826
|
+
self.log.verbose("Compression dictionary queue was already empty.")
|
|
827
|
+
self.log.verbose("Sending completion message.")
|
|
828
|
+
self._reports.put(_WorkerDone(self.name))
|
|
914
829
|
return result
|
|
915
830
|
|
|
916
|
-
def poll(self) -> Iterator[
|
|
831
|
+
def poll(self) -> Iterator[ProvenanceQuantumScanData]:
|
|
917
832
|
"""Poll for writer requests from the scanner workers and supervisor.
|
|
918
833
|
|
|
919
834
|
Yields
|
|
920
835
|
------
|
|
921
|
-
request : `
|
|
836
|
+
request : `ProvenanceQuantumScanData`
|
|
922
837
|
The result of a quantum scan.
|
|
923
838
|
|
|
924
839
|
Notes
|
|
@@ -928,7 +843,7 @@ class WriterCommunicator(WorkerCommunicator):
|
|
|
928
843
|
"""
|
|
929
844
|
while True:
|
|
930
845
|
self.check_for_cancel()
|
|
931
|
-
write_request =
|
|
846
|
+
write_request = self._write_requests.get(block=True, timeout=_TINY_TIMEOUT)
|
|
932
847
|
if write_request is _Sentinel.NO_MORE_WRITE_REQUESTS:
|
|
933
848
|
self._n_requesters_done += 1
|
|
934
849
|
if self._n_requesters_done == self._n_requesters:
|
|
@@ -948,16 +863,16 @@ class WriterCommunicator(WorkerCommunicator):
|
|
|
948
863
|
"""
|
|
949
864
|
self.log.debug("Sending compression dictionary.")
|
|
950
865
|
for _ in range(self.n_scanners):
|
|
951
|
-
self._compression_dict.put(_CompressionDictionary(cdict_data)
|
|
866
|
+
self._compression_dict.put(_CompressionDictionary(cdict_data))
|
|
952
867
|
self._sent_compression_dict = True
|
|
953
868
|
|
|
954
869
|
def report_write(self) -> None:
|
|
955
870
|
"""Report to the supervisor that provenance for a quantum was written
|
|
956
871
|
to the graph.
|
|
957
872
|
"""
|
|
958
|
-
self._reports.put(_Sentinel.WRITE_REPORT
|
|
873
|
+
self._reports.put(_Sentinel.WRITE_REPORT)
|
|
959
874
|
|
|
960
|
-
def periodically_check_for_cancel(self, iterable: Iterable[
|
|
875
|
+
def periodically_check_for_cancel[T](self, iterable: Iterable[T], n: int = 100) -> Iterator[T]:
|
|
961
876
|
"""Iterate while checking for a cancellation signal every ``n``
|
|
962
877
|
iterations.
|
|
963
878
|
|