deadpool-executor 2026.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deadpool.py +1022 -0
- deadpool_executor-2026.6.1.dist-info/METADATA +842 -0
- deadpool_executor-2026.6.1.dist-info/RECORD +6 -0
- deadpool_executor-2026.6.1.dist-info/WHEEL +4 -0
- deadpool_executor-2026.6.1.dist-info/licenses/LICENSE-AGPL +661 -0
- deadpool_executor-2026.6.1.dist-info/licenses/LICENSE-Apache +202 -0
deadpool.py
ADDED
|
@@ -0,0 +1,1022 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Deadpool
|
|
3
|
+
========
|
|
4
|
+
|
|
5
|
+
Important design considerations:
|
|
6
|
+
|
|
7
|
+
Backpressure
|
|
8
|
+
------------
|
|
9
|
+
|
|
10
|
+
To allow backpressure when submitting work to the pool, we make
|
|
11
|
+
the ``submit`` method block when the number of pending tasks is
|
|
12
|
+
greater than the ``max_backlog`` parameter. This has consequences:
|
|
13
|
+
basically it means the main thread is blocked and nothing else
|
|
14
|
+
can happen until it unblocks by getting space in the queue.
|
|
15
|
+
|
|
16
|
+
Deadpool itself needs to do actions around job management, so
|
|
17
|
+
this is why we have a separate "supervisor" thread for each
|
|
18
|
+
worker process.
|
|
19
|
+
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import concurrent.futures
|
|
23
|
+
import ctypes
|
|
24
|
+
import logging
|
|
25
|
+
import multiprocessing as mp
|
|
26
|
+
import os
|
|
27
|
+
import pickle
|
|
28
|
+
import signal
|
|
29
|
+
import sys
|
|
30
|
+
import threading
|
|
31
|
+
import traceback
|
|
32
|
+
import typing
|
|
33
|
+
import weakref
|
|
34
|
+
import atexit
|
|
35
|
+
import json
|
|
36
|
+
from concurrent.futures import CancelledError, Executor, InvalidStateError, as_completed
|
|
37
|
+
from dataclasses import dataclass, field
|
|
38
|
+
from multiprocessing.connection import Connection
|
|
39
|
+
from queue import Empty, PriorityQueue, Queue, SimpleQueue
|
|
40
|
+
from typing import Callable, Optional, Tuple
|
|
41
|
+
from collections.abc import Mapping
|
|
42
|
+
from functools import partial
|
|
43
|
+
|
|
44
|
+
import psutil
|
|
45
|
+
from setproctitle import setproctitle
|
|
46
|
+
|
|
47
|
+
__version__ = "2026.6.1"
|
|
48
|
+
__all__ = [
|
|
49
|
+
"Deadpool",
|
|
50
|
+
"Future",
|
|
51
|
+
"CancelledError",
|
|
52
|
+
"TimeoutError",
|
|
53
|
+
"ProcessError",
|
|
54
|
+
"PoolClosed",
|
|
55
|
+
"as_completed",
|
|
56
|
+
]
|
|
57
|
+
logger = logging.getLogger("deadpool")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Does not work. Hangs the process on exit.
|
|
61
|
+
# There currently isn't an official way to clean up the
|
|
62
|
+
# resource tracker process. It is an open issue on the
|
|
63
|
+
# Python issue tracker.
|
|
64
|
+
# @atexit.register
|
|
65
|
+
# def stop_resource_tracker():
|
|
66
|
+
# from multiprocessing import resource_tracker
|
|
67
|
+
# tracker = resource_tracker._resource_tracker
|
|
68
|
+
# try:
|
|
69
|
+
# import time
|
|
70
|
+
# time.sleep(5)
|
|
71
|
+
# tracker._stop()
|
|
72
|
+
# except Exception:
|
|
73
|
+
# logger.info("Error stopping the multiprocessing resource tracker")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@dataclass
class Stat:
    """A single integer counter whose updates are guarded by a lock.

    The lock is supplied by the owner, so several counters may share one
    lock. It is a plain (non-reentrant) lock: callers must not already hold
    it when calling ``increment`` or ``set``.
    """

    # Lock acquired around every mutation performed by the methods below.
    lock: threading.Lock
    # Current value of the counter.
    value: int = 0

    def increment(self, value: int = 1):
        """Add ``value`` (default 1) to the counter while holding the lock."""
        with self.lock:
            self.value += value

    def set(self, value: int = 0):
        """Overwrite the counter with ``value`` (default 0).

        The lock is taken so that a reset cannot interleave with the
        read-modify-write of a concurrent ``increment`` and be lost.
        """
        with self.lock:
            self.value = value
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class Statistics:
    """The pool's activity counters; every counter shares one lock."""

    def __init__(self):
        shared_lock = threading.Lock()
        self._lock = shared_lock

        self.tasks_received = Stat(shared_lock, 0)
        self.tasks_launched = Stat(shared_lock, 0)
        self.tasks_failed = Stat(shared_lock, 0)
        self.worker_processes_created = Stat(shared_lock, 0)
        self.max_workers_busy_concurrently = Stat(shared_lock, 0)

    def _counters(self) -> dict:
        """Map each reported name to its counter, in reporting order."""
        return {
            "tasks_received": self.tasks_received,
            "tasks_launched": self.tasks_launched,
            "tasks_failed": self.tasks_failed,
            "worker_processes_created": self.worker_processes_created,
            "max_workers_busy_concurrently": self.max_workers_busy_concurrently,
        }

    def reset_counters(self):
        """Set every counter back to its default of zero."""
        for counter in self._counters().values():
            counter.set()

    def to_dict(self) -> dict[str, typing.Any]:
        """Return the current value of every counter, keyed by name."""
        return {name: counter.value for name, counter in self._counters().items()}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@dataclass(order=True)
class PrioritizedItem:
    """Queue entry ordered by ``priority`` alone; the payload is never compared."""

    # Sort key used for all comparisons between entries.
    priority: int
    # Payload carried with the priority; excluded from comparisons so that
    # payloads which cannot be ordered never take part in sorting.
    item: typing.Any = field(compare=False)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@dataclass(init=False)
class WorkerProcess:
    """A worker subprocess together with the two one-way pipes used to talk to it."""

    process: mp.Process
    connection_receive_msgs_from_process: Connection
    connection_send_msgs_to_process: Connection
    # Stats
    tasks_ran_counter: int
    # Controls
    # If the subprocess RSS memory is above this threshold,
    # ask the system allocator to release unused memory back
    # to the OS.
    malloc_trim_rss_memory_threshold_bytes: Optional[int] = None
    ok: bool = True

    def __init__(
        self,
        initializer=None,
        initargs=(),
        finalizer=None,
        finargs=(),
        daemon=True,
        mp_context="forkserver",
        malloc_trim_rss_memory_threshold_bytes=None,
    ):
        """Create the pipes, then start the subprocess immediately.

        :param initializer: passed through to ``raw_runner2`` with ``initargs``.
        :param finalizer: passed through to ``raw_runner2`` with ``finargs``.
        :param daemon: ``daemon`` flag of the created process.
        :param mp_context: a multiprocessing context object, or the name of
            a start method (e.g. ``"forkserver"``) which is resolved with
            ``multiprocessing.get_context``.
        :param malloc_trim_rss_memory_threshold_bytes: passed through to
            ``raw_runner2`` and recorded on the instance.
        """
        # The default is a string, so it must be resolved to a context
        # before `.Process` can be looked up on it.
        mp_context = self._resolve_mp_context(mp_context)

        # For the process to send info OUT OF the process
        conn_receiver, conn_sender = mp.Pipe(duplex=False)
        # For sending work INTO the process
        conn_receiver2, conn_sender2 = mp.Pipe(duplex=False)
        p = mp_context.Process(
            daemon=daemon,
            target=raw_runner2,
            args=(
                conn_sender,
                conn_receiver2,
                os.getpid(),
                initializer,
                initargs,
                finalizer,
                finargs,
                malloc_trim_rss_memory_threshold_bytes,
            ),
        )

        p.start()
        self.process = p
        self.connection_receive_msgs_from_process = conn_receiver
        self.connection_send_msgs_to_process = conn_sender2
        self.tasks_ran_counter = 0
        # Record the threshold so the declared field reflects what the
        # subprocess was actually started with.
        self.malloc_trim_rss_memory_threshold_bytes = (
            malloc_trim_rss_memory_threshold_bytes
        )
        self.ok = True

    @staticmethod
    def _resolve_mp_context(mp_context):
        """Return a multiprocessing context for a context object, name, or None."""
        if not mp_context:
            mp_context = "forkserver"
        if isinstance(mp_context, str):
            return mp.get_context(mp_context)
        return mp_context

    def __hash__(self):
        # Hash by pid so workers can be stored in sets.
        return hash(self.process.pid)

    @property
    def pid(self):
        """The pid of the underlying process."""
        return self.process.pid

    def get_rss_bytes(self) -> int:
        """Return the subprocess's resident set size in bytes, via psutil."""
        return psutil.Process(pid=self.pid).memory_info().rss

    def submit_job(self, job):
        """Count the job and send it to the subprocess over the inbound pipe."""
        self.tasks_ran_counter += 1
        self.connection_send_msgs_to_process.send(job)

    def shutdown(self, wait=True):
        """Ask a live subprocess to stop by sending ``None``; optionally join it.

        Does nothing if the process is no longer alive.
        """
        if not self.process.is_alive():
            return

        self.connection_receive_msgs_from_process.close()

        if self.connection_send_msgs_to_process.writable:  # pragma: no branch
            try:
                self.connection_send_msgs_to_process.send(None)
            except BrokenPipeError:  # pragma: no cover
                # The other end is already gone; nothing left to signal.
                pass
        else:
            self.connection_send_msgs_to_process.close()

        if wait:
            self.process.join()

    def is_alive(self):
        """Return whether the underlying process is alive."""
        return self.process.is_alive()

    def format_death_message(self, join_timeout: float = 0.1) -> str:
        """Describe how the worker died, including the signal name if known.

        Waits up to ``join_timeout`` seconds for the exit code to become
        available before giving up on it.
        """
        # When a worker dies from a signal, the parent sees EOF on the pipe
        # before `exitcode` becomes non-None — so the obvious `exitcode` check
        # right after EOFError reports None, and we lose the signal name.
        # See issue #331.
        #
        # Ordering on Linux: `do_exit()` runs `__exit_files()` (closes the
        # child's fds -> EOF on our pipe) *before* `exit_notify()` sets
        # TASK_ZOMBIE and sends SIGCHLD. Only after the latter can
        # `waitpid(pid, WNOHANG)` report a status, which is what populates
        # `Process.exitcode` via `Popen.poll()`. The gap is typically
        # microseconds but is unbounded under pathological load.
        #
        # `Process.join(timeout=X)` bridges the gap cleanly: it waits on the
        # sentinel fd (closed in the same `__exit_files()` call, so already
        # readable by the time we get here) and then calls blocking
        # `waitpid(pid)`, returning as soon as the child reaches TASK_ZOMBIE.
        # The timeout is a ceiling for pathological cases — and for the edge
        # case where a worker closed its data pipe voluntarily without
        # exiting, in which case we'd otherwise block until the timeout.
        #
        # Refs:
        # - cpython Lib/multiprocessing/popen_fork.py (Popen.wait/poll)
        # - cpython Lib/multiprocessing/process.py (Process.exitcode/join)
        # - Linux do_exit() ordering: __exit_files() precedes exit_notify()
        # - https://docs.python.org/3/library/multiprocessing.html
        #   (Process.exitcode, Process.join semantics)
        proc = self.process
        if proc.exitcode is None:
            proc.join(timeout=join_timeout)

        exitcode = proc.exitcode
        if exitcode is None:
            return "Worker process died unexpectedly"

        try:
            # A negative exit code -N means "terminated by signal N".
            signame = signal.strsignal(-exitcode)
        except (ValueError, TypeError):
            signame = "Unknown"

        return (
            f"Subprocess {self.pid} completed unexpectedly with "
            f"exitcode {exitcode} ({signame})"
        )

    def results_are_available(self, block_for: float = 0.2):
        """Poll the outbound pipe for up to ``block_for`` seconds."""
        return self.connection_receive_msgs_from_process.poll(timeout=block_for)

    def get_results(self):
        """Receive the next message from the subprocess."""
        return self.connection_receive_msgs_from_process.recv()
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
class Future(concurrent.futures.Future):
    """A ``concurrent.futures.Future`` that also tracks the pid running its task."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # Set through the `pid` property; None until then.
        self._pid: Optional[int] = None
        # Optional callable invoked with this future whenever `pid` is assigned.
        self.pid_callback = None

    @property
    def pid(self):
        """The recorded pid, or None if none has been assigned."""
        return self._pid

    @pid.setter
    def pid(self, value):
        self._pid = value
        if not self.pid_callback:
            return

        # A failing callback is logged rather than raised to the code that
        # assigned the pid.
        try:
            self.pid_callback(self)
        except Exception:  # pragma: no cover
            logger.exception("Error calling pid_callback")

    def add_pid_callback(self, fn):
        """Register ``fn`` as the pid callback, replacing any previous one."""
        self.pid_callback = fn

    def cancel_and_kill_if_running(self, sig=signal.SIGKILL):
        """Cancel the future and, if a pid is recorded, signal its process tree."""
        self.cancel()
        if not self.pid:
            return

        try:
            kill_proc_tree(self.pid, sig=sig)
        except Exception as e:  # pragma: no cover
            logger.warning(f"Got error killing pid {self.pid}: {e}")
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
class TimeoutError(concurrent.futures.TimeoutError):
    """Deadpool's timeout error; a subclass of ``concurrent.futures.TimeoutError``."""
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
class ProcessError(mp.ProcessError):
    """Deadpool's process error; a subclass of ``multiprocessing.ProcessError``."""
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
class PoolClosed(Exception):
    """Error type signalling that the pool no longer accepts tasks."""
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
class Deadpool(Executor):
|
|
299
|
+
def __init__(
    self,
    max_workers: Optional[int] = None,
    min_workers: Optional[int] = None,
    max_tasks_per_child: Optional[int] = None,
    max_worker_memory_bytes: Optional[int] = None,
    mp_context=None,
    initializer=None,
    initargs=(),
    finalizer=None,
    finalargs=(),
    max_backlog=1000,
    shutdown_wait: Optional[bool] = None,
    shutdown_cancel_futures: Optional[bool] = None,
    daemon=True,
    malloc_trim_rss_memory_threshold_bytes: Optional[int] = None,
    propagate_environ: Optional[Mapping] = None,
) -> None:
    """The pool.

    :param max_workers: Size of the pool. When falsy, the number of CPUs
        this process may run on is used.
    :param min_workers: Stored as ``self.min_workers``; defaults to the
        pool size.
    :param max_tasks_per_child: Stored as ``self.max_tasks_per_child``.
    :param max_worker_memory_bytes: Stored as
        ``self.max_worker_memory_bytes``.
    :param mp_context: A multiprocessing context, or the name of a start
        method. When falsy, ``"forkserver"`` is used.
    :param initializer: Passed to each worker together with ``initargs``.
    :param finalizer: Passed to each worker together with ``finalargs``.
    :param max_backlog: ``maxsize`` of the queue of submitted jobs.
    :param shutdown_wait: Stored as ``self.shutdown_wait``.
    :param shutdown_cancel_futures: Stored as
        ``self.shutdown_cancel_futures``.
    :param daemon: ``daemon`` flag passed to each worker.
    :param malloc_trim_rss_memory_threshold_bytes: Passed to each worker.
    :param propagate_environ: A mapping of environment variables to
        propagate to the worker processes. This is useful for
        setting up the environment in the worker processes. Subprocesses
        will inherit the environment of the parent process, but crucially,
        they will not inherit any changes made to the environment after
        the subprocess is created (via `os.environ`). This parameter
        allows you to specify a mapping of environment variables to
        propagate to the worker processes. The worker processes will
        receive these environment variables at the time they are created.
        There are two important points: firstly, these env vars will
        be set before the initializer is run, so the initializer can
        use them. Secondly, these are applied only when the worker
        process is created, which means that you can dynamically change the
        values of the dict supplied here, and they will be used in
        new worker processes as they are created. (The new parameters
        will not be seen by existing worker processes.)

    """
    super().__init__()

    if not mp_context:
        mp_context = "forkserver"

    if isinstance(mp_context, str):
        mp_context = mp.get_context(mp_context)

    # This is stored (instead of immediately currying the `initializer`)
    # for a very important reason, which you can read about in the
    # `add_worker_to_pool` method.
    self.propagate_environ = propagate_environ
    self.ctx = mp_context
    self.initializer = initializer
    self.initargs = initargs
    self.finitializer = finalizer
    self.finitargs = finalargs

    if max_workers:
        self.pool_size = max_workers
    elif hasattr(os, "sched_getaffinity"):
        self.pool_size = len(os.sched_getaffinity(0))
    else:
        # `os.sched_getaffinity` is not available on every platform;
        # fall back to the CPU count (which may itself be None).
        self.pool_size = os.cpu_count() or 1

    if min_workers is None:
        self.min_workers = self.pool_size
    else:
        self.min_workers = min_workers

    self.max_tasks_per_child = max_tasks_per_child
    self.max_worker_memory_bytes = max_worker_memory_bytes
    self.submitted_jobs: PriorityQueue[PrioritizedItem] = PriorityQueue(
        maxsize=max_backlog
    )
    self.running_jobs = Queue(maxsize=self.pool_size)
    self.running_futs = weakref.WeakSet()
    self.existing_workers = weakref.WeakSet()
    # Lock protecting busy_workers, existing_workers, and
    # running_futs for thread-safety without the GIL.
    self._workers_lock = threading.Lock()
    self.closed = False
    self.shutdown_wait = shutdown_wait
    self.shutdown_cancel_futures = shutdown_cancel_futures
    self.daemon = daemon
    self.malloc_trim_rss_memory_threshold_bytes = (
        malloc_trim_rss_memory_threshold_bytes
    )
    self._statistics = Statistics()

    # TODO: overcommit
    self.workers: SimpleQueue[WorkerProcess] = SimpleQueue()
    for _ in range(self.pool_size):
        self.add_worker_to_pool()
    # When a worker is running a job, it will be removed from
    # the workers queue, and added to the busy_workers set.
    # When a worker successfully completes a job, it will be
    # added back to the workers queue, and removed from the
    # busy_workers set.
    self.busy_workers = set()  # weakref.WeakSet()

    # THE ONLY ACTIVE, PERSISTENT STATE IN DEADPOOL IS THIS THREAD
    # BELOW. PROTECT IT AT ALL COSTS.
    self.runner_thread = threading.Thread(
        target=self.runner, name="deadpool.runner", daemon=True
    )
    self.runner_thread.start()
|
|
397
|
+
|
|
398
|
+
def get_statistics(self) -> dict[str, typing.Any]:
    """Return the counters plus a snapshot of the current worker counts."""
    snapshot = self._statistics.to_dict()

    # Unlike the counters, these three are measured at the moment of the
    # call from the state of the worker collections.
    with self._workers_lock:
        alive = len(self.existing_workers)
        busy = len(self.busy_workers)
        idle = self.workers.qsize()

    snapshot.update(
        {
            "worker_processes_still_alive": alive,
            "worker_processes_busy": busy,
            "worker_processes_idle": idle,
        }
    )
    return snapshot
|
|
409
|
+
|
|
410
|
+
def add_worker_to_pool(self):
    """Start one new worker process and register it as idle."""
    env = self.propagate_environ
    if env:
        # The initializer is wrapped here, at worker-creation time, rather
        # than once in the constructor. A snapshot of the mapping is taken
        # for each new worker, so edits the user makes to the configured
        # env vars show up in workers created afterwards. That allows, for
        # example, changing the workers' logging level, a file location
        # they read, or timeouts, simply by updating the value on the
        # Deadpool instance.
        initializer = partial(
            initializer_environ_propagator,
            dict(env),
            original_initializer=self.initializer,
        )
    else:
        initializer = self.initializer

    new_worker = WorkerProcess(
        initializer=initializer,
        initargs=self.initargs,
        finalizer=self.finitializer,
        finargs=self.finitargs,
        mp_context=self.ctx,
        daemon=self.daemon,
        malloc_trim_rss_memory_threshold_bytes=self.malloc_trim_rss_memory_threshold_bytes,
    )
    self.workers.put(new_worker)
    self._statistics.worker_processes_created.increment()
    with self._workers_lock:
        self.existing_workers.add(new_worker)
|
|
442
|
+
|
|
443
|
+
def clear_workers(self):
    """Clear all workers from the pool.

    Typically they will all get added back according to the
    rules for `max_workers` and so on. One neat reason to do
    this is to have new settings take effect, such as a new
    environment variable that needs to be set in the workers.
    """
    while True:
        # A non-blocking get avoids hanging forever if another thread
        # takes the last idle worker between an `empty()` check and a
        # blocking `get()`.
        try:
            worker = self.workers.get_nowait()
        except Empty:
            break
        worker.shutdown(wait=False)
|
|
454
|
+
|
|
455
|
+
def runner(self):
    """Body of the runner thread: start one thread per submitted job.

    Loops until the ``None`` sentinel is taken from the submitted-jobs queue.
    """
    backlog = self.submitted_jobs
    slots = self.running_jobs

    while True:
        # Reserve a slot first; this blocks while the queue of running
        # jobs is full.
        slots.put(None)

        job = backlog.get().item
        if job is None:
            # The `None` sentinel: give the slot back and stop looping.
            backlog.task_done()
            slots.get()
            # TODO: this probably isn't necessary, since cleanup is happening
            # in the shutdown method anyway.
            cancel_all_futures_on_queue(backlog)
            logger.debug("Got shutdown event, leaving runner.")
            return

        *_, fut = job
        if fut.done():
            # Not really expected, but if the future for this job is
            # already done (e.g. the caller cancelled it themselves),
            # give the slot back and skip the job entirely.
            backlog.task_done()
            slots.get()
            continue

        task_thread = threading.Thread(target=self.run_task, args=job, daemon=True)
        self._statistics.tasks_launched.increment()
        task_thread.start()
|
|
485
|
+
|
|
486
|
+
def get_process(self) -> WorkerProcess:
    """Take an idle worker, mark it busy, and return it.

    A new worker is started first when none is idle and the pool is below
    its configured size. The busy high-water mark is updated as well.
    """
    with self._workers_lock:
        busy = len(self.busy_workers)
        capacity = self.pool_size
        idle = self.workers.qsize()

    if idle == 0 and busy + idle < capacity:
        self.add_worker_to_pool()

    worker = self.workers.get()
    with self._workers_lock:
        self.busy_workers.add(worker)
        now_busy = len(self.busy_workers)

    high_water = self._statistics.max_workers_busy_concurrently
    with high_water.lock:
        if now_busy > high_water.value:
            high_water.value = now_busy

    return worker
|
|
505
|
+
|
|
506
|
+
def done_with_process(self, wp: WorkerProcess):
    """Release ``wp`` after a job: retire it, replace it, or return it to idle."""
    # This worker has finished its job, so it is no longer busy.
    with self._workers_lock:
        self.busy_workers.remove(wp)
        busy = len(self.busy_workers)
        idle = self.workers.qsize()
        pending = self.submitted_jobs.qsize()

    # `wp` is in neither collection at this point, hence the extra 1.
    pool_exceeds_minimum = busy + idle + 1 > self.min_workers
    if pool_exceeds_minimum and pending == 0:
        # More workers than the minimum and no backlog: any tasks in play
        # have already been picked up, or the pool is idle, so this worker
        # can be removed without replacement.
        wp.shutdown(wait=False)
        return

    if not wp.is_alive() or not wp.ok:
        # Dead or flagged workers are replaced rather than reused.
        self.add_worker_to_pool()
        return

    task_limit = self.max_tasks_per_child
    if task_limit is not None and wp.tasks_ran_counter >= task_limit:
        logger.debug(f"Worker {wp.pid} hit max tasks per child.")
        wp.shutdown(wait=False)
        self.add_worker_to_pool()
        return

    memory_limit = self.max_worker_memory_bytes
    if memory_limit is not None:
        rss = wp.get_rss_bytes()
        logger.debug(f"Worker {wp.pid} has {rss} bytes of RSS memory.")
        if rss >= memory_limit:
            logger.debug(f"Worker {wp.pid} hit max memory threshold.")
            wp.shutdown(wait=False)
            self.add_worker_to_pool()
            return

    self.workers.put(wp)
|
|
553
|
+
|
|
554
|
+
def run_task(self, fn, args, kwargs, timeout, fut: Future):
    """Run one job on a worker and record the outcome on ``fut``.

    Sends ``(fn, args, kwargs, timeout)`` to a worker, waits for a message
    or for the worker to die, then hands the worker to
    ``done_with_process``. The ``finally`` clause always marks the job done
    on the submitted-jobs queue and releases one running-jobs slot.
    """
    try:
        retry_count = 10
        while retry_count > 0:
            retry_count -= 1
            worker: WorkerProcess = self.get_process()
            try:
                worker.submit_job((fn, args, kwargs, timeout))
                break
            except (pickle.PicklingError, AttributeError) as e:
                # If the user passed in a function or params that can't
                # be pickled, use the future to communicate the error.
                # Note that in this scenario, there is nothing wrong
                # with the worker process itself, so we don't need to
                # shut it down.
                fut.set_exception(e)
                self.done_with_process(worker)
                return
            except BrokenPipeError:
                # This likely comes from trying to send a job over a pipe
                # that has been closed. This is a serious problem, and
                # we should shut down the worker process and get rid of
                # it. We're going to loop back around and try again with
                # a new worker.
                # TODO: it seems that this might be expected in situations
                # where the worker process often OOMs. As such, not sure
                # whether logging at warning level is appropriate.
                logger.warning(f"BrokenPipeError on {worker.pid}, retrying.")
                worker.ok = False
                self.done_with_process(worker)
                # TODO: probably this should be moved into the `done_with_process`
                # and can act on the `worker.ok` flag.
                kill_proc_tree(worker.pid, sig=signal.SIGKILL)
        else:  # pragma: no cover
            # If we get here, we've tried to submit the job to a worker
            # process multiple times and failed each time. We're giving
            # up.
            logger.error("Failed to submit job to worker")
            fut.set_exception(ProcessError("Failed to submit job to worker"))
            return

        fut.pid = worker.pid
        with self._workers_lock:
            self.running_futs.add(fut)

        while True:
            if worker.results_are_available():
                try:
                    results = worker.get_results()
                except EOFError:
                    self._statistics.tasks_failed.increment()
                    if not fut.done():
                        try:
                            fut.set_exception(
                                ProcessError(worker.format_death_message())
                            )
                        except InvalidStateError:
                            pass
                except BaseException as e:
                    self._statistics.tasks_failed.increment()
                    logger.debug(f"Unexpected exception from worker: {e}")
                    if not fut.done():
                        try:
                            fut.set_exception(e)
                        except InvalidStateError:
                            pass
                else:
                    if isinstance(results, BaseException):
                        # Every exception result, timeouts included, is
                        # counted as a failure exactly once, here.
                        self._statistics.tasks_failed.increment()
                        if not fut.done():
                            try:
                                fut.set_exception(results)
                            except InvalidStateError:
                                pass
                    else:
                        if not fut.done():
                            try:
                                fut.set_result(results)
                            except InvalidStateError:
                                pass

                    if isinstance(results, TimeoutError):
                        # The failure was already counted above; only flag
                        # the worker so that it is not reused.
                        logger.debug(
                            f"TimeoutError on {worker.pid}, setting ok=False"
                        )
                        worker.ok = False
                break
            elif not worker.is_alive():
                self._statistics.tasks_failed.increment()
                logger.debug(f"p is no longer alive: {worker.process}")
                if not fut.done():
                    # It is possible that fut has already had a result set on
                    # it. If that's the case we'll do nothing. Otherwise, put
                    # an exception reporting the unexpected situation.
                    try:
                        fut.set_exception(
                            ProcessError(worker.format_death_message())
                        )
                    except InvalidStateError:  # pragma: no cover
                        # We still have to catch this even though there is a
                        # check for `fut.done()`, simply due to a possible
                        # race between the done check and the set_exception call.
                        pass

                break
            else:
                pass  # pragma: no cover

        self.done_with_process(worker)
    finally:
        self.submitted_jobs.task_done()

        if not fut.done():  # pragma: no cover
            fut.set_exception(ProcessError("Somehow no result got set on fut."))

        try:
            self.running_jobs.get_nowait()
        except Empty:  # pragma: no cover
            logger.warning("Weird error, did not expect running jobs to be empty")
|
|
674
|
+
|
|
675
|
+
def submit(
    self,
    fn: Callable,
    /,
    *args,
    deadpool_timeout=None,
    deadpool_priority=0,
    **kwargs,
) -> Future:
    """Queue ``fn(*args, **kwargs)`` for execution and return its future.

    :param deadpool_timeout: stored with the job and later sent to the worker.
    :param deadpool_priority: priority of the queue entry; must be >= 0.
    :raises ValueError: if ``deadpool_priority`` is negative.
    :raises PoolClosed: if the pool has been closed.
    """
    if deadpool_priority < 0:  # pragma: no cover
        raise ValueError(
            f"Parameter deadpool_priority must be >= 0, but was {deadpool_priority}"
        )

    if self.closed:
        raise PoolClosed("The pool is closed. No more tasks can be submitted.")

    fut = Future()
    job = (fn, args, kwargs, deadpool_timeout, fut)
    entry = PrioritizedItem(priority=deadpool_priority, item=job)
    # The backlog queue is bounded, so this blocks while it is full.
    self.submitted_jobs.put(entry)
    self._statistics.tasks_received.increment()
    return fut
|
|
701
|
+
|
|
702
|
+
def shutdown(self, wait: bool = True, *, cancel_futures: bool = False) -> None:
    """Close the pool, stop the runner, and shut down the workers.

    :param wait: when true, the shutdown sentinel is queued behind all
        pending jobs; otherwise it is queued ahead of them.
    :param cancel_futures: when true, futures of jobs still waiting in
        the backlog are cancelled. Combined with ``wait=False``, futures
        that are already running are cancelled and their processes killed.

    Calling this on an already closed pool does nothing.
    """
    # `Queue.put(..., timeout=...)` signals a timeout with `queue.Full`.
    from queue import Full

    if self.closed:
        return

    logger.debug(f"shutdown: {wait=} {cancel_futures=}")

    # No more new tasks can be submitted
    self.closed = True

    if cancel_futures:
        cancel_all_futures_on_queue(self.submitted_jobs)

    if wait:
        # The None sentinel will pop last
        shutdown_priority = sys.maxsize
    else:
        # The None sentinel will pop first
        shutdown_priority = -1

    sentinel = PrioritizedItem(priority=shutdown_priority, item=None)
    while True:
        try:
            self.submitted_jobs.put(sentinel, timeout=2.0)
            break
        except Full:  # pragma: no cover
            # The backlog is bounded, so it can be full here. The runner
            # only stops once it receives the sentinel, so keep retrying
            # rather than dropping it.
            logger.warning(
                "Timed out putting the shutdown sentinel on the submit "
                "queue because the backlog is full; retrying."
            )

    # Up till this point, all the pending work that has been
    # submitted, but not yet started, has been cancelled. The
    # runner loop has also been stopped (with the None sentinel).
    # The only thing left to do is decide whether or not to
    # actively kill processes that are still running. We presume
    # that if the user is asking for cancellation and doesn't
    # want to wait, that she probably wants us to also stop
    # running processes.
    if (not wait) and cancel_futures:
        with self._workers_lock:
            running_futs = list(self.running_futs)
        for fut in running_futs:
            fut.cancel_and_kill_if_running()

    logger.debug("waiting for submitted_jobs to join...")
    self.submitted_jobs.join()

    super().shutdown(wait, cancel_futures=cancel_futures)

    # We can now remove all other processes hanging around
    # in the background.
    while not self.workers.empty():
        try:
            worker = self.workers.get_nowait()
            worker.shutdown()
        except Empty:  # pragma: no cover
            break

    # There may be a few processes left in the
    # `busy_workers` set. Shut them down too.
    with self._workers_lock:
        remaining = list(self.busy_workers)
        self.busy_workers.clear()
    for worker in remaining:
        worker.shutdown()
|
|
768
|
+
|
|
769
|
+
def __enter__(self):
    """Enter the ``with`` block; the pool itself is the context value."""
    return self
|
|
771
|
+
|
|
772
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
    """Shut the executor down when the ``with`` block is left.

    ``shutdown_wait`` and ``shutdown_cancel_futures`` are forwarded to
    ``shutdown`` as ``wait`` / ``cancel_futures`` only when they are not
    ``None``; otherwise ``shutdown`` falls back to its own defaults.
    The runner thread is joined afterwards.

    Returns ``False`` so that an exception raised inside the ``with``
    block is never suppressed.
    """
    overrides = (
        ("wait", self.shutdown_wait),
        ("cancel_futures", self.shutdown_cancel_futures),
    )
    shutdown_kwargs = {
        name: value for name, value in overrides if value is not None
    }

    self.shutdown(**shutdown_kwargs)
    self.runner_thread.join()
    return False
|
|
783
|
+
|
|
784
|
+
|
|
785
|
+
def cancel_all_futures_on_queue(q: Queue):
    """Drain ``q`` and cancel the future carried by every queued entry.

    Each entry is expected to expose an ``item`` attribute holding an
    iterable whose last element is the future. Every entry taken off
    the queue is immediately marked done with ``task_done()`` so that
    a later ``q.join()`` does not wait for it.

    NOTE(review): an entry whose ``item`` is ``None`` would fail to
    unpack here, so this presumably must run before any ``None``
    sentinel is queued -- confirm against the caller.
    """
    try:
        while True:
            entry = q.get_nowait()
            q.task_done()
            *_, pending_future = entry.item
            pending_future.cancel()
    except Empty:
        # The queue has been fully drained.
        return
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
# Taken from
|
|
798
|
+
# https://psutil.readthedocs.io/en/latest/index.html?highlight=children#kill-process-tree
|
|
799
|
+
def kill_proc_tree(
    pid,
    sig=signal.SIGTERM,
    include_parent=True,
    timeout=None,
    on_terminate=None,
    allow_kill_self=False,
):
    """Signal a whole process tree and wait for its members to exit.

    Sends ``sig`` to every descendant of ``pid`` (children,
    grandchildren, ...) and, when ``include_parent`` is true, to
    ``pid`` itself. The parent is signalled last.

    Args:
        pid: Root of the process tree.
        sig: Signal to deliver; defaults to ``SIGTERM``.
        include_parent: Also signal ``pid`` itself.
        timeout: Passed to ``psutil.wait_procs``.
        on_terminate: Callback passed to ``psutil.wait_procs``; it is
            called as soon as a signalled process terminates.
        allow_kill_self: Must be true to target the calling process.

    Returns:
        A ``(gone, still_alive)`` tuple of process lists. If ``pid``
        no longer exists there is nothing to signal and ``([], [])``
        is returned, so callers can always unpack the result.

    Raises:
        ValueError: If ``pid`` is the calling process and
            ``allow_kill_self`` is false.
    """
    if not allow_kill_self and pid == os.getpid():
        raise ValueError("Won't kill myself")

    try:
        parent = psutil.Process(pid)
        # The process can vanish between the lookup and the child
        # enumeration, so both belong under the same guard.
        children = parent.children(recursive=True)
    except psutil.NoSuchProcess:
        return ([], [])

    if include_parent:
        children.append(parent)

    for p in children:
        try:
            p.send_signal(sig)
        except psutil.NoSuchProcess:  # pragma: no cover
            # Already gone; wait_procs below will report it as such.
            pass

    gone, alive = psutil.wait_procs(children, timeout=timeout, callback=on_terminate)
    return (gone, alive)
|
|
832
|
+
|
|
833
|
+
|
|
834
|
+
def raw_runner2(
    conn: Connection,
    conn_receiver: Connection,
    parent_pid,
    initializer,
    initargs,
    finitializer: Optional[Callable] = None,
    finitargs: Optional[Tuple] = None,
    mem_clear_threshold_bytes: Optional[int] = None,
    kill_proc_tree=kill_proc_tree,
):
    """Main loop of a worker process: receive jobs, run them, send results.

    Jobs are read from ``conn_receiver`` as ``(fn, args, kwargs, timeout)``
    tuples. The return value of ``fn(*args, **kwargs)``, or the exception
    it raised, is sent back over ``conn``. The loop ends when ``None`` is
    received, when the receiving pipe hits EOF, or when receiving fails.

    This function does not return normally: once the loop has ended (or
    a job timed out, or the parent process disappeared) the registered
    ``atexit`` handlers are run explicitly and the process then kills its
    own process tree with ``SIGKILL``.

    Args:
        conn: Connection used to send results and exceptions back.
        conn_receiver: Connection from which jobs are received.
        parent_pid: PID that is polled every 2 seconds; if it no longer
            exists the worker self-destructs.
        initializer: Optional callable run once before the job loop. A
            failure is logged and the worker carries on.
        initargs: Positional arguments for ``initializer``.
        finitializer: Optional callable run once after the job loop. A
            failure is logged.
        finitargs: Positional arguments for ``finitializer``.
        mem_clear_threshold_bytes: If given, ``trim_memory()`` is called
            after a job whenever this process's RSS exceeds the value.
        kill_proc_tree: Callable used to kill this process's tree;
            presumably injectable for testing -- confirm with callers.
    """
    setproctitle("deadpool.worker")

    # Bind these before any thread is started: the closures below read
    # ``pid``, and must never observe it unassigned.
    proc = psutil.Process()
    pid = proc.pid

    # This event is used to signal that the "parent"
    # monitor thread should be deactivated.
    evt = threading.Event()

    def self_destruct_if_parent_disappers():
        """Poll every 2 seconds to see whether the parent is still
        alive, and kill this process tree if it is not.
        """
        while True:
            if evt.wait(2.0):
                # Monitoring was deactivated.
                return

            if not psutil.pid_exists(parent_pid):
                logger.warning(f"Parent {parent_pid} is gone, self-destructing.")
                evt.set()
                # SIGKILL skips normal interpreter shutdown, so the
                # atexit handlers have to be run by hand first.
                atexit._run_exitfuncs()
                kill_proc_tree(
                    pid, sig=signal.SIGKILL, allow_kill_self=True
                )  # pragma: no cover
                return  # pragma: no cover

    tparent = threading.Thread(target=self_destruct_if_parent_disappers, daemon=True)
    tparent.start()

    def deactivate_parentless_self_destruct():
        """Tell the parent-monitor thread to stop."""
        evt.set()

    def conn_send_safe(obj):
        """Send ``obj`` over ``conn``, logging (not raising) any failure."""
        try:
            conn.send(obj)
        except BrokenPipeError:  # pragma: no cover
            logger.debug("Pipe not usable")
        except BaseException:  # pragma: no cover
            logger.exception("Unexpected pipe error")

    def timed_out():
        """Action to fire when the timeout given to ``threading.Timer``
        is reached. It reports a ``TimeoutError`` over ``conn`` and then
        kills this process with SIGKILL."""
        # The parent monitor is no longer needed: this process is about
        # to terminate itself.
        deactivate_parentless_self_destruct()
        conn_send_safe(TimeoutError(f"Process {pid} timed out, self-destructing."))
        atexit._run_exitfuncs()
        kill_proc_tree(
            pid, sig=signal.SIGKILL, allow_kill_self=True
        )  # pragma: no cover

    if initializer:
        initargs = initargs or ()
        try:
            initializer(*initargs)
        except Exception:
            logger.exception("Initializer failed")

    while True:
        # Wait for some work.
        try:
            logger.debug("Waiting for work...")
            job = conn_receiver.recv()
            logger.debug("Got a job")
        except EOFError:
            logger.debug("Received EOF, exiting.")
            break
        except KeyboardInterrupt:  # pragma: no cover
            logger.debug("Received KeyboardInterrupt, exiting.")
            break
        except BaseException:  # pragma: no cover
            logger.exception("Received unexpected exception, exiting.")
            break

        if job is None:
            logger.debug("Received None, exiting.")
            break

        # Real work, unpack.
        fn, args, kwargs, timeout = job

        # Arm a watchdog only when a (truthy) timeout was requested.
        timer = None
        if timeout:
            timer = threading.Timer(timeout, timed_out)
            timer.start()

        try:
            results = fn(*args, **kwargs)
        except BaseException as e:
            # Check whether the exception can be pickled. If not we're going
            # to wrap it. Why do this? It turns out that mp.Connection.send
            # will try to pickle the exception, and if it can't, it will
            # lose its mind. I've gotten segfaults in Python with this.
            try:  # pragma: no cover
                pickle.dumps(e)
            except Exception as pickle_error:
                # Render both tracebacks as text. The three-argument form
                # of format_exception works on every Python version, and
                # joining avoids embedding the repr of a list of lines.
                original_tb = "".join(
                    traceback.format_exception(type(e), e, e.__traceback__)
                )
                pickle_tb = "".join(
                    traceback.format_exception(
                        type(pickle_error),
                        pickle_error,
                        pickle_error.__traceback__,
                    )
                )
                msg = (
                    "An exception occurred but pickling it failed. "
                    "The original exception is presented here as a string with "
                    f"traceback.\n{e}\n{original_tb}\n\n"
                    "The reason for the pickling failure is the following:\n"
                    f"{pickle_tb}"
                )
                e = ProcessError(msg)

            # Because we can't retain the traceback (can't be pickled by default,
            # an external library like "tblib" would be needed), we're going to
            # render the traceback to a string and add that to the exception
            # text. This approach also works for when deadpool can be distributed
            # across multiple machines, since the traceback is a string.
            traceback_str = "".join(
                traceback.format_exception(type(e), e, e.__traceback__)
            )
            # Modify the exception's args to include the traceback
            # This changes the string representation of the exception
            e.args = (f"{e}\n{traceback_str}",) + e.args[1:]
            conn_send_safe(e)
        else:
            conn_send_safe(results)
        finally:
            if timer is not None:
                timer.cancel()

        if mem_clear_threshold_bytes is not None:
            mem = proc.memory_info().rss
            if mem > mem_clear_threshold_bytes:
                trim_memory()

    if finitializer:
        finitargs = finitargs or ()
        try:
            finitializer(*finitargs)
        except BaseException:
            logger.exception("finitializer failed")

    # We've reached the end of this function which means this
    # process must exit. However, we started a couple threads
    # in here and they don't magically exit. Additional
    # synchronization controls are needed to tell the threads
    # to exit, which we don't have. However, we do have a kill
    # switch. Since this process worker will process no more
    # work, and since we've already run the finalizer, we may
    # as well just nuke it. That will remove its memory space
    # and all its threads too.
    deactivate_parentless_self_destruct()
    logger.debug(f"Deleting worker {pid=}")
    atexit._run_exitfuncs()
    kill_proc_tree(pid, sig=signal.SIGKILL, allow_kill_self=True)  # pragma: no cover
|
|
995
|
+
|
|
996
|
+
|
|
997
|
+
def kill_proc_tree_in_process_daemon(pid, sig):  # pragma: no cover
    """Start a daemon process that runs ``kill_proc_tree(pid, sig)``.

    The helper process is started and not joined, so this call does
    not wait for the kill to complete.
    """
    killer = mp.Process(target=kill_proc_tree, args=(pid, sig), daemon=True)
    killer.start()
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
def trim_memory() -> None:
    """Tell malloc to give all the unused memory back to the OS.

    Only acts on Linux, where it calls ``malloc_trim(0)`` from
    ``libc.so.6``. The call is a best-effort optimisation: if that
    library cannot be loaded or does not provide ``malloc_trim``
    (e.g. a non-glibc C library), the trim is skipped instead of
    raising into the caller.
    """
    if sys.platform != "linux":
        return

    try:
        libc = ctypes.CDLL("libc.so.6")
        libc.malloc_trim(0)
    except (OSError, AttributeError):
        # OSError: the shared library could not be loaded.
        # AttributeError: the library has no ``malloc_trim`` symbol.
        logging.getLogger(__name__).debug(
            "malloc_trim is unavailable, skipping memory trim", exc_info=True
        )
|
|
1006
|
+
|
|
1007
|
+
|
|
1008
|
+
def initializer_environ_propagator(
    environ: dict,
    original_initializer: Optional[Callable] = None,
    initargs=(),
):
    """Apply ``environ`` to ``os.environ``, then run the wrapped initializer.

    Args:
        environ: Environment variables to set; a falsy value sets nothing.
        original_initializer: Optional callable to invoke afterwards.
        initargs: Positional arguments for ``original_initializer``; a
            falsy value means "no arguments".
    """
    # The environment has to be in place before the original
    # initializer runs, so that the initializer can rely on it.
    if environ:
        os.environ.update(environ)

    if not original_initializer:
        return

    call_args = initargs if initargs else ()
    original_initializer(*call_args)
|