deadpool-executor 2026.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deadpool.py ADDED
@@ -0,0 +1,1022 @@
1
+ """
2
+ Deadpool
3
+ ========
4
+
5
+ Important design considerations:
6
+
7
+ Backpressure
8
+ ------------
9
+
10
+ To allow backpressure when submitting work to the pool, we make
11
+ the ``submit`` method block when the number of pending tasks
12
+ reaches the ``max_backlog`` parameter. This has consequences:
13
+ basically it means the submitting thread is blocked and nothing
14
+ else can happen in it until space frees up in the queue.
15
+
16
+ Deadpool itself needs to do actions around job management, so
17
+ this is why we have a separate "supervisor" thread for each
18
+ worker process.
19
+
20
+ """
21
+
22
+ import concurrent.futures
23
+ import ctypes
24
+ import logging
25
+ import multiprocessing as mp
26
+ import os
27
+ import pickle
28
+ import signal
29
+ import sys
30
+ import threading
31
+ import traceback
32
+ import typing
33
+ import weakref
34
+ import atexit
35
+ import json
36
+ from concurrent.futures import CancelledError, Executor, InvalidStateError, as_completed
37
+ from dataclasses import dataclass, field
38
+ from multiprocessing.connection import Connection
39
+ from queue import Empty, PriorityQueue, Queue, SimpleQueue
40
+ from typing import Callable, Optional, Tuple
41
+ from collections.abc import Mapping
42
+ from functools import partial
43
+
44
+ import psutil
45
+ from setproctitle import setproctitle
46
+
# Package version string.
__version__ = "2026.6.1"

# Public names exported by ``from deadpool import *``.
__all__ = [
    "Deadpool",
    "Future",
    "CancelledError",
    "TimeoutError",
    "ProcessError",
    "PoolClosed",
    "as_completed",
]

# Logger shared by the pool and by the worker processes.
logger = logging.getLogger("deadpool")
58
+
59
+
60
+ # Does not work. Hangs the process on exit.
61
+ # There currently isn't an official way to clean up the
62
+ # resource tracker process. It is an open issue on the
63
+ # Python issue tracker.
64
+ # @atexit.register
65
+ # def stop_resource_tracker():
66
+ # from multiprocessing import resource_tracker
67
+ # tracker = resource_tracker._resource_tracker
68
+ # try:
69
+ # import time
70
+ # time.sleep(5)
71
+ # tracker._stop()
72
+ # except Exception:
73
+ # logger.info("Error stopping the multiprocessing resource tracker")
74
+
75
+
@dataclass
class Stat:
    """An integer counter guarded by a lock.

    The lock is supplied by the owner, so several counters can share
    one lock.
    """

    lock: threading.Lock
    value: int = 0

    def increment(self, value: int = 1):
        """Add ``value`` (default 1) to the counter while holding the lock."""
        with self.lock:
            self.value += value

    def set(self, value: int = 0):
        """Overwrite the counter with ``value`` (default 0).

        The write happens under the same lock as ``increment`` so that a
        reset cannot be lost in the middle of a concurrent read-add-write.
        Do not call this while already holding ``self.lock``.
        """
        with self.lock:
            self.value = value
87
+
88
+
class Statistics:
    """The pool's counters, all guarded by one shared lock."""

    def __init__(self):
        self._lock = threading.Lock()

        self.tasks_received = Stat(self._lock, 0)
        self.tasks_launched = Stat(self._lock, 0)
        self.tasks_failed = Stat(self._lock, 0)
        self.worker_processes_created = Stat(self._lock, 0)
        self.max_workers_busy_concurrently = Stat(self._lock, 0)

    def reset_counters(self):
        """Set every counter back to zero."""
        self.tasks_received.set()
        self.tasks_launched.set()
        self.tasks_failed.set()
        self.worker_processes_created.set()
        self.max_workers_busy_concurrently.set()

    def to_dict(self) -> dict[str, typing.Any]:
        """Return the current counter values keyed by counter name."""
        return {
            "tasks_received": self.tasks_received.value,
            "tasks_launched": self.tasks_launched.value,
            "tasks_failed": self.tasks_failed.value,
            "worker_processes_created": self.worker_processes_created.value,
            "max_workers_busy_concurrently": self.max_workers_busy_concurrently.value,
        }
114
+
115
+
@dataclass(order=True)
class PrioritizedItem:
    """Queue entry that is ordered by ``priority`` only.

    ``item`` is excluded from comparisons, so payloads that cannot be
    compared with each other can still share a priority queue.
    """

    priority: int
    item: typing.Any = field(compare=False)
120
+
121
+
@dataclass(init=False)
class WorkerProcess:
    """A worker subprocess together with the two pipes used to talk to it.

    One pipe carries jobs into the process, the other carries results
    (or exceptions) back out.
    """

    process: mp.Process
    connection_receive_msgs_from_process: Connection
    connection_send_msgs_to_process: Connection
    # Stats
    tasks_ran_counter: int
    # Controls
    # If the subprocess RSS memory is above this threshold,
    # ask the system allocator to release unused memory back
    # to the OS.
    malloc_trim_rss_memory_threshold_bytes: Optional[int] = None
    # Cleared by the pool when the worker must not be reused.
    ok: bool = True

    def __init__(
        self,
        initializer=None,
        initargs=(),
        finalizer=None,
        finargs=(),
        daemon=True,
        mp_context="forkserver",
        malloc_trim_rss_memory_threshold_bytes=None,
    ):
        """Create the pipes and start the worker process.

        :param initializer: Callable run in the worker before any job.
        :param initargs: Positional arguments for ``initializer``.
        :param finalizer: Callable run in the worker before it exits.
        :param finargs: Positional arguments for ``finalizer``.
        :param daemon: Passed through as the process's ``daemon`` flag.
        :param mp_context: A multiprocessing context object, or the name
            of a start method from which a context is obtained.
        :param malloc_trim_rss_memory_threshold_bytes: Handed to the
            worker, which uses it as its memory-trim threshold.
        """
        # The default is a start-method *name*; a plain string has no
        # ``Process`` attribute, so resolve it to a context first.
        if isinstance(mp_context, str):
            mp_context = mp.get_context(mp_context)

        # For the process to send info OUT OF the process
        conn_receiver, conn_sender = mp.Pipe(duplex=False)
        # For sending work INTO the process
        conn_receiver2, conn_sender2 = mp.Pipe(duplex=False)
        p = mp_context.Process(
            daemon=daemon,
            target=raw_runner2,
            args=(
                conn_sender,
                conn_receiver2,
                os.getpid(),
                initializer,
                initargs,
                finalizer,
                finargs,
                malloc_trim_rss_memory_threshold_bytes,
            ),
        )

        p.start()
        self.process = p
        self.connection_receive_msgs_from_process = conn_receiver
        self.connection_send_msgs_to_process = conn_sender2
        self.tasks_ran_counter = 0
        self.malloc_trim_rss_memory_threshold_bytes = (
            malloc_trim_rss_memory_threshold_bytes
        )
        self.ok = True

    def __hash__(self):
        return hash(self.process.pid)

    @property
    def pid(self):
        """The pid of the underlying process."""
        return self.process.pid

    def get_rss_bytes(self) -> int:
        """Return the worker's resident set size in bytes, as reported by psutil."""
        return psutil.Process(pid=self.pid).memory_info().rss

    def submit_job(self, job):
        """Count the job and send it down the pipe into the worker."""
        self.tasks_ran_counter += 1
        self.connection_send_msgs_to_process.send(job)

    def shutdown(self, wait=True):
        """Ask the worker to exit by sending it ``None``.

        Does nothing if the process is no longer alive. When ``wait`` is
        true, block until the process has been joined.
        """
        if not self.process.is_alive():
            return

        self.connection_receive_msgs_from_process.close()

        if self.connection_send_msgs_to_process.writable:  # pragma: no branch
            try:
                self.connection_send_msgs_to_process.send(None)
            except BrokenPipeError:  # pragma: no cover
                pass
            else:
                self.connection_send_msgs_to_process.close()

        if wait:
            self.process.join()

    def is_alive(self):
        """Return whether the underlying process is alive."""
        return self.process.is_alive()

    def format_death_message(self, join_timeout: float = 0.1) -> str:
        """Describe how the worker died, including the exit code if known.

        :param join_timeout: Maximum number of seconds to wait for the
            exit code to become available.
        """
        # When a worker dies from a signal, the parent sees EOF on the pipe
        # before `exitcode` becomes non-None — so the obvious `exitcode` check
        # right after EOFError reports None, and we lose the signal name.
        # See issue #331.
        #
        # Ordering on Linux: `do_exit()` runs `__exit_files()` (closes the
        # child's fds -> EOF on our pipe) *before* `exit_notify()` sets
        # TASK_ZOMBIE and sends SIGCHLD. Only after the latter can
        # `waitpid(pid, WNOHANG)` report a status, which is what populates
        # `Process.exitcode` via `Popen.poll()`. The gap is typically
        # microseconds but is unbounded under pathological load.
        #
        # `Process.join(timeout=X)` bridges the gap cleanly: it waits on the
        # sentinel fd (closed in the same `__exit_files()` call, so already
        # readable by the time we get here) and then calls blocking
        # `waitpid(pid)`, returning as soon as the child reaches TASK_ZOMBIE.
        # The timeout is a ceiling for pathological cases — and for the edge
        # case where a worker closed its data pipe voluntarily without
        # exiting, in which case we'd otherwise block until the timeout.
        #
        # Refs:
        # - cpython Lib/multiprocessing/popen_fork.py (Popen.wait/poll)
        # - cpython Lib/multiprocessing/process.py (Process.exitcode/join)
        # - Linux do_exit() ordering: __exit_files() precedes exit_notify()
        # - https://docs.python.org/3/library/multiprocessing.html
        #   (Process.exitcode, Process.join semantics)
        proc = self.process
        if proc.exitcode is None:
            proc.join(timeout=join_timeout)

        exitcode = proc.exitcode
        if exitcode is None:
            return "Worker process died unexpectedly"

        # A negative exit code -N means "killed by signal N". Anything that
        # is not a valid signal number is reported as "Unknown".
        try:
            signame = signal.strsignal(-exitcode)
        except (ValueError, TypeError):
            signame = "Unknown"

        return (
            f"Subprocess {self.pid} completed unexpectedly with "
            f"exitcode {exitcode} ({signame})"
        )

    def results_are_available(self, block_for: float = 0.2):
        """Poll the result pipe for up to ``block_for`` seconds."""
        return self.connection_receive_msgs_from_process.poll(timeout=block_for)

    def get_results(self):
        """Receive the next object the worker sent back."""
        return self.connection_receive_msgs_from_process.recv()
256
+
257
+
class Future(concurrent.futures.Future):
    """A future that also records the pid of the worker running its task."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self._pid: Optional[int] = None
        self.pid_callback = None

    @property
    def pid(self):
        """Pid of the worker running the task, or ``None`` if not yet set."""
        return self._pid

    @pid.setter
    def pid(self, value):
        self._pid = value
        if self.pid_callback:
            # A failing callback is logged, not raised, so that it cannot
            # break whoever assigns the pid.
            try:
                self.pid_callback(self)
            except Exception:  # pragma: no cover
                logger.exception("Error calling pid_callback")

    def add_pid_callback(self, fn):
        """Register ``fn`` to be called with this future whenever ``pid`` is set.

        Only one callback is kept; a later call replaces the earlier one.
        """
        self.pid_callback = fn

    def cancel_and_kill_if_running(self, sig=signal.SIGKILL):
        """Cancel the future and, if a pid is known, signal that process tree.

        :param sig: Signal sent to the process and its descendants.
        """
        self.cancel()
        if self.pid:
            try:
                kill_proc_tree(self.pid, sig=sig)
            except Exception as e:  # pragma: no cover
                logger.warning(f"Got error killing pid {self.pid}: {e}")
287
+
288
+
class TimeoutError(concurrent.futures.TimeoutError):
    """Reported when a task exceeds its ``deadpool_timeout``."""
290
+
291
+
class ProcessError(mp.ProcessError):
    """Reported when a worker process fails or a job cannot be handed to one."""
293
+
294
+
class PoolClosed(Exception):
    """Raised by ``submit`` once the pool has been shut down."""
296
+
297
+
class Deadpool(Executor):
    """Executor that runs each task in a worker process it can kill.

    Submitted tasks wait on a bounded priority queue. A single runner
    thread takes them off that queue and starts one short-lived thread
    per task (``run_task``), which hands the job to a worker process and
    waits for the result.
    """

    def __init__(
        self,
        max_workers: Optional[int] = None,
        min_workers: Optional[int] = None,
        max_tasks_per_child: Optional[int] = None,
        max_worker_memory_bytes: Optional[int] = None,
        mp_context=None,
        initializer=None,
        initargs=(),
        finalizer=None,
        finalargs=(),
        max_backlog=1000,
        shutdown_wait: Optional[bool] = None,
        shutdown_cancel_futures: Optional[bool] = None,
        daemon=True,
        malloc_trim_rss_memory_threshold_bytes: Optional[int] = None,
        propagate_environ: Optional[Mapping] = None,
    ) -> None:
        """The pool.

        :param max_workers: Size of the pool. Defaults to the number of
            CPUs available to this process.
        :param min_workers: A worker that finishes a task is retired when
            more than this many workers exist and the backlog is empty.
            Defaults to the pool size.
        :param max_tasks_per_child: Replace a worker once it has been
            given this many tasks.
        :param max_worker_memory_bytes: Replace a worker whose RSS is at
            or above this value after it finishes a task.
        :param mp_context: A multiprocessing context, or the name of a
            start method. Defaults to ``"forkserver"``.
        :param initializer: Callable run in each new worker process.
        :param initargs: Positional arguments for ``initializer``.
        :param finalizer: Callable run in each worker before it exits.
        :param finalargs: Positional arguments for ``finalizer``.
        :param max_backlog: Capacity of the queue of submitted tasks;
            ``submit`` blocks while that queue is full.
        :param shutdown_wait: If not ``None``, the ``wait`` value used
            when the pool is closed by leaving a ``with`` block.
        :param shutdown_cancel_futures: If not ``None``, the
            ``cancel_futures`` value used when the pool is closed by
            leaving a ``with`` block.
        :param daemon: Passed through as each worker's ``daemon`` flag.
        :param malloc_trim_rss_memory_threshold_bytes: Passed to each
            worker, which trims its allocator after a task when its RSS
            is above this value.
        :param propagate_environ: A mapping of environment variables to
            propagate to the worker processes. This is useful for
            setting up the environment in the worker processes. Subprocesses
            will inherit the environment of the parent process, but crucially,
            they will not inherit any changes made to the environment after
            the subprocess is created (via `os.environ`). This parameter
            allows you to specify a mapping of environment variables to
            propagate to the worker processes. The worker processes will
            receive these environment variables at the time they are created.
            There are two important points: firstly, these env vars will
            be set before the initializer is run, so the initializer can
            use them. Secondly, these are applied only when the worker
            process is created, which means that you can dynamically change the
            values of the dict supplied here, and they will be used in
            new worker processes as they are created. (The new parameters
            will not be seen by existing worker processes.)

        """
        super().__init__()

        if not mp_context:
            mp_context = "forkserver"

        if isinstance(mp_context, str):
            mp_context = mp.get_context(mp_context)

        # This is stored (instead of immediately currying the `initializer`)
        # for a very important reason, which you can read about in the
        # `add_worker_to_pool` method.
        self.propagate_environ = propagate_environ
        self.ctx = mp_context
        self.initializer = initializer
        self.initargs = initargs
        self.finitializer = finalizer
        self.finitargs = finalargs

        if max_workers:
            self.pool_size = max_workers
        elif hasattr(os, "sched_getaffinity"):
            self.pool_size = len(os.sched_getaffinity(0))
        else:
            # `os.sched_getaffinity` is not available on every platform.
            self.pool_size = os.cpu_count() or 1

        if min_workers is None:
            self.min_workers = self.pool_size
        else:
            self.min_workers = min_workers

        self.max_tasks_per_child = max_tasks_per_child
        self.max_worker_memory_bytes = max_worker_memory_bytes
        self.submitted_jobs: PriorityQueue[PrioritizedItem] = PriorityQueue(
            maxsize=max_backlog
        )
        self.running_jobs = Queue(maxsize=self.pool_size)
        self.running_futs = weakref.WeakSet()
        self.existing_workers = weakref.WeakSet()
        # Lock protecting busy_workers, existing_workers, and
        # running_futs for thread-safety without the GIL.
        self._workers_lock = threading.Lock()
        self.closed = False
        self.shutdown_wait = shutdown_wait
        self.shutdown_cancel_futures = shutdown_cancel_futures
        self.daemon = daemon
        self.malloc_trim_rss_memory_threshold_bytes = (
            malloc_trim_rss_memory_threshold_bytes
        )
        self._statistics = Statistics()

        # TODO: overcommit
        self.workers: SimpleQueue[WorkerProcess] = SimpleQueue()
        for _ in range(self.pool_size):
            self.add_worker_to_pool()
        # When a worker is running a job, it will be removed from
        # the workers queue, and added to the busy_workers set.
        # When a worker successfully completes a job, it will be
        # added back to the workers queue, and removed from the
        # busy_workers set.
        self.busy_workers = set()  # weakref.WeakSet()

        # THE ONLY ACTIVE, PERSISTENT STATE IN DEADPOOL IS THIS THREAD
        # BELOW. PROTECT IT AT ALL COSTS.
        self.runner_thread = threading.Thread(
            target=self.runner, name="deadpool.runner", daemon=True
        )
        self.runner_thread.start()

    def get_statistics(self) -> dict[str, typing.Any]:
        """Return the counters plus a snapshot of the current worker counts."""
        stats = self._statistics.to_dict()

        # These are not counters; they are determined at the time of the
        # call based on the state of the worker processes.
        with self._workers_lock:
            stats["worker_processes_still_alive"] = len(self.existing_workers)
            stats["worker_processes_busy"] = len(self.busy_workers)
            stats["worker_processes_idle"] = self.workers.qsize()

        return stats

    def add_worker_to_pool(self):
        """Start a new worker process and put it on the idle queue."""
        if self.propagate_environ:
            # By constructing here, late, we allow the user to make
            # changes dynamically to the configured env vars and these
            # will be reflected in the worker processes as they are
            # added to the pool. This has a large number of interesting
            # applications, such as dynamically changing the logging
            # level of the worker processes, or changing the location
            # of a file that the worker processes need to read, or
            # changing timeouts and so on. All the user needs to do
            # is update the value on the Deadpool instance itself.
            #
            # The user's initargs are bound into the wrapper by keyword.
            # If they were passed on positionally, the first one would
            # collide with `original_initializer` and the call would
            # raise TypeError inside the worker.
            initializer = partial(
                initializer_environ_propagator,
                dict(self.propagate_environ),
                original_initializer=self.initializer,
                initargs=self.initargs,
            )
            initargs = ()
        else:
            initializer = self.initializer
            initargs = self.initargs

        worker = WorkerProcess(
            initializer=initializer,
            initargs=initargs,
            finalizer=self.finitializer,
            finargs=self.finitargs,
            mp_context=self.ctx,
            daemon=self.daemon,
            malloc_trim_rss_memory_threshold_bytes=self.malloc_trim_rss_memory_threshold_bytes,
        )
        self.workers.put(worker)
        self._statistics.worker_processes_created.increment()
        with self._workers_lock:
            self.existing_workers.add(worker)

    def clear_workers(self):
        """Clear all workers from the pool.

        Typically they will all get added back according to the
        rules for `max_workers` and so on. One neat reason to do
        this is to have new settings take effect, such as a new
        environment variable that needs to be set in the workers.
        """
        while True:
            # A non-blocking get: another thread may take the last idle
            # worker between an `empty()` check and a blocking `get()`,
            # which would leave this call waiting forever.
            try:
                worker = self.workers.get_nowait()
            except Empty:
                break
            worker.shutdown(wait=False)

    def runner(self):
        """Body of the runner thread: turn queued jobs into task threads."""
        while True:
            # This will block if the queue of running jobs is full.
            self.running_jobs.put(None)

            priority_job = self.submitted_jobs.get()
            job = priority_job.item
            if job is None:
                # This is for the `None` that terminates the while loop.
                self.submitted_jobs.task_done()
                self.running_jobs.get()
                # TODO: this probably isn't necessary, since cleanup is happening
                # in the shutdown method anyway.
                cancel_all_futures_on_queue(self.submitted_jobs)
                logger.debug("Got shutdown event, leaving runner.")
                return

            *_, fut = job
            if fut.done():
                # This shouldn't really be possible, but if the associated future
                # for this job has somehow already been marked as done (e.g. if
                # the caller decided to cancel it themselves) then just skip the
                # whole job.
                self.submitted_jobs.task_done()
                self.running_jobs.get()
                continue

            t = threading.Thread(target=self.run_task, args=job, daemon=True)
            self._statistics.tasks_launched.increment()
            t.start()

    def get_process(self) -> "WorkerProcess":
        """Take an idle worker, starting a new one first if the pool has room."""
        with self._workers_lock:
            bw = len(self.busy_workers)
            mw = self.pool_size
            qs = self.workers.qsize()

        # `add_worker_to_pool` takes `_workers_lock` itself, so it must be
        # called after the lock has been released.
        total_workers = bw + qs
        if total_workers < mw and qs == 0:
            self.add_worker_to_pool()

        wp = self.workers.get()
        with self._workers_lock:
            self.busy_workers.add(wp)
            busy_count = len(self.busy_workers)
        with self._statistics.max_workers_busy_concurrently.lock:
            if busy_count > self._statistics.max_workers_busy_concurrently.value:
                self._statistics.max_workers_busy_concurrently.value = busy_count

        return wp

    def done_with_process(self, wp: "WorkerProcess"):
        """Decide what happens to ``wp`` after its task: retire, replace or reuse."""
        # This worker is done with its job and is no longer busy.
        with self._workers_lock:
            self.busy_workers.remove(wp)
            count_workers_busy = len(self.busy_workers)
            count_workers_idle = self.workers.qsize()
            backlog_size = self.submitted_jobs.qsize()

        # The `1` is for `wp` itself.
        total_workers = count_workers_busy + count_workers_idle + 1
        there_are_more_workers_than_min = total_workers > self.min_workers
        task_backlog_is_empty = backlog_size == 0

        # if there_are_more_workers_than_min and (there_are_idle_workers or task_backlog_is_empty):
        if there_are_more_workers_than_min and task_backlog_is_empty:
            # We have more workers than the minimum, and there is no backlog of
            # tasks. This implies any tasks currently in play have already been picked
            # up by workers in the pool, or the pool is idle. We can safely remove
            # this worker from the pool.
            wp.shutdown(wait=False)
            return

        if not wp.is_alive():
            self.add_worker_to_pool()
            return

        if not wp.ok:
            self.add_worker_to_pool()
            return

        if self.max_tasks_per_child is not None:
            if wp.tasks_ran_counter >= self.max_tasks_per_child:
                logger.debug(f"Worker {wp.pid} hit max tasks per child.")
                wp.shutdown(wait=False)
                self.add_worker_to_pool()
                return

        if self.max_worker_memory_bytes is not None:
            mem = wp.get_rss_bytes()
            logger.debug(f"Worker {wp.pid} has {mem} bytes of RSS memory.")
            if mem >= self.max_worker_memory_bytes:
                logger.debug(f"Worker {wp.pid} hit max memory threshold.")
                wp.shutdown(wait=False)
                self.add_worker_to_pool()
                return

        self.workers.put(wp)

    @staticmethod
    def _set_exception_quietly(fut, exc):
        """Set ``exc`` on ``fut`` unless the future has already finished."""
        if fut.done():
            return
        try:
            fut.set_exception(exc)
        except InvalidStateError:
            # Still needed despite the `done()` check, because of a possible
            # race between the check and the `set_exception` call.
            pass

    def run_task(self, fn, args, kwargs, timeout, fut: "Future"):
        """Run one job in a worker process and settle ``fut`` with the outcome.

        Runs in its own thread, started by ``runner``.
        """
        try:
            retry_count = 10
            while retry_count > 0:
                retry_count -= 1
                worker: WorkerProcess = self.get_process()
                try:
                    worker.submit_job((fn, args, kwargs, timeout))
                    break
                except (pickle.PicklingError, AttributeError) as e:
                    # If the user passed in a function or params that can't
                    # be pickled, use the future to communicate the error.
                    # Note that in this scenario, there is nothing wrong
                    # with the worker process itself, so we don't need to
                    # shut it down.
                    fut.set_exception(e)
                    self.done_with_process(worker)
                    return
                except BrokenPipeError:
                    # This likely comes from trying to send a job over a pipe
                    # that has been closed. This is a serious problem, and
                    # we should shut down the worker process and get rid of
                    # it. We're going to loop back around and try again with
                    # a new worker.
                    # TODO: it seems that this might be expected in situations
                    # where the worker process often OOMs. As such, not sure
                    # whether logging at warning level is appropriate.
                    logger.warning(f"BrokenPipeError on {worker.pid}, retrying.")
                    worker.ok = False
                    self.done_with_process(worker)
                    # TODO: probably this should be moved into the `done_with_process`
                    # and can act on the `worker.ok` flag.
                    kill_proc_tree(worker.pid, sig=signal.SIGKILL)
            else:  # pragma: no cover
                # If we get here, we've tried to submit the job to a worker
                # process multiple times and failed each time. We're giving
                # up.
                logger.error("Failed to submit job to worker")
                fut.set_exception(ProcessError("Failed to submit job to worker"))
                return

            fut.pid = worker.pid
            with self._workers_lock:
                self.running_futs.add(fut)

            while True:
                if worker.results_are_available():
                    try:
                        results = worker.get_results()
                    except EOFError:
                        self._statistics.tasks_failed.increment()
                        if not fut.done():
                            self._set_exception_quietly(
                                fut, ProcessError(worker.format_death_message())
                            )
                    except BaseException as e:
                        self._statistics.tasks_failed.increment()
                        logger.debug(f"Unexpected exception from worker: {e}")
                        self._set_exception_quietly(fut, e)
                    else:
                        if isinstance(results, BaseException):
                            # Counted exactly once, whether or not the
                            # failure is a timeout.
                            self._statistics.tasks_failed.increment()
                            self._set_exception_quietly(fut, results)

                            if isinstance(results, TimeoutError):
                                logger.debug(
                                    f"TimeoutError on {worker.pid}, setting ok=False"
                                )
                                worker.ok = False
                        elif not fut.done():
                            try:
                                fut.set_result(results)
                            except InvalidStateError:
                                pass
                    break
                elif not worker.is_alive():
                    self._statistics.tasks_failed.increment()
                    logger.debug(f"p is no longer alive: {worker.process}")
                    # It is possible that fut has already had a result set on
                    # it. If that's the case we'll do nothing. Otherwise, put
                    # an exception reporting the unexpected situation.
                    if not fut.done():
                        self._set_exception_quietly(
                            fut, ProcessError(worker.format_death_message())
                        )

                    break
                else:
                    pass  # pragma: no cover

            self.done_with_process(worker)
        finally:
            self.submitted_jobs.task_done()

            if not fut.done():  # pragma: no cover
                fut.set_exception(ProcessError("Somehow no result got set on fut."))

            # Free the slot that `runner` reserved for this task.
            try:
                self.running_jobs.get_nowait()
            except Empty:  # pragma: no cover
                logger.warning("Weird error, did not expect running jobs to be empty")

    def submit(
        self,
        fn: Callable,
        /,
        *args,
        deadpool_timeout=None,
        deadpool_priority=0,
        **kwargs,
    ) -> "Future":
        """Queue ``fn(*args, **kwargs)`` for execution and return its future.

        Blocks while the backlog queue is full.

        :param deadpool_timeout: Passed to the worker as the timeout for
            this task.
        :param deadpool_priority: Queue priority; lower values are taken
            off the queue first. Must be >= 0.
        :raises ValueError: If ``deadpool_priority`` is negative.
        :raises PoolClosed: If the pool has already been shut down.
        """
        if deadpool_priority < 0:  # pragma: no cover
            raise ValueError(
                f"Parameter deadpool_priority must be >= 0, but was {deadpool_priority}"
            )

        if self.closed:
            raise PoolClosed("The pool is closed. No more tasks can be submitted.")

        fut = Future()
        self.submitted_jobs.put(
            PrioritizedItem(
                priority=deadpool_priority,
                item=(fn, args, kwargs, deadpool_timeout, fut),
            )
        )
        self._statistics.tasks_received.increment()
        return fut

    def shutdown(self, wait: bool = True, *, cancel_futures: bool = False) -> None:
        """Close the pool and shut down its worker processes.

        Calling this on an already closed pool does nothing.

        :param wait: If true, the stop sentinel is queued behind all
            pending tasks; otherwise it is queued ahead of them.
        :param cancel_futures: If true, cancel every task still waiting
            on the queue. Combined with ``wait=False``, running tasks are
            cancelled and their processes killed as well.
        """
        # `Queue.put(..., timeout=...)` signals a timeout with `queue.Full`.
        from queue import Full

        if self.closed:
            return

        logger.debug(f"shutdown: {wait=} {cancel_futures=}")

        # No more new tasks can be submitted
        self.closed = True

        if cancel_futures:
            cancel_all_futures_on_queue(self.submitted_jobs)

        if wait:
            # The None sentinel will pop last
            shutdown_priority = sys.maxsize
        else:
            # The None sentinel will pop first
            shutdown_priority = -1

        try:
            self.submitted_jobs.put(
                PrioritizedItem(priority=shutdown_priority, item=None),
                timeout=2.0,
            )
        except Full:  # pragma: no cover
            logger.warning(
                "Timed out putting None on the submit queue. This "
                "should not be possible "
                "and might be a bug in deadpool."
            )

        # Up till this point, all the pending work that has been
        # submitted, but not yet started, has been cancelled. The
        # runner loop has also been stopped (with the None sentinel).
        # The only thing left to do is decide whether or not to
        # actively kill processes that are still running. We presume
        # that if the user is asking for cancellation and doesn't
        # want to wait, that she probably wants us to also stop
        # running processes.
        if (not wait) and cancel_futures:
            with self._workers_lock:
                running_futs = list(self.running_futs)
            for fut in running_futs:
                fut.cancel_and_kill_if_running()

        logger.debug("waiting for submitted_jobs to join...")
        self.submitted_jobs.join()

        super().shutdown(wait, cancel_futures=cancel_futures)

        # We can now remove all other processes hanging around
        # in the background.
        while not self.workers.empty():
            try:
                worker = self.workers.get_nowait()
                worker.shutdown()
            except Empty:  # pragma: no cover
                break

        # There may be a few processes left in the
        # `busy_workers` queue. Shut them down too.
        with self._workers_lock:
            remaining = list(self.busy_workers)
            self.busy_workers.clear()
        for worker in remaining:
            worker.shutdown()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Shut the pool down and wait for the runner thread to finish.

        ``shutdown_wait`` and ``shutdown_cancel_futures`` from the
        constructor are used when they are not ``None``.
        """
        kwargs = {}
        if self.shutdown_wait is not None:
            kwargs["wait"] = self.shutdown_wait

        if self.shutdown_cancel_futures is not None:
            kwargs["cancel_futures"] = self.shutdown_cancel_futures

        self.shutdown(**kwargs)
        self.runner_thread.join()
        return False
783
+
784
+
def cancel_all_futures_on_queue(q: Queue):
    """Drain ``q`` without blocking and cancel the future of every job on it.

    Each queued entry is expected to have an ``item`` attribute holding
    a sequence whose last element is the job's future. Every entry taken
    off the queue is also marked as done on the queue.
    """
    while True:
        # Only the dequeue itself can raise `Empty`, so only it is guarded.
        try:
            priority_item = q.get_nowait()
        except Empty:
            break

        q.task_done()
        *_, fut = priority_item.item
        fut.cancel()
795
+
796
+
# Taken from
# https://psutil.readthedocs.io/en/latest/index.html?highlight=children#kill-process-tree
def kill_proc_tree(
    pid,
    sig=signal.SIGTERM,
    include_parent=True,
    timeout=None,
    on_terminate=None,
    allow_kill_self=False,
):
    """Kill a process tree (including grandchildren) with signal
    "sig" and return a (gone, still_alive) tuple.
    "on_terminate", if specified, is a callback function which is
    called as soon as a child terminates.

    :param pid: Pid of the root of the tree.
    :param sig: Signal sent to every process in the tree.
    :param include_parent: If true, ``pid`` itself is signalled too,
        after all of its descendants.
    :param timeout: Passed to ``psutil.wait_procs``.
    :param allow_kill_self: Must be true for ``pid`` to be this process.
    :raises ValueError: If ``pid`` is this process and
        ``allow_kill_self`` is false.
    """
    if not allow_kill_self and pid == os.getpid():
        raise ValueError("Won't kill myself")

    try:
        parent = psutil.Process(pid)
        # The process can disappear between the lookup and this call.
        children = parent.children(recursive=True)
    except psutil.NoSuchProcess:
        # Nothing to kill. Return the documented tuple shape so that
        # callers can always unpack the result.
        return ([], [])

    if include_parent:
        children.append(parent)

    for p in children:
        try:
            p.send_signal(sig)
        except psutil.NoSuchProcess:  # pragma: no cover
            pass

    gone, alive = psutil.wait_procs(children, timeout=timeout, callback=on_terminate)
    return (gone, alive)
832
+
833
+
def raw_runner2(
    conn: Connection,
    conn_receiver: Connection,
    parent_pid,
    initializer,
    initargs,
    finitializer: Optional[Callable] = None,
    finitargs: Optional[Tuple] = None,
    mem_clear_threshold_bytes: Optional[int] = None,
    kill_proc_tree=kill_proc_tree,
):
    """Entry point of a worker process: receive jobs, run them, send results.

    :param conn: Pipe end used to send results (or exceptions) out.
    :param conn_receiver: Pipe end on which jobs arrive. A job is a
        ``(fn, args, kwargs, timeout)`` tuple; ``None`` means "exit".
    :param parent_pid: Pid to watch. The worker kills itself once that
        process no longer exists.
    :param initializer: Optional callable run once before the job loop.
    :param initargs: Positional arguments for ``initializer``.
    :param finitializer: Optional callable run once after the job loop.
    :param finitargs: Positional arguments for ``finitializer``.
    :param mem_clear_threshold_bytes: If set, ``trim_memory`` is called
        after a job whenever this process's RSS is above the value.
    :param kill_proc_tree: The function used to kill this process.
        NOTE(review): presumably bound as a default so that a reference
        is held by the function itself; confirm before changing.

    This function does not return normally; it ends by killing its own
    process tree.
    """
    setproctitle("deadpool.worker")

    # Resolved before any helper thread is started, because the helpers
    # below use `pid`.
    proc = psutil.Process()
    pid = proc.pid

    # This event is used to signal that the "parent"
    # monitor thread should be deactivated.
    evt = threading.Event()

    def self_destruct_if_parent_disappears():
        """Check every 2 seconds whether the parent is still alive,
        and kill this process if it is not.
        """
        while True:
            if evt.wait(2.0):
                return

            if not psutil.pid_exists(parent_pid):
                logger.warning(f"Parent {parent_pid} is gone, self-destructing.")
                evt.set()
                atexit._run_exitfuncs()
                kill_proc_tree(
                    pid, sig=signal.SIGKILL, allow_kill_self=True
                )  # pragma: no cover
                return  # pragma: no cover

    tparent = threading.Thread(target=self_destruct_if_parent_disappears, daemon=True)
    tparent.start()

    def deactivate_parentless_self_destruct():
        evt.set()

    def conn_send_safe(obj):
        """Send ``obj`` to the parent without ever raising."""
        try:
            conn.send(obj)
        except BrokenPipeError:  # pragma: no cover
            logger.debug("Pipe not usable")
        except BaseException as send_error:  # pragma: no cover
            logger.exception("Unexpected pipe error")
            # Typically the object could not be pickled. If nothing at all
            # were sent, the parent would wait for this result forever, so
            # report the failure in place of the object.
            try:
                conn.send(
                    ProcessError(
                        f"Worker {pid} could not send its result to the "
                        f"parent: {send_error!r}"
                    )
                )
            except BaseException:
                logger.debug("Pipe not usable")

    def timed_out():
        """Action to fire when the timeout given to ``threading.Timer``
        is reached. It kills this process with SIGKILL."""
        # We are about to die on purpose: stop the parent watchdog, tell
        # the parent why, then kill ourselves.
        deactivate_parentless_self_destruct()
        conn_send_safe(TimeoutError(f"Process {pid} timed out, self-destructing."))
        # kill_proc_tree_in_process_daemon(pid, signal.SIGKILL)
        atexit._run_exitfuncs()
        kill_proc_tree(
            pid, sig=signal.SIGKILL, allow_kill_self=True
        )  # pragma: no cover

    if initializer:
        initargs = initargs or ()
        # A failing initializer is logged; the worker still serves jobs.
        try:
            initializer(*initargs)
        except Exception:
            logger.exception("Initializer failed")

    while True:
        # Wait for some work.
        try:
            logger.debug("Waiting for work...")
            job = conn_receiver.recv()
            logger.debug("Got a job")
        except EOFError:
            logger.debug("Received EOF, exiting.")
            break
        except KeyboardInterrupt:  # pragma: no cover
            logger.debug("Received KeyboardInterrupt, exiting.")
            break
        except BaseException:  # pragma: no cover
            logger.exception("Received unexpected exception, exiting.")
            break

        if job is None:
            logger.debug("Received None, exiting.")
            break

        # Real work, unpack.
        fn, args, kwargs, timeout = job

        if timeout:
            t = threading.Timer(timeout, timed_out)
            t.start()
            deactivate_timer = lambda: t.cancel()  # noqa: E731
        else:
            deactivate_timer = lambda: None  # noqa: E731

        try:
            results = fn(*args, **kwargs)
        except BaseException as e:
            # Check whether the exception can be pickled. If not we're going
            # to wrap it. Why do this? It turns out that mp.Connection.send
            # will try to pickle the exception, and if it can't, it will
            # lose its mind. I've gotten segfaults in Python with this.
            try:  # pragma: no cover
                pickle.dumps(e)
            except Exception as pickle_error:
                # `format_exception` returns a list of lines; join them so
                # the message contains readable tracebacks, not a list repr.
                original_tb = "".join(traceback.format_exception(e))
                pickle_tb = "".join(traceback.format_exception(pickle_error))
                msg = (
                    f"An exception occurred but pickling it failed. "
                    f"The original exception is presented here as a string with "
                    f"traceback.\n{e}\n{original_tb}\n\n"
                    f"The reason for the pickling failure is the following:\n"
                    f"{pickle_tb}"
                )
                e = ProcessError(msg)

            # Because we can't retain the traceback (can't be pickled by default,
            # an external library like "tblib" would be needed), we're going to
            # render the traceback to a string and add that to the exception
            # text. This approach also works for when deadpool can be distributed
            # across multiple machines, since the traceback is a string.
            traceback_str = "".join(
                traceback.format_exception(type(e), e, e.__traceback__)
            )
            # Modify the exception's args to include the traceback
            # This changes the string representation of the exception
            e.args = (f"{e}\n{traceback_str}",) + e.args[1:]
            conn_send_safe(e)
        else:
            conn_send_safe(results)
        finally:
            deactivate_timer()

        if mem_clear_threshold_bytes is not None:
            mem = proc.memory_info().rss
            if mem > mem_clear_threshold_bytes:
                trim_memory()

    if finitializer:
        finitargs = finitargs or ()
        try:
            finitializer(*finitargs)
        except BaseException:
            logger.exception("finitializer failed")

    # We've reached the end of this function which means this
    # process must exit. However, we started a couple threads
    # in here and they don't magically exit. Additional
    # synchronization controls are needed to tell the threads
    # to exit, which we don't have. However, we do have a kill
    # switch. Since this process worker will process no more
    # work, and since we've already run the finalizer, we may
    # as well just nuke it. That will remove its memory space
    # and all its threads too.
    deactivate_parentless_self_destruct()
    logger.debug(f"Deleting worker {pid=}")
    atexit._run_exitfuncs()
    kill_proc_tree(pid, sig=signal.SIGKILL, allow_kill_self=True)  # pragma: no cover
995
+
996
+
def kill_proc_tree_in_process_daemon(pid, sig):  # pragma: no cover
    """Start a daemon process that calls ``kill_proc_tree(pid, sig)``."""
    mp.Process(target=kill_proc_tree, args=(pid, sig), daemon=True).start()
999
+
1000
+
def trim_memory() -> None:
    """Tell malloc to give all the unused memory back to the OS.

    Only attempted on Linux. This is best-effort: if ``libc.so.6``
    cannot be loaded or has no ``malloc_trim``, nothing happens.
    """
    if sys.platform != "linux":
        return

    try:
        libc = ctypes.CDLL("libc.so.6")
        libc.malloc_trim(0)
    except (OSError, AttributeError):
        # Trimming is an optimisation; failing to do it must not take
        # the worker down.
        logger.debug("malloc_trim is not available, skipping memory trim")
1006
+
1007
+
def initializer_environ_propagator(
    environ: dict,
    original_initializer: Optional[Callable] = None,
    initargs=(),
):
    """Wrap the original initializer with one that sets the
    environment variables in the given dict.

    :param environ: Variables merged into ``os.environ``. ``None`` or an
        empty mapping changes nothing.
    :param original_initializer: Optional callable invoked afterwards.
    :param initargs: Positional arguments for ``original_initializer``.
    """

    # Quite important that we run this first, so that the
    # environment variables are set before the original
    # initializer runs. This allows the original initializer
    # to use the environment variables.
    os.environ.update(environ or {})
    if original_initializer:
        original_initializer(*(initargs or ()))