parsl 2025.3.10__py3-none-any.whl → 2025.3.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

parsl/executors/high_throughput/mpi_resource_management.py CHANGED
@@ -203,6 +203,8 @@ class MPITaskScheduler(TaskScheduler):
     def get_result(self, block: bool = True, timeout: Optional[float] = None):
         """Return result and relinquish provisioned nodes"""
         result_pkl = self.pending_result_q.get(block, timeout)
+        if result_pkl is None:
+            return None
         result_dict = pickle.loads(result_pkl)
         # TODO (wardlt): If the task did not request nodes, it won't be in `self._map_tasks_to_nodes`.
         # Causes Parsl to hang. See Issue #3427
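The added None check pairs with a shutdown path that unblocks a consumer stuck on a blocking queue read by pushing a None sentinel (see the pending_result_queue.put(None) call later in this diff). A minimal sketch of that pattern, outside of Parsl and with hypothetical names:

import queue
import threading

results: "queue.Queue[bytes | None]" = queue.Queue()

def consume() -> None:
    while True:
        item = results.get()   # blocks until a result or the sentinel arrives
        if item is None:       # sentinel: the producer asked us to stop
            break
        print("handling", item)

consumer = threading.Thread(target=consume)
consumer.start()
results.put(b"result-1")
results.put(None)              # wake the consumer and let it exit
consumer.join()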

parsl/executors/high_throughput/process_worker_pool.py CHANGED
@@ -15,6 +15,7 @@ import threading
 import time
 import uuid
 from importlib.metadata import distributions
+from multiprocessing.context import SpawnProcess
 from multiprocessing.managers import DictProxy
 from multiprocessing.sharedctypes import Synchronized
 from typing import Dict, List, Optional, Sequence
@@ -403,52 +404,34 @@ class Manager:
         result_outgoing.connect(self._result_q_url)
         logger.info("Manager result pipe connected to interchange")

-        push_poll_period = max(10, self.poll_period) / 1000  # push_poll_period must be atleast 10 ms
-        logger.debug("push poll period: {}".format(push_poll_period))
-
-        last_beat = time.time()
-        last_result_beat = time.time()
-        items = []
-
         while not self._stop_event.is_set():
+            logger.debug("Starting pending_result_queue get")
             try:
-                logger.debug("Starting pending_result_queue get")
-                r = self.task_scheduler.get_result(block=True, timeout=push_poll_period)
-                logger.debug("Got a result item")
-                items.append(r)
-            except queue.Empty:
-                logger.debug("pending_result_queue get timeout without result item")
-            except Exception as e:
-                logger.exception("Got an exception: {}".format(e))
-
-            if time.time() > last_result_beat + self.heartbeat_period:
-                heartbeat_message = f"last_result_beat={last_result_beat} heartbeat_period={self.heartbeat_period} seconds"
-                logger.info(f"Sending heartbeat via results connection: {heartbeat_message}")
-                last_result_beat = time.time()
-                items.append(pickle.dumps({'type': 'heartbeat'}))
-
-            if len(items) >= self.max_queue_size or time.time() > last_beat + push_poll_period:
-                last_beat = time.time()
-                if items:
-                    logger.debug(f"Result send: Pushing {len(items)} items")
-                    result_outgoing.send_multipart(items)
-                    logger.debug("Result send: Pushed")
-                    items = []
-                else:
-                    logger.debug("Result send: No items to push")
-            else:
-                logger.debug(f"Result send: check condition not met - deferring {len(items)} result items")
+                r = self.task_scheduler.get_result()
+                if r is None:
+                    continue
+                logger.debug("Result received from worker: %s", id(r))
+                result_outgoing.send(r)
+                logger.debug("Result sent to interchange: %s", id(r))
+            except Exception:
+                logger.exception("Failed to send result to interchange")

         result_outgoing.close()
-        logger.info("Exiting")
+        logger.debug("Exiting")

     @wrap_with_logs
-    def worker_watchdog(self):
+    def heartbeater(self):
+        while not self._stop_event.wait(self.heartbeat_period):
+            heartbeat_message = f"heartbeat_period={self.heartbeat_period} seconds"
+            logger.info(f"Sending heartbeat via results connection: {heartbeat_message}")
+            self.pending_result_queue.put(pickle.dumps({'type': 'heartbeat'}))
+
+    def worker_watchdog(self, procs: dict[int, SpawnProcess]):
         """Keeps workers alive."""
         logger.debug("Starting worker watchdog")

         while not self._stop_event.wait(self.heartbeat_period):
-            for worker_id, p in self.procs.items():
+            for worker_id, p in procs.items():
                 if not p.is_alive():
                     logger.error("Worker {} has died".format(worker_id))
                     try:
@@ -466,11 +449,10 @@ class Manager:
                     except KeyError:
                         logger.info("Worker {} was not busy when it died".format(worker_id))

-                    p = self._start_worker(worker_id)
-                    self.procs[worker_id] = p
+                    procs[worker_id] = self._start_worker(worker_id)
                     logger.info("Worker {} has been restarted".format(worker_id))

-        logger.critical("Exiting")
+        logger.debug("Exiting")

     @wrap_with_logs
     def handle_monitoring_messages(self):
@@ -485,32 +467,28 @@ class Manager:
         """
         logger.debug("Starting monitoring handler thread")

-        poll_period_s = max(10, self.poll_period) / 1000  # Must be at least 10 ms
-
         while not self._stop_event.is_set():
             try:
                 logger.debug("Starting monitor_queue.get()")
-                msg = self.monitoring_queue.get(block=True, timeout=poll_period_s)
-            except queue.Empty:
-                logger.debug("monitoring_queue.get() has timed out")
-            except Exception as e:
-                logger.exception(f"Got an exception: {e}")
-            else:
+                msg = self.monitoring_queue.get(block=True)
+                if msg is None:
+                    continue
                 logger.debug("Got a monitoring message")
                 self.pending_result_queue.put(msg)
                 logger.debug("Put monitoring message on pending_result_queue")
+            except Exception:
+                logger.exception("Failed to forward monitoring message")

-        logger.critical("Exiting")
+        logger.debug("Exiting")

     def start(self):
         """ Start the worker processes.

         TODO: Move task receiving to a thread
         """
-        self.procs = {}
+        procs: dict[int, SpawnProcess] = {}
         for worker_id in range(self.worker_count):
-            p = self._start_worker(worker_id)
-            self.procs[worker_id] = p
+            procs[worker_id] = self._start_worker(worker_id)

         logger.debug("Workers started")

@@ -519,40 +497,69 @@ class Manager:
             target=self.push_results, name="Result-Pusher"
         )
         thr_worker_watchdog = threading.Thread(
-            target=self.worker_watchdog, name="worker-watchdog"
+            target=self.worker_watchdog, args=(procs,), name="worker-watchdog"
         )
         thr_monitoring_handler = threading.Thread(
             target=self.handle_monitoring_messages, name="Monitoring-Handler"
         )
+        thr_heartbeater = threading.Thread(target=self.heartbeater, name="Heartbeater")

         thr_task_puller.start()
         thr_result_pusher.start()
         thr_worker_watchdog.start()
         thr_monitoring_handler.start()
+        thr_heartbeater.start()

         logger.info("Manager threads started")

         # This might need a multiprocessing event to signal back.
         self._stop_event.wait()
-        logger.critical("Received kill event, terminating worker processes")
+        logger.info("Stop event set; terminating worker processes")
+
+        # Invite blocking threads to quit
+        self.monitoring_queue.put(None)
+        self.pending_result_queue.put(None)

+        thr_heartbeater.join()
         thr_task_puller.join()
         thr_result_pusher.join()
         thr_worker_watchdog.join()
         thr_monitoring_handler.join()
-        for proc_id in self.procs:
-            self.procs[proc_id].terminate()
-            logger.critical("Terminating worker {}: is_alive()={}".format(self.procs[proc_id],
-                                                                           self.procs[proc_id].is_alive()))
-            self.procs[proc_id].join()
-            logger.debug("Worker {} joined successfully".format(self.procs[proc_id]))
+
+        for worker_id in procs:
+            p = procs[worker_id]
+            proc_info = f"(PID: {p.pid}, Worker ID: {worker_id})"
+            logger.debug(f"Signaling worker {p.name} (TERM). {proc_info}")
+            p.terminate()

         self.zmq_context.term()
+
+        # give processes 1 second to gracefully shut themselves down, based on the
+        # SIGTERM (.terminate()) just sent; after then, we pull the plug.
+        force_child_shutdown_at = time.monotonic() + 1
+        while procs:
+            worker_id, p = procs.popitem()
+            timeout = max(force_child_shutdown_at - time.monotonic(), 0.000001)
+            p.join(timeout=timeout)
+            proc_info = f"(PID: {p.pid}, Worker ID: {worker_id})"
+            if p.exitcode is not None:
+                logger.debug(
+                    "Worker joined successfully. %s (exitcode: %s)", proc_info, p.exitcode
+                )
+
+            else:
+                logger.warning(
+                    f"Worker {p.name} ({worker_id}) failed to terminate in a timely"
+                    f" manner; sending KILL signal to process. {proc_info}"
+                )
+                p.kill()
+                p.join()
+                p.close()
+
         delta = time.time() - self._start_time
         logger.info("process_worker_pool ran for {} seconds".format(delta))
-        return

-    def _start_worker(self, worker_id: int):
+    def _start_worker(self, worker_id: int) -> SpawnProcess:
         p = SpawnContext.Process(
             target=worker,
             args=(
@@ -939,27 +946,27 @@ if __name__ == "__main__":
     )
     logger.info(
         f"\n Python version: {sys.version}"
-        f" Debug logging: {args.debug}"
-        f" Certificates dir: {args.cert_dir}"
-        f" Log dir: {args.logdir}"
-        f" Manager ID: {args.uid}"
-        f" Block ID: {args.block_id}"
-        f" cores_per_worker: {args.cores_per_worker}"
-        f" mem_per_worker: {args.mem_per_worker}"
-        f" task_port: {args.task_port}"
-        f" result_port: {args.result_port}"
-        f" addresses: {args.addresses}"
-        f" max_workers_per_node: {args.max_workers_per_node}"
-        f" poll_period: {args.poll}"
-        f" address_probe_timeout: {args.address_probe_timeout}"
-        f" Prefetch capacity: {args.prefetch_capacity}"
-        f" Heartbeat threshold: {args.hb_threshold}"
-        f" Heartbeat period: {args.hb_period}"
-        f" Drain period: {args.drain_period}"
-        f" CPU affinity: {args.cpu_affinity}"
-        f" Accelerators: {' '.join(args.available_accelerators)}"
-        f" enable_mpi_mode: {args.enable_mpi_mode}"
-        f" mpi_launcher: {args.mpi_launcher}"
+        f"\n Debug logging: {args.debug}"
+        f"\n Certificates dir: {args.cert_dir}"
+        f"\n Log dir: {args.logdir}"
+        f"\n Manager ID: {args.uid}"
+        f"\n Block ID: {args.block_id}"
+        f"\n cores_per_worker: {args.cores_per_worker}"
+        f"\n mem_per_worker: {args.mem_per_worker}"
+        f"\n task_port: {args.task_port}"
+        f"\n result_port: {args.result_port}"
+        f"\n addresses: {args.addresses}"
+        f"\n max_workers_per_node: {args.max_workers_per_node}"
+        f"\n poll_period: {args.poll}"
+        f"\n address_probe_timeout: {args.address_probe_timeout}"
+        f"\n Prefetch capacity: {args.prefetch_capacity}"
+        f"\n Heartbeat threshold: {args.hb_threshold}"
+        f"\n Heartbeat period: {args.hb_period}"
+        f"\n Drain period: {args.drain_period}"
+        f"\n CPU affinity: {args.cpu_affinity}"
+        f"\n Accelerators: {' '.join(args.available_accelerators)}"
+        f"\n enable_mpi_mode: {args.enable_mpi_mode}"
+        f"\n mpi_launcher: {args.mpi_launcher}"
     )
     try:
         manager = Manager(task_port=args.task_port,
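The new shutdown path above signals every worker with SIGTERM and then enforces a single shared deadline across the joins before escalating to SIGKILL. A minimal sketch of that deadline pattern, not Parsl code, with a placeholder child function:

import time
from multiprocessing import get_context

def child() -> None:
    time.sleep(60)   # placeholder; a real worker would do useful work here

if __name__ == "__main__":
    ctx = get_context("spawn")
    procs = {i: ctx.Process(target=child) for i in range(3)}
    for p in procs.values():
        p.start()
    for p in procs.values():
        p.terminate()                                    # polite request: SIGTERM
    deadline = time.monotonic() + 1                      # one deadline shared by all children
    while procs:
        _, p = procs.popitem()
        p.join(timeout=max(deadline - time.monotonic(), 1e-6))
        if p.exitcode is None:                           # still alive after the deadline
            p.kill()                                     # escalate: SIGKILL
            p.join()
        p.close()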

parsl/monitoring/db_manager.py CHANGED
@@ -1,6 +1,7 @@
 import datetime
 import logging
 import multiprocessing.queues as mpq
+import multiprocessing.synchronize as mpe
 import os
 import queue
 import threading
@@ -278,11 +279,13 @@ class Database:

 class DatabaseManager:
     def __init__(self,
+                 *,
                  db_url: str = 'sqlite:///runinfo/monitoring.db',
                  run_dir: str = '.',
                  logging_level: int = logging.INFO,
                  batching_interval: float = 1,
                  batching_threshold: float = 99999,
+                 exit_event: mpe.Event
                  ):

         self.workflow_end = False
@@ -307,6 +310,8 @@ class DatabaseManager:
         self.pending_block_queue: queue.Queue[MonitoringMessage] = queue.Queue()
         self.pending_resource_queue: queue.Queue[MonitoringMessage] = queue.Queue()

+        self.external_exit_event = exit_event
+
     def start(self,
               resource_queue: mpq.Queue) -> None:

@@ -555,15 +560,16 @@ class DatabaseManager:
         while not kill_event.is_set() or logs_queue.qsize() != 0:
             logger.debug("Checking STOP conditions: kill event: %s, queue has entries: %s",
                          kill_event.is_set(), logs_queue.qsize() != 0)
+
+            if self.external_exit_event.is_set():
+                self.close()
+
             try:
                 x = logs_queue.get(timeout=0.1)
             except queue.Empty:
                 continue
             else:
-                if x == 'STOP':
-                    self.close()
-                else:
-                    self._dispatch_to_internal(x)
+                self._dispatch_to_internal(x)

     def _dispatch_to_internal(self, x: Tuple) -> None:
         assert isinstance(x, tuple)
@@ -678,11 +684,11 @@

 @wrap_with_logs(target="database_manager")
 @typeguard.typechecked
-def dbm_starter(exception_q: mpq.Queue,
-                resource_msgs: mpq.Queue,
+def dbm_starter(resource_msgs: mpq.Queue,
                 db_url: str,
                 run_dir: str,
-                logging_level: int) -> None:
+                logging_level: int,
+                exit_event: mpe.Event) -> None:
     """Start the database manager process

     The DFK should start this function. The args, kwargs match that of the monitoring config
@@ -693,16 +699,16 @@ def dbm_starter(exception_q: mpq.Queue,
     try:
         dbm = DatabaseManager(db_url=db_url,
                               run_dir=run_dir,
-                              logging_level=logging_level)
+                              logging_level=logging_level,
+                              exit_event=exit_event)
         logger.info("Starting dbm in dbm starter")
         dbm.start(resource_msgs)
     except KeyboardInterrupt:
         logger.exception("KeyboardInterrupt signal caught")
         dbm.close()
         raise
-    except Exception as e:
+    except Exception:
         logger.exception("dbm.start exception")
-        exception_q.put(("DBM", str(e)))
         dbm.close()

     logger.info("End of dbm_starter")

parsl/monitoring/monitoring.py CHANGED
@@ -5,8 +5,9 @@ import multiprocessing.synchronize as ms
 import os
 import queue
 from multiprocessing import Event
+from multiprocessing.context import ForkProcess as ForkProcessType
 from multiprocessing.queues import Queue
-from typing import TYPE_CHECKING, Literal, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Optional, Tuple, Union

 import typeguard

@@ -128,10 +129,7 @@ class MonitoringHub(RepresentationMixin):
         zmq_comm_q = SizedQueue(maxsize=10)
         udp_comm_q = SizedQueue(maxsize=10)

-        self.exception_q: Queue[Tuple[str, str]]
-        self.exception_q = SizedQueue(maxsize=10)
-
-        self.resource_msgs: Queue[Union[TaggedMonitoringMessage, Literal["STOP"]]]
+        self.resource_msgs: Queue[TaggedMonitoringMessage]
         self.resource_msgs = SizedQueue()

         self.router_exit_event: ms.Event
@@ -139,7 +137,6 @@

         self.zmq_router_proc = ForkProcess(target=zmq_router_starter,
                                            kwargs={"comm_q": zmq_comm_q,
-                                                   "exception_q": self.exception_q,
                                                    "resource_msgs": self.resource_msgs,
                                                    "exit_event": self.router_exit_event,
                                                    "hub_address": self.hub_address,
@@ -154,7 +151,6 @@

         self.udp_router_proc = ForkProcess(target=udp_router_starter,
                                            kwargs={"comm_q": udp_comm_q,
-                                                   "exception_q": self.exception_q,
                                                    "resource_msgs": self.resource_msgs,
                                                    "exit_event": self.router_exit_event,
                                                    "hub_address": self.hub_address,
@@ -167,11 +163,15 @@
                                            )
         self.udp_router_proc.start()

+        self.dbm_exit_event: ms.Event
+        self.dbm_exit_event = Event()
+
         self.dbm_proc = ForkProcess(target=dbm_starter,
-                                    args=(self.exception_q, self.resource_msgs,),
+                                    args=(self.resource_msgs,),
                                     kwargs={"run_dir": dfk_run_dir,
                                             "logging_level": logging.DEBUG if self.monitoring_debug else logging.INFO,
                                             "db_url": self.logging_endpoint,
+                                            "exit_event": self.dbm_exit_event,
                                             },
                                     name="Monitoring-DBM-Process",
                                     daemon=True,
@@ -181,7 +181,7 @@
                     self.zmq_router_proc.pid, self.udp_router_proc.pid, self.dbm_proc.pid)

         self.filesystem_proc = ForkProcess(target=filesystem_router_starter,
-                                           args=(self.resource_msgs, dfk_run_dir),
+                                           args=(self.resource_msgs, dfk_run_dir, self.router_exit_event),
                                            name="Monitoring-Filesystem-Process",
                                            daemon=True
                                            )
@@ -227,58 +227,62 @@

     def close(self) -> None:
         logger.info("Terminating Monitoring Hub")
-        exception_msgs = []
-        while True:
-            try:
-                exception_msgs.append(self.exception_q.get(block=False))
-                logger.error("There was a queued exception (Either router or DBM process got exception much earlier?)")
-            except queue.Empty:
-                break
         if self.monitoring_hub_active:
             self.monitoring_hub_active = False
-            if exception_msgs:
-                for exception_msg in exception_msgs:
-                    logger.error(
-                        "%s process delivered an exception: %s. Terminating all monitoring processes immediately.",
-                        exception_msg[0],
-                        exception_msg[1]
-                    )
-                self.zmq_router_proc.terminate()
-                self.udp_router_proc.terminate()
-                self.dbm_proc.terminate()
-                self.filesystem_proc.terminate()
             logger.info("Setting router termination event")
             self.router_exit_event.set()

             logger.info("Waiting for ZMQ router to terminate")
-            self.zmq_router_proc.join()
-            self.zmq_router_proc.close()
+            join_terminate_close_proc(self.zmq_router_proc)

             logger.info("Waiting for UDP router to terminate")
-            self.udp_router_proc.join()
-            self.udp_router_proc.close()
+            join_terminate_close_proc(self.udp_router_proc)

             logger.debug("Finished waiting for router termination")
-            if len(exception_msgs) == 0:
-                logger.debug("Sending STOP to DBM")
-                self.resource_msgs.put("STOP")
-            else:
-                logger.debug("Not sending STOP to DBM, because there were DBM exceptions")
             logger.debug("Waiting for DB termination")
-            self.dbm_proc.join()
-            self.dbm_proc.close()
+            self.dbm_exit_event.set()
+            join_terminate_close_proc(self.dbm_proc)
             logger.debug("Finished waiting for DBM termination")

-            # should this be message based? it probably doesn't need to be if
-            # we believe we've received all messages
             logger.info("Terminating filesystem radio receiver process")
-            self.filesystem_proc.terminate()
-            self.filesystem_proc.join()
-            self.filesystem_proc.close()
+            join_terminate_close_proc(self.filesystem_proc)

             logger.info("Closing monitoring multiprocessing queues")
-            self.exception_q.close()
-            self.exception_q.join_thread()
             self.resource_msgs.close()
             self.resource_msgs.join_thread()
             logger.info("Closed monitoring multiprocessing queues")
+
+
+def join_terminate_close_proc(process: ForkProcessType, *, timeout: int = 30) -> None:
+    """Increasingly aggressively terminate a process.
+
+    This function assumes that the process is likely to exit before
+    the join timeout, driven by some other means, such as the
+    MonitoringHub router_exit_event. If the process does not exit, then
+    first terminate() and then kill() will be used to end the process.
+
+    In the case of a very mis-behaving process, this function might take
+    up to 3*timeout to exhaust all termination methods and return.
+    """
+    logger.debug("Joining process")
+    process.join(timeout)
+
+    # run a sequence of increasingly aggressive steps to shut down the process.
+    if process.is_alive():
+        logger.error("Process did not join. Terminating.")
+        process.terminate()
+        process.join(timeout)
+        if process.is_alive():
+            logger.error("Process did not join after terminate. Killing.")
+            process.kill()
+            process.join(timeout)
+            # This kill should not be caught by any signal handlers so it is
+            # unlikely that this join will timeout. If it does, there isn't
+            # anything further to do except log an error in the next if-block.
+
+    if process.is_alive():
+        logger.error("Process failed to end")
+        # don't call close if the process hasn't ended:
+        # process.close() doesn't work on a running process.
+    else:
+        process.close()
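A short usage sketch for the escalation helper defined above, mirroring the new test file later in this diff; the hanging child function is a stand-in:

from parsl.monitoring.monitoring import join_terminate_close_proc
from parsl.multiprocessing import ForkProcess

def stubborn_child() -> None:
    while True:   # never exits on its own
        pass

if __name__ == "__main__":
    p = ForkProcess(target=stubborn_child)
    p.start()
    # join() for up to the timeout, then terminate(), then kill(), then close()
    join_terminate_close_proc(p, timeout=1)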

parsl/monitoring/radios/filesystem_router.py CHANGED
@@ -5,6 +5,7 @@ import os
 import pickle
 import time
 from multiprocessing.queues import Queue
+from multiprocessing.synchronize import Event
 from typing import cast

 from parsl.log_utils import set_file_logger
@@ -15,7 +16,7 @@ from parsl.utils import setproctitle


 @wrap_with_logs
-def filesystem_router_starter(q: Queue[TaggedMonitoringMessage], run_dir: str) -> None:
+def filesystem_router_starter(q: Queue[TaggedMonitoringMessage], run_dir: str, exit_event: Event) -> None:
     logger = set_file_logger(f"{run_dir}/monitoring_filesystem_radio.log",
                              name="monitoring_filesystem_radio",
                              level=logging.INFO)
@@ -32,7 +33,7 @@ def filesystem_router_starter(q: Queue[TaggedMonitoringMessage], run_dir: str) -
     os.makedirs(tmp_dir, exist_ok=True)
     os.makedirs(new_dir, exist_ok=True)

-    while True:  # this loop will end on process termination
+    while not exit_event.is_set():
         logger.debug("Start filesystem radio receiver loop")

         # iterate over files in new_dir
@@ -50,3 +51,4 @@ def filesystem_router_starter(q: Queue[TaggedMonitoringMessage], run_dir: str) -
                 logger.exception("Exception processing %s - probably will be retried next iteration", filename)

         time.sleep(1)  # whats a good time for this poll?
+    logger.info("Ending filesystem radio receiver")

parsl/monitoring/radios/udp_router.py CHANGED
@@ -118,7 +118,6 @@ class MonitoringRouter:
 @typeguard.typechecked
 def udp_router_starter(*,
                        comm_q: mpq.Queue,
-                       exception_q: mpq.Queue,
                        resource_msgs: mpq.Queue,
                        exit_event: Event,

@@ -144,6 +143,5 @@ def udp_router_starter(*,
     router.logger.info("Starting MonitoringRouter in router_starter")
     try:
         router.start()
-    except Exception as e:
+    except Exception:
         router.logger.exception("UDP router start exception")
-        exception_q.put(('Hub', str(e)))

parsl/monitoring/radios/zmq_router.py CHANGED
@@ -107,7 +107,6 @@ class MonitoringRouter:
 @typeguard.typechecked
 def zmq_router_starter(*,
                        comm_q: mpq.Queue,
-                       exception_q: mpq.Queue,
                        resource_msgs: mpq.Queue,
                        exit_event: Event,

@@ -129,10 +128,4 @@ def zmq_router_starter(*,
         comm_q.put(f"Monitoring router construction failed: {e}")
     else:
         comm_q.put(router.zmq_receiver_port)
-
-    router.logger.info("Starting MonitoringRouter in router_starter")
-    try:
-        router.start()
-    except Exception as e:
-        router.logger.exception("ZMQ router start exception")
-        exception_q.put(('Hub', str(e)))
+    router.start()

parsl/tests/test_monitoring/test_exit_helper.py ADDED
@@ -0,0 +1,55 @@
+import multiprocessing
+import signal
+
+import psutil
+import pytest
+
+from parsl.monitoring.monitoring import join_terminate_close_proc
+from parsl.multiprocessing import ForkProcess
+
+
+def noop():
+    pass
+
+
+@pytest.mark.local
+def test_end_process_already_exited():
+    p = ForkProcess(target=noop)
+    p.start()
+    p.join()
+    join_terminate_close_proc(p)
+
+
+def hang():
+    while True:
+        pass
+
+
+@pytest.mark.local
+def test_end_hung_process():
+    """Test calling against a process that will not exit itself."""
+    p = ForkProcess(target=hang)
+    p.start()
+    pid = p.pid
+    join_terminate_close_proc(p, timeout=1)
+    assert not psutil.pid_exists(pid), "process should not exist any more"
+
+
+def hang_no_sigint(e):
+    def s(*args, **kwargs):
+        e.set()
+    signal.signal(signal.SIGTERM, s)
+    while True:
+        pass
+
+
+@pytest.mark.local
+def test_end_hung_process_no_sigint():
+    """Test calling against a process that will not exit itself."""
+    e = multiprocessing.Event()
+    p = ForkProcess(target=hang_no_sigint, args=(e,))
+    p.start()
+    pid = p.pid
+    join_terminate_close_proc(p, timeout=1)
+    assert not psutil.pid_exists(pid), "process should not exist any more"
+    assert e.is_set(), "hung process should have set event on signal"
parsl/version.py CHANGED
@@ -3,4 +3,4 @@
 Year.Month.Day[alpha/beta/..]
 Alphas will be numbered like this -> 2024.12.10a0
 """
-VERSION = '2025.03.10'
+VERSION = '2025.03.17'
@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: parsl
-Version: 2025.3.10
+Version: 2025.3.17
 Summary: Simple data dependent workflows in Python
 Home-page: https://github.com/Parsl/parsl
-Download-URL: https://github.com/Parsl/parsl/archive/2025.03.10.tar.gz
+Download-URL: https://github.com/Parsl/parsl/archive/2025.03.17.tar.gz
 Author: The Parsl Team
 Author-email: parsl@googlegroups.com
 License: Apache 2.0
@@ -40,7 +40,7 @@ Requires-Dist: boto3; extra == "all"
 Requires-Dist: kubernetes; extra == "all"
 Requires-Dist: ipython<=8.6.0; extra == "all"
 Requires-Dist: nbsphinx; extra == "all"
-Requires-Dist: sphinx<7.2,>=7.1; extra == "all"
+Requires-Dist: sphinx<8,>=7.4; extra == "all"
 Requires-Dist: sphinx-rtd-theme; extra == "all"
 Requires-Dist: google-auth; extra == "all"
 Requires-Dist: google-api-python-client; extra == "all"
@@ -63,7 +63,7 @@ Requires-Dist: msrestazure; extra == "azure"
 Provides-Extra: docs
 Requires-Dist: ipython<=8.6.0; extra == "docs"
 Requires-Dist: nbsphinx; extra == "docs"
-Requires-Dist: sphinx<7.2,>=7.1; extra == "docs"
+Requires-Dist: sphinx<8,>=7.4; extra == "docs"
 Requires-Dist: sphinx-rtd-theme; extra == "docs"
 Provides-Extra: flux
 Requires-Dist: pyyaml; extra == "flux"
@@ -8,7 +8,7 @@ parsl/multiprocessing.py,sha256=MyaEcEq-Qf860u7V98u-PZrPNdtzOZL_NW6EhIJnmfQ,1937
 parsl/process_loggers.py,sha256=uQ7Gd0W72Jz7rrcYlOMfLsAEhkRltxXJL2MgdduJjEw,1136
 parsl/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/utils.py,sha256=codTX6_KLhgeTwNkRzc1lo4bgc1M93eJ-lkqOO98fvk,14331
-parsl/version.py,sha256=3cSnT_xfCul6H60imXWe7VlUXG29OzzoAFknr7Fc3TQ,131
+parsl/version.py,sha256=_yYxGBkoJMDKADe5yJ2dAkIINmdZgyRTRJnodIasABw,131
 parsl/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/app/app.py,sha256=0gbM4AH2OtFOLsv07I5nglpElcwMSOi-FzdZZfrk7So,8532
 parsl/app/bash.py,sha256=jm2AvePlCT9DZR7H_4ANDWxatp5dN_22FUlT_gWhZ-g,5528
@@ -80,9 +80,9 @@ parsl/executors/high_throughput/manager_selector.py,sha256=UKcUE6v0tO7PDMTThpKSK
 parsl/executors/high_throughput/monitoring_info.py,sha256=HC0drp6nlXQpAop5PTUKNjdXMgtZVvrBL0JzZJebPP4,298
 parsl/executors/high_throughput/mpi_executor.py,sha256=U-aatbLF_Mu1p6lP0HmT7Yn1Swn3cc7hPmDfuUb9TpI,4797
 parsl/executors/high_throughput/mpi_prefix_composer.py,sha256=DmpKugANNa1bdYlqQBLHkrFc15fJpefPPhW9hkAlh1s,4308
-parsl/executors/high_throughput/mpi_resource_management.py,sha256=xeJp4h4LysG8KuBLKqy1sgFahL1eqiG7XLpr09VLwy4,8144
+parsl/executors/high_throughput/mpi_resource_management.py,sha256=73bTW2ZbHRfcrPN318cyjiqDN50AM1cOCQqUGJDIlBg,8199
 parsl/executors/high_throughput/probe.py,sha256=QOEaliO3x5cB6ltMOZMsZQ-ath9AAuFqXcBzRgWOM60,2754
-parsl/executors/high_throughput/process_worker_pool.py,sha256=YOJvTUMg3eIHr9fYfBWFHRiI1QQ898IGiuXyj5VRQNo,41084
+parsl/executors/high_throughput/process_worker_pool.py,sha256=Q7FN0MdXIAOouxDarim6etYVHEgbXFiaMhBahC2ZtIQ,41137
 parsl/executors/high_throughput/zmq_pipes.py,sha256=NUK25IEh0UkxzdqQQyM8tMtuZmjSiTeWu1DzkkAIOhA,8980
 parsl/executors/radical/__init__.py,sha256=CKbtV2numw5QvgIBq1htMUrt9TqDCIC2zifyf2svTNU,186
 parsl/executors/radical/executor.py,sha256=en2TKzZnJYU_juojkM_aZUdWhbAgutAYn_EL6HGpfSY,22835
@@ -114,10 +114,10 @@ parsl/launchers/base.py,sha256=CblcvPTJiu-MNLWaRtFe29SZQ0BpTOlaY8CGcHdlHIE,538
 parsl/launchers/errors.py,sha256=8YMV_CHpBNVa4eXkGE4x5DaFQlZkDCRCHmBktYcY6TA,467
 parsl/launchers/launchers.py,sha256=cQsNsHuCOL_nQTjPXf0--YsgsDoMoJ77bO1Wt4ncLjs,15134
 parsl/monitoring/__init__.py,sha256=0ywNz6i0lM1xo_7_BIxhETDGeVd2C_0wwD7qgeaMR4c,83
-parsl/monitoring/db_manager.py,sha256=ra5PqmbUstfDx0o_bkBYI8GIUi461-GV3b4A-Q6DVVE,33300
+parsl/monitoring/db_manager.py,sha256=L0c5S9ockq0UIchT2bjmkSAWXS-t0G-Q_neOIBfLbm0,33444
 parsl/monitoring/errors.py,sha256=D6jpYzEzp0d6FmVKGqhvjAxr4ztZfJX2s-aXemH9bBU,148
 parsl/monitoring/message_type.py,sha256=Khn88afNxcOIciKiCK4GLnn90I5BlRTiOL3zK-P07yQ,401
-parsl/monitoring/monitoring.py,sha256=p79F982lyPsplXeTVxqlvNuB8G1p3PAI8nTMHcZJ5UE,13113
+parsl/monitoring/monitoring.py,sha256=PspFFtf3Iaj5tl23ITRRdHrBDAocSOSvP2IVP_pmW-Y,13134
 parsl/monitoring/remote.py,sha256=t0qCTUMCzeJ_JOARFpjqlTNrAWdEb20BxhmZh9X7kEM,13728
 parsl/monitoring/types.py,sha256=oOCrzv-ab-_rv4pb8o58Sdb8G_RGp1aZriRbdf9zBEk,339
 parsl/monitoring/queries/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -125,13 +125,13 @@ parsl/monitoring/queries/pandas.py,sha256=0Z2r0rjTKCemf0eaDkF1irvVHn5g7KC5SYETvQ
 parsl/monitoring/radios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/monitoring/radios/base.py,sha256=Ep5kHf07Sm-ApMBJVudRhoWRyuiu0udjO4NvEir5LEk,291
 parsl/monitoring/radios/filesystem.py,sha256=ioZ3jOKX5Qf0DYRtWmpCEorfuMVbS58OMS_QV7DOFOs,1765
-parsl/monitoring/radios/filesystem_router.py,sha256=ZxPImntYHw-3arvCTMYOC67kUgkyk7I7XLDrCXnvkBw,2055
+parsl/monitoring/radios/filesystem_router.py,sha256=kQkinktSpsVwfNESfUggSzBlRZ5JgwjM7IDN-jARAhM,2146
 parsl/monitoring/radios/htex.py,sha256=qBu4O5NYnSETHX0ptdwxSpqa2Pp3Z_V6a6lb3TbjKm4,1643
 parsl/monitoring/radios/multiprocessing.py,sha256=fsfaaoMDp6VJv1DSAl-P0R2ofO6jp13byx6NsPItV3Y,655
 parsl/monitoring/radios/udp.py,sha256=bTpt7JYp-5hyBBLzgiLj1_BlSTn28UVp39OYgVGLXCw,1613
-parsl/monitoring/radios/udp_router.py,sha256=Dtat4lVNz4cpnzZmXTjo5VA1Xcri5VTSNNpyepSjIVE,5868
+parsl/monitoring/radios/udp_router.py,sha256=LEiHZVhw3lVFhqUK1FAFFtpvNOWbB6RNRBK8FaMvtDw,5771
 parsl/monitoring/radios/zmq.py,sha256=fhoHp9ylhf-D3eTJb2aSHRsuic8-FJ_oRNGnniGkCAI,592
-parsl/monitoring/radios/zmq_router.py,sha256=oKfMg_dc3UxJcSzDe1ZqkGJYQcOa4somvyGPzwOqQuA,5860
+parsl/monitoring/radios/zmq_router.py,sha256=pYhol8-SV8FThv7YIjqc5tv149E4ktDLb-l7-ot4nfg,5579
 parsl/monitoring/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/monitoring/visualization/app.py,sha256=xMeRlAnzl5lHddAOdSBcqY3D5lmOYw3Z3Z2_YyoVwnw,1425
 parsl/monitoring/visualization/models.py,sha256=C7CcF6w6PhtrdvDX9VgDH-aSrpLfvYU1fJ4-HDUeFVQ,5138
@@ -339,6 +339,7 @@ parsl/tests/test_monitoring/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
 parsl/tests/test_monitoring/test_app_names.py,sha256=A-mOMCVhZDnUyJp32fsTUkHdcyval8o7WPEWacDkbD4,2208
 parsl/tests/test_monitoring/test_basic.py,sha256=VdF6JHfqsEOIMg-ysIAREgygZIjHWNDVLNVQ7jhWxmQ,4592
 parsl/tests/test_monitoring/test_db_locks.py,sha256=3s3c1xhKo230ZZIJ3f1Ca4U7LcEdXnanOGVXQyNlk2U,2895
+parsl/tests/test_monitoring/test_exit_helper.py,sha256=FsMcQ1GF70vPXEfexDyo674_c5cglJBrLXKBzAYIfOk,1266
 parsl/tests/test_monitoring/test_fuzz_zmq.py,sha256=--3-pQUvXXbkr8v_BEJoPvVvNly1oXvrD2nJh6yl_0M,3436
 parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py,sha256=_QV8zjBKVF_qBbBnhT0C3X9AmfS7IKLcOnEw_cU6HeM,2622
 parsl/tests/test_monitoring/test_incomplete_futures.py,sha256=ZnO1sFSwlWUBHX64C_zwfTVRVC_UFNlU4h0POgx6NEo,2005
@@ -457,13 +458,13 @@ parsl/usage_tracking/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 parsl/usage_tracking/api.py,sha256=iaCY58Dc5J4UM7_dJzEEs871P1p1HdxBMtNGyVdzc9g,1821
 parsl/usage_tracking/levels.py,sha256=xbfzYEsd55KiZJ-mzNgPebvOH4rRHum04hROzEf41tU,291
 parsl/usage_tracking/usage.py,sha256=f9k6QcpbQxkGyP5WTC9PVyv0CA05s9NDpRe5wwRdBTM,9163
-parsl-2025.3.10.data/scripts/exec_parsl_function.py,sha256=YXKVVIa4zXmOtz-0Ca4E_5nQfN_3S2bh2tB75uZZB4w,7774
-parsl-2025.3.10.data/scripts/interchange.py,sha256=17MrOc7-FXxKBWTwkzIbUoa8fvvDfPelfjByd3ZD2Wk,29446
-parsl-2025.3.10.data/scripts/parsl_coprocess.py,sha256=zrVjEqQvFOHxsLufPi00xzMONagjVwLZbavPM7bbjK4,5722
-parsl-2025.3.10.data/scripts/process_worker_pool.py,sha256=BbVJ1PS7ZW2grz0iAPPV0BgJyRMyQ7bbXSzLzWCBkyU,41070
-parsl-2025.3.10.dist-info/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
-parsl-2025.3.10.dist-info/METADATA,sha256=rEtmq9LYtfBbXFR2JuX9DmmPQyDshdmj0GuakTMQeSM,4027
-parsl-2025.3.10.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-parsl-2025.3.10.dist-info/entry_points.txt,sha256=XqnsWDYoEcLbsMcpnYGKLEnSBmaIe1YoM5YsBdJG2tI,176
-parsl-2025.3.10.dist-info/top_level.txt,sha256=PIheYoUFQtF2icLsgOykgU-Cjuwr2Oi6On2jo5RYgRM,6
-parsl-2025.3.10.dist-info/RECORD,,
+parsl-2025.3.17.data/scripts/exec_parsl_function.py,sha256=YXKVVIa4zXmOtz-0Ca4E_5nQfN_3S2bh2tB75uZZB4w,7774
+parsl-2025.3.17.data/scripts/interchange.py,sha256=17MrOc7-FXxKBWTwkzIbUoa8fvvDfPelfjByd3ZD2Wk,29446
+parsl-2025.3.17.data/scripts/parsl_coprocess.py,sha256=zrVjEqQvFOHxsLufPi00xzMONagjVwLZbavPM7bbjK4,5722
+parsl-2025.3.17.data/scripts/process_worker_pool.py,sha256=__gFeFQJpV5moRofj3WKQCnKp6gmzieXjzkmzVuTmX4,41123
+parsl-2025.3.17.dist-info/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
+parsl-2025.3.17.dist-info/METADATA,sha256=d_WFIKY6wmq4VQQcz-BCh0yhu9i3i627EjutSTqSNH4,4023
+parsl-2025.3.17.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+parsl-2025.3.17.dist-info/entry_points.txt,sha256=XqnsWDYoEcLbsMcpnYGKLEnSBmaIe1YoM5YsBdJG2tI,176
+parsl-2025.3.17.dist-info/top_level.txt,sha256=PIheYoUFQtF2icLsgOykgU-Cjuwr2Oi6On2jo5RYgRM,6
+parsl-2025.3.17.dist-info/RECORD,,