parsl 2025.3.10__py3-none-any.whl → 2025.3.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/dataflow/dflow.py +1 -3
- parsl/executors/base.py +13 -37
- parsl/executors/flux/executor.py +1 -0
- parsl/executors/globus_compute.py +1 -1
- parsl/executors/high_throughput/executor.py +18 -0
- parsl/executors/high_throughput/mpi_resource_management.py +2 -0
- parsl/executors/high_throughput/process_worker_pool.py +89 -82
- parsl/executors/radical/executor.py +1 -0
- parsl/executors/status_handling.py +8 -0
- parsl/executors/taskvine/executor.py +1 -0
- parsl/executors/workqueue/executor.py +1 -0
- parsl/monitoring/db_manager.py +16 -10
- parsl/monitoring/errors.py +5 -0
- parsl/monitoring/monitoring.py +61 -117
- parsl/monitoring/radios/filesystem_router.py +4 -2
- parsl/monitoring/radios/udp_router.py +1 -3
- parsl/monitoring/radios/zmq_router.py +80 -25
- parsl/multiprocessing.py +42 -2
- parsl/tests/test_monitoring/test_exit_helper.py +54 -0
- parsl/tests/test_monitoring/test_fuzz_zmq.py +1 -1
- parsl/tests/test_monitoring/test_radio_zmq.py +27 -0
- parsl/tests/test_monitoring/test_stdouterr.py +3 -0
- parsl/tests/test_shutdown/test_kill_monitoring.py +1 -1
- parsl/usage_tracking/usage.py +2 -2
- parsl/version.py +1 -1
- {parsl-2025.3.10.data → parsl-2025.3.24.data}/scripts/process_worker_pool.py +89 -82
- {parsl-2025.3.10.dist-info → parsl-2025.3.24.dist-info}/METADATA +4 -4
- {parsl-2025.3.10.dist-info → parsl-2025.3.24.dist-info}/RECORD +35 -33
- {parsl-2025.3.10.data → parsl-2025.3.24.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2025.3.10.data → parsl-2025.3.24.data}/scripts/interchange.py +0 -0
- {parsl-2025.3.10.data → parsl-2025.3.24.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2025.3.10.dist-info → parsl-2025.3.24.dist-info}/LICENSE +0 -0
- {parsl-2025.3.10.dist-info → parsl-2025.3.24.dist-info}/WHEEL +0 -0
- {parsl-2025.3.10.dist-info → parsl-2025.3.24.dist-info}/entry_points.txt +0 -0
- {parsl-2025.3.10.dist-info → parsl-2025.3.24.dist-info}/top_level.txt +0 -0
parsl/dataflow/dflow.py
CHANGED
@@ -1128,9 +1128,7 @@ class DataFlowKernel:
             executor.run_id = self.run_id
             executor.run_dir = self.run_dir
             if self.monitoring:
-                executor.hub_address = self.monitoring.hub_address
-                executor.hub_zmq_port = self.monitoring.hub_zmq_port
-                executor.submit_monitoring_radio = self.monitoring.radio
+                executor.monitoring_messages = self.monitoring.resource_msgs
             if hasattr(executor, 'provider'):
                 if hasattr(executor.provider, 'script_dir'):
                     executor.provider.script_dir = os.path.join(self.run_dir, 'submit_scripts')
parsl/executors/base.py
CHANGED
@@ -1,11 +1,14 @@
+from __future__ import annotations
+
 import os
 from abc import ABCMeta, abstractmethod
 from concurrent.futures import Future
+from multiprocessing.queues import Queue
 from typing import Any, Callable, Dict, Optional

 from typing_extensions import Literal, Self

-from parsl.monitoring.radios.base import MonitoringRadioSender
+from parsl.monitoring.types import TaggedMonitoringMessage


 class ParslExecutor(metaclass=ABCMeta):
@@ -42,6 +45,13 @@ class ParslExecutor(metaclass=ABCMeta):
     invariant, not co-variant, and it looks like @typeguard cannot be
     persuaded otherwise. So if you're implementing an executor and want to
     @typeguard the constructor, you'll have to use List[Any] here.
+
+    The DataFlowKernel will set this attribute before calling .start(),
+    if monitoring is enabled:
+
+    monitoring_messages: Optional[Queue[TaggedMonitoringMessage]] - an executor
+        can send messages to the monitoring hub by putting them into
+        this queue.
     """

     label: str = "undefined"
@@ -50,15 +60,11 @@ class ParslExecutor(metaclass=ABCMeta):
     def __init__(
         self,
         *,
-        hub_address: Optional[str] = None,
-        hub_zmq_port: Optional[int] = None,
-        submit_monitoring_radio: Optional[MonitoringRadioSender] = None,
+        monitoring_messages: Optional[Queue[TaggedMonitoringMessage]] = None,
         run_dir: str = ".",
         run_id: Optional[str] = None,
     ):
-        self.hub_address = hub_address
-        self.hub_zmq_port = hub_zmq_port
-        self.submit_monitoring_radio = submit_monitoring_radio
+        self.monitoring_messages = monitoring_messages
         self.run_dir = os.path.abspath(run_dir)
         self.run_id = run_id

@@ -125,33 +131,3 @@ class ParslExecutor(metaclass=ABCMeta):
     @run_id.setter
     def run_id(self, value: Optional[str]) -> None:
         self._run_id = value
-
-    @property
-    def hub_address(self) -> Optional[str]:
-        """Address to the Hub for monitoring.
-        """
-        return self._hub_address
-
-    @hub_address.setter
-    def hub_address(self, value: Optional[str]) -> None:
-        self._hub_address = value
-
-    @property
-    def hub_zmq_port(self) -> Optional[int]:
-        """Port to the Hub for monitoring.
-        """
-        return self._hub_zmq_port
-
-    @hub_zmq_port.setter
-    def hub_zmq_port(self, value: Optional[int]) -> None:
-        self._hub_zmq_port = value
-
-    @property
-    def submit_monitoring_radio(self) -> Optional[MonitoringRadioSender]:
-        """Local radio for sending monitoring messages
-        """
-        return self._submit_monitoring_radio
-
-    @submit_monitoring_radio.setter
-    def submit_monitoring_radio(self, value: Optional[MonitoringRadioSender]) -> None:
-        self._submit_monitoring_radio = value
parsl/executors/flux/executor.py
CHANGED
@@ -231,6 +231,7 @@ class FluxExecutor(ParslExecutor, RepresentationMixin):

     def start(self):
         """Called when DFK starts the executor when the config is loaded."""
+        super().start()
         os.makedirs(self.working_dir, exist_ok=True)
         self._submission_thread.start()

parsl/executors/globus_compute.py
CHANGED
@@ -67,7 +67,7 @@ class GlobusComputeExecutor(ParslExecutor, RepresentationMixin):

     def start(self) -> None:
         """ Start the Globus Compute Executor """
-
+        super().start()

     def submit(self, func: Callable, resource_specification: Dict[str, Any], *args: Any, **kwargs: Any) -> Future:
         """ Submit func to globus-compute
parsl/executors/high_throughput/executor.py
CHANGED
@@ -29,6 +29,7 @@ from parsl.executors.high_throughput.manager_selector import (
 )
 from parsl.executors.status_handling import BlockProviderExecutor
 from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus
+from parsl.monitoring.radios.zmq_router import ZMQRadioReceiver, start_zmq_receiver
 from parsl.process_loggers import wrap_with_logs
 from parsl.providers import LocalProvider
 from parsl.providers.base import ExecutionProvider
@@ -334,6 +335,10 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageInformation):
         self._result_queue_thread_exit = threading.Event()
         self._result_queue_thread: Optional[threading.Thread] = None

+        self.zmq_monitoring: Optional[ZMQRadioReceiver]
+        self.zmq_monitoring = None
+        self.hub_zmq_port = None
+
     radio_mode = "htex"
     enable_mpi_mode: bool = False
     mpi_launcher: str = "mpiexec"
@@ -407,6 +412,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageInformation):
     def start(self):
         """Create the Interchange process and connect to it.
         """
+        super().start()
         if self.encrypted and self.cert_dir is None:
             logger.debug("Creating CurveZMQ certificates")
             self.cert_dir = curvezmq.create_certificates(self.logdir)
@@ -427,6 +433,15 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageInformation):
             self.loopback_address, self.interchange_port_range, self.cert_dir
         )

+        if self.monitoring_messages is not None:
+            self.zmq_monitoring = start_zmq_receiver(monitoring_messages=self.monitoring_messages,
+                                                     loopback_address=self.loopback_address,
+                                                     port_range=self.interchange_port_range,
+                                                     logdir=self.logdir,
+                                                     worker_debug=self.worker_debug,
+                                                     )
+            self.hub_zmq_port = self.zmq_monitoring.port
+
         self._result_queue_thread = None
         self._start_result_queue_thread()
         self._start_local_interchange_process()
@@ -861,6 +876,9 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageInformation):
         if self._result_queue_thread:
             self._result_queue_thread.join()

+        if self.zmq_monitoring:
+            self.zmq_monitoring.close()
+
         logger.info("Finished HighThroughputExecutor shutdown attempt")

     def get_usage_information(self):
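The HighThroughputExecutor now starts its own ZMQ receiver (start_zmq_receiver) when a monitoring_messages queue is present, records the chosen port in hub_zmq_port, and closes the receiver on shutdown. The following is a rough standalone sketch of the underlying idea (a socket whose traffic is forwarded onto a multiprocessing queue), using a plain PULL/PUSH pair and pickle; it is not parsl's ZMQRadioReceiver and its names are illustrative.

```python
# Rough sketch of a receiver thread that forwards pickled ZMQ messages onto a
# multiprocessing queue. Parsl's zmq_router is more involved; this only shows
# the forwarding idea and the "bind first, then report the port" step.
import pickle
import threading
import multiprocessing

import zmq


def forward(sock, msgs, stop_event):
    # Forward every pickled message from the socket onto the queue.
    sock.RCVTIMEO = 100                      # wake up regularly to check stop_event
    while not stop_event.is_set():
        try:
            msgs.put(pickle.loads(sock.recv()))
        except zmq.Again:
            continue


if __name__ == "__main__":
    msgs = multiprocessing.get_context("spawn").Queue()
    stop_event = threading.Event()

    ctx = zmq.Context()
    receiver = ctx.socket(zmq.PULL)
    port = receiver.bind_to_random_port("tcp://127.0.0.1")   # analogous to zmq_monitoring.port
    t = threading.Thread(target=forward, args=(receiver, msgs, stop_event), daemon=True)
    t.start()

    sender = ctx.socket(zmq.PUSH)
    sender.connect(f"tcp://127.0.0.1:{port}")
    sender.send(pickle.dumps(("RESOURCE_INFO", {"event": "hello"})))
    print(msgs.get())

    stop_event.set()
    t.join()
    ctx.destroy(linger=0)
```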
parsl/executors/high_throughput/mpi_resource_management.py
CHANGED
@@ -203,6 +203,8 @@ class MPITaskScheduler(TaskScheduler):
     def get_result(self, block: bool = True, timeout: Optional[float] = None):
         """Return result and relinquish provisioned nodes"""
         result_pkl = self.pending_result_q.get(block, timeout)
+        if result_pkl is None:
+            return None
         result_dict = pickle.loads(result_pkl)
         # TODO (wardlt): If the task did not request nodes, it won't be in `self._map_tasks_to_nodes`.
         # Causes Parsl to hang. See Issue #3427
parsl/executors/high_throughput/process_worker_pool.py
CHANGED
@@ -15,6 +15,7 @@ import threading
 import time
 import uuid
 from importlib.metadata import distributions
+from multiprocessing.context import SpawnProcess
 from multiprocessing.managers import DictProxy
 from multiprocessing.sharedctypes import Synchronized
 from typing import Dict, List, Optional, Sequence
@@ -403,52 +404,34 @@ class Manager:
         result_outgoing.connect(self._result_q_url)
         logger.info("Manager result pipe connected to interchange")

-        push_poll_period = max(10, self.poll_period) / 1000  # push_poll_period must be atleast 10 ms
-        logger.debug("push poll period: {}".format(push_poll_period))
-
-        last_beat = time.time()
-        last_result_beat = time.time()
-        items = []
-
         while not self._stop_event.is_set():
+            logger.debug("Starting pending_result_queue get")
             try:
-                …
-                r…
-                …
-                …
-                …
-                logger.debug("…
-            except Exception…
-                logger.exception("…
-
-            if time.time() > last_result_beat + self.heartbeat_period:
-                heartbeat_message = f"last_result_beat={last_result_beat} heartbeat_period={self.heartbeat_period} seconds"
-                logger.info(f"Sending heartbeat via results connection: {heartbeat_message}")
-                last_result_beat = time.time()
-                items.append(pickle.dumps({'type': 'heartbeat'}))
-
-            if len(items) >= self.max_queue_size or time.time() > last_beat + push_poll_period:
-                last_beat = time.time()
-                if items:
-                    logger.debug(f"Result send: Pushing {len(items)} items")
-                    result_outgoing.send_multipart(items)
-                    logger.debug("Result send: Pushed")
-                    items = []
-                else:
-                    logger.debug("Result send: No items to push")
-            else:
-                logger.debug(f"Result send: check condition not met - deferring {len(items)} result items")
+                r = self.task_scheduler.get_result()
+                if r is None:
+                    continue
+                logger.debug("Result received from worker: %s", id(r))
+                result_outgoing.send(r)
+                logger.debug("Result sent to interchange: %s", id(r))
+            except Exception:
+                logger.exception("Failed to send result to interchange")

         result_outgoing.close()
-        logger.…
+        logger.debug("Exiting")

     @wrap_with_logs
-    def …
+    def heartbeater(self):
+        while not self._stop_event.wait(self.heartbeat_period):
+            heartbeat_message = f"heartbeat_period={self.heartbeat_period} seconds"
+            logger.info(f"Sending heartbeat via results connection: {heartbeat_message}")
+            self.pending_result_queue.put(pickle.dumps({'type': 'heartbeat'}))
+
+    def worker_watchdog(self, procs: dict[int, SpawnProcess]):
         """Keeps workers alive."""
         logger.debug("Starting worker watchdog")

         while not self._stop_event.wait(self.heartbeat_period):
-            for worker_id, p in self.procs.items():
+            for worker_id, p in procs.items():
                 if not p.is_alive():
                     logger.error("Worker {} has died".format(worker_id))
                     try:
@@ -466,11 +449,10 @@ class Manager:
                     except KeyError:
                         logger.info("Worker {} was not busy when it died".format(worker_id))

-                    p = self._start_worker(worker_id)
-                    self.procs[worker_id] = p
+                    procs[worker_id] = self._start_worker(worker_id)
                     logger.info("Worker {} has been restarted".format(worker_id))

-        logger.…
+        logger.debug("Exiting")

     @wrap_with_logs
     def handle_monitoring_messages(self):
@@ -485,32 +467,28 @@ class Manager:
         """
         logger.debug("Starting monitoring handler thread")

-        poll_period_s = max(10, self.poll_period) / 1000  # Must be at least 10 ms
-
         while not self._stop_event.is_set():
             try:
                 logger.debug("Starting monitor_queue.get()")
-                msg = self.monitoring_queue.get(block=True, timeout=poll_period_s)
-            …
-                …
-            except Exception as e:
-                logger.exception(f"Got an exception: {e}")
-            else:
+                msg = self.monitoring_queue.get(block=True)
+                if msg is None:
+                    continue
                 logger.debug("Got a monitoring message")
                 self.pending_result_queue.put(msg)
                 logger.debug("Put monitoring message on pending_result_queue")
+            except Exception:
+                logger.exception("Failed to forward monitoring message")

-        logger.…
+        logger.debug("Exiting")

     def start(self):
         """ Start the worker processes.

         TODO: Move task receiving to a thread
         """
-        …
+        procs: dict[int, SpawnProcess] = {}
         for worker_id in range(self.worker_count):
-            p = self._start_worker(worker_id)
-            self.procs[worker_id] = p
+            procs[worker_id] = self._start_worker(worker_id)

         logger.debug("Workers started")

@@ -519,40 +497,69 @@ class Manager:
             target=self.push_results, name="Result-Pusher"
         )
         thr_worker_watchdog = threading.Thread(
-            target=self.worker_watchdog, name="worker-watchdog"
+            target=self.worker_watchdog, args=(procs,), name="worker-watchdog"
         )
         thr_monitoring_handler = threading.Thread(
             target=self.handle_monitoring_messages, name="Monitoring-Handler"
         )
+        thr_heartbeater = threading.Thread(target=self.heartbeater, name="Heartbeater")

         thr_task_puller.start()
         thr_result_pusher.start()
         thr_worker_watchdog.start()
         thr_monitoring_handler.start()
+        thr_heartbeater.start()

         logger.info("Manager threads started")

         # This might need a multiprocessing event to signal back.
         self._stop_event.wait()
-        logger.…
+        logger.info("Stop event set; terminating worker processes")
+
+        # Invite blocking threads to quit
+        self.monitoring_queue.put(None)
+        self.pending_result_queue.put(None)

+        thr_heartbeater.join()
         thr_task_puller.join()
         thr_result_pusher.join()
         thr_worker_watchdog.join()
         thr_monitoring_handler.join()
-        …
-        …
-        …
-        …
-        …
-        …
+
+        for worker_id in procs:
+            p = procs[worker_id]
+            proc_info = f"(PID: {p.pid}, Worker ID: {worker_id})"
+            logger.debug(f"Signaling worker {p.name} (TERM). {proc_info}")
+            p.terminate()

         self.zmq_context.term()
+
+        # give processes 1 second to gracefully shut themselves down, based on the
+        # SIGTERM (.terminate()) just sent; after then, we pull the plug.
+        force_child_shutdown_at = time.monotonic() + 1
+        while procs:
+            worker_id, p = procs.popitem()
+            timeout = max(force_child_shutdown_at - time.monotonic(), 0.000001)
+            p.join(timeout=timeout)
+            proc_info = f"(PID: {p.pid}, Worker ID: {worker_id})"
+            if p.exitcode is not None:
+                logger.debug(
+                    "Worker joined successfully. %s (exitcode: %s)", proc_info, p.exitcode
+                )
+
+            else:
+                logger.warning(
+                    f"Worker {p.name} ({worker_id}) failed to terminate in a timely"
+                    f" manner; sending KILL signal to process. {proc_info}"
+                )
+                p.kill()
+                p.join()
+            p.close()
+
         delta = time.time() - self._start_time
         logger.info("process_worker_pool ran for {} seconds".format(delta))
-        return

-    def _start_worker(self, worker_id: int):
+    def _start_worker(self, worker_id: int) -> SpawnProcess:
         p = SpawnContext.Process(
             target=worker,
             args=(
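The rewritten Manager.start() shutdown path asks every worker to exit with terminate(), then gives the whole group a single one-second grace period before escalating to kill(). A reduced, self-contained sketch of that terminate-then-kill pattern, using an illustrative sleep workload rather than parsl's worker function:

```python
# Reduced sketch of the terminate-then-kill pattern used above: one shared
# deadline for all children, escalate to kill() only for stragglers.
import time
import multiprocessing

SpawnContext = multiprocessing.get_context("spawn")


def sleepy(seconds: float) -> None:
    time.sleep(seconds)


if __name__ == "__main__":
    procs = {i: SpawnContext.Process(target=sleepy, args=(30,)) for i in range(4)}
    for p in procs.values():
        p.start()

    for p in procs.values():
        p.terminate()                        # polite request (SIGTERM on POSIX)

    force_child_shutdown_at = time.monotonic() + 1   # shared grace period
    while procs:
        worker_id, p = procs.popitem()
        p.join(timeout=max(force_child_shutdown_at - time.monotonic(), 0.000001))
        if p.exitcode is None:               # still running after the deadline
            p.kill()
            p.join()
        p.close()
```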
@@ -939,27 +946,27 @@ if __name__ == "__main__":
     )
     logger.info(
         f"\n Python version: {sys.version}"
-        f" Debug logging: {args.debug}"
-        f" Certificates dir: {args.cert_dir}"
-        f" Log dir: {args.logdir}"
-        f" Manager ID: {args.uid}"
-        f" Block ID: {args.block_id}"
-        f" cores_per_worker: {args.cores_per_worker}"
-        f" mem_per_worker: {args.mem_per_worker}"
-        f" task_port: {args.task_port}"
-        f" result_port: {args.result_port}"
-        f" addresses: {args.addresses}"
-        f" max_workers_per_node: {args.max_workers_per_node}"
-        f" poll_period: {args.poll}"
-        f" address_probe_timeout: {args.address_probe_timeout}"
-        f" Prefetch capacity: {args.prefetch_capacity}"
-        f" Heartbeat threshold: {args.hb_threshold}"
-        f" Heartbeat period: {args.hb_period}"
-        f" Drain period: {args.drain_period}"
-        f" CPU affinity: {args.cpu_affinity}"
-        f" Accelerators: {' '.join(args.available_accelerators)}"
-        f" enable_mpi_mode: {args.enable_mpi_mode}"
-        f" mpi_launcher: {args.mpi_launcher}"
+        f"\n Debug logging: {args.debug}"
+        f"\n Certificates dir: {args.cert_dir}"
+        f"\n Log dir: {args.logdir}"
+        f"\n Manager ID: {args.uid}"
+        f"\n Block ID: {args.block_id}"
+        f"\n cores_per_worker: {args.cores_per_worker}"
+        f"\n mem_per_worker: {args.mem_per_worker}"
+        f"\n task_port: {args.task_port}"
+        f"\n result_port: {args.result_port}"
+        f"\n addresses: {args.addresses}"
+        f"\n max_workers_per_node: {args.max_workers_per_node}"
+        f"\n poll_period: {args.poll}"
+        f"\n address_probe_timeout: {args.address_probe_timeout}"
+        f"\n Prefetch capacity: {args.prefetch_capacity}"
+        f"\n Heartbeat threshold: {args.hb_threshold}"
+        f"\n Heartbeat period: {args.hb_period}"
+        f"\n Drain period: {args.drain_period}"
+        f"\n CPU affinity: {args.cpu_affinity}"
+        f"\n Accelerators: {' '.join(args.available_accelerators)}"
+        f"\n enable_mpi_mode: {args.enable_mpi_mode}"
+        f"\n mpi_launcher: {args.mpi_launcher}"
     )
     try:
         manager = Manager(task_port=args.task_port,
parsl/executors/radical/executor.py
CHANGED
@@ -215,6 +215,7 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
         """Create the Pilot component and pass it.
         """
         logger.info("starting RadicalPilotExecutor")
+        super().start()
         logger.info('Parsl: {0}'.format(parsl.__version__))
         logger.info('RADICAL pilot: {0}'.format(rp.version))
         self.session = rp.Session(cfg={'base': self.run_dir},
parsl/executors/status_handling.py
CHANGED
@@ -14,6 +14,7 @@ from parsl.executors.errors import BadStateException, ScalingFailed
 from parsl.jobs.error_handlers import noop_error_handler, simple_error_handler
 from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus
 from parsl.monitoring.message_type import MessageType
+from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadioSender
 from parsl.providers.base import ExecutionProvider
 from parsl.utils import AtomicIDCounter

@@ -83,6 +84,13 @@ class BlockProviderExecutor(ParslExecutor):
         # of pending, active and recently terminated blocks
         self._status = {} # type: Dict[str, JobStatus]

+        self.submit_monitoring_radio: Optional[MultiprocessingQueueRadioSender] = None
+
+    def start(self):
+        super().start()
+        if self.monitoring_messages:
+            self.submit_monitoring_radio = MultiprocessingQueueRadioSender(self.monitoring_messages)
+
     def _make_status_dict(self, block_ids: List[str], status_list: List[JobStatus]) -> Dict[str, JobStatus]:
         """Given a list of block ids and a list of corresponding status strings,
         returns a dictionary mapping each block id to the corresponding status
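BlockProviderExecutor.start() now wraps the monitoring_messages queue in a MultiprocessingQueueRadioSender, so the rest of the class can send monitoring messages through a single submit_monitoring_radio object. A standalone sketch of that wrapper idea follows; MiniQueueRadioSender and the message payload are illustrative, not parsl's classes.

```python
# Sketch of the "radio sender wrapping a queue" idea used above: callers see a
# send() method and do not care that messages land on a multiprocessing queue.
import multiprocessing


class MiniQueueRadioSender:
    def __init__(self, queue) -> None:
        self._queue = queue

    def send(self, message) -> None:
        self._queue.put(message)


if __name__ == "__main__":
    ctx = multiprocessing.get_context("spawn")
    resource_msgs = ctx.Queue()
    radio = MiniQueueRadioSender(resource_msgs)        # analogous to submit_monitoring_radio
    radio.send(("BLOCK_INFO", {"block_id": "0", "status": "PENDING"}))
    print(resource_msgs.get())
```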
parsl/executors/taskvine/executor.py
CHANGED
@@ -239,6 +239,7 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         retrieve Parsl tasks within the TaskVine system.
         """

+        super().start()
         # Synchronize connection and communication settings between the manager and factory
         self.__synchronize_manager_factory_comm_settings()

parsl/executors/workqueue/executor.py
CHANGED
@@ -314,6 +314,7 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         """Create submit process and collector thread to create, send, and
         retrieve Parsl tasks within the Work Queue system.
         """
+        super().start()
         self.tasks_lock = threading.Lock()

         # Create directories for data and results
parsl/monitoring/db_manager.py
CHANGED
@@ -1,6 +1,7 @@
 import datetime
 import logging
 import multiprocessing.queues as mpq
+import multiprocessing.synchronize as mpe
 import os
 import queue
 import threading
@@ -278,11 +279,13 @@

 class DatabaseManager:
     def __init__(self,
+                 *,
                 db_url: str = 'sqlite:///runinfo/monitoring.db',
                 run_dir: str = '.',
                 logging_level: int = logging.INFO,
                 batching_interval: float = 1,
                 batching_threshold: float = 99999,
+                 exit_event: mpe.Event
                 ):

         self.workflow_end = False
@@ -307,6 +310,8 @@ class DatabaseManager:
         self.pending_block_queue: queue.Queue[MonitoringMessage] = queue.Queue()
         self.pending_resource_queue: queue.Queue[MonitoringMessage] = queue.Queue()

+        self.external_exit_event = exit_event
+
     def start(self,
               resource_queue: mpq.Queue) -> None:

@@ -555,15 +560,16 @@ class DatabaseManager:
         while not kill_event.is_set() or logs_queue.qsize() != 0:
             logger.debug("Checking STOP conditions: kill event: %s, queue has entries: %s",
                          kill_event.is_set(), logs_queue.qsize() != 0)
+
+            if self.external_exit_event.is_set():
+                self.close()
+
             try:
                 x = logs_queue.get(timeout=0.1)
             except queue.Empty:
                 continue
             else:
-                …
-                    self.close()
-                else:
-                    self._dispatch_to_internal(x)
+                self._dispatch_to_internal(x)

     def _dispatch_to_internal(self, x: Tuple) -> None:
         assert isinstance(x, tuple)
@@ -678,11 +684,11 @@

 @wrap_with_logs(target="database_manager")
 @typeguard.typechecked
-def dbm_starter(exception_q: mpq.Queue,
-                resource_msgs: mpq.Queue,
+def dbm_starter(resource_msgs: mpq.Queue,
                 db_url: str,
                 run_dir: str,
-                logging_level: int
+                logging_level: int,
+                exit_event: mpe.Event) -> None:
     """Start the database manager process

     The DFK should start this function. The args, kwargs match that of the monitoring config
@@ -693,16 +699,16 @@ def dbm_starter(exception_q: mpq.Queue,
     try:
         dbm = DatabaseManager(db_url=db_url,
                               run_dir=run_dir,
-                              logging_level=logging_level
+                              logging_level=logging_level,
+                              exit_event=exit_event)
         logger.info("Starting dbm in dbm starter")
         dbm.start(resource_msgs)
     except KeyboardInterrupt:
         logger.exception("KeyboardInterrupt signal caught")
         dbm.close()
         raise
-    except Exception as e:
+    except Exception:
         logger.exception("dbm.start exception")
-        exception_q.put(("DBM", str(e)))
         dbm.close()

     logger.info("End of dbm_starter")
parsl/monitoring/errors.py
CHANGED