parsl 2024.7.22__py3-none-any.whl → 2024.8.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/dataflow/dflow.py +4 -10
- parsl/executors/base.py +8 -8
- parsl/executors/flux/executor.py +7 -7
- parsl/executors/high_throughput/executor.py +55 -55
- parsl/executors/high_throughput/interchange.py +37 -37
- parsl/executors/high_throughput/manager_record.py +1 -0
- parsl/executors/high_throughput/manager_selector.py +25 -0
- parsl/executors/high_throughput/process_worker_pool.py +2 -0
- parsl/executors/status_handling.py +52 -21
- parsl/executors/taskvine/executor.py +0 -18
- parsl/executors/workqueue/executor.py +0 -18
- parsl/monitoring/errors.py +6 -0
- parsl/monitoring/monitoring.py +6 -5
- parsl/monitoring/radios.py +23 -7
- parsl/monitoring/remote.py +12 -12
- parsl/monitoring/router.py +71 -30
- parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
- parsl/tests/test_htex/test_htex.py +28 -19
- parsl/tests/test_htex/test_zmq_binding.py +2 -0
- parsl/tests/test_monitoring/test_basic.py +14 -1
- parsl/tests/test_monitoring/test_fuzz_zmq.py +2 -2
- parsl/tests/test_mpi_apps/test_mpiex.py +1 -1
- parsl/version.py +1 -1
- {parsl-2024.7.22.data → parsl-2024.8.5.data}/scripts/interchange.py +37 -37
- {parsl-2024.7.22.data → parsl-2024.8.5.data}/scripts/process_worker_pool.py +2 -0
- parsl-2024.8.5.dist-info/METADATA +101 -0
- {parsl-2024.7.22.dist-info → parsl-2024.8.5.dist-info}/RECORD +33 -30
- {parsl-2024.7.22.dist-info → parsl-2024.8.5.dist-info}/WHEEL +1 -1
- parsl-2024.7.22.dist-info/METADATA +0 -101
- {parsl-2024.7.22.data → parsl-2024.8.5.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2024.7.22.data → parsl-2024.8.5.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2024.7.22.dist-info → parsl-2024.8.5.dist-info}/LICENSE +0 -0
- {parsl-2024.7.22.dist-info → parsl-2024.8.5.dist-info}/entry_points.txt +0 -0
- {parsl-2024.7.22.dist-info → parsl-2024.8.5.dist-info}/top_level.txt +0 -0
parsl/dataflow/dflow.py
CHANGED
@@ -113,14 +113,10 @@ class DataFlowKernel:
         self.monitoring: Optional[MonitoringHub]
         self.monitoring = config.monitoring
 
-        # hub address and port for interchange to connect
-        self.hub_address = None  # type: Optional[str]
-        self.hub_zmq_port = None  # type: Optional[int]
         if self.monitoring:
             if self.monitoring.logdir is None:
                 self.monitoring.logdir = self.run_dir
-            self.hub_address = self.monitoring.hub_address
-            self.hub_zmq_port = self.monitoring.start(self.run_id, self.run_dir, self.config.run_dir)
+            self.monitoring.start(self.run_id, self.run_dir, self.config.run_dir)
 
         self.time_began = datetime.datetime.now()
         self.time_completed: Optional[datetime.datetime] = None
@@ -1181,10 +1177,10 @@ class DataFlowKernel:
         for executor in executors:
             executor.run_id = self.run_id
             executor.run_dir = self.run_dir
-            executor.hub_address = self.hub_address
-            executor.hub_zmq_port = self.hub_zmq_port
             if self.monitoring:
-                executor.monitoring_radio = self.monitoring.radio
+                executor.hub_address = self.monitoring.hub_address
+                executor.hub_zmq_port = self.monitoring.hub_zmq_port
+                executor.submit_monitoring_radio = self.monitoring.radio
             if hasattr(executor, 'provider'):
                 if hasattr(executor.provider, 'script_dir'):
                     executor.provider.script_dir = os.path.join(self.run_dir, 'submit_scripts')
@@ -1460,8 +1456,6 @@
         Returns:
             - dict containing, hashed -> future mappings
         """
-        self.memo_lookup_table = None
-
         if checkpointDirs:
             return self._load_checkpoints(checkpointDirs)
         else:
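With these changes, MonitoringHub.start() no longer returns the hub's ZMQ port, and the DataFlowKernel stops caching hub coordinates itself: executors read them straight off the configured hub. A rough sketch of the resulting wiring, using the attribute names visible in the hunks above (dfk and executor are illustrative stand-ins, not names from the file):

    # Sketch only: mirrors the executor wiring shown above.
    if dfk.monitoring:
        executor.hub_address = dfk.monitoring.hub_address        # published by the hub itself
        executor.hub_zmq_port = dfk.monitoring.hub_zmq_port      # no longer returned by start()
        executor.submit_monitoring_radio = dfk.monitoring.radio  # renamed from monitoring_radio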
parsl/executors/base.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Any, Callable, Dict, Optional
 
 from typing_extensions import Literal, Self
 
-from parsl.monitoring.radios import MonitoringRadio
+from parsl.monitoring.radios import MonitoringRadioSender
 
 
 class ParslExecutor(metaclass=ABCMeta):
@@ -52,13 +52,13 @@ class ParslExecutor(metaclass=ABCMeta):
         *,
         hub_address: Optional[str] = None,
         hub_zmq_port: Optional[int] = None,
-        monitoring_radio: Optional[MonitoringRadio] = None,
+        submit_monitoring_radio: Optional[MonitoringRadioSender] = None,
         run_dir: str = ".",
         run_id: Optional[str] = None,
     ):
         self.hub_address = hub_address
         self.hub_zmq_port = hub_zmq_port
-        self.monitoring_radio = monitoring_radio
+        self.submit_monitoring_radio = submit_monitoring_radio
         self.run_dir = os.path.abspath(run_dir)
         self.run_id = run_id
 
@@ -147,11 +147,11 @@ class ParslExecutor(metaclass=ABCMeta):
         self._hub_zmq_port = value
 
     @property
-    def monitoring_radio(self) -> Optional[MonitoringRadio]:
+    def submit_monitoring_radio(self) -> Optional[MonitoringRadioSender]:
         """Local radio for sending monitoring messages
         """
-        return self._monitoring_radio
+        return self._submit_monitoring_radio
 
-    @monitoring_radio.setter
-    def monitoring_radio(self, value: Optional[MonitoringRadio]) -> None:
-        self._monitoring_radio = value
+    @submit_monitoring_radio.setter
+    def submit_monitoring_radio(self, value: Optional[MonitoringRadioSender]) -> None:
+        self._submit_monitoring_radio = value
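The rename means code that previously read executor.monitoring_radio should now use executor.submit_monitoring_radio. A minimal, hedged sketch of sending through the radio, with the message shape borrowed from the interchange changes later in this diff (the payload dict here is invented for illustration):

    from parsl.monitoring.message_type import MessageType

    # Sketch: `executor` is any ParslExecutor wired up by the DataFlowKernel.
    if executor.submit_monitoring_radio is not None:
        # Monitoring messages are (MessageType, payload) pairs.
        executor.submit_monitoring_radio.send((MessageType.NODE_INFO, {"example": "payload"}))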
parsl/executors/flux/executor.py
CHANGED
@@ -200,7 +200,6 @@ class FluxExecutor(ParslExecutor, RepresentationMixin):
             raise EnvironmentError("Cannot find Flux installation in PATH")
         self.flux_path = os.path.abspath(flux_path)
         self._task_id_counter = itertools.count()
-        self._socket = zmq.Context().socket(zmq.REP)
         # Assumes a launch command cannot be None or empty
         self.launch_cmd = launch_cmd or self.DEFAULT_LAUNCH_CMD
         self._submission_queue: queue.Queue = queue.Queue()
@@ -213,7 +212,6 @@ class FluxExecutor(ParslExecutor, RepresentationMixin):
             args=(
                 self._submission_queue,
                 self._stop_event,
-                self._socket,
                 self.working_dir,
                 self.flux_executor_kwargs,
                 self.provider,
@@ -306,11 +304,13 @@ def _submit_wrapper(
 
     If an exception is thrown, error out all submitted tasks.
     """
-    try:
-        _submit_flux_jobs(submission_queue, stop_event, *args, **kwargs)
-    except Exception as exc:
-        _error_out_jobs(submission_queue, stop_event, exc)
-        raise
+    with zmq.Context() as ctx:
+        with ctx.socket(zmq.REP) as socket:
+            try:
+                _submit_flux_jobs(submission_queue, stop_event, socket, *args, **kwargs)
+            except Exception as exc:
+                _error_out_jobs(submission_queue, stop_event, exc)
+                raise
 
 
 def _error_out_jobs(
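The effect of this change is that the REP socket is created, used, and closed entirely inside the submission thread, rather than being constructed in __init__ and handed across a thread boundary. A minimal sketch of the lifecycle pattern the new code relies on (endpoint and names are illustrative; pyzmq contexts and sockets are context managers, as the new code itself demonstrates):

    import zmq

    def thread_main() -> None:
        # Create ZMQ resources on the thread that uses them: pyzmq sockets
        # are not safe to share between threads.
        with zmq.Context() as ctx:                 # terminated on exit
            with ctx.socket(zmq.REP) as sock:      # closed on exit
                sock.bind("tcp://127.0.0.1:5555")  # illustrative endpoint
                request = sock.recv()
                sock.send(b"ack: " + request)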
parsl/executors/high_throughput/executor.py
CHANGED
@@ -20,6 +20,10 @@ from parsl.data_provider.staging import Staging
 from parsl.executors.errors import BadMessage, ScalingFailed
 from parsl.executors.high_throughput import zmq_pipes
 from parsl.executors.high_throughput.errors import CommandClientTimeoutError
+from parsl.executors.high_throughput.manager_selector import (
+    ManagerSelector,
+    RandomManagerSelector,
+)
 from parsl.executors.high_throughput.mpi_prefix_composer import (
     VALID_LAUNCHERS,
     validate_resource_spec,
@@ -56,7 +60,7 @@ DEFAULT_LAUNCH_CMD = ("process_worker_pool.py {debug} {max_workers_per_node} "
                       "--mpi-launcher={mpi_launcher} "
                       "--available-accelerators {accelerators}")
 
-DEFAULT_INTERCHANGE_LAUNCH_CMD = "interchange.py"
+DEFAULT_INTERCHANGE_LAUNCH_CMD = ["interchange.py"]
 
 GENERAL_HTEX_PARAM_DOCS = """provider : :class:`~parsl.providers.base.ExecutionProvider`
        Provider to access computation resources. Can be one of :class:`~parsl.providers.aws.aws.EC2Provider`,
@@ -78,9 +82,9 @@ GENERAL_HTEX_PARAM_DOCS = """provider : :class:`~parsl.providers.base.ExecutionProvider`
        cores_per_worker, nodes_per_block, heartbeat_period ,heartbeat_threshold, logdir). For example:
        launch_cmd="process_worker_pool.py {debug} -c {cores_per_worker} --task_url={task_url} --result_url={result_url}"
 
-    interchange_launch_cmd : str
-        Custom command line string to launch the interchange process from the executor. If undefined,
-        the executor will use the default "interchange.py" command.
+    interchange_launch_cmd : Sequence[str]
+        Custom sequence of command line tokens to launch the interchange process from the executor. If
+        undefined, the executor will use the default "interchange.py" command.
 
     address : string
         An address to connect to the main Parsl process which is reachable from the network in which
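Since interchange_launch_cmd is now a sequence of tokens rather than a single shell string, wrapping the interchange in another command no longer involves shell quoting. A hedged example of the new parameter; the wrapper tokens are illustrative only:

    from parsl.executors import HighThroughputExecutor

    # Sketch: each token in the sequence is passed to the interchange
    # process as-is, with no shell interpretation.
    htex = HighThroughputExecutor(
        label="htex_wrapped_interchange",
        interchange_launch_cmd=["valgrind", "--quiet", "interchange.py"],
    )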
@@ -238,7 +242,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageInformation):
                 label: str = 'HighThroughputExecutor',
                 provider: ExecutionProvider = LocalProvider(),
                 launch_cmd: Optional[str] = None,
-                interchange_launch_cmd: Optional[str] = None,
+                interchange_launch_cmd: Optional[Sequence[str]] = None,
                 address: Optional[str] = None,
                 worker_ports: Optional[Tuple[int, int]] = None,
                 worker_port_range: Optional[Tuple[int, int]] = (54000, 55000),
@@ -261,6 +265,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageInformation):
                 worker_logdir_root: Optional[str] = None,
                 enable_mpi_mode: bool = False,
                 mpi_launcher: str = "mpiexec",
+                manager_selector: ManagerSelector = RandomManagerSelector(),
                 block_error_handler: Union[bool, Callable[[BlockProviderExecutor, Dict[str, JobStatus]], None]] = True,
                 encrypted: bool = False):
 
@@ -276,6 +281,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageInformation):
         self.prefetch_capacity = prefetch_capacity
         self.address = address
         self.address_probe_timeout = address_probe_timeout
+        self.manager_selector = manager_selector
         if self.address:
             self.all_addresses = address
         else:
@@ -456,8 +462,6 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageInformation):
               "task_id" : <task_id>
               "exception" : serialized exception object, on failure
            }
-
-        The `None` message is a die request.
         """
         logger.debug("Result queue worker starting")
 
@@ -475,58 +479,53 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageInformation):
 
             else:
 
-                if msgs is None:
-                    logger.debug("Got None, exiting")
-                    return
+                for serialized_msg in msgs:
+                    try:
+                        msg = pickle.loads(serialized_msg)
+                    except pickle.UnpicklingError:
+                        raise BadMessage("Message received could not be unpickled")
 
-                else:
-                    for serialized_msg in msgs:
+                    if msg['type'] == 'heartbeat':
+                        continue
+                    elif msg['type'] == 'result':
                         try:
-                            msg = pickle.loads(serialized_msg)
-                        except pickle.UnpicklingError:
-                            raise BadMessage("Message received could not be unpickled")
+                            tid = msg['task_id']
+                        except Exception:
+                            raise BadMessage("Message received does not contain 'task_id' field")
+
+                        if tid == -1 and 'exception' in msg:
+                            logger.warning("Executor shutting down due to exception from interchange")
+                            exception = deserialize(msg['exception'])
+                            self.set_bad_state_and_fail_all(exception)
+                            break
+
+                        task_fut = self.tasks.pop(tid)
+
+                        if 'result' in msg:
+                            result = deserialize(msg['result'])
+                            task_fut.set_result(result)
 
-                        if msg['type'] == 'heartbeat':
-                            continue
-                        elif msg['type'] == 'result':
+                        elif 'exception' in msg:
                             try:
-                                tid = msg['task_id']
-                            except Exception:
-                                raise BadMessage("Message received does not contain 'task_id' field")
-
-                            if tid == -1 and 'exception' in msg:
-                                logger.warning("Executor shutting down due to exception from interchange")
-                                exception = deserialize(msg['exception'])
-                                self.set_bad_state_and_fail_all(exception)
-                                break
-
-                            task_fut = self.tasks.pop(tid)
-
-                            if 'result' in msg:
-                                result = deserialize(msg['result'])
-                                task_fut.set_result(result)
-
-                            elif 'exception' in msg:
-                                try:
-                                    s = deserialize(msg['exception'])
-                                    # s should be a RemoteExceptionWrapper... so we can reraise it
-                                    if isinstance(s, RemoteExceptionWrapper):
-                                        try:
-                                            s.reraise()
-                                        except Exception as e:
-                                            task_fut.set_exception(e)
-                                    elif isinstance(s, Exception):
-                                        task_fut.set_exception(s)
-                                    else:
-                                        raise ValueError("Unknown exception-like type received: {}".format(type(s)))
-                                except Exception as e:
-                                    # TODO could be a proper wrapped exception?
-                                    task_fut.set_exception(
-                                        DeserializationError("Received exception, but handling also threw an exception: {}".format(e)))
-                            else:
-                                raise BadMessage("Message received is neither result or exception")
+                                s = deserialize(msg['exception'])
+                                # s should be a RemoteExceptionWrapper... so we can reraise it
+                                if isinstance(s, RemoteExceptionWrapper):
+                                    try:
+                                        s.reraise()
+                                    except Exception as e:
+                                        task_fut.set_exception(e)
+                                elif isinstance(s, Exception):
+                                    task_fut.set_exception(s)
+                                else:
+                                    raise ValueError("Unknown exception-like type received: {}".format(type(s)))
+                            except Exception as e:
+                                # TODO could be a proper wrapped exception?
+                                task_fut.set_exception(
+                                    DeserializationError("Received exception, but handling also threw an exception: {}".format(e)))
                         else:
-                            raise BadMessage("Message received with unknown type {}".format(msg['type']))
+                            raise BadMessage("Message received is neither result or exception")
+                    else:
+                        raise BadMessage("Message received with unknown type {}".format(msg['type']))
 
         logger.info("Result queue worker finished")
 
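For reference, the messages this worker handles are pickled dicts tagged with a 'type' field, as described in the docstring hunk above. A hedged sketch of well-formed messages, using only field names that appear in the code (parsl.serialize.serialize stands in for whatever serializer produced the payload):

    import pickle

    from parsl.serialize import serialize

    heartbeat_msg = pickle.dumps({'type': 'heartbeat'})
    result_msg = pickle.dumps({
        'type': 'result',
        'task_id': 42,                 # task_id == -1 signals a fatal interchange exception
        'result': serialize("hello"),  # or 'exception': a serialized exception object
    })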
@@ -551,11 +550,12 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageInformation):
             "poll_period": self.poll_period,
             "logging_level": logging.DEBUG if self.worker_debug else logging.INFO,
             "cert_dir": self.cert_dir,
+            "manager_selector": self.manager_selector,
         }
 
         config_pickle = pickle.dumps(interchange_config)
 
-        self.interchange_proc = subprocess.Popen(self.interchange_launch_cmd…
+        self.interchange_proc = subprocess.Popen(self.interchange_launch_cmd, stdin=subprocess.PIPE)
         stdin = self.interchange_proc.stdin
         assert stdin is not None, "Popen should have created an IO object (vs default None) because of PIPE mode"
parsl/executors/high_throughput/interchange.py
CHANGED
@@ -6,7 +6,6 @@ import os
 import pickle
 import platform
 import queue
-import random
 import signal
 import sys
 import threading
@@ -19,7 +18,9 @@ from parsl import curvezmq
 from parsl.app.errors import RemoteExceptionWrapper
 from parsl.executors.high_throughput.errors import ManagerLost, VersionMismatch
 from parsl.executors.high_throughput.manager_record import ManagerRecord
+from parsl.executors.high_throughput.manager_selector import ManagerSelector
 from parsl.monitoring.message_type import MessageType
+from parsl.monitoring.radios import MonitoringRadioSender, ZMQRadioSender
 from parsl.process_loggers import wrap_with_logs
 from parsl.serialize import serialize as serialize_object
 from parsl.utils import setproctitle
@@ -53,6 +54,7 @@ class Interchange:
                 logging_level: int,
                 poll_period: int,
                 cert_dir: Optional[str],
+                manager_selector: ManagerSelector,
                 ) -> None:
         """
         Parameters
@@ -160,6 +162,8 @@ class Interchange:
 
         self.heartbeat_threshold = heartbeat_threshold
 
+        self.manager_selector = manager_selector
+
         self.current_platform = {'parsl_v': PARSL_VERSION,
                                  'python_v': "{}.{}.{}".format(sys.version_info.major,
                                                                sys.version_info.minor,
@@ -216,27 +220,15 @@ class Interchange:
             task_counter += 1
             logger.debug(f"Fetched {task_counter} tasks so far")
 
-    def _create_monitoring_channel(self) -> Optional[zmq.Socket]:
-        if self.hub_address and self.hub_zmq_port:
-            logger.info("Connecting to MonitoringHub")
-            # This is a one-off because monitoring is unencrypted
-            hub_channel = zmq.Context().socket(zmq.DEALER)
-            hub_channel.set_hwm(0)
-            hub_channel.connect("tcp://{}:{}".format(self.hub_address, self.hub_zmq_port))
-            logger.info("Connected to MonitoringHub")
-            return hub_channel
-        else:
-            return None
-
-    def _send_monitoring_info(self, hub_channel: Optional[zmq.Socket], manager: ManagerRecord) -> None:
-        if hub_channel:
+    def _send_monitoring_info(self, monitoring_radio: Optional[MonitoringRadioSender], manager: ManagerRecord) -> None:
+        if monitoring_radio:
             logger.info("Sending message {} to MonitoringHub".format(manager))
 
             d: Dict = cast(Dict, manager.copy())
             d['timestamp'] = datetime.datetime.now()
             d['last_heartbeat'] = datetime.datetime.fromtimestamp(d['last_heartbeat'])
 
-            hub_channel.send_pyobj((MessageType.NODE_INFO, d))
+            monitoring_radio.send((MessageType.NODE_INFO, d))
 
     @wrap_with_logs(target="interchange")
     def _command_server(self) -> NoReturn:
@@ -244,8 +236,11 @@ class Interchange:
         """
         logger.debug("Command Server Starting")
 
-        # Need to create a new ZMQ socket for command server thread
-        hub_channel = self._create_monitoring_channel()
+        if self.hub_address is not None and self.hub_zmq_port is not None:
+            logger.debug("Creating monitoring radio to %s:%s", self.hub_address, self.hub_zmq_port)
+            monitoring_radio = ZMQRadioSender(self.hub_address, self.hub_zmq_port)
+        else:
+            monitoring_radio = None
 
         reply: Any  # the type of reply depends on the command_req received (aka this needs dependent types...)
 
@@ -295,7 +290,7 @@ class Interchange:
                 if manager_id in self._ready_managers:
                     m = self._ready_managers[manager_id]
                     m['active'] = False
-                    self._send_monitoring_info(hub_channel, m)
+                    self._send_monitoring_info(monitoring_radio, m)
                 else:
                     logger.warning("Worker to hold was not in ready managers list")
 
@@ -330,9 +325,14 @@ class Interchange:
         # parent-process-inheritance problems.
         signal.signal(signal.SIGTERM, signal.SIG_DFL)
 
-        logger.info("Incoming ports bound")
+        logger.info("Starting main interchange method")
 
-        hub_channel = self._create_monitoring_channel()
+        if self.hub_address is not None and self.hub_zmq_port is not None:
+            logger.debug("Creating monitoring radio to %s:%s", self.hub_address, self.hub_zmq_port)
+            monitoring_radio = ZMQRadioSender(self.hub_address, self.hub_zmq_port)
+            logger.debug("Created monitoring radio")
+        else:
+            monitoring_radio = None
 
         poll_period = self.poll_period
 
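Both the command-server thread and the main loop now construct their own ZMQRadioSender instead of sharing a DEALER socket built by a helper method, keeping each ZMQ socket on the thread that uses it. A sketch of the sender in isolation, with hypothetical hub coordinates and a payload shaped like the NODE_INFO messages in this file:

    from parsl.monitoring.message_type import MessageType
    from parsl.monitoring.radios import ZMQRadioSender

    # Hypothetical coordinates; the interchange uses self.hub_address
    # and self.hub_zmq_port.
    radio = ZMQRadioSender("127.0.0.1", 55055)
    radio.send((MessageType.NODE_INFO, {"block_id": "0", "active": True}))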
@@ -363,10 +363,10 @@ class Interchange:
         while not kill_event.is_set():
             self.socks = dict(poller.poll(timeout=poll_period))
 
-            self.process_task_outgoing_incoming(interesting_managers, hub_channel, kill_event)
-            self.process_results_incoming(interesting_managers, hub_channel)
-            self.expire_bad_managers(interesting_managers, hub_channel)
-            self.expire_drained_managers(interesting_managers, hub_channel)
+            self.process_task_outgoing_incoming(interesting_managers, monitoring_radio, kill_event)
+            self.process_results_incoming(interesting_managers, monitoring_radio)
+            self.expire_bad_managers(interesting_managers, monitoring_radio)
+            self.expire_drained_managers(interesting_managers, monitoring_radio)
             self.process_tasks_to_send(interesting_managers)
 
         self.zmq_context.destroy()
@@ -377,7 +377,7 @@ class Interchange:
     def process_task_outgoing_incoming(
             self,
             interesting_managers: Set[bytes],
-            hub_channel: Optional[zmq.Socket],
+            monitoring_radio: Optional[MonitoringRadioSender],
            kill_event: threading.Event
     ) -> None:
         """Process one message from manager on the task_outgoing channel.
@@ -410,6 +410,7 @@ class Interchange:
                 self._ready_managers[manager_id] = {'last_heartbeat': time.time(),
                                                     'idle_since': time.time(),
                                                     'block_id': None,
+                                                    'start_time': msg['start_time'],
                                                     'max_capacity': 0,
                                                     'worker_count': 0,
                                                     'active': True,
@@ -430,7 +431,7 @@ class Interchange:
                 m.update(msg)  # type: ignore[typeddict-item]
 
                 logger.info("Registration info for manager {!r}: {}".format(manager_id, msg))
-                self._send_monitoring_info(hub_channel, m)
+                self._send_monitoring_info(monitoring_radio, m)
 
                 if (msg['python_v'].rsplit(".", 1)[0] != self.current_platform['python_v'].rsplit(".", 1)[0] or
                     msg['parsl_v'] != self.current_platform['parsl_v']):
@@ -461,7 +462,7 @@ class Interchange:
             logger.error(f"Unexpected message type received from manager: {msg['type']}")
         logger.debug("leaving task_outgoing section")
 
-    def expire_drained_managers(self, interesting_managers: Set[bytes], hub_channel: Optional[zmq.Socket]) -> None:
+    def expire_drained_managers(self, interesting_managers: Set[bytes], monitoring_radio: Optional[MonitoringRadioSender]) -> None:
 
         for manager_id in list(interesting_managers):
             # is it always true that a draining manager will be in interesting managers?
@@ -474,7 +475,7 @@ class Interchange:
                 self._ready_managers.pop(manager_id)
 
                 m['active'] = False
-                self._send_monitoring_info(hub_channel, m)
+                self._send_monitoring_info(monitoring_radio, m)
 
     def process_tasks_to_send(self, interesting_managers: Set[bytes]) -> None:
         # Check if there are tasks that could be sent to managers
@@ -484,8 +485,7 @@ class Interchange:
                 interesting=len(interesting_managers)))
 
         if interesting_managers and not self.pending_task_queue.empty():
-            shuffled_managers = list(interesting_managers)
-            random.shuffle(shuffled_managers)
+            shuffled_managers = self.manager_selector.sort_managers(self._ready_managers, interesting_managers)
 
             while shuffled_managers and not self.pending_task_queue.empty():  # cf. the if statement above...
                 manager_id = shuffled_managers.pop()
@@ -518,7 +518,7 @@ class Interchange:
         else:
             logger.debug("either no interesting managers or no tasks, so skipping manager pass")
 
-    def process_results_incoming(self, interesting_managers: Set[bytes], hub_channel: Optional[zmq.Socket]) -> None:
+    def process_results_incoming(self, interesting_managers: Set[bytes], monitoring_radio: Optional[MonitoringRadioSender]) -> None:
         # Receive any results and forward to client
         if self.results_incoming in self.socks and self.socks[self.results_incoming] == zmq.POLLIN:
             logger.debug("entering results_incoming section")
@@ -538,11 +538,11 @@ class Interchange:
                 elif r['type'] == 'monitoring':
                     # the monitoring code makes the assumption that no
                     # monitoring messages will be received if monitoring
-                    # is not configured, and that hub_channel will only
+                    # is not configured, and that monitoring_radio will only
                     # be None when monitoring is not configurated.
-                    assert hub_channel is not None
+                    assert monitoring_radio is not None
 
-                    hub_channel.send_pyobj(r['payload'])
+                    monitoring_radio.send(r['payload'])
                 elif r['type'] == 'heartbeat':
                     logger.debug(f"Manager {manager_id!r} sent heartbeat via results connection")
                     b_messages.append((p_message, r))
@@ -586,7 +586,7 @@ class Interchange:
                     interesting_managers.add(manager_id)
             logger.debug("leaving results_incoming section")
 
-    def expire_bad_managers(self, interesting_managers: Set[bytes], hub_channel: Optional[zmq.Socket]) -> None:
+    def expire_bad_managers(self, interesting_managers: Set[bytes], monitoring_radio: Optional[MonitoringRadioSender]) -> None:
         bad_managers = [(manager_id, m) for (manager_id, m) in self._ready_managers.items() if
                         time.time() - m['last_heartbeat'] > self.heartbeat_threshold]
         for (manager_id, m) in bad_managers:
@@ -594,7 +594,7 @@ class Interchange:
             logger.warning(f"Too many heartbeats missed for manager {manager_id!r} - removing manager")
             if m['active']:
                 m['active'] = False
-                self._send_monitoring_info(hub_channel, m)
+                self._send_monitoring_info(monitoring_radio, m)
 
             logger.warning(f"Cancelling htex tasks {m['tasks']} on removed manager")
             for tid in m['tasks']:
parsl/executors/high_throughput/manager_selector.py
ADDED
@@ -0,0 +1,25 @@
+import random
+from abc import ABCMeta, abstractmethod
+from typing import Dict, List, Set
+
+from parsl.executors.high_throughput.manager_record import ManagerRecord
+
+
+class ManagerSelector(metaclass=ABCMeta):
+
+    @abstractmethod
+    def sort_managers(self, ready_managers: Dict[bytes, ManagerRecord], manager_list: Set[bytes]) -> List[bytes]:
+        """ Sort a given list of managers.
+
+        Any operations pertaining to the sorting and rearrangement of the
+        interesting_managers Set should be performed here.
+        """
+        pass
+
+
+class RandomManagerSelector(ManagerSelector):
+
+    def sort_managers(self, ready_managers: Dict[bytes, ManagerRecord], manager_list: Set[bytes]) -> List[bytes]:
+        c_manager_list = list(manager_list)
+        random.shuffle(c_manager_list)
+        return c_manager_list
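Because process_tasks_to_send pops managers from the end of the list this hook returns, a custom selector can implement a placement policy. A hedged sketch of one possibility, preferring the longest-running managers and relying on the 'start_time' field that this same release adds to the manager record (the class name is invented for illustration):

    from typing import Dict, List, Set

    from parsl.executors.high_throughput.manager_record import ManagerRecord
    from parsl.executors.high_throughput.manager_selector import ManagerSelector


    class OldestFirstManagerSelector(ManagerSelector):
        """Illustrative only: dispatch to the oldest manager first.

        The interchange pops from the end of the returned list, so sorting
        newest-first places the oldest manager at the popped end.
        """

        def sort_managers(self, ready_managers: Dict[bytes, ManagerRecord], manager_list: Set[bytes]) -> List[bytes]:
            return sorted(manager_list, key=lambda mid: ready_managers[mid]['start_time'], reverse=True)

    # Hypothetical usage:
    #   HighThroughputExecutor(manager_selector=OldestFirstManagerSelector())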
parsl/executors/high_throughput/process_worker_pool.py
CHANGED
@@ -184,6 +184,7 @@ class Manager:
 
         self.uid = uid
         self.block_id = block_id
+        self.start_time = time.time()
 
         self.enable_mpi_mode = enable_mpi_mode
         self.mpi_launcher = mpi_launcher
@@ -263,6 +264,7 @@ class Manager:
                'worker_count': self.worker_count,
                'uid': self.uid,
                'block_id': self.block_id,
+               'start_time': self.start_time,
                'prefetch_capacity': self.prefetch_capacity,
                'max_capacity': self.worker_count + self.prefetch_capacity,
                'os': platform.system(),