parsl 2024.8.12__py3-none-any.whl → 2024.8.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/channels/oauth_ssh/oauth_ssh.py +10 -2
- parsl/channels/ssh/ssh.py +16 -6
- parsl/channels/ssh_il/ssh_il.py +12 -2
- parsl/executors/high_throughput/executor.py +18 -27
- parsl/executors/high_throughput/interchange.py +31 -29
- parsl/executors/high_throughput/mpi_executor.py +23 -2
- parsl/executors/high_throughput/mpi_prefix_composer.py +5 -4
- parsl/executors/status_handling.py +5 -2
- parsl/jobs/states.py +6 -1
- parsl/monitoring/db_manager.py +21 -65
- parsl/monitoring/monitoring.py +10 -23
- parsl/monitoring/router.py +12 -39
- parsl/providers/slurm/slurm.py +40 -10
- parsl/tests/test_htex/test_multiple_disconnected_blocks.py +3 -5
- parsl/tests/test_htex/test_resource_spec_validation.py +40 -0
- parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +1 -1
- parsl/tests/test_mpi_apps/test_bad_mpi_config.py +29 -14
- parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +16 -8
- parsl/tests/test_mpi_apps/test_mpiex.py +2 -3
- parsl/tests/test_mpi_apps/test_resource_spec.py +39 -41
- parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +85 -0
- parsl/version.py +1 -1
- {parsl-2024.8.12.data → parsl-2024.8.26.data}/scripts/interchange.py +31 -29
- {parsl-2024.8.12.dist-info → parsl-2024.8.26.dist-info}/METADATA +5 -3
- {parsl-2024.8.12.dist-info → parsl-2024.8.26.dist-info}/RECORD +32 -31
- parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -47
- {parsl-2024.8.12.data → parsl-2024.8.26.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2024.8.12.data → parsl-2024.8.26.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2024.8.12.data → parsl-2024.8.26.data}/scripts/process_worker_pool.py +0 -0
- {parsl-2024.8.12.dist-info → parsl-2024.8.26.dist-info}/LICENSE +0 -0
- {parsl-2024.8.12.dist-info → parsl-2024.8.26.dist-info}/WHEEL +0 -0
- {parsl-2024.8.12.dist-info → parsl-2024.8.26.dist-info}/entry_points.txt +0 -0
- {parsl-2024.8.12.dist-info → parsl-2024.8.26.dist-info}/top_level.txt +0 -0
parsl/monitoring/db_manager.py
CHANGED
@@ -1,11 +1,14 @@
 import datetime
 import logging
+import multiprocessing.queues as mpq
 import os
 import queue
 import threading
 import time
 from typing import Any, Dict, List, Optional, Set, Tuple, TypeVar, cast

+import typeguard
+
 from parsl.dataflow.states import States
 from parsl.errors import OptionalModuleMissing
 from parsl.log_utils import set_file_logger
@@ -305,39 +308,13 @@ class DatabaseManager:
         self.pending_resource_queue: queue.Queue[MonitoringMessage] = queue.Queue()

     def start(self,
-
-              node_queue: "queue.Queue[MonitoringMessage]",
-              block_queue: "queue.Queue[MonitoringMessage]",
-              resource_queue: "queue.Queue[MonitoringMessage]") -> None:
+              resource_queue: mpq.Queue) -> None:

         self._kill_event = threading.Event()
-        self._priority_queue_pull_thread = threading.Thread(target=self._migrate_logs_to_internal,
-                                                            args=(
-                                                                priority_queue, 'priority', self._kill_event,),
-                                                            name="Monitoring-migrate-priority",
-                                                            daemon=True,
-                                                            )
-        self._priority_queue_pull_thread.start()
-
-        self._node_queue_pull_thread = threading.Thread(target=self._migrate_logs_to_internal,
-                                                        args=(
-                                                            node_queue, 'node', self._kill_event,),
-                                                        name="Monitoring-migrate-node",
-                                                        daemon=True,
-                                                        )
-        self._node_queue_pull_thread.start()
-
-        self._block_queue_pull_thread = threading.Thread(target=self._migrate_logs_to_internal,
-                                                         args=(
-                                                             block_queue, 'block', self._kill_event,),
-                                                         name="Monitoring-migrate-block",
-                                                         daemon=True,
-                                                         )
-        self._block_queue_pull_thread.start()

         self._resource_queue_pull_thread = threading.Thread(target=self._migrate_logs_to_internal,
                                                             args=(
-                                                                resource_queue,
+                                                                resource_queue, self._kill_event,),
                                                             name="Monitoring-migrate-resource",
                                                             daemon=True,
                                                             )
@@ -369,20 +346,18 @@ class DatabaseManager:
         while (not self._kill_event.is_set() or
                self.pending_priority_queue.qsize() != 0 or self.pending_resource_queue.qsize() != 0 or
                self.pending_node_queue.qsize() != 0 or self.pending_block_queue.qsize() != 0 or
-
-               node_queue.qsize() != 0 or block_queue.qsize() != 0):
+               resource_queue.qsize() != 0):

             """
             WORKFLOW_INFO and TASK_INFO messages (i.e. priority messages)

             """
             try:
-                logger.debug("""Checking STOP conditions: {}, {}, {}, {}, {}, {}
+                logger.debug("""Checking STOP conditions: {}, {}, {}, {}, {}, {}""".format(
                     self._kill_event.is_set(),
                     self.pending_priority_queue.qsize() != 0, self.pending_resource_queue.qsize() != 0,
                     self.pending_node_queue.qsize() != 0, self.pending_block_queue.qsize() != 0,
-
-                    node_queue.qsize() != 0, block_queue.qsize() != 0))
+                    resource_queue.qsize() != 0))

                 # This is the list of resource messages which can be reprocessed as if they
                 # had just arrived because the corresponding first task message has been
@@ -574,43 +549,26 @@ class DatabaseManager:
             raise RuntimeError("An exception happened sometime during database processing and should have been logged in database_manager.log")

     @wrap_with_logs(target="database_manager")
-    def _migrate_logs_to_internal(self, logs_queue: queue.Queue,
-        logger.info("Starting
+    def _migrate_logs_to_internal(self, logs_queue: queue.Queue, kill_event: threading.Event) -> None:
+        logger.info("Starting _migrate_logs_to_internal")

         while not kill_event.is_set() or logs_queue.qsize() != 0:
-            logger.debug("
-
+            logger.debug("Checking STOP conditions: kill event: %s, queue has entries: %s",
+                         kill_event.is_set(), logs_queue.qsize() != 0)
             try:
                 x, addr = logs_queue.get(timeout=0.1)
             except queue.Empty:
                 continue
             else:
-                if
+                if x == 'STOP':
                     self.close()
-                elif queue_tag == 'priority':  # implicitly not 'STOP'
-                    assert isinstance(x, tuple)
-                    assert len(x) == 2
-                    assert x[0] in [MessageType.WORKFLOW_INFO, MessageType.TASK_INFO], \
-                        "_migrate_logs_to_internal can only migrate WORKFLOW_,TASK_INFO message from priority queue, got x[0] == {}".format(x[0])
-                    self._dispatch_to_internal(x)
-                elif queue_tag == 'resource':
-                    assert isinstance(x, tuple), "_migrate_logs_to_internal was expecting a tuple, got {}".format(x)
-                    assert x[0] == MessageType.RESOURCE_INFO, (
-                        "_migrate_logs_to_internal can only migrate RESOURCE_INFO message from resource queue, "
-                        "got tag {}, message {}".format(x[0], x)
-                    )
-                    self._dispatch_to_internal(x)
-                elif queue_tag == 'node':
-                    assert len(x) == 2, "expected message tuple to have exactly two elements"
-                    assert x[0] == MessageType.NODE_INFO, "_migrate_logs_to_internal can only migrate NODE_INFO messages from node queue"
-
-                    self._dispatch_to_internal(x)
-                elif queue_tag == "block":
-                    self._dispatch_to_internal(x)
                 else:
-
+                    self._dispatch_to_internal(x)

     def _dispatch_to_internal(self, x: Tuple) -> None:
+        assert isinstance(x, tuple)
+        assert len(x) == 2, "expected message tuple to have exactly two elements"
+
         if x[0] in [MessageType.WORKFLOW_INFO, MessageType.TASK_INFO]:
             self.pending_priority_queue.put(cast(Any, x))
         elif x[0] == MessageType.RESOURCE_INFO:
@@ -719,11 +677,9 @@ class DatabaseManager:


 @wrap_with_logs(target="database_manager")
-def dbm_starter(exception_q: "queue.Queue[Tuple[str, str]]",
-
-
-                block_msgs: "queue.Queue[MonitoringMessage]",
-                resource_msgs: "queue.Queue[MonitoringMessage]",
+@typeguard.typechecked
+def dbm_starter(exception_q: mpq.Queue,
+                resource_msgs: mpq.Queue,
                 db_url: str,
                 logdir: str,
                 logging_level: int) -> None:
@@ -739,7 +695,7 @@ def dbm_starter(exception_q: "queue.Queue[Tuple[str, str]]",
                               logdir=logdir,
                               logging_level=logging_level)
         logger.info("Starting dbm in dbm starter")
-        dbm.start(
+        dbm.start(resource_msgs)
     except KeyboardInterrupt:
         logger.exception("KeyboardInterrupt signal caught")
         dbm.close()
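Note: the change above collapses the separate priority/node/block/resource queues feeding DatabaseManager into a single resource queue, with routing by message tag moved into _dispatch_to_internal. A minimal standalone sketch of that single-queue dispatch pattern (illustrative only, not parsl's actual code):

import queue
from enum import Enum, auto
from typing import Any, Tuple


class MessageType(Enum):
    TASK_INFO = auto()
    WORKFLOW_INFO = auto()
    RESOURCE_INFO = auto()
    NODE_INFO = auto()
    BLOCK_INFO = auto()


pending_priority_queue: "queue.Queue[Any]" = queue.Queue()
pending_resource_queue: "queue.Queue[Any]" = queue.Queue()


def dispatch_to_internal(x: Tuple) -> None:
    # One dispatcher replaces the former per-queue migration threads:
    # every tagged message arrives on one queue and is sorted by tag here.
    assert isinstance(x, tuple) and len(x) == 2
    if x[0] in (MessageType.WORKFLOW_INFO, MessageType.TASK_INFO):
        pending_priority_queue.put(x)
    elif x[0] == MessageType.RESOURCE_INFO:
        pending_resource_queue.put(x)
    # NODE_INFO and BLOCK_INFO go to their own pending queues in the real class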
parsl/monitoring/monitoring.py
CHANGED
@@ -7,7 +7,7 @@ import queue
 import time
 from multiprocessing import Event, Process
 from multiprocessing.queues import Queue
-from typing import TYPE_CHECKING, Any, Optional, Tuple, Union, cast
+from typing import TYPE_CHECKING, Any, Literal, Optional, Tuple, Union, cast

 import typeguard

@@ -138,25 +138,18 @@ class MonitoringHub(RepresentationMixin):
         self.exception_q: Queue[Tuple[str, str]]
         self.exception_q = SizedQueue(maxsize=10)

-        self.
-        self.priority_msgs = SizedQueue()
-
-        self.resource_msgs: Queue[AddressedMonitoringMessage]
+        self.resource_msgs: Queue[Union[AddressedMonitoringMessage, Tuple[Literal["STOP"], Literal[0]]]]
         self.resource_msgs = SizedQueue()

-        self.node_msgs: Queue[AddressedMonitoringMessage]
-        self.node_msgs = SizedQueue()
-
-        self.block_msgs: Queue[AddressedMonitoringMessage]
-        self.block_msgs = SizedQueue()
-
         self.router_exit_event: ms.Event
         self.router_exit_event = Event()

         self.router_proc = ForkProcess(target=router_starter,
-
-
-
+                                       kwargs={"comm_q": comm_q,
+                                               "exception_q": self.exception_q,
+                                               "resource_msgs": self.resource_msgs,
+                                               "exit_event": self.router_exit_event,
+                                               "hub_address": self.hub_address,
                                                "udp_port": self.hub_port,
                                                "zmq_port_range": self.hub_port_range,
                                                "logdir": self.logdir,
@@ -168,7 +161,7 @@ class MonitoringHub(RepresentationMixin):
         self.router_proc.start()

         self.dbm_proc = ForkProcess(target=dbm_starter,
-                                    args=(self.exception_q, self.
+                                    args=(self.exception_q, self.resource_msgs,),
                                     kwargs={"logdir": self.logdir,
                                             "logging_level": logging.DEBUG if self.monitoring_debug else logging.INFO,
                                             "db_url": self.logging_endpoint,
@@ -187,7 +180,7 @@ class MonitoringHub(RepresentationMixin):
         self.filesystem_proc.start()
         logger.info(f"Started filesystem radio receiver process {self.filesystem_proc.pid}")

-        self.radio = MultiprocessingQueueRadioSender(self.
+        self.radio = MultiprocessingQueueRadioSender(self.resource_msgs)

         try:
             comm_q_result = comm_q.get(block=True, timeout=120)
@@ -244,7 +237,7 @@ class MonitoringHub(RepresentationMixin):
             logger.debug("Finished waiting for router termination")
             if len(exception_msgs) == 0:
                 logger.debug("Sending STOP to DBM")
-                self.
+                self.resource_msgs.put(("STOP", 0))
             else:
                 logger.debug("Not sending STOP to DBM, because there were DBM exceptions")
             logger.debug("Waiting for DB termination")
@@ -262,14 +255,8 @@ class MonitoringHub(RepresentationMixin):
         logger.info("Closing monitoring multiprocessing queues")
         self.exception_q.close()
         self.exception_q.join_thread()
-        self.priority_msgs.close()
-        self.priority_msgs.join_thread()
         self.resource_msgs.close()
         self.resource_msgs.join_thread()
-        self.node_msgs.close()
-        self.node_msgs.join_thread()
-        self.block_msgs.close()
-        self.block_msgs.join_thread()
         logger.info("Closed monitoring multiprocessing queues")

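Note: the hub now shuts the database manager down by pushing a ("STOP", 0) sentinel onto the same resource_msgs queue that carries monitoring messages. A small self-contained illustration of that sentinel pattern (names here are illustrative, not parsl's API):

import multiprocessing


def consumer(q: "multiprocessing.Queue") -> None:
    while True:
        msg, addr = q.get()
        if msg == "STOP":      # sentinel: stop consuming and exit
            break
        print("routing", msg, "from", addr)


if __name__ == "__main__":
    q: "multiprocessing.Queue" = multiprocessing.Queue()
    p = multiprocessing.Process(target=consumer, args=(q,))
    p.start()
    q.put(({"type": "RESOURCE_INFO"}, 0))
    q.put(("STOP", 0))         # same shape as the sentinel sent on shutdown
    p.join()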
parsl/monitoring/router.py
CHANGED
@@ -1,19 +1,19 @@
 from __future__ import annotations

 import logging
+import multiprocessing.queues as mpq
 import os
 import pickle
-import queue
 import socket
 import threading
 import time
 from multiprocessing.synchronize import Event
-from typing import Optional, Tuple, Union
+from typing import Optional, Tuple

+import typeguard
 import zmq

 from parsl.log_utils import set_file_logger
-from parsl.monitoring.message_type import MessageType
 from parsl.monitoring.types import AddressedMonitoringMessage, TaggedMonitoringMessage
 from parsl.process_loggers import wrap_with_logs
 from parsl.utils import setproctitle
@@ -33,10 +33,7 @@ class MonitoringRouter:
                  logdir: str = ".",
                  logging_level: int = logging.INFO,
                  atexit_timeout: int = 3,   # in seconds
-
-                 node_msgs: "queue.Queue[AddressedMonitoringMessage]",
-                 block_msgs: "queue.Queue[AddressedMonitoringMessage]",
-                 resource_msgs: "queue.Queue[AddressedMonitoringMessage]",
+                 resource_msgs: mpq.Queue,
                  exit_event: Event,
                  ):
         """ Initializes a monitoring configuration class.
@@ -56,8 +53,8 @@ class MonitoringRouter:
            Logging level as defined in the logging module. Default: logging.INFO
         atexit_timeout : float, optional
            The amount of time in seconds to terminate the hub without receiving any messages, after the last dfk workflow message is received.
-
-
+        resource_msgs : multiprocessing.Queue
+           A multiprocessing queue to receive messages to be routed onwards to the database process

         exit_event : Event
            An event that the main Parsl process will set to signal that the monitoring router should shut down.
@@ -101,9 +98,6 @@ class MonitoringRouter:
                                                               min_port=zmq_port_range[0],
                                                               max_port=zmq_port_range[1])

-        self.priority_msgs = priority_msgs
-        self.node_msgs = node_msgs
-        self.block_msgs = block_msgs
         self.resource_msgs = resource_msgs
         self.exit_event = exit_event

@@ -169,24 +163,7 @@ class MonitoringRouter:
                     msg_0: AddressedMonitoringMessage
                     msg_0 = (msg, 0)

-
-                        self.node_msgs.put(msg_0)
-                    elif msg[0] == MessageType.RESOURCE_INFO:
-                        self.resource_msgs.put(msg_0)
-                    elif msg[0] == MessageType.BLOCK_INFO:
-                        self.block_msgs.put(msg_0)
-                    elif msg[0] == MessageType.TASK_INFO:
-                        self.priority_msgs.put(msg_0)
-                    elif msg[0] == MessageType.WORKFLOW_INFO:
-                        self.priority_msgs.put(msg_0)
-                    else:
-                        # There is a type: ignore here because if msg[0]
-                        # is of the correct type, this code is unreachable,
-                        # but there is no verification that the message
-                        # received from zmq_receiver_channel.recv_pyobj() is actually
-                        # of that type.
-                        self.logger.error("Discarding message "  # type: ignore[unreachable]
-                                          f"from interchange with unknown type {msg[0].value}")
+                    self.resource_msgs.put(msg_0)
             except zmq.Again:
                 pass
             except Exception:
@@ -202,12 +179,11 @@ class MonitoringRouter:


 @wrap_with_logs
-def router_starter(comm_q: "queue.Queue[Union[Tuple[int, int], str]]",
-
-
-
-
-                   resource_msgs: "queue.Queue[AddressedMonitoringMessage]",
+@typeguard.typechecked
+def router_starter(*,
+                   comm_q: mpq.Queue,
+                   exception_q: mpq.Queue,
+                   resource_msgs: mpq.Queue,
                    exit_event: Event,

                    hub_address: str,
@@ -223,9 +199,6 @@ def router_starter(comm_q: "queue.Queue[Union[Tuple[int, int], str]]",
                                  zmq_port_range=zmq_port_range,
                                  logdir=logdir,
                                  logging_level=logging_level,
-                                 priority_msgs=priority_msgs,
-                                 node_msgs=node_msgs,
-                                 block_msgs=block_msgs,
                                  resource_msgs=resource_msgs,
                                  exit_event=exit_event)
     except Exception as e:
parsl/providers/slurm/slurm.py
CHANGED
@@ -20,7 +20,7 @@ from parsl.utils import RepresentationMixin, wtime_to_minutes
 logger = logging.getLogger(__name__)

 # From https://slurm.schedmd.com/sacct.html#SECTION_JOB-STATE-CODES
-translate_table = {
+sacct_translate_table = {
     'PENDING': JobState.PENDING,
     'RUNNING': JobState.RUNNING,
     'CANCELLED': JobState.CANCELLED,
@@ -37,6 +37,20 @@ translate_table = {
     'REQUEUED': JobState.PENDING
 }

+squeue_translate_table = {
+    'PD': JobState.PENDING,
+    'R': JobState.RUNNING,
+    'CA': JobState.CANCELLED,
+    'CF': JobState.PENDING,  # (configuring),
+    'CG': JobState.RUNNING,  # (completing),
+    'CD': JobState.COMPLETED,
+    'F': JobState.FAILED,  # (failed),
+    'TO': JobState.TIMEOUT,  # (timeout),
+    'NF': JobState.FAILED,  # (node failure),
+    'RV': JobState.FAILED,  # (revoked) and
+    'SE': JobState.FAILED  # (special exit state)
+}
+

 class SlurmProvider(ClusterProvider, RepresentationMixin):
     """Slurm Execution Provider
@@ -155,6 +169,23 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):

         self.regex_job_id = regex_job_id
         self.worker_init = worker_init + '\n'
+        # Check if sacct works and if not fall back to squeue
+        cmd = "sacct -X"
+        logger.debug("Executing %s", cmd)
+        retcode, stdout, stderr = self.execute_wait(cmd)
+        # If sacct fails it should return retcode=1 stderr="Slurm accounting storage is disabled"
+        logger.debug(f"sacct returned retcode={retcode} stderr={stderr}")
+        if retcode == 0:
+            logger.debug("using sacct to get job status")
+            # Using state%20 to get enough characters to not truncate output
+            # of the state. Without output can look like "<job_id> CANCELLED+"
+            self._cmd = "sacct -X --noheader --format=jobid,state%20 --job '{0}'"
+            self._translate_table = sacct_translate_table
+        else:
+            logger.debug(f"sacct failed with retcode={retcode}")
+            logger.debug("falling back to using squeue to get job status")
+            self._cmd = "squeue --noheader --format='%i %t' --job '{0}'"
+            self._translate_table = squeue_translate_table

     def _status(self):
         '''Returns the status list for a list of job_ids
@@ -172,16 +203,14 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
             logger.debug('No active jobs, skipping status update')
             return

-        # Using state%20 to get enough characters to not truncate output
-        # of the state. Without output can look like "<job_id> CANCELLED+"
-        cmd = "sacct -X --noheader --format=jobid,state%20 --job '{0}'".format(job_id_list)
+        cmd = self._cmd.format(job_id_list)
         logger.debug("Executing %s", cmd)
         retcode, stdout, stderr = self.execute_wait(cmd)
-        logger.debug("sacct returned %s %s", stdout, stderr)
+        logger.debug("sacct/squeue returned %s %s", stdout, stderr)

         # Execute_wait failed. Do no update
         if retcode != 0:
-            logger.warning("sacct failed with non-zero exit code {}".format(retcode))
+            logger.warning("sacct/squeue failed with non-zero exit code {}".format(retcode))
             return

         jobs_missing = set(self.resources.keys())
@@ -193,9 +222,9 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
             # For example "<job_id> CANCELLED by <user_id>"
             # This splits and ignores anything past the first two unpacked values
             job_id, slurm_state, *ignore = line.split()
-            if slurm_state not in translate_table:
+            if slurm_state not in self._translate_table:
                 logger.warning(f"Slurm status {slurm_state} is not recognized")
-            status = translate_table.get(slurm_state, JobState.UNKNOWN)
+            status = self._translate_table.get(slurm_state, JobState.UNKNOWN)
             logger.debug("Updating job {} with slurm status {} to parsl state {!s}".format(job_id, slurm_state, status))
             self.resources[job_id]['status'] = JobStatus(status,
                                                          stdout_path=self.resources[job_id]['job_stdout_path'],
@@ -203,9 +232,10 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
             jobs_missing.remove(job_id)

         # sacct can get job info after jobs have completed so this path shouldn't be hit
-        #
+        # squeue does not report on jobs that are not running. So we are filling in the
+        # blanks for missing jobs, we might lose some information about why the jobs failed.
         for missing_job in jobs_missing:
-            logger.
+            logger.debug("Updating missing job {} to completed status".format(missing_job))
             self.resources[missing_job]['status'] = JobStatus(
                 JobState.COMPLETED, stdout_path=self.resources[missing_job]['job_stdout_path'],
                 stderr_path=self.resources[missing_job]['job_stderr_path'])
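Note: the SlurmProvider change above probes `sacct -X` once at construction time and falls back to `squeue` (with its own state-code table) when accounting storage is disabled. A rough standalone sketch of that probe-and-fallback idea, using subprocess instead of the provider's execute_wait:

import subprocess


def pick_status_command() -> str:
    # sacct exits non-zero when Slurm accounting storage is disabled,
    # in which case squeue is the only way left to ask for job states.
    try:
        probe = subprocess.run(["sacct", "-X"], capture_output=True)
        sacct_ok = probe.returncode == 0
    except FileNotFoundError:
        sacct_ok = False
    if sacct_ok:
        return "sacct -X --noheader --format=jobid,state%20 --job '{0}'"
    return "squeue --noheader --format='%i %t' --job '{0}'"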
parsl/tests/test_htex/test_multiple_disconnected_blocks.py
CHANGED
@@ -21,16 +21,14 @@ def local_config():
             poll_period=100,
             max_workers_per_node=1,
             provider=LocalProvider(
-                worker_init="
-                init_blocks=2
-                max_blocks=4,
-                min_blocks=0,
+                worker_init="exit 0",
+                init_blocks=2
             ),
         )
     ],
     run_dir="/tmp/test_htex",
     max_idletime=0.5,
-    strategy='
+    strategy='none',
 )

parsl/tests/test_htex/test_resource_spec_validation.py
ADDED
@@ -0,0 +1,40 @@
+import queue
+from unittest import mock
+
+import pytest
+
+from parsl.executors import HighThroughputExecutor
+from parsl.executors.high_throughput.mpi_prefix_composer import (
+    InvalidResourceSpecification,
+)
+
+
+def double(x):
+    return x * 2
+
+
+@pytest.mark.local
+def test_submit_calls_validate():
+
+    htex = HighThroughputExecutor()
+    htex.outgoing_q = mock.Mock(spec=queue.Queue)
+    htex.validate_resource_spec = mock.Mock(spec=htex.validate_resource_spec)
+
+    res_spec = {}
+    htex.submit(double, res_spec, (5,), {})
+    htex.validate_resource_spec.assert_called()
+
+
+@pytest.mark.local
+def test_resource_spec_validation():
+    htex = HighThroughputExecutor()
+    ret_val = htex.validate_resource_spec({})
+    assert ret_val is None
+
+
+@pytest.mark.local
+def test_resource_spec_validation_bad_keys():
+    htex = HighThroughputExecutor()
+
+    with pytest.raises(InvalidResourceSpecification):
+        htex.validate_resource_spec({"num_nodes": 2})
parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py
CHANGED
@@ -78,6 +78,6 @@ def test_row_counts(tmpd_cwd, strategy):
         (c, ) = result.first()
         assert c == 1, "There should be a single pending status"

-        result = connection.execute(text("SELECT COUNT(*) FROM block WHERE block_id = 0 AND status = '
+        result = connection.execute(text("SELECT COUNT(*) FROM block WHERE block_id = 0 AND status = 'SCALED_IN' AND run_id = :run_id"), binds)
         (c, ) = result.first()
         assert c == 1, "There should be a single cancelled status"
parsl/tests/test_mpi_apps/test_bad_mpi_config.py
CHANGED
@@ -1,33 +1,48 @@
 import pytest

 from parsl import Config
-from parsl.executors import
+from parsl.executors import MPIExecutor
 from parsl.launchers import AprunLauncher, SimpleLauncher, SrunLauncher
 from parsl.providers import SlurmProvider


 @pytest.mark.local
-def
-    """
+def test_bad_launcher():
+    """TypeError if a launcher other than SimpleLauncher is supplied"""

     for launcher in [SrunLauncher(), AprunLauncher()]:
-        with pytest.raises(
+        with pytest.raises(TypeError):
             Config(executors=[
-
-                    enable_mpi_mode=True,
+                MPIExecutor(
                     provider=SlurmProvider(launcher=launcher),
                 )
             ])


 @pytest.mark.local
-def
+def test_bad_mpi_launcher():
+    """ValueError if an unsupported mpi_launcher is specified"""
+
+    with pytest.raises(ValueError):
+        Config(executors=[
+            MPIExecutor(
+                mpi_launcher="bad_launcher",
+                provider=SlurmProvider(launcher=SimpleLauncher()),
+            )
+        ])
+
+
+@pytest.mark.local
+@pytest.mark.parametrize(
+    "mpi_launcher",
+    ["srun", "aprun", "mpiexec"]
+)
+def test_correct_launcher_with_mpi_mode(mpi_launcher: str):
     """Confirm that SimpleLauncher works with mpi_mode"""

-
-
-
-
-
-
-    assert isinstance(config.executors[0].provider.launcher, SimpleLauncher)
+    executor = MPIExecutor(
+        mpi_launcher=mpi_launcher,
+        provider=SlurmProvider(launcher=SimpleLauncher()),
+    )
+
+    assert isinstance(executor.provider.launcher, SimpleLauncher)
parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py
CHANGED
@@ -6,26 +6,34 @@ from typing import Dict
 import pytest

 import parsl
-from parsl import bash_app, python_app
+from parsl import Config, bash_app, python_app
+from parsl.executors import MPIExecutor
 from parsl.executors.high_throughput.mpi_prefix_composer import (
     MissingResourceSpecification,
 )
-from parsl.
+from parsl.launchers import SimpleLauncher
+from parsl.providers import LocalProvider

 EXECUTOR_LABEL = "MPI_TEST"


 def local_setup():
-    config = fresh_config()
-    config.executors[0].label = EXECUTOR_LABEL
-    config.executors[0].max_workers_per_node = 2
-    config.executors[0].enable_mpi_mode = True
-    config.executors[0].mpi_launcher = "mpiexec"

     cwd = os.path.abspath(os.path.dirname(__file__))
     pbs_nodefile = os.path.join(cwd, "mocks", "pbs_nodefile")

-    config
+    config = Config(
+        executors=[
+            MPIExecutor(
+                label=EXECUTOR_LABEL,
+                max_workers_per_block=2,
+                mpi_launcher="mpiexec",
+                provider=LocalProvider(
+                    worker_init=f"export PBS_NODEFILE={pbs_nodefile}",
+                    launcher=SimpleLauncher()
+                )
+            )
+        ])

     parsl.load(config)

parsl/tests/test_mpi_apps/test_mpiex.py
CHANGED
@@ -4,7 +4,6 @@ from pathlib import Path

 import pytest

-import parsl
 from parsl import Config, HighThroughputExecutor
 from parsl.executors.high_throughput.mpi_executor import MPIExecutor
 from parsl.launchers import SimpleLauncher
@@ -42,8 +41,8 @@ def test_docstring():
 def test_init():
     """Ensure all relevant kwargs are copied over from HTEx"""

-    new_kwargs = {'max_workers_per_block'}
-    excluded_kwargs = {'available_accelerators', '
+    new_kwargs = {'max_workers_per_block', 'mpi_launcher'}
+    excluded_kwargs = {'available_accelerators', 'cores_per_worker', 'max_workers_per_node',
                        'mem_per_worker', 'cpu_affinity', 'max_workers', 'manager_selector'}

     # Get the kwargs from both HTEx and MPIEx