parsl 2025.6.23__py3-none-any.whl → 2025.6.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/configs/osg.py +1 -1
- parsl/dataflow/dflow.py +14 -4
- parsl/executors/base.py +14 -6
- parsl/executors/high_throughput/executor.py +20 -15
- parsl/executors/high_throughput/interchange.py +173 -191
- parsl/executors/high_throughput/mpi_executor.py +7 -4
- parsl/executors/high_throughput/probe.py +4 -4
- parsl/executors/high_throughput/process_worker_pool.py +88 -94
- parsl/executors/taskvine/executor.py +9 -3
- parsl/executors/taskvine/manager.py +3 -1
- parsl/executors/threads.py +8 -1
- parsl/executors/workqueue/executor.py +9 -3
- parsl/monitoring/errors.py +5 -0
- parsl/monitoring/monitoring.py +25 -42
- parsl/monitoring/radios/base.py +63 -2
- parsl/monitoring/radios/filesystem.py +18 -3
- parsl/monitoring/radios/filesystem_router.py +13 -26
- parsl/monitoring/radios/htex.py +22 -13
- parsl/monitoring/radios/multiprocessing.py +22 -2
- parsl/monitoring/radios/udp.py +57 -19
- parsl/monitoring/radios/udp_router.py +49 -15
- parsl/monitoring/remote.py +19 -40
- parsl/providers/local/local.py +12 -13
- parsl/tests/configs/htex_local_alternate.py +0 -1
- parsl/tests/test_htex/test_interchange_exit_bad_registration.py +5 -7
- parsl/tests/test_htex/test_zmq_binding.py +5 -6
- parsl/tests/test_monitoring/test_basic.py +12 -10
- parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +0 -1
- parsl/tests/test_monitoring/test_radio_filesystem.py +7 -9
- parsl/tests/test_monitoring/test_radio_multiprocessing.py +44 -0
- parsl/tests/test_monitoring/test_radio_udp.py +163 -12
- parsl/tests/test_monitoring/test_stdouterr.py +1 -3
- parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +3 -7
- parsl/version.py +1 -1
- {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/interchange.py +173 -191
- {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/process_worker_pool.py +88 -94
- {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/METADATA +2 -2
- {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/RECORD +44 -43
- {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/LICENSE +0 -0
- {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/WHEEL +0 -0
- {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/entry_points.txt +0 -0
- {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/top_level.txt +0 -0
parsl/providers/local/local.py
CHANGED
@@ -114,17 +114,15 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
 
         return [self.resources[jid]['status'] for jid in job_ids]
 
-    def _is_alive(self, job_dict):
-        ...
-            else:
-                return False
+    @staticmethod
+    def _is_alive(job_dict) -> bool:
+        try:
+            os.kill(job_dict['remote_pid'], 0)
+        except ProcessLookupError:
+            return False
+        except PermissionError:
+            pass  # exists; just no permissions to send signal
+        return True
 
     def _job_file_path(self, script_path: str, suffix: str) -> str:
         path = '{0}{1}'.format(script_path, suffix)
@@ -230,8 +228,9 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
                                       stdout, stderr)
         for line in stdout.split('\n'):
             if line.startswith("PID:"):
-                job_id = line.split("PID:")[1].strip()
-                break
+                job_id = line.split("PID:")[1].strip()
+                remote_pid = int(job_id)
+                break
         if job_id is None:
             raise SubmitException(job_name, "Channel failed to start remote command/retrieve PID")
 
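The replacement `_is_alive` above swaps a shell-based status check for the standard `os.kill(pid, 0)` probe: signal number 0 performs the existence and permission checks without actually delivering a signal. A minimal standalone sketch of the same idiom (the `pid_alive` helper name is illustrative, not part of parsl):

import os

def pid_alive(pid: int) -> bool:
    """Return True if a process with this PID exists on the local host."""
    try:
        os.kill(pid, 0)  # signal 0: existence/permission check, no signal delivered
    except ProcessLookupError:
        return False     # no such process
    except PermissionError:
        pass             # process exists but is owned by another user
    return True

print(pid_alive(os.getpid()))  # True: our own process certainly exists

Treating PermissionError as "alive" is the important detail: a PID owned by another user exists even though we cannot signal it.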
parsl/tests/test_htex/test_interchange_exit_bad_registration.py
CHANGED
@@ -40,7 +40,7 @@ def test_exit_with_bad_registration(tmpd_cwd, try_assert):
                                          incoming_q.port,
                                          command_client.port),
                        "interchange_address": "127.0.0.1",
-                       "worker_ports": None,
+                       "worker_port": None,
                        "worker_port_range": (50000, 60000),
                        "hub_address": None,
                        "hub_zmq_port": None,
@@ -67,7 +67,7 @@ def test_exit_with_bad_registration(tmpd_cwd, try_assert):
     # responsive. if the interchange process didn't start enough to get the command
     # thread running, this will time out.
 
-    task_port, result_port = command_client.run("WORKER_PORTS", timeout_s=120)
+    worker_port = command_client.run("WORKER_BINDS", timeout_s=120)
 
     # now we'll assume that if the interchange command thread is responding,
     # then the worker polling code is also running and that the interchange has
@@ -80,7 +80,7 @@ def test_exit_with_bad_registration(tmpd_cwd, try_assert):
 
     msg = {'type': 'registration',
            'parsl_v': PARSL_VERSION,
-           'python_v': "
+           'python_v': "1.1.1",  # this is the bad bit
            'worker_count': 1,
            'uid': 'testuid',
            'block_id': 0,
@@ -104,11 +104,9 @@ def test_exit_with_bad_registration(tmpd_cwd, try_assert):
 
     task_channel.set_hwm(0)
    task_channel.setsockopt(zmq.SNDTIMEO, channel_timeout)
-    task_channel.connect(f"tcp://127.0.0.1:{task_port}")
+    task_channel.connect(f"tcp://127.0.0.1:{worker_port}")
 
-    b_msg = pickle.dumps(msg)
-
-    task_channel.send(b_msg)
+    task_channel.send(pickle.dumps(msg))
 
     # check that the interchange exits within some reasonable time
     try_assert(lambda: interchange_proc.poll() is not None, "Interchange did not exit after killing watched client process", timeout_ms=5000)
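These test changes track the interchange's move from a task/result port pair to a single worker port, discovered at runtime via the new WORKER_BINDS command. The send pattern the test uses is plain pyzmq; a self-contained sketch under stated assumptions (the port value and the DEALER socket type are placeholders, not taken from parsl):

import pickle
import zmq

worker_port = 54321  # placeholder: in the test this comes from the WORKER_BINDS command

context = zmq.Context()
channel = context.socket(zmq.DEALER)
channel.set_hwm(0)                       # 0 = unlimited high-water mark
channel.setsockopt(zmq.SNDTIMEO, 10000)  # fail the send after 10s rather than blocking forever
channel.connect(f"tcp://127.0.0.1:{worker_port}")

# a deliberately bad registration, as in the test above
msg = {'type': 'registration', 'python_v': "1.1.1"}
channel.send(pickle.dumps(msg))

channel.close()
context.term()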
parsl/tests/test_htex/test_zmq_binding.py
CHANGED
@@ -15,12 +15,12 @@ from parsl.executors.high_throughput.manager_selector import RandomManagerSelector
 def make_interchange(*,
                      interchange_address: Optional[str],
                      cert_dir: Optional[str],
-                     worker_ports: Optional[Tuple[int, int]] = None) -> Interchange:
+                     worker_port: Optional[int] = None) -> Interchange:
     return Interchange(interchange_address=interchange_address,
                        cert_dir=cert_dir,
                        client_address="127.0.0.1",
                        client_ports=(50055, 50056, 50057),
-                       worker_ports=worker_ports,
+                       worker_port=worker_port,
                        worker_port_range=(54000, 55000),
                        hub_address=None,
                        hub_zmq_port=None,
@@ -56,7 +56,7 @@ def test_interchange_curvezmq_sockets(
     ix = make_interchange(interchange_address=address, cert_dir=cert_dir)
     assert isinstance(ix.zmq_context, curvezmq.ServerContext)
     assert ix.zmq_context.encrypted is encrypted
-    assert mock_socket.call_count == 5
+    assert mock_socket.call_count == 4
 
 
 @pytest.mark.local
@@ -100,11 +100,10 @@ def test_limited_interface_binding(cert_dir: Optional[str]):
     """When address is specified the worker_port would be bound to it rather than to 0.0.0.0"""
     address = "127.0.0.1"
     ix = make_interchange(interchange_address=address, cert_dir=cert_dir)
-    ix.worker_result_port
     proc = psutil.Process()
     conns = proc.connections(kind="tcp")
 
-    matched_conns = [conn for conn in conns if conn.laddr.port == ix.worker_result_port]
+    matched_conns = [conn for conn in conns if conn.laddr.port == ix.worker_port]
     assert len(matched_conns) == 1
     # laddr.ip can return ::ffff:127.0.0.1 when using IPv6
     assert address in matched_conns[0].laddr.ip
@@ -113,5 +112,5 @@
 @pytest.mark.local
 @pytest.mark.parametrize("encrypted", (True, False), indirect=True)
 def test_fixed_ports(cert_dir: Optional[str]):
-    ix = make_interchange(interchange_address=None, cert_dir=cert_dir,
+    ix = make_interchange(interchange_address=None, cert_dir=cert_dir, worker_port=51117)
     assert ix.interchange_address == "*"
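test_limited_interface_binding verifies the bind address by inspecting the process's own TCP sockets with psutil, the same call pattern kept in the hunk above. A minimal sketch of that inspection technique, using a throwaway socket rather than an Interchange:

import socket

import psutil

# bind a throwaway TCP socket to a specific interface; port 0 lets the OS pick
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(("127.0.0.1", 0))
port = s.getsockname()[1]

# ask psutil which local address that port ended up bound to
conns = psutil.Process().connections(kind="tcp")
matched = [c for c in conns if c.laddr.port == port]
assert len(matched) == 1
assert "127.0.0.1" in matched[0].laddr.ip  # laddr.ip can be ::ffff:127.0.0.1 under IPv6

s.close()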
parsl/tests/test_monitoring/test_basic.py
CHANGED
@@ -8,6 +8,9 @@ from parsl import HighThroughputExecutor, ThreadPoolExecutor
 from parsl.config import Config
 from parsl.executors.status_handling import BlockProviderExecutor
 from parsl.monitoring import MonitoringHub
+from parsl.monitoring.radios.filesystem import FilesystemRadio
+from parsl.monitoring.radios.htex import HTEXRadio
+from parsl.monitoring.radios.udp import UDPRadio
 
 
 @parsl.python_app
@@ -25,9 +28,8 @@ def this_app():
 # a configuration that is suitably configured for monitoring.
 
 def thread_config():
-    c = Config(executors=[ThreadPoolExecutor()],
-               monitoring=MonitoringHub(
-                   resource_monitoring_interval=0))
+    c = Config(executors=[ThreadPoolExecutor(remote_monitoring_radio=UDPRadio(address="localhost", atexit_timeout=0))],
+               monitoring=MonitoringHub(resource_monitoring_interval=0))
     return c
 
 
@@ -42,9 +44,10 @@ def htex_udp_config():
     from parsl.tests.configs.htex_local_alternate import fresh_config
     c = fresh_config()
     assert len(c.executors) == 1
+    ex = c.executors[0]
 
-    assert c.executors[0].radio_mode == "htex", "precondition: htex is configured for the htex radio"
-    c.executors[0].radio_mode = "udp"
+    assert isinstance(ex.remote_monitoring_radio, HTEXRadio), "precondition: htex is configured for the HTEXRadio"
+    ex.remote_monitoring_radio = UDPRadio(address="localhost", atexit_timeout=0)
 
     return c
 
@@ -54,9 +57,10 @@ def htex_filesystem_config():
     from parsl.tests.configs.htex_local_alternate import fresh_config
     c = fresh_config()
     assert len(c.executors) == 1
+    ex = c.executors[0]
 
-    assert c.executors[0].radio_mode == "htex", "precondition: htex is configured for the htex radio"
-    c.executors[0].radio_mode = "filesystem"
+    assert isinstance(ex.remote_monitoring_radio, HTEXRadio), "precondition: htex is configured for the HTEXRadio"
+    ex.remote_monitoring_radio = FilesystemRadio()
 
     return c
 
@@ -65,7 +69,6 @@ def workqueue_config():
     from parsl.tests.configs.workqueue_ex import fresh_config
     c = fresh_config()
     c.monitoring = MonitoringHub(
-        hub_address="localhost",
         resource_monitoring_interval=1)
     return c
 
@@ -76,8 +79,7 @@ def taskvine_config():
                                                worker_launch_method='provider')],
                strategy_period=0.5,
 
-               monitoring=MonitoringHub(
-                   resource_monitoring_interval=1))
+               monitoring=MonitoringHub(resource_monitoring_interval=1))
     return c
 
 
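The fixture changes above reflect this release's per-executor radio configuration: the worker-side radio is now an object passed to the executor (or assigned to its remote_monitoring_radio attribute) rather than being implied by the hub, and hub_address no longer needs to be set explicitly. A sketch of a complete config built from the same pieces the fixtures use, assuming the 2025.6.30 API shown in these tests (localhost and the zero intervals are arbitrary test-style values):

import parsl
from parsl.config import Config
from parsl.executors import ThreadPoolExecutor
from parsl.monitoring import MonitoringHub
from parsl.monitoring.radios.udp import UDPRadio

config = Config(
    executors=[ThreadPoolExecutor(
        # choose the UDP radio for messages sent from the remote/worker side
        remote_monitoring_radio=UDPRadio(address="localhost", atexit_timeout=0))],
    monitoring=MonitoringHub(resource_monitoring_interval=0),
)

with parsl.load(config):
    pass  # run monitored apps here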
parsl/tests/test_monitoring/test_radio_filesystem.py
CHANGED
@@ -1,8 +1,7 @@
 import pytest
 
 from parsl.monitoring.message_type import MessageType
-from parsl.monitoring.radios.filesystem import FilesystemRadioSender
-from parsl.monitoring.radios.filesystem_router import start_filesystem_receiver
+from parsl.monitoring.radios.filesystem import FilesystemRadio
 from parsl.multiprocessing import SpawnQueue
 
 
@@ -16,16 +15,15 @@ def test_filesystem(tmpd_cwd):
 
     resource_msgs = SpawnQueue()
 
+    radio_config = FilesystemRadio()
+
     # start receiver
-    receiver = start_filesystem_receiver(
-        ...
-        monitoring_messages=resource_msgs,
-        )
+    receiver = radio_config.create_receiver(run_dir=str(tmpd_cwd),
+                                            resource_msgs=resource_msgs)
 
     # make radio
 
-    radio_sender = FilesystemRadioSender(run_dir=str(tmpd_cwd),
-                                         monitoring_url="irrelevant:")
+    radio_sender = radio_config.create_sender()
 
     # send message into radio
 
@@ -41,7 +39,7 @@
 
     # shut down router
 
-    receiver.close()
+    receiver.shutdown()
 
     # we can't inspect the process if it has been closed properly, but
     # we can verify that it raises the expected ValueError the closed
parsl/tests/test_monitoring/test_radio_multiprocessing.py
ADDED
@@ -0,0 +1,44 @@
+import pytest
+
+from parsl.monitoring.message_type import MessageType
+from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadio
+from parsl.multiprocessing import SpawnQueue
+
+
+@pytest.mark.local
+def test_radio(tmpd_cwd):
+    """Test filesystem radio/receiver pair.
+    This test checks that the pair can be started up locally, that a message
+    is conveyed from radio to receiver, and that the receiver process goes
+    away at shutdown.
+    """
+
+    resource_msgs = SpawnQueue()
+
+    radio_config = MultiprocessingQueueRadio()
+
+    # start receiver
+    receiver = radio_config.create_receiver(run_dir=str(tmpd_cwd),
+                                            resource_msgs=resource_msgs)
+
+    # make radio
+
+    radio_sender = radio_config.create_sender()
+
+    # send message into radio
+
+    message = (MessageType.RESOURCE_INFO, {})
+
+    radio_sender.send(message)
+
+    # verify it comes out of the receiver
+
+    m = resource_msgs.get()
+
+    assert m == message, "The sent message should appear in the queue"
+
+    # Shut down router.
+    # In the multiprocessing radio, nothing happens at shutdown, so this
+    # validates that the call executes without raising an exception, but
+    # not much else.
+    receiver.shutdown()
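The new test exercises the pairing protocol the reworked radios share: a radio config object builds its receiving half with create_receiver(run_dir=..., resource_msgs=...) and its sending half with create_sender(), and the receiver exposes shutdown(). A hypothetical toy radio with the same shape (illustrative only, not parsl's implementation; all names here are invented):

from multiprocessing import Queue


class QueueSender:
    def __init__(self, queue):
        self._queue = queue

    def send(self, message):
        # deliver straight into the receiving queue, like the multiprocessing radio
        self._queue.put(message)


class QueueReceiver:
    def __init__(self, queue):
        self.queue = queue

    def shutdown(self):
        pass  # nothing to tear down for an in-process queue


class QueueRadio:
    """Toy radio config exposing the create_receiver/create_sender pairing."""

    def create_receiver(self, *, run_dir, resource_msgs):
        self._queue = resource_msgs
        return QueueReceiver(resource_msgs)

    def create_sender(self):
        return QueueSender(self._queue)


if __name__ == '__main__':
    q = Queue()
    radio = QueueRadio()
    receiver = radio.create_receiver(run_dir=".", resource_msgs=q)
    radio.create_sender().send(("hello", {}))
    assert q.get() == ("hello", {})
    receiver.shutdown()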
parsl/tests/test_monitoring/test_radio_udp.py
CHANGED
@@ -1,8 +1,10 @@
+import socket
+import time
+
 import pytest
 
 from parsl.monitoring.message_type import MessageType
-from parsl.monitoring.radios.udp import UDPRadioSender
-from parsl.monitoring.radios.udp_router import start_udp_receiver
+from parsl.monitoring.radios.udp import UDPRadio
 from parsl.multiprocessing import SpawnQueue
 
 
@@ -16,19 +18,19 @@ def test_udp(tmpd_cwd):
 
     resource_msgs = SpawnQueue()
 
+    radio_config = UDPRadio(address="localhost", atexit_timeout=0)
+
     # start receiver
-    udp_receiver = start_udp_receiver(
-        ...
-        monitoring_messages=resource_msgs,
-        port=None
-        )
+    udp_receiver = radio_config.create_receiver(run_dir=str(tmpd_cwd),
+                                                resource_msgs=resource_msgs)
 
-    # make radio
+    # check hash properties
 
-    ...
-    url = "udp://{}:{}".format("localhost", udp_receiver.port)
+    assert len(radio_config.hmac_key) == 64, "With default hash, should expect 64 byte key"
 
-    radio_sender = UDPRadioSender(url)
+    # make radio
+
+    radio_sender = radio_config.create_sender()
 
     # send message into radio
 
@@ -44,7 +46,156 @@
 
     # shut down router
 
-    udp_receiver.close()
+    udp_receiver.shutdown()
+
+    # we can't inspect the process if it has been closed properly, but
+    # we can verify that it raises the expected ValueError the closed
+    # processes raise on access.
+    with pytest.raises(ValueError):
+        udp_receiver.process.exitcode
+
+
+@pytest.mark.local
+def test_bad_hmac(tmpd_cwd, caplog):
+    """Test when HMAC does not match.
+    """
+
+    resource_msgs = SpawnQueue()
+
+    radio_config = UDPRadio(address="localhost", atexit_timeout=0)
+
+    # start receiver
+    udp_receiver = radio_config.create_receiver(run_dir=str(tmpd_cwd),
+                                                resource_msgs=resource_msgs)
+
+    # check the hmac is configured in the right place,
+    # then change it to something different (by prepending a new byte)
+    assert radio_config.hmac_key is not None
+    radio_config.hmac_key += b'x'
+
+    # make radio, after changing the HMAC key
+
+    radio_sender = radio_config.create_sender()
+
+    # send message into radio
+
+    message = (MessageType.RESOURCE_INFO, {})
+
+    radio_sender.send(message)
+
+    # We should expect no message from the UDP side. That's hard to
+    # state in this scenario because UDP doesn't have any delivery
+    # guarantees for the test-failing case.
+    # So sleep a while to allow that test to misdeliver and fail.
+    time.sleep(1)
+
+    assert resource_msgs.empty(), "receiving queue should be empty"
+    assert udp_receiver.process.is_alive(), "UDP router process should still be alive"
+
+    with open(f"{tmpd_cwd}/monitoring_udp_router.log", "r") as logfile:
+        assert "ERROR" in logfile.read(), "Router log file should contain an error"
+
+    # shut down router
+
+    udp_receiver.shutdown()
+
+    # we can't inspect the process if it has been closed properly, but
+    # we can verify that it raises the expected ValueError the closed
+    # processes raise on access.
+    with pytest.raises(ValueError):
+        udp_receiver.process.exitcode
+
+
+@pytest.mark.local
+def test_wrong_digest(tmpd_cwd, caplog):
+    """Test when HMAC does not match.
+    """
+
+    resource_msgs = SpawnQueue()
+
+    radio_config = UDPRadio(address="localhost", atexit_timeout=0)
+
+    # start receiver
+    udp_receiver = radio_config.create_receiver(run_dir=str(tmpd_cwd),
+                                                resource_msgs=resource_msgs)
+
+    # check the hmac is configured in the right place,
+    # then change it to a different digest. The choice of different
+    # digest is arbitrary.
+    assert radio_config.hmac_digest is not None
+    radio_config.hmac_digest = "sha3_224"
+
+    # make radio, after changing the HMAC digest
+
+    radio_sender = radio_config.create_sender()
+
+    # send message into radio
+
+    message = (MessageType.RESOURCE_INFO, {})
+
+    radio_sender.send(message)
+
+    # We should expect no message from the UDP side. That's hard to
+    # state in this scenario because UDP doesn't have any delivery
+    # guarantees for the test-failing case.
+    # So sleep a while to allow that test to misdeliver and fail.
+    time.sleep(1)
+
+    assert resource_msgs.empty(), "receiving queue should be empty"
+    assert udp_receiver.process.is_alive(), "UDP router process should still be alive"
+
+    with open(f"{tmpd_cwd}/monitoring_udp_router.log", "r") as logfile:
+        assert "ERROR" in logfile.read(), "Router log file should contain an error"
+
+    # shut down router
+
+    udp_receiver.shutdown()
+
+    # we can't inspect the process if it has been closed properly, but
+    # we can verify that it raises the expected ValueError the closed
+    # processes raise on access.
+    with pytest.raises(ValueError):
+        udp_receiver.process.exitcode
+
+
+@pytest.mark.local
+def test_short_message(tmpd_cwd, caplog):
+    """Test when UDP message is so short it can't even be parsed into
+    HMAC + the rest.
+    """
+
+    resource_msgs = SpawnQueue()
+
+    radio_config = UDPRadio(address="localhost", atexit_timeout=0)
+
+    # start receiver
+    udp_receiver = radio_config.create_receiver(run_dir=str(tmpd_cwd),
+                                                resource_msgs=resource_msgs)
+
+    # now send a bad UDP message, rather than using the sender mechanism.
+
+    sock = socket.socket(socket.AF_INET,
+                         socket.SOCK_DGRAM,
+                         socket.IPPROTO_UDP)
+
+    sock.sendto(b'', (radio_config.address, radio_config.port))
+    sock.close()
+
+    # We should expect no message from the UDP side. That's hard to
+    # state in this scenario because UDP doesn't have any delivery
+    # guarantees for the test-failing case.
+    # So sleep a while to allow that test to misdeliver and fail.
+    time.sleep(1)
+
+    assert resource_msgs.empty(), "receiving queue should be empty"
+    assert udp_receiver.process.is_alive(), "UDP router process should still be alive"
+
+    with open(f"{tmpd_cwd}/monitoring_udp_router.log", "r") as logfile:
+        assert "ERROR" in logfile.read(), "Router log file should contain an error"
+
+    # shut down router
+
+    udp_receiver.shutdown()
 
     # we can't inspect the process if it has been closed properly, but
     # we can verify that it raises the expected ValueError the closed
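The three new tests probe the HMAC check the UDP radio gains in this release: a datagram whose leading HMAC fails to verify, was computed with a different digest, or is too short to split at all is logged as an error and dropped rather than enqueued. A standalone sketch of HMAC-prefixed datagrams using only the standard library (the frame layout and digest choice here are illustrative, not necessarily parsl's exact wire format):

import hmac
import pickle

KEY = b'k' * 64     # the test above expects a 64-byte key with the default hash
DIGEST = "blake2b"  # digest choice is illustrative

def seal(payload: bytes) -> bytes:
    # prepend the HMAC of the payload to the datagram
    return hmac.new(KEY, payload, DIGEST).digest() + payload

def open_sealed(datagram: bytes) -> bytes:
    mac_size = hmac.new(KEY, b'', DIGEST).digest_size
    if len(datagram) < mac_size:
        raise ValueError("datagram too short to contain an HMAC")
    mac, payload = datagram[:mac_size], datagram[mac_size:]
    expected = hmac.new(KEY, payload, DIGEST).digest()
    if not hmac.compare_digest(mac, expected):
        raise ValueError("HMAC verification failed")
    return payload

good = seal(pickle.dumps({"hello": 1}))
assert pickle.loads(open_sealed(good)) == {"hello": 1}

try:
    open_sealed(b'')  # the "short message" case from the test above
except ValueError as e:
    print(e)

Using hmac.compare_digest for the comparison avoids timing side channels, which is why it is preferred over == when checking authenticators.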
parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py
CHANGED
@@ -1,7 +1,3 @@
-import os
-import signal
-import time
-
 import pytest
 import zmq
 
@@ -61,11 +57,11 @@ def test_bad_messages(try_assert, msg):
 
     with parsl.load(c):
 
-        # send a bad message into the interchange on the
+        # send a bad message into the interchange on the worker_sock worker
         # channel, and then check that the interchange is still alive enough
         # that we can scale out a block and run a task.
 
-        task_port, result_port = htex.command_client.run("WORKER_PORTS")
+        worker_port = htex.command_client.run("WORKER_BINDS")
 
         context = zmq.Context()
         channel_timeout = 10000  # in milliseconds
@@ -75,7 +71,7 @@ def test_bad_messages(try_assert, msg):
 
         task_channel.set_hwm(0)
         task_channel.setsockopt(zmq.SNDTIMEO, channel_timeout)
-        task_channel.connect(f"tcp://localhost:{task_port}")
+        task_channel.connect(f"tcp://localhost:{worker_port}")
 
         task_channel.send(msg)
 
parsl/version.py
CHANGED