parsl 2025.6.23__py3-none-any.whl → 2025.6.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/configs/osg.py +1 -1
- parsl/dataflow/dflow.py +14 -4
- parsl/executors/base.py +14 -6
- parsl/executors/high_throughput/executor.py +20 -15
- parsl/executors/high_throughput/interchange.py +173 -191
- parsl/executors/high_throughput/mpi_executor.py +7 -4
- parsl/executors/high_throughput/probe.py +4 -4
- parsl/executors/high_throughput/process_worker_pool.py +88 -94
- parsl/executors/taskvine/executor.py +9 -3
- parsl/executors/taskvine/manager.py +3 -1
- parsl/executors/threads.py +8 -1
- parsl/executors/workqueue/executor.py +9 -3
- parsl/monitoring/errors.py +5 -0
- parsl/monitoring/monitoring.py +25 -42
- parsl/monitoring/radios/base.py +63 -2
- parsl/monitoring/radios/filesystem.py +18 -3
- parsl/monitoring/radios/filesystem_router.py +13 -26
- parsl/monitoring/radios/htex.py +22 -13
- parsl/monitoring/radios/multiprocessing.py +22 -2
- parsl/monitoring/radios/udp.py +57 -19
- parsl/monitoring/radios/udp_router.py +49 -15
- parsl/monitoring/remote.py +19 -40
- parsl/providers/local/local.py +12 -13
- parsl/tests/configs/htex_local_alternate.py +0 -1
- parsl/tests/test_htex/test_interchange_exit_bad_registration.py +5 -7
- parsl/tests/test_htex/test_zmq_binding.py +5 -6
- parsl/tests/test_monitoring/test_basic.py +12 -10
- parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +0 -1
- parsl/tests/test_monitoring/test_radio_filesystem.py +7 -9
- parsl/tests/test_monitoring/test_radio_multiprocessing.py +44 -0
- parsl/tests/test_monitoring/test_radio_udp.py +163 -12
- parsl/tests/test_monitoring/test_stdouterr.py +1 -3
- parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +3 -7
- parsl/version.py +1 -1
- {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/interchange.py +173 -191
- {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/process_worker_pool.py +88 -94
- {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/METADATA +2 -2
- {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/RECORD +44 -43
- {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/LICENSE +0 -0
- {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/WHEEL +0 -0
- {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/entry_points.txt +0 -0
- {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/top_level.txt +0 -0
parsl/providers/local/local.py
CHANGED
@@ -114,17 +114,15 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
 
         return [self.resources[jid]['status'] for jid in job_ids]
 
-    def _is_alive(self, job_dict):
-        ...
-            else:
-                return False
+    @staticmethod
+    def _is_alive(job_dict) -> bool:
+        try:
+            os.kill(job_dict['remote_pid'], 0)
+        except ProcessLookupError:
+            return False
+        except PermissionError:
+            pass  # exists; just no permissions to send signal
+        return True
 
     def _job_file_path(self, script_path: str, suffix: str) -> str:
         path = '{0}{1}'.format(script_path, suffix)
@@ -230,8 +228,9 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
                                       stdout, stderr)
         for line in stdout.split('\n'):
             if line.startswith("PID:"):
-                job_id = line.split("PID:")[1].strip()
-                break
+                job_id = line.split("PID:")[1].strip()
+                remote_pid = int(job_id)
+                break
         if job_id is None:
             raise SubmitException(job_name, "Channel failed to start remote command/retrieve PID")
 
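The replacement `_is_alive` above swaps a shell-based status check for the standard `os.kill(pid, 0)` probe: signal number 0 performs the existence and permission checks without actually delivering a signal. A minimal standalone sketch of the same idiom (the `pid_alive` helper name is illustrative, not part of parsl):

import os

def pid_alive(pid: int) -> bool:
    """Return True if a process with this PID exists on the local host."""
    try:
        os.kill(pid, 0)  # signal 0: existence/permission check, no signal delivered
    except ProcessLookupError:
        return False     # no such process
    except PermissionError:
        pass             # process exists but is owned by another user
    return True

print(pid_alive(os.getpid()))  # True: our own process certainly exists

Treating PermissionError as "alive" is the important detail: a PID owned by another user exists even though we cannot signal it.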
parsl/tests/test_htex/test_interchange_exit_bad_registration.py
CHANGED
@@ -40,7 +40,7 @@ def test_exit_with_bad_registration(tmpd_cwd, try_assert):
                                          incoming_q.port,
                                          command_client.port),
                        "interchange_address": "127.0.0.1",
-                       "worker_ports": None,
+                       "worker_port": None,
                        "worker_port_range": (50000, 60000),
                        "hub_address": None,
                        "hub_zmq_port": None,
@@ -67,7 +67,7 @@ def test_exit_with_bad_registration(tmpd_cwd, try_assert):
     # responsive. if the interchange process didn't start enough to get the command
     # thread running, this will time out.
 
-    task_port, result_port = command_client.run("WORKER_PORTS", timeout_s=120)
+    worker_port = command_client.run("WORKER_BINDS", timeout_s=120)
 
     # now we'll assume that if the interchange command thread is responding,
     # then the worker polling code is also running and that the interchange has
@@ -80,7 +80,7 @@ def test_exit_with_bad_registration(tmpd_cwd, try_assert):
 
     msg = {'type': 'registration',
            'parsl_v': PARSL_VERSION,
-           'python_v': "
+           'python_v': "1.1.1",  # this is the bad bit
            'worker_count': 1,
            'uid': 'testuid',
            'block_id': 0,
@@ -104,11 +104,9 @@ def test_exit_with_bad_registration(tmpd_cwd, try_assert):
 
     task_channel.set_hwm(0)
    task_channel.setsockopt(zmq.SNDTIMEO, channel_timeout)
-    task_channel.connect(f"tcp://127.0.0.1:{task_port}")
+    task_channel.connect(f"tcp://127.0.0.1:{worker_port}")
 
-    b_msg = pickle.dumps(msg)
-
-    task_channel.send(b_msg)
+    task_channel.send(pickle.dumps(msg))
 
     # check that the interchange exits within some reasonable time
     try_assert(lambda: interchange_proc.poll() is not None, "Interchange did not exit after killing watched client process", timeout_ms=5000)
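These test changes track the interchange's move from a task/result port pair to a single worker port, discovered at runtime via the new WORKER_BINDS command. The send pattern the test uses is plain pyzmq; a self-contained sketch under stated assumptions (the port value and the DEALER socket type are placeholders, not taken from parsl):

import pickle
import zmq

worker_port = 54321  # placeholder: in the test this comes from the WORKER_BINDS command

context = zmq.Context()
channel = context.socket(zmq.DEALER)
channel.set_hwm(0)                       # 0 = unlimited high-water mark
channel.setsockopt(zmq.SNDTIMEO, 10000)  # fail the send after 10s rather than blocking forever
channel.connect(f"tcp://127.0.0.1:{worker_port}")

# a deliberately bad registration, as in the test above
msg = {'type': 'registration', 'python_v': "1.1.1"}
channel.send(pickle.dumps(msg))

channel.close()
context.term()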
parsl/tests/test_htex/test_zmq_binding.py
CHANGED
@@ -15,12 +15,12 @@ from parsl.executors.high_throughput.manager_selector import RandomManagerSelector
 def make_interchange(*,
                      interchange_address: Optional[str],
                      cert_dir: Optional[str],
-                     worker_ports: Optional[Tuple[int, int]] = None) -> Interchange:
+                     worker_port: Optional[int] = None) -> Interchange:
     return Interchange(interchange_address=interchange_address,
                        cert_dir=cert_dir,
                        client_address="127.0.0.1",
                        client_ports=(50055, 50056, 50057),
-                       worker_ports=worker_ports,
+                       worker_port=worker_port,
                        worker_port_range=(54000, 55000),
                        hub_address=None,
                        hub_zmq_port=None,
@@ -56,7 +56,7 @@ def test_interchange_curvezmq_sockets(
     ix = make_interchange(interchange_address=address, cert_dir=cert_dir)
     assert isinstance(ix.zmq_context, curvezmq.ServerContext)
     assert ix.zmq_context.encrypted is encrypted
-    assert mock_socket.call_count == 5
+    assert mock_socket.call_count == 4
 
 
 @pytest.mark.local
@@ -100,11 +100,10 @@ def test_limited_interface_binding(cert_dir: Optional[str]):
     """When address is specified the worker_port would be bound to it rather than to 0.0.0.0"""
     address = "127.0.0.1"
     ix = make_interchange(interchange_address=address, cert_dir=cert_dir)
-    ix.worker_result_port
     proc = psutil.Process()
     conns = proc.connections(kind="tcp")
 
-    matched_conns = [conn for conn in conns if conn.laddr.port == ix.worker_result_port]
+    matched_conns = [conn for conn in conns if conn.laddr.port == ix.worker_port]
     assert len(matched_conns) == 1
     # laddr.ip can return ::ffff:127.0.0.1 when using IPv6
     assert address in matched_conns[0].laddr.ip
@@ -113,5 +112,5 @@
 @pytest.mark.local
 @pytest.mark.parametrize("encrypted", (True, False), indirect=True)
 def test_fixed_ports(cert_dir: Optional[str]):
-    ix = make_interchange(interchange_address=None, cert_dir=cert_dir,
+    ix = make_interchange(interchange_address=None, cert_dir=cert_dir, worker_port=51117)
     assert ix.interchange_address == "*"
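test_limited_interface_binding verifies the bind address by inspecting the process's own TCP sockets with psutil, the same call pattern kept in the hunk above. A minimal sketch of that inspection technique, using a throwaway socket rather than an Interchange:

import socket

import psutil

# bind a throwaway TCP socket to a specific interface; port 0 lets the OS pick
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(("127.0.0.1", 0))
port = s.getsockname()[1]

# ask psutil which local address that port ended up bound to
conns = psutil.Process().connections(kind="tcp")
matched = [c for c in conns if c.laddr.port == port]
assert len(matched) == 1
assert "127.0.0.1" in matched[0].laddr.ip  # laddr.ip can be ::ffff:127.0.0.1 under IPv6

s.close()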
parsl/tests/test_monitoring/test_basic.py
CHANGED
@@ -8,6 +8,9 @@ from parsl import HighThroughputExecutor, ThreadPoolExecutor
 from parsl.config import Config
 from parsl.executors.status_handling import BlockProviderExecutor
 from parsl.monitoring import MonitoringHub
+from parsl.monitoring.radios.filesystem import FilesystemRadio
+from parsl.monitoring.radios.htex import HTEXRadio
+from parsl.monitoring.radios.udp import UDPRadio
 
 
 @parsl.python_app
@@ -25,9 +28,8 @@ def this_app():
 # a configuration that is suitably configured for monitoring.
 
 def thread_config():
-    c = Config(executors=[ThreadPoolExecutor()],
-               monitoring=MonitoringHub(
-                   resource_monitoring_interval=0))
+    c = Config(executors=[ThreadPoolExecutor(remote_monitoring_radio=UDPRadio(address="localhost", atexit_timeout=0))],
+               monitoring=MonitoringHub(resource_monitoring_interval=0))
     return c
 
 
@@ -42,9 +44,10 @@ def htex_udp_config():
     from parsl.tests.configs.htex_local_alternate import fresh_config
     c = fresh_config()
     assert len(c.executors) == 1
+    ex = c.executors[0]
 
-    assert c.executors[0].radio_mode == "htex", "precondition: htex is configured for the htex radio"
-    c.executors[0].radio_mode = "udp"
+    assert isinstance(ex.remote_monitoring_radio, HTEXRadio), "precondition: htex is configured for the HTEXRadio"
+    ex.remote_monitoring_radio = UDPRadio(address="localhost", atexit_timeout=0)
 
     return c
 
@@ -54,9 +57,10 @@ def htex_filesystem_config():
     from parsl.tests.configs.htex_local_alternate import fresh_config
     c = fresh_config()
     assert len(c.executors) == 1
+    ex = c.executors[0]
 
-    assert c.executors[0].radio_mode == "htex", "precondition: htex is configured for the htex radio"
-    c.executors[0].radio_mode = "filesystem"
+    assert isinstance(ex.remote_monitoring_radio, HTEXRadio), "precondition: htex is configured for the HTEXRadio"
+    ex.remote_monitoring_radio = FilesystemRadio()
 
     return c
 
@@ -65,7 +69,6 @@ def workqueue_config():
     from parsl.tests.configs.workqueue_ex import fresh_config
     c = fresh_config()
     c.monitoring = MonitoringHub(
-        hub_address="localhost",
         resource_monitoring_interval=1)
     return c
 
@@ -76,8 +79,7 @@ def taskvine_config():
                                                worker_launch_method='provider')],
                strategy_period=0.5,
 
-               monitoring=MonitoringHub(
-                   resource_monitoring_interval=1))
+               monitoring=MonitoringHub(resource_monitoring_interval=1))
     return c
 
 
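The fixture changes above reflect this release's per-executor radio configuration: the worker-side radio is now an object passed to the executor (or assigned to its remote_monitoring_radio attribute) rather than being implied by the hub, and hub_address no longer needs to be set explicitly. A sketch of a complete config built from the same pieces the fixtures use, assuming the 2025.6.30 API shown in these tests (localhost and the zero intervals are arbitrary test-style values):

import parsl
from parsl.config import Config
from parsl.executors import ThreadPoolExecutor
from parsl.monitoring import MonitoringHub
from parsl.monitoring.radios.udp import UDPRadio

config = Config(
    executors=[ThreadPoolExecutor(
        # choose the UDP radio for messages sent from the remote/worker side
        remote_monitoring_radio=UDPRadio(address="localhost", atexit_timeout=0))],
    monitoring=MonitoringHub(resource_monitoring_interval=0),
)

with parsl.load(config):
    pass  # run monitored apps here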
parsl/tests/test_monitoring/test_radio_filesystem.py
CHANGED
@@ -1,8 +1,7 @@
 import pytest
 
 from parsl.monitoring.message_type import MessageType
-from parsl.monitoring.radios.filesystem import FilesystemRadioSender
-from parsl.monitoring.radios.filesystem_router import start_filesystem_receiver
+from parsl.monitoring.radios.filesystem import FilesystemRadio
 from parsl.multiprocessing import SpawnQueue
 
 
@@ -16,16 +15,15 @@ def test_filesystem(tmpd_cwd):
 
     resource_msgs = SpawnQueue()
 
+    radio_config = FilesystemRadio()
+
     # start receiver
-    receiver = start_filesystem_receiver(
-        ...
-        monitoring_messages=resource_msgs,
-        )
+    receiver = radio_config.create_receiver(run_dir=str(tmpd_cwd),
+                                            resource_msgs=resource_msgs)
 
     # make radio
 
-    radio_sender = FilesystemRadioSender(run_dir=str(tmpd_cwd),
-                                         monitoring_url="irrelevant:")
+    radio_sender = radio_config.create_sender()
 
     # send message into radio
 
@@ -41,7 +39,7 @@
 
     # shut down router
 
-    receiver.close()
+    receiver.shutdown()
 
     # we can't inspect the process if it has been closed properly, but
     # we can verify that it raises the expected ValueError the closed
parsl/tests/test_monitoring/test_radio_multiprocessing.py
ADDED
@@ -0,0 +1,44 @@
+import pytest
+
+from parsl.monitoring.message_type import MessageType
+from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadio
+from parsl.multiprocessing import SpawnQueue
+
+
+@pytest.mark.local
+def test_radio(tmpd_cwd):
+    """Test filesystem radio/receiver pair.
+    This test checks that the pair can be started up locally, that a message
+    is conveyed from radio to receiver, and that the receiver process goes
+    away at shutdown.
+    """
+
+    resource_msgs = SpawnQueue()
+
+    radio_config = MultiprocessingQueueRadio()
+
+    # start receiver
+    receiver = radio_config.create_receiver(run_dir=str(tmpd_cwd),
+                                            resource_msgs=resource_msgs)
+
+    # make radio
+
+    radio_sender = radio_config.create_sender()
+
+    # send message into radio
+
+    message = (MessageType.RESOURCE_INFO, {})
+
+    radio_sender.send(message)
+
+    # verify it comes out of the receiver
+
+    m = resource_msgs.get()
+
+    assert m == message, "The sent message should appear in the queue"
+
+    # Shut down router.
+    # In the multiprocessing radio, nothing happens at shutdown, so this
+    # validates that the call executes without raising an exception, but
+    # not much else.
+    receiver.shutdown()
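The new test exercises the pairing protocol the reworked radios share: a radio config object builds its receiving half with create_receiver(run_dir=..., resource_msgs=...) and its sending half with create_sender(), and the receiver exposes shutdown(). A hypothetical toy radio with the same shape (illustrative only, not parsl's implementation; all names here are invented):

from multiprocessing import Queue


class QueueSender:
    def __init__(self, queue):
        self._queue = queue

    def send(self, message):
        # deliver straight into the receiving queue, like the multiprocessing radio
        self._queue.put(message)


class QueueReceiver:
    def __init__(self, queue):
        self.queue = queue

    def shutdown(self):
        pass  # nothing to tear down for an in-process queue


class QueueRadio:
    """Toy radio config exposing the create_receiver/create_sender pairing."""

    def create_receiver(self, *, run_dir, resource_msgs):
        self._queue = resource_msgs
        return QueueReceiver(resource_msgs)

    def create_sender(self):
        return QueueSender(self._queue)


if __name__ == '__main__':
    q = Queue()
    radio = QueueRadio()
    receiver = radio.create_receiver(run_dir=".", resource_msgs=q)
    radio.create_sender().send(("hello", {}))
    assert q.get() == ("hello", {})
    receiver.shutdown()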
parsl/tests/test_monitoring/test_radio_udp.py
CHANGED
@@ -1,8 +1,10 @@
+import socket
+import time
+
 import pytest
 
 from parsl.monitoring.message_type import MessageType
-from parsl.monitoring.radios.udp import UDPRadioSender
-from parsl.monitoring.radios.udp_router import start_udp_receiver
+from parsl.monitoring.radios.udp import UDPRadio
 from parsl.multiprocessing import SpawnQueue
 
 
@@ -16,19 +18,19 @@ def test_udp(tmpd_cwd):
 
     resource_msgs = SpawnQueue()
 
+    radio_config = UDPRadio(address="localhost", atexit_timeout=0)
+
     # start receiver
-    udp_receiver = start_udp_receiver(
-        ...
-        monitoring_messages=resource_msgs,
-        port=None
-        )
+    udp_receiver = radio_config.create_receiver(run_dir=str(tmpd_cwd),
+                                                resource_msgs=resource_msgs)
 
-    # make radio
+    # check hash properties
 
-    ...
-    url = "udp://{}:{}".format("localhost", udp_receiver.port)
+    assert len(radio_config.hmac_key) == 64, "With default hash, should expect 64 byte key"
 
-    radio_sender = UDPRadioSender(url)
+    # make radio
+
+    radio_sender = radio_config.create_sender()
 
     # send message into radio
 
@@ -44,7 +46,156 @@
 
     # shut down router
 
-    udp_receiver.close()
+    udp_receiver.shutdown()
+
+    # we can't inspect the process if it has been closed properly, but
+    # we can verify that it raises the expected ValueError the closed
+    # processes raise on access.
+    with pytest.raises(ValueError):
+        udp_receiver.process.exitcode
+
+
+@pytest.mark.local
+def test_bad_hmac(tmpd_cwd, caplog):
+    """Test when HMAC does not match.
+    """
+
+    resource_msgs = SpawnQueue()
+
+    radio_config = UDPRadio(address="localhost", atexit_timeout=0)
+
+    # start receiver
+    udp_receiver = radio_config.create_receiver(run_dir=str(tmpd_cwd),
+                                                resource_msgs=resource_msgs)
+
+    # check the hmac is configured in the right place,
+    # then change it to something different (by prepending a new byte)
+    assert radio_config.hmac_key is not None
+    radio_config.hmac_key += b'x'
+
+    # make radio, after changing the HMAC key
+
+    radio_sender = radio_config.create_sender()
+
+    # send message into radio
+
+    message = (MessageType.RESOURCE_INFO, {})
+
+    radio_sender.send(message)
+
+    # We should expect no message from the UDP side. That's hard to
+    # state in this scenario because UDP doesn't have any delivery
+    # guarantees for the test-failing case.
+    # So sleep a while to allow that test to misdeliver and fail.
+    time.sleep(1)
+
+    assert resource_msgs.empty(), "receiving queue should be empty"
+    assert udp_receiver.process.is_alive(), "UDP router process should still be alive"
+
+    with open(f"{tmpd_cwd}/monitoring_udp_router.log", "r") as logfile:
+        assert "ERROR" in logfile.read(), "Router log file should contain an error"
+
+    # shut down router
+
+    udp_receiver.shutdown()
+
+    # we can't inspect the process if it has been closed properly, but
+    # we can verify that it raises the expected ValueError the closed
+    # processes raise on access.
+    with pytest.raises(ValueError):
+        udp_receiver.process.exitcode
+
+
+@pytest.mark.local
+def test_wrong_digest(tmpd_cwd, caplog):
+    """Test when HMAC does not match.
+    """
+
+    resource_msgs = SpawnQueue()
+
+    radio_config = UDPRadio(address="localhost", atexit_timeout=0)
+
+    # start receiver
+    udp_receiver = radio_config.create_receiver(run_dir=str(tmpd_cwd),
+                                                resource_msgs=resource_msgs)
+
+    # check the hmac is configured in the right place,
+    # then change it to a different digest. The choice of different
+    # digest is arbitrary.
+    assert radio_config.hmac_digest is not None
+    radio_config.hmac_digest = "sha3_224"
+
+    # make radio, after changing the HMAC digest
+
+    radio_sender = radio_config.create_sender()
+
+    # send message into radio
+
+    message = (MessageType.RESOURCE_INFO, {})
+
+    radio_sender.send(message)
+
+    # We should expect no message from the UDP side. That's hard to
+    # state in this scenario because UDP doesn't have any delivery
+    # guarantees for the test-failing case.
+    # So sleep a while to allow that test to misdeliver and fail.
+    time.sleep(1)
+
+    assert resource_msgs.empty(), "receiving queue should be empty"
+    assert udp_receiver.process.is_alive(), "UDP router process should still be alive"
+
+    with open(f"{tmpd_cwd}/monitoring_udp_router.log", "r") as logfile:
+        assert "ERROR" in logfile.read(), "Router log file should contain an error"
+
+    # shut down router
+
+    udp_receiver.shutdown()
+
+    # we can't inspect the process if it has been closed properly, but
+    # we can verify that it raises the expected ValueError the closed
+    # processes raise on access.
+    with pytest.raises(ValueError):
+        udp_receiver.process.exitcode
+
+
+@pytest.mark.local
+def test_short_message(tmpd_cwd, caplog):
+    """Test when UDP message is so short it can't even be parsed into
+    HMAC + the rest.
+    """
+
+    resource_msgs = SpawnQueue()
+
+    radio_config = UDPRadio(address="localhost", atexit_timeout=0)
+
+    # start receiver
+    udp_receiver = radio_config.create_receiver(run_dir=str(tmpd_cwd),
+                                                resource_msgs=resource_msgs)
+
+    # now send a bad UDP message, rather than using the sender mechanism.
+
+    sock = socket.socket(socket.AF_INET,
+                         socket.SOCK_DGRAM,
+                         socket.IPPROTO_UDP)
+
+    sock.sendto(b'', (radio_config.address, radio_config.port))
+    sock.close()
+
+    # We should expect no message from the UDP side. That's hard to
+    # state in this scenario because UDP doesn't have any delivery
+    # guarantees for the test-failing case.
+    # So sleep a while to allow that test to misdeliver and fail.
+    time.sleep(1)
+
+    assert resource_msgs.empty(), "receiving queue should be empty"
+    assert udp_receiver.process.is_alive(), "UDP router process should still be alive"
+
+    with open(f"{tmpd_cwd}/monitoring_udp_router.log", "r") as logfile:
+        assert "ERROR" in logfile.read(), "Router log file should contain an error"
+
+    # shut down router
+
+    udp_receiver.shutdown()
 
     # we can't inspect the process if it has been closed properly, but
     # we can verify that it raises the expected ValueError the closed
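The three new tests probe the HMAC check the UDP radio gains in this release: a datagram whose leading HMAC fails to verify, was computed with a different digest, or is too short to split at all is logged as an error and dropped rather than enqueued. A standalone sketch of HMAC-prefixed datagrams using only the standard library (the frame layout and digest choice here are illustrative, not necessarily parsl's exact wire format):

import hmac
import pickle

KEY = b'k' * 64     # the test above expects a 64-byte key with the default hash
DIGEST = "blake2b"  # digest choice is illustrative

def seal(payload: bytes) -> bytes:
    # prepend the HMAC of the payload to the datagram
    return hmac.new(KEY, payload, DIGEST).digest() + payload

def open_sealed(datagram: bytes) -> bytes:
    mac_size = hmac.new(KEY, b'', DIGEST).digest_size
    if len(datagram) < mac_size:
        raise ValueError("datagram too short to contain an HMAC")
    mac, payload = datagram[:mac_size], datagram[mac_size:]
    expected = hmac.new(KEY, payload, DIGEST).digest()
    if not hmac.compare_digest(mac, expected):
        raise ValueError("HMAC verification failed")
    return payload

good = seal(pickle.dumps({"hello": 1}))
assert pickle.loads(open_sealed(good)) == {"hello": 1}

try:
    open_sealed(b'')  # the "short message" case from the test above
except ValueError as e:
    print(e)

Using hmac.compare_digest for the comparison avoids timing side channels, which is why it is preferred over == when checking authenticators.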
parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py
CHANGED
@@ -1,7 +1,3 @@
-import os
-import signal
-import time
-
 import pytest
 import zmq
 
@@ -61,11 +57,11 @@ def test_bad_messages(try_assert, msg):
 
     with parsl.load(c):
 
-        # send a bad message into the interchange on the
+        # send a bad message into the interchange on the worker_sock worker
         # channel, and then check that the interchange is still alive enough
         # that we can scale out a block and run a task.
 
-        task_port, result_port = htex.command_client.run("WORKER_PORTS")
+        worker_port = htex.command_client.run("WORKER_BINDS")
 
         context = zmq.Context()
         channel_timeout = 10000  # in milliseconds
@@ -75,7 +71,7 @@ def test_bad_messages(try_assert, msg):
 
         task_channel.set_hwm(0)
         task_channel.setsockopt(zmq.SNDTIMEO, channel_timeout)
-        task_channel.connect(f"tcp://localhost:{task_port}")
+        task_channel.connect(f"tcp://localhost:{worker_port}")
 
         task_channel.send(msg)
 
parsl/version.py
CHANGED