parsl 2025.6.16__py3-none-any.whl → 2025.6.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/configs/osg.py +1 -1
- parsl/dataflow/dflow.py +14 -4
- parsl/executors/base.py +19 -9
- parsl/executors/flux/executor.py +2 -0
- parsl/executors/globus_compute.py +2 -0
- parsl/executors/high_throughput/executor.py +22 -15
- parsl/executors/high_throughput/interchange.py +173 -191
- parsl/executors/high_throughput/mpi_executor.py +14 -4
- parsl/executors/high_throughput/probe.py +4 -4
- parsl/executors/high_throughput/process_worker_pool.py +88 -94
- parsl/executors/radical/executor.py +3 -0
- parsl/executors/taskvine/executor.py +11 -3
- parsl/executors/taskvine/manager.py +3 -1
- parsl/executors/threads.py +19 -3
- parsl/executors/workqueue/executor.py +11 -3
- parsl/monitoring/errors.py +4 -4
- parsl/monitoring/monitoring.py +26 -88
- parsl/monitoring/radios/base.py +63 -2
- parsl/monitoring/radios/filesystem.py +19 -4
- parsl/monitoring/radios/filesystem_router.py +22 -3
- parsl/monitoring/radios/htex.py +22 -13
- parsl/monitoring/radios/multiprocessing.py +22 -2
- parsl/monitoring/radios/udp.py +57 -19
- parsl/monitoring/radios/udp_router.py +119 -25
- parsl/monitoring/radios/zmq_router.py +9 -10
- parsl/monitoring/remote.py +19 -40
- parsl/providers/local/local.py +12 -13
- parsl/tests/configs/htex_local_alternate.py +0 -1
- parsl/tests/conftest.py +7 -4
- parsl/tests/test_htex/test_interchange_exit_bad_registration.py +5 -7
- parsl/tests/test_htex/test_zmq_binding.py +5 -6
- parsl/tests/test_monitoring/test_basic.py +12 -10
- parsl/tests/test_monitoring/{test_fuzz_zmq.py → test_htex_fuzz_zmq.py} +7 -2
- parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +0 -1
- parsl/tests/test_monitoring/test_radio_filesystem.py +48 -0
- parsl/tests/test_monitoring/test_radio_multiprocessing.py +44 -0
- parsl/tests/test_monitoring/test_radio_udp.py +204 -0
- parsl/tests/test_monitoring/test_stdouterr.py +1 -3
- parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +3 -7
- parsl/tests/test_shutdown/test_kill_monitoring.py +1 -1
- parsl/version.py +1 -1
- {parsl-2025.6.16.data → parsl-2025.6.30.data}/scripts/interchange.py +173 -191
- {parsl-2025.6.16.data → parsl-2025.6.30.data}/scripts/process_worker_pool.py +88 -94
- {parsl-2025.6.16.dist-info → parsl-2025.6.30.dist-info}/METADATA +2 -2
- {parsl-2025.6.16.dist-info → parsl-2025.6.30.dist-info}/RECORD +51 -50
- parsl/tests/configs/local_threads_monitoring.py +0 -10
- parsl/tests/manual_tests/test_udp_simple.py +0 -51
- {parsl-2025.6.16.data → parsl-2025.6.30.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2025.6.16.data → parsl-2025.6.30.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2025.6.16.dist-info → parsl-2025.6.30.dist-info}/LICENSE +0 -0
- {parsl-2025.6.16.dist-info → parsl-2025.6.30.dist-info}/WHEEL +0 -0
- {parsl-2025.6.16.dist-info → parsl-2025.6.30.dist-info}/entry_points.txt +0 -0
- {parsl-2025.6.16.dist-info → parsl-2025.6.30.dist-info}/top_level.txt +0 -0
parsl/monitoring/monitoring.py
CHANGED
@@ -3,16 +3,12 @@ from __future__ import annotations
|
|
3
3
|
import logging
|
4
4
|
import multiprocessing.synchronize as ms
|
5
5
|
import os
|
6
|
-
import queue
|
7
6
|
import warnings
|
8
7
|
from multiprocessing.queues import Queue
|
9
|
-
from typing import
|
8
|
+
from typing import Any, Optional, Union
|
10
9
|
|
11
10
|
import typeguard
|
12
11
|
|
13
|
-
from parsl.monitoring.errors import MonitoringHubStartError
|
14
|
-
from parsl.monitoring.radios.filesystem_router import filesystem_router_starter
|
15
|
-
from parsl.monitoring.radios.udp_router import udp_router_starter
|
16
12
|
from parsl.monitoring.types import TaggedMonitoringMessage
|
17
13
|
from parsl.multiprocessing import (
|
18
14
|
SizedQueue,
|
@@ -38,9 +34,9 @@ logger = logging.getLogger(__name__)
|
|
38
34
|
@typeguard.typechecked
|
39
35
|
class MonitoringHub(RepresentationMixin):
|
40
36
|
def __init__(self,
|
41
|
-
hub_address:
|
42
|
-
|
43
|
-
|
37
|
+
hub_address: Any = None, # unused, so no type enforcement
|
38
|
+
hub_port_range: Any = None, # unused, so no type enforcement
|
39
|
+
hub_port: Any = None, # unused, so no type enforcement
|
44
40
|
|
45
41
|
workflow_name: Optional[str] = None,
|
46
42
|
workflow_version: Optional[str] = None,
|
@@ -51,16 +47,14 @@ class MonitoringHub(RepresentationMixin):
|
|
51
47
|
"""
|
52
48
|
Parameters
|
53
49
|
----------
|
54
|
-
hub_address :
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
hub_port_range.
|
61
|
-
Default: None
|
50
|
+
hub_address : unused
|
51
|
+
hub_port : unused
|
52
|
+
Unused, but probably retained until 2026-06-01 to give deprecation warning.
|
53
|
+
These two values previously configured UDP parameters when UDP was used
|
54
|
+
for monitoring messages from workers. These are now configured on the
|
55
|
+
relevant UDPRadio.
|
62
56
|
hub_port_range : unused
|
63
|
-
Unused, but retained until
|
57
|
+
Unused, but probably retained until 2026-06-01 to give deprecation warning.
|
64
58
|
This value previously configured one ZMQ channel inside the
|
65
59
|
HighThroughputExecutor. That ZMQ channel is now configured by the
|
66
60
|
interchange_port_range parameter of HighThroughputExecutor.
|
@@ -88,15 +82,27 @@ class MonitoringHub(RepresentationMixin):
|
|
88
82
|
if _db_manager_excepts:
|
89
83
|
raise _db_manager_excepts
|
90
84
|
|
85
|
+
# The following three parameters need to exist as attributes to be
|
86
|
+
# output by RepresentationMixin.
|
87
|
+
if hub_address is not None:
|
88
|
+
message = "Instead of MonitoringHub.hub_address, specify UDPRadio(address=...)"
|
89
|
+
warnings.warn(message, DeprecationWarning)
|
90
|
+
logger.warning(message)
|
91
|
+
|
91
92
|
self.hub_address = hub_address
|
93
|
+
|
94
|
+
if hub_port is not None:
|
95
|
+
message = "Instead of MonitoringHub.hub_port, specify UDPRadio(port=...)"
|
96
|
+
warnings.warn(message, DeprecationWarning)
|
97
|
+
logger.warning(message)
|
98
|
+
|
92
99
|
self.hub_port = hub_port
|
93
100
|
|
94
101
|
if hub_port_range is not None:
|
95
102
|
message = "Instead of MonitoringHub.hub_port_range, Use HighThroughputExecutor.interchange_port_range"
|
96
103
|
warnings.warn(message, DeprecationWarning)
|
97
104
|
logger.warning(message)
|
98
|
-
|
99
|
-
# even though now it is otherwise unused.
|
105
|
+
|
100
106
|
self.hub_port_range = hub_port_range
|
101
107
|
|
102
108
|
self.logging_endpoint = logging_endpoint
|
@@ -119,40 +125,9 @@ class MonitoringHub(RepresentationMixin):
|
|
119
125
|
|
120
126
|
self.monitoring_hub_active = True
|
121
127
|
|
122
|
-
# This annotation is incompatible with typeguard 4.x instrumentation
|
123
|
-
# of local variables: Queue is not subscriptable at runtime, as far
|
124
|
-
# as typeguard is concerned. The more general Queue annotation works,
|
125
|
-
# but does not restrict the contents of the Queue. Using TYPE_CHECKING
|
126
|
-
# here allows the stricter definition to be seen by mypy, and the
|
127
|
-
# simpler definition to be seen by typeguard. Hopefully at some point
|
128
|
-
# in the future, Queue will allow runtime subscripts.
|
129
|
-
|
130
|
-
if TYPE_CHECKING:
|
131
|
-
udp_comm_q: Queue[Union[int, str]]
|
132
|
-
else:
|
133
|
-
udp_comm_q: Queue
|
134
|
-
|
135
|
-
udp_comm_q = SizedQueue(maxsize=10)
|
136
|
-
|
137
128
|
self.resource_msgs: Queue[TaggedMonitoringMessage]
|
138
129
|
self.resource_msgs = SizedQueue()
|
139
130
|
|
140
|
-
self.router_exit_event: ms.Event
|
141
|
-
self.router_exit_event = SpawnEvent()
|
142
|
-
|
143
|
-
self.udp_router_proc = SpawnProcess(target=udp_router_starter,
|
144
|
-
kwargs={"comm_q": udp_comm_q,
|
145
|
-
"resource_msgs": self.resource_msgs,
|
146
|
-
"exit_event": self.router_exit_event,
|
147
|
-
"udp_port": self.hub_port,
|
148
|
-
"run_dir": dfk_run_dir,
|
149
|
-
"logging_level": logging.DEBUG if self.monitoring_debug else logging.INFO,
|
150
|
-
},
|
151
|
-
name="Monitoring-UDP-Router-Process",
|
152
|
-
daemon=True,
|
153
|
-
)
|
154
|
-
self.udp_router_proc.start()
|
155
|
-
|
156
131
|
self.dbm_exit_event: ms.Event
|
157
132
|
self.dbm_exit_event = SpawnEvent()
|
158
133
|
|
@@ -167,55 +142,18 @@ class MonitoringHub(RepresentationMixin):
|
|
167
142
|
daemon=True,
|
168
143
|
)
|
169
144
|
self.dbm_proc.start()
|
170
|
-
logger.info("Started
|
171
|
-
self.udp_router_proc.pid, self.dbm_proc.pid)
|
172
|
-
|
173
|
-
self.filesystem_proc = SpawnProcess(target=filesystem_router_starter,
|
174
|
-
kwargs={"q": self.resource_msgs,
|
175
|
-
"run_dir": dfk_run_dir,
|
176
|
-
"exit_event": self.router_exit_event},
|
177
|
-
name="Monitoring-Filesystem-Process",
|
178
|
-
daemon=True
|
179
|
-
)
|
180
|
-
self.filesystem_proc.start()
|
181
|
-
logger.info("Started filesystem radio receiver process %s", self.filesystem_proc.pid)
|
182
|
-
|
183
|
-
try:
|
184
|
-
udp_comm_q_result = udp_comm_q.get(block=True, timeout=120)
|
185
|
-
udp_comm_q.close()
|
186
|
-
udp_comm_q.join_thread()
|
187
|
-
except queue.Empty:
|
188
|
-
logger.error("Monitoring UDP router has not reported port in 120s. Aborting")
|
189
|
-
raise MonitoringHubStartError()
|
190
|
-
|
191
|
-
if isinstance(udp_comm_q_result, str):
|
192
|
-
logger.error("MonitoringRouter sent an error message: %s", udp_comm_q_result)
|
193
|
-
raise RuntimeError(f"MonitoringRouter failed to start: {udp_comm_q_result}")
|
194
|
-
|
195
|
-
udp_port = udp_comm_q_result
|
196
|
-
self.monitoring_hub_url = "udp://{}:{}".format(self.hub_address, udp_port)
|
197
|
-
|
145
|
+
logger.info("Started DBM process %s", self.dbm_proc.pid)
|
198
146
|
logger.info("Monitoring Hub initialized")
|
199
147
|
|
200
148
|
def close(self) -> None:
|
201
149
|
logger.info("Terminating Monitoring Hub")
|
202
150
|
if self.monitoring_hub_active:
|
203
151
|
self.monitoring_hub_active = False
|
204
|
-
logger.info("Setting router termination event")
|
205
|
-
self.router_exit_event.set()
|
206
|
-
|
207
|
-
logger.info("Waiting for UDP router to terminate")
|
208
|
-
join_terminate_close_proc(self.udp_router_proc)
|
209
|
-
|
210
|
-
logger.debug("Finished waiting for router termination")
|
211
152
|
logger.debug("Waiting for DB termination")
|
212
153
|
self.dbm_exit_event.set()
|
213
154
|
join_terminate_close_proc(self.dbm_proc)
|
214
155
|
logger.debug("Finished waiting for DBM termination")
|
215
156
|
|
216
|
-
logger.info("Terminating filesystem radio receiver process")
|
217
|
-
join_terminate_close_proc(self.filesystem_proc)
|
218
|
-
|
219
157
|
logger.info("Closing monitoring multiprocessing queues")
|
220
158
|
self.resource_msgs.close()
|
221
159
|
self.resource_msgs.join_thread()
|
parsl/monitoring/radios/base.py
CHANGED
@@ -1,10 +1,71 @@
|
|
1
|
-
import logging
|
2
1
|
from abc import ABCMeta, abstractmethod
|
2
|
+
from multiprocessing.queues import Queue
|
3
3
|
|
4
|
-
|
4
|
+
|
5
|
+
class MonitoringRadioReceiver(metaclass=ABCMeta):
|
6
|
+
@abstractmethod
|
7
|
+
def shutdown(self) -> None:
|
8
|
+
pass
|
5
9
|
|
6
10
|
|
7
11
|
class MonitoringRadioSender(metaclass=ABCMeta):
|
8
12
|
@abstractmethod
|
9
13
|
def send(self, message: object) -> None:
|
10
14
|
pass
|
15
|
+
|
16
|
+
|
17
|
+
class RadioConfig(metaclass=ABCMeta):
|
18
|
+
"""Base class for radio plugin configuration.
|
19
|
+
|
20
|
+
This provides the configuration for a particular way of sending monitoring
|
21
|
+
messages from a source of monitoring messages into the submit side
|
22
|
+
monitoring database.
|
23
|
+
|
24
|
+
This uses staged initialization like lots of Parsl configuration, but in
|
25
|
+
a slightly different form.
|
26
|
+
|
27
|
+
A RadioConfig object must be pickleable, because it will be sent to remote
|
28
|
+
workers to configure senders. The MonitoringRadioSender and
|
29
|
+
MonitoringRadioReceiver objects do not need to be pickleable (and often
|
30
|
+
will not be - for example, when they hold references to other processes).
|
31
|
+
|
32
|
+
The RadioConfig object will be used by Parsl in this sequence:
|
33
|
+
|
34
|
+
* A user creates a RadioConfig object from the appropriate subclass for
|
35
|
+
radio mechanism they want to use, and specifies it as part of their
|
36
|
+
executor configuration.
|
37
|
+
|
38
|
+
Methods on the RadioConfig will then be invoked by Parsl like this:
|
39
|
+
|
40
|
+
* one create_receiver call, on the submit side
|
41
|
+
- this call can modify the state of radioconfig to contain information
|
42
|
+
about how a sender can connect back to the receiver. for example,
|
43
|
+
after binding to a particular port, can store that port so that the
|
44
|
+
sender knows which port to connect to.
|
45
|
+
|
46
|
+
* Possibly many serializations to get the RadioConfig to remote workers
|
47
|
+
|
48
|
+
* Many (0 or more) create_sender calls, possibly on remote workers, to
|
49
|
+
create the sending side of the radio (MonitoringRadioSender instances)
|
50
|
+
|
51
|
+
* Those senders are used to send messages
|
52
|
+
|
53
|
+
* At executor shutdown, the receiver is shut down.
|
54
|
+
|
55
|
+
This object cannot be re-used across parsl configurations - like many other
|
56
|
+
pieces of parsl config it is single use in that respect.
|
57
|
+
"""
|
58
|
+
|
59
|
+
@abstractmethod
|
60
|
+
def create_receiver(self, *, run_dir: str, resource_msgs: Queue) -> MonitoringRadioReceiver:
|
61
|
+
"""Create a receiver for this RadioConfig, and update this RadioConfig
|
62
|
+
with enough context to create senders.
|
63
|
+
"""
|
64
|
+
pass
|
65
|
+
|
66
|
+
@abstractmethod
|
67
|
+
def create_sender(self) -> MonitoringRadioSender:
|
68
|
+
"""Create a sender to connect to the receiver created by an
|
69
|
+
earlier call to create_receiver.
|
70
|
+
"""
|
71
|
+
pass
|
@@ -2,13 +2,19 @@ import logging
|
|
2
2
|
import os
|
3
3
|
import pickle
|
4
4
|
import uuid
|
5
|
+
from multiprocessing.queues import Queue
|
5
6
|
|
6
|
-
from parsl.monitoring.radios.base import
|
7
|
+
from parsl.monitoring.radios.base import (
|
8
|
+
MonitoringRadioReceiver,
|
9
|
+
MonitoringRadioSender,
|
10
|
+
RadioConfig,
|
11
|
+
)
|
12
|
+
from parsl.monitoring.radios.filesystem_router import FilesystemRadioReceiver
|
7
13
|
|
8
14
|
logger = logging.getLogger(__name__)
|
9
15
|
|
10
16
|
|
11
|
-
class
|
17
|
+
class FilesystemRadio(RadioConfig):
|
12
18
|
"""A MonitoringRadioSender that sends messages over a shared filesystem.
|
13
19
|
|
14
20
|
The messsage directory structure is based on maildir,
|
@@ -26,8 +32,17 @@ class FilesystemRadioSender(MonitoringRadioSender):
|
|
26
32
|
the UDP radio, but should be much more reliable.
|
27
33
|
"""
|
28
34
|
|
29
|
-
def
|
30
|
-
|
35
|
+
def create_sender(self) -> MonitoringRadioSender:
|
36
|
+
return FilesystemRadioSender(run_dir=self.run_dir)
|
37
|
+
|
38
|
+
def create_receiver(self, *, run_dir: str, resource_msgs: Queue) -> MonitoringRadioReceiver:
|
39
|
+
self.run_dir = run_dir
|
40
|
+
return FilesystemRadioReceiver(resource_msgs, run_dir)
|
41
|
+
|
42
|
+
|
43
|
+
class FilesystemRadioSender(MonitoringRadioSender):
|
44
|
+
def __init__(self, *, run_dir: str):
|
45
|
+
logger.info("filesystem based monitoring radio initializing")
|
31
46
|
self.base_path = f"{run_dir}/monitor-fs-radio/"
|
32
47
|
self.tmp_path = f"{self.base_path}/tmp"
|
33
48
|
self.new_path = f"{self.base_path}/new"
|
@@ -9,17 +9,20 @@ from multiprocessing.synchronize import Event
|
|
9
9
|
from typing import cast
|
10
10
|
|
11
11
|
from parsl.log_utils import set_file_logger
|
12
|
+
from parsl.monitoring.radios.base import MonitoringRadioReceiver
|
12
13
|
from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadioSender
|
13
14
|
from parsl.monitoring.types import TaggedMonitoringMessage
|
15
|
+
from parsl.multiprocessing import SpawnEvent, SpawnProcess, join_terminate_close_proc
|
14
16
|
from parsl.process_loggers import wrap_with_logs
|
15
17
|
from parsl.utils import setproctitle
|
16
18
|
|
19
|
+
logger = logging.getLogger(__name__)
|
20
|
+
|
17
21
|
|
18
22
|
@wrap_with_logs
|
19
23
|
def filesystem_router_starter(*, q: Queue[TaggedMonitoringMessage], run_dir: str, exit_event: Event) -> None:
|
20
|
-
|
21
|
-
|
22
|
-
level=logging.INFO)
|
24
|
+
set_file_logger(f"{run_dir}/monitoring_filesystem_radio.log",
|
25
|
+
level=logging.INFO)
|
23
26
|
|
24
27
|
logger.info("Starting filesystem radio receiver")
|
25
28
|
setproctitle("parsl: monitoring filesystem receiver")
|
@@ -52,3 +55,19 @@ def filesystem_router_starter(*, q: Queue[TaggedMonitoringMessage], run_dir: str
|
|
52
55
|
|
53
56
|
time.sleep(1) # whats a good time for this poll?
|
54
57
|
logger.info("Ending filesystem radio receiver")
|
58
|
+
|
59
|
+
|
60
|
+
class FilesystemRadioReceiver(MonitoringRadioReceiver):
|
61
|
+
def __init__(self, resource_msgs: Queue, run_dir: str) -> None:
|
62
|
+
self.exit_event = SpawnEvent()
|
63
|
+
self.process = SpawnProcess(target=filesystem_router_starter,
|
64
|
+
kwargs={"q": resource_msgs, "run_dir": run_dir, "exit_event": self.exit_event},
|
65
|
+
name="Monitoring-Filesystem-Process",
|
66
|
+
daemon=True
|
67
|
+
)
|
68
|
+
self.process.start()
|
69
|
+
logger.info("Started filesystem radio receiver process %s", self.process.pid)
|
70
|
+
|
71
|
+
def shutdown(self) -> None:
|
72
|
+
self.exit_event.set()
|
73
|
+
join_terminate_close_proc(self.process)
|
parsl/monitoring/radios/htex.py
CHANGED
@@ -1,24 +1,29 @@
|
|
1
1
|
import logging
|
2
2
|
import pickle
|
3
|
+
from multiprocessing.queues import Queue
|
3
4
|
|
4
|
-
from parsl.monitoring.radios.base import
|
5
|
+
from parsl.monitoring.radios.base import (
|
6
|
+
MonitoringRadioReceiver,
|
7
|
+
MonitoringRadioSender,
|
8
|
+
RadioConfig,
|
9
|
+
)
|
5
10
|
|
6
11
|
logger = logging.getLogger(__name__)
|
7
12
|
|
8
13
|
|
9
|
-
class
|
14
|
+
class HTEXRadio(RadioConfig):
|
15
|
+
def create_sender(self) -> MonitoringRadioSender:
|
16
|
+
return HTEXRadioSender()
|
10
17
|
|
11
|
-
def
|
12
|
-
|
13
|
-
Parameters
|
14
|
-
----------
|
18
|
+
def create_receiver(self, *, run_dir: str, resource_msgs: Queue) -> MonitoringRadioReceiver:
|
19
|
+
return HTEXRadioReceiver()
|
15
20
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
21
|
+
|
22
|
+
class HTEXRadioSender(MonitoringRadioSender):
|
23
|
+
|
24
|
+
def __init__(self) -> None:
|
25
|
+
# there is nothing to initialize
|
26
|
+
pass
|
22
27
|
|
23
28
|
def send(self, message: object) -> None:
|
24
29
|
""" Sends a message to the UDP receiver
|
@@ -54,4 +59,8 @@ class HTEXRadioSender(MonitoringRadioSender):
|
|
54
59
|
else:
|
55
60
|
logger.error("result_queue is uninitialized - cannot put monitoring message")
|
56
61
|
|
57
|
-
|
62
|
+
|
63
|
+
class HTEXRadioReceiver(MonitoringRadioReceiver):
|
64
|
+
def shutdown(self) -> None:
|
65
|
+
# there is nothing to shut down
|
66
|
+
pass
|
@@ -1,6 +1,10 @@
|
|
1
|
-
from multiprocessing
|
1
|
+
from multiprocessing import Queue
|
2
2
|
|
3
|
-
from parsl.monitoring.radios.base import
|
3
|
+
from parsl.monitoring.radios.base import (
|
4
|
+
MonitoringRadioReceiver,
|
5
|
+
MonitoringRadioSender,
|
6
|
+
RadioConfig,
|
7
|
+
)
|
4
8
|
|
5
9
|
|
6
10
|
class MultiprocessingQueueRadioSender(MonitoringRadioSender):
|
@@ -15,3 +19,19 @@ class MultiprocessingQueueRadioSender(MonitoringRadioSender):
|
|
15
19
|
|
16
20
|
def send(self, message: object) -> None:
|
17
21
|
self.queue.put(message)
|
22
|
+
|
23
|
+
|
24
|
+
class MultiprocessingQueueRadio(RadioConfig):
|
25
|
+
def create_sender(self) -> MonitoringRadioSender:
|
26
|
+
return MultiprocessingQueueRadioSender(self._queue)
|
27
|
+
|
28
|
+
def create_receiver(self, *, run_dir: str, resource_msgs: Queue) -> MonitoringRadioReceiver:
|
29
|
+
# This object is only for use with an in-process thread-pool so it
|
30
|
+
# is fine to store a reference to the message queue directly.
|
31
|
+
self._queue = resource_msgs
|
32
|
+
return MultiprocessingQueueRadioReceiver()
|
33
|
+
|
34
|
+
|
35
|
+
class MultiprocessingQueueRadioReceiver(MonitoringRadioReceiver):
|
36
|
+
def shutdown(self) -> None:
|
37
|
+
pass
|
parsl/monitoring/radios/udp.py
CHANGED
@@ -1,29 +1,63 @@
|
|
1
|
+
import hashlib
|
2
|
+
import hmac
|
1
3
|
import logging
|
2
4
|
import pickle
|
5
|
+
import secrets
|
3
6
|
import socket
|
7
|
+
from multiprocessing.queues import Queue
|
8
|
+
from typing import Optional
|
4
9
|
|
5
|
-
from parsl.monitoring.radios.base import
|
10
|
+
from parsl.monitoring.radios.base import (
|
11
|
+
MonitoringRadioReceiver,
|
12
|
+
MonitoringRadioSender,
|
13
|
+
RadioConfig,
|
14
|
+
)
|
15
|
+
from parsl.monitoring.radios.udp_router import start_udp_receiver
|
6
16
|
|
17
|
+
logger = logging.getLogger(__name__)
|
7
18
|
|
8
|
-
class UDPRadioSender(MonitoringRadioSender):
|
9
19
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
20
|
+
class UDPRadio(RadioConfig):
|
21
|
+
def __init__(self, *, port: Optional[int] = None, atexit_timeout: int = 3, address: str, debug: bool = False, hmac_digest: str = 'sha512'):
|
22
|
+
self.port = port
|
23
|
+
self.atexit_timeout = atexit_timeout
|
24
|
+
self.address = address
|
25
|
+
self.debug = debug
|
26
|
+
self.hmac_digest = hmac_digest
|
14
27
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
28
|
+
def create_sender(self) -> MonitoringRadioSender:
|
29
|
+
assert self.port is not None, "self.port should have been initialized by create_receiver"
|
30
|
+
return UDPRadioSender(self.address, self.port, self.hmac_key, self.hmac_digest)
|
31
|
+
|
32
|
+
def create_receiver(self, run_dir: str, resource_msgs: Queue) -> MonitoringRadioReceiver:
|
33
|
+
# RFC 2104 section 2 recommends that the key length be at
|
34
|
+
# least as long as the hash output (64 bytes in the case of SHA512).
|
35
|
+
# RFC 2014 section 3 talks about periodic key refreshing. This key is
|
36
|
+
# not refreshed inside a workflow run, but each separate workflow run
|
37
|
+
# uses a new key.
|
38
|
+
keysize = hashlib.new(self.hmac_digest).digest_size
|
39
|
+
self.hmac_key = secrets.token_bytes(keysize)
|
40
|
+
|
41
|
+
udp_receiver = start_udp_receiver(logdir=run_dir,
|
42
|
+
monitoring_messages=resource_msgs,
|
43
|
+
port=self.port,
|
44
|
+
debug=self.debug,
|
45
|
+
atexit_timeout=self.atexit_timeout,
|
46
|
+
hmac_key=self.hmac_key,
|
47
|
+
hmac_digest=self.hmac_digest
|
48
|
+
)
|
49
|
+
self.port = udp_receiver.port
|
50
|
+
return udp_receiver
|
51
|
+
|
52
|
+
|
53
|
+
class UDPRadioSender(MonitoringRadioSender):
|
54
|
+
|
55
|
+
def __init__(self, address: str, port: int, hmac_key: bytes, hmac_digest: str, *, timeout: int = 10) -> None:
|
21
56
|
self.sock_timeout = timeout
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
raise Exception("Failed to parse monitoring url: {}".format(monitoring_url))
|
57
|
+
self.address = address
|
58
|
+
self.port = port
|
59
|
+
self.hmac_key = hmac_key
|
60
|
+
self.hmac_digest = hmac_digest
|
27
61
|
|
28
62
|
self.sock = socket.socket(socket.AF_INET,
|
29
63
|
socket.SOCK_DGRAM,
|
@@ -42,15 +76,19 @@ class UDPRadioSender(MonitoringRadioSender):
|
|
42
76
|
Returns:
|
43
77
|
None
|
44
78
|
"""
|
79
|
+
logger.info("Starting UDP radio message send")
|
45
80
|
try:
|
46
|
-
|
81
|
+
data = pickle.dumps(message)
|
82
|
+
origin_hmac = hmac.digest(self.hmac_key, data, self.hmac_digest)
|
83
|
+
buffer = origin_hmac + data
|
47
84
|
except Exception:
|
48
85
|
logging.exception("Exception during pickling", exc_info=True)
|
49
86
|
return
|
50
87
|
|
51
88
|
try:
|
52
|
-
self.sock.sendto(buffer, (self.
|
89
|
+
self.sock.sendto(buffer, (self.address, self.port))
|
53
90
|
except socket.timeout:
|
54
91
|
logging.error("Could not send message within timeout limit")
|
55
92
|
return
|
93
|
+
logger.info("Normal ending for UDP radio message send")
|
56
94
|
return
|