parsl 2025.6.23__py3-none-any.whl → 2025.6.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. parsl/configs/osg.py +1 -1
  2. parsl/dataflow/dflow.py +14 -4
  3. parsl/executors/base.py +14 -6
  4. parsl/executors/high_throughput/executor.py +20 -15
  5. parsl/executors/high_throughput/interchange.py +173 -191
  6. parsl/executors/high_throughput/mpi_executor.py +7 -4
  7. parsl/executors/high_throughput/probe.py +4 -4
  8. parsl/executors/high_throughput/process_worker_pool.py +88 -94
  9. parsl/executors/taskvine/executor.py +9 -3
  10. parsl/executors/taskvine/manager.py +3 -1
  11. parsl/executors/threads.py +8 -1
  12. parsl/executors/workqueue/executor.py +9 -3
  13. parsl/monitoring/errors.py +5 -0
  14. parsl/monitoring/monitoring.py +25 -42
  15. parsl/monitoring/radios/base.py +63 -2
  16. parsl/monitoring/radios/filesystem.py +18 -3
  17. parsl/monitoring/radios/filesystem_router.py +13 -26
  18. parsl/monitoring/radios/htex.py +22 -13
  19. parsl/monitoring/radios/multiprocessing.py +22 -2
  20. parsl/monitoring/radios/udp.py +57 -19
  21. parsl/monitoring/radios/udp_router.py +49 -15
  22. parsl/monitoring/remote.py +19 -40
  23. parsl/providers/local/local.py +12 -13
  24. parsl/tests/configs/htex_local_alternate.py +0 -1
  25. parsl/tests/test_htex/test_interchange_exit_bad_registration.py +5 -7
  26. parsl/tests/test_htex/test_zmq_binding.py +5 -6
  27. parsl/tests/test_monitoring/test_basic.py +12 -10
  28. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +0 -1
  29. parsl/tests/test_monitoring/test_radio_filesystem.py +7 -9
  30. parsl/tests/test_monitoring/test_radio_multiprocessing.py +44 -0
  31. parsl/tests/test_monitoring/test_radio_udp.py +163 -12
  32. parsl/tests/test_monitoring/test_stdouterr.py +1 -3
  33. parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +3 -7
  34. parsl/version.py +1 -1
  35. {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/interchange.py +173 -191
  36. {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/process_worker_pool.py +88 -94
  37. {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/METADATA +2 -2
  38. {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/RECORD +44 -43
  39. {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/exec_parsl_function.py +0 -0
  40. {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/parsl_coprocess.py +0 -0
  41. {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/LICENSE +0 -0
  42. {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/WHEEL +0 -0
  43. {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/entry_points.txt +0 -0
  44. {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/top_level.txt +0 -0
@@ -1,10 +1,71 @@
1
- import logging
2
1
  from abc import ABCMeta, abstractmethod
2
+ from multiprocessing.queues import Queue
3
3
 
4
- logger = logging.getLogger(__name__)
4
+
5
+ class MonitoringRadioReceiver(metaclass=ABCMeta):
6
+ @abstractmethod
7
+ def shutdown(self) -> None:
8
+ pass
5
9
 
6
10
 
7
11
  class MonitoringRadioSender(metaclass=ABCMeta):
8
12
  @abstractmethod
9
13
  def send(self, message: object) -> None:
10
14
  pass
15
+
16
+
17
+ class RadioConfig(metaclass=ABCMeta):
18
+ """Base class for radio plugin configuration.
19
+
20
+ This provides the configuration for a particular way of sending monitoring
21
+ messages from a source of monitoring messages into the submit side
22
+ monitoring database.
23
+
24
+ This uses staged initialization like lots of Parsl configuration, but in
25
+ a slightly different form.
26
+
27
+ A RadioConfig object must be pickleable, because it will be sent to remote
28
+ workers to configure senders. The MonitoringRadioSender and
29
+ MonitoringRadioReceiver objects do not need to be pickleable (and often
30
+ will not be - for example, when they hold references to other processes).
31
+
32
+ The RadioConfig object will be used by Parsl in this sequence:
33
+
34
+ * A user creates a RadioConfig object from the appropriate subclass for
35
+ the radio mechanism they want to use, and specifies it as part of their
36
+ executor configuration.
37
+
38
+ Methods on the RadioConfig will then be invoked by Parsl like this:
39
+
40
+ * one create_receiver call, on the submit side
41
+ - this call can modify the state of radioconfig to contain information
42
+ about how a sender can connect back to the receiver. For example,
43
+ after binding to a particular port, can store that port so that the
44
+ sender knows which port to connect to.
45
+
46
+ * Possibly many serializations to get the RadioConfig to remote workers
47
+
48
+ * Many (0 or more) create_sender calls, possibly on remote workers, to
49
+ create the sending side of the radio (MonitoringRadioSender instances)
50
+
51
+ * Those senders are used to send messages
52
+
53
+ * At executor shutdown, the receiver is shut down.
54
+
55
+ This object cannot be re-used across parsl configurations - like many other
56
+ pieces of parsl config it is single use in that respect.
57
+ """
58
+
59
+ @abstractmethod
60
+ def create_receiver(self, *, run_dir: str, resource_msgs: Queue) -> MonitoringRadioReceiver:
61
+ """Create a receiver for this RadioConfig, and update this RadioConfig
62
+ with enough context to create senders.
63
+ """
64
+ pass
65
+
66
+ @abstractmethod
67
+ def create_sender(self) -> MonitoringRadioSender:
68
+ """Create a sender to connect to the receiver created by an
69
+ earlier call to create_receiver.
70
+ """
71
+ pass
@@ -2,13 +2,19 @@ import logging
2
2
  import os
3
3
  import pickle
4
4
  import uuid
5
+ from multiprocessing.queues import Queue
5
6
 
6
- from parsl.monitoring.radios.base import MonitoringRadioSender
7
+ from parsl.monitoring.radios.base import (
8
+ MonitoringRadioReceiver,
9
+ MonitoringRadioSender,
10
+ RadioConfig,
11
+ )
12
+ from parsl.monitoring.radios.filesystem_router import FilesystemRadioReceiver
7
13
 
8
14
  logger = logging.getLogger(__name__)
9
15
 
10
16
 
11
- class FilesystemRadioSender(MonitoringRadioSender):
17
+ class FilesystemRadio(RadioConfig):
12
18
  """A MonitoringRadioSender that sends messages over a shared filesystem.
13
19
 
14
20
  The message directory structure is based on maildir,
@@ -26,7 +32,16 @@ class FilesystemRadioSender(MonitoringRadioSender):
26
32
  the UDP radio, but should be much more reliable.
27
33
  """
28
34
 
29
- def __init__(self, *, monitoring_url: str, timeout: int = 10, run_dir: str):
35
+ def create_sender(self) -> MonitoringRadioSender:
36
+ return FilesystemRadioSender(run_dir=self.run_dir)
37
+
38
+ def create_receiver(self, *, run_dir: str, resource_msgs: Queue) -> MonitoringRadioReceiver:
39
+ self.run_dir = run_dir
40
+ return FilesystemRadioReceiver(resource_msgs, run_dir)
41
+
42
+
43
+ class FilesystemRadioSender(MonitoringRadioSender):
44
+ def __init__(self, *, run_dir: str):
30
45
  logger.info("filesystem based monitoring radio initializing")
31
46
  self.base_path = f"{run_dir}/monitor-fs-radio/"
32
47
  self.tmp_path = f"{self.base_path}/tmp"
@@ -4,15 +4,15 @@ import logging
4
4
  import os
5
5
  import pickle
6
6
  import time
7
- from multiprocessing.context import SpawnProcess
8
7
  from multiprocessing.queues import Queue
9
8
  from multiprocessing.synchronize import Event
10
9
  from typing import cast
11
10
 
12
11
  from parsl.log_utils import set_file_logger
12
+ from parsl.monitoring.radios.base import MonitoringRadioReceiver
13
13
  from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadioSender
14
14
  from parsl.monitoring.types import TaggedMonitoringMessage
15
- from parsl.multiprocessing import SpawnEvent, join_terminate_close_proc
15
+ from parsl.multiprocessing import SpawnEvent, SpawnProcess, join_terminate_close_proc
16
16
  from parsl.process_loggers import wrap_with_logs
17
17
  from parsl.utils import setproctitle
18
18
 
@@ -57,30 +57,17 @@ def filesystem_router_starter(*, q: Queue[TaggedMonitoringMessage], run_dir: str
57
57
  logger.info("Ending filesystem radio receiver")
58
58
 
59
59
 
60
- class FilesystemRadioReceiver():
61
- def __init__(self, *, process: SpawnProcess, exit_event: Event) -> None:
62
- self.process = process
63
- self.exit_event = exit_event
60
+ class FilesystemRadioReceiver(MonitoringRadioReceiver):
61
+ def __init__(self, resource_msgs: Queue, run_dir: str) -> None:
62
+ self.exit_event = SpawnEvent()
63
+ self.process = SpawnProcess(target=filesystem_router_starter,
64
+ kwargs={"q": resource_msgs, "run_dir": run_dir, "exit_event": self.exit_event},
65
+ name="Monitoring-Filesystem-Process",
66
+ daemon=True
67
+ )
68
+ self.process.start()
69
+ logger.info("Started filesystem radio receiver process %s", self.process.pid)
64
70
 
65
- def close(self) -> None:
71
+ def shutdown(self) -> None:
66
72
  self.exit_event.set()
67
73
  join_terminate_close_proc(self.process)
68
-
69
-
70
- def start_filesystem_receiver(*,
71
- monitoring_messages: Queue,
72
- logdir: str,
73
- debug: bool) -> FilesystemRadioReceiver:
74
-
75
- router_exit_event = SpawnEvent()
76
-
77
- filesystem_proc = SpawnProcess(target=filesystem_router_starter,
78
- kwargs={"q": monitoring_messages,
79
- "run_dir": logdir,
80
- "exit_event": router_exit_event},
81
- name="Monitoring-Filesystem-Process",
82
- daemon=True
83
- )
84
- filesystem_proc.start()
85
-
86
- return FilesystemRadioReceiver(process=filesystem_proc, exit_event=router_exit_event)
@@ -1,24 +1,29 @@
1
1
  import logging
2
2
  import pickle
3
+ from multiprocessing.queues import Queue
3
4
 
4
- from parsl.monitoring.radios.base import MonitoringRadioSender
5
+ from parsl.monitoring.radios.base import (
6
+ MonitoringRadioReceiver,
7
+ MonitoringRadioSender,
8
+ RadioConfig,
9
+ )
5
10
 
6
11
  logger = logging.getLogger(__name__)
7
12
 
8
13
 
9
- class HTEXRadioSender(MonitoringRadioSender):
14
+ class HTEXRadio(RadioConfig):
15
+ def create_sender(self) -> MonitoringRadioSender:
16
+ return HTEXRadioSender()
10
17
 
11
- def __init__(self, monitoring_url: str, timeout: int = 10):
12
- """
13
- Parameters
14
- ----------
18
+ def create_receiver(self, *, run_dir: str, resource_msgs: Queue) -> MonitoringRadioReceiver:
19
+ return HTEXRadioReceiver()
15
20
 
16
- monitoring_url : str
17
- URL of the form <scheme>://<IP>:<PORT>
18
- timeout : int
19
- timeout, default=10s
20
- """
21
- logger.info("htex-based monitoring radio initialising")
21
+
22
+ class HTEXRadioSender(MonitoringRadioSender):
23
+
24
+ def __init__(self) -> None:
25
+ # there is nothing to initialize
26
+ pass
22
27
 
23
28
  def send(self, message: object) -> None:
24
29
  """ Sends a message to the UDP receiver
@@ -54,4 +59,8 @@ class HTEXRadioSender(MonitoringRadioSender):
54
59
  else:
55
60
  logger.error("result_queue is uninitialized - cannot put monitoring message")
56
61
 
57
- return
62
+
63
+ class HTEXRadioReceiver(MonitoringRadioReceiver):
64
+ def shutdown(self) -> None:
65
+ # there is nothing to shut down
66
+ pass
@@ -1,6 +1,10 @@
1
- from multiprocessing.queues import Queue
1
+ from multiprocessing import Queue
2
2
 
3
- from parsl.monitoring.radios.base import MonitoringRadioSender
3
+ from parsl.monitoring.radios.base import (
4
+ MonitoringRadioReceiver,
5
+ MonitoringRadioSender,
6
+ RadioConfig,
7
+ )
4
8
 
5
9
 
6
10
  class MultiprocessingQueueRadioSender(MonitoringRadioSender):
@@ -15,3 +19,19 @@ class MultiprocessingQueueRadioSender(MonitoringRadioSender):
15
19
 
16
20
  def send(self, message: object) -> None:
17
21
  self.queue.put(message)
22
+
23
+
24
+ class MultiprocessingQueueRadio(RadioConfig):
25
+ def create_sender(self) -> MonitoringRadioSender:
26
+ return MultiprocessingQueueRadioSender(self._queue)
27
+
28
+ def create_receiver(self, *, run_dir: str, resource_msgs: Queue) -> MonitoringRadioReceiver:
29
+ # This object is only for use with an in-process thread-pool so it
30
+ # is fine to store a reference to the message queue directly.
31
+ self._queue = resource_msgs
32
+ return MultiprocessingQueueRadioReceiver()
33
+
34
+
35
+ class MultiprocessingQueueRadioReceiver(MonitoringRadioReceiver):
36
+ def shutdown(self) -> None:
37
+ pass
@@ -1,29 +1,63 @@
1
+ import hashlib
2
+ import hmac
1
3
  import logging
2
4
  import pickle
5
+ import secrets
3
6
  import socket
7
+ from multiprocessing.queues import Queue
8
+ from typing import Optional
4
9
 
5
- from parsl.monitoring.radios.base import MonitoringRadioSender
10
+ from parsl.monitoring.radios.base import (
11
+ MonitoringRadioReceiver,
12
+ MonitoringRadioSender,
13
+ RadioConfig,
14
+ )
15
+ from parsl.monitoring.radios.udp_router import start_udp_receiver
6
16
 
17
+ logger = logging.getLogger(__name__)
7
18
 
8
- class UDPRadioSender(MonitoringRadioSender):
9
19
 
10
- def __init__(self, monitoring_url: str, timeout: int = 10):
11
- """
12
- Parameters
13
- ----------
20
+ class UDPRadio(RadioConfig):
21
+ def __init__(self, *, port: Optional[int] = None, atexit_timeout: int = 3, address: str, debug: bool = False, hmac_digest: str = 'sha512'):
22
+ self.port = port
23
+ self.atexit_timeout = atexit_timeout
24
+ self.address = address
25
+ self.debug = debug
26
+ self.hmac_digest = hmac_digest
14
27
 
15
- monitoring_url : str
16
- URL of the form <scheme>://<IP>:<PORT>
17
- timeout : int
18
- timeout, default=10s
19
- """
20
- self.monitoring_url = monitoring_url
28
+ def create_sender(self) -> MonitoringRadioSender:
29
+ assert self.port is not None, "self.port should have been initialized by create_receiver"
30
+ return UDPRadioSender(self.address, self.port, self.hmac_key, self.hmac_digest)
31
+
32
+ def create_receiver(self, run_dir: str, resource_msgs: Queue) -> MonitoringRadioReceiver:
33
+ # RFC 2104 section 2 recommends that the key length be at
34
+ # least as long as the hash output (64 bytes in the case of SHA512).
35
+ # RFC 2104 section 3 talks about periodic key refreshing. This key is
36
+ # not refreshed inside a workflow run, but each separate workflow run
37
+ # uses a new key.
38
+ keysize = hashlib.new(self.hmac_digest).digest_size
39
+ self.hmac_key = secrets.token_bytes(keysize)
40
+
41
+ udp_receiver = start_udp_receiver(logdir=run_dir,
42
+ monitoring_messages=resource_msgs,
43
+ port=self.port,
44
+ debug=self.debug,
45
+ atexit_timeout=self.atexit_timeout,
46
+ hmac_key=self.hmac_key,
47
+ hmac_digest=self.hmac_digest
48
+ )
49
+ self.port = udp_receiver.port
50
+ return udp_receiver
51
+
52
+
53
+ class UDPRadioSender(MonitoringRadioSender):
54
+
55
+ def __init__(self, address: str, port: int, hmac_key: bytes, hmac_digest: str, *, timeout: int = 10) -> None:
21
56
  self.sock_timeout = timeout
22
- try:
23
- self.scheme, self.ip, port = (x.strip('/') for x in monitoring_url.split(':'))
24
- self.port = int(port)
25
- except Exception:
26
- raise Exception("Failed to parse monitoring url: {}".format(monitoring_url))
57
+ self.address = address
58
+ self.port = port
59
+ self.hmac_key = hmac_key
60
+ self.hmac_digest = hmac_digest
27
61
 
28
62
  self.sock = socket.socket(socket.AF_INET,
29
63
  socket.SOCK_DGRAM,
@@ -42,15 +76,19 @@ class UDPRadioSender(MonitoringRadioSender):
42
76
  Returns:
43
77
  None
44
78
  """
79
+ logger.info("Starting UDP radio message send")
45
80
  try:
46
- buffer = pickle.dumps(message)
81
+ data = pickle.dumps(message)
82
+ origin_hmac = hmac.digest(self.hmac_key, data, self.hmac_digest)
83
+ buffer = origin_hmac + data
47
84
  except Exception:
48
85
  logging.exception("Exception during pickling", exc_info=True)
49
86
  return
50
87
 
51
88
  try:
52
- self.sock.sendto(buffer, (self.ip, self.port))
89
+ self.sock.sendto(buffer, (self.address, self.port))
53
90
  except socket.timeout:
54
91
  logging.error("Could not send message within timeout limit")
55
92
  return
93
+ logger.info("Normal ending for UDP radio message send")
56
94
  return
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import hmac
3
4
  import logging
4
5
  import multiprocessing.queues as mpq
5
6
  import os
@@ -17,6 +18,7 @@ import typeguard
17
18
 
18
19
  from parsl.log_utils import set_file_logger
19
20
  from parsl.monitoring.errors import MonitoringRouterStartError
21
+ from parsl.monitoring.radios.base import MonitoringRadioReceiver
20
22
  from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadioSender
21
23
  from parsl.multiprocessing import (
22
24
  SizedQueue,
@@ -37,9 +39,11 @@ class MonitoringRouter:
37
39
  udp_port: Optional[int] = None,
38
40
  run_dir: str = ".",
39
41
  logging_level: int = logging.INFO,
40
- atexit_timeout: int = 3, # in seconds
42
+ atexit_timeout: int, # in seconds
41
43
  resource_msgs: mpq.Queue,
42
44
  exit_event: Event,
45
+ hmac_key: bytes,
46
+ hmac_digest: str,
43
47
  ):
44
48
  """ Initializes a monitoring configuration class.
45
49
 
@@ -65,6 +69,9 @@ class MonitoringRouter:
65
69
 
66
70
  self.atexit_timeout = atexit_timeout
67
71
 
72
+ self.hmac_key = hmac_key
73
+ self.hmac_digest = hmac_digest
74
+
68
75
  self.loop_freq = 10.0 # milliseconds
69
76
 
70
77
  # Initialize the UDP socket
@@ -94,10 +101,7 @@ class MonitoringRouter:
94
101
  try:
95
102
  while not self.exit_event.is_set():
96
103
  try:
97
- data, addr = self.udp_sock.recvfrom(2048)
98
- resource_msg = pickle.loads(data)
99
- logger.debug("Got UDP Message from {}: {}".format(addr, resource_msg))
100
- self.target_radio.send(resource_msg)
104
+ self.process_message()
101
105
  except socket.timeout:
102
106
  pass
103
107
 
@@ -105,10 +109,7 @@ class MonitoringRouter:
105
109
  last_msg_received_time = time.time()
106
110
  while time.time() - last_msg_received_time < self.atexit_timeout:
107
111
  try:
108
- data, addr = self.udp_sock.recvfrom(2048)
109
- msg = pickle.loads(data)
110
- logger.debug("Got UDP Message from {}: {}".format(addr, msg))
111
- self.target_radio.send(msg)
112
+ self.process_message()
112
113
  last_msg_received_time = time.time()
113
114
  except socket.timeout:
114
115
  pass
@@ -117,6 +118,28 @@ class MonitoringRouter:
117
118
  finally:
118
119
  logger.info("UDP listener finished")
119
120
 
121
+ def process_message(self) -> None:
122
+ hmdata, addr = self.udp_sock.recvfrom(2048)
123
+ h = hmac.HMAC(key=self.hmac_key, digestmod=self.hmac_digest)
124
+ origin_hmac = hmdata[0:h.digest_size]
125
+ h.update(hmdata[h.digest_size:])
126
+ data = hmdata[h.digest_size:]
127
+
128
+ # Check hmac before pickle load.
129
+ # If data is wrong, do not log it because it is suspect,
130
+ # but it should be safe to log the addr, at error level.
131
+
132
+ recomputed_hmac = h.digest()
133
+
134
+ if not hmac.compare_digest(origin_hmac, recomputed_hmac):
135
+ logger.error("HMAC does not match on received message")
136
+ # No exception, because this can be arbitrary network noise
137
+ # that shouldn't break the receiver.
138
+ else:
139
+ resource_msg = pickle.loads(data)
140
+ logger.debug("Got UDP Message from {}: {}".format(addr, resource_msg))
141
+ self.target_radio.send(resource_msg)
142
+
120
143
 
121
144
  @wrap_with_logs
122
145
  @typeguard.typechecked
@@ -126,16 +149,21 @@ def udp_router_starter(*,
126
149
  exit_event: Event,
127
150
 
128
151
  udp_port: Optional[int],
129
-
152
+ hmac_key: bytes,
130
153
  run_dir: str,
131
- logging_level: int) -> None:
154
+ logging_level: int,
155
+ atexit_timeout: int,
156
+ hmac_digest: str) -> None:
132
157
  setproctitle("parsl: monitoring UDP router")
133
158
  try:
134
159
  router = MonitoringRouter(udp_port=udp_port,
135
160
  run_dir=run_dir,
136
161
  logging_level=logging_level,
137
162
  resource_msgs=resource_msgs,
138
- exit_event=exit_event)
163
+ exit_event=exit_event,
164
+ atexit_timeout=atexit_timeout,
165
+ hmac_key=hmac_key,
166
+ hmac_digest=hmac_digest)
139
167
  except Exception as e:
140
168
  logger.error("MonitoringRouter construction failed.", exc_info=True)
141
169
  comm_q.put(f"Monitoring router construction failed: {e}")
@@ -149,13 +177,13 @@ def udp_router_starter(*,
149
177
  logger.exception("UDP router start exception")
150
178
 
151
179
 
152
- class UDPRadioReceiver():
180
+ class UDPRadioReceiver(MonitoringRadioReceiver):
153
181
  def __init__(self, *, process: SpawnProcessType, exit_event: EventType, port: int) -> None:
154
182
  self.process = process
155
183
  self.exit_event = exit_event
156
184
  self.port = port
157
185
 
158
- def close(self) -> None:
186
+ def shutdown(self) -> None:
159
187
  self.exit_event.set()
160
188
  join_terminate_close_proc(self.process)
161
189
 
@@ -164,7 +192,10 @@ def start_udp_receiver(*,
164
192
  monitoring_messages: Queue,
165
193
  port: Optional[int],
166
194
  logdir: str,
167
- debug: bool) -> UDPRadioReceiver:
195
+ debug: bool,
196
+ atexit_timeout: int,
197
+ hmac_key: bytes,
198
+ hmac_digest: str) -> UDPRadioReceiver:
168
199
 
169
200
  udp_comm_q: Queue[Union[int, str]]
170
201
  udp_comm_q = SizedQueue(maxsize=10)
@@ -178,6 +209,9 @@ def start_udp_receiver(*,
178
209
  "udp_port": port,
179
210
  "run_dir": logdir,
180
211
  "logging_level": logging.DEBUG if debug else logging.INFO,
212
+ "atexit_timeout": atexit_timeout,
213
+ "hmac_key": hmac_key,
214
+ "hmac_digest": hmac_digest,
181
215
  },
182
216
  name="Monitoring-UDP-Router-Process",
183
217
  daemon=True,
@@ -7,10 +7,7 @@ from multiprocessing import Event
7
7
  from typing import Any, Callable, Dict, List, Sequence, Tuple
8
8
 
9
9
  from parsl.monitoring.message_type import MessageType
10
- from parsl.monitoring.radios.base import MonitoringRadioSender
11
- from parsl.monitoring.radios.filesystem import FilesystemRadioSender
12
- from parsl.monitoring.radios.htex import HTEXRadioSender
13
- from parsl.monitoring.radios.udp import UDPRadioSender
10
+ from parsl.monitoring.radios.base import RadioConfig
14
11
  from parsl.multiprocessing import ForkProcess
15
12
  from parsl.process_loggers import wrap_with_logs
16
13
 
@@ -23,11 +20,10 @@ def monitor_wrapper(*,
23
20
  kwargs: Dict, # per invocation
24
21
  x_try_id: int, # per invocation
25
22
  x_task_id: int, # per invocation
26
- monitoring_hub_url: str, # per workflow
23
+ radio_config: RadioConfig, # per executor
27
24
  run_id: str, # per workflow
28
25
  logging_level: int, # per workflow
29
26
  sleep_dur: float, # per workflow
30
- radio_mode: str, # per executor
31
27
  monitor_resources: bool, # per workflow
32
28
  run_dir: str) -> Tuple[Callable, Sequence, Dict]:
33
29
  """Wrap the Parsl app with a function that will call the monitor function and point it at the correct pid when the task begins.
@@ -41,9 +37,8 @@ def monitor_wrapper(*,
41
37
  # Send first message to monitoring router
42
38
  send_first_message(try_id,
43
39
  task_id,
44
- monitoring_hub_url,
40
+ radio_config,
45
41
  run_id,
46
- radio_mode,
47
42
  run_dir)
48
43
 
49
44
  if monitor_resources and sleep_dur > 0:
@@ -52,9 +47,8 @@ def monitor_wrapper(*,
52
47
  args=(os.getpid(),
53
48
  try_id,
54
49
  task_id,
55
- monitoring_hub_url,
50
+ radio_config,
56
51
  run_id,
57
- radio_mode,
58
52
  logging_level,
59
53
  sleep_dur,
60
54
  run_dir,
@@ -87,9 +81,9 @@ def monitor_wrapper(*,
87
81
 
88
82
  send_last_message(try_id,
89
83
  task_id,
90
- monitoring_hub_url,
84
+ radio_config,
91
85
  run_id,
92
- radio_mode, run_dir)
86
+ run_dir)
93
87
 
94
88
  new_kwargs = kwargs.copy()
95
89
  new_kwargs['_parsl_monitoring_task_id'] = x_task_id
@@ -98,47 +92,33 @@ def monitor_wrapper(*,
98
92
  return (wrapped, args, new_kwargs)
99
93
 
100
94
 
101
- def get_radio(radio_mode: str, monitoring_hub_url: str, task_id: int, run_dir: str) -> MonitoringRadioSender:
102
- radio: MonitoringRadioSender
103
- if radio_mode == "udp":
104
- radio = UDPRadioSender(monitoring_hub_url)
105
- elif radio_mode == "htex":
106
- radio = HTEXRadioSender(monitoring_hub_url)
107
- elif radio_mode == "filesystem":
108
- radio = FilesystemRadioSender(monitoring_url=monitoring_hub_url,
109
- run_dir=run_dir)
110
- else:
111
- raise RuntimeError(f"Unknown radio mode: {radio_mode}")
112
- return radio
113
-
114
-
115
95
  @wrap_with_logs
116
96
  def send_first_message(try_id: int,
117
97
  task_id: int,
118
- monitoring_hub_url: str,
119
- run_id: str, radio_mode: str, run_dir: str) -> None:
120
- send_first_last_message(try_id, task_id, monitoring_hub_url, run_id,
121
- radio_mode, run_dir, False)
98
+ radio_config: RadioConfig,
99
+ run_id: str, run_dir: str) -> None:
100
+ send_first_last_message(try_id, task_id, radio_config, run_id,
101
+ run_dir, False)
122
102
 
123
103
 
124
104
  @wrap_with_logs
125
105
  def send_last_message(try_id: int,
126
106
  task_id: int,
127
- monitoring_hub_url: str,
128
- run_id: str, radio_mode: str, run_dir: str) -> None:
129
- send_first_last_message(try_id, task_id, monitoring_hub_url, run_id,
130
- radio_mode, run_dir, True)
107
+ radio_config: RadioConfig,
108
+ run_id: str, run_dir: str) -> None:
109
+ send_first_last_message(try_id, task_id, radio_config, run_id,
110
+ run_dir, True)
131
111
 
132
112
 
133
113
  def send_first_last_message(try_id: int,
134
114
  task_id: int,
135
- monitoring_hub_url: str,
136
- run_id: str, radio_mode: str, run_dir: str,
115
+ radio_config: RadioConfig,
116
+ run_id: str, run_dir: str,
137
117
  is_last: bool) -> None:
138
118
  import os
139
119
  import platform
140
120
 
141
- radio = get_radio(radio_mode, monitoring_hub_url, task_id, run_dir)
121
+ radio = radio_config.create_sender()
142
122
 
143
123
  msg = (MessageType.RESOURCE_INFO,
144
124
  {'run_id': run_id,
@@ -158,9 +138,8 @@ def send_first_last_message(try_id: int,
158
138
  def monitor(pid: int,
159
139
  try_id: int,
160
140
  task_id: int,
161
- monitoring_hub_url: str,
141
+ radio_config: RadioConfig,
162
142
  run_id: str,
163
- radio_mode: str,
164
143
  logging_level: int,
165
144
  sleep_dur: float,
166
145
  run_dir: str,
@@ -184,7 +163,7 @@ def monitor(pid: int,
184
163
 
185
164
  setproctitle("parsl: task resource monitor")
186
165
 
187
- radio = get_radio(radio_mode, monitoring_hub_url, task_id, run_dir)
166
+ radio = radio_config.create_sender()
188
167
 
189
168
  logging.debug("start of monitor")
190
169