parsl 2025.6.23__py3-none-any.whl → 2025.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. parsl/configs/anvil.py +34 -0
  2. parsl/configs/delta.py +35 -0
  3. parsl/configs/osg.py +1 -1
  4. parsl/dataflow/dflow.py +14 -4
  5. parsl/executors/base.py +14 -6
  6. parsl/executors/high_throughput/executor.py +20 -15
  7. parsl/executors/high_throughput/interchange.py +177 -191
  8. parsl/executors/high_throughput/mpi_executor.py +7 -4
  9. parsl/executors/high_throughput/probe.py +61 -49
  10. parsl/executors/high_throughput/process_worker_pool.py +105 -103
  11. parsl/executors/taskvine/executor.py +9 -3
  12. parsl/executors/taskvine/manager.py +3 -1
  13. parsl/executors/threads.py +8 -1
  14. parsl/executors/workqueue/executor.py +9 -3
  15. parsl/monitoring/errors.py +5 -0
  16. parsl/monitoring/monitoring.py +25 -42
  17. parsl/monitoring/radios/base.py +63 -2
  18. parsl/monitoring/radios/filesystem.py +18 -3
  19. parsl/monitoring/radios/filesystem_router.py +13 -26
  20. parsl/monitoring/radios/htex.py +22 -13
  21. parsl/monitoring/radios/multiprocessing.py +22 -2
  22. parsl/monitoring/radios/udp.py +57 -19
  23. parsl/monitoring/radios/udp_router.py +49 -15
  24. parsl/monitoring/remote.py +19 -40
  25. parsl/providers/local/local.py +12 -13
  26. parsl/tests/configs/htex_local_alternate.py +0 -1
  27. parsl/tests/test_htex/test_interchange_exit_bad_registration.py +5 -7
  28. parsl/tests/test_htex/test_zmq_binding.py +5 -6
  29. parsl/tests/test_monitoring/test_basic.py +12 -10
  30. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +0 -1
  31. parsl/tests/test_monitoring/test_radio_filesystem.py +7 -9
  32. parsl/tests/test_monitoring/test_radio_multiprocessing.py +44 -0
  33. parsl/tests/test_monitoring/test_radio_udp.py +163 -12
  34. parsl/tests/test_monitoring/test_stdouterr.py +1 -3
  35. parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +3 -7
  36. parsl/version.py +1 -1
  37. {parsl-2025.6.23.data → parsl-2025.7.7.data}/scripts/interchange.py +177 -191
  38. {parsl-2025.6.23.data → parsl-2025.7.7.data}/scripts/process_worker_pool.py +105 -103
  39. {parsl-2025.6.23.dist-info → parsl-2025.7.7.dist-info}/METADATA +2 -2
  40. {parsl-2025.6.23.dist-info → parsl-2025.7.7.dist-info}/RECORD +46 -43
  41. {parsl-2025.6.23.data → parsl-2025.7.7.data}/scripts/exec_parsl_function.py +0 -0
  42. {parsl-2025.6.23.data → parsl-2025.7.7.data}/scripts/parsl_coprocess.py +0 -0
  43. {parsl-2025.6.23.dist-info → parsl-2025.7.7.dist-info}/LICENSE +0 -0
  44. {parsl-2025.6.23.dist-info → parsl-2025.7.7.dist-info}/WHEEL +0 -0
  45. {parsl-2025.6.23.dist-info → parsl-2025.7.7.dist-info}/entry_points.txt +0 -0
  46. {parsl-2025.6.23.dist-info → parsl-2025.7.7.dist-info}/top_level.txt +0 -0
parsl/executors/high_throughput/process_worker_pool.py

@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 
 import argparse
-import json
 import logging
 import math
 import multiprocessing
@@ -66,8 +65,7 @@ class Manager:
     def __init__(self, *,
                  addresses,
                  address_probe_timeout,
-                 task_port,
-                 result_port,
+                 port,
                  cores_per_worker,
                  mem_per_worker,
                  max_workers_per_node,
@@ -156,26 +154,23 @@ class Manager:
 
         self._start_time = time.time()
 
-        try:
-            ix_address = probe_addresses(addresses.split(','), task_port, timeout=address_probe_timeout)
-            if not ix_address:
-                raise Exception("No viable address found")
-            else:
-                logger.info("Connection to Interchange successful on {}".format(ix_address))
-                task_q_url = tcp_url(ix_address, task_port)
-                result_q_url = tcp_url(ix_address, result_port)
-                logger.info("Task url : {}".format(task_q_url))
-                logger.info("Result url : {}".format(result_q_url))
-        except Exception:
-            logger.exception("Caught exception while trying to determine viable address to interchange")
-            print("Failed to find a viable address to connect to interchange. Exiting")
-            exit(5)
-
         self.cert_dir = cert_dir
         self.zmq_context = curvezmq.ClientContext(self.cert_dir)
 
-        self._task_q_url = task_q_url
-        self._result_q_url = result_q_url
+        addresses = ','.join(tcp_url(a, port) for a in addresses.split(','))
+        try:
+            self._ix_url = probe_addresses(
+                self.zmq_context,
+                addresses,
+                timeout_ms=1_000 * address_probe_timeout,
+                identity=uid.encode('utf-8'),
+            )
+        except ConnectionError:
+            addys = ", ".join(addresses.split(","))
+            logger.error(f"Unable to connect to interchange; attempted addresses: {addys}")
+            raise
+
+        logger.info(f"Probe discovered interchange url: {self._ix_url}")
 
         self.uid = uid
         self.block_id = block_id
@@ -250,37 +245,37 @@ class Manager:
             self.worker_count = min(len(self.available_accelerators), self.worker_count)
         logger.info("Manager will spawn {} workers".format(self.worker_count))
 
-    def create_reg_message(self):
+    def create_reg_message(self) -> dict:
         """ Creates a registration message to identify the worker to the interchange
         """
-        msg = {'type': 'registration',
-               'parsl_v': PARSL_VERSION,
-               'python_v': "{}.{}.{}".format(sys.version_info.major,
-                                             sys.version_info.minor,
-                                             sys.version_info.micro),
-               'packages': {dist.metadata['Name']: dist.version for dist in distributions()},
-               'worker_count': self.worker_count,
-               'uid': self.uid,
-               'block_id': self.block_id,
-               'start_time': self.start_time,
-               'prefetch_capacity': self.prefetch_capacity,
-               'max_capacity': self.worker_count + self.prefetch_capacity,
-               'os': platform.system(),
-               'hostname': platform.node(),
-               'dir': os.getcwd(),
-               'cpu_count': psutil.cpu_count(logical=False),
-               'total_memory': psutil.virtual_memory().total,
-               }
-        b_msg = json.dumps(msg).encode('utf-8')
-        return b_msg
+        return {
+            'type': 'registration',
+            'parsl_v': PARSL_VERSION,
+            'python_v': "{}.{}.{}".format(
+                sys.version_info.major,
+                sys.version_info.minor,
+                sys.version_info.micro
+            ),
+            'packages': {d.metadata['Name']: d.version for d in distributions()},
+            'worker_count': self.worker_count,
+            'uid': self.uid,
+            'block_id': self.block_id,
+            'start_time': self.start_time,
+            'prefetch_capacity': self.prefetch_capacity,
+            'max_capacity': self.worker_count + self.prefetch_capacity,
+            'os': platform.system(),
+            'hostname': platform.node(),
+            'dir': os.getcwd(),
+            'cpu_count': psutil.cpu_count(logical=False),
+            'total_memory': psutil.virtual_memory().total,
+        }
 
     @staticmethod
     def heartbeat_to_incoming(task_incoming: zmq.Socket) -> None:
         """ Send heartbeat to the incoming task queue
         """
-        msg = {'type': 'heartbeat'}
         # don't need to dumps and encode this every time - could do as a global on import?
-        b_msg = json.dumps(msg).encode('utf-8')
+        b_msg = pickle.dumps({'type': 'heartbeat'})
         task_incoming.send(b_msg)
         logger.debug("Sent heartbeat")
 
@@ -289,32 +284,46 @@ class Manager:
         """ Send heartbeat to the incoming task queue
         """
         msg = {'type': 'drain'}
-        b_msg = json.dumps(msg).encode('utf-8')
+        b_msg = pickle.dumps(msg)
         task_incoming.send(b_msg)
         logger.debug("Sent drain")
 
     @wrap_with_logs
-    def pull_tasks(self):
+    def interchange_communicator(self, pair_setup: threading.Event):
         """ Pull tasks from the incoming tasks zmq pipe onto the internal
         pending task queue
         """
         logger.info("starting")
 
+        results_sock = self.zmq_context.socket(zmq.PAIR)
+        results_sock.setsockopt(zmq.LINGER, 0)
+        results_sock.bind("inproc://results")
+        pair_setup.set()
+
         # Linger is set to 0, so that the manager can exit even when there might be
         # messages in the pipe
-        task_incoming = self.zmq_context.socket(zmq.DEALER)
-        task_incoming.setsockopt(zmq.IDENTITY, self.uid.encode('utf-8'))
-        task_incoming.setsockopt(zmq.LINGER, 0)
-        task_incoming.connect(self._task_q_url)
+        ix_sock = self.zmq_context.socket(zmq.DEALER)
+        ix_sock.setsockopt(zmq.IDENTITY, self.uid.encode('utf-8'))
+        ix_sock.setsockopt(zmq.LINGER, 0)
+        ix_sock.connect(self._ix_url)
         logger.info("Manager task pipe connected to interchange")
 
         poller = zmq.Poller()
-        poller.register(task_incoming, zmq.POLLIN)
+        poller.register(results_sock, zmq.POLLIN)
+        poller.register(ix_sock, zmq.POLLIN)
+
+        ix_sock.send(pickle.dumps({"type": "connection_probe"}))
+        evts = dict(poller.poll(timeout=self.heartbeat_period))
+        if evts.get(ix_sock) is None:
+            logger.error(f"Failed to connect to interchange ({self._ix_url}")
+
+        ix_sock.recv()
+        logger.info(f"Successfully connected to interchange via URL: {self._ix_url}")
 
         # Send a registration message
         msg = self.create_reg_message()
-        logger.debug("Sending registration message: {}".format(msg))
-        task_incoming.send(msg)
+        logger.debug("Sending registration message: %s", msg)
+        ix_sock.send(pickle.dumps(msg))
         last_beat = time.time()
         last_interchange_contact = time.time()
         task_recv_counter = 0
@@ -335,18 +344,21 @@ class Manager:
                 pending_task_count = self.pending_task_queue.qsize()
             except NotImplementedError:
                 # Ref: https://github.com/python/cpython/blob/6d5e0dc0e330f4009e8dc3d1642e46b129788877/Lib/multiprocessing/queues.py#L125
-                pending_task_count = f"pending task count is not available on {platform.system()}"
+                pending_task_count = f"pending task count is not available on {platform.system()}"  # type: ignore[assignment]
 
-            logger.debug("ready workers: {}, pending tasks: {}".format(self.ready_worker_count.value,
-                                                                       pending_task_count))
+            logger.debug(
+                'ready workers: %d, pending tasks: %d',
+                self.ready_worker_count.value,  # type: ignore[attr-defined]
+                pending_task_count,
+            )
 
             if time.time() >= last_beat + self.heartbeat_period:
-                self.heartbeat_to_incoming(task_incoming)
+                self.heartbeat_to_incoming(ix_sock)
                 last_beat = time.time()
 
             if time.time() > self.drain_time:
                 logger.info("Requesting drain")
-                self.drain_to_incoming(task_incoming)
+                self.drain_to_incoming(ix_sock)
                 # This will start the pool draining...
                 # Drained exit behaviour does not happen here. It will be
                 # driven by the interchange sending a DRAINED_CODE message.
@@ -358,8 +370,8 @@ class Manager:
             poll_duration_s = max(0, next_interesting_event_time - time.time())
             socks = dict(poller.poll(timeout=poll_duration_s * 1000))
 
-            if socks.get(task_incoming) == zmq.POLLIN:
-                _, pkl_msg = task_incoming.recv_multipart()
+            if socks.get(ix_sock) == zmq.POLLIN:
+                pkl_msg = ix_sock.recv()
                 tasks = pickle.loads(pkl_msg)
                 last_interchange_contact = time.time()
 
@@ -377,6 +389,11 @@ class Manager:
                     for task in tasks:
                         self.task_scheduler.put_task(task)
 
+            elif socks.get(results_sock) == zmq.POLLIN:
+                meta_b = pickle.dumps({'type': 'result'})
+                ix_sock.send_multipart([meta_b, results_sock.recv()])
+                logger.debug("Result sent to interchange")
+
             else:
                 logger.debug("No incoming tasks")
 
@@ -387,45 +404,36 @@ class Manager:
                 logger.critical("Exiting")
                 break
 
-        task_incoming.close()
+        ix_sock.close()
         logger.info("Exiting")
 
     @wrap_with_logs
-    def push_results(self):
-        """ Listens on the pending_result_queue and sends out results via zmq
+    def ferry_result(self, may_connect: threading.Event):
+        """ Listens on the pending_result_queue and ferries results to the interchange
+        connected thread
         """
-        logger.debug("Starting result push thread")
+        logger.debug("Begin")
 
         # Linger is set to 0, so that the manager can exit even when there might be
         # messages in the pipe
-        result_outgoing = self.zmq_context.socket(zmq.DEALER)
-        result_outgoing.setsockopt(zmq.IDENTITY, self.uid.encode('utf-8'))
-        result_outgoing.setsockopt(zmq.LINGER, 0)
-        result_outgoing.connect(self._result_q_url)
-        logger.info("Manager result pipe connected to interchange")
+        notify_sock = self.zmq_context.socket(zmq.PAIR)
+        notify_sock.setsockopt(zmq.LINGER, 0)
+        may_connect.wait()
+        notify_sock.connect("inproc://results")
 
         while not self._stop_event.is_set():
-            logger.debug("Starting pending_result_queue get")
             try:
                 r = self.task_scheduler.get_result()
                 if r is None:
                     continue
-                logger.debug("Result received from worker: %s", id(r))
-                result_outgoing.send(r)
-                logger.debug("Result sent to interchange: %s", id(r))
+                logger.debug("Result received from worker")
+                notify_sock.send(r)
             except Exception:
                 logger.exception("Failed to send result to interchange")
 
-        result_outgoing.close()
+        notify_sock.close()
         logger.debug("Exiting")
 
-    @wrap_with_logs
-    def heartbeater(self):
-        while not self._stop_event.wait(self.heartbeat_period):
-            heartbeat_message = f"heartbeat_period={self.heartbeat_period} seconds"
-            logger.info(f"Sending heartbeat via results connection: {heartbeat_message}")
-            self.pending_result_queue.put(pickle.dumps({'type': 'heartbeat'}))
-
     def worker_watchdog(self, procs: dict[int, SpawnProcess]):
         """Keeps workers alive."""
         logger.debug("Starting worker watchdog")
@@ -492,23 +500,26 @@ class Manager:
 
         logger.debug("Workers started")
 
-        thr_task_puller = threading.Thread(target=self.pull_tasks, name="Task-Puller")
-        thr_result_pusher = threading.Thread(
-            target=self.push_results, name="Result-Pusher"
+        pair_setup = threading.Event()
+
+        thr_task_puller = threading.Thread(
+            target=self.interchange_communicator,
+            args=(pair_setup,),
+            name="Interchange-Communicator",
         )
+        thr_result_ferry = threading.Thread(
+            target=self.ferry_result, args=(pair_setup,), name="Result-Shovel")
         thr_worker_watchdog = threading.Thread(
             target=self.worker_watchdog, args=(procs,), name="worker-watchdog"
         )
         thr_monitoring_handler = threading.Thread(
             target=self.handle_monitoring_messages, name="Monitoring-Handler"
         )
-        thr_heartbeater = threading.Thread(target=self.heartbeater, name="Heartbeater")
 
         thr_task_puller.start()
-        thr_result_pusher.start()
+        thr_result_ferry.start()
         thr_worker_watchdog.start()
         thr_monitoring_handler.start()
-        thr_heartbeater.start()
 
         logger.info("Manager threads started")
 
@@ -520,11 +531,10 @@ class Manager:
         self.monitoring_queue.put(None)
         self.pending_result_queue.put(None)
 
-        thr_heartbeater.join()
-        thr_task_puller.join()
-        thr_result_pusher.join()
-        thr_worker_watchdog.join()
         thr_monitoring_handler.join()
+        thr_worker_watchdog.join()
+        thr_result_ferry.join()
+        thr_task_puller.join()
 
         for worker_id in procs:
             p = procs[worker_id]
@@ -862,10 +872,10 @@ def get_arg_parser() -> argparse.ArgumentParser:
         help="GB of memory assigned to each worker process. Default=0, no assignment",
     )
     parser.add_argument(
-        "-t",
-        "--task_port",
+        "-P",
+        "--port",
         required=True,
-        help="Task port for receiving tasks from the interchange",
+        help="Port for communication with the interchange",
     )
     parser.add_argument(
         "--max_workers_per_node",
@@ -901,12 +911,6 @@ def get_arg_parser() -> argparse.ArgumentParser:
     parser.add_argument(
         "--poll", default=10, help="Poll period used in milliseconds"
    )
-    parser.add_argument(
-        "-r",
-        "--result_port",
-        required=True,
-        help="Result port for posting results to the interchange",
-    )
     parser.add_argument(
         "--cpu-affinity",
         type=strategyorlist,
@@ -953,8 +957,7 @@ if __name__ == "__main__":
                 f"\n Block ID: {args.block_id}"
                 f"\n cores_per_worker: {args.cores_per_worker}"
                 f"\n mem_per_worker: {args.mem_per_worker}"
-                f"\n task_port: {args.task_port}"
-                f"\n result_port: {args.result_port}"
+                f"\n Interchange port: {args.port}"
                 f"\n addresses: {args.addresses}"
                 f"\n max_workers_per_node: {args.max_workers_per_node}"
                 f"\n poll_period: {args.poll}"
@@ -969,8 +972,7 @@ if __name__ == "__main__":
                 f"\n mpi_launcher: {args.mpi_launcher}"
                 )
     try:
-        manager = Manager(task_port=args.task_port,
-                          result_port=args.result_port,
+        manager = Manager(port=args.port,
                           addresses=args.addresses,
                           address_probe_timeout=int(args.address_probe_timeout),
                           uid=args.uid,

parsl/executors/taskvine/executor.py

@@ -40,6 +40,8 @@ from parsl.executors.taskvine.factory_config import TaskVineFactoryConfig
 from parsl.executors.taskvine.manager import _taskvine_submit_wait
 from parsl.executors.taskvine.manager_config import TaskVineManagerConfig
 from parsl.executors.taskvine.utils import ParslFileToVine, ParslTaskToVine
+from parsl.monitoring.radios.base import RadioConfig
+from parsl.monitoring.radios.filesystem import FilesystemRadio
 from parsl.multiprocessing import SpawnContext
 from parsl.process_loggers import wrap_with_logs
 from parsl.providers import CondorProvider, LocalProvider
@@ -98,8 +100,6 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         Default is None.
     """
 
-    radio_mode = "filesystem"
-
     @typeguard.typechecked
     def __init__(self,
                  label: str = "TaskVineExecutor",
@@ -108,7 +108,8 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
                  manager_config: TaskVineManagerConfig = TaskVineManagerConfig(),
                  factory_config: TaskVineFactoryConfig = TaskVineFactoryConfig(),
                  provider: Optional[ExecutionProvider] = LocalProvider(init_blocks=1),
-                 storage_access: Optional[List[Staging]] = None):
+                 storage_access: Optional[List[Staging]] = None,
+                 remote_monitoring_radio: Optional[RadioConfig] = None):
 
         # Set worker launch option for this executor
         if worker_launch_method == 'factory' or worker_launch_method == 'manual':
@@ -134,6 +135,11 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         self.factory_config = factory_config
         self.storage_access = storage_access
 
+        if remote_monitoring_radio is not None:
+            self.remote_monitoring_radio = remote_monitoring_radio
+        else:
+            self.remote_monitoring_radio = FilesystemRadio()
+
         # Queue to send ready tasks from TaskVine executor process to TaskVine manager process
         self._ready_task_queue: multiprocessing.Queue = SpawnContext.Queue()
 

parsl/executors/taskvine/manager.py

@@ -6,6 +6,7 @@ import shutil
 import subprocess
 import uuid
 
+import parsl
 from parsl.executors.taskvine import exec_parsl_function
 from parsl.executors.taskvine.utils import VineTaskToParsl, run_parsl_function
 from parsl.process_loggers import wrap_with_logs
@@ -255,7 +256,8 @@ def _taskvine_submit_wait(ready_task_queue=None,
                 run_parsl_function,
                 poncho_env=poncho_env_path,
                 init_command=manager_config.init_command,
-                add_env=add_env)
+                add_env=add_env,
+                hoisting_modules=[parsl.serialize, run_parsl_function])
 
         # Configure the library if provided
         if manager_config.library_config:

parsl/executors/threads.py

@@ -7,6 +7,8 @@ import typeguard
 from parsl.data_provider.staging import Staging
 from parsl.executors.base import ParslExecutor
 from parsl.executors.errors import InvalidResourceSpecification
+from parsl.monitoring.radios.base import RadioConfig
+from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadio
 from parsl.utils import RepresentationMixin
 
 logger = logging.getLogger(__name__)
@@ -28,7 +30,7 @@ class ThreadPoolExecutor(ParslExecutor, RepresentationMixin):
     @typeguard.typechecked
     def __init__(self, label: str = 'threads', max_threads: Optional[int] = 2,
                  thread_name_prefix: str = '', storage_access: Optional[List[Staging]] = None,
-                 working_dir: Optional[str] = None):
+                 working_dir: Optional[str] = None, remote_monitoring_radio: Optional[RadioConfig] = None):
         ParslExecutor.__init__(self)
         self.label = label
         self.max_threads = max_threads
@@ -40,6 +42,11 @@ class ThreadPoolExecutor(ParslExecutor, RepresentationMixin):
         self.storage_access = storage_access
         self.working_dir = working_dir
 
+        if remote_monitoring_radio is not None:
+            self.remote_monitoring_radio = remote_monitoring_radio
+        else:
+            self.remote_monitoring_radio = MultiprocessingQueueRadio()
+
     def start(self):
         self.executor = cf.ThreadPoolExecutor(max_workers=self.max_threads,
                                               thread_name_prefix=self.thread_name_prefix)
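
With this change, ThreadPoolExecutor accepts the monitoring radio as a constructor argument (remote_monitoring_radio) and defaults to MultiprocessingQueueRadio when none is given. A hedged usage sketch, passing explicitly what the code above already uses as the default:

from parsl.executors.threads import ThreadPoolExecutor
from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadio

# Equivalent to ThreadPoolExecutor(label="threads"): MultiprocessingQueueRadio()
# is the fallback chosen when remote_monitoring_radio is left as None.
executor = ThreadPoolExecutor(label="threads",
                              remote_monitoring_radio=MultiprocessingQueueRadio())
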

parsl/executors/workqueue/executor.py

@@ -31,6 +31,8 @@ from parsl.errors import OptionalModuleMissing
 from parsl.executors.errors import ExecutorError, InvalidResourceSpecification
 from parsl.executors.status_handling import BlockProviderExecutor
 from parsl.executors.workqueue import exec_parsl_function
+from parsl.monitoring.radios.base import RadioConfig
+from parsl.monitoring.radios.filesystem import FilesystemRadio
 from parsl.multiprocessing import SpawnContext, SpawnProcess
 from parsl.process_loggers import wrap_with_logs
 from parsl.providers import CondorProvider, LocalProvider
@@ -227,8 +229,6 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         specifiation for each task).
     """
 
-    radio_mode = "filesystem"
-
     @typeguard.typechecked
     def __init__(self,
                  label: str = "WorkQueueExecutor",
@@ -255,7 +255,8 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
                 worker_executable: str = 'work_queue_worker',
                 function_dir: Optional[str] = None,
                 coprocess: bool = False,
-                 scaling_cores_per_worker: int = 1):
+                 scaling_cores_per_worker: int = 1,
+                 remote_monitoring_radio: Optional[RadioConfig] = None):
         BlockProviderExecutor.__init__(self, provider=provider,
                                        block_error_handler=True)
         if not _work_queue_enabled:
@@ -308,6 +309,11 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         if self.init_command != "":
             self.launch_cmd = self.init_command + "; " + self.launch_cmd
 
+        if remote_monitoring_radio is not None:
+            self.remote_monitoring_radio = remote_monitoring_radio
+        else:
+            self.remote_monitoring_radio = FilesystemRadio()
+
     def _get_launch_command(self, block_id):
         # this executor uses different terminology for worker/launch
         # commands than in htex

parsl/monitoring/errors.py

@@ -4,3 +4,8 @@ from parsl.errors import ParslError
 class MonitoringRouterStartError(ParslError):
     def __str__(self) -> str:
         return "Monitoring router failed to start"
+
+
+class RadioRequiredError(ParslError):
+    def __str__(self) -> str:
+        return "A radio must be configured for remote task monitoring"

parsl/monitoring/monitoring.py

@@ -9,8 +9,6 @@ from typing import Any, Optional, Union
 
 import typeguard
 
-from parsl.monitoring.radios.filesystem_router import start_filesystem_receiver
-from parsl.monitoring.radios.udp_router import start_udp_receiver
 from parsl.monitoring.types import TaggedMonitoringMessage
 from parsl.multiprocessing import (
     SizedQueue,
@@ -36,9 +34,9 @@ logger = logging.getLogger(__name__)
 @typeguard.typechecked
 class MonitoringHub(RepresentationMixin):
     def __init__(self,
-                 hub_address: str,
-                 hub_port: Optional[int] = None,
-                 hub_port_range: Any = None,
+                 hub_address: Any = None,  # unused, so no type enforcement
+                 hub_port_range: Any = None,  # unused, so no type enforcement
+                 hub_port: Any = None,  # unused, so no type enforcement
 
                  workflow_name: Optional[str] = None,
                  workflow_version: Optional[str] = None,
@@ -49,16 +47,14 @@ class MonitoringHub(RepresentationMixin):
         """
         Parameters
         ----------
-        hub_address : str
-             The ip address at which the workers will be able to reach the Hub.
-        hub_port : int
-             The UDP port to which workers will be able to deliver messages to
-             the monitoring router.
-             Note that despite the similar name, this is not related to
-             hub_port_range.
-             Default: None
+        hub_address : unused
+        hub_port : unused
+             Unused, but probably retained until 2026-06-01 to give deprecation warning.
+             These two values previously configured UDP parameters when UDP was used
+             for monitoring messages from workers. These are now configured on the
+             relevant UDPRadio.
         hub_port_range : unused
-             Unused, but retained until 2025-09-14 to avoid configuration errors.
+             Unused, but probably retained until 2026-06-01 to give deprecation warning.
             This value previously configured one ZMQ channel inside the
             HighThroughputExecutor. That ZMQ channel is now configured by the
             interchange_port_range parameter of HighThroughputExecutor.
@@ -86,15 +82,27 @@ class MonitoringHub(RepresentationMixin):
         if _db_manager_excepts:
             raise _db_manager_excepts
 
+        # The following three parameters need to exist as attributes to be
+        # output by RepresentationMixin.
+        if hub_address is not None:
+            message = "Instead of MonitoringHub.hub_address, specify UDPRadio(address=...)"
+            warnings.warn(message, DeprecationWarning)
+            logger.warning(message)
+
         self.hub_address = hub_address
+
+        if hub_port is not None:
+            message = "Instead of MonitoringHub.hub_port, specify UDPRadio(port=...)"
+            warnings.warn(message, DeprecationWarning)
+            logger.warning(message)
+
         self.hub_port = hub_port
 
         if hub_port_range is not None:
             message = "Instead of MonitoringHub.hub_port_range, Use HighThroughputExecutor.interchange_port_range"
             warnings.warn(message, DeprecationWarning)
             logger.warning(message)
-            # This is used by RepresentationMixin so needs to exist as an attribute
-            # even though now it is otherwise unused.
+
         self.hub_port_range = hub_port_range
 
         self.logging_endpoint = logging_endpoint
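
The deprecation warnings above direct users from MonitoringHub's hub_address/hub_port to a UDPRadio configured on the executor. A hedged migration sketch; it assumes UDPRadio is importable from parsl.monitoring.radios.udp and that HighThroughputExecutor accepts the same remote_monitoring_radio parameter that this diff adds to the TaskVine, WorkQueue and thread pool executors:

from parsl.config import Config
from parsl.executors import HighThroughputExecutor
from parsl.monitoring.monitoring import MonitoringHub
from parsl.monitoring.radios.udp import UDPRadio  # assumed import path

config = Config(
    executors=[
        HighThroughputExecutor(
            label="htex",
            # Replaces the deprecated MonitoringHub(hub_address=..., hub_port=...);
            # the address and port values here are illustrative only.
            remote_monitoring_radio=UDPRadio(address="10.0.0.1", port=55055),
        ),
    ],
    monitoring=MonitoringHub(),
)
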
@@ -120,12 +128,6 @@ class MonitoringHub(RepresentationMixin):
         self.resource_msgs: Queue[TaggedMonitoringMessage]
         self.resource_msgs = SizedQueue()
 
-        self.udp_receiver = start_udp_receiver(debug=self.monitoring_debug,
-                                               logdir=dfk_run_dir,
-                                               monitoring_messages=self.resource_msgs,
-                                               port=self.hub_port
-                                               )
-
         self.dbm_exit_event: ms.Event
         self.dbm_exit_event = SpawnEvent()
 
@@ -140,37 +142,18 @@ class MonitoringHub(RepresentationMixin):
             daemon=True,
         )
         self.dbm_proc.start()
-        logger.info("Started UDP router process %s and DBM process %s",
-                    self.udp_receiver.process.pid, self.dbm_proc.pid)
-
-        self.filesystem_receiver = start_filesystem_receiver(debug=self.monitoring_debug,
-                                                             logdir=dfk_run_dir,
-                                                             monitoring_messages=self.resource_msgs
-                                                             )
-        logger.info("Started filesystem radio receiver process %s", self.filesystem_receiver.process.pid)
-
-        self.monitoring_hub_url = "udp://{}:{}".format(self.hub_address, self.udp_receiver.port)
-
+        logger.info("Started DBM process %s", self.dbm_proc.pid)
         logger.info("Monitoring Hub initialized")
 
     def close(self) -> None:
         logger.info("Terminating Monitoring Hub")
         if self.monitoring_hub_active:
             self.monitoring_hub_active = False
-            logger.info("Setting router termination event")
-
-            logger.info("Waiting for UDP router to terminate")
-            self.udp_receiver.close()
-
-            logger.debug("Finished waiting for router termination")
             logger.debug("Waiting for DB termination")
             self.dbm_exit_event.set()
             join_terminate_close_proc(self.dbm_proc)
             logger.debug("Finished waiting for DBM termination")
 
-            logger.info("Terminating filesystem radio receiver process")
-            self.filesystem_receiver.close()
-
             logger.info("Closing monitoring multiprocessing queues")
             self.resource_msgs.close()
             self.resource_msgs.join_thread()