parsl 2025.6.23__py3-none-any.whl → 2025.6.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. parsl/configs/osg.py +1 -1
  2. parsl/dataflow/dflow.py +14 -4
  3. parsl/executors/base.py +14 -6
  4. parsl/executors/high_throughput/executor.py +20 -15
  5. parsl/executors/high_throughput/interchange.py +173 -191
  6. parsl/executors/high_throughput/mpi_executor.py +7 -4
  7. parsl/executors/high_throughput/probe.py +4 -4
  8. parsl/executors/high_throughput/process_worker_pool.py +88 -94
  9. parsl/executors/taskvine/executor.py +9 -3
  10. parsl/executors/taskvine/manager.py +3 -1
  11. parsl/executors/threads.py +8 -1
  12. parsl/executors/workqueue/executor.py +9 -3
  13. parsl/monitoring/errors.py +5 -0
  14. parsl/monitoring/monitoring.py +25 -42
  15. parsl/monitoring/radios/base.py +63 -2
  16. parsl/monitoring/radios/filesystem.py +18 -3
  17. parsl/monitoring/radios/filesystem_router.py +13 -26
  18. parsl/monitoring/radios/htex.py +22 -13
  19. parsl/monitoring/radios/multiprocessing.py +22 -2
  20. parsl/monitoring/radios/udp.py +57 -19
  21. parsl/monitoring/radios/udp_router.py +49 -15
  22. parsl/monitoring/remote.py +19 -40
  23. parsl/providers/local/local.py +12 -13
  24. parsl/tests/configs/htex_local_alternate.py +0 -1
  25. parsl/tests/test_htex/test_interchange_exit_bad_registration.py +5 -7
  26. parsl/tests/test_htex/test_zmq_binding.py +5 -6
  27. parsl/tests/test_monitoring/test_basic.py +12 -10
  28. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +0 -1
  29. parsl/tests/test_monitoring/test_radio_filesystem.py +7 -9
  30. parsl/tests/test_monitoring/test_radio_multiprocessing.py +44 -0
  31. parsl/tests/test_monitoring/test_radio_udp.py +163 -12
  32. parsl/tests/test_monitoring/test_stdouterr.py +1 -3
  33. parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +3 -7
  34. parsl/version.py +1 -1
  35. {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/interchange.py +173 -191
  36. {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/process_worker_pool.py +88 -94
  37. {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/METADATA +2 -2
  38. {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/RECORD +44 -43
  39. {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/exec_parsl_function.py +0 -0
  40. {parsl-2025.6.23.data → parsl-2025.6.30.data}/scripts/parsl_coprocess.py +0 -0
  41. {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/LICENSE +0 -0
  42. {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/WHEEL +0 -0
  43. {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/entry_points.txt +0 -0
  44. {parsl-2025.6.23.dist-info → parsl-2025.6.30.dist-info}/top_level.txt +0 -0
parsl/configs/osg.py CHANGED
@@ -25,7 +25,7 @@ python3 -m venv parsl_env; source parsl_env/bin/activate; python3 -m pip install
25
25
  walltime="00:20:00",
26
26
  ),
27
27
  worker_logdir_root='$OSG_WN_TMP',
28
- worker_ports=(31000, 31001)
28
+ worker_port=31000,
29
29
  )
30
30
  ],
31
31
  usage_tracking=LEVEL_1,
parsl/dataflow/dflow.py CHANGED
@@ -44,6 +44,7 @@ from parsl.executors.status_handling import BlockProviderExecutor
44
44
  from parsl.executors.threads import ThreadPoolExecutor
45
45
  from parsl.jobs.job_status_poller import JobStatusPoller
46
46
  from parsl.monitoring import MonitoringHub
47
+ from parsl.monitoring.errors import RadioRequiredError
47
48
  from parsl.monitoring.message_type import MessageType
48
49
  from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadioSender
49
50
  from parsl.monitoring.remote import monitor_wrapper
@@ -186,7 +187,8 @@ class DataFlowKernel:
186
187
  self.executors: Dict[str, ParslExecutor] = {}
187
188
 
188
189
  self.data_manager = DataManager(self)
189
- parsl_internal_executor = ThreadPoolExecutor(max_threads=config.internal_tasks_max_threads, label='_parsl_internal')
190
+ parsl_internal_executor = ThreadPoolExecutor(max_threads=config.internal_tasks_max_threads,
191
+ label='_parsl_internal')
190
192
  self.add_executors(config.executors)
191
193
  self.add_executors([parsl_internal_executor])
192
194
 
@@ -736,17 +738,19 @@ class DataFlowKernel:
736
738
  try_id = task_record['fail_count']
737
739
 
738
740
  if self.monitoring is not None and self.monitoring.resource_monitoring_enabled:
741
+ if executor.remote_monitoring_radio is None:
742
+ raise RadioRequiredError()
743
+
739
744
  wrapper_logging_level = logging.DEBUG if self.monitoring.monitoring_debug else logging.INFO
740
745
  (function, args, kwargs) = monitor_wrapper(f=function,
741
746
  args=args,
742
747
  kwargs=kwargs,
743
748
  x_try_id=try_id,
744
749
  x_task_id=task_id,
745
- monitoring_hub_url=self.monitoring.monitoring_hub_url,
750
+ radio_config=executor.remote_monitoring_radio,
746
751
  run_id=self.run_id,
747
752
  logging_level=wrapper_logging_level,
748
753
  sleep_dur=self.monitoring.resource_monitoring_interval,
749
- radio_mode=executor.radio_mode,
750
754
  monitor_resources=executor.monitor_resources(),
751
755
  run_dir=self.run_dir)
752
756
 
@@ -1131,8 +1135,14 @@ class DataFlowKernel:
1131
1135
  for executor in executors:
1132
1136
  executor.run_id = self.run_id
1133
1137
  executor.run_dir = self.run_dir
1134
- if self.monitoring:
1138
+ if self.monitoring and executor.remote_monitoring_radio is not None:
1135
1139
  executor.monitoring_messages = self.monitoring.resource_msgs
1140
+ logger.debug("Starting monitoring receiver for executor %s "
1141
+ "with remote monitoring radio config %s",
1142
+ executor, executor.remote_monitoring_radio)
1143
+
1144
+ executor.monitoring_receiver = executor.remote_monitoring_radio.create_receiver(resource_msgs=executor.monitoring_messages,
1145
+ run_dir=executor.run_dir)
1136
1146
  if hasattr(executor, 'provider'):
1137
1147
  if hasattr(executor.provider, 'script_dir'):
1138
1148
  executor.provider.script_dir = os.path.join(self.run_dir, 'submit_scripts')
parsl/executors/base.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import logging
3
4
  import os
4
5
  from abc import ABCMeta, abstractmethod
5
6
  from concurrent.futures import Future
@@ -8,8 +9,11 @@ from typing import Any, Callable, Dict, Optional
8
9
 
9
10
  from typing_extensions import Literal, Self
10
11
 
12
+ from parsl.monitoring.radios.base import MonitoringRadioReceiver, RadioConfig
11
13
  from parsl.monitoring.types import TaggedMonitoringMessage
12
14
 
15
+ logger = logging.getLogger(__name__)
16
+
13
17
 
14
18
  class ParslExecutor(metaclass=ABCMeta):
15
19
  """Executors are abstractions that represent available compute resources
@@ -27,10 +31,8 @@ class ParslExecutor(metaclass=ABCMeta):
27
31
  label: str - a human readable label for the executor, unique
28
32
  with respect to other executors.
29
33
 
30
- Per-executor monitoring behaviour can be influenced by exposing:
31
-
32
- radio_mode: str - a string describing which radio mode should be used to
33
- send task resource data back to the submit side.
34
+ remote_monitoring_radio: RadioConfig describing how tasks on this executor
35
+ should report task resource status
34
36
 
35
37
  An executor may optionally expose:
36
38
 
@@ -55,7 +57,6 @@ class ParslExecutor(metaclass=ABCMeta):
55
57
  """
56
58
 
57
59
  label: str = "undefined"
58
- radio_mode: str = "udp"
59
60
 
60
61
  def __init__(
61
62
  self,
@@ -65,6 +66,10 @@ class ParslExecutor(metaclass=ABCMeta):
65
66
  run_id: Optional[str] = None,
66
67
  ):
67
68
  self.monitoring_messages = monitoring_messages
69
+
70
+ self.remote_monitoring_radio: Optional[RadioConfig] = None
71
+ self.monitoring_receiver: Optional[MonitoringRadioReceiver] = None
72
+
68
73
  self.run_dir = os.path.abspath(run_dir)
69
74
  self.run_id = run_id
70
75
 
@@ -101,7 +106,10 @@ class ParslExecutor(metaclass=ABCMeta):
101
106
  Executors should call super().shutdown() as part of their overridden
102
107
  implementation.
103
108
  """
104
- pass
109
+ if self.monitoring_receiver is not None:
110
+ logger.info("Starting monitoring receiver shutdown")
111
+ self.monitoring_receiver.shutdown()
112
+ logger.info("Done with monitoring receiver shutdown")
105
113
 
106
114
  def monitor_resources(self) -> bool:
107
115
  """Should resource monitoring happen for tasks on running on this executor?
parsl/executors/high_throughput/executor.py CHANGED
@@ -29,6 +29,8 @@ from parsl.executors.high_throughput.manager_selector import (
29
29
  )
30
30
  from parsl.executors.status_handling import BlockProviderExecutor
31
31
  from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus
32
+ from parsl.monitoring.radios.base import RadioConfig
33
+ from parsl.monitoring.radios.htex import HTEXRadio
32
34
  from parsl.monitoring.radios.zmq_router import ZMQRadioReceiver, start_zmq_receiver
33
35
  from parsl.process_loggers import wrap_with_logs
34
36
  from parsl.providers import LocalProvider
@@ -46,8 +48,7 @@ DEFAULT_LAUNCH_CMD = ("process_worker_pool.py {debug} {max_workers_per_node} "
46
48
  "-c {cores_per_worker} "
47
49
  "-m {mem_per_worker} "
48
50
  "--poll {poll_period} "
49
- "--task_port={task_port} "
50
- "--result_port={result_port} "
51
+ "--port={worker_port} "
51
52
  "--cert_dir {cert_dir} "
52
53
  "--logdir={logdir} "
53
54
  "--block_id={{block_id}} "
@@ -100,8 +101,8 @@ GENERAL_HTEX_PARAM_DOCS = """provider : :class:`~parsl.providers.base.ExecutionP
100
101
  Supports IPv4 and IPv6 addresses
101
102
  default=127.0.0.1
102
103
 
103
- worker_ports : (int, int)
104
- Specify the ports to be used by workers to connect to Parsl. If this option is specified,
104
+ worker_port : int
105
+ Specify the port to be used by workers to connect to Parsl. If this option is specified,
105
106
  worker_port_range will not be honored.
106
107
 
107
108
  worker_port_range : (int, int)
@@ -241,7 +242,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
241
242
  interchange_launch_cmd: Optional[Sequence[str]] = None,
242
243
  address: Optional[str] = None,
243
244
  loopback_address: str = "127.0.0.1",
244
- worker_ports: Optional[Tuple[int, int]] = None,
245
+ worker_port: Optional[int] = None,
245
246
  worker_port_range: Optional[Tuple[int, int]] = (54000, 55000),
246
247
  interchange_port_range: Optional[Tuple[int, int]] = (55000, 56000),
247
248
  storage_access: Optional[List[Staging]] = None,
@@ -261,7 +262,8 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
261
262
  worker_logdir_root: Optional[str] = None,
262
263
  manager_selector: ManagerSelector = RandomManagerSelector(),
263
264
  block_error_handler: Union[bool, Callable[[BlockProviderExecutor, Dict[str, JobStatus]], None]] = True,
264
- encrypted: bool = False):
265
+ encrypted: bool = False,
266
+ remote_monitoring_radio: Optional[RadioConfig] = None):
265
267
 
266
268
  logger.debug("Initializing HighThroughputExecutor")
267
269
 
@@ -310,7 +312,13 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
310
312
  self._workers_per_node = 1 # our best guess-- we do not have any provider hints
311
313
 
312
314
  self._task_counter = 0
313
- self.worker_ports = worker_ports
315
+
316
+ if remote_monitoring_radio is not None:
317
+ self.remote_monitoring_radio = remote_monitoring_radio
318
+ else:
319
+ self.remote_monitoring_radio = HTEXRadio()
320
+
321
+ self.worker_port = worker_port
314
322
  self.worker_port_range = worker_port_range
315
323
  self.interchange_proc: Optional[subprocess.Popen] = None
316
324
  self.interchange_port_range = interchange_port_range
@@ -339,7 +347,6 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
339
347
  self.zmq_monitoring = None
340
348
  self.hub_zmq_port = None
341
349
 
342
- radio_mode = "htex"
343
350
  enable_mpi_mode: bool = False
344
351
  mpi_launcher: str = "mpiexec"
345
352
 
@@ -390,8 +397,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
390
397
  prefetch_capacity=self.prefetch_capacity,
391
398
  address_probe_timeout_string=address_probe_timeout_string,
392
399
  addresses=self.all_addresses,
393
- task_port=self.worker_task_port,
394
- result_port=self.worker_result_port,
400
+ worker_port=self.worker_port,
395
401
  cores_per_worker=self.cores_per_worker,
396
402
  mem_per_worker=self.mem_per_worker,
397
403
  max_workers_per_node=max_workers_per_node,
@@ -551,7 +557,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
551
557
  self.incoming_q.port,
552
558
  self.command_client.port),
553
559
  "interchange_address": self.address,
554
- "worker_ports": self.worker_ports,
560
+ "worker_port": self.worker_port,
555
561
  "worker_port_range": self.worker_port_range,
556
562
  "hub_address": self.loopback_address,
557
563
  "hub_zmq_port": self.hub_zmq_port,
@@ -576,15 +582,14 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
576
582
  stdin.close()
577
583
  logger.debug("Sent config object. Requesting worker ports")
578
584
  try:
579
- (self.worker_task_port, self.worker_result_port) = self.command_client.run("WORKER_PORTS", timeout_s=120)
585
+ self.worker_port = self.command_client.run("WORKER_BINDS", timeout_s=120)
580
586
  except CommandClientTimeoutError:
581
587
  logger.error("Interchange has not completed initialization. Aborting")
582
588
  raise Exception("Interchange failed to start")
583
589
  logger.debug(
584
- "Interchange process started (%r). Worker ports: %d, %d",
590
+ "Interchange process started (%r). Worker port: %d",
585
591
  self.interchange_proc,
586
- self.worker_task_port,
587
- self.worker_result_port
592
+ self.worker_port,
588
593
  )
589
594
 
590
595
  def _start_result_queue_thread(self):