parsl 2025.6.16__py3-none-any.whl → 2025.6.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/configs/osg.py +1 -1
- parsl/dataflow/dflow.py +14 -4
- parsl/executors/base.py +19 -9
- parsl/executors/flux/executor.py +2 -0
- parsl/executors/globus_compute.py +2 -0
- parsl/executors/high_throughput/executor.py +22 -15
- parsl/executors/high_throughput/interchange.py +173 -191
- parsl/executors/high_throughput/mpi_executor.py +14 -4
- parsl/executors/high_throughput/probe.py +4 -4
- parsl/executors/high_throughput/process_worker_pool.py +88 -94
- parsl/executors/radical/executor.py +3 -0
- parsl/executors/taskvine/executor.py +11 -3
- parsl/executors/taskvine/manager.py +3 -1
- parsl/executors/threads.py +19 -3
- parsl/executors/workqueue/executor.py +11 -3
- parsl/monitoring/errors.py +4 -4
- parsl/monitoring/monitoring.py +26 -88
- parsl/monitoring/radios/base.py +63 -2
- parsl/monitoring/radios/filesystem.py +19 -4
- parsl/monitoring/radios/filesystem_router.py +22 -3
- parsl/monitoring/radios/htex.py +22 -13
- parsl/monitoring/radios/multiprocessing.py +22 -2
- parsl/monitoring/radios/udp.py +57 -19
- parsl/monitoring/radios/udp_router.py +119 -25
- parsl/monitoring/radios/zmq_router.py +9 -10
- parsl/monitoring/remote.py +19 -40
- parsl/providers/local/local.py +12 -13
- parsl/tests/configs/htex_local_alternate.py +0 -1
- parsl/tests/conftest.py +7 -4
- parsl/tests/test_htex/test_interchange_exit_bad_registration.py +5 -7
- parsl/tests/test_htex/test_zmq_binding.py +5 -6
- parsl/tests/test_monitoring/test_basic.py +12 -10
- parsl/tests/test_monitoring/{test_fuzz_zmq.py → test_htex_fuzz_zmq.py} +7 -2
- parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +0 -1
- parsl/tests/test_monitoring/test_radio_filesystem.py +48 -0
- parsl/tests/test_monitoring/test_radio_multiprocessing.py +44 -0
- parsl/tests/test_monitoring/test_radio_udp.py +204 -0
- parsl/tests/test_monitoring/test_stdouterr.py +1 -3
- parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +3 -7
- parsl/tests/test_shutdown/test_kill_monitoring.py +1 -1
- parsl/version.py +1 -1
- {parsl-2025.6.16.data → parsl-2025.6.30.data}/scripts/interchange.py +173 -191
- {parsl-2025.6.16.data → parsl-2025.6.30.data}/scripts/process_worker_pool.py +88 -94
- {parsl-2025.6.16.dist-info → parsl-2025.6.30.dist-info}/METADATA +2 -2
- {parsl-2025.6.16.dist-info → parsl-2025.6.30.dist-info}/RECORD +51 -50
- parsl/tests/configs/local_threads_monitoring.py +0 -10
- parsl/tests/manual_tests/test_udp_simple.py +0 -51
- {parsl-2025.6.16.data → parsl-2025.6.30.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2025.6.16.data → parsl-2025.6.30.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2025.6.16.dist-info → parsl-2025.6.30.dist-info}/LICENSE +0 -0
- {parsl-2025.6.16.dist-info → parsl-2025.6.30.dist-info}/WHEEL +0 -0
- {parsl-2025.6.16.dist-info → parsl-2025.6.30.dist-info}/entry_points.txt +0 -0
- {parsl-2025.6.16.dist-info → parsl-2025.6.30.dist-info}/top_level.txt +0 -0
parsl/configs/osg.py
CHANGED
parsl/dataflow/dflow.py
CHANGED
@@ -44,6 +44,7 @@ from parsl.executors.status_handling import BlockProviderExecutor
|
|
44
44
|
from parsl.executors.threads import ThreadPoolExecutor
|
45
45
|
from parsl.jobs.job_status_poller import JobStatusPoller
|
46
46
|
from parsl.monitoring import MonitoringHub
|
47
|
+
from parsl.monitoring.errors import RadioRequiredError
|
47
48
|
from parsl.monitoring.message_type import MessageType
|
48
49
|
from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadioSender
|
49
50
|
from parsl.monitoring.remote import monitor_wrapper
|
@@ -186,7 +187,8 @@ class DataFlowKernel:
|
|
186
187
|
self.executors: Dict[str, ParslExecutor] = {}
|
187
188
|
|
188
189
|
self.data_manager = DataManager(self)
|
189
|
-
parsl_internal_executor = ThreadPoolExecutor(max_threads=config.internal_tasks_max_threads,
|
190
|
+
parsl_internal_executor = ThreadPoolExecutor(max_threads=config.internal_tasks_max_threads,
|
191
|
+
label='_parsl_internal')
|
190
192
|
self.add_executors(config.executors)
|
191
193
|
self.add_executors([parsl_internal_executor])
|
192
194
|
|
@@ -736,17 +738,19 @@ class DataFlowKernel:
|
|
736
738
|
try_id = task_record['fail_count']
|
737
739
|
|
738
740
|
if self.monitoring is not None and self.monitoring.resource_monitoring_enabled:
|
741
|
+
if executor.remote_monitoring_radio is None:
|
742
|
+
raise RadioRequiredError()
|
743
|
+
|
739
744
|
wrapper_logging_level = logging.DEBUG if self.monitoring.monitoring_debug else logging.INFO
|
740
745
|
(function, args, kwargs) = monitor_wrapper(f=function,
|
741
746
|
args=args,
|
742
747
|
kwargs=kwargs,
|
743
748
|
x_try_id=try_id,
|
744
749
|
x_task_id=task_id,
|
745
|
-
|
750
|
+
radio_config=executor.remote_monitoring_radio,
|
746
751
|
run_id=self.run_id,
|
747
752
|
logging_level=wrapper_logging_level,
|
748
753
|
sleep_dur=self.monitoring.resource_monitoring_interval,
|
749
|
-
radio_mode=executor.radio_mode,
|
750
754
|
monitor_resources=executor.monitor_resources(),
|
751
755
|
run_dir=self.run_dir)
|
752
756
|
|
@@ -1131,8 +1135,14 @@ class DataFlowKernel:
|
|
1131
1135
|
for executor in executors:
|
1132
1136
|
executor.run_id = self.run_id
|
1133
1137
|
executor.run_dir = self.run_dir
|
1134
|
-
if self.monitoring:
|
1138
|
+
if self.monitoring and executor.remote_monitoring_radio is not None:
|
1135
1139
|
executor.monitoring_messages = self.monitoring.resource_msgs
|
1140
|
+
logger.debug("Starting monitoring receiver for executor %s "
|
1141
|
+
"with remote monitoring radio config %s",
|
1142
|
+
executor, executor.remote_monitoring_radio)
|
1143
|
+
|
1144
|
+
executor.monitoring_receiver = executor.remote_monitoring_radio.create_receiver(resource_msgs=executor.monitoring_messages,
|
1145
|
+
run_dir=executor.run_dir)
|
1136
1146
|
if hasattr(executor, 'provider'):
|
1137
1147
|
if hasattr(executor.provider, 'script_dir'):
|
1138
1148
|
executor.provider.script_dir = os.path.join(self.run_dir, 'submit_scripts')
|
parsl/executors/base.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import logging
|
3
4
|
import os
|
4
5
|
from abc import ABCMeta, abstractmethod
|
5
6
|
from concurrent.futures import Future
|
@@ -8,8 +9,11 @@ from typing import Any, Callable, Dict, Optional
|
|
8
9
|
|
9
10
|
from typing_extensions import Literal, Self
|
10
11
|
|
12
|
+
from parsl.monitoring.radios.base import MonitoringRadioReceiver, RadioConfig
|
11
13
|
from parsl.monitoring.types import TaggedMonitoringMessage
|
12
14
|
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
|
13
17
|
|
14
18
|
class ParslExecutor(metaclass=ABCMeta):
|
15
19
|
"""Executors are abstractions that represent available compute resources
|
@@ -27,10 +31,8 @@ class ParslExecutor(metaclass=ABCMeta):
|
|
27
31
|
label: str - a human readable label for the executor, unique
|
28
32
|
with respect to other executors.
|
29
33
|
|
30
|
-
|
31
|
-
|
32
|
-
radio_mode: str - a string describing which radio mode should be used to
|
33
|
-
send task resource data back to the submit side.
|
34
|
+
remote_monitoring_radio: RadioConfig describing how tasks on this executor
|
35
|
+
should report task resource status
|
34
36
|
|
35
37
|
An executor may optionally expose:
|
36
38
|
|
@@ -55,7 +57,6 @@ class ParslExecutor(metaclass=ABCMeta):
|
|
55
57
|
"""
|
56
58
|
|
57
59
|
label: str = "undefined"
|
58
|
-
radio_mode: str = "udp"
|
59
60
|
|
60
61
|
def __init__(
|
61
62
|
self,
|
@@ -65,6 +66,10 @@ class ParslExecutor(metaclass=ABCMeta):
|
|
65
66
|
run_id: Optional[str] = None,
|
66
67
|
):
|
67
68
|
self.monitoring_messages = monitoring_messages
|
69
|
+
|
70
|
+
self.remote_monitoring_radio: Optional[RadioConfig] = None
|
71
|
+
self.monitoring_receiver: Optional[MonitoringRadioReceiver] = None
|
72
|
+
|
68
73
|
self.run_dir = os.path.abspath(run_dir)
|
69
74
|
self.run_id = run_id
|
70
75
|
|
@@ -98,15 +103,20 @@ class ParslExecutor(metaclass=ABCMeta):
|
|
98
103
|
def shutdown(self) -> None:
|
99
104
|
"""Shutdown the executor.
|
100
105
|
|
101
|
-
|
106
|
+
Executors should call super().shutdown() as part of their overridden
|
107
|
+
implementation.
|
102
108
|
"""
|
103
|
-
|
109
|
+
if self.monitoring_receiver is not None:
|
110
|
+
logger.info("Starting monitoring receiver shutdown")
|
111
|
+
self.monitoring_receiver.shutdown()
|
112
|
+
logger.info("Done with monitoring receiver shutdown")
|
104
113
|
|
105
114
|
def monitor_resources(self) -> bool:
|
106
115
|
"""Should resource monitoring happen for tasks on running on this executor?
|
107
116
|
|
108
|
-
Parsl resource monitoring conflicts with execution styles which
|
109
|
-
|
117
|
+
Parsl resource monitoring conflicts with execution styles which do
|
118
|
+
not directly use a process tree - for example, the ThreadPoolExecutor
|
119
|
+
and the MPIExecutor.
|
110
120
|
|
111
121
|
This function allows resource monitoring to be disabled per executor implementation.
|
112
122
|
"""
|
parsl/executors/flux/executor.py
CHANGED
parsl/executors/globus_compute.py
CHANGED
@@ -134,3 +134,5 @@ class GlobusComputeExecutor(ParslExecutor, RepresentationMixin):
|
|
134
134
|
self.executor.shutdown(wait=False, cancel_futures=True)
|
135
135
|
result_watcher = self.executor._get_result_watcher()
|
136
136
|
result_watcher.shutdown(wait=False, cancel_futures=True)
|
137
|
+
|
138
|
+
super().shutdown()
|
parsl/executors/high_throughput/executor.py
CHANGED
@@ -29,6 +29,8 @@ from parsl.executors.high_throughput.manager_selector import (
|
|
29
29
|
)
|
30
30
|
from parsl.executors.status_handling import BlockProviderExecutor
|
31
31
|
from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus
|
32
|
+
from parsl.monitoring.radios.base import RadioConfig
|
33
|
+
from parsl.monitoring.radios.htex import HTEXRadio
|
32
34
|
from parsl.monitoring.radios.zmq_router import ZMQRadioReceiver, start_zmq_receiver
|
33
35
|
from parsl.process_loggers import wrap_with_logs
|
34
36
|
from parsl.providers import LocalProvider
|
@@ -46,8 +48,7 @@ DEFAULT_LAUNCH_CMD = ("process_worker_pool.py {debug} {max_workers_per_node} "
|
|
46
48
|
"-c {cores_per_worker} "
|
47
49
|
"-m {mem_per_worker} "
|
48
50
|
"--poll {poll_period} "
|
49
|
-
"--
|
50
|
-
"--result_port={result_port} "
|
51
|
+
"--port={worker_port} "
|
51
52
|
"--cert_dir {cert_dir} "
|
52
53
|
"--logdir={logdir} "
|
53
54
|
"--block_id={{block_id}} "
|
@@ -100,8 +101,8 @@ GENERAL_HTEX_PARAM_DOCS = """provider : :class:`~parsl.providers.base.ExecutionP
|
|
100
101
|
Supports IPv4 and IPv6 addresses
|
101
102
|
default=127.0.0.1
|
102
103
|
|
103
|
-
|
104
|
-
Specify the
|
104
|
+
worker_port : int
|
105
|
+
Specify the port to be used by workers to connect to Parsl. If this option is specified,
|
105
106
|
worker_port_range will not be honored.
|
106
107
|
|
107
108
|
worker_port_range : (int, int)
|
@@ -241,7 +242,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
|
|
241
242
|
interchange_launch_cmd: Optional[Sequence[str]] = None,
|
242
243
|
address: Optional[str] = None,
|
243
244
|
loopback_address: str = "127.0.0.1",
|
244
|
-
|
245
|
+
worker_port: Optional[int] = None,
|
245
246
|
worker_port_range: Optional[Tuple[int, int]] = (54000, 55000),
|
246
247
|
interchange_port_range: Optional[Tuple[int, int]] = (55000, 56000),
|
247
248
|
storage_access: Optional[List[Staging]] = None,
|
@@ -261,7 +262,8 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
|
|
261
262
|
worker_logdir_root: Optional[str] = None,
|
262
263
|
manager_selector: ManagerSelector = RandomManagerSelector(),
|
263
264
|
block_error_handler: Union[bool, Callable[[BlockProviderExecutor, Dict[str, JobStatus]], None]] = True,
|
264
|
-
encrypted: bool = False
|
265
|
+
encrypted: bool = False,
|
266
|
+
remote_monitoring_radio: Optional[RadioConfig] = None):
|
265
267
|
|
266
268
|
logger.debug("Initializing HighThroughputExecutor")
|
267
269
|
|
@@ -310,7 +312,13 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
|
|
310
312
|
self._workers_per_node = 1 # our best guess-- we do not have any provider hints
|
311
313
|
|
312
314
|
self._task_counter = 0
|
313
|
-
|
315
|
+
|
316
|
+
if remote_monitoring_radio is not None:
|
317
|
+
self.remote_monitoring_radio = remote_monitoring_radio
|
318
|
+
else:
|
319
|
+
self.remote_monitoring_radio = HTEXRadio()
|
320
|
+
|
321
|
+
self.worker_port = worker_port
|
314
322
|
self.worker_port_range = worker_port_range
|
315
323
|
self.interchange_proc: Optional[subprocess.Popen] = None
|
316
324
|
self.interchange_port_range = interchange_port_range
|
@@ -339,7 +347,6 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
|
|
339
347
|
self.zmq_monitoring = None
|
340
348
|
self.hub_zmq_port = None
|
341
349
|
|
342
|
-
radio_mode = "htex"
|
343
350
|
enable_mpi_mode: bool = False
|
344
351
|
mpi_launcher: str = "mpiexec"
|
345
352
|
|
@@ -390,8 +397,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
|
|
390
397
|
prefetch_capacity=self.prefetch_capacity,
|
391
398
|
address_probe_timeout_string=address_probe_timeout_string,
|
392
399
|
addresses=self.all_addresses,
|
393
|
-
|
394
|
-
result_port=self.worker_result_port,
|
400
|
+
worker_port=self.worker_port,
|
395
401
|
cores_per_worker=self.cores_per_worker,
|
396
402
|
mem_per_worker=self.mem_per_worker,
|
397
403
|
max_workers_per_node=max_workers_per_node,
|
@@ -551,7 +557,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
|
|
551
557
|
self.incoming_q.port,
|
552
558
|
self.command_client.port),
|
553
559
|
"interchange_address": self.address,
|
554
|
-
"
|
560
|
+
"worker_port": self.worker_port,
|
555
561
|
"worker_port_range": self.worker_port_range,
|
556
562
|
"hub_address": self.loopback_address,
|
557
563
|
"hub_zmq_port": self.hub_zmq_port,
|
@@ -576,15 +582,14 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
|
|
576
582
|
stdin.close()
|
577
583
|
logger.debug("Sent config object. Requesting worker ports")
|
578
584
|
try:
|
579
|
-
|
585
|
+
self.worker_port = self.command_client.run("WORKER_BINDS", timeout_s=120)
|
580
586
|
except CommandClientTimeoutError:
|
581
587
|
logger.error("Interchange has not completed initialization. Aborting")
|
582
588
|
raise Exception("Interchange failed to start")
|
583
589
|
logger.debug(
|
584
|
-
"Interchange process started (%r). Worker
|
590
|
+
"Interchange process started (%r). Worker port: %d",
|
585
591
|
self.interchange_proc,
|
586
|
-
self.
|
587
|
-
self.worker_result_port
|
592
|
+
self.worker_port,
|
588
593
|
)
|
589
594
|
|
590
595
|
def _start_result_queue_thread(self):
|
@@ -881,6 +886,8 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
|
|
881
886
|
if self.zmq_monitoring:
|
882
887
|
self.zmq_monitoring.close()
|
883
888
|
|
889
|
+
super().shutdown()
|
890
|
+
|
884
891
|
logger.info("Finished HighThroughputExecutor shutdown attempt")
|
885
892
|
|
886
893
|
def get_usage_information(self):
|