parsl 2024.7.29__py3-none-any.whl → 2024.8.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. parsl/channels/__init__.py +1 -4
  2. parsl/channels/oauth_ssh/oauth_ssh.py +2 -2
  3. parsl/channels/ssh/ssh.py +1 -1
  4. parsl/channels/ssh_il/ssh_il.py +2 -2
  5. parsl/dataflow/dflow.py +2 -2
  6. parsl/executors/base.py +7 -7
  7. parsl/executors/high_throughput/executor.py +15 -7
  8. parsl/executors/high_throughput/interchange.py +40 -37
  9. parsl/executors/high_throughput/manager_selector.py +25 -0
  10. parsl/executors/status_handling.py +38 -24
  11. parsl/executors/taskvine/executor.py +2 -0
  12. parsl/executors/workqueue/executor.py +2 -0
  13. parsl/monitoring/db_manager.py +10 -10
  14. parsl/monitoring/errors.py +6 -0
  15. parsl/monitoring/monitoring.py +3 -3
  16. parsl/monitoring/radios.py +16 -0
  17. parsl/monitoring/remote.py +4 -4
  18. parsl/monitoring/router.py +71 -35
  19. parsl/providers/__init__.py +0 -4
  20. parsl/providers/ad_hoc/ad_hoc.py +6 -2
  21. parsl/tests/configs/local_adhoc.py +2 -2
  22. parsl/tests/test_htex/test_disconnected_blocks_failing_provider.py +71 -0
  23. parsl/tests/test_htex/test_htex.py +28 -19
  24. parsl/tests/test_htex/test_zmq_binding.py +4 -1
  25. parsl/tests/test_monitoring/test_basic.py +14 -1
  26. parsl/tests/test_mpi_apps/test_mpiex.py +1 -1
  27. parsl/tests/test_providers/test_local_provider.py +6 -5
  28. parsl/version.py +1 -1
  29. {parsl-2024.7.29.data → parsl-2024.8.12.data}/scripts/interchange.py +40 -37
  30. parsl-2024.8.12.dist-info/METADATA +101 -0
  31. {parsl-2024.7.29.dist-info → parsl-2024.8.12.dist-info}/RECORD +38 -46
  32. {parsl-2024.7.29.dist-info → parsl-2024.8.12.dist-info}/WHEEL +1 -1
  33. parsl/configs/ad_hoc.py +0 -38
  34. parsl/tests/configs/ad_hoc_cluster_htex.py +0 -35
  35. parsl/tests/configs/htex_ad_hoc_cluster.py +0 -26
  36. parsl/tests/configs/swan_htex.py +0 -43
  37. parsl/tests/integration/test_channels/test_scp_1.py +0 -45
  38. parsl/tests/integration/test_channels/test_ssh_1.py +0 -40
  39. parsl/tests/integration/test_channels/test_ssh_errors.py +0 -46
  40. parsl/tests/integration/test_channels/test_ssh_file_transport.py +0 -41
  41. parsl/tests/integration/test_channels/test_ssh_interactive.py +0 -24
  42. parsl/tests/manual_tests/test_ad_hoc_htex.py +0 -49
  43. parsl/tests/manual_tests/test_oauth_ssh.py +0 -13
  44. parsl-2024.7.29.dist-info/METADATA +0 -101
  45. {parsl-2024.7.29.data → parsl-2024.8.12.data}/scripts/exec_parsl_function.py +0 -0
  46. {parsl-2024.7.29.data → parsl-2024.8.12.data}/scripts/parsl_coprocess.py +0 -0
  47. {parsl-2024.7.29.data → parsl-2024.8.12.data}/scripts/process_worker_pool.py +0 -0
  48. {parsl-2024.7.29.dist-info → parsl-2024.8.12.dist-info}/LICENSE +0 -0
  49. {parsl-2024.7.29.dist-info → parsl-2024.8.12.dist-info}/entry_points.txt +0 -0
  50. {parsl-2024.7.29.dist-info → parsl-2024.8.12.dist-info}/top_level.txt +0 -0
@@ -12,6 +12,7 @@ from typing import TYPE_CHECKING, Any, Optional, Tuple, Union, cast
12
12
  import typeguard
13
13
 
14
14
  from parsl.log_utils import set_file_logger
15
+ from parsl.monitoring.errors import MonitoringHubStartError
15
16
  from parsl.monitoring.message_type import MessageType
16
17
  from parsl.monitoring.radios import MultiprocessingQueueRadioSender
17
18
  from parsl.monitoring.router import router_starter
@@ -105,7 +106,7 @@ class MonitoringHub(RepresentationMixin):
105
106
  self.resource_monitoring_enabled = resource_monitoring_enabled
106
107
  self.resource_monitoring_interval = resource_monitoring_interval
107
108
 
108
- def start(self, run_id: str, dfk_run_dir: str, config_run_dir: Union[str, os.PathLike]) -> None:
109
+ def start(self, dfk_run_dir: str, config_run_dir: Union[str, os.PathLike]) -> None:
109
110
 
110
111
  logger.debug("Starting MonitoringHub")
111
112
 
@@ -160,7 +161,6 @@ class MonitoringHub(RepresentationMixin):
160
161
  "zmq_port_range": self.hub_port_range,
161
162
  "logdir": self.logdir,
162
163
  "logging_level": logging.DEBUG if self.monitoring_debug else logging.INFO,
163
- "run_id": run_id
164
164
  },
165
165
  name="Monitoring-Router-Process",
166
166
  daemon=True,
@@ -195,7 +195,7 @@ class MonitoringHub(RepresentationMixin):
195
195
  comm_q.join_thread()
196
196
  except queue.Empty:
197
197
  logger.error("Hub has not completed initialization in 120s. Aborting")
198
- raise Exception("Hub failed to start")
198
+ raise MonitoringHubStartError()
199
199
 
200
200
  if isinstance(comm_q_result, str):
201
201
  logger.error(f"MonitoringRouter sent an error message: {comm_q_result}")
@@ -7,6 +7,8 @@ from abc import ABCMeta, abstractmethod
7
7
  from multiprocessing.queues import Queue
8
8
  from typing import Optional
9
9
 
10
+ import zmq
11
+
10
12
  from parsl.serialize import serialize
11
13
 
12
14
  _db_manager_excepts: Optional[Exception]
@@ -186,3 +188,17 @@ class MultiprocessingQueueRadioSender(MonitoringRadioSender):
186
188
 
187
189
  def send(self, message: object) -> None:
188
190
  self.queue.put((message, 0))
191
+
192
+
193
+ class ZMQRadioSender(MonitoringRadioSender):
194
+ """A monitoring radio which connects over ZMQ. This radio is not
195
+ thread-safe, because its use of ZMQ is not thread-safe.
196
+ """
197
+
198
+ def __init__(self, hub_address: str, hub_zmq_port: int) -> None:
199
+ self._hub_channel = zmq.Context().socket(zmq.DEALER)
200
+ self._hub_channel.set_hwm(0)
201
+ self._hub_channel.connect(f"tcp://{hub_address}:{hub_zmq_port}")
202
+
203
+ def send(self, message: object) -> None:
204
+ self._hub_channel.send_pyobj(message)
@@ -199,10 +199,10 @@ def monitor(pid: int,
199
199
 
200
200
  pm = psutil.Process(pid)
201
201
 
202
- children_user_time = {} # type: Dict[int, float]
203
- children_system_time = {} # type: Dict[int, float]
204
- children_num_ctx_switches_voluntary = {} # type: Dict[int, float]
205
- children_num_ctx_switches_involuntary = {} # type: Dict[int, float]
202
+ children_user_time: Dict[int, float] = {}
203
+ children_system_time: Dict[int, float] = {}
204
+ children_num_ctx_switches_voluntary: Dict[int, float] = {}
205
+ children_num_ctx_switches_involuntary: Dict[int, float] = {}
206
206
 
207
207
  def accumulate_and_prepare() -> Dict[str, Any]:
208
208
  d = {"psutil_process_" + str(k): v for k, v in pm.as_dict().items() if k in simple}
@@ -5,6 +5,7 @@ import os
5
5
  import pickle
6
6
  import queue
7
7
  import socket
8
+ import threading
8
9
  import time
9
10
  from multiprocessing.synchronize import Event
10
11
  from typing import Optional, Tuple, Union
@@ -30,9 +31,13 @@ class MonitoringRouter:
30
31
 
31
32
  monitoring_hub_address: str = "127.0.0.1",
32
33
  logdir: str = ".",
33
- run_id: str,
34
34
  logging_level: int = logging.INFO,
35
- atexit_timeout: int = 3 # in seconds
35
+ atexit_timeout: int = 3, # in seconds
36
+ priority_msgs: "queue.Queue[AddressedMonitoringMessage]",
37
+ node_msgs: "queue.Queue[AddressedMonitoringMessage]",
38
+ block_msgs: "queue.Queue[AddressedMonitoringMessage]",
39
+ resource_msgs: "queue.Queue[AddressedMonitoringMessage]",
40
+ exit_event: Event,
36
41
  ):
37
42
  """ Initializes a monitoring configuration class.
38
43
 
@@ -51,7 +56,11 @@ class MonitoringRouter:
51
56
  Logging level as defined in the logging module. Default: logging.INFO
52
57
  atexit_timeout : float, optional
53
58
  The amount of time in seconds to terminate the hub without receiving any messages, after the last dfk workflow message is received.
59
+ *_msgs : Queue
60
+ Four multiprocessing queues to receive messages, routed by type tag, and sometimes modified according to type tag.
54
61
 
62
+ exit_event : Event
63
+ An event that the main Parsl process will set to signal that the monitoring router should shut down.
55
64
  """
56
65
  os.makedirs(logdir, exist_ok=True)
57
66
  self.logger = set_file_logger("{}/monitoring_router.log".format(logdir),
@@ -61,7 +70,6 @@ class MonitoringRouter:
61
70
 
62
71
  self.hub_address = hub_address
63
72
  self.atexit_timeout = atexit_timeout
64
- self.run_id = run_id
65
73
 
66
74
  self.loop_freq = 10.0 # milliseconds
67
75
 
@@ -93,22 +101,60 @@ class MonitoringRouter:
93
101
  min_port=zmq_port_range[0],
94
102
  max_port=zmq_port_range[1])
95
103
 
96
- def start(self,
97
- priority_msgs: "queue.Queue[AddressedMonitoringMessage]",
98
- node_msgs: "queue.Queue[AddressedMonitoringMessage]",
99
- block_msgs: "queue.Queue[AddressedMonitoringMessage]",
100
- resource_msgs: "queue.Queue[AddressedMonitoringMessage]",
101
- exit_event: Event) -> None:
104
+ self.priority_msgs = priority_msgs
105
+ self.node_msgs = node_msgs
106
+ self.block_msgs = block_msgs
107
+ self.resource_msgs = resource_msgs
108
+ self.exit_event = exit_event
109
+
110
+ @wrap_with_logs(target="monitoring_router")
111
+ def start(self) -> None:
112
+ self.logger.info("Starting UDP listener thread")
113
+ udp_radio_receiver_thread = threading.Thread(target=self.start_udp_listener, daemon=True)
114
+ udp_radio_receiver_thread.start()
115
+
116
+ self.logger.info("Starting ZMQ listener thread")
117
+ zmq_radio_receiver_thread = threading.Thread(target=self.start_zmq_listener, daemon=True)
118
+ zmq_radio_receiver_thread.start()
119
+
120
+ self.logger.info("Joining on ZMQ listener thread")
121
+ zmq_radio_receiver_thread.join()
122
+ self.logger.info("Joining on UDP listener thread")
123
+ udp_radio_receiver_thread.join()
124
+ self.logger.info("Joined on both ZMQ and UDP listener threads")
125
+
126
+ @wrap_with_logs(target="monitoring_router")
127
+ def start_udp_listener(self) -> None:
102
128
  try:
103
- while not exit_event.is_set():
129
+ while not self.exit_event.is_set():
104
130
  try:
105
131
  data, addr = self.udp_sock.recvfrom(2048)
106
132
  resource_msg = pickle.loads(data)
107
133
  self.logger.debug("Got UDP Message from {}: {}".format(addr, resource_msg))
108
- resource_msgs.put((resource_msg, addr))
134
+ self.resource_msgs.put((resource_msg, addr))
109
135
  except socket.timeout:
110
136
  pass
111
137
 
138
+ self.logger.info("UDP listener draining")
139
+ last_msg_received_time = time.time()
140
+ while time.time() - last_msg_received_time < self.atexit_timeout:
141
+ try:
142
+ data, addr = self.udp_sock.recvfrom(2048)
143
+ msg = pickle.loads(data)
144
+ self.logger.debug("Got UDP Message from {}: {}".format(addr, msg))
145
+ self.resource_msgs.put((msg, addr))
146
+ last_msg_received_time = time.time()
147
+ except socket.timeout:
148
+ pass
149
+
150
+ self.logger.info("UDP listener finishing normally")
151
+ finally:
152
+ self.logger.info("UDP listener finished")
153
+
154
+ @wrap_with_logs(target="monitoring_router")
155
+ def start_zmq_listener(self) -> None:
156
+ try:
157
+ while not self.exit_event.is_set():
112
158
  try:
113
159
  dfk_loop_start = time.time()
114
160
  while time.time() - dfk_loop_start < 1.0: # TODO make configurable
@@ -124,16 +170,15 @@ class MonitoringRouter:
124
170
  msg_0 = (msg, 0)
125
171
 
126
172
  if msg[0] == MessageType.NODE_INFO:
127
- msg[1]['run_id'] = self.run_id
128
- node_msgs.put(msg_0)
173
+ self.node_msgs.put(msg_0)
129
174
  elif msg[0] == MessageType.RESOURCE_INFO:
130
- resource_msgs.put(msg_0)
175
+ self.resource_msgs.put(msg_0)
131
176
  elif msg[0] == MessageType.BLOCK_INFO:
132
- block_msgs.put(msg_0)
177
+ self.block_msgs.put(msg_0)
133
178
  elif msg[0] == MessageType.TASK_INFO:
134
- priority_msgs.put(msg_0)
179
+ self.priority_msgs.put(msg_0)
135
180
  elif msg[0] == MessageType.WORKFLOW_INFO:
136
- priority_msgs.put(msg_0)
181
+ self.priority_msgs.put(msg_0)
137
182
  else:
138
183
  # There is a type: ignore here because if msg[0]
139
184
  # is of the correct type, this code is unreachable,
@@ -151,21 +196,9 @@ class MonitoringRouter:
151
196
  # thing to do.
152
197
  self.logger.warning("Failure processing a ZMQ message", exc_info=True)
153
198
 
154
- self.logger.info("Monitoring router draining")
155
- last_msg_received_time = time.time()
156
- while time.time() - last_msg_received_time < self.atexit_timeout:
157
- try:
158
- data, addr = self.udp_sock.recvfrom(2048)
159
- msg = pickle.loads(data)
160
- self.logger.debug("Got UDP Message from {}: {}".format(addr, msg))
161
- resource_msgs.put((msg, addr))
162
- last_msg_received_time = time.time()
163
- except socket.timeout:
164
- pass
165
-
166
- self.logger.info("Monitoring router finishing normally")
199
+ self.logger.info("ZMQ listener finishing normally")
167
200
  finally:
168
- self.logger.info("Monitoring router finished")
201
+ self.logger.info("ZMQ listener finished")
169
202
 
170
203
 
171
204
  @wrap_with_logs
@@ -182,8 +215,7 @@ def router_starter(comm_q: "queue.Queue[Union[Tuple[int, int], str]]",
182
215
  zmq_port_range: Tuple[int, int],
183
216
 
184
217
  logdir: str,
185
- logging_level: int,
186
- run_id: str) -> None:
218
+ logging_level: int) -> None:
187
219
  setproctitle("parsl: monitoring router")
188
220
  try:
189
221
  router = MonitoringRouter(hub_address=hub_address,
@@ -191,7 +223,11 @@ def router_starter(comm_q: "queue.Queue[Union[Tuple[int, int], str]]",
191
223
  zmq_port_range=zmq_port_range,
192
224
  logdir=logdir,
193
225
  logging_level=logging_level,
194
- run_id=run_id)
226
+ priority_msgs=priority_msgs,
227
+ node_msgs=node_msgs,
228
+ block_msgs=block_msgs,
229
+ resource_msgs=resource_msgs,
230
+ exit_event=exit_event)
195
231
  except Exception as e:
196
232
  logger.error("MonitoringRouter construction failed.", exc_info=True)
197
233
  comm_q.put(f"Monitoring router construction failed: {e}")
@@ -200,7 +236,7 @@ def router_starter(comm_q: "queue.Queue[Union[Tuple[int, int], str]]",
200
236
 
201
237
  router.logger.info("Starting MonitoringRouter in router_starter")
202
238
  try:
203
- router.start(priority_msgs, node_msgs, block_msgs, resource_msgs, exit_event)
239
+ router.start()
204
240
  except Exception as e:
205
241
  router.logger.exception("router.start exception")
206
242
  exception_q.put(('Hub', str(e)))
@@ -1,6 +1,3 @@
1
- # Workstation Provider
2
- from parsl.providers.ad_hoc.ad_hoc import AdHocProvider
3
-
4
1
  # Cloud Providers
5
2
  from parsl.providers.aws.aws import AWSProvider
6
3
  from parsl.providers.azure.azure import AzureProvider
@@ -24,7 +21,6 @@ __all__ = ['LocalProvider',
24
21
  'SlurmProvider',
25
22
  'TorqueProvider',
26
23
  'LSFProvider',
27
- 'AdHocProvider',
28
24
  'PBSProProvider',
29
25
  'AWSProvider',
30
26
  'GoogleCloudProvider',
@@ -12,8 +12,12 @@ from parsl.utils import RepresentationMixin
12
12
  logger = logging.getLogger(__name__)
13
13
 
14
14
 
15
- class AdHocProvider(ExecutionProvider, RepresentationMixin):
16
- """ Ad-hoc execution provider
15
+ class DeprecatedAdHocProvider(ExecutionProvider, RepresentationMixin):
16
+ """ Deprecated ad-hoc execution provider
17
+
18
+ The (former) AdHocProvider is deprecated. See
19
+ `issue #3515 <https://github.com/Parsl/parsl/issues/3515>`_
20
+ for further discussion.
17
21
 
18
22
  This provider is used to provision execution resources over one or more ad hoc nodes
19
23
  that are each accessible over a Channel (say, ssh) but otherwise lack a cluster scheduler.
@@ -1,7 +1,7 @@
1
1
  from parsl.channels import LocalChannel
2
2
  from parsl.config import Config
3
3
  from parsl.executors import HighThroughputExecutor
4
- from parsl.providers import AdHocProvider
4
+ from parsl.providers.ad_hoc.ad_hoc import DeprecatedAdHocProvider
5
5
 
6
6
 
7
7
  def fresh_config():
@@ -10,7 +10,7 @@ def fresh_config():
10
10
  HighThroughputExecutor(
11
11
  label='AdHoc',
12
12
  encrypted=True,
13
- provider=AdHocProvider(
13
+ provider=DeprecatedAdHocProvider(
14
14
  channels=[LocalChannel(), LocalChannel()]
15
15
  )
16
16
  )
@@ -0,0 +1,71 @@
1
+ import logging
2
+
3
+ import pytest
4
+
5
+ import parsl
6
+ from parsl import Config
7
+ from parsl.executors import HighThroughputExecutor
8
+ from parsl.executors.errors import BadStateException
9
+ from parsl.jobs.states import JobState, JobStatus
10
+ from parsl.providers import LocalProvider
11
+
12
+
13
+ class FailingProvider(LocalProvider):
14
+ def submit(*args, **kwargs):
15
+ raise RuntimeError("Deliberate failure of provider.submit")
16
+
17
+
18
+ def local_config():
19
+ """Config to simulate failing blocks without connecting"""
20
+ return Config(
21
+ executors=[
22
+ HighThroughputExecutor(
23
+ label="HTEX",
24
+ heartbeat_period=1,
25
+ heartbeat_threshold=2,
26
+ poll_period=100,
27
+ max_workers_per_node=1,
28
+ provider=FailingProvider(
29
+ init_blocks=0,
30
+ max_blocks=2,
31
+ min_blocks=0,
32
+ ),
33
+ )
34
+ ],
35
+ max_idletime=0.5,
36
+ strategy='htex_auto_scale',
37
+ strategy_period=0.1
38
+ # this strategy period needs to be a few times smaller than the
39
+ # status_polling_interval of FailingProvider, which is 5s at
40
+ # time of writing
41
+ )
42
+
43
+
44
+ @parsl.python_app
45
+ def double(x):
46
+ return x * 2
47
+
48
+
49
+ @pytest.mark.local
50
+ def test_disconnected_blocks():
51
+ """Test reporting of blocks that fail to connect from HTEX"""
52
+ dfk = parsl.dfk()
53
+ executor = dfk.executors["HTEX"]
54
+
55
+ connected_blocks = executor.connected_blocks()
56
+ assert not connected_blocks, "Expected 0 blocks"
57
+
58
+ future = double(5)
59
+ with pytest.raises(BadStateException):
60
+ future.result()
61
+
62
+ assert isinstance(future.exception(), BadStateException)
63
+
64
+ status_dict = executor.status()
65
+ assert len(status_dict) == 1, "Expected exactly 1 block"
66
+ for status in status_dict.values():
67
+ assert isinstance(status, JobStatus)
68
+ assert status.state == JobState.MISSING
69
+
70
+ connected_blocks = executor.connected_blocks()
71
+ assert connected_blocks == [], "Expected exactly 0 connected blocks"
@@ -1,6 +1,7 @@
1
+ import logging
1
2
  import pathlib
2
- import warnings
3
3
  from subprocess import Popen, TimeoutExpired
4
+ from typing import Optional, Sequence
4
5
  from unittest import mock
5
6
 
6
7
  import pytest
@@ -71,12 +72,11 @@ def test_htex_start_encrypted(
71
72
  @pytest.mark.local
72
73
  @pytest.mark.parametrize("started", (True, False))
73
74
  @pytest.mark.parametrize("timeout_expires", (True, False))
74
- @mock.patch(f"{_MOCK_BASE}.logger")
75
75
  def test_htex_shutdown(
76
- mock_logger: mock.MagicMock,
77
76
  started: bool,
78
77
  timeout_expires: bool,
79
78
  htex: HighThroughputExecutor,
79
+ caplog
80
80
  ):
81
81
  mock_ix_proc = mock.Mock(spec=Popen)
82
82
 
@@ -108,22 +108,22 @@ def test_htex_shutdown(
108
108
 
109
109
  mock_ix_proc.terminate.side_effect = kill_interchange
110
110
 
111
- htex.shutdown()
111
+ with caplog.at_level(logging.INFO):
112
+ htex.shutdown()
112
113
 
113
- mock_logs = mock_logger.info.call_args_list
114
114
  if started:
115
115
  assert mock_ix_proc.terminate.called
116
116
  assert mock_ix_proc.wait.called
117
117
  assert {"timeout": 10} == mock_ix_proc.wait.call_args[1]
118
118
  if timeout_expires:
119
- assert "Unable to terminate Interchange" in mock_logs[1][0][0]
119
+ assert "Unable to terminate Interchange" in caplog.text
120
120
  assert mock_ix_proc.kill.called
121
- assert "Attempting" in mock_logs[0][0][0]
122
- assert "Finished" in mock_logs[-1][0][0]
121
+ assert "Attempting HighThroughputExecutor shutdown" in caplog.text
122
+ assert "Finished HighThroughputExecutor shutdown" in caplog.text
123
123
  else:
124
124
  assert not mock_ix_proc.terminate.called
125
125
  assert not mock_ix_proc.wait.called
126
- assert "has not started" in mock_logs[0][0][0]
126
+ assert "HighThroughputExecutor has not started" in caplog.text
127
127
 
128
128
 
129
129
  @pytest.mark.local
@@ -139,13 +139,22 @@ def test_max_workers_per_node():
139
139
 
140
140
 
141
141
  @pytest.mark.local
142
- def test_htex_launch_cmd():
143
- htex = HighThroughputExecutor()
144
- assert htex.launch_cmd.startswith("process_worker_pool.py")
145
- assert htex.interchange_launch_cmd == "interchange.py"
146
-
147
- launch_cmd = "custom-launch-cmd"
148
- ix_launch_cmd = "custom-ix-launch-cmd"
149
- htex = HighThroughputExecutor(launch_cmd=launch_cmd, interchange_launch_cmd=ix_launch_cmd)
150
- assert htex.launch_cmd == launch_cmd
151
- assert htex.interchange_launch_cmd == ix_launch_cmd
142
+ @pytest.mark.parametrize("cmd", (None, "custom-launch-cmd"))
143
+ def test_htex_worker_pool_launch_cmd(cmd: Optional[str]):
144
+ if cmd:
145
+ htex = HighThroughputExecutor(launch_cmd=cmd)
146
+ assert htex.launch_cmd == cmd
147
+ else:
148
+ htex = HighThroughputExecutor()
149
+ assert htex.launch_cmd.startswith("process_worker_pool.py")
150
+
151
+
152
+ @pytest.mark.local
153
+ @pytest.mark.parametrize("cmd", (None, ["custom", "launch", "cmd"]))
154
+ def test_htex_interchange_launch_cmd(cmd: Optional[Sequence[str]]):
155
+ if cmd:
156
+ htex = HighThroughputExecutor(interchange_launch_cmd=cmd)
157
+ assert htex.interchange_launch_cmd == cmd
158
+ else:
159
+ htex = HighThroughputExecutor()
160
+ assert htex.interchange_launch_cmd == ["interchange.py"]
@@ -9,6 +9,7 @@ import zmq
9
9
 
10
10
  from parsl import curvezmq
11
11
  from parsl.executors.high_throughput.interchange import Interchange
12
+ from parsl.executors.high_throughput.manager_selector import RandomManagerSelector
12
13
 
13
14
 
14
15
  def make_interchange(*, interchange_address: Optional[str], cert_dir: Optional[str]) -> Interchange:
@@ -23,7 +24,9 @@ def make_interchange(*, interchange_address: Optional[str], cert_dir: Optional[s
23
24
  heartbeat_threshold=60,
24
25
  logdir=".",
25
26
  logging_level=logging.INFO,
26
- poll_period=10)
27
+ manager_selector=RandomManagerSelector(),
28
+ poll_period=10,
29
+ run_id="test_run_id")
27
30
 
28
31
 
29
32
  @pytest.fixture
@@ -25,10 +25,23 @@ def this_app():
25
25
  # a configuration that is suitably configured for monitoring.
26
26
 
27
27
  def htex_config():
28
+ """This config will use htex's default htex-specific monitoring radio mode"""
28
29
  from parsl.tests.configs.htex_local_alternate import fresh_config
29
30
  return fresh_config()
30
31
 
31
32
 
33
+ def htex_udp_config():
34
+ """This config will force UDP"""
35
+ from parsl.tests.configs.htex_local_alternate import fresh_config
36
+ c = fresh_config()
37
+ assert len(c.executors) == 1
38
+
39
+ assert c.executors[0].radio_mode == "htex", "precondition: htex has a radio mode attribute, configured for htex radio"
40
+ c.executors[0].radio_mode = "udp"
41
+
42
+ return c
43
+
44
+
32
45
  def workqueue_config():
33
46
  from parsl.tests.configs.workqueue_ex import fresh_config
34
47
  c = fresh_config()
@@ -48,7 +61,7 @@ def taskvine_config():
48
61
 
49
62
 
50
63
  @pytest.mark.local
51
- @pytest.mark.parametrize("fresh_config", [htex_config, workqueue_config, taskvine_config])
64
+ @pytest.mark.parametrize("fresh_config", [htex_config, htex_udp_config, workqueue_config, taskvine_config])
52
65
  def test_row_counts(tmpd_cwd, fresh_config):
53
66
  # this is imported here rather than at module level because
54
67
  # it isn't available in a plain parsl install, so this module
@@ -44,7 +44,7 @@ def test_init():
44
44
 
45
45
  new_kwargs = {'max_workers_per_block'}
46
46
  excluded_kwargs = {'available_accelerators', 'enable_mpi_mode', 'cores_per_worker', 'max_workers_per_node',
47
- 'mem_per_worker', 'cpu_affinity', 'max_workers'}
47
+ 'mem_per_worker', 'cpu_affinity', 'max_workers', 'manager_selector'}
48
48
 
49
49
  # Get the kwargs from both HTEx and MPIEx
50
50
  htex_kwargs = set(signature(HighThroughputExecutor.__init__).parameters)
@@ -11,7 +11,8 @@ import time
11
11
 
12
12
  import pytest
13
13
 
14
- from parsl.channels import LocalChannel, SSHChannel
14
+ from parsl.channels import LocalChannel
15
+ from parsl.channels.ssh.ssh import DeprecatedSSHChannel
15
16
  from parsl.jobs.states import JobState
16
17
  from parsl.launchers import SingleNodeLauncher
17
18
  from parsl.providers import LocalProvider
@@ -92,10 +93,10 @@ def test_ssh_channel():
92
93
  # already exist, so create it here.
93
94
  pathlib.Path('{}/known.hosts'.format(config_dir)).touch(mode=0o600)
94
95
  script_dir = tempfile.mkdtemp()
95
- channel = SSHChannel('127.0.0.1', port=server_port,
96
- script_dir=remote_script_dir,
97
- host_keys_filename='{}/known.hosts'.format(config_dir),
98
- key_filename=priv_key)
96
+ channel = DeprecatedSSHChannel('127.0.0.1', port=server_port,
97
+ script_dir=remote_script_dir,
98
+ host_keys_filename='{}/known.hosts'.format(config_dir),
99
+ key_filename=priv_key)
99
100
  try:
100
101
  p = LocalProvider(channel=channel,
101
102
  launcher=SingleNodeLauncher(debug=False))
parsl/version.py CHANGED
@@ -3,4 +3,4 @@
3
3
  Year.Month.Day[alpha/beta/..]
4
4
  Alphas will be numbered like this -> 2024.12.10a0
5
5
  """
6
- VERSION = '2024.07.29'
6
+ VERSION = '2024.08.12'