parsl 2024.4.8__py3-none-any.whl → 2024.4.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. parsl/data_provider/data_manager.py +2 -1
  2. parsl/data_provider/zip.py +104 -0
  3. parsl/dataflow/dflow.py +44 -25
  4. parsl/dataflow/futures.py +0 -7
  5. parsl/executors/base.py +12 -9
  6. parsl/executors/high_throughput/executor.py +14 -19
  7. parsl/executors/high_throughput/process_worker_pool.py +3 -1
  8. parsl/executors/status_handling.py +81 -1
  9. parsl/executors/taskvine/executor.py +7 -2
  10. parsl/executors/workqueue/executor.py +8 -3
  11. parsl/jobs/job_status_poller.py +19 -113
  12. parsl/jobs/strategy.py +22 -27
  13. parsl/monitoring/monitoring.py +14 -23
  14. parsl/monitoring/radios.py +15 -0
  15. parsl/monitoring/router.py +7 -6
  16. parsl/providers/local/local.py +1 -1
  17. parsl/tests/configs/htex_local_alternate.py +2 -1
  18. parsl/tests/configs/taskvine_ex.py +1 -2
  19. parsl/tests/configs/workqueue_ex.py +1 -2
  20. parsl/tests/conftest.py +6 -7
  21. parsl/tests/test_bash_apps/test_basic.py +5 -4
  22. parsl/tests/test_bash_apps/test_error_codes.py +0 -3
  23. parsl/tests/test_bash_apps/test_kwarg_storage.py +0 -1
  24. parsl/tests/test_bash_apps/test_memoize.py +0 -2
  25. parsl/tests/test_bash_apps/test_memoize_ignore_args.py +0 -1
  26. parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +0 -1
  27. parsl/tests/test_bash_apps/test_multiline.py +0 -1
  28. parsl/tests/test_bash_apps/test_stdout.py +11 -6
  29. parsl/tests/test_monitoring/test_basic.py +46 -21
  30. parsl/tests/test_monitoring/test_fuzz_zmq.py +10 -1
  31. parsl/tests/test_python_apps/test_outputs.py +0 -1
  32. parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +74 -0
  33. parsl/tests/test_staging/test_zip_out.py +113 -0
  34. parsl/version.py +1 -1
  35. {parsl-2024.4.8.data → parsl-2024.4.15.data}/scripts/process_worker_pool.py +3 -1
  36. {parsl-2024.4.8.dist-info → parsl-2024.4.15.dist-info}/METADATA +3 -2
  37. {parsl-2024.4.8.dist-info → parsl-2024.4.15.dist-info}/RECORD +43 -40
  38. {parsl-2024.4.8.data → parsl-2024.4.15.data}/scripts/exec_parsl_function.py +0 -0
  39. {parsl-2024.4.8.data → parsl-2024.4.15.data}/scripts/parsl_coprocess.py +0 -0
  40. {parsl-2024.4.8.dist-info → parsl-2024.4.15.dist-info}/LICENSE +0 -0
  41. {parsl-2024.4.8.dist-info → parsl-2024.4.15.dist-info}/WHEEL +0 -0
  42. {parsl-2024.4.8.dist-info → parsl-2024.4.15.dist-info}/entry_points.txt +0 -0
  43. {parsl-2024.4.8.dist-info → parsl-2024.4.15.dist-info}/top_level.txt +0 -0
parsl/jobs/job_status_poller.py CHANGED
@@ -1,13 +1,9 @@
 import logging
 import parsl
-import time
-import zmq
-from typing import Dict, List, Sequence, Optional, Union
+from typing import List, Sequence, Optional, Union

-from parsl.jobs.states import JobStatus, JobState
 from parsl.jobs.strategy import Strategy
 from parsl.executors.status_handling import BlockProviderExecutor
-from parsl.monitoring.message_type import MessageType


 from parsl.utils import Timer
@@ -16,137 +12,47 @@ from parsl.utils import Timer
 logger = logging.getLogger(__name__)


-class PolledExecutorFacade:
-    def __init__(self, executor: BlockProviderExecutor, dfk: Optional["parsl.dataflow.dflow.DataFlowKernel"] = None):
-        self._executor = executor
-        self._interval = executor.status_polling_interval
-        self._last_poll_time = 0.0
-        self._status = {}  # type: Dict[str, JobStatus]
-
-        # Create a ZMQ channel to send poll status to monitoring
-        self.monitoring_enabled = False
-        if dfk and dfk.monitoring is not None:
-            self.monitoring_enabled = True
-            hub_address = dfk.hub_address
-            hub_port = dfk.hub_zmq_port
-            context = zmq.Context()
-            self.hub_channel = context.socket(zmq.DEALER)
-            self.hub_channel.set_hwm(0)
-            self.hub_channel.connect("tcp://{}:{}".format(hub_address, hub_port))
-            logger.info("Monitoring enabled on job status poller")
-
-    def _should_poll(self, now: float) -> bool:
-        return now >= self._last_poll_time + self._interval
-
-    def poll(self, now: float) -> None:
-        if self._should_poll(now):
-            previous_status = self._status
-            self._status = self._executor.status()
-            self._last_poll_time = now
-            delta_status = {}
-            for block_id in self._status:
-                if block_id not in previous_status \
-                        or previous_status[block_id].state != self._status[block_id].state:
-                    delta_status[block_id] = self._status[block_id]
-
-            if delta_status:
-                self.send_monitoring_info(delta_status)
-
-    def send_monitoring_info(self, status: Dict) -> None:
-        # Send monitoring info for HTEX when monitoring enabled
-        if self.monitoring_enabled:
-            msg = self._executor.create_monitoring_info(status)
-            logger.debug("Sending message {} to hub from job status poller".format(msg))
-            self.hub_channel.send_pyobj((MessageType.BLOCK_INFO, msg))
-
-    @property
-    def status(self) -> Dict[str, JobStatus]:
-        """Return the status of all jobs/blocks of the executor of this poller.
-
-        :return: a dictionary mapping block ids (in string) to job status
-        """
-        return self._status
-
-    @property
-    def executor(self) -> BlockProviderExecutor:
-        return self._executor
-
-    def scale_in(self, n: int, max_idletime: Optional[float] = None) -> List[str]:
-
-        if max_idletime is None:
-            block_ids = self._executor.scale_in(n)
-        else:
-            # This is a HighThroughputExecutor-specific interface violation.
-            # This code hopes, through pan-codebase reasoning, that this
-            # scale_in method really does come from HighThroughputExecutor,
-            # and so does have an extra max_idletime parameter not present
-            # in the executor interface.
-            block_ids = self._executor.scale_in(n, max_idletime=max_idletime)  # type: ignore[call-arg]
-        if block_ids is not None:
-            new_status = {}
-            for block_id in block_ids:
-                new_status[block_id] = JobStatus(JobState.CANCELLED)
-                del self._status[block_id]
-            self.send_monitoring_info(new_status)
-        return block_ids
-
-    def scale_out(self, n: int) -> List[str]:
-        block_ids = self._executor.scale_out(n)
-        if block_ids is not None:
-            new_status = {}
-            for block_id in block_ids:
-                new_status[block_id] = JobStatus(JobState.PENDING)
-            self.send_monitoring_info(new_status)
-            self._status.update(new_status)
-        return block_ids
-
-    def __repr__(self) -> str:
-        return self._status.__repr__()
-
-
 class JobStatusPoller(Timer):
     def __init__(self, *, strategy: Optional[str], max_idletime: float,
                  strategy_period: Union[float, int],
-                 dfk: Optional["parsl.dataflow.dflow.DataFlowKernel"] = None) -> None:
-        self._executor_facades = []  # type: List[PolledExecutorFacade]
-        self.dfk = dfk
+                 monitoring: Optional["parsl.monitoring.radios.MonitoringRadio"] = None) -> None:
+        self._executors = []  # type: List[BlockProviderExecutor]
         self._strategy = Strategy(strategy=strategy,
                                   max_idletime=max_idletime)
         super().__init__(self.poll, interval=strategy_period, name="JobStatusPoller")

     def poll(self) -> None:
         self._update_state()
-        self._run_error_handlers(self._executor_facades)
-        self._strategy.strategize(self._executor_facades)
+        self._run_error_handlers(self._executors)
+        self._strategy.strategize(self._executors)

-    def _run_error_handlers(self, status: List[PolledExecutorFacade]) -> None:
-        for es in status:
-            es.executor.handle_errors(es.status)
+    def _run_error_handlers(self, executors: List[BlockProviderExecutor]) -> None:
+        for e in executors:
+            e.handle_errors(e.status_facade)

     def _update_state(self) -> None:
-        now = time.time()
-        for item in self._executor_facades:
-            item.poll(now)
+        for item in self._executors:
+            item.poll_facade()

     def add_executors(self, executors: Sequence[BlockProviderExecutor]) -> None:
         for executor in executors:
             if executor.status_polling_interval > 0:
                 logger.debug("Adding executor {}".format(executor.label))
-                self._executor_facades.append(PolledExecutorFacade(executor, self.dfk))
+                self._executors.append(executor)
         self._strategy.add_executors(executors)

-    def close(self):
-        super().close()
-        for ef in self._executor_facades:
-            if not ef.executor.bad_state_is_set:
-                logger.info(f"Scaling in executor {ef.executor.label}")
+    def close(self, timeout: Optional[float] = None) -> None:
+        super().close(timeout)
+        for executor in self._executors:
+            if not executor.bad_state_is_set:
+                logger.info(f"Scaling in executor {executor.label}")

                 # this code needs to be at least as many blocks as need
                 # cancelling, but it is safe to be more, as the scaling
                 # code will cope with being asked to cancel more blocks
                 # than exist.
-                block_count = len(ef.status)
-                ef.scale_in(block_count)
+                block_count = len(executor.status_facade)
+                executor.scale_in_facade(block_count)

             else:  # and bad_state_is_set
-                logger.warning(f"Not scaling in executor {ef.executor.label} because it is in bad state")
+                logger.warning(f"Not scaling in executor {executor.label} because it is in bad state")
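
Note: the change above removes PolledExecutorFacade and has JobStatusPoller drive BlockProviderExecutor instances directly through their new facade methods. A minimal sketch of the resulting call pattern, using only names visible in this diff (the executor instance is hypothetical):

    # Sketch only, not part of the release: wiring the simplified poller by hand.
    poller = JobStatusPoller(strategy="simple", max_idletime=120.0, strategy_period=5.0)
    poller.add_executors([htex])   # htex: some BlockProviderExecutor with status_polling_interval > 0
    poller.poll()                  # polls each executor via poll_facade(), runs error handlers, then the strategy
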
parsl/jobs/strategy.py CHANGED
@@ -5,8 +5,6 @@ import math
 import warnings
 from typing import Dict, List, Optional, Sequence, TypedDict

-import parsl.jobs.job_status_poller as jsp
-
 from parsl.executors import HighThroughputExecutor
 from parsl.executors.base import ParslExecutor
 from parsl.executors.status_handling import BlockProviderExecutor
@@ -150,22 +148,21 @@ class Strategy:
         for executor in executors:
             self.executors[executor.label] = {'idle_since': None, 'first': True}

-    def _strategy_init_only(self, executor_facades: List[jsp.PolledExecutorFacade]) -> None:
+    def _strategy_init_only(self, executors: List[BlockProviderExecutor]) -> None:
         """Scale up to init_blocks at the start, then nothing more.
         """
-        for ef in executor_facades:
-            executor = ef.executor
+        for executor in executors:
             if self.executors[executor.label]['first']:
                 logger.debug(f"strategy_init_only: scaling out {executor.provider.init_blocks} initial blocks for {executor.label}")
-                ef.scale_out(executor.provider.init_blocks)
+                executor.scale_out_facade(executor.provider.init_blocks)
                 self.executors[executor.label]['first'] = False
             else:
                 logger.debug("strategy_init_only: doing nothing")

-    def _strategy_simple(self, executor_facades: List[jsp.PolledExecutorFacade]) -> None:
-        self._general_strategy(executor_facades, strategy_type='simple')
+    def _strategy_simple(self, executors: List[BlockProviderExecutor]) -> None:
+        self._general_strategy(executors, strategy_type='simple')

-    def _strategy_htex_auto_scale(self, executor_facades: List[jsp.PolledExecutorFacade]) -> None:
+    def _strategy_htex_auto_scale(self, executors: List[BlockProviderExecutor]) -> None:
         """HTEX specific auto scaling strategy

         This strategy works only for HTEX. This strategy will scale out by
@@ -180,30 +177,25 @@
         expected to scale in effectively only when # of workers, or tasks executing
         per block is close to 1.
         """
-        self._general_strategy(executor_facades, strategy_type='htex')
+        self._general_strategy(executors, strategy_type='htex')

     @wrap_with_logs
-    def _general_strategy(self, executor_facades, *, strategy_type):
-        logger.debug(f"general strategy starting with strategy_type {strategy_type} for {len(executor_facades)} executors")
+    def _general_strategy(self, executors: List[BlockProviderExecutor], *, strategy_type: str) -> None:
+        logger.debug(f"general strategy starting with strategy_type {strategy_type} for {len(executors)} executors")

-        for ef in executor_facades:
-            executor = ef.executor
+        for executor in executors:
             label = executor.label
-            if not isinstance(executor, BlockProviderExecutor):
-                logger.debug(f"Not strategizing for executor {label} because scaling not enabled")
-                continue
             logger.debug(f"Strategizing for executor {label}")

             if self.executors[label]['first']:
-                executor = ef.executor
                 logger.debug(f"Scaling out {executor.provider.init_blocks} initial blocks for {label}")
-                ef.scale_out(executor.provider.init_blocks)
+                executor.scale_out_facade(executor.provider.init_blocks)
                 self.executors[label]['first'] = False

             # Tasks that are either pending completion
             active_tasks = executor.outstanding

-            status = ef.status
+            status = executor.status_facade

             # FIXME we need to handle case where provider does not define these
             # FIXME probably more of this logic should be moved to the provider
@@ -247,23 +239,26 @@
                 else:
                     # We want to make sure that max_idletime is reached
                     # before killing off resources
-                    logger.debug(f"Strategy case 1b: Executor has no active tasks, and more ({active_blocks}) than minimum blocks ({min_blocks})")
+                    logger.debug(f"Strategy case 1b: Executor has no active tasks, and more ({active_blocks})"
+                                 f" than minimum blocks ({min_blocks})")

                     if not self.executors[executor.label]['idle_since']:
                         logger.debug(f"Starting idle timer for executor. If idle time exceeds {self.max_idletime}s, blocks will be scaled in")
                         self.executors[executor.label]['idle_since'] = time.time()
-
                     idle_since = self.executors[executor.label]['idle_since']
+                    assert idle_since is not None, "The `if` statement above this assert should have forced idle time to be not-None"
+
                     idle_duration = time.time() - idle_since
                     if idle_duration > self.max_idletime:
                         # We have resources idle for the max duration,
                         # we have to scale_in now.
                         logger.debug(f"Idle time has reached {self.max_idletime}s for executor {label}; scaling in")
-                        ef.scale_in(active_blocks - min_blocks)
+                        executor.scale_in_facade(active_blocks - min_blocks)

                     else:
                         logger.debug(
-                            f"Idle time {idle_duration}s is less than max_idletime {self.max_idletime}s for executor {label}; not scaling in")
+                            f"Idle time {idle_duration}s is less than max_idletime {self.max_idletime}s"
+                            f" for executor {label}; not scaling in")

             # Case 2
             # More tasks than the available slots.
@@ -282,7 +277,7 @@
                     excess_blocks = math.ceil(float(excess_slots) / (tasks_per_node * nodes_per_block))
                     excess_blocks = min(excess_blocks, max_blocks - active_blocks)
                     logger.debug(f"Requesting {excess_blocks} more blocks")
-                    ef.scale_out(excess_blocks)
+                    executor.scale_out_facade(excess_blocks)

             elif active_slots == 0 and active_tasks > 0:
                 logger.debug("Strategy case 4a: No active slots but some active tasks - could scale out by a single block")
@@ -291,7 +286,7 @@
                 if active_blocks < max_blocks:
                     logger.debug("Requesting single block")

-                    ef.scale_out(1)
+                    executor.scale_out_facade(1)
                 else:
                     logger.debug("Not requesting single block, because at maxblocks already")

@@ -307,7 +302,7 @@
                     excess_blocks = math.ceil(float(excess_slots) / (tasks_per_node * nodes_per_block))
                     excess_blocks = min(excess_blocks, active_blocks - min_blocks)
                     logger.debug(f"Requesting scaling in by {excess_blocks} blocks with idle time {self.max_idletime}s")
-                    ef.scale_in(excess_blocks, max_idletime=self.max_idletime)
+                    executor.scale_in_facade(excess_blocks, max_idletime=self.max_idletime)
                 else:
                     logger.error("This strategy does not support scaling in except for HighThroughputExecutor - taking no action")
             else:
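
For illustration of the case-2 scale-out arithmetic above (numbers invented, and excess_slots is computed earlier in the same function): with excess_slots = 32, tasks_per_node = 4 and nodes_per_block = 2, excess_blocks = ceil(32 / (4 * 2)) = 4, which is then clamped to max_blocks - active_blocks before executor.scale_out_facade(excess_blocks) is called.
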
parsl/monitoring/monitoring.py CHANGED
@@ -3,13 +3,14 @@ from __future__ import annotations
 import os
 import time
 import logging
+import multiprocessing.synchronize as ms
 import typeguard
-import zmq

 import queue

 from parsl.multiprocessing import ForkProcess, SizedQueue
 from multiprocessing import Process
+from multiprocessing import Event
 from multiprocessing.queues import Queue
 from parsl.log_utils import set_file_logger
 from parsl.utils import RepresentationMixin
@@ -18,6 +19,7 @@ from parsl.utils import setproctitle

 from parsl.serialize import deserialize

+from parsl.monitoring.radios import MultiprocessingQueueRadio
 from parsl.monitoring.router import router_starter
 from parsl.monitoring.message_type import MessageType
 from parsl.monitoring.types import AddressedMonitoringMessage
@@ -90,12 +92,6 @@ class MonitoringHub(RepresentationMixin):
              Default: 30 seconds
         """

-        # Any is used to disable typechecking on uses of _dfk_channel,
-        # because it is used in the code as if it points to a channel, but
-        # the static type is that it can also be None. The code relies on
-        # .start() being called and initialising this to a real channel.
-        self._dfk_channel = None  # type: Any
-
         if _db_manager_excepts:
             raise _db_manager_excepts

@@ -157,8 +153,12 @@
         self.block_msgs: Queue[AddressedMonitoringMessage]
         self.block_msgs = SizedQueue()

+        self.router_exit_event: ms.Event
+        self.router_exit_event = Event()
+
         self.router_proc = ForkProcess(target=router_starter,
-                                       args=(comm_q, self.exception_q, self.priority_msgs, self.node_msgs, self.block_msgs, self.resource_msgs),
+                                       args=(comm_q, self.exception_q, self.priority_msgs, self.node_msgs,
+                                             self.block_msgs, self.resource_msgs, self.router_exit_event),
                                        kwargs={"hub_address": self.hub_address,
                                                "udp_port": self.hub_port,
                                                "zmq_port_range": self.hub_port_range,
@@ -191,6 +191,8 @@
         self.filesystem_proc.start()
         logger.info(f"Started filesystem radio receiver process {self.filesystem_proc.pid}")

+        self.radio = MultiprocessingQueueRadio(self.block_msgs)
+
         try:
             comm_q_result = comm_q.get(block=True, timeout=120)
         except queue.Empty:
@@ -205,14 +207,6 @@

         self.monitoring_hub_url = "udp://{}:{}".format(self.hub_address, udp_port)

-        context = zmq.Context()
-        self.dfk_channel_timeout = 10000  # in milliseconds
-        self._dfk_channel = context.socket(zmq.DEALER)
-        self._dfk_channel.setsockopt(zmq.LINGER, 0)
-        self._dfk_channel.set_hwm(0)
-        self._dfk_channel.setsockopt(zmq.SNDTIMEO, self.dfk_channel_timeout)
-        self._dfk_channel.connect("tcp://{}:{}".format(self.hub_address, zmq_port))
-
         logger.info("Monitoring Hub initialized")

         return zmq_port
@@ -220,11 +214,7 @@
     # TODO: tighten the Any message format
     def send(self, mtype: MessageType, message: Any) -> None:
         logger.debug("Sending message type {}".format(mtype))
-        try:
-            self._dfk_channel.send_pyobj((mtype, message))
-        except zmq.Again:
-            logger.exception(
-                "The monitoring message sent from DFK to router timed-out after {}ms".format(self.dfk_channel_timeout))
+        self.radio.send((mtype, message))

     def close(self) -> None:
         logger.info("Terminating Monitoring Hub")
@@ -235,9 +225,8 @@
                 logger.error("There was a queued exception (Either router or DBM process got exception much earlier?)")
             except queue.Empty:
                 break
-        if self._dfk_channel and self.monitoring_hub_active:
+        if self.monitoring_hub_active:
             self.monitoring_hub_active = False
-            self._dfk_channel.close()
         if exception_msgs:
             for exception_msg in exception_msgs:
                 logger.error(
@@ -249,6 +238,8 @@
             self.router_proc.terminate()
             self.dbm_proc.terminate()
             self.filesystem_proc.terminate()
+        logger.info("Setting router termination event")
+        self.router_exit_event.set()
         logger.info("Waiting for router to terminate")
         self.router_proc.join()
         logger.debug("Finished waiting for router termination")
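
With the ZMQ DEALER channel gone, MonitoringHub.send() now forwards messages through the radio created in start(). A hedged sketch of the equivalent call path (the payload is illustrative, not from the diff):

    # Sketch only: what hub.send() now amounts to after this change.
    hub.send(MessageType.WORKFLOW_INFO, {"example_key": "example_value"})
    # ...is equivalent to:
    hub.radio.send((MessageType.WORKFLOW_INFO, {"example_key": "example_value"}))
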
@@ -6,6 +6,7 @@ import logging
6
6
 
7
7
  from abc import ABCMeta, abstractmethod
8
8
 
9
+ from multiprocessing.queues import Queue
9
10
  from typing import Optional
10
11
 
11
12
  from parsl.serialize import serialize
@@ -173,3 +174,17 @@ class UDPRadio(MonitoringRadio):
173
174
  logging.error("Could not send message within timeout limit")
174
175
  return
175
176
  return
177
+
178
+
179
+ class MultiprocessingQueueRadio(MonitoringRadio):
180
+ """A monitoring radio intended which connects over a multiprocessing Queue.
181
+ This radio is intended to be used on the submit side, where components
182
+ in the submit process, or processes launched by multiprocessing, will have
183
+ access to a Queue shared with the monitoring database code (bypassing the
184
+ monitoring router).
185
+ """
186
+ def __init__(self, queue: Queue) -> None:
187
+ self.queue = queue
188
+
189
+ def send(self, message: object) -> None:
190
+ self.queue.put((message, 0))
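
A brief usage sketch for the new radio (SizedQueue is the queue type MonitoringHub uses above; the message tuple is illustrative):

    from parsl.multiprocessing import SizedQueue
    from parsl.monitoring.radios import MultiprocessingQueueRadio

    q = SizedQueue()
    radio = MultiprocessingQueueRadio(q)
    radio.send(("example", {"k": "v"}))              # send() wraps each message as (message, 0)
    assert q.get() == (("example", {"k": "v"}), 0)
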
parsl/monitoring/router.py CHANGED
@@ -15,6 +15,8 @@ from parsl.utils import setproctitle

 from parsl.monitoring.message_type import MessageType
 from parsl.monitoring.types import AddressedMonitoringMessage, TaggedMonitoringMessage
+
+from multiprocessing.synchronize import Event
 from typing import Optional, Tuple, Union


@@ -98,10 +100,10 @@ class MonitoringRouter:
               priority_msgs: "queue.Queue[AddressedMonitoringMessage]",
               node_msgs: "queue.Queue[AddressedMonitoringMessage]",
               block_msgs: "queue.Queue[AddressedMonitoringMessage]",
-              resource_msgs: "queue.Queue[AddressedMonitoringMessage]") -> None:
+              resource_msgs: "queue.Queue[AddressedMonitoringMessage]",
+              exit_event: Event) -> None:
         try:
-            router_keep_going = True
-            while router_keep_going:
+            while not exit_event.is_set():
                 try:
                     data, addr = self.udp_sock.recvfrom(2048)
                     resource_msg = pickle.loads(data)
@@ -135,8 +137,6 @@
                             priority_msgs.put(msg_0)
                         elif msg[0] == MessageType.WORKFLOW_INFO:
                             priority_msgs.put(msg_0)
-                            if 'exit_now' in msg[1] and msg[1]['exit_now']:
-                                router_keep_going = False
                         else:
                             # There is a type: ignore here because if msg[0]
                             # is of the correct type, this code is unreachable,
@@ -178,6 +178,7 @@ def router_starter(comm_q: "queue.Queue[Union[Tuple[int, int], str]]",
                    node_msgs: "queue.Queue[AddressedMonitoringMessage]",
                    block_msgs: "queue.Queue[AddressedMonitoringMessage]",
                    resource_msgs: "queue.Queue[AddressedMonitoringMessage]",
+                   exit_event: Event,

                    hub_address: str,
                    udp_port: Optional[int],
@@ -202,7 +203,7 @@

     router.logger.info("Starting MonitoringRouter in router_starter")
     try:
-        router.start(priority_msgs, node_msgs, block_msgs, resource_msgs)
+        router.start(priority_msgs, node_msgs, block_msgs, resource_msgs, exit_event)
     except Exception as e:
         router.logger.exception("router.start exception")
         exception_q.put(('Hub', str(e)))
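
The router no longer watches for an 'exit_now' flag inside WORKFLOW_INFO messages; shutdown is signalled with a multiprocessing Event instead. A hedged sketch of the handshake, using only names visible in this diff (the process handle is hypothetical):

    from multiprocessing import Event

    router_exit_event = Event()
    # MonitoringHub.start() passes this event to router_starter() alongside the message queues.
    router_exit_event.set()    # on close(): MonitoringRouter.start()'s loop exits once is_set() is True
    router_proc.join()         # hypothetical handle to the router ForkProcess
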
parsl/providers/local/local.py CHANGED
@@ -266,7 +266,7 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
         for job in job_ids:
             job_dict = self.resources[job]
             job_dict['cancelled'] = True
-            logger.debug("Terminating job/proc_id: {0}".format(job))
+            logger.debug("Terminating job/process ID: {0}".format(job))
             cmd = "kill -- -$(ps -o pgid= {} | grep -o '[0-9]*')".format(job_dict['remote_pid'])
             retcode, stdout, stderr = self.channel.execute_wait(cmd, self.cmd_timeout)
             if retcode != 0:
parsl/tests/configs/htex_local_alternate.py CHANGED
@@ -31,6 +31,7 @@ from parsl.executors import HighThroughputExecutor
 from parsl.data_provider.http import HTTPInTaskStaging
 from parsl.data_provider.ftp import FTPInTaskStaging
 from parsl.data_provider.file_noop import NoOpFileStaging
+from parsl.data_provider.zip import ZipFileStaging

 working_dir = os.getcwd() + "/" + "test_htex_alternate"

@@ -42,7 +43,7 @@ def fresh_config():
                 address="127.0.0.1",
                 label="htex_Local",
                 working_dir=working_dir,
-                storage_access=[FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()],
+                storage_access=[ZipFileStaging(), FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()],
                 worker_debug=True,
                 cores_per_worker=1,
                 heartbeat_period=2,
parsl/tests/configs/taskvine_ex.py CHANGED
@@ -9,5 +9,4 @@ from parsl.data_provider.file_noop import NoOpFileStaging

 def fresh_config():
     return Config(executors=[TaskVineExecutor(manager_config=TaskVineManagerConfig(port=9000),
-                                              worker_launch_method='factory',
-                                              storage_access=[FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()])])
+                                              worker_launch_method='factory')])
parsl/tests/configs/workqueue_ex.py CHANGED
@@ -8,5 +8,4 @@ from parsl.data_provider.file_noop import NoOpFileStaging

 def fresh_config():
     return Config(executors=[WorkQueueExecutor(port=9000,
-                                               coprocess=True,
-                                               storage_access=[FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()])])
+                                               coprocess=True)])
parsl/tests/conftest.py CHANGED
@@ -135,28 +135,27 @@ def pytest_configure(config):
     )
     config.addinivalue_line(
         'markers',
-        'noci: mark test to be unsuitable for running during automated tests'
+        'cleannet: Enable tests that require a clean network connection (such as for testing FTP)'
     )
-
     config.addinivalue_line(
         'markers',
-        'cleannet: Enable tests that require a clean network connection (such as for testing FTP)'
+        'staging_required: Marks tests that require a staging provider, when there is no sharedFS)'
     )
     config.addinivalue_line(
         'markers',
-        'issue363: Marks tests that require a shared filesystem for stdout/stderr - see issue #363'
+        'sshd_required: Marks tests that require a SSHD'
     )
     config.addinivalue_line(
         'markers',
-        'staging_required: Marks tests that require a staging provider, when there is no sharedFS)'
+        'multiple_cores_required: Marks tests that require multiple cores, such as htex affinity'
     )
     config.addinivalue_line(
         'markers',
-        'sshd_required: Marks tests that require a SSHD'
+        'issue3328: Marks tests broken by issue #3328'
     )
     config.addinivalue_line(
         'markers',
-        'multiple_cores_required: Marks tests that require multiple cores, such as htex affinity'
+        'executor_supports_std_stream_tuples: Marks tests that require tuple support for stdout/stderr'
     )


parsl/tests/test_bash_apps/test_basic.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 import os
 import random
 import re
@@ -23,7 +24,6 @@ def foo(x, y, z=10, stdout=None, label=None):
     return f"echo {x} {y} {z}"


-@pytest.mark.issue363
 def test_command_format_1(tmpd_cwd):
     """Testing command format for BashApps"""

@@ -38,8 +38,7 @@ def test_command_format_1(tmpd_cwd):
     assert so_content == "1 4 10"


-@pytest.mark.issue363
-def test_auto_log_filename_format():
+def test_auto_log_filename_format(caplog):
     """Testing auto log filename format for BashApps
     """
     app_label = "label_test_auto_log_filename_format"
@@ -61,8 +60,10 @@ def test_auto_log_filename_format():
     assert contents == '1 {0} 10\n'.format(rand_int), \
         'Output does not match expected string "1 {0} 10", Got: "{1}"'.format(rand_int, contents)

+    for record in caplog.records:
+        assert record.levelno < logging.ERROR
+

-@pytest.mark.issue363
 def test_parallel_for(tmpd_cwd, n=3):
     """Testing a simple parallel for loop"""
     outdir = tmpd_cwd / "outputs/test_parallel"
parsl/tests/test_bash_apps/test_error_codes.py CHANGED
@@ -76,7 +76,6 @@ def test_div_0(test_fn=div_0):
     os.remove('std.out')


-@pytest.mark.issue363
 def test_bash_misuse(test_fn=bash_misuse):
     err_code = test_matrix[test_fn]['exit_code']
     f = test_fn()
@@ -91,7 +90,6 @@ def test_bash_misuse(test_fn=bash_misuse):
     os.remove('std.out')


-@pytest.mark.issue363
 def test_command_not_found(test_fn=command_not_found):
     err_code = test_matrix[test_fn]['exit_code']
     f = test_fn()
@@ -108,7 +106,6 @@ def test_command_not_found(test_fn=command_not_found):
     return True


-@pytest.mark.issue363
 def test_not_executable(test_fn=not_executable):
     err_code = test_matrix[test_fn]['exit_code']
     f = test_fn()
parsl/tests/test_bash_apps/test_kwarg_storage.py CHANGED
@@ -8,7 +8,6 @@ def foo(z=2, stdout=None):
     return f"echo {z}"


-@pytest.mark.issue363
 def test_command_format_1(tmpd_cwd):
     """Testing command format for BashApps
     """
parsl/tests/test_bash_apps/test_memoize.py CHANGED
@@ -12,7 +12,6 @@ def fail_on_presence(outputs=()):
 # This test is an oddity that requires a shared-FS and simply
 # won't work if there's a staging provider.
 # @pytest.mark.sharedFS_required
-@pytest.mark.issue363
 def test_bash_memoization(tmpd_cwd, n=2):
     """Testing bash memoization
     """
@@ -33,7 +32,6 @@ def fail_on_presence_kw(outputs=(), foo=None):
 # This test is an oddity that requires a shared-FS and simply
 # won't work if there's a staging provider.
 # @pytest.mark.sharedFS_required
-@pytest.mark.issue363
 def test_bash_memoization_keywords(tmpd_cwd, n=2):
     """Testing bash memoization
     """
parsl/tests/test_bash_apps/test_memoize_ignore_args.py CHANGED
@@ -22,7 +22,6 @@ def no_checkpoint_stdout_app_ignore_args(stdout=None):
     return "echo X"


-@pytest.mark.issue363
 def test_memo_stdout():

     # this should run and create a file named after path_x