parsl 2024.4.1__py3-none-any.whl → 2024.4.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. parsl/data_provider/data_manager.py +2 -1
  2. parsl/data_provider/zip.py +104 -0
  3. parsl/dataflow/dflow.py +57 -48
  4. parsl/dataflow/futures.py +0 -7
  5. parsl/executors/base.py +12 -9
  6. parsl/executors/high_throughput/executor.py +14 -19
  7. parsl/executors/high_throughput/process_worker_pool.py +3 -1
  8. parsl/executors/status_handling.py +82 -9
  9. parsl/executors/taskvine/executor.py +7 -2
  10. parsl/executors/workqueue/executor.py +8 -3
  11. parsl/jobs/job_status_poller.py +27 -107
  12. parsl/jobs/strategy.py +31 -32
  13. parsl/monitoring/monitoring.py +14 -23
  14. parsl/monitoring/radios.py +15 -0
  15. parsl/monitoring/remote.py +2 -1
  16. parsl/monitoring/router.py +7 -6
  17. parsl/providers/local/local.py +1 -1
  18. parsl/tests/configs/htex_local_alternate.py +2 -1
  19. parsl/tests/configs/taskvine_ex.py +1 -2
  20. parsl/tests/configs/workqueue_ex.py +1 -2
  21. parsl/tests/conftest.py +6 -7
  22. parsl/tests/test_bash_apps/test_basic.py +5 -4
  23. parsl/tests/test_bash_apps/test_error_codes.py +0 -3
  24. parsl/tests/test_bash_apps/test_kwarg_storage.py +0 -1
  25. parsl/tests/test_bash_apps/test_memoize.py +0 -2
  26. parsl/tests/test_bash_apps/test_memoize_ignore_args.py +0 -1
  27. parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +0 -1
  28. parsl/tests/test_bash_apps/test_multiline.py +0 -1
  29. parsl/tests/test_bash_apps/test_stdout.py +11 -6
  30. parsl/tests/test_monitoring/test_basic.py +46 -21
  31. parsl/tests/test_monitoring/test_fuzz_zmq.py +10 -1
  32. parsl/tests/test_python_apps/test_outputs.py +0 -1
  33. parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +74 -0
  34. parsl/tests/test_staging/test_zip_out.py +113 -0
  35. parsl/version.py +1 -1
  36. {parsl-2024.4.1.data → parsl-2024.4.15.data}/scripts/process_worker_pool.py +3 -1
  37. {parsl-2024.4.1.dist-info → parsl-2024.4.15.dist-info}/METADATA +3 -2
  38. {parsl-2024.4.1.dist-info → parsl-2024.4.15.dist-info}/RECORD +44 -41
  39. {parsl-2024.4.1.data → parsl-2024.4.15.data}/scripts/exec_parsl_function.py +0 -0
  40. {parsl-2024.4.1.data → parsl-2024.4.15.data}/scripts/parsl_coprocess.py +0 -0
  41. {parsl-2024.4.1.dist-info → parsl-2024.4.15.dist-info}/LICENSE +0 -0
  42. {parsl-2024.4.1.dist-info → parsl-2024.4.15.dist-info}/WHEEL +0 -0
  43. {parsl-2024.4.1.dist-info → parsl-2024.4.15.dist-info}/entry_points.txt +0 -0
  44. {parsl-2024.4.1.dist-info → parsl-2024.4.15.dist-info}/top_level.txt +0 -0
parsl/jobs/job_status_poller.py CHANGED
@@ -1,13 +1,9 @@
  import logging
  import parsl
- import time
- import zmq
- from typing import Dict, List, Sequence, Optional, Union
+ from typing import List, Sequence, Optional, Union
 
- from parsl.jobs.states import JobStatus, JobState
  from parsl.jobs.strategy import Strategy
  from parsl.executors.status_handling import BlockProviderExecutor
- from parsl.monitoring.message_type import MessageType
 
 
  from parsl.utils import Timer
@@ -16,123 +12,47 @@ from parsl.utils import Timer
  logger = logging.getLogger(__name__)
 
 
- class PolledExecutorFacade:
-     def __init__(self, executor: BlockProviderExecutor, dfk: Optional["parsl.dataflow.dflow.DataFlowKernel"] = None):
-         self._executor = executor
-         self._dfk = dfk
-         self._interval = executor.status_polling_interval
-         self._last_poll_time = 0.0
-         self._status = {}  # type: Dict[str, JobStatus]
-         self.first = True
-
-         # Create a ZMQ channel to send poll status to monitoring
-         self.monitoring_enabled = False
-         if self._dfk and self._dfk.monitoring is not None:
-             self.monitoring_enabled = True
-             hub_address = self._dfk.hub_address
-             hub_port = self._dfk.hub_zmq_port
-             context = zmq.Context()
-             self.hub_channel = context.socket(zmq.DEALER)
-             self.hub_channel.set_hwm(0)
-             self.hub_channel.connect("tcp://{}:{}".format(hub_address, hub_port))
-             logger.info("Monitoring enabled on job status poller")
-
-     def _should_poll(self, now: float) -> bool:
-         return now >= self._last_poll_time + self._interval
-
-     def poll(self, now: float) -> None:
-         if self._should_poll(now):
-             previous_status = self._status
-             self._status = self._executor.status()
-             self._last_poll_time = now
-             delta_status = {}
-             for block_id in self._status:
-                 if block_id not in previous_status \
-                         or previous_status[block_id].state != self._status[block_id].state:
-                     delta_status[block_id] = self._status[block_id]
-
-             if delta_status:
-                 self.send_monitoring_info(delta_status)
-
-     def send_monitoring_info(self, status: Dict) -> None:
-         # Send monitoring info for HTEX when monitoring enabled
-         if self.monitoring_enabled:
-             msg = self._executor.create_monitoring_info(status)
-             logger.debug("Sending message {} to hub from job status poller".format(msg))
-             self.hub_channel.send_pyobj((MessageType.BLOCK_INFO, msg))
-
-     @property
-     def status(self) -> Dict[str, JobStatus]:
-         """Return the status of all jobs/blocks of the executor of this poller.
-
-         :return: a dictionary mapping block ids (in string) to job status
-         """
-         return self._status
-
-     @property
-     def executor(self) -> BlockProviderExecutor:
-         return self._executor
-
-     def scale_in(self, n: int, max_idletime: Optional[float] = None) -> List[str]:
-
-         if max_idletime is None:
-             block_ids = self._executor.scale_in(n)
-         else:
-             # This is a HighThroughputExecutor-specific interface violation.
-             # This code hopes, through pan-codebase reasoning, that this
-             # scale_in method really does come from HighThroughputExecutor,
-             # and so does have an extra max_idletime parameter not present
-             # in the executor interface.
-             block_ids = self._executor.scale_in(n, max_idletime=max_idletime)  # type: ignore[call-arg]
-         if block_ids is not None:
-             new_status = {}
-             for block_id in block_ids:
-                 new_status[block_id] = JobStatus(JobState.CANCELLED)
-                 del self._status[block_id]
-             self.send_monitoring_info(new_status)
-         return block_ids
-
-     def scale_out(self, n: int) -> List[str]:
-         block_ids = self._executor.scale_out(n)
-         if block_ids is not None:
-             new_status = {}
-             for block_id in block_ids:
-                 new_status[block_id] = JobStatus(JobState.PENDING)
-             self.send_monitoring_info(new_status)
-             self._status.update(new_status)
-         return block_ids
-
-     def __repr__(self) -> str:
-         return self._status.__repr__()
-
-
  class JobStatusPoller(Timer):
      def __init__(self, *, strategy: Optional[str], max_idletime: float,
                   strategy_period: Union[float, int],
-                  dfk: Optional["parsl.dataflow.dflow.DataFlowKernel"] = None) -> None:
-         self._executor_facades = []  # type: List[PolledExecutorFacade]
-         self.dfk = dfk
+                  monitoring: Optional["parsl.monitoring.radios.MonitoringRadio"] = None) -> None:
+         self._executors = []  # type: List[BlockProviderExecutor]
          self._strategy = Strategy(strategy=strategy,
                                    max_idletime=max_idletime)
          super().__init__(self.poll, interval=strategy_period, name="JobStatusPoller")
 
      def poll(self) -> None:
          self._update_state()
-         self._run_error_handlers(self._executor_facades)
-         self._strategy.strategize(self._executor_facades)
+         self._run_error_handlers(self._executors)
+         self._strategy.strategize(self._executors)
 
-     def _run_error_handlers(self, status: List[PolledExecutorFacade]) -> None:
-         for es in status:
-             es.executor.handle_errors(es.status)
+     def _run_error_handlers(self, executors: List[BlockProviderExecutor]) -> None:
+         for e in executors:
+             e.handle_errors(e.status_facade)
 
      def _update_state(self) -> None:
-         now = time.time()
-         for item in self._executor_facades:
-             item.poll(now)
+         for item in self._executors:
+             item.poll_facade()
 
      def add_executors(self, executors: Sequence[BlockProviderExecutor]) -> None:
          for executor in executors:
              if executor.status_polling_interval > 0:
                  logger.debug("Adding executor {}".format(executor.label))
-                 self._executor_facades.append(PolledExecutorFacade(executor, self.dfk))
+                 self._executors.append(executor)
          self._strategy.add_executors(executors)
+
+     def close(self, timeout: Optional[float] = None) -> None:
+         super().close(timeout)
+         for executor in self._executors:
+             if not executor.bad_state_is_set:
+                 logger.info(f"Scaling in executor {executor.label}")
+
+                 # this code needs to be at least as many blocks as need
+                 # cancelling, but it is safe to be more, as the scaling
+                 # code will cope with being asked to cancel more blocks
+                 # than exist.
+                 block_count = len(executor.status_facade)
+                 executor.scale_in_facade(block_count)
+
+             else:  # and bad_state_is_set
+                 logger.warning(f"Not scaling in executor {executor.label} because it is in bad state")
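
Note: the new JobStatusPoller.close() above asks every healthy executor to cancel all of its known blocks on shutdown. A minimal runnable sketch of that logic, using a stand-in class rather than parsl's real BlockProviderExecutor interface:

    from typing import Dict, List

    class StubExecutor:
        """Stand-in exposing only the attributes close() relies on."""
        label = "htex_Local"
        bad_state_is_set = False
        # maps block IDs to a job status; the states here are illustrative
        status_facade: Dict[str, str] = {"0": "RUNNING", "1": "PENDING"}

        def scale_in_facade(self, n: int) -> List[str]:
            # cancelling more blocks than exist is safe: the real scaling
            # code cancels what it can and ignores the excess
            cancelled = list(self.status_facade)[:n]
            print(f"cancelling blocks {cancelled}")
            return cancelled

    executor = StubExecutor()
    if not executor.bad_state_is_set:
        executor.scale_in_facade(len(executor.status_facade))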
parsl/jobs/strategy.py CHANGED
@@ -5,8 +5,6 @@ import math
  import warnings
  from typing import Dict, List, Optional, Sequence, TypedDict
 
- import parsl.jobs.job_status_poller as jsp
-
  from parsl.executors import HighThroughputExecutor
  from parsl.executors.base import ParslExecutor
  from parsl.executors.status_handling import BlockProviderExecutor
@@ -26,6 +24,10 @@ class ExecutorState(TypedDict):
      If the executor is not idle, then None.
      """
 
+     first: bool
+     """True if this executor has not yet had a strategy poll.
+     """
+
 
  class Strategy:
      """Scaling strategy.
@@ -144,24 +146,23 @@ class Strategy:
 
      def add_executors(self, executors: Sequence[ParslExecutor]) -> None:
          for executor in executors:
-             self.executors[executor.label] = {'idle_since': None}
+             self.executors[executor.label] = {'idle_since': None, 'first': True}
 
-     def _strategy_init_only(self, executor_facades: List[jsp.PolledExecutorFacade]) -> None:
+     def _strategy_init_only(self, executors: List[BlockProviderExecutor]) -> None:
          """Scale up to init_blocks at the start, then nothing more.
          """
-         for ef in executor_facades:
-             if ef.first:
-                 executor = ef.executor
+         for executor in executors:
+             if self.executors[executor.label]['first']:
                  logger.debug(f"strategy_init_only: scaling out {executor.provider.init_blocks} initial blocks for {executor.label}")
-                 ef.scale_out(executor.provider.init_blocks)
-                 ef.first = False
+                 executor.scale_out_facade(executor.provider.init_blocks)
+                 self.executors[executor.label]['first'] = False
              else:
                  logger.debug("strategy_init_only: doing nothing")
 
-     def _strategy_simple(self, executor_facades: List[jsp.PolledExecutorFacade]) -> None:
-         self._general_strategy(executor_facades, strategy_type='simple')
+     def _strategy_simple(self, executors: List[BlockProviderExecutor]) -> None:
+         self._general_strategy(executors, strategy_type='simple')
 
-     def _strategy_htex_auto_scale(self, executor_facades: List[jsp.PolledExecutorFacade]) -> None:
+     def _strategy_htex_auto_scale(self, executors: List[BlockProviderExecutor]) -> None:
          """HTEX specific auto scaling strategy
 
          This strategy works only for HTEX. This strategy will scale out by
@@ -176,30 +177,25 @@ class Strategy:
          expected to scale in effectively only when # of workers, or tasks executing
          per block is close to 1.
          """
-         self._general_strategy(executor_facades, strategy_type='htex')
+         self._general_strategy(executors, strategy_type='htex')
 
      @wrap_with_logs
-     def _general_strategy(self, executor_facades, *, strategy_type):
-         logger.debug(f"general strategy starting with strategy_type {strategy_type} for {len(executor_facades)} executors")
+     def _general_strategy(self, executors: List[BlockProviderExecutor], *, strategy_type: str) -> None:
+         logger.debug(f"general strategy starting with strategy_type {strategy_type} for {len(executors)} executors")
 
-         for ef in executor_facades:
-             executor = ef.executor
+         for executor in executors:
              label = executor.label
-             if not isinstance(executor, BlockProviderExecutor):
-                 logger.debug(f"Not strategizing for executor {label} because scaling not enabled")
-                 continue
              logger.debug(f"Strategizing for executor {label}")
 
-             if ef.first:
-                 executor = ef.executor
+             if self.executors[label]['first']:
                  logger.debug(f"Scaling out {executor.provider.init_blocks} initial blocks for {label}")
-                 ef.scale_out(executor.provider.init_blocks)
-                 ef.first = False
+                 executor.scale_out_facade(executor.provider.init_blocks)
+                 self.executors[label]['first'] = False
 
              # Tasks that are either pending completion
              active_tasks = executor.outstanding
 
-             status = ef.status
+             status = executor.status_facade
 
              # FIXME we need to handle case where provider does not define these
              # FIXME probably more of this logic should be moved to the provider
@@ -243,23 +239,26 @@ class Strategy:
              else:
                  # We want to make sure that max_idletime is reached
                  # before killing off resources
-                 logger.debug(f"Strategy case 1b: Executor has no active tasks, and more ({active_blocks}) than minimum blocks ({min_blocks})")
+                 logger.debug(f"Strategy case 1b: Executor has no active tasks, and more ({active_blocks})"
+                              f" than minimum blocks ({min_blocks})")
 
                  if not self.executors[executor.label]['idle_since']:
                      logger.debug(f"Starting idle timer for executor. If idle time exceeds {self.max_idletime}s, blocks will be scaled in")
                      self.executors[executor.label]['idle_since'] = time.time()
-
                  idle_since = self.executors[executor.label]['idle_since']
+                 assert idle_since is not None, "The `if` statement above this assert should have forced idle time to be not-None"
+
                  idle_duration = time.time() - idle_since
                  if idle_duration > self.max_idletime:
                      # We have resources idle for the max duration,
                      # we have to scale_in now.
                      logger.debug(f"Idle time has reached {self.max_idletime}s for executor {label}; scaling in")
-                     ef.scale_in(active_blocks - min_blocks)
+                     executor.scale_in_facade(active_blocks - min_blocks)
 
                  else:
                      logger.debug(
-                         f"Idle time {idle_duration}s is less than max_idletime {self.max_idletime}s for executor {label}; not scaling in")
+                         f"Idle time {idle_duration}s is less than max_idletime {self.max_idletime}s"
+                         f" for executor {label}; not scaling in")
 
              # Case 2
              # More tasks than the available slots.
@@ -278,7 +277,7 @@ class Strategy:
                  excess_blocks = math.ceil(float(excess_slots) / (tasks_per_node * nodes_per_block))
                  excess_blocks = min(excess_blocks, max_blocks - active_blocks)
                  logger.debug(f"Requesting {excess_blocks} more blocks")
-                 ef.scale_out(excess_blocks)
+                 executor.scale_out_facade(excess_blocks)
 
              elif active_slots == 0 and active_tasks > 0:
                  logger.debug("Strategy case 4a: No active slots but some active tasks - could scale out by a single block")
@@ -287,7 +286,7 @@ class Strategy:
                  if active_blocks < max_blocks:
                      logger.debug("Requesting single block")
 
-                     ef.scale_out(1)
+                     executor.scale_out_facade(1)
                  else:
                      logger.debug("Not requesting single block, because at maxblocks already")
 
@@ -303,7 +302,7 @@ class Strategy:
                  excess_blocks = math.ceil(float(excess_slots) / (tasks_per_node * nodes_per_block))
                  excess_blocks = min(excess_blocks, active_blocks - min_blocks)
                  logger.debug(f"Requesting scaling in by {excess_blocks} blocks with idle time {self.max_idletime}s")
-                 ef.scale_in(excess_blocks, max_idletime=self.max_idletime)
+                 executor.scale_in_facade(excess_blocks, max_idletime=self.max_idletime)
              else:
                  logger.error("This strategy does not support scaling in except for HighThroughputExecutor - taking no action")
          else:
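
Note: the `first` flag that previously lived on PolledExecutorFacade now sits in the Strategy's per-executor TypedDict, keyed by executor label. A small runnable sketch of that state shape, reconstructed from the fields visible in this diff:

    import time
    from typing import Dict, Optional, TypedDict

    class ExecutorState(TypedDict):
        idle_since: Optional[float]  # when the executor went idle, or None
        first: bool                  # True until the first strategy poll

    executors: Dict[str, ExecutorState] = {"htex_Local": {"idle_since": None, "first": True}}

    if executors["htex_Local"]["first"]:
        # scale out init_blocks exactly once, then clear the flag
        executors["htex_Local"]["first"] = False
    executors["htex_Local"]["idle_since"] = time.time()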
parsl/monitoring/monitoring.py CHANGED
@@ -3,13 +3,14 @@ from __future__ import annotations
  import os
  import time
  import logging
+ import multiprocessing.synchronize as ms
  import typeguard
- import zmq
 
  import queue
 
  from parsl.multiprocessing import ForkProcess, SizedQueue
  from multiprocessing import Process
+ from multiprocessing import Event
  from multiprocessing.queues import Queue
  from parsl.log_utils import set_file_logger
  from parsl.utils import RepresentationMixin
@@ -18,6 +19,7 @@ from parsl.utils import setproctitle
 
  from parsl.serialize import deserialize
 
+ from parsl.monitoring.radios import MultiprocessingQueueRadio
  from parsl.monitoring.router import router_starter
  from parsl.monitoring.message_type import MessageType
  from parsl.monitoring.types import AddressedMonitoringMessage
@@ -90,12 +92,6 @@ class MonitoringHub(RepresentationMixin):
          Default: 30 seconds
          """
 
-         # Any is used to disable typechecking on uses of _dfk_channel,
-         # because it is used in the code as if it points to a channel, but
-         # the static type is that it can also be None. The code relies on
-         # .start() being called and initialising this to a real channel.
-         self._dfk_channel = None  # type: Any
-
          if _db_manager_excepts:
              raise _db_manager_excepts
 
@@ -157,8 +153,12 @@ class MonitoringHub(RepresentationMixin):
          self.block_msgs: Queue[AddressedMonitoringMessage]
          self.block_msgs = SizedQueue()
 
+         self.router_exit_event: ms.Event
+         self.router_exit_event = Event()
+
          self.router_proc = ForkProcess(target=router_starter,
-                                        args=(comm_q, self.exception_q, self.priority_msgs, self.node_msgs, self.block_msgs, self.resource_msgs),
+                                        args=(comm_q, self.exception_q, self.priority_msgs, self.node_msgs,
+                                              self.block_msgs, self.resource_msgs, self.router_exit_event),
                                         kwargs={"hub_address": self.hub_address,
                                                 "udp_port": self.hub_port,
                                                 "zmq_port_range": self.hub_port_range,
@@ -191,6 +191,8 @@ class MonitoringHub(RepresentationMixin):
          self.filesystem_proc.start()
          logger.info(f"Started filesystem radio receiver process {self.filesystem_proc.pid}")
 
+         self.radio = MultiprocessingQueueRadio(self.block_msgs)
+
          try:
              comm_q_result = comm_q.get(block=True, timeout=120)
          except queue.Empty:
@@ -205,14 +207,6 @@ class MonitoringHub(RepresentationMixin):
 
          self.monitoring_hub_url = "udp://{}:{}".format(self.hub_address, udp_port)
 
-         context = zmq.Context()
-         self.dfk_channel_timeout = 10000  # in milliseconds
-         self._dfk_channel = context.socket(zmq.DEALER)
-         self._dfk_channel.setsockopt(zmq.LINGER, 0)
-         self._dfk_channel.set_hwm(0)
-         self._dfk_channel.setsockopt(zmq.SNDTIMEO, self.dfk_channel_timeout)
-         self._dfk_channel.connect("tcp://{}:{}".format(self.hub_address, zmq_port))
-
          logger.info("Monitoring Hub initialized")
 
          return zmq_port
@@ -220,11 +214,7 @@ class MonitoringHub(RepresentationMixin):
      # TODO: tighten the Any message format
      def send(self, mtype: MessageType, message: Any) -> None:
          logger.debug("Sending message type {}".format(mtype))
-         try:
-             self._dfk_channel.send_pyobj((mtype, message))
-         except zmq.Again:
-             logger.exception(
-                 "The monitoring message sent from DFK to router timed-out after {}ms".format(self.dfk_channel_timeout))
+         self.radio.send((mtype, message))
 
      def close(self) -> None:
          logger.info("Terminating Monitoring Hub")
@@ -235,9 +225,8 @@ class MonitoringHub(RepresentationMixin):
                  logger.error("There was a queued exception (Either router or DBM process got exception much earlier?)")
              except queue.Empty:
                  break
-         if self._dfk_channel and self.monitoring_hub_active:
+         if self.monitoring_hub_active:
              self.monitoring_hub_active = False
-             self._dfk_channel.close()
          if exception_msgs:
              for exception_msg in exception_msgs:
                  logger.error(
@@ -249,6 +238,8 @@ class MonitoringHub(RepresentationMixin):
              self.router_proc.terminate()
              self.dbm_proc.terminate()
              self.filesystem_proc.terminate()
+         logger.info("Setting router termination event")
+         self.router_exit_event.set()
          logger.info("Waiting for router to terminate")
          self.router_proc.join()
          logger.debug("Finished waiting for router termination")
parsl/monitoring/radios.py CHANGED
@@ -6,6 +6,7 @@ import logging
 
  from abc import ABCMeta, abstractmethod
 
+ from multiprocessing.queues import Queue
  from typing import Optional
 
  from parsl.serialize import serialize
@@ -173,3 +174,17 @@ class UDPRadio(MonitoringRadio):
              logging.error("Could not send message within timeout limit")
              return
          return
+
+
+ class MultiprocessingQueueRadio(MonitoringRadio):
+     """A monitoring radio which connects over a multiprocessing Queue.
+     This radio is intended to be used on the submit side, where components
+     in the submit process, or processes launched by multiprocessing, will have
+     access to a Queue shared with the monitoring database code (bypassing the
+     monitoring router).
+     """
+     def __init__(self, queue: Queue) -> None:
+         self.queue = queue
+
+     def send(self, message: object) -> None:
+         self.queue.put((message, 0))
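
Note: this class replaces the ZMQ DEALER channel removed from MonitoringHub and the job status poller. A sketch of how it is wired up, mirroring MonitoringHub.start()/send() above; the message payload is illustrative only:

    from parsl.multiprocessing import SizedQueue
    from parsl.monitoring.radios import MultiprocessingQueueRadio
    from parsl.monitoring.message_type import MessageType

    block_msgs = SizedQueue()  # queue shared with the database-side consumer
    radio = MultiprocessingQueueRadio(block_msgs)

    # send() wraps the payload as (message, 0), matching the
    # (message, address) tuples expected downstream of the router
    radio.send((MessageType.BLOCK_INFO, {"block_id": "0"}))
    print(block_msgs.get())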
parsl/monitoring/remote.py CHANGED
@@ -15,7 +15,8 @@ from typing import Any, Callable, Dict, List, Sequence, Tuple
  logger = logging.getLogger(__name__)
 
 
- def monitor_wrapper(f: Any,  # per app
+ def monitor_wrapper(*,
+                     f: Any,  # per app
                      args: Sequence,  # per invocation
                      kwargs: Dict,  # per invocation
                      x_try_id: int,  # per invocation
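
Note: monitor_wrapper's arguments are now keyword-only (the bare `*`). A tiny self-contained illustration of the pattern, with a hypothetical function standing in for the real wrapper:

    def wrapper_like(*, f, args, kwargs, x_try_id):
        # the bare * forces callers to name every argument, so a long
        # positional argument list cannot be silently mixed up
        return f(*args, **kwargs)

    wrapper_like(f=print, args=("hello",), kwargs={}, x_try_id=0)  # OK
    # wrapper_like(print, ("hello",), {}, 0)  # TypeError: takes 0 positional arguments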
parsl/monitoring/router.py CHANGED
@@ -15,6 +15,8 @@ from parsl.utils import setproctitle
 
  from parsl.monitoring.message_type import MessageType
  from parsl.monitoring.types import AddressedMonitoringMessage, TaggedMonitoringMessage
+
+ from multiprocessing.synchronize import Event
  from typing import Optional, Tuple, Union
 
 
@@ -98,10 +100,10 @@ class MonitoringRouter:
                priority_msgs: "queue.Queue[AddressedMonitoringMessage]",
                node_msgs: "queue.Queue[AddressedMonitoringMessage]",
                block_msgs: "queue.Queue[AddressedMonitoringMessage]",
-               resource_msgs: "queue.Queue[AddressedMonitoringMessage]") -> None:
+               resource_msgs: "queue.Queue[AddressedMonitoringMessage]",
+               exit_event: Event) -> None:
          try:
-             router_keep_going = True
-             while router_keep_going:
+             while not exit_event.is_set():
                  try:
                      data, addr = self.udp_sock.recvfrom(2048)
                      resource_msg = pickle.loads(data)
@@ -135,8 +137,6 @@ class MonitoringRouter:
                          priority_msgs.put(msg_0)
                      elif msg[0] == MessageType.WORKFLOW_INFO:
                          priority_msgs.put(msg_0)
-                         if 'exit_now' in msg[1] and msg[1]['exit_now']:
-                             router_keep_going = False
                      else:
                          # There is a type: ignore here because if msg[0]
                          # is of the correct type, this code is unreachable,
@@ -178,6 +178,7 @@ def router_starter(comm_q: "queue.Queue[Union[Tuple[int, int], str]]",
                     node_msgs: "queue.Queue[AddressedMonitoringMessage]",
                     block_msgs: "queue.Queue[AddressedMonitoringMessage]",
                     resource_msgs: "queue.Queue[AddressedMonitoringMessage]",
+                    exit_event: Event,
 
                     hub_address: str,
                     udp_port: Optional[int],
@@ -202,7 +203,7 @@ def router_starter(comm_q: "queue.Queue[Union[Tuple[int, int], str]]",
 
      router.logger.info("Starting MonitoringRouter in router_starter")
      try:
-         router.start(priority_msgs, node_msgs, block_msgs, resource_msgs)
+         router.start(priority_msgs, node_msgs, block_msgs, resource_msgs, exit_event)
      except Exception as e:
          router.logger.exception("router.start exception")
          exception_q.put(('Hub', str(e)))
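
Note: the router's lifetime is now controlled by a multiprocessing Event set from MonitoringHub.close(), rather than an 'exit_now' flag carried inside a WORKFLOW_INFO message. A self-contained sketch of the shutdown pattern:

    import time
    from multiprocessing import Event, Process

    def loop(exit_event):
        # re-check the event on each iteration, as the router loop now does
        while not exit_event.is_set():
            # ... poll sockets/queues with a short timeout here ...
            time.sleep(0.1)
        print("exit event seen; shutting down cleanly")

    if __name__ == "__main__":
        ev = Event()
        p = Process(target=loop, args=(ev,))
        p.start()
        time.sleep(0.5)
        ev.set()  # cf. MonitoringHub.close() setting router_exit_event
        p.join()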
parsl/providers/local/local.py CHANGED
@@ -266,7 +266,7 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
          for job in job_ids:
              job_dict = self.resources[job]
              job_dict['cancelled'] = True
-             logger.debug("Terminating job/proc_id: {0}".format(job))
+             logger.debug("Terminating job/process ID: {0}".format(job))
              cmd = "kill -- -$(ps -o pgid= {} | grep -o '[0-9]*')".format(job_dict['remote_pid'])
              retcode, stdout, stderr = self.channel.execute_wait(cmd, self.cmd_timeout)
              if retcode != 0:
parsl/tests/configs/htex_local_alternate.py CHANGED
@@ -31,6 +31,7 @@ from parsl.executors import HighThroughputExecutor
  from parsl.data_provider.http import HTTPInTaskStaging
  from parsl.data_provider.ftp import FTPInTaskStaging
  from parsl.data_provider.file_noop import NoOpFileStaging
+ from parsl.data_provider.zip import ZipFileStaging
 
  working_dir = os.getcwd() + "/" + "test_htex_alternate"
 
@@ -42,7 +43,7 @@ def fresh_config():
              address="127.0.0.1",
              label="htex_Local",
              working_dir=working_dir,
-             storage_access=[FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()],
+             storage_access=[ZipFileStaging(), FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()],
              worker_debug=True,
              cores_per_worker=1,
              heartbeat_period=2,
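
Note: ZipFileStaging comes from the new parsl/data_provider/zip.py in this release. Judging from the accompanying test_zip_out.py, outputs are addressed with a zip: URL naming the containing zip file and the member path; the exact URL shape below is an assumption, not confirmed by this diff:

    from parsl.data_provider.files import File

    # hypothetical zip-staged output: "out.txt" is assumed to be written
    # into /tmp/staging.zip by the ZipFileStaging provider
    out = File("zip:/tmp/staging.zip/out.txt")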
parsl/tests/configs/taskvine_ex.py CHANGED
@@ -9,5 +9,4 @@ from parsl.data_provider.file_noop import NoOpFileStaging
 
  def fresh_config():
      return Config(executors=[TaskVineExecutor(manager_config=TaskVineManagerConfig(port=9000),
-                                               worker_launch_method='factory',
-                                               storage_access=[FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()])])
+                                               worker_launch_method='factory')])
parsl/tests/configs/workqueue_ex.py CHANGED
@@ -8,5 +8,4 @@ from parsl.data_provider.file_noop import NoOpFileStaging
 
  def fresh_config():
      return Config(executors=[WorkQueueExecutor(port=9000,
-                                                coprocess=True,
-                                                storage_access=[FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()])])
+                                                coprocess=True)])
parsl/tests/conftest.py CHANGED
@@ -135,28 +135,27 @@ def pytest_configure(config):
      )
      config.addinivalue_line(
          'markers',
-         'noci: mark test to be unsuitable for running during automated tests'
+         'cleannet: Enable tests that require a clean network connection (such as for testing FTP)'
      )
-
      config.addinivalue_line(
          'markers',
-         'cleannet: Enable tests that require a clean network connection (such as for testing FTP)'
+         'staging_required: Marks tests that require a staging provider, when there is no sharedFS)'
      )
      config.addinivalue_line(
          'markers',
-         'issue363: Marks tests that require a shared filesystem for stdout/stderr - see issue #363'
+         'sshd_required: Marks tests that require a SSHD'
      )
      config.addinivalue_line(
          'markers',
-         'staging_required: Marks tests that require a staging provider, when there is no sharedFS)'
+         'multiple_cores_required: Marks tests that require multiple cores, such as htex affinity'
      )
      config.addinivalue_line(
          'markers',
-         'sshd_required: Marks tests that require a SSHD'
+         'issue3328: Marks tests broken by issue #3328'
      )
      config.addinivalue_line(
          'markers',
-         'multiple_cores_required: Marks tests that require multiple cores, such as htex affinity'
+         'executor_supports_std_stream_tuples: Marks tests that require tuple support for stdout/stderr'
      )
 
 
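Note: the replacement markers are used like any other pytest marker; for example, a test known to be broken by issue #3328 opts in like this (test body hypothetical):

    import pytest

    @pytest.mark.issue3328
    def test_affected_behaviour():
        ...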
parsl/tests/test_bash_apps/test_basic.py CHANGED
@@ -1,3 +1,4 @@
+ import logging
  import os
  import random
  import re
@@ -23,7 +24,6 @@ def foo(x, y, z=10, stdout=None, label=None):
      return f"echo {x} {y} {z}"
 
 
- @pytest.mark.issue363
  def test_command_format_1(tmpd_cwd):
      """Testing command format for BashApps"""
 
@@ -38,8 +38,7 @@ def test_command_format_1(tmpd_cwd):
      assert so_content == "1 4 10"
 
 
- @pytest.mark.issue363
- def test_auto_log_filename_format():
+ def test_auto_log_filename_format(caplog):
      """Testing auto log filename format for BashApps
      """
      app_label = "label_test_auto_log_filename_format"
@@ -61,8 +60,10 @@ def test_auto_log_filename_format():
      assert contents == '1 {0} 10\n'.format(rand_int), \
          'Output does not match expected string "1 {0} 10", Got: "{1}"'.format(rand_int, contents)
 
+     for record in caplog.records:
+         assert record.levelno < logging.ERROR
+
 
- @pytest.mark.issue363
  def test_parallel_for(tmpd_cwd, n=3):
      """Testing a simple parallel for loop"""
      outdir = tmpd_cwd / "outputs/test_parallel"
parsl/tests/test_bash_apps/test_error_codes.py CHANGED
@@ -76,7 +76,6 @@ def test_div_0(test_fn=div_0):
      os.remove('std.out')
 
 
- @pytest.mark.issue363
  def test_bash_misuse(test_fn=bash_misuse):
      err_code = test_matrix[test_fn]['exit_code']
      f = test_fn()
@@ -91,7 +90,6 @@ def test_bash_misuse(test_fn=bash_misuse):
      os.remove('std.out')
 
 
- @pytest.mark.issue363
  def test_command_not_found(test_fn=command_not_found):
      err_code = test_matrix[test_fn]['exit_code']
      f = test_fn()
@@ -108,7 +106,6 @@ def test_command_not_found(test_fn=command_not_found):
      return True
 
 
- @pytest.mark.issue363
  def test_not_executable(test_fn=not_executable):
      err_code = test_matrix[test_fn]['exit_code']
      f = test_fn()
parsl/tests/test_bash_apps/test_kwarg_storage.py CHANGED
@@ -8,7 +8,6 @@ def foo(z=2, stdout=None):
      return f"echo {z}"
 
 
- @pytest.mark.issue363
  def test_command_format_1(tmpd_cwd):
      """Testing command format for BashApps
      """