parsl 2025.3.17__py3-none-any.whl → 2025.3.31__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (32)
  1. parsl/dataflow/dflow.py +18 -15
  2. parsl/executors/base.py +13 -37
  3. parsl/executors/flux/executor.py +1 -0
  4. parsl/executors/globus_compute.py +13 -2
  5. parsl/executors/high_throughput/executor.py +18 -0
  6. parsl/executors/high_throughput/interchange.py +26 -36
  7. parsl/executors/radical/executor.py +1 -0
  8. parsl/executors/status_handling.py +20 -12
  9. parsl/executors/taskvine/executor.py +13 -11
  10. parsl/executors/workqueue/executor.py +9 -7
  11. parsl/monitoring/errors.py +5 -0
  12. parsl/monitoring/monitoring.py +55 -122
  13. parsl/monitoring/radios/zmq_router.py +80 -18
  14. parsl/multiprocessing.py +42 -2
  15. parsl/tests/test_monitoring/test_basic.py +1 -1
  16. parsl/tests/test_monitoring/test_exit_helper.py +6 -7
  17. parsl/tests/test_monitoring/test_fuzz_zmq.py +1 -1
  18. parsl/tests/test_monitoring/test_radio_zmq.py +27 -0
  19. parsl/tests/test_monitoring/test_stdouterr.py +3 -0
  20. parsl/tests/test_shutdown/test_kill_monitoring.py +1 -1
  21. parsl/usage_tracking/usage.py +2 -2
  22. parsl/version.py +1 -1
  23. {parsl-2025.3.17.data → parsl-2025.3.31.data}/scripts/interchange.py +26 -36
  24. {parsl-2025.3.17.dist-info → parsl-2025.3.31.dist-info}/METADATA +2 -2
  25. {parsl-2025.3.17.dist-info → parsl-2025.3.31.dist-info}/RECORD +32 -31
  26. {parsl-2025.3.17.data → parsl-2025.3.31.data}/scripts/exec_parsl_function.py +0 -0
  27. {parsl-2025.3.17.data → parsl-2025.3.31.data}/scripts/parsl_coprocess.py +0 -0
  28. {parsl-2025.3.17.data → parsl-2025.3.31.data}/scripts/process_worker_pool.py +0 -0
  29. {parsl-2025.3.17.dist-info → parsl-2025.3.31.dist-info}/LICENSE +0 -0
  30. {parsl-2025.3.17.dist-info → parsl-2025.3.31.dist-info}/WHEEL +0 -0
  31. {parsl-2025.3.17.dist-info → parsl-2025.3.31.dist-info}/entry_points.txt +0 -0
  32. {parsl-2025.3.17.dist-info → parsl-2025.3.31.dist-info}/top_level.txt +0 -0
parsl/dataflow/dflow.py CHANGED
@@ -45,6 +45,7 @@ from parsl.executors.threads import ThreadPoolExecutor
 from parsl.jobs.job_status_poller import JobStatusPoller
 from parsl.monitoring import MonitoringHub
 from parsl.monitoring.message_type import MessageType
+from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadioSender
 from parsl.monitoring.remote import monitor_wrapper
 from parsl.process_loggers import wrap_with_logs
 from parsl.usage_tracking.usage import UsageTracker
@@ -110,8 +111,11 @@ class DataFlowKernel:
         self.monitoring: Optional[MonitoringHub]
         self.monitoring = config.monitoring

+        self.monitoring_radio = None
+
         if self.monitoring:
             self.monitoring.start(self.run_dir, self.config.run_dir)
+            self.monitoring_radio = MultiprocessingQueueRadioSender(self.monitoring.resource_msgs)

         self.time_began = datetime.datetime.now()
         self.time_completed: Optional[datetime.datetime] = None
@@ -156,9 +160,9 @@ class DataFlowKernel:
             'host': gethostname(),
         }

-        if self.monitoring:
-            self.monitoring.send((MessageType.WORKFLOW_INFO,
-                                  workflow_info))
+        if self.monitoring_radio:
+            self.monitoring_radio.send((MessageType.WORKFLOW_INFO,
+                                        workflow_info))

         if config.checkpoint_files is not None:
             checkpoint_files = config.checkpoint_files
@@ -231,9 +235,9 @@ class DataFlowKernel:
             raise InternalConsistencyError(f"Exit case for {mode} should be unreachable, validated by typeguard on Config()")

     def _send_task_log_info(self, task_record: TaskRecord) -> None:
-        if self.monitoring:
+        if self.monitoring_radio:
             task_log_info = self._create_task_log_info(task_record)
-            self.monitoring.send((MessageType.TASK_INFO, task_log_info))
+            self.monitoring_radio.send((MessageType.TASK_INFO, task_log_info))

     def _create_task_log_info(self, task_record: TaskRecord) -> Dict[str, Any]:
         """
@@ -1128,9 +1132,7 @@ class DataFlowKernel:
             executor.run_id = self.run_id
             executor.run_dir = self.run_dir
             if self.monitoring:
-                executor.hub_address = self.monitoring.hub_address
-                executor.hub_zmq_port = self.monitoring.hub_zmq_port
-                executor.submit_monitoring_radio = self.monitoring.radio
+                executor.monitoring_messages = self.monitoring.resource_msgs
             if hasattr(executor, 'provider'):
                 if hasattr(executor.provider, 'script_dir'):
                     executor.provider.script_dir = os.path.join(self.run_dir, 'submit_scripts')
@@ -1217,15 +1219,16 @@ class DataFlowKernel:
         logger.info("Terminated executors")
         self.time_completed = datetime.datetime.now()

-        if self.monitoring:
+        if self.monitoring_radio:
             logger.info("Sending final monitoring message")
-            self.monitoring.send((MessageType.WORKFLOW_INFO,
-                                  {'tasks_failed_count': self.task_state_counts[States.failed],
-                                   'tasks_completed_count': self.task_state_counts[States.exec_done],
-                                   "time_began": self.time_began,
-                                   'time_completed': self.time_completed,
-                                   'run_id': self.run_id, 'rundir': self.run_dir}))
+            self.monitoring_radio.send((MessageType.WORKFLOW_INFO,
+                                        {'tasks_failed_count': self.task_state_counts[States.failed],
+                                         'tasks_completed_count': self.task_state_counts[States.exec_done],
+                                         "time_began": self.time_began,
+                                         'time_completed': self.time_completed,
+                                         'run_id': self.run_id, 'rundir': self.run_dir}))

+        if self.monitoring:
             logger.info("Terminating monitoring")
             self.monitoring.close()
             logger.info("Terminated monitoring")
parsl/executors/base.py CHANGED
@@ -1,11 +1,14 @@
+from __future__ import annotations
+
 import os
 from abc import ABCMeta, abstractmethod
 from concurrent.futures import Future
+from multiprocessing.queues import Queue
 from typing import Any, Callable, Dict, Optional

 from typing_extensions import Literal, Self

-from parsl.monitoring.radios.base import MonitoringRadioSender
+from parsl.monitoring.types import TaggedMonitoringMessage


 class ParslExecutor(metaclass=ABCMeta):
@@ -42,6 +45,13 @@ class ParslExecutor(metaclass=ABCMeta):
     invariant, not co-variant, and it looks like @typeguard cannot be
     persuaded otherwise. So if you're implementing an executor and want to
     @typeguard the constructor, you'll have to use List[Any] here.
+
+    The DataFlowKernel will set this attribute before calling .start(),
+    if monitoring is enabled:
+
+    monitoring_messages: Optional[Queue[TaggedMonitoringMessage]] - an executor
+        can send messages to the monitoring hub by putting them into
+        this queue.
     """

     label: str = "undefined"
@@ -50,15 +60,11 @@ class ParslExecutor(metaclass=ABCMeta):
     def __init__(
         self,
         *,
-        hub_address: Optional[str] = None,
-        hub_zmq_port: Optional[int] = None,
-        submit_monitoring_radio: Optional[MonitoringRadioSender] = None,
+        monitoring_messages: Optional[Queue[TaggedMonitoringMessage]] = None,
         run_dir: str = ".",
         run_id: Optional[str] = None,
     ):
-        self.hub_address = hub_address
-        self.hub_zmq_port = hub_zmq_port
-        self.submit_monitoring_radio = submit_monitoring_radio
+        self.monitoring_messages = monitoring_messages
         self.run_dir = os.path.abspath(run_dir)
         self.run_id = run_id

@@ -125,33 +131,3 @@ class ParslExecutor(metaclass=ABCMeta):
     @run_id.setter
     def run_id(self, value: Optional[str]) -> None:
         self._run_id = value
-
-    @property
-    def hub_address(self) -> Optional[str]:
-        """Address to the Hub for monitoring.
-        """
-        return self._hub_address
-
-    @hub_address.setter
-    def hub_address(self, value: Optional[str]) -> None:
-        self._hub_address = value
-
-    @property
-    def hub_zmq_port(self) -> Optional[int]:
-        """Port to the Hub for monitoring.
-        """
-        return self._hub_zmq_port
-
-    @hub_zmq_port.setter
-    def hub_zmq_port(self, value: Optional[int]) -> None:
-        self._hub_zmq_port = value
-
-    @property
-    def submit_monitoring_radio(self) -> Optional[MonitoringRadioSender]:
-        """Local radio for sending monitoring messages
-        """
-        return self._submit_monitoring_radio
-
-    @submit_monitoring_radio.setter
-    def submit_monitoring_radio(self, value: Optional[MonitoringRadioSender]) -> None:
-        self._submit_monitoring_radio = value
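
With the hub_address/hub_zmq_port/submit_monitoring_radio triple gone, the only monitoring hook left on the base class is the monitoring_messages queue described in the docstring above. A hedged sketch of an executor honouring that contract; NoopExecutor is hypothetical, and it assumes start/submit/shutdown are the abstract methods a subclass must provide:

from concurrent.futures import Future
from typing import Any, Callable, Dict

from parsl.executors.base import ParslExecutor


class NoopExecutor(ParslExecutor):
    """Hypothetical executor: runs tasks inline, checks the monitoring queue."""
    label = "noop"

    def start(self) -> None:
        # The DFK sets monitoring_messages before start() when monitoring
        # is enabled; otherwise it stays None.
        if self.monitoring_messages is not None:
            pass  # e.g. wrap it in a radio sender, as BlockProviderExecutor does

    def submit(self, func: Callable, resource_specification: Dict[str, Any],
               *args: Any, **kwargs: Any) -> Future:
        fut: Future = Future()
        fut.set_result(func(*args, **kwargs))
        return fut

    def shutdown(self) -> None:
        pass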
parsl/executors/flux/executor.py CHANGED
@@ -231,6 +231,7 @@ class FluxExecutor(ParslExecutor, RepresentationMixin):

     def start(self):
         """Called when DFK starts the executor when the config is loaded."""
+        super().start()
         os.makedirs(self.working_dir, exist_ok=True)
         self._submission_thread.start()

parsl/executors/globus_compute.py CHANGED
@@ -2,10 +2,11 @@ from __future__ import annotations

 import copy
 from concurrent.futures import Future
-from typing import Any, Callable, Dict
+from typing import Any, Callable, Dict, List, Optional

 import typeguard

+from parsl.data_provider.staging import Staging
 from parsl.errors import OptionalModuleMissing
 from parsl.executors.base import ParslExecutor
 from parsl.utils import RepresentationMixin
@@ -40,6 +41,8 @@ class GlobusComputeExecutor(ParslExecutor, RepresentationMixin):
         self,
         executor: Executor,
         label: str = 'GlobusComputeExecutor',
+        storage_access: Optional[List[Staging]] = None,
+        working_dir: Optional[str] = None,
     ):
         """
         Parameters
@@ -52,6 +55,12 @@ class GlobusComputeExecutor(ParslExecutor, RepresentationMixin):

         label:
             a label to name the executor
+
+        storage_access:
+            a list of staging providers that will be used for file staging
+
+        working_dir:
+            The working dir to be used for file staging
         """
         if not _globus_compute_enabled:
             raise OptionalModuleMissing(
@@ -64,10 +73,12 @@ class GlobusComputeExecutor(ParslExecutor, RepresentationMixin):
         self.resource_specification = self.executor.resource_specification
         self.user_endpoint_config = self.executor.user_endpoint_config
         self.label = label
+        self.storage_access = storage_access
+        self.working_dir = working_dir

     def start(self) -> None:
         """ Start the Globus Compute Executor """
-        pass
+        super().start()

     def submit(self, func: Callable, resource_specification: Dict[str, Any], *args: Any, **kwargs: Any) -> Future:
         """ Submit func to globus-compute
parsl/executors/high_throughput/executor.py CHANGED
@@ -29,6 +29,7 @@ from parsl.executors.high_throughput.manager_selector import (
 )
 from parsl.executors.status_handling import BlockProviderExecutor
 from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus
+from parsl.monitoring.radios.zmq_router import ZMQRadioReceiver, start_zmq_receiver
 from parsl.process_loggers import wrap_with_logs
 from parsl.providers import LocalProvider
 from parsl.providers.base import ExecutionProvider
@@ -334,6 +335,10 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
         self._result_queue_thread_exit = threading.Event()
         self._result_queue_thread: Optional[threading.Thread] = None

+        self.zmq_monitoring: Optional[ZMQRadioReceiver]
+        self.zmq_monitoring = None
+        self.hub_zmq_port = None
+
     radio_mode = "htex"
     enable_mpi_mode: bool = False
     mpi_launcher: str = "mpiexec"
@@ -407,6 +412,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
     def start(self):
         """Create the Interchange process and connect to it.
        """
+        super().start()
         if self.encrypted and self.cert_dir is None:
             logger.debug("Creating CurveZMQ certificates")
             self.cert_dir = curvezmq.create_certificates(self.logdir)
@@ -427,6 +433,15 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
             self.loopback_address, self.interchange_port_range, self.cert_dir
         )

+        if self.monitoring_messages is not None:
+            self.zmq_monitoring = start_zmq_receiver(monitoring_messages=self.monitoring_messages,
+                                                     loopback_address=self.loopback_address,
+                                                     port_range=self.interchange_port_range,
+                                                     logdir=self.logdir,
+                                                     worker_debug=self.worker_debug,
+                                                     )
+            self.hub_zmq_port = self.zmq_monitoring.port
+
         self._result_queue_thread = None
         self._start_result_queue_thread()
         self._start_local_interchange_process()
@@ -861,6 +876,9 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
         if self._result_queue_thread:
             self._result_queue_thread.join()

+        if self.zmq_monitoring:
+            self.zmq_monitoring.close()
+
         logger.info("Finished HighThroughputExecutor shutdown attempt")

     def get_usage_information(self):
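
The executor now owns its monitoring ZMQ receiver rather than receiving an address/port pair from the hub. The lifecycle, extracted from start() and shutdown() above (a sketch: the function name and keyword arguments are taken from the diff, while the queue and values here are illustrative):

import multiprocessing

from parsl.monitoring.radios.zmq_router import start_zmq_receiver

monitoring_messages: multiprocessing.Queue = multiprocessing.Queue()

receiver = start_zmq_receiver(monitoring_messages=monitoring_messages,
                              loopback_address="127.0.0.1",
                              port_range=(55050, 56000),
                              logdir="/tmp/parsl-logs",
                              worker_debug=False)
print(receiver.port)  # published as hub_zmq_port, per the diff above
receiver.close()      # called from shutdown(), mirroring the diff above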
parsl/executors/high_throughput/interchange.py CHANGED
@@ -328,7 +328,7 @@ class Interchange:
             self.process_results_incoming(interesting_managers, monitoring_radio)
             self.expire_bad_managers(interesting_managers, monitoring_radio)
             self.expire_drained_managers(interesting_managers, monitoring_radio)
-            self.process_tasks_to_send(interesting_managers)
+            self.process_tasks_to_send(interesting_managers, monitoring_radio)

         self.zmq_context.destroy()
         delta = time.time() - start
@@ -452,7 +452,7 @@ class Interchange:
                 m['active'] = False
                 self._send_monitoring_info(monitoring_radio, m)

-    def process_tasks_to_send(self, interesting_managers: Set[bytes]) -> None:
+    def process_tasks_to_send(self, interesting_managers: Set[bytes], monitoring_radio: Optional[MonitoringRadioSender]) -> None:
         # Check if there are tasks that could be sent to managers

         logger.debug(
@@ -481,13 +481,14 @@ class Interchange:
                     m['idle_since'] = None
                     logger.debug("Sent tasks: %s to manager %r", tids, manager_id)
                     # recompute real_capacity after sending tasks
-                    real_capacity = m['max_capacity'] - tasks_inflight
+                    real_capacity -= task_count
                     if real_capacity > 0:
                         logger.debug("Manager %r has free capacity %s", manager_id, real_capacity)
                         # ... so keep it in the interesting_managers list
                     else:
                         logger.debug("Manager %r is now saturated", manager_id)
                         interesting_managers.remove(manager_id)
+                    self._send_monitoring_info(monitoring_radio, m)
             else:
                 interesting_managers.remove(manager_id)
                 # logger.debug("Nothing to send to manager {}".format(manager_id))
@@ -505,13 +506,24 @@ class Interchange:
             else:
                 logger.debug("Got %s result items in batch from manager %r", len(all_messages), manager_id)

-                b_messages = []
+                m = self._ready_managers[manager_id]
+                b_messages_to_send = []

                 for p_message in all_messages:
                     r = pickle.loads(p_message)
                     if r['type'] == 'result':
                         # process this for task ID and forward to executor
-                        b_messages.append((p_message, r))
+                        logger.debug("Removing task %s from manager record %r", r["task_id"], manager_id)
+                        try:
+                            m['tasks'].remove(r['task_id'])
+                            b_messages_to_send.append(p_message)
+                        except Exception:
+                            logger.exception(
+                                "Ignoring exception removing task_id %s for manager %r with task list %s",
+                                r['task_id'],
+                                manager_id,
+                                m["tasks"]
+                            )
                     elif r['type'] == 'monitoring':
                         # the monitoring code makes the assumption that no
                         # monitoring messages will be received if monitoring
@@ -525,43 +537,21 @@ class Interchange:
                     else:
                         logger.error("Interchange discarding result_queue message of unknown type: %s", r["type"])

-                got_result = False
-                m = self._ready_managers[manager_id]
-                for (_, r) in b_messages:
-                    assert 'type' in r, f"Message is missing type entry: {r}"
-                    if r['type'] == 'result':
-                        got_result = True
-                        try:
-                            logger.debug("Removing task %s from manager record %r", r["task_id"], manager_id)
-                            m['tasks'].remove(r['task_id'])
-                        except Exception:
-                            # If we reach here, there's something very wrong.
-                            logger.exception(
-                                "Ignoring exception removing task_id %s for manager %r with task list %s",
-                                r['task_id'],
-                                manager_id,
-                                m["tasks"]
-                            )
-
-                b_messages_to_send = []
-                for (b_message, _) in b_messages:
-                    b_messages_to_send.append(b_message)
-
                 if b_messages_to_send:
                     logger.debug("Sending messages on results_outgoing")
                     self.results_outgoing.send_multipart(b_messages_to_send)
                     logger.debug("Sent messages on results_outgoing")

-                logger.debug("Current tasks on manager %r: %s", manager_id, m["tasks"])
-                if len(m['tasks']) == 0 and m['idle_since'] is None:
-                    m['idle_since'] = time.time()
-
-                # A manager is only made interesting here if a result was
-                # received, which means there should be capacity for a new
-                # task now. Heartbeats and monitoring messages do not make a
-                # manager become interesting.
-                if got_result:
+                    # At least one result received, so manager now has idle capacity
                     interesting_managers.add(manager_id)
+
+                    if len(m['tasks']) == 0 and m['idle_since'] is None:
+                        m['idle_since'] = time.time()
+
+                    self._send_monitoring_info(monitoring_radio, m)
+
+                    logger.debug("Current tasks on manager %r: %s", manager_id, m["tasks"])
+
         logger.debug("leaving results_incoming section")

     def expire_bad_managers(self, interesting_managers: Set[bytes], monitoring_radio: Optional[MonitoringRadioSender]) -> None:
parsl/executors/radical/executor.py CHANGED
@@ -215,6 +215,7 @@ class RadicalPilotExecutor(ParslExecutor, RepresentationMixin):
         """Create the Pilot component and pass it.
         """
         logger.info("starting RadicalPilotExecutor")
+        super().start()
         logger.info('Parsl: {0}'.format(parsl.__version__))
         logger.info('RADICAL pilot: {0}'.format(rp.version))
         self.session = rp.Session(cfg={'base': self.run_dir},
parsl/executors/status_handling.py CHANGED
@@ -14,6 +14,7 @@ from parsl.executors.errors import BadStateException, ScalingFailed
 from parsl.jobs.error_handlers import noop_error_handler, simple_error_handler
 from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus
 from parsl.monitoring.message_type import MessageType
+from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadioSender
 from parsl.providers.base import ExecutionProvider
 from parsl.utils import AtomicIDCounter

@@ -83,6 +84,13 @@ class BlockProviderExecutor(ParslExecutor):
         # of pending, active and recently terminated blocks
         self._status = {}  # type: Dict[str, JobStatus]

+        self.submit_monitoring_radio: Optional[MultiprocessingQueueRadioSender] = None
+
+    def start(self):
+        super().start()
+        if self.monitoring_messages:
+            self.submit_monitoring_radio = MultiprocessingQueueRadioSender(self.monitoring_messages)
+
     def _make_status_dict(self, block_ids: List[str], status_list: List[JobStatus]) -> Dict[str, JobStatus]:
         """Given a list of block ids and a list of corresponding status strings,
         returns a dictionary mapping each block id to the corresponding status
@@ -281,20 +289,20 @@
             logger.debug("Sending block monitoring message: %r", msg)
             self.submit_monitoring_radio.send((MessageType.BLOCK_INFO, msg))

-    def create_monitoring_info(self, status: Dict[str, JobStatus]) -> Sequence[object]:
+    def create_monitoring_info(self, status: Dict[str, JobStatus]) -> Sequence[Dict[str, Any]]:
         """Create a monitoring message for each block based on the poll status.
         """
-        msg = []
-        for bid, s in status.items():
-            d: Dict[str, Any] = {}
-            d['run_id'] = self.run_id
-            d['status'] = s.status_name
-            d['timestamp'] = datetime.datetime.now()
-            d['executor_label'] = self.label
-            d['job_id'] = self.blocks_to_job_id.get(bid, None)
-            d['block_id'] = bid
-            msg.append(d)
-        return msg
+        return [
+            {
+                "run_id": self.run_id,
+                "status": s.status_name,
+                "timestamp": datetime.datetime.now(),
+                "executor_label": self.label,
+                "job_id": self.blocks_to_job_id.get(bid, None),
+                "block_id": bid
+            }
+            for bid, s in status.items()
+        ]

     def poll_facade(self) -> None:
         now = time.time()
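
The comprehension above also documents the message shape: one dict per block, tagged as BLOCK_INFO when sent through submit_monitoring_radio. An illustrative message (all field values made up):

import datetime

from parsl.monitoring.message_type import MessageType

msg = [{
    "run_id": "example-run-id",
    "status": "RUNNING",
    "timestamp": datetime.datetime.now(),
    "executor_label": "htex_local",
    "job_id": "12345",
    "block_id": "0",
}]
tagged = (MessageType.BLOCK_INFO, msg)
# submit_monitoring_radio.send(tagged) would enqueue this for the hub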
parsl/executors/taskvine/executor.py CHANGED
@@ -40,6 +40,7 @@ from parsl.executors.taskvine.factory_config import TaskVineFactoryConfig
 from parsl.executors.taskvine.manager import _taskvine_submit_wait
 from parsl.executors.taskvine.manager_config import TaskVineManagerConfig
 from parsl.executors.taskvine.utils import ParslFileToVine, ParslTaskToVine
+from parsl.multiprocessing import SpawnContext
 from parsl.process_loggers import wrap_with_logs
 from parsl.providers import CondorProvider, LocalProvider
 from parsl.providers.base import ExecutionProvider
@@ -134,13 +135,13 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         self.storage_access = storage_access

         # Queue to send ready tasks from TaskVine executor process to TaskVine manager process
-        self._ready_task_queue: multiprocessing.Queue = multiprocessing.Queue()
+        self._ready_task_queue: multiprocessing.Queue = SpawnContext.Queue()

         # Queue to send finished tasks from TaskVine manager process to TaskVine executor process
-        self._finished_task_queue: multiprocessing.Queue = multiprocessing.Queue()
+        self._finished_task_queue: multiprocessing.Queue = SpawnContext.Queue()

         # Event to signal whether the manager and factory processes should stop running
-        self._should_stop = multiprocessing.Event()
+        self._should_stop = SpawnContext.Event()

         # TaskVine manager process
         self._submit_process = None
@@ -239,6 +240,7 @@
         retrieve Parsl tasks within the TaskVine system.
         """

+        super().start()
         # Synchronize connection and communication settings between the manager and factory
         self.__synchronize_manager_factory_comm_settings()

@@ -252,17 +254,17 @@
                                  "finished_task_queue": self._finished_task_queue,
                                  "should_stop": self._should_stop,
                                  "manager_config": self.manager_config}
-        self._submit_process = multiprocessing.Process(target=_taskvine_submit_wait,
-                                                       name="TaskVine-Submit-Process",
-                                                       kwargs=submit_process_kwargs)
+        self._submit_process = SpawnContext.Process(target=_taskvine_submit_wait,
+                                                    name="TaskVine-Submit-Process",
+                                                    kwargs=submit_process_kwargs)

         # Create a process to run the TaskVine factory if enabled.
         if self.worker_launch_method == 'factory':
             factory_process_kwargs = {"should_stop": self._should_stop,
                                       "factory_config": self.factory_config}
-            self._factory_process = multiprocessing.Process(target=_taskvine_factory,
-                                                            name="TaskVine-Factory-Process",
-                                                            kwargs=factory_process_kwargs)
+            self._factory_process = SpawnContext.Process(target=_taskvine_factory,
+                                                         name="TaskVine-Factory-Process",
+                                                         kwargs=factory_process_kwargs)

         # Run thread to collect results and set tasks' futures.
         self._collector_thread = threading.Thread(target=self._collect_taskvine_results,
@@ -621,8 +623,8 @@
         with self._tasks_lock:
             future = self.tasks.pop(task_report.executor_id)

-        logger.debug(f'Updating Future for Parsl Task: {task_report.executor_id}. \
-                       Task {task_report.executor_id} has result_received set to {task_report.result_received}')
+        logger.debug(f'Updating Future for Parsl Task: {task_report.executor_id}. '
+                     f'Task {task_report.executor_id} has result_received set to {task_report.result_received}')
         if task_report.result_received:
             try:
                 with open(task_report.result_file, 'rb') as f_in:
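
TaskVine above and Work Queue below both switch their queues, events, values and child processes from the platform-default multiprocessing start method to an explicit spawn context imported from parsl.multiprocessing (itself changed in this release, +42 -2). The diff does not show that module, so as an assumption only, a plausible equivalent of what SpawnContext and SpawnProcess provide:

import multiprocessing

# Assumed shape of parsl.multiprocessing's helpers: a spawn context avoids
# inheriting locks and threads from a forked parent, at the cost of
# re-importing modules in the child process.
SpawnContext = multiprocessing.get_context("spawn")
SpawnProcess = SpawnContext.Process

q = SpawnContext.Queue()
ev = SpawnContext.Event()
p = SpawnProcess(target=print, args=("hello from a spawned child",))

if __name__ == "__main__":  # required for spawn-started children
    p.start()
    p.join()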
parsl/executors/workqueue/executor.py CHANGED
@@ -31,6 +31,7 @@ from parsl.errors import OptionalModuleMissing
 from parsl.executors.errors import ExecutorError, InvalidResourceSpecification
 from parsl.executors.status_handling import BlockProviderExecutor
 from parsl.executors.workqueue import exec_parsl_function
+from parsl.multiprocessing import SpawnContext, SpawnProcess
 from parsl.process_loggers import wrap_with_logs
 from parsl.providers import CondorProvider, LocalProvider
 from parsl.providers.base import ExecutionProvider
@@ -260,8 +261,8 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):

         self.scaling_cores_per_worker = scaling_cores_per_worker
         self.label = label
-        self.task_queue = multiprocessing.Queue()  # type: multiprocessing.Queue
-        self.collector_queue = multiprocessing.Queue()  # type: multiprocessing.Queue
+        self.task_queue: multiprocessing.Queue = SpawnContext.Queue()
+        self.collector_queue: multiprocessing.Queue = SpawnContext.Queue()
         self.address = address
         self.port = port
         self.executor_task_counter = -1
@@ -282,7 +283,7 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         self.autolabel_window = autolabel_window
         self.autocategory = autocategory
         self.max_retries = max_retries
-        self.should_stop = multiprocessing.Value(c_bool, False)
+        self.should_stop = SpawnContext.Value(c_bool, False)
         self.cached_envs = {}  # type: Dict[int, str]
         self.worker_options = worker_options
         self.worker_executable = worker_executable
@@ -314,6 +315,7 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         """Create submit process and collector thread to create, send, and
         retrieve Parsl tasks within the Work Queue system.
         """
+        super().start()
         self.tasks_lock = threading.Lock()

         # Create directories for data and results
@@ -333,7 +335,7 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):

         logger.debug("Starting WorkQueueExecutor")

-        port_mailbox = multiprocessing.Queue()
+        port_mailbox = SpawnContext.Queue()

         # Create a Process to perform WorkQueue submissions
         submit_process_kwargs = {"task_queue": self.task_queue,
@@ -354,9 +356,9 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
                                  "port_mailbox": port_mailbox,
                                  "coprocess": self.coprocess
                                  }
-        self.submit_process = multiprocessing.Process(target=_work_queue_submit_wait,
-                                                      name="WorkQueue-Submit-Process",
-                                                      kwargs=submit_process_kwargs)
+        self.submit_process = SpawnProcess(target=_work_queue_submit_wait,
+                                           name="WorkQueue-Submit-Process",
+                                           kwargs=submit_process_kwargs)

         self.collector_thread = threading.Thread(target=self._collect_work_queue_results,
                                                  name="WorkQueue-collector-thread")
parsl/monitoring/errors.py CHANGED
@@ -4,3 +4,8 @@ from parsl.errors import ParslError
 class MonitoringHubStartError(ParslError):
     def __str__(self) -> str:
         return "Hub failed to start"
+
+
+class MonitoringRouterStartError(ParslError):
+    def __str__(self) -> str:
+        return "Monitoring router failed to start"