parsl-2023.12.4-py3-none-any.whl → parsl-2023.12.18-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
parsl/dataflow/dflow.py CHANGED
@@ -854,10 +854,13 @@ class DataFlowKernel:
                 try:
                     new_args.extend([dep.result()])
                 except Exception as e:
-                    if hasattr(dep, 'task_record'):
-                        tid = dep.task_record['id']
+                    # If this Future is associated with a task inside this DFK,
+                    # then refer to the task ID.
+                    # Otherwise make a repr of the Future object.
+                    if hasattr(dep, 'task_record') and dep.task_record['dfk'] == self:
+                        tid = "task " + repr(dep.task_record['id'])
                     else:
-                        tid = None
+                        tid = repr(dep)
                     dep_failures.extend([(e, tid)])
             else:
                 new_args.extend([dep])
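
The net effect: a failed dependency that belongs to this DFK is reported as "task <id>", and any other Future by its repr. A standalone sketch of that identifier logic (illustrative only, not the DataFlowKernel method itself):

    # Sketch: how the dependency identifier above is chosen.
    from concurrent.futures import Future

    def dep_identifier(dep, dfk):
        # Futures created by this DFK carry a task_record; plain Futures do not.
        if hasattr(dep, 'task_record') and dep.task_record['dfk'] == dfk:
            return "task " + repr(dep.task_record['id'])
        return repr(dep)

    plain = Future()
    print(dep_identifier(plain, dfk=None))  # e.g. <Future at 0x... state=pending>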
parsl/dataflow/errors.py CHANGED
@@ -36,7 +36,9 @@ class DependencyError(DataFlowException):
     in a dependency.

     Args:
-       - dependent_exceptions_tids: List of dependency task IDs which failed
+       - dependent_exceptions_tids: List of exceptions and identifiers for
+         dependencies which failed. The identifier might be a task ID or
+         the repr of a non-DFK Future.
       - task_id: Task ID of the task that failed because of the dependency error
     """

@@ -45,8 +47,8 @@ class DependencyError(DataFlowException):
         self.task_id = task_id

     def __str__(self) -> str:
-        dep_tids = [tid for (exception, tid) in self.dependent_exceptions_tids]
-        return "Dependency failure for task {} with failed dependencies from tasks {}".format(self.task_id, dep_tids)
+        deps = ", ".join(tid for _exc, tid in self.dependent_exceptions_tids)
+        return f"Dependency failure for task {self.task_id} with failed dependencies from {deps}"


 class JoinError(DataFlowException):
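
A sketch of the resulting message, assuming the constructor takes the (exception, identifier) list and the task ID positionally, as the hunk above suggests:

    # Hypothetical construction, for illustration only.
    from parsl.dataflow.errors import DependencyError

    err = DependencyError([(ValueError("boom"), "task 3"),
                           (ValueError("bang"), "<Future at 0x7f... state=finished>")],
                          7)
    print(err)
    # Dependency failure for task 7 with failed dependencies from task 3, <Future at 0x7f... state=finished>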
parsl/executors/high_throughput/process_worker_pool.py CHANGED
@@ -17,6 +17,8 @@ import math
 import json
 import psutil
 import multiprocessing
+from multiprocessing.managers import DictProxy
+from multiprocessing.sharedctypes import Synchronized

 from parsl.process_loggers import wrap_with_logs

@@ -24,10 +26,7 @@ from parsl.version import VERSION as PARSL_VERSION
 from parsl.app.errors import RemoteExceptionWrapper
 from parsl.executors.high_throughput.errors import WorkerLost
 from parsl.executors.high_throughput.probe import probe_addresses
-from parsl.multiprocessing import ForkProcess as mpProcess
-
-from parsl.multiprocessing import SizedQueue as mpQueue
-
+from parsl.multiprocessing import SpawnContext
 from parsl.serialize import unpack_apply_message, serialize

 HEARTBEAT_CODE = (2 ** 32) - 1
@@ -49,22 +48,22 @@ class Manager:
 |          |         IPC-Qeueues

    """
-    def __init__(self,
-                 addresses="127.0.0.1",
-                 address_probe_timeout=30,
-                 task_port="50097",
-                 result_port="50098",
-                 cores_per_worker=1,
-                 mem_per_worker=None,
-                 max_workers=float('inf'),
-                 prefetch_capacity=0,
-                 uid=None,
-                 block_id=None,
-                 heartbeat_threshold=120,
-                 heartbeat_period=30,
-                 poll_period=10,
-                 cpu_affinity=False,
-                 available_accelerators: Sequence[str] = ()):
+    def __init__(self, *,
+                 addresses,
+                 address_probe_timeout,
+                 task_port,
+                 result_port,
+                 cores_per_worker,
+                 mem_per_worker,
+                 max_workers,
+                 prefetch_capacity,
+                 uid,
+                 block_id,
+                 heartbeat_threshold,
+                 heartbeat_period,
+                 poll_period,
+                 cpu_affinity,
+                 available_accelerators: Sequence[str]):
        """
        Parameters
        ----------
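
The bare `*` makes every parameter keyword-only, and dropping the defaults forces the single caller (the `__main__` argparse block) to pass each value explicitly. A minimal standalone sketch of the pattern:

    # Sketch: keyword-only parameters without defaults (not the parsl class).
    class Example:
        def __init__(self, *, poll_period, cpu_affinity):
            self.poll_period = poll_period
            self.cpu_affinity = cpu_affinity

    Example(poll_period=10, cpu_affinity="none")  # OK
    Example(10, "none")  # TypeError: positional arguments are rejected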
@@ -73,7 +72,7 @@ class Manager:

        address_probe_timeout : int
             Timeout in seconds for the address probe to detect viable addresses
-             to the interchange. Default : 30s
+             to the interchange.

        uid : str
             string unique identifier

@@ -83,43 +82,41 @@ class Manager:

        cores_per_worker : float
             cores to be assigned to each worker. Oversubscription is possible
-             by setting cores_per_worker < 1.0. Default=1
+             by setting cores_per_worker < 1.0.

        mem_per_worker : float
             GB of memory required per worker. If this option is specified, the node manager
             will check the available memory at startup and limit the number of workers such that
             the there's sufficient memory for each worker. If set to None, memory on node is not
             considered in the determination of workers to be launched on node by the manager.
-             Default: None

        max_workers : int
             caps the maximum number of workers that can be launched.
-             default: infinity

        prefetch_capacity : int
             Number of tasks that could be prefetched over available worker capacity.
             When there are a few tasks (<100) or when tasks are long running, this option should
-             be set to 0 for better load balancing. Default is 0.
+             be set to 0 for better load balancing.

        heartbeat_threshold : int
             Seconds since the last message from the interchange after which the
-             interchange is assumed to be un-available, and the manager initiates shutdown. Default:120s
+             interchange is assumed to be un-available, and the manager initiates shutdown.

             Number of seconds since the last message from the interchange after which the worker
-             assumes that the interchange is lost and the manager shuts down. Default:120
+             assumes that the interchange is lost and the manager shuts down.

        heartbeat_period : int
             Number of seconds after which a heartbeat message is sent to the interchange, and workers
             are checked for liveness.

        poll_period : int
-             Timeout period used by the manager in milliseconds. Default: 10ms
+             Timeout period used by the manager in milliseconds.

        cpu_affinity : str
            Whether or how each worker should force its affinity to different CPUs

        available_accelerators: list of str
-            List of accelerators available to the workers. Default: Empty list
+            List of accelerators available to the workers.

        """

@@ -160,7 +157,7 @@ class Manager:
        if os.environ.get('PARSL_CORES'):
            cores_on_node = int(os.environ['PARSL_CORES'])
        else:
-            cores_on_node = multiprocessing.cpu_count()
+            cores_on_node = SpawnContext.cpu_count()

        if os.environ.get('PARSL_MEMORY_GB'):
            available_mem_on_node = float(os.environ['PARSL_MEMORY_GB'])
@@ -175,13 +172,16 @@ class Manager:
        if mem_per_worker and mem_per_worker > 0:
            mem_slots = math.floor(available_mem_on_node / mem_per_worker)

-        self.worker_count = min(max_workers,
-                                mem_slots,
-                                math.floor(cores_on_node / cores_per_worker))
+        self.worker_count: int = min(max_workers,
+                                     mem_slots,
+                                     math.floor(cores_on_node / cores_per_worker))

-        self.pending_task_queue = mpQueue()
-        self.pending_result_queue = mpQueue()
-        self.ready_worker_queue = mpQueue()
+        self._mp_manager = SpawnContext.Manager()  # Starts a server process
+
+        self.monitoring_queue = self._mp_manager.Queue()
+        self.pending_task_queue = SpawnContext.Queue()
+        self.pending_result_queue = SpawnContext.Queue()
+        self.ready_worker_count = SpawnContext.Value("i", 0)

        self.max_queue_size = self.prefetch_capacity + self.worker_count

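`SpawnContext.Value("i", 0)` replaces the old ready-worker queue with a shared C integer that workers update under its lock, avoiding per-item queue traffic. A minimal counter sketch under the standard-library API:

    # Sketch: a shared counter usable across spawn-started processes.
    import multiprocessing

    SpawnContext = multiprocessing.get_context("spawn")

    def incr(counter):
        with counter.get_lock():   # Synchronized values expose get_lock()
            counter.value += 1

    if __name__ == "__main__":
        ready = SpawnContext.Value("i", 0)  # "i" = C int, initial value 0
        p = SpawnContext.Process(target=incr, args=(ready,))
        p.start(); p.join()
        print(ready.value)  # 1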
@@ -252,10 +252,13 @@ class Manager:
        poll_timer = self.poll_period

        while not kill_event.is_set():
-            ready_worker_count = self.ready_worker_queue.qsize()
-            pending_task_count = self.pending_task_queue.qsize()
+            try:
+                pending_task_count = self.pending_task_queue.qsize()
+            except NotImplementedError:
+                # Ref: https://github.com/python/cpython/blob/6d5e0dc0e330f4009e8dc3d1642e46b129788877/Lib/multiprocessing/queues.py#L125
+                pending_task_count = f"pending task count is not available on {platform.system()}"

-            logger.debug("ready workers: {}, pending tasks: {}".format(ready_worker_count,
+            logger.debug("ready workers: {}, pending tasks: {}".format(self.ready_worker_count.value,
                                                                        pending_task_count))

            if time.time() > last_beat + self.heartbeat_period:
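
The guard is needed because `multiprocessing.Queue.qsize()` raises `NotImplementedError` where `sem_getvalue()` is unavailable, notably on macOS (see the CPython reference above). A quick reproduction:

    # On macOS this prints the fallback message; on Linux it prints 0.
    import multiprocessing
    import platform

    q = multiprocessing.get_context("spawn").Queue()
    try:
        print(q.qsize())
    except NotImplementedError:
        print(f"pending task count is not available on {platform.system()}")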
@@ -381,6 +384,36 @@ class Manager:

        logger.critical("Exiting")

+    @wrap_with_logs
+    def handle_monitoring_messages(self, kill_event: threading.Event):
+        """Transfer messages from the managed monitoring queue to the result queue.
+
+        We separate the queues so that the result queue does not rely on a manager
+        process, which adds overhead that causes slower queue operations but enables
+        use across processes started in fork and spawn contexts.
+
+        We transfer the messages to the result queue to reuse the ZMQ connection between
+        the manager and the interchange.
+        """
+        logger.debug("Starting monitoring handler thread")
+
+        poll_period_s = max(10, self.poll_period) / 1000  # Must be at least 10 ms
+
+        while not kill_event.is_set():
+            try:
+                logger.debug("Starting monitor_queue.get()")
+                msg = self.monitoring_queue.get(block=True, timeout=poll_period_s)
+            except queue.Empty:
+                logger.debug("monitoring_queue.get() has timed out")
+            except Exception as e:
+                logger.exception(f"Got an exception: {e}")
+            else:
+                logger.debug("Got a monitoring message")
+                self.pending_result_queue.put(msg)
+                logger.debug("Put monitoring message on pending_result_queue")
+
+        logger.critical("Exiting")
+
385
418
  """ Start the worker processes.
386
419
 
@@ -388,7 +421,7 @@ class Manager:
388
421
  """
389
422
  start = time.time()
390
423
  self._kill_event = threading.Event()
391
- self._tasks_in_progress = multiprocessing.Manager().dict()
424
+ self._tasks_in_progress = self._mp_manager.dict()
392
425
 
393
426
  self.procs = {}
394
427
  for worker_id in range(self.worker_count):
@@ -406,9 +439,14 @@ class Manager:
        self._worker_watchdog_thread = threading.Thread(target=self.worker_watchdog,
                                                        args=(self._kill_event,),
                                                        name="worker-watchdog")
+        self._monitoring_handler_thread = threading.Thread(target=self.handle_monitoring_messages,
+                                                           args=(self._kill_event,),
+                                                           name="Monitoring-Handler")
+
        self._task_puller_thread.start()
        self._result_pusher_thread.start()
        self._worker_watchdog_thread.start()
+        self._monitoring_handler_thread.start()

        logger.info("Loop start")

@@ -420,6 +458,7 @@ class Manager:
        self._task_puller_thread.join()
        self._result_pusher_thread.join()
        self._worker_watchdog_thread.join()
+        self._monitoring_handler_thread.join()
        for proc_id in self.procs:
            self.procs[proc_id].terminate()
            logger.critical("Terminating worker {}: is_alive()={}".format(self.procs[proc_id],
@@ -435,7 +474,7 @@ class Manager:
        return

    def _start_worker(self, worker_id: int):
-        p = mpProcess(
+        p = SpawnContext.Process(
            target=worker,
            args=(
                worker_id,
@@ -443,10 +482,16 @@ class Manager:
                self.worker_count,
                self.pending_task_queue,
                self.pending_result_queue,
-                self.ready_worker_queue,
+                self.monitoring_queue,
+                self.ready_worker_count,
                self._tasks_in_progress,
                self.cpu_affinity,
                self.available_accelerators[worker_id] if self.accelerators_available else None,
+                self.block_id,
+                self.heartbeat_period,
+                os.getpid(),
+                args.logdir,
+                args.debug,
            ),
            name="HTEX-Worker-{}".format(worker_id),
        )
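
Spawn-started children re-import the module instead of inheriting the parent's memory, so module globals such as the parsed `args` are no longer visible in the worker; everything it needs is now passed (picklable) through `args=(...)`. A sketch of the difference:

    # Sketch: why spawn needs explicit arguments. A fork child would inherit
    # CONFIG as mutated below; a spawn child re-imports the module, never runs
    # the __main__ block, and so sees only the module-level default.
    import multiprocessing

    CONFIG = {"debug": False}

    def child(config):
        print(config["debug"])  # reads the explicitly passed copy

    if __name__ == "__main__":
        CONFIG["debug"] = True
        ctx = multiprocessing.get_context("spawn")
        p = ctx.Process(target=child, args=(CONFIG,))  # state passed explicitly
        p.start(); p.join()  # prints: True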
@@ -484,7 +529,23 @@ def execute_task(bufs):


 @wrap_with_logs(target="worker_log")
-def worker(worker_id, pool_id, pool_size, task_queue, result_queue, worker_queue, tasks_in_progress, cpu_affinity, accelerator: Optional[str]):
+def worker(
+        worker_id: int,
+        pool_id: str,
+        pool_size: int,
+        task_queue: multiprocessing.Queue,
+        result_queue: multiprocessing.Queue,
+        monitoring_queue: queue.Queue,
+        ready_worker_count: Synchronized,
+        tasks_in_progress: DictProxy,
+        cpu_affinity: str,
+        accelerator: Optional[str],
+        block_id: str,
+        task_queue_timeout: int,
+        manager_pid: int,
+        logdir: str,
+        debug: bool,
+):
    """

    Put request token into queue
@@ -496,23 +557,22 @@ def worker(worker_id, pool_id, pool_size, task_queue, result_queue, worker_queue
    # override the global logger inherited from the __main__ process (which
    # usually logs to manager.log) with one specific to this worker.
    global logger
-    logger = start_file_logger('{}/block-{}/{}/worker_{}.log'.format(args.logdir, args.block_id, pool_id, worker_id),
+    logger = start_file_logger('{}/block-{}/{}/worker_{}.log'.format(logdir, block_id, pool_id, worker_id),
                               worker_id,
                               name="worker_log",
-                               level=logging.DEBUG if args.debug else logging.INFO)
+                               level=logging.DEBUG if debug else logging.INFO)

    # Store worker ID as an environment variable
    os.environ['PARSL_WORKER_RANK'] = str(worker_id)
    os.environ['PARSL_WORKER_COUNT'] = str(pool_size)
    os.environ['PARSL_WORKER_POOL_ID'] = str(pool_id)
-    os.environ['PARSL_WORKER_BLOCK_ID'] = str(args.block_id)
+    os.environ['PARSL_WORKER_BLOCK_ID'] = str(block_id)

-    # share the result queue with monitoring code so it too can send results down that channel
    import parsl.executors.high_throughput.monitoring_info as mi
-    mi.result_queue = result_queue
+    mi.result_queue = monitoring_queue

    logger.info('Worker {} started'.format(worker_id))
-    if args.debug:
+    if debug:
        logger.debug("Debug logging enabled")

    # If desired, set process affinity
@@ -553,20 +613,37 @@ def worker(worker_id, pool_id, pool_size, task_queue, result_queue, worker_queue

    logger.info(f'Pinned worker to accelerator: {accelerator}')

-    while True:
-        worker_queue.put(worker_id)
+    def manager_is_alive():
+        try:
+            # This does not kill the process, but instead raises
+            # an exception if the process doesn't exist
+            os.kill(manager_pid, 0)
+        except OSError:
+            logger.critical(f"Manager ({manager_pid}) died; worker {worker_id} shutting down")
+            return False
+        else:
+            return True
+
+    worker_enqueued = False
+    while manager_is_alive():
+        if not worker_enqueued:
+            with ready_worker_count.get_lock():
+                ready_worker_count.value += 1
+                worker_enqueued = True
+
+        try:
+            # The worker will receive {'task_id':<tid>, 'buffer':<buf>}
+            req = task_queue.get(timeout=task_queue_timeout)
+        except queue.Empty:
+            continue

-        # The worker will receive {'task_id':<tid>, 'buffer':<buf>}
-        req = task_queue.get()
        tasks_in_progress[worker_id] = req
        tid = req['task_id']
        logger.info("Received executor task {}".format(tid))

-        try:
-            worker_queue.get()
-        except queue.Empty:
-            logger.warning("Worker ID: {} failed to remove itself from ready_worker_queue".format(worker_id))
-            pass
+        with ready_worker_count.get_lock():
+            ready_worker_count.value -= 1
+            worker_enqueued = False

        try:
            result = execute_task(req['buffer'])
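
`os.kill(pid, 0)` delivers no signal; it only performs the existence/permission check, raising `OSError` when the PID is gone, which is how a worker notices its manager has died. Standalone sketch:

    # Sketch: probing for process existence with signal 0 (POSIX).
    import os

    def pid_exists(pid: int) -> bool:
        try:
            os.kill(pid, 0)  # raises if no such process
        except ProcessLookupError:
            return False
        except PermissionError:
            return True      # process exists but belongs to another user
        return True

    print(pid_exists(os.getpid()))  # True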
@@ -653,6 +730,7 @@ if __name__ == "__main__":
    parser.add_argument("-r", "--result_port", required=True,
                        help="REQUIRED: Result port for posting results to the interchange")
    parser.add_argument("--cpu-affinity", type=str, choices=["none", "block", "alternating", "block-reverse"],
+                        required=True,
                        help="Whether/how workers should control CPU affinity.")
    parser.add_argument("--available-accelerators", type=str, nargs="*",
                        help="Names of available accelerators")
parsl/executors/high_throughput/zmq_pipes.py CHANGED
@@ -46,7 +46,6 @@ class CommandClient:
    """ This function needs to be fast at the same time aware of the possibility of
    ZMQ pipes overflowing.

-    The timeout increases slowly if contention is detected on ZMQ pipes.
    We could set copy=False and get slightly better latency but this results
    in ZMQ sockets reaching a broken state once there are ~10k tasks in flight.
    This issue can be magnified if each the serialized buffer itself is larger.

@@ -55,8 +54,11 @@ class CommandClient:
        with self._lock:
            for _ in range(max_retries):
                try:
+                    logger.debug("Sending command client command")
                    self.zmq_socket.send_pyobj(message, copy=True)
+                    logger.debug("Waiting for command client response")
                    reply = self.zmq_socket.recv_pyobj()
+                    logger.debug("Received command client response")
                except zmq.ZMQError:
                    logger.exception("Potential ZMQ REQ-REP deadlock caught")
                    logger.info("Trying to reestablish context")

@@ -115,7 +117,9 @@ class TasksOutgoing:
        socks = dict(self.poller.poll(timeout=timeout_ms))
        if self.zmq_socket in socks and socks[self.zmq_socket] == zmq.POLLOUT:
            # The copy option adds latency but reduces the risk of ZMQ overflow
+            logger.debug("Sending TasksOutgoing message")
            self.zmq_socket.send_pyobj(message, copy=True)
+            logger.debug("Sent TasksOutgoing message")
            return
        else:
            timeout_ms *= 2

@@ -149,7 +153,10 @@ class ResultsIncoming:
                                               max_port=port_range[1])

    def get(self):
-        return self.results_receiver.recv_multipart()
+        logger.debug("Waiting for ResultsIncoming message")
+        m = self.results_receiver.recv_multipart()
+        logger.debug("Received ResultsIncoming message")
+        return m

    def close(self):
        self.results_receiver.close()
parsl/multiprocessing.py CHANGED
@@ -1,4 +1,4 @@
-"""Helpers for cross-plaform multiprocessing support.
+"""Helpers for cross-platform multiprocessing support.
 """

 import logging

@@ -10,9 +10,12 @@ from typing import Callable, Type

 logger = logging.getLogger(__name__)

+ForkContext = multiprocessing.get_context("fork")
+SpawnContext = multiprocessing.get_context("spawn")
+
 # maybe ForkProcess should be: Callable[..., Process] so as to make
 # it clear that it returns a Process always to the type checker?
-ForkProcess: Type = multiprocessing.get_context('fork').Process
+ForkProcess: Type = ForkContext.Process


 class MacSafeQueue(multiprocessing.queues.Queue):
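
The two module-level contexts let callers choose a start method per object instead of relying on the platform default (fork on Linux, spawn on macOS and Windows). A short sketch of the trade-off, using only the standard library:

    # Sketch: same API, different start methods. fork clones the parent
    # (fast, POSIX-only, risky with threads); spawn starts a fresh interpreter
    # and re-imports the module (slower, but portable and thread-safe).
    import multiprocessing

    ForkContext = multiprocessing.get_context("fork")    # POSIX only
    SpawnContext = multiprocessing.get_context("spawn")

    def hello():
        print("hello from child")

    if __name__ == "__main__":
        for ctx in (ForkContext, SpawnContext):
            p = ctx.Process(target=hello)
            p.start()
            p.join()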
parsl/providers/local/local.py CHANGED
@@ -28,8 +28,9 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
        Ratio of provisioned task slots to active tasks. A parallelism value of 1 represents aggressive
        scaling where as many resources as possible are used; parallelism close to 0 represents
        the opposite situation in which as few resources as possible (i.e., min_blocks) are used.
-    move_files : Optional[Bool]: should files be moved? by default, Parsl will try to figure
-        this out itself (= None). If True, then will always move. If False, will never move.
+    move_files : Optional[Bool]
+        Should files be moved? By default, Parsl will try to figure this out itself (= None).
+        If True, then will always move. If False, will never move.
    worker_init : str
        Command to be run before starting a worker, such as 'module load Anaconda; source activate env'.
    """
parsl/providers/slurm/slurm.py CHANGED
@@ -47,6 +47,10 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
    account : str
        Slurm account to which to charge resources used by the job. If unspecified or ``None``, the job will use the
        user's default account.
+    qos : str
+        Slurm queue to place job in. If unspecified or ``None``, no queue slurm directive will be specified.
+    constraint : str
+        Slurm job constraint, often used to choose cpu or gpu type. If unspecified or ``None``, no constraint slurm directive will be added.
    channel : Channel
        Channel for accessing this provider. Possible channels include
        :class:`~parsl.channels.LocalChannel` (the default),

@@ -92,6 +96,8 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
    def __init__(self,
                 partition: Optional[str] = None,
                 account: Optional[str] = None,
+                 qos: Optional[str] = None,
+                 constraint: Optional[str] = None,
                 channel: Channel = LocalChannel(),
                 nodes_per_block: int = 1,
                 cores_per_node: Optional[int] = None,

@@ -126,6 +132,8 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
        self.exclusive = exclusive
        self.move_files = move_files
        self.account = account
+        self.qos = qos
+        self.constraint = constraint
        self.scheduler_options = scheduler_options + '\n'
        if exclusive:
            self.scheduler_options += "#SBATCH --exclusive\n"

@@ -133,6 +141,11 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
            self.scheduler_options += "#SBATCH --partition={}\n".format(partition)
        if account:
            self.scheduler_options += "#SBATCH --account={}\n".format(account)
+        if qos:
+            self.scheduler_options += "#SBATCH --qos={}\n".format(qos)
+        if constraint:
+            self.scheduler_options += "#SBATCH --constraint={}\n".format(constraint)
+
        self.regex_job_id = regex_job_id
        self.worker_init = worker_init + '\n'

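Configuration-side view of the new options (a hedged example; the partition, account, qos, and constraint values are site-specific placeholders):

    # Sketch: the new options translate directly into #SBATCH directives.
    from parsl.providers import SlurmProvider

    provider = SlurmProvider(
        partition="debug",  # placeholder
        account="m0000",    # placeholder
        qos="regular",      # emits: #SBATCH --qos=regular
        constraint="gpu",   # emits: #SBATCH --constraint=gpu
    )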
parsl/tests/test_error_handling/test_htex_manager_failure.py ADDED
@@ -0,0 +1,52 @@
+import os
+import signal
+import time
+
+import pytest
+
+import parsl
+from parsl.app.app import python_app
+from parsl.tests.configs.htex_local import fresh_config
+
+
+@pytest.fixture(autouse=True, scope="function")
+def load_config():
+    config = fresh_config()
+    config.executors[0].poll_period = 1
+    config.executors[0].max_workers = 1
+    config.executors[0].heartbeat_period = 1
+
+    parsl.load(config)
+    yield
+
+    parsl.dfk().cleanup()
+    parsl.clear()
+
+
+@python_app
+def get_worker_pid():
+    import os
+    return os.getpid()
+
+
+@python_app
+def kill_manager(sig: int):
+    import os
+    os.kill(os.getppid(), sig)
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("sig", [signal.SIGTERM, signal.SIGKILL])
+def test_htex_manager_failure_worker_shutdown(sig: int):
+    """Ensure that HTEX workers shut down when the Manager process dies."""
+    worker_pid = get_worker_pid().result()
+
+    kill_manager(sig)
+
+    with pytest.raises(OSError):
+        end = time.monotonic() + 5
+        while time.monotonic() < end:
+            # Raises an exception if the process
+            # does not exist
+            os.kill(worker_pid, 0)
+            time.sleep(.1)
parsl/tests/test_python_apps/test_dep_standard_futures.py CHANGED
@@ -33,4 +33,12 @@ def test_future_fail_dependency():

    plain_fut.set_exception(ValueError("Plain failure"))

-    assert isinstance(parsl_fut.exception(), DependencyError)
+    ex = parsl_fut.exception()
+
+    # check that what we got is a dependency error...
+    assert isinstance(ex, DependencyError)
+
+    # and that the dependency error string mentions the dependency
+    # Future, plain_fut, somewhere in its str
+
+    assert repr(plain_fut) in str(ex)
parsl/tests/test_python_apps/test_depfail_propagation.py CHANGED
@@ -21,6 +21,10 @@ def test_depfail_once():
    assert not isinstance(f1.exception(), DependencyError)
    assert isinstance(f2.exception(), DependencyError)

+    # check that the task ID of the failing task is mentioned
+    # in the DependencyError message
+    assert ("task " + str(f1.task_record['id'])) in str(f2.exception())
+

 def test_depfail_chain():
    """Test that dependency failures chain"""
parsl/version.py CHANGED
@@ -3,4 +3,4 @@
 Year.Month.Day[alpha/beta/..]
 Alphas will be numbered like this -> 2024.12.10a0
 """
-VERSION = '2023.12.04'
+VERSION = '2023.12.18'
parsl-2023.12.18.dist-info/METADATA CHANGED
@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: parsl
-Version: 2023.12.4
+Version: 2023.12.18
 Summary: Simple data dependent workflows in Python
 Home-page: https://github.com/Parsl/parsl
-Download-URL: https://github.com/Parsl/parsl/archive/2023.12.04.tar.gz
+Download-URL: https://github.com/Parsl/parsl/archive/2023.12.18.tar.gz
 Author: The Parsl Team
 Author-email: parsl@googlegroups.com
 License: Apache 2.0
parsl-2023.12.18.dist-info/RECORD CHANGED
@@ -3,11 +3,11 @@ parsl/addresses.py,sha256=L4RjQ-jGY9RfT-hBpsGw1uCzWaIdrEKxcPWV-TkGJes,4767
 parsl/config.py,sha256=ysUWBfm9bygayHHdItaJbP4oozkHJJmVQVnWCt5igjE,6808
 parsl/errors.py,sha256=SzINzQFZDBDbj9l-DPQznD0TbGkNhHIRAPkcBCogf_A,1019
 parsl/log_utils.py,sha256=AGem-dhQs5TYUyJg6GKkRuHxAw8FHhYlWB_0s7_ROw4,3175
-parsl/multiprocessing.py,sha256=e6WKPkqjC8TK0BesuVKUQPtipUtP0a-jM0iOzY_bagw,1904
+parsl/multiprocessing.py,sha256=w3t1pFkHo4oZpznc2KF6Ff-Jj8MvXqvjm-hoiRqZDDQ,1984
 parsl/process_loggers.py,sha256=1G3Rfrh5wuZNo2X03grG4kTYPGOxz7hHCyG6L_A3b0A,1137
 parsl/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/utils.py,sha256=_flbNpTu6IXHbzIyE5JkUbOBIK4poc1R1bjBtwJUVdo,11622
-parsl/version.py,sha256=f6Ei3SwZ3QtwHwPBUkqRduc-oCtfvLSKvo0s-j5PC3g,131
+parsl/version.py,sha256=TdZ44_l5ykDC_eoJiyMUVahV--8rnh2rNzC-MtW2rL4,131
 parsl/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/app/app.py,sha256=wAHchJetgnicT1pn0NJKDeDX0lV3vDFlG8cQd_Ciax4,8522
 parsl/app/bash.py,sha256=bx9x1XFwkOTpZZD3CPwnVL9SyNRDjbUGtOnuGLvxN_8,5396

@@ -62,8 +62,8 @@ parsl/data_provider/http.py,sha256=nDHTW7XmJqAukWJjPRQjyhUXt8r6GsQ36mX9mv_wOig,2
 parsl/data_provider/rsync.py,sha256=2-ZxqrT-hBj39x082NusJaBqsGW4Jd2qCW6JkVPpEl0,4254
 parsl/data_provider/staging.py,sha256=l-mAXFburs3BWPjkSmiQKuAgJpsxCG62yATPDbrafYI,4523
 parsl/dataflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-parsl/dataflow/dflow.py,sha256=875obETSEog7WyovHDxR9k07zr_nifhbn67H5zmHxf8,63658
-parsl/dataflow/errors.py,sha256=JfjoqqqSg1WvTash5uwUuJt1ZDvxw-aAMenO-N3zoas,2084
+parsl/dataflow/dflow.py,sha256=uuXY9pURFDBpL0w52J0DWGCOtorTQ5wFy5V0WwHS9L8,63909
+parsl/dataflow/errors.py,sha256=w2vOt_ymzG2dOqJUO4IDcmTlrCIHlMZL8nBVyVq0O_8,2176
 parsl/dataflow/futures.py,sha256=aVfEUTzp4-EdunDAtNcqVQf8l_A7ArDi2c82KZMwxfY,5256
 parsl/dataflow/memoization.py,sha256=AsJO6c6cRp2ac6H8uGn2USlEi78_nX3QWvpxYt4XdYE,9583
 parsl/dataflow/rundirs.py,sha256=XKmBZpBEIsGACBhYOkbbs2e5edC0pQegJcSlk4FWeag,1154

@@ -85,8 +85,8 @@ parsl/executors/high_throughput/interchange.py,sha256=tX_EvQf7WkSKMJG-TNmA-WADjh
 parsl/executors/high_throughput/manager_record.py,sha256=T8-JVMfDJU6SJfzJRooD0mO8AHGMXlcn3PBOM0m_vng,366
 parsl/executors/high_throughput/monitoring_info.py,sha256=3gQpwQjjNDEBz0cQqJZB6hRiwLiWwXs83zkQDmbOwxY,297
 parsl/executors/high_throughput/probe.py,sha256=lvnuf-vBv57tHvFh-J51F9sDYBES7jCgs6KYgWvmKRs,2749
-parsl/executors/high_throughput/process_worker_pool.py,sha256=KaGaIE0RfJmJPld26CFivD58DYmCGSYIJk5g9uZKYYQ,31185
-parsl/executors/high_throughput/zmq_pipes.py,sha256=3-UrPu4DlXYb6JufjBcENspLd31Qk5URDaZP6IyC6SM,5720
+parsl/executors/high_throughput/process_worker_pool.py,sha256=l0l5F3mpJ60idMCN-d1AbdaogmOtO5eO3uGWogspNXg,34070
+parsl/executors/high_throughput/zmq_pipes.py,sha256=88VJz9QejOCQ_yyhaO5C1uQuDYZTovYEcnKn15WxHSU,6103
 parsl/executors/radical/__init__.py,sha256=CKbtV2numw5QvgIBq1htMUrt9TqDCIC2zifyf2svTNU,186
 parsl/executors/radical/executor.py,sha256=ZYycq58jXlBlhmIO1355JCK1xIJHkspiy62NN1XiMYQ,20729
 parsl/executors/radical/rpex_master.py,sha256=nMGxYWw3r-8_vZVnEwfB5eCfdTqXkeQDP5yvU0jXgc8,1368

@@ -174,7 +174,7 @@ parsl/providers/kubernetes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
 parsl/providers/kubernetes/kube.py,sha256=uOr-sPgp73r1JFNc6wYhGhNGCvqkI8xBZznuJvfIfyk,12819
 parsl/providers/kubernetes/template.py,sha256=VsRz6cmNaII-y4OdMT6sCwzQy95SJX6NMB0hmmFBhX4,50
 parsl/providers/local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-parsl/providers/local/local.py,sha256=eH6l03i3e-To4A0YFgWsE-xXS3EKeDyQynvWUGJE1kQ,11362
+parsl/providers/local/local.py,sha256=4X6Ds7PUuwFgkc6ZuzkEyDWTzjchb9eNzAgrwYfUKAs,11369
 parsl/providers/lsf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/providers/lsf/lsf.py,sha256=AECVpjl_CTreE-APFQSjMVVIb3HheG6zculJn-zYtdM,11502
 parsl/providers/lsf/template.py,sha256=leQ_TpXv7ePMzbHfLaWvqMR0VORxlp-hjX5JxtkcwwU,269

@@ -182,7 +182,7 @@ parsl/providers/pbspro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
 parsl/providers/pbspro/pbspro.py,sha256=zXsb45LhgCkLEwwKXjdjsm2jv884j1fXHJ2hky9auD0,7789
 parsl/providers/pbspro/template.py,sha256=ozMbrx0HNsLnSoWbkZhy-55yJoTX5gpdRrDuVn6TFWA,369
 parsl/providers/slurm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-parsl/providers/slurm/slurm.py,sha256=uQQryft2_ynjQkOsoaMHOqZdu3K1fTsL6LcfVJ6R-6o,11694
+parsl/providers/slurm/slurm.py,sha256=qHTNI5crS90PzdcoDu_lzDrGrYNss8yY_1XuWU5S2bc,12330
 parsl/providers/slurm/template.py,sha256=cc-3l5wnThEWfqzpniPgi3FP6934Ni05UZ9r0A1RA8s,369
 parsl/providers/torque/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/providers/torque/template.py,sha256=4qfc2gmlEhRCAD7erFDOs4prJQ43I8s4E8DSUSVQx3A,358

@@ -326,6 +326,7 @@ parsl/tests/test_docs/test_workflow4.py,sha256=PfOVDx5v_NtwDvg-ccC3A3SVM-SF0Pcyb
 parsl/tests/test_error_handling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/tests/test_error_handling/test_fail.py,sha256=xx4TGWfL7le4cQ9nvnUkrlmKQJkskhD0l_3W1xwZSEI,282
 parsl/tests/test_error_handling/test_htex_basic.py,sha256=VRP_-Ro2SYp8TqfjpG_zCBJOZWuVFFCr3E0WKN_blg8,455
+parsl/tests/test_error_handling/test_htex_manager_failure.py,sha256=5YsCS1z7wOfUcFCD7uzR7t_rD3x5toZnoaCKVrHaMb0,1152
 parsl/tests/test_error_handling/test_htex_missing_worker.py,sha256=Tux0Xla719eup7RdWj8LmxNH-CTscMN0NM4CPuPP1ng,967
 parsl/tests/test_error_handling/test_htex_worker_failure.py,sha256=KO3ZegC8C6tY62XI1-uiS4w4gEYoRZZOEUnALEdBU8c,594
 parsl/tests/test_error_handling/test_python_walltime.py,sha256=rdmGZHIkuann2Njt3i62odKJ0FaODGr7-L96rOXNVYg,950

@@ -350,9 +351,9 @@ parsl/tests/test_providers/test_local_provider.py,sha256=G6Fuko22SvAtD7xhfQv8k_8
 parsl/tests/test_python_apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/tests/test_python_apps/test_arg_input_types.py,sha256=JXpfHiu8lr9BN6u1OzqFvGwBhxzsGTPMewHx6Wdo-HI,670
 parsl/tests/test_python_apps/test_basic.py,sha256=lFqh4ugePbp_FRiHGUXxzV34iS7l8C5UkxTHuLcpnYs,855
-parsl/tests/test_python_apps/test_dep_standard_futures.py,sha256=L3mUPmcrpC6QhZgHQbmnZIPE5JRmAA0JSMEmRQo_oOA,639
+parsl/tests/test_python_apps/test_dep_standard_futures.py,sha256=BloeaYBci0jS5al2d8Eqe3OfZ1tvolA5ZflOBQPR9Wo,859
 parsl/tests/test_python_apps/test_dependencies.py,sha256=IRiTI_lPoWBSFSFnaBlE6Bv08PKEaf-qj5dfqO2RjT0,272
-parsl/tests/test_python_apps/test_depfail_propagation.py,sha256=Jukzl6P5Be4I7VpZw2SoD2cwY_Yd0oU_THKl8wswx7k,1316
+parsl/tests/test_python_apps/test_depfail_propagation.py,sha256=3q3HlVWrOixFtXWBvR_ypKtbdAHAJcKndXQ5drwrBQU,1488
 parsl/tests/test_python_apps/test_fail.py,sha256=0Gld8LS6NB0Io1bU82vVR73twkuL5nW0ifKbIUcsJcw,1671
 parsl/tests/test_python_apps/test_fibonacci_iterative.py,sha256=ly2s5HuB9R53Z2FM_zy0WWdOk01iVhgcwSpQyK6ErIY,573
 parsl/tests/test_python_apps/test_fibonacci_recursive.py,sha256=q7LMFcu_pJSNPdz8iY0UiRoIweEWIBGwMjQffHWAuDc,592

@@ -412,12 +413,12 @@ parsl/tests/test_threads/test_configs.py,sha256=QA9YjIMAtZ2jmkfOWqBzEfzQQcFVCDiz
 parsl/tests/test_threads/test_lazy_errors.py,sha256=nGhYfCMHFZYSy6YJ4gnAmiLl9SfYs0WVnuvj8DXQ9bw,560
 parsl/usage_tracking/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/usage_tracking/usage.py,sha256=TEuAIm_U_G2ojZxvd0bbVa6gZlU61_mVRa2yJC9mGiI,7555
-parsl-2023.12.4.data/scripts/exec_parsl_function.py,sha256=NtWNeBvRqksej38eRPw8zPBJ1CeW6vgaitve0tfz_qc,7801
-parsl-2023.12.4.data/scripts/parsl_coprocess.py,sha256=kzX_1RI3V2KMKs6L-il4I1qkLNVodDKFXN_1FHB9fmM,6031
-parsl-2023.12.4.data/scripts/process_worker_pool.py,sha256=nSHtxNcJMeliSah83zSZRDxMwSxQBl55sPV5nfnGnXc,31171
-parsl-2023.12.4.dist-info/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
-parsl-2023.12.4.dist-info/METADATA,sha256=Rv935KrQ9z1QJOgdhx5a5rxCFJYnJFTMmLQvobUazvI,3817
-parsl-2023.12.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-parsl-2023.12.4.dist-info/entry_points.txt,sha256=XqnsWDYoEcLbsMcpnYGKLEnSBmaIe1YoM5YsBdJG2tI,176
-parsl-2023.12.4.dist-info/top_level.txt,sha256=PIheYoUFQtF2icLsgOykgU-Cjuwr2Oi6On2jo5RYgRM,6
-parsl-2023.12.4.dist-info/RECORD,,
+parsl-2023.12.18.data/scripts/exec_parsl_function.py,sha256=NtWNeBvRqksej38eRPw8zPBJ1CeW6vgaitve0tfz_qc,7801
+parsl-2023.12.18.data/scripts/parsl_coprocess.py,sha256=kzX_1RI3V2KMKs6L-il4I1qkLNVodDKFXN_1FHB9fmM,6031
+parsl-2023.12.18.data/scripts/process_worker_pool.py,sha256=ytz3F8ZYeBr8tFqSRv2O9eZGdsID7oZRulBmmQmZaV8,34056
+parsl-2023.12.18.dist-info/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
+parsl-2023.12.18.dist-info/METADATA,sha256=hmDBTXvoQYLYqsFRJi8HWQYLk9bwf7MCNEtljOy_tOY,3818
+parsl-2023.12.18.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+parsl-2023.12.18.dist-info/entry_points.txt,sha256=XqnsWDYoEcLbsMcpnYGKLEnSBmaIe1YoM5YsBdJG2tI,176
+parsl-2023.12.18.dist-info/top_level.txt,sha256=PIheYoUFQtF2icLsgOykgU-Cjuwr2Oi6On2jo5RYgRM,6
+parsl-2023.12.18.dist-info/RECORD,,