parsl 2023.10.23__py3-none-any.whl → 2023.11.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. parsl/app/app.py +29 -21
  2. parsl/channels/base.py +12 -24
  3. parsl/config.py +19 -12
  4. parsl/configs/ad_hoc.py +2 -2
  5. parsl/dataflow/dflow.py +10 -4
  6. parsl/executors/base.py +1 -1
  7. parsl/executors/high_throughput/executor.py +2 -2
  8. parsl/executors/high_throughput/interchange.py +59 -53
  9. parsl/executors/high_throughput/process_worker_pool.py +2 -2
  10. parsl/executors/high_throughput/zmq_pipes.py +1 -1
  11. parsl/executors/status_handling.py +1 -1
  12. parsl/executors/taskvine/exec_parsl_function.py +3 -4
  13. parsl/executors/taskvine/executor.py +18 -4
  14. parsl/executors/taskvine/factory.py +1 -1
  15. parsl/executors/taskvine/manager.py +12 -16
  16. parsl/executors/taskvine/utils.py +5 -5
  17. parsl/executors/threads.py +1 -2
  18. parsl/executors/workqueue/exec_parsl_function.py +2 -1
  19. parsl/executors/workqueue/executor.py +34 -24
  20. parsl/monitoring/monitoring.py +6 -6
  21. parsl/monitoring/remote.py +1 -1
  22. parsl/monitoring/visualization/plots/default/workflow_plots.py +4 -4
  23. parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +2 -2
  24. parsl/providers/slurm/slurm.py +1 -1
  25. parsl/tests/configs/ad_hoc_cluster_htex.py +3 -3
  26. parsl/tests/configs/htex_ad_hoc_cluster.py +1 -1
  27. parsl/tests/configs/local_threads_monitoring.py +1 -1
  28. parsl/tests/conftest.py +6 -2
  29. parsl/tests/scaling_tests/vineex_condor.py +1 -1
  30. parsl/tests/scaling_tests/vineex_local.py +1 -1
  31. parsl/tests/scaling_tests/wqex_condor.py +1 -1
  32. parsl/tests/scaling_tests/wqex_local.py +1 -1
  33. parsl/tests/test_docs/test_kwargs.py +37 -0
  34. parsl/tests/test_python_apps/test_lifted.py +3 -2
  35. parsl/utils.py +4 -4
  36. parsl/version.py +1 -1
  37. {parsl-2023.10.23.data → parsl-2023.11.13.data}/scripts/exec_parsl_function.py +2 -1
  38. {parsl-2023.10.23.data → parsl-2023.11.13.data}/scripts/process_worker_pool.py +2 -2
  39. {parsl-2023.10.23.dist-info → parsl-2023.11.13.dist-info}/METADATA +2 -2
  40. {parsl-2023.10.23.dist-info → parsl-2023.11.13.dist-info}/RECORD +45 -44
  41. {parsl-2023.10.23.dist-info → parsl-2023.11.13.dist-info}/WHEEL +1 -1
  42. {parsl-2023.10.23.data → parsl-2023.11.13.data}/scripts/parsl_coprocess.py +0 -0
  43. {parsl-2023.10.23.dist-info → parsl-2023.11.13.dist-info}/LICENSE +0 -0
  44. {parsl-2023.10.23.dist-info → parsl-2023.11.13.dist-info}/entry_points.txt +0 -0
  45. {parsl-2023.10.23.dist-info → parsl-2023.11.13.dist-info}/top_level.txt +0 -0
parsl/executors/high_throughput/interchange.py

@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+import multiprocessing
 import zmq
 import os
 import sys
@@ -13,7 +14,7 @@ import queue
 import threading
 import json
 
-from typing import cast, Any, Dict, Set, Optional
+from typing import cast, Any, Dict, NoReturn, Sequence, Set, Optional, Tuple
 
 from parsl.utils import setproctitle
 from parsl.version import VERSION as PARSL_VERSION
@@ -36,23 +37,23 @@ class ManagerLost(Exception):
     ''' Task lost due to manager loss. Manager is considered lost when multiple heartbeats
     have been missed.
     '''
-    def __init__(self, manager_id, hostname):
+    def __init__(self, manager_id: bytes, hostname: str) -> None:
         self.manager_id = manager_id
         self.tstamp = time.time()
         self.hostname = hostname
 
-    def __str__(self):
+    def __str__(self) -> str:
         return "Task failure due to loss of manager {} on host {}".format(self.manager_id.decode(), self.hostname)
 
 
 class VersionMismatch(Exception):
     ''' Manager and Interchange versions do not match
     '''
-    def __init__(self, interchange_version, manager_version):
+    def __init__(self, interchange_version: str, manager_version: str):
         self.interchange_version = interchange_version
         self.manager_version = manager_version
 
-    def __str__(self):
+    def __str__(self) -> str:
         return "Manager version info {} does not match interchange version info {}, causing a critical failure".format(
             self.manager_version,
             self.interchange_version)
@@ -67,18 +68,18 @@ class Interchange:
     4. Service single and batch requests from workers
     """
     def __init__(self,
-                 client_address="127.0.0.1",
+                 client_address: str = "127.0.0.1",
                  interchange_address: Optional[str] = None,
-                 client_ports=(50055, 50056, 50057),
-                 worker_ports=None,
-                 worker_port_range=(54000, 55000),
-                 hub_address=None,
-                 hub_port=None,
-                 heartbeat_threshold=60,
-                 logdir=".",
-                 logging_level=logging.INFO,
-                 poll_period=10,
-                 ) -> None:
+                 client_ports: Tuple[int, int, int] = (50055, 50056, 50057),
+                 worker_ports: Optional[Tuple[int, int]] = None,
+                 worker_port_range: Tuple[int, int] = (54000, 55000),
+                 hub_address: Optional[str] = None,
+                 hub_port: Optional[int] = None,
+                 heartbeat_threshold: int = 60,
+                 logdir: str = ".",
+                 logging_level: int = logging.INFO,
+                 poll_period: int = 10,
+                 ) -> None:
         """
         Parameters
         ----------
@@ -191,7 +192,7 @@
 
         logger.info("Platform info: {}".format(self.current_platform))
 
-    def get_tasks(self, count):
+    def get_tasks(self, count: int) -> Sequence[dict]:
         """ Obtains a batch of tasks from the internal pending_task_queue
 
         Parameters
@@ -205,7 +206,7 @@
         eg. [{'task_id':<x>, 'buffer':<buf>} ... ]
         """
         tasks = []
-        for i in range(0, count):
+        for _ in range(0, count):
             try:
                 x = self.pending_task_queue.get(block=False)
             except queue.Empty:
@@ -216,7 +217,7 @@
         return tasks
 
     @wrap_with_logs(target="interchange")
-    def task_puller(self):
+    def task_puller(self) -> NoReturn:
        """Pull tasks from the incoming tasks zmq pipe onto the internal
        pending task queue
        """
@@ -237,7 +238,7 @@
             task_counter += 1
             logger.debug(f"Fetched {task_counter} tasks so far")
 
-    def _create_monitoring_channel(self):
+    def _create_monitoring_channel(self) -> Optional[zmq.Socket]:
         if self.hub_address and self.hub_port:
             logger.info("Connecting to monitoring")
             hub_channel = self.context.socket(zmq.DEALER)
@@ -248,7 +249,7 @@
         else:
             return None
 
-    def _send_monitoring_info(self, hub_channel, manager: ManagerRecord):
+    def _send_monitoring_info(self, hub_channel: Optional[zmq.Socket], manager: ManagerRecord) -> None:
         if hub_channel:
             logger.info("Sending message {} to hub".format(manager))
 
@@ -259,7 +260,7 @@
             hub_channel.send_pyobj((MessageType.NODE_INFO, d))
 
     @wrap_with_logs(target="interchange")
-    def _command_server(self):
+    def _command_server(self) -> NoReturn:
         """ Command server to run async command to the interchange
         """
         logger.debug("Command Server Starting")
@@ -305,7 +306,7 @@
             elif command_req.startswith("HOLD_WORKER"):
                 cmd, s_manager = command_req.split(';')
                 manager_id = s_manager.encode('utf-8')
-                logger.info("Received HOLD_WORKER for {}".format(manager_id))
+                logger.info("Received HOLD_WORKER for {!r}".format(manager_id))
                 if manager_id in self._ready_managers:
                     m = self._ready_managers[manager_id]
                     m['active'] = False
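
The {} → {!r} changes here and throughout the remaining interchange hunks affect how bytes manager IDs render in logs: {!r} formats through repr() rather than str(). When Python runs with the -b/-bb flags, implicit str() conversion of bytes raises a BytesWarning, so repr() is the robust way to log them. A small sketch of the difference (illustrative values, not parsl code):

    # Run with: python -bb example.py
    manager_id = b"10.0.0.5:9000"

    # str-style formatting of bytes triggers BytesWarning under -bb:
    #   "manager {}".format(manager_id)

    # {!r} goes through repr() and is always safe:
    print("Received HOLD_WORKER for {!r}".format(manager_id))
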
@@ -326,7 +327,7 @@
                 continue
 
     @wrap_with_logs
-    def start(self):
+    def start(self) -> None:
         """ Start the interchange
         """
 
@@ -382,7 +383,7 @@
         logger.info("Processed {} tasks in {} seconds".format(self.count, delta))
         logger.warning("Exiting")
 
-    def process_task_outgoing_incoming(self, interesting_managers, hub_channel, kill_event):
+    def process_task_outgoing_incoming(self, interesting_managers: Set[bytes], hub_channel: Optional[zmq.Socket], kill_event: threading.Event) -> None:
         # Listen for requests for work
         if self.task_outgoing in self.socks and self.socks[self.task_outgoing] == zmq.POLLIN:
             logger.debug("starting task_outgoing section")
@@ -396,9 +397,9 @@
                     msg = json.loads(message[1].decode('utf-8'))
                     reg_flag = True
                 except Exception:
-                    logger.warning("Got Exception reading registration message from manager: {}".format(
+                    logger.warning("Got Exception reading registration message from manager: {!r}".format(
                         manager_id), exc_info=True)
-                    logger.debug("Message: \n{}\n".format(message[1]))
+                    logger.debug("Message: \n{!r}\n".format(message[1]))
                 else:
                     # We set up an entry only if registration works correctly
                     self._ready_managers[manager_id] = {'last_heartbeat': time.time(),
@@ -410,46 +411,45 @@
                                                         'tasks': []}
                 if reg_flag is True:
                     interesting_managers.add(manager_id)
-                    logger.info("Adding manager: {} to ready queue".format(manager_id))
+                    logger.info("Adding manager: {!r} to ready queue".format(manager_id))
                     m = self._ready_managers[manager_id]
                     m.update(msg)
-                    logger.info("Registration info for manager {}: {}".format(manager_id, msg))
+                    logger.info("Registration info for manager {!r}: {}".format(manager_id, msg))
                     self._send_monitoring_info(hub_channel, m)
 
                     if (msg['python_v'].rsplit(".", 1)[0] != self.current_platform['python_v'].rsplit(".", 1)[0] or
                         msg['parsl_v'] != self.current_platform['parsl_v']):
-                        logger.error("Manager {} has incompatible version info with the interchange".format(manager_id))
+                        logger.error("Manager {!r} has incompatible version info with the interchange".format(manager_id))
                         logger.debug("Setting kill event")
                         kill_event.set()
                         e = VersionMismatch("py.v={} parsl.v={}".format(self.current_platform['python_v'].rsplit(".", 1)[0],
                                                                         self.current_platform['parsl_v']),
                                             "py.v={} parsl.v={}".format(msg['python_v'].rsplit(".", 1)[0],
                                                                         msg['parsl_v'])
-                                            )
+                                           )
                         result_package = {'type': 'result', 'task_id': -1, 'exception': serialize_object(e)}
                         pkl_package = pickle.dumps(result_package)
                         self.results_outgoing.send(pkl_package)
                         logger.error("Sent failure reports, shutting down interchange")
                     else:
-                        logger.info("Manager {} has compatible Parsl version {}".format(manager_id, msg['parsl_v']))
-                        logger.info("Manager {} has compatible Python version {}".format(manager_id,
-                                                                                         msg['python_v'].rsplit(".", 1)[0]))
+                        logger.info("Manager {!r} has compatible Parsl version {}".format(manager_id, msg['parsl_v']))
+                        logger.info("Manager {!r} has compatible Python version {}".format(manager_id,
+                                                                                           msg['python_v'].rsplit(".", 1)[0]))
                 else:
                     # Registration has failed.
-                    logger.debug("Suppressing bad registration from manager: {}".format(
-                        manager_id))
+                    logger.debug("Suppressing bad registration from manager: {!r}".format(manager_id))
 
             else:
                 tasks_requested = int.from_bytes(message[1], "little")
                 self._ready_managers[manager_id]['last_heartbeat'] = time.time()
                 if tasks_requested == HEARTBEAT_CODE:
-                    logger.debug("Manager {} sent heartbeat via tasks connection".format(manager_id))
+                    logger.debug("Manager {!r} sent heartbeat via tasks connection".format(manager_id))
                     self.task_outgoing.send_multipart([manager_id, b'', PKL_HEARTBEAT_CODE])
                 else:
                     logger.error("Unexpected non-heartbeat message received from manager {}")
             logger.debug("leaving task_outgoing section")
 
-    def process_tasks_to_send(self, interesting_managers):
+    def process_tasks_to_send(self, interesting_managers: Set[bytes]) -> None:
         # If we had received any requests, check if there are tasks that could be passed
 
         logger.debug("Managers count (interesting/total): {interesting}/{total}".format(
@@ -475,14 +475,14 @@
                         tids = [t['task_id'] for t in tasks]
                         m['tasks'].extend(tids)
                         m['idle_since'] = None
-                        logger.debug("Sent tasks: {} to manager {}".format(tids, manager_id))
+                        logger.debug("Sent tasks: {} to manager {!r}".format(tids, manager_id))
                         # recompute real_capacity after sending tasks
                         real_capacity = m['max_capacity'] - tasks_inflight
                         if real_capacity > 0:
-                            logger.debug("Manager {} has free capacity {}".format(manager_id, real_capacity))
+                            logger.debug("Manager {!r} has free capacity {}".format(manager_id, real_capacity))
                             # ... so keep it in the interesting_managers list
                         else:
-                            logger.debug("Manager {} is now saturated".format(manager_id))
+                            logger.debug("Manager {!r} is now saturated".format(manager_id))
                             interesting_managers.remove(manager_id)
                     else:
                         interesting_managers.remove(manager_id)
@@ -491,15 +491,15 @@
         else:
             logger.debug("either no interesting managers or no tasks, so skipping manager pass")
 
-    def process_results_incoming(self, interesting_managers, hub_channel):
+    def process_results_incoming(self, interesting_managers: Set[bytes], hub_channel: Optional[zmq.Socket]) -> None:
         # Receive any results and forward to client
         if self.results_incoming in self.socks and self.socks[self.results_incoming] == zmq.POLLIN:
             logger.debug("entering results_incoming section")
             manager_id, *all_messages = self.results_incoming.recv_multipart()
             if manager_id not in self._ready_managers:
-                logger.warning("Received a result from a un-registered manager: {}".format(manager_id))
+                logger.warning("Received a result from a un-registered manager: {!r}".format(manager_id))
             else:
-                logger.debug(f"Got {len(all_messages)} result items in batch from manager {manager_id}")
+                logger.debug(f"Got {len(all_messages)} result items in batch from manager {manager_id!r}")
 
                 b_messages = []
 
@@ -509,25 +509,31 @@
                         # process this for task ID and forward to executor
                         b_messages.append((p_message, r))
                     elif r['type'] == 'monitoring':
+                        # the monitoring code makes the assumption that no
+                        # monitoring messages will be received if monitoring
+                        # is not configured, and that hub_channel will only
+                        # be None when monitoring is not configurated.
+                        assert hub_channel is not None
+
                         hub_channel.send_pyobj(r['payload'])
                     elif r['type'] == 'heartbeat':
-                        logger.debug(f"Manager {manager_id} sent heartbeat via results connection")
+                        logger.debug(f"Manager {manager_id!r} sent heartbeat via results connection")
                         b_messages.append((p_message, r))
                     else:
                         logger.error("Interchange discarding result_queue message of unknown type: {}".format(r['type']))
 
                 got_result = False
                 m = self._ready_managers[manager_id]
-                for (b_message, r) in b_messages:
+                for (_, r) in b_messages:
                     assert 'type' in r, f"Message is missing type entry: {r}"
                     if r['type'] == 'result':
                         got_result = True
                         try:
-                            logger.debug(f"Removing task {r['task_id']} from manager record {manager_id}")
+                            logger.debug(f"Removing task {r['task_id']} from manager record {manager_id!r}")
                             m['tasks'].remove(r['task_id'])
                         except Exception:
                             # If we reach here, there's something very wrong.
-                            logger.exception("Ignoring exception removing task_id {} for manager {} with task list {}".format(
+                            logger.exception("Ignoring exception removing task_id {} for manager {!r} with task list {}".format(
                                 r['task_id'],
                                 manager_id,
                                 m['tasks']))
@@ -541,7 +547,7 @@
                 self.results_outgoing.send_multipart(b_messages_to_send)
                 logger.debug("Sent messages on results_outgoing")
 
-            logger.debug(f"Current tasks on manager {manager_id}: {m['tasks']}")
+            logger.debug(f"Current tasks on manager {manager_id!r}: {m['tasks']}")
             if len(m['tasks']) == 0 and m['idle_since'] is None:
                 m['idle_since'] = time.time()
 
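
The assert added in the monitoring branch above doubles as a type-narrowing hint: hub_channel is typed Optional[zmq.Socket], and the assert tells a checker like mypy that it is non-None before .send_pyobj() is called. The pattern in isolation (illustrative names, not parsl code):

    from typing import Optional

    class Channel:
        def send_pyobj(self, obj: object) -> None:
            print("sent", obj)

    def forward(hub_channel: Optional[Channel], payload: object) -> None:
        # Callers only reach this point when monitoring is configured,
        # so hub_channel is never None here; the assert encodes that
        # invariant and narrows Optional[Channel] to Channel.
        assert hub_channel is not None
        hub_channel.send_pyobj(payload)

    forward(Channel(), {"type": "monitoring"})
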
@@ -553,12 +559,12 @@
                     interesting_managers.add(manager_id)
             logger.debug("leaving results_incoming section")
 
-    def expire_bad_managers(self, interesting_managers, hub_channel):
+    def expire_bad_managers(self, interesting_managers: Set[bytes], hub_channel: Optional[zmq.Socket]) -> None:
         bad_managers = [(manager_id, m) for (manager_id, m) in self._ready_managers.items() if
                         time.time() - m['last_heartbeat'] > self.heartbeat_threshold]
         for (manager_id, m) in bad_managers:
             logger.debug("Last: {} Current: {}".format(m['last_heartbeat'], time.time()))
-            logger.warning(f"Too many heartbeats missed for manager {manager_id} - removing manager")
+            logger.warning(f"Too many heartbeats missed for manager {manager_id!r} - removing manager")
             if m['active']:
                 m['active'] = False
                 self._send_monitoring_info(hub_channel, m)
@@ -577,7 +583,7 @@
                 interesting_managers.remove(manager_id)
 
 
-def start_file_logger(filename, level=logging.DEBUG, format_string=None):
+def start_file_logger(filename: str, level: int = logging.DEBUG, format_string: Optional[str] = None) -> None:
     """Add a stream log handler.
 
     Parameters
@@ -609,7 +615,7 @@ def start_file_logger(filename, level=logging.DEBUG, format_string=None):
 
 
 @wrap_with_logs(target="interchange")
-def starter(comm_q, *args, **kwargs):
+def starter(comm_q: multiprocessing.Queue, *args: Any, **kwargs: Any) -> None:
     """Start the interchange process
 
     The executor is expected to call this function. The args, kwargs match that of the Interchange.__init__
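
The new "import multiprocessing" at the top of the module exists to support this annotation: starter() receives a multiprocessing.Queue through which the child interchange process reports back to the parent executor. A minimal sketch of that kind of handshake (illustrative, not the parsl implementation):

    import multiprocessing

    def starter_sketch(comm_q: multiprocessing.Queue) -> None:
        # Child process reports its chosen ports back to the parent.
        comm_q.put((54000, 54001))

    if __name__ == "__main__":
        q: multiprocessing.Queue = multiprocessing.Queue()
        p = multiprocessing.Process(target=starter_sketch, args=(q,))
        p.start()
        print(q.get())  # (54000, 54001)
        p.join()
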
parsl/executors/high_throughput/process_worker_pool.py

@@ -234,7 +234,7 @@ class Manager:
                'dir': os.getcwd(),
                'cpu_count': psutil.cpu_count(logical=False),
                'total_memory': psutil.virtual_memory().total,
-                }
+               }
         b_msg = json.dumps(msg).encode('utf-8')
         return b_msg
 
@@ -608,7 +608,7 @@ def worker(worker_id, pool_id, pool_size, task_queue, result_queue, worker_queue
             logger.exception("Caught exception while trying to pickle the result package")
             pkl_package = pickle.dumps({'type': 'result', 'task_id': tid,
                                         'exception': serialize(RemoteExceptionWrapper(*sys.exc_info()))
-                                         })
+                                        })
 
             result_queue.put(pkl_package)
         tasks_in_progress.pop(worker_id)
parsl/executors/high_throughput/zmq_pipes.py

@@ -53,7 +53,7 @@ class CommandClient:
         """
         reply = '__PARSL_ZMQ_PIPES_MAGIC__'
         with self._lock:
-            for i in range(max_retries):
+            for _ in range(max_retries):
                 try:
                     self.zmq_socket.send_pyobj(message, copy=True)
                     reply = self.zmq_socket.recv_pyobj()
parsl/executors/status_handling.py

@@ -193,7 +193,7 @@ class BlockProviderExecutor(ParslExecutor):
             raise ScalingFailed(self, "No execution provider available")
         block_ids = []
         logger.info(f"Scaling out by {blocks} blocks")
-        for i in range(blocks):
+        for _ in range(blocks):
             block_id = str(self._block_id_counter.get_id())
             logger.info(f"Allocated block ID {block_id}")
             try:
parsl/executors/taskvine/exec_parsl_function.py

@@ -1,11 +1,10 @@
 import traceback
 import sys
 
-import pickle
 from parsl.app.errors import RemoteExceptionWrapper
 from parsl.data_provider.files import File
 from parsl.utils import get_std_fname_mode
-from parsl.serialize import deserialize
+from parsl.serialize import deserialize, serialize
 
 # This scripts executes a parsl function which is pickled in 4 files:
 #
@@ -30,10 +29,10 @@ from parsl.serialize import deserialize
 #
 
 
-def dump_result_to_file(result_file: str, result_package):
+def dump_result_to_file(result_file: str, result):
     """ Dump a result to the given result file."""
     with open(result_file, "wb") as f_out:
-        pickle.dump(result_package, f_out)
+        f_out.write(serialize(result))
 
 
 def remap_location(mapping, parsl_file):
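
This hunk switches result writing from raw pickle to the parsl.serialize facade (the workqueue variant below gets the same change). The facade tags the payload with which serializer produced it, and deserialize() reverses the operation; a round trip looks like this (a sketch, assuming only the serialize/deserialize functions imported above):

    from parsl.serialize import serialize, deserialize

    payload = {"task_id": 7, "value": 42}
    buf = serialize(payload)            # bytes, tagged with the serializer used
    assert deserialize(buf) == payload
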
parsl/executors/taskvine/executor.py

@@ -22,7 +22,7 @@ from typing import List, Optional, Union, Literal
 # Import Parsl constructs
 import parsl.utils as putils
 from parsl.data_provider.staging import Staging
-from parsl.serialize import serialize
+from parsl.serialize import serialize, deserialize
 from parsl.data_provider.files import File
 from parsl.errors import OptionalModuleMissing
 from parsl.providers.base import ExecutionProvider
@@ -614,7 +614,6 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
             self._factory_process.join()
 
         logger.debug("TaskVine shutdown completed")
-        return True
 
     @wrap_with_logs
     def _collect_taskvine_results(self):
@@ -639,11 +638,26 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
                 logger.debug(f'Updating Future for Parsl Task: {task_report.executor_id}. \
                 Task {task_report.executor_id} has result_received set to {task_report.result_received}')
                 if task_report.result_received:
-                    future.set_result(task_report.result)
+                    try:
+                        with open(task_report.result_file, 'rb') as f_in:
+                            result = deserialize(f_in.read())
+                    except Exception as e:
+                        logger.error(f'Cannot load result from result file {task_report.result_file}. Exception: {e}')
+                        ex = TaskVineTaskFailure('Cannot load result from result file', None)
+                        ex.__cause__ = e
+                        future.set_exception(ex)
+                    else:
+                        if isinstance(result, Exception):
+                            ex = TaskVineTaskFailure('Task execution raises an exception', result)
+                            ex.__cause__ = result
+                            future.set_exception(ex)
+                        else:
+                            future.set_result(result)
                 else:
                     # If there are no results, then the task failed according to one of
                     # taskvine modes, such as resource exhaustion.
-                    future.set_exception(TaskVineTaskFailure(task_report.reason, task_report.result))
+                    ex = TaskVineTaskFailure(task_report.reason, None)
+                    future.set_exception(ex)
 
                 # decrement outstanding task counter
                 with self._outstanding_tasks_lock:
parsl/executors/taskvine/factory.py

@@ -30,7 +30,7 @@ def _taskvine_factory(should_stop, factory_config):
         else:
             factory = Factory(batch_type=factory_config.batch_type,
                               manager_host_port=f"{factory_config._project_address}:{factory_config._project_port}",
-                               )
+                              )
 
     except Exception as e:
         raise TaskVineFactoryFailure(f'Cannot create factory with exception {e}')
parsl/executors/taskvine/manager.py

@@ -2,7 +2,6 @@ import logging
 import hashlib
 import subprocess
 import os
-import pickle
 import queue
 import shutil
 import uuid
@@ -229,7 +228,7 @@ def _taskvine_submit_wait(ready_task_queue=None,
             logger.error("Unable to create executor task (mode:regular): {}".format(e))
             finished_task_queue.put_nowait(VineTaskToParsl(executor_id=task.executor_id,
                                                            result_received=False,
-                                                           result=None,
+                                                           result_file=None,
                                                            reason="task could not be created by taskvine",
                                                            status=-1))
             continue
@@ -268,7 +267,7 @@ def _taskvine_submit_wait(ready_task_queue=None,
             logger.error("Unable to create executor task (mode:serverless): {}".format(e))
             finished_task_queue.put_nowait(VineTaskToParsl(executor_id=task.executor_id,
                                                            result_received=False,
-                                                           result=None,
+                                                           result_file=None,
                                                            reason="task could not be created by taskvine",
                                                            status=-1))
         else:
@@ -369,7 +368,7 @@ def _taskvine_submit_wait(ready_task_queue=None,
             logger.error("Unable to submit task to taskvine: {}".format(e))
             finished_task_queue.put_nowait(VineTaskToParsl(executor_id=task.executor_id,
                                                            result_received=False,
-                                                           result=None,
+                                                           result_file=None,
                                                            reason="task could not be submited to taskvine",
                                                            status=-1))
             continue
@@ -394,24 +393,21 @@ def _taskvine_submit_wait(ready_task_queue=None,
 
             logger.debug(f"completed executor task info: {executor_task_id}, {t.category}, {t.command}, {t.std_output}")
 
-            # A tasks completes 'succesfully' if it has result file,
-            # and it can be loaded. This may mean that the 'success' is
-            # an exception.
+            # A tasks completes 'succesfully' if it has result file.
+            # A check whether the Python object represented using this file can be
+            # deserialized happens later in the collector thread of the executor
+            # process.
             logger.debug("Looking for result in {}".format(result_file))
-            try:
-                with open(result_file, "rb") as f_in:
-                    result = pickle.load(f_in)
+            if os.path.exists(result_file):
                 logger.debug("Found result in {}".format(result_file))
                 finished_task_queue.put_nowait(VineTaskToParsl(executor_id=executor_task_id,
                                                                result_received=True,
-                                                               result=result,
+                                                               result_file=result_file,
                                                                reason=None,
                                                                status=t.exit_code))
             # If a result file could not be generated, explain the
-            # failure according to taskvine error codes. We generate
-            # an exception and wrap it with RemoteExceptionWrapper, to
-            # match the positive case.
-            except Exception as e:
+            # failure according to taskvine error codes.
+            else:
                 reason = _explain_taskvine_result(t)
                 logger.debug("Did not find result in {}".format(result_file))
                 logger.debug("Wrapper Script status: {}\nTaskVine Status: {}"
@@ -420,7 +416,7 @@ def _taskvine_submit_wait(ready_task_queue=None,
                              .format(executor_task_id, t.id, reason))
                 finished_task_queue.put_nowait(VineTaskToParsl(executor_id=executor_task_id,
                                                                result_received=False,
-                                                               result=e,
+                                                               result_file=None,
                                                                reason=reason,
                                                                status=t.exit_code))
 
parsl/executors/taskvine/utils.py

@@ -42,20 +42,20 @@ class ParslTaskToVine:
 
 class VineTaskToParsl:
     """
-    Support structure to communicate final status of TaskVine tasks to Parsl
-    result is only valid if result_received is True
-    reason and status are only valid if result_received is False
+    Support structure to communicate final status of TaskVine tasks to Parsl.
+    result_file is only valid if result_received is True.
+    Reason and status are only valid if result_received is False.
     """
     def __init__(self,
                  executor_id: int,  # executor id of task
                  result_received: bool,  # whether result is received or not
-                 result,  # result object if available
+                 result_file: Optional[str],  # path to file that contains the serialized result object
                  reason: Optional[str],  # string describing why execution fails
                  status: Optional[int]  # exit code of execution of task
                  ):
         self.executor_id = executor_id
         self.result_received = result_received
-        self.result = result
+        self.result_file = result_file
         self.reason = reason
         self.status = status
 
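
Taken together, the manager and utils changes replace in-band result objects with a result-file handoff: the submit/wait process only checks that the result file exists, and deserialization moves to the executor's collector thread (see the executor.py hunk above). A condensed sketch of the two halves (illustrative, not the parsl implementation):

    from parsl.serialize import serialize, deserialize

    def remote_side(result_file: str, result: object) -> None:
        # Written on the worker by exec_parsl_function.py.
        with open(result_file, "wb") as f_out:
            f_out.write(serialize(result))

    def collector_side(result_file: str) -> object:
        # Read back in the executor process; may raise if the file is
        # corrupt, which the executor converts to TaskVineTaskFailure.
        with open(result_file, "rb") as f_in:
            return deserialize(f_in.read())
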
parsl/executors/threads.py

@@ -72,9 +72,8 @@ class ThreadPoolExecutor(ParslExecutor, RepresentationMixin):
 
         """
         logger.debug("Shutting down executor, which involves waiting for running tasks to complete")
-        x = self.executor.shutdown(wait=block)
+        self.executor.shutdown(wait=block)
         logger.debug("Done with executor shutdown")
-        return x
 
     def monitor_resources(self):
         """Resource monitoring sometimes deadlocks when using threads, so this function
parsl/executors/workqueue/exec_parsl_function.py

@@ -4,6 +4,7 @@ from parsl.utils import get_std_fname_mode
 import traceback
 import sys
 import pickle
+from parsl.serialize import serialize
 
 # This scripts executes a parsl function which is pickled in a file:
 #
@@ -32,7 +33,7 @@ def load_pickled_file(filename):
 
 def dump_result_to_file(result_file, result_package):
     with open(result_file, "wb") as f_out:
-        pickle.dump(result_package, f_out)
+        f_out.write(serialize(result_package))
 
 
 def remap_location(mapping, parsl_file):