parsl-2023.10.23-py3-none-any.whl → parsl-2023.11.20-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
Files changed (58)
  1. parsl/__init__.py +1 -0
  2. parsl/app/app.py +29 -21
  3. parsl/channels/base.py +12 -24
  4. parsl/config.py +19 -12
  5. parsl/configs/ad_hoc.py +2 -2
  6. parsl/dataflow/dflow.py +10 -4
  7. parsl/executors/base.py +1 -3
  8. parsl/executors/high_throughput/executor.py +3 -3
  9. parsl/executors/high_throughput/interchange.py +59 -53
  10. parsl/executors/high_throughput/process_worker_pool.py +2 -2
  11. parsl/executors/high_throughput/zmq_pipes.py +1 -1
  12. parsl/executors/radical/__init__.py +4 -0
  13. parsl/executors/radical/executor.py +550 -0
  14. parsl/executors/radical/rpex_master.py +42 -0
  15. parsl/executors/radical/rpex_resources.py +165 -0
  16. parsl/executors/radical/rpex_worker.py +61 -0
  17. parsl/executors/status_handling.py +1 -2
  18. parsl/executors/taskvine/exec_parsl_function.py +3 -4
  19. parsl/executors/taskvine/executor.py +18 -4
  20. parsl/executors/taskvine/factory.py +1 -1
  21. parsl/executors/taskvine/manager.py +12 -16
  22. parsl/executors/taskvine/utils.py +5 -5
  23. parsl/executors/threads.py +1 -2
  24. parsl/executors/workqueue/exec_parsl_function.py +2 -1
  25. parsl/executors/workqueue/executor.py +34 -24
  26. parsl/jobs/job_status_poller.py +2 -3
  27. parsl/monitoring/monitoring.py +6 -6
  28. parsl/monitoring/remote.py +1 -1
  29. parsl/monitoring/visualization/plots/default/workflow_plots.py +4 -4
  30. parsl/monitoring/visualization/plots/default/workflow_resource_plots.py +2 -2
  31. parsl/providers/slurm/slurm.py +1 -1
  32. parsl/tests/configs/ad_hoc_cluster_htex.py +3 -3
  33. parsl/tests/configs/htex_ad_hoc_cluster.py +1 -1
  34. parsl/tests/configs/local_radical.py +20 -0
  35. parsl/tests/configs/local_radical_mpi.py +20 -0
  36. parsl/tests/configs/local_threads_monitoring.py +1 -1
  37. parsl/tests/conftest.py +6 -2
  38. parsl/tests/scaling_tests/vineex_condor.py +1 -1
  39. parsl/tests/scaling_tests/vineex_local.py +1 -1
  40. parsl/tests/scaling_tests/wqex_condor.py +1 -1
  41. parsl/tests/scaling_tests/wqex_local.py +1 -1
  42. parsl/tests/test_docs/test_kwargs.py +37 -0
  43. parsl/tests/test_python_apps/test_garbage_collect.py +1 -1
  44. parsl/tests/test_python_apps/test_lifted.py +3 -2
  45. parsl/tests/test_radical/__init__.py +0 -0
  46. parsl/tests/test_radical/test_mpi_funcs.py +27 -0
  47. parsl/tests/test_regression/test_1606_wait_for_current_tasks.py +1 -1
  48. parsl/utils.py +4 -4
  49. parsl/version.py +1 -1
  50. {parsl-2023.10.23.data → parsl-2023.11.20.data}/scripts/exec_parsl_function.py +2 -1
  51. {parsl-2023.10.23.data → parsl-2023.11.20.data}/scripts/process_worker_pool.py +2 -2
  52. {parsl-2023.10.23.dist-info → parsl-2023.11.20.dist-info}/METADATA +5 -2
  53. {parsl-2023.10.23.dist-info → parsl-2023.11.20.dist-info}/RECORD +58 -48
  54. {parsl-2023.10.23.dist-info → parsl-2023.11.20.dist-info}/WHEEL +1 -1
  55. {parsl-2023.10.23.data → parsl-2023.11.20.data}/scripts/parsl_coprocess.py +0 -0
  56. {parsl-2023.10.23.dist-info → parsl-2023.11.20.dist-info}/LICENSE +0 -0
  57. {parsl-2023.10.23.dist-info → parsl-2023.11.20.dist-info}/entry_points.txt +0 -0
  58. {parsl-2023.10.23.dist-info → parsl-2023.11.20.dist-info}/top_level.txt +0 -0
parsl/executors/high_throughput/interchange.py

@@ -1,4 +1,5 @@
  #!/usr/bin/env python
+ import multiprocessing
  import zmq
  import os
  import sys
@@ -13,7 +14,7 @@ import queue
  import threading
  import json

- from typing import cast, Any, Dict, Set, Optional
+ from typing import cast, Any, Dict, NoReturn, Sequence, Set, Optional, Tuple

  from parsl.utils import setproctitle
  from parsl.version import VERSION as PARSL_VERSION
@@ -36,23 +37,23 @@ class ManagerLost(Exception):
  ''' Task lost due to manager loss. Manager is considered lost when multiple heartbeats
  have been missed.
  '''
- def __init__(self, manager_id, hostname):
+ def __init__(self, manager_id: bytes, hostname: str) -> None:
  self.manager_id = manager_id
  self.tstamp = time.time()
  self.hostname = hostname

- def __str__(self):
+ def __str__(self) -> str:
  return "Task failure due to loss of manager {} on host {}".format(self.manager_id.decode(), self.hostname)


  class VersionMismatch(Exception):
  ''' Manager and Interchange versions do not match
  '''
- def __init__(self, interchange_version, manager_version):
+ def __init__(self, interchange_version: str, manager_version: str):
  self.interchange_version = interchange_version
  self.manager_version = manager_version

- def __str__(self):
+ def __str__(self) -> str:
  return "Manager version info {} does not match interchange version info {}, causing a critical failure".format(
  self.manager_version,
  self.interchange_version)
@@ -67,18 +68,18 @@ class Interchange:
  4. Service single and batch requests from workers
  """
  def __init__(self,
- client_address="127.0.0.1",
+ client_address: str = "127.0.0.1",
  interchange_address: Optional[str] = None,
- client_ports=(50055, 50056, 50057),
- worker_ports=None,
- worker_port_range=(54000, 55000),
- hub_address=None,
- hub_port=None,
- heartbeat_threshold=60,
- logdir=".",
- logging_level=logging.INFO,
- poll_period=10,
- ) -> None:
+ client_ports: Tuple[int, int, int] = (50055, 50056, 50057),
+ worker_ports: Optional[Tuple[int, int]] = None,
+ worker_port_range: Tuple[int, int] = (54000, 55000),
+ hub_address: Optional[str] = None,
+ hub_port: Optional[int] = None,
+ heartbeat_threshold: int = 60,
+ logdir: str = ".",
+ logging_level: int = logging.INFO,
+ poll_period: int = 10,
+ ) -> None:
  """
  Parameters
  ----------
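Aside (not part of the diff): with the constructor parameters annotated as above, a static type checker can flag call sites that previously failed only at runtime, if at all. A minimal sketch, assuming mypy and only the signature shown in this hunk:

# illustration only -- not part of the parsl 2023.11.20 diff
from parsl.executors.high_throughput.interchange import Interchange

def make_interchange() -> Interchange:
    # Matches the annotated signature above.
    return Interchange(
        client_ports=(50055, 50056, 50057),   # Tuple[int, int, int]
        worker_port_range=(54000, 55000),     # Tuple[int, int]
        hub_port=None,                        # Optional[int]
        logdir="./runinfo",
    )

# mypy would now reject, for example:
#   Interchange(hub_port="55055")       -- str is not Optional[int]
#   Interchange(client_ports=(1, 2))    -- wrong arity for Tuple[int, int, int]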
@@ -191,7 +192,7 @@

  logger.info("Platform info: {}".format(self.current_platform))

- def get_tasks(self, count):
+ def get_tasks(self, count: int) -> Sequence[dict]:
  """ Obtains a batch of tasks from the internal pending_task_queue

  Parameters
@@ -205,7 +206,7 @@
  eg. [{'task_id':<x>, 'buffer':<buf>} ... ]
  """
  tasks = []
- for i in range(0, count):
+ for _ in range(0, count):
  try:
  x = self.pending_task_queue.get(block=False)
  except queue.Empty:
@@ -216,7 +217,7 @@
  return tasks

  @wrap_with_logs(target="interchange")
- def task_puller(self):
+ def task_puller(self) -> NoReturn:
  """Pull tasks from the incoming tasks zmq pipe onto the internal
  pending task queue
  """
@@ -237,7 +238,7 @@
  task_counter += 1
  logger.debug(f"Fetched {task_counter} tasks so far")

- def _create_monitoring_channel(self):
+ def _create_monitoring_channel(self) -> Optional[zmq.Socket]:
  if self.hub_address and self.hub_port:
  logger.info("Connecting to monitoring")
  hub_channel = self.context.socket(zmq.DEALER)
@@ -248,7 +249,7 @@
  else:
  return None

- def _send_monitoring_info(self, hub_channel, manager: ManagerRecord):
+ def _send_monitoring_info(self, hub_channel: Optional[zmq.Socket], manager: ManagerRecord) -> None:
  if hub_channel:
  logger.info("Sending message {} to hub".format(manager))

@@ -259,7 +260,7 @@
  hub_channel.send_pyobj((MessageType.NODE_INFO, d))

  @wrap_with_logs(target="interchange")
- def _command_server(self):
+ def _command_server(self) -> NoReturn:
  """ Command server to run async command to the interchange
  """
  logger.debug("Command Server Starting")
@@ -305,7 +306,7 @@
  elif command_req.startswith("HOLD_WORKER"):
  cmd, s_manager = command_req.split(';')
  manager_id = s_manager.encode('utf-8')
- logger.info("Received HOLD_WORKER for {}".format(manager_id))
+ logger.info("Received HOLD_WORKER for {!r}".format(manager_id))
  if manager_id in self._ready_managers:
  m = self._ready_managers[manager_id]
  m['active'] = False
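Aside (not part of the diff): many hunks in this file change "{}" to "{!r}" when logging manager IDs. Manager IDs are bytes, and plain "{}" formatting goes through str(), which triggers a BytesWarning when Python runs with the -b/-bb flags; "!r" formats with repr() instead, which is always safe and makes the bytes value explicit. A small standalone illustration, assuming the -bb behaviour is the motivation for the change:

# demo_bytes_format.py -- illustration only; run as:  python -bb demo_bytes_format.py
manager_id = b"16a5c8f2d4"   # hypothetical manager ID; the interchange handles these as bytes

print("repr formatting: {!r}".format(manager_id))       # always fine: b'16a5c8f2d4'

try:
    print("plain formatting: {}".format(manager_id))    # str() on bytes
except BytesWarning as exc:                             # raised as an error under -bb
    print("plain formatting rejected:", exc)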
@@ -326,7 +327,7 @@
  continue

  @wrap_with_logs
- def start(self):
+ def start(self) -> None:
  """ Start the interchange
  """

@@ -382,7 +383,7 @@
  logger.info("Processed {} tasks in {} seconds".format(self.count, delta))
  logger.warning("Exiting")

- def process_task_outgoing_incoming(self, interesting_managers, hub_channel, kill_event):
+ def process_task_outgoing_incoming(self, interesting_managers: Set[bytes], hub_channel: Optional[zmq.Socket], kill_event: threading.Event) -> None:
  # Listen for requests for work
  if self.task_outgoing in self.socks and self.socks[self.task_outgoing] == zmq.POLLIN:
  logger.debug("starting task_outgoing section")
@@ -396,9 +397,9 @@
  msg = json.loads(message[1].decode('utf-8'))
  reg_flag = True
  except Exception:
- logger.warning("Got Exception reading registration message from manager: {}".format(
+ logger.warning("Got Exception reading registration message from manager: {!r}".format(
  manager_id), exc_info=True)
- logger.debug("Message: \n{}\n".format(message[1]))
+ logger.debug("Message: \n{!r}\n".format(message[1]))
  else:
  # We set up an entry only if registration works correctly
  self._ready_managers[manager_id] = {'last_heartbeat': time.time(),
@@ -410,46 +411,45 @@
  'tasks': []}
  if reg_flag is True:
  interesting_managers.add(manager_id)
- logger.info("Adding manager: {} to ready queue".format(manager_id))
+ logger.info("Adding manager: {!r} to ready queue".format(manager_id))
  m = self._ready_managers[manager_id]
  m.update(msg)
- logger.info("Registration info for manager {}: {}".format(manager_id, msg))
+ logger.info("Registration info for manager {!r}: {}".format(manager_id, msg))
  self._send_monitoring_info(hub_channel, m)

  if (msg['python_v'].rsplit(".", 1)[0] != self.current_platform['python_v'].rsplit(".", 1)[0] or
  msg['parsl_v'] != self.current_platform['parsl_v']):
- logger.error("Manager {} has incompatible version info with the interchange".format(manager_id))
+ logger.error("Manager {!r} has incompatible version info with the interchange".format(manager_id))
  logger.debug("Setting kill event")
  kill_event.set()
  e = VersionMismatch("py.v={} parsl.v={}".format(self.current_platform['python_v'].rsplit(".", 1)[0],
  self.current_platform['parsl_v']),
  "py.v={} parsl.v={}".format(msg['python_v'].rsplit(".", 1)[0],
  msg['parsl_v'])
- )
+ )
  result_package = {'type': 'result', 'task_id': -1, 'exception': serialize_object(e)}
  pkl_package = pickle.dumps(result_package)
  self.results_outgoing.send(pkl_package)
  logger.error("Sent failure reports, shutting down interchange")
  else:
- logger.info("Manager {} has compatible Parsl version {}".format(manager_id, msg['parsl_v']))
- logger.info("Manager {} has compatible Python version {}".format(manager_id,
- msg['python_v'].rsplit(".", 1)[0]))
+ logger.info("Manager {!r} has compatible Parsl version {}".format(manager_id, msg['parsl_v']))
+ logger.info("Manager {!r} has compatible Python version {}".format(manager_id,
+ msg['python_v'].rsplit(".", 1)[0]))
  else:
  # Registration has failed.
- logger.debug("Suppressing bad registration from manager: {}".format(
- manager_id))
+ logger.debug("Suppressing bad registration from manager: {!r}".format(manager_id))

  else:
  tasks_requested = int.from_bytes(message[1], "little")
  self._ready_managers[manager_id]['last_heartbeat'] = time.time()
  if tasks_requested == HEARTBEAT_CODE:
- logger.debug("Manager {} sent heartbeat via tasks connection".format(manager_id))
+ logger.debug("Manager {!r} sent heartbeat via tasks connection".format(manager_id))
  self.task_outgoing.send_multipart([manager_id, b'', PKL_HEARTBEAT_CODE])
  else:
  logger.error("Unexpected non-heartbeat message received from manager {}")
  logger.debug("leaving task_outgoing section")

- def process_tasks_to_send(self, interesting_managers):
+ def process_tasks_to_send(self, interesting_managers: Set[bytes]) -> None:
  # If we had received any requests, check if there are tasks that could be passed

  logger.debug("Managers count (interesting/total): {interesting}/{total}".format(
@@ -475,14 +475,14 @@
  tids = [t['task_id'] for t in tasks]
  m['tasks'].extend(tids)
  m['idle_since'] = None
- logger.debug("Sent tasks: {} to manager {}".format(tids, manager_id))
+ logger.debug("Sent tasks: {} to manager {!r}".format(tids, manager_id))
  # recompute real_capacity after sending tasks
  real_capacity = m['max_capacity'] - tasks_inflight
  if real_capacity > 0:
- logger.debug("Manager {} has free capacity {}".format(manager_id, real_capacity))
+ logger.debug("Manager {!r} has free capacity {}".format(manager_id, real_capacity))
  # ... so keep it in the interesting_managers list
  else:
- logger.debug("Manager {} is now saturated".format(manager_id))
+ logger.debug("Manager {!r} is now saturated".format(manager_id))
  interesting_managers.remove(manager_id)
  else:
  interesting_managers.remove(manager_id)
@@ -491,15 +491,15 @@
  else:
  logger.debug("either no interesting managers or no tasks, so skipping manager pass")

- def process_results_incoming(self, interesting_managers, hub_channel):
+ def process_results_incoming(self, interesting_managers: Set[bytes], hub_channel: Optional[zmq.Socket]) -> None:
  # Receive any results and forward to client
  if self.results_incoming in self.socks and self.socks[self.results_incoming] == zmq.POLLIN:
  logger.debug("entering results_incoming section")
  manager_id, *all_messages = self.results_incoming.recv_multipart()
  if manager_id not in self._ready_managers:
- logger.warning("Received a result from a un-registered manager: {}".format(manager_id))
+ logger.warning("Received a result from a un-registered manager: {!r}".format(manager_id))
  else:
- logger.debug(f"Got {len(all_messages)} result items in batch from manager {manager_id}")
+ logger.debug(f"Got {len(all_messages)} result items in batch from manager {manager_id!r}")

  b_messages = []

@@ -509,25 +509,31 @@
  # process this for task ID and forward to executor
  b_messages.append((p_message, r))
  elif r['type'] == 'monitoring':
+ # the monitoring code makes the assumption that no
+ # monitoring messages will be received if monitoring
+ # is not configured, and that hub_channel will only
+ # be None when monitoring is not configurated.
+ assert hub_channel is not None
+
  hub_channel.send_pyobj(r['payload'])
  elif r['type'] == 'heartbeat':
- logger.debug(f"Manager {manager_id} sent heartbeat via results connection")
+ logger.debug(f"Manager {manager_id!r} sent heartbeat via results connection")
  b_messages.append((p_message, r))
  else:
  logger.error("Interchange discarding result_queue message of unknown type: {}".format(r['type']))

  got_result = False
  m = self._ready_managers[manager_id]
- for (b_message, r) in b_messages:
+ for (_, r) in b_messages:
  assert 'type' in r, f"Message is missing type entry: {r}"
  if r['type'] == 'result':
  got_result = True
  try:
- logger.debug(f"Removing task {r['task_id']} from manager record {manager_id}")
+ logger.debug(f"Removing task {r['task_id']} from manager record {manager_id!r}")
  m['tasks'].remove(r['task_id'])
  except Exception:
  # If we reach here, there's something very wrong.
- logger.exception("Ignoring exception removing task_id {} for manager {} with task list {}".format(
+ logger.exception("Ignoring exception removing task_id {} for manager {!r} with task list {}".format(
  r['task_id'],
  manager_id,
  m['tasks']))
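Aside (not part of the diff): the added assert documents the invariant spelled out in the new comment and also narrows the Optional[zmq.Socket] type for checkers such as mypy, so the following send_pyobj call type-checks. The same pattern in isolation, as a minimal sketch:

# illustration only -- not part of the parsl 2023.11.20 diff
from typing import Optional

import zmq

def forward_monitoring(hub_channel: Optional[zmq.Socket], payload: object) -> None:
    # Invariant, as in the interchange: monitoring messages only arrive when
    # monitoring is configured, and hub_channel is only None when it is not.
    # The assert records that and narrows Optional[zmq.Socket] to zmq.Socket.
    assert hub_channel is not None
    hub_channel.send_pyobj(payload)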
@@ -541,7 +547,7 @@
  self.results_outgoing.send_multipart(b_messages_to_send)
  logger.debug("Sent messages on results_outgoing")

- logger.debug(f"Current tasks on manager {manager_id}: {m['tasks']}")
+ logger.debug(f"Current tasks on manager {manager_id!r}: {m['tasks']}")
  if len(m['tasks']) == 0 and m['idle_since'] is None:
  m['idle_since'] = time.time()

@@ -553,12 +559,12 @@
  interesting_managers.add(manager_id)
  logger.debug("leaving results_incoming section")

- def expire_bad_managers(self, interesting_managers, hub_channel):
+ def expire_bad_managers(self, interesting_managers: Set[bytes], hub_channel: Optional[zmq.Socket]) -> None:
  bad_managers = [(manager_id, m) for (manager_id, m) in self._ready_managers.items() if
  time.time() - m['last_heartbeat'] > self.heartbeat_threshold]
  for (manager_id, m) in bad_managers:
  logger.debug("Last: {} Current: {}".format(m['last_heartbeat'], time.time()))
- logger.warning(f"Too many heartbeats missed for manager {manager_id} - removing manager")
+ logger.warning(f"Too many heartbeats missed for manager {manager_id!r} - removing manager")
  if m['active']:
  m['active'] = False
  self._send_monitoring_info(hub_channel, m)
@@ -577,7 +583,7 @@
  interesting_managers.remove(manager_id)


- def start_file_logger(filename, level=logging.DEBUG, format_string=None):
+ def start_file_logger(filename: str, level: int = logging.DEBUG, format_string: Optional[str] = None) -> None:
  """Add a stream log handler.

  Parameters
@@ -609,7 +615,7 @@ def start_file_logger(filename, level=logging.DEBUG, format_string=None):


  @wrap_with_logs(target="interchange")
- def starter(comm_q, *args, **kwargs):
+ def starter(comm_q: multiprocessing.Queue, *args: Any, **kwargs: Any) -> None:
  """Start the interchange process

  The executor is expected to call this function. The args, kwargs match that of the Interchange.__init__
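Aside (not part of the diff): starter is the target of a multiprocessing.Process, which is why this version adds import multiprocessing and annotates comm_q as multiprocessing.Queue. A rough sketch of how an executor might drive it, under the assumption stated in the docstring that kwargs are forwarded to Interchange.__init__; what the interchange actually puts on comm_q is an assumption here and is not shown in this diff:

# illustration only -- the queue payload is an assumption
import multiprocessing

from parsl.executors.high_throughput.interchange import starter

comm_q: multiprocessing.Queue = multiprocessing.Queue(maxsize=10)
proc = multiprocessing.Process(
    target=starter,                      # the annotated function above
    args=(comm_q,),
    kwargs={"logdir": "./runinfo"},      # forwarded to Interchange.__init__
)
proc.start()
reply = comm_q.get(block=True, timeout=120)   # interchange reports back (e.g. its worker ports)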
parsl/executors/high_throughput/process_worker_pool.py

@@ -234,7 +234,7 @@ class Manager:
  'dir': os.getcwd(),
  'cpu_count': psutil.cpu_count(logical=False),
  'total_memory': psutil.virtual_memory().total,
- }
+ }
  b_msg = json.dumps(msg).encode('utf-8')
  return b_msg

@@ -608,7 +608,7 @@ def worker(worker_id, pool_id, pool_size, task_queue, result_queue, worker_queue
  logger.exception("Caught exception while trying to pickle the result package")
  pkl_package = pickle.dumps({'type': 'result', 'task_id': tid,
  'exception': serialize(RemoteExceptionWrapper(*sys.exc_info()))
- })
+ })

  result_queue.put(pkl_package)
  tasks_in_progress.pop(worker_id)

parsl/executors/high_throughput/zmq_pipes.py

@@ -53,7 +53,7 @@ class CommandClient:
  """
  reply = '__PARSL_ZMQ_PIPES_MAGIC__'
  with self._lock:
- for i in range(max_retries):
+ for _ in range(max_retries):
  try:
  self.zmq_socket.send_pyobj(message, copy=True)
  reply = self.zmq_socket.recv_pyobj()

parsl/executors/radical/__init__.py (new file)

@@ -0,0 +1,4 @@
+ from parsl.executors.radical.executor import RadicalPilotExecutor
+ from parsl.executors.radical.rpex_resources import ResourceConfig
+
+ __all__ = ['RadicalPilotExecutor', 'ResourceConfig']
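The new radical subpackage is the headline addition of this release (files 12-16 above, plus the new test configs and tests). Based only on the exports shown in this __init__.py, a hedged sketch of wiring the executor into a Parsl config; the executor's actual constructor parameters live in the new executor.py and rpex_resources.py, which this diff view does not display:

# sketch only -- assumes RadicalPilotExecutor is constructible with defaults,
# which this diff does not show; requires the RADICAL-Pilot dependencies.
import parsl
from parsl.config import Config
from parsl.executors.radical import RadicalPilotExecutor, ResourceConfig  # the new exports

config = Config(executors=[RadicalPilotExecutor()])
parsl.load(config)

# ... define @python_app / @bash_app functions and invoke them as usual ...

parsl.dfk().cleanup()
parsl.clear()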