parsl 2024.8.12__py3-none-any.whl → 2024.8.26__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
Files changed (33)
  1. parsl/channels/oauth_ssh/oauth_ssh.py +10 -2
  2. parsl/channels/ssh/ssh.py +16 -6
  3. parsl/channels/ssh_il/ssh_il.py +12 -2
  4. parsl/executors/high_throughput/executor.py +18 -27
  5. parsl/executors/high_throughput/interchange.py +31 -29
  6. parsl/executors/high_throughput/mpi_executor.py +23 -2
  7. parsl/executors/high_throughput/mpi_prefix_composer.py +5 -4
  8. parsl/executors/status_handling.py +5 -2
  9. parsl/jobs/states.py +6 -1
  10. parsl/monitoring/db_manager.py +21 -65
  11. parsl/monitoring/monitoring.py +10 -23
  12. parsl/monitoring/router.py +12 -39
  13. parsl/providers/slurm/slurm.py +40 -10
  14. parsl/tests/test_htex/test_multiple_disconnected_blocks.py +3 -5
  15. parsl/tests/test_htex/test_resource_spec_validation.py +40 -0
  16. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +1 -1
  17. parsl/tests/test_mpi_apps/test_bad_mpi_config.py +29 -14
  18. parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +16 -8
  19. parsl/tests/test_mpi_apps/test_mpiex.py +2 -3
  20. parsl/tests/test_mpi_apps/test_resource_spec.py +39 -41
  21. parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +85 -0
  22. parsl/version.py +1 -1
  23. {parsl-2024.8.12.data → parsl-2024.8.26.data}/scripts/interchange.py +31 -29
  24. {parsl-2024.8.12.dist-info → parsl-2024.8.26.dist-info}/METADATA +5 -3
  25. {parsl-2024.8.12.dist-info → parsl-2024.8.26.dist-info}/RECORD +32 -31
  26. parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +0 -47
  27. {parsl-2024.8.12.data → parsl-2024.8.26.data}/scripts/exec_parsl_function.py +0 -0
  28. {parsl-2024.8.12.data → parsl-2024.8.26.data}/scripts/parsl_coprocess.py +0 -0
  29. {parsl-2024.8.12.data → parsl-2024.8.26.data}/scripts/process_worker_pool.py +0 -0
  30. {parsl-2024.8.12.dist-info → parsl-2024.8.26.dist-info}/LICENSE +0 -0
  31. {parsl-2024.8.12.dist-info → parsl-2024.8.26.dist-info}/WHEEL +0 -0
  32. {parsl-2024.8.12.dist-info → parsl-2024.8.26.dist-info}/entry_points.txt +0 -0
  33. {parsl-2024.8.12.dist-info → parsl-2024.8.26.dist-info}/top_level.txt +0 -0
parsl/channels/oauth_ssh/oauth_ssh.py CHANGED
@@ -1,11 +1,15 @@
 import logging
 import socket
 
-import paramiko
-
 from parsl.channels.ssh.ssh import DeprecatedSSHChannel
 from parsl.errors import OptionalModuleMissing
 
+try:
+    import paramiko
+    _ssh_enabled = True
+except (ImportError, NameError, FileNotFoundError):
+    _ssh_enabled = False
+
 try:
     from oauth_ssh.oauth_ssh_token import find_access_token
     from oauth_ssh.ssh_service import SSHService
@@ -38,6 +42,10 @@ class DeprecatedOAuthSSHChannel(DeprecatedSSHChannel):
 
         Raises:
         '''
+        if not _ssh_enabled:
+            raise OptionalModuleMissing(['ssh'],
+                                        "OauthSSHChannel requires the ssh module and config.")
+
         if not _oauth_ssh_enabled:
             raise OptionalModuleMissing(['oauth_ssh'],
                                         "OauthSSHChannel requires oauth_ssh module and config.")
parsl/channels/ssh/ssh.py CHANGED
@@ -2,8 +2,6 @@ import errno
 import logging
 import os
 
-import paramiko
-
 from parsl.channels.base import Channel
 from parsl.channels.errors import (
     AuthException,
@@ -13,15 +11,24 @@ from parsl.channels.errors import (
     FileCopyException,
     SSHException,
 )
+from parsl.errors import OptionalModuleMissing
 from parsl.utils import RepresentationMixin
 
+try:
+    import paramiko
+    _ssh_enabled = True
+except (ImportError, NameError, FileNotFoundError):
+    _ssh_enabled = False
+
+
 logger = logging.getLogger(__name__)
 
 
-class NoAuthSSHClient(paramiko.SSHClient):
-    def _auth(self, username, *args):
-        self._transport.auth_none(username)
-        return
+if _ssh_enabled:
+    class NoAuthSSHClient(paramiko.SSHClient):
+        def _auth(self, username, *args):
+            self._transport.auth_none(username)
+            return
 
 
 class DeprecatedSSHChannel(Channel, RepresentationMixin):
@@ -53,6 +60,9 @@ class DeprecatedSSHChannel(Channel, RepresentationMixin):
 
         Raises:
         '''
+        if not _ssh_enabled:
+            raise OptionalModuleMissing(['ssh'],
+                                        "SSHChannel requires the ssh module and config.")
 
         self.hostname = hostname
         self.username = username
parsl/channels/ssh_il/ssh_il.py CHANGED
@@ -1,9 +1,15 @@
 import getpass
 import logging
 
-import paramiko
-
 from parsl.channels.ssh.ssh import DeprecatedSSHChannel
+from parsl.errors import OptionalModuleMissing
+
+try:
+    import paramiko
+    _ssh_enabled = True
+except (ImportError, NameError, FileNotFoundError):
+    _ssh_enabled = False
+
 
 logger = logging.getLogger(__name__)
 
@@ -30,6 +36,10 @@ class DeprecatedSSHInteractiveLoginChannel(DeprecatedSSHChannel):
 
         Raises:
         '''
+        if not _ssh_enabled:
+            raise OptionalModuleMissing(['ssh'],
+                                        "SSHInteractiveLoginChannel requires the ssh module and config.")
+
         self.hostname = hostname
         self.username = username
         self.password = password
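
All three channel modules (oauth_ssh, ssh, ssh_il) now wrap the paramiko import in the same guard, so importing parsl no longer fails on hosts without paramiko; OptionalModuleMissing is raised only when a channel object is actually constructed. A minimal sketch of the pattern, with hypothetical names standing in for paramiko and the channel classes:

    # somelib and NeedsSomeLib are illustrative stand-ins, not parsl APIs.
    try:
        import somelib  # may be absent; that is fine at import time
        _somelib_enabled = True
    except ImportError:
        _somelib_enabled = False


    class NeedsSomeLib:
        def __init__(self) -> None:
            # Defer the failure from import time to construction time.
            if not _somelib_enabled:
                raise ImportError("NeedsSomeLib requires the somelib module")
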
parsl/executors/high_throughput/executor.py CHANGED
@@ -12,7 +12,6 @@ from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union
 
 import typeguard
 
-import parsl.launchers
 from parsl import curvezmq
 from parsl.addresses import get_all_addresses
 from parsl.app.errors import RemoteExceptionWrapper
@@ -25,8 +24,7 @@ from parsl.executors.high_throughput.manager_selector import (
     RandomManagerSelector,
 )
 from parsl.executors.high_throughput.mpi_prefix_composer import (
-    VALID_LAUNCHERS,
-    validate_resource_spec,
+    InvalidResourceSpecification,
 )
 from parsl.executors.status_handling import BlockProviderExecutor
 from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus
@@ -224,17 +222,6 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
         Parsl will create names as integers starting with 0.
 
         default: empty list
-
-    enable_mpi_mode: bool
-        If enabled, MPI launch prefixes will be composed for the batch scheduler based on
-        the nodes available in each batch job and the resource_specification dict passed
-        from the app. This is an experimental feature, please refer to the following doc section
-        before use: https://parsl.readthedocs.io/en/stable/userguide/mpi_apps.html
-
-    mpi_launcher: str
-        This field is only used if enable_mpi_mode is set. Select one from the
-        list of supported MPI launchers = ("srun", "aprun", "mpiexec").
-        default: "mpiexec"
     """
 
     @typeguard.typechecked
@@ -263,8 +250,6 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
                  poll_period: int = 10,
                  address_probe_timeout: Optional[int] = None,
                  worker_logdir_root: Optional[str] = None,
-                 enable_mpi_mode: bool = False,
-                 mpi_launcher: str = "mpiexec",
                  manager_selector: ManagerSelector = RandomManagerSelector(),
                  block_error_handler: Union[bool, Callable[[BlockProviderExecutor, Dict[str, JobStatus]], None]] = True,
                  encrypted: bool = False):
@@ -330,15 +315,6 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
         self.encrypted = encrypted
         self.cert_dir = None
 
-        self.enable_mpi_mode = enable_mpi_mode
-        assert mpi_launcher in VALID_LAUNCHERS, \
-            f"mpi_launcher must be set to one of {VALID_LAUNCHERS}"
-        if self.enable_mpi_mode:
-            assert isinstance(self.provider.launcher, parsl.launchers.SimpleLauncher), \
-                "mpi_mode requires the provider to be configured to use a SimpleLauncher"
-
-        self.mpi_launcher = mpi_launcher
-
         if not launch_cmd:
             launch_cmd = DEFAULT_LAUNCH_CMD
         self.launch_cmd = launch_cmd
@@ -348,6 +324,8 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
         self.interchange_launch_cmd = interchange_launch_cmd
 
     radio_mode = "htex"
+    enable_mpi_mode: bool = False
+    mpi_launcher: str = "mpiexec"
 
     def _warn_deprecated(self, old: str, new: str):
         warnings.warn(
@@ -377,6 +355,18 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
             return "{}/{}".format(self.worker_logdir_root, self.label)
         return self.logdir
 
+    def validate_resource_spec(self, resource_specification: dict):
+        """HTEX does not support *any* resource_specification options and
+        will raise InvalidResourceSpecification if any are passed to it"""
+        if resource_specification:
+            raise InvalidResourceSpecification(
+                set(resource_specification.keys()),
+                ("HTEX does not support the supplied resource_specifications. "
+                 "For MPI applications consider using the MPIExecutor. "
+                 "For specifications for core count/memory/walltime, consider using WorkQueueExecutor. ")
+            )
+        return
+
     def initialize_scaling(self):
         """Compose the launch command and scale out the initial blocks.
         """
@@ -660,7 +650,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
             Future
         """
 
-        validate_resource_spec(resource_specification, self.enable_mpi_mode)
+        self.validate_resource_spec(resource_specification)
 
         if self.bad_state_is_set:
             raise self.executor_exception
@@ -800,7 +790,8 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
         connected_blocks = self.connected_blocks()
         for job_id in job_status:
             job_info = job_status[job_id]
-            if job_info.terminal and job_id not in connected_blocks:
+            if job_info.terminal and job_id not in connected_blocks and job_info.state != JobState.SCALED_IN:
+                logger.debug("Rewriting job %s from status %s to MISSING", job_id, job_info)
                 job_status[job_id].state = JobState.MISSING
                 if job_status[job_id].message is None:
                     job_status[job_id].message = (
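
Validation of resource_specification is now a per-executor method instead of a module-level helper gated on enable_mpi_mode, and plain HTEX rejects any specification outright. A hedged sketch of the resulting behaviour, assuming `htex` is an already-configured HighThroughputExecutor instance:

    from parsl.executors.high_throughput.mpi_prefix_composer import (
        InvalidResourceSpecification,
    )

    # `htex` is assumed to be an already-configured HighThroughputExecutor.
    try:
        htex.validate_resource_spec({"num_ranks": 4})
    except InvalidResourceSpecification as e:
        print(e)  # names the offending keys; suggests MPIExecutor / WorkQueueExecutor
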
parsl/executors/high_throughput/interchange.py CHANGED
@@ -375,7 +375,7 @@ class Interchange:
 
         self.zmq_context.destroy()
         delta = time.time() - start
-        logger.info("Processed {} tasks in {} seconds".format(self.count, delta))
+        logger.info(f"Processed {self.count} tasks in {delta} seconds")
         logger.warning("Exiting")
 
     def process_task_outgoing_incoming(
@@ -396,9 +396,8 @@
         try:
             msg = json.loads(message[1].decode('utf-8'))
         except Exception:
-            logger.warning("Got Exception reading message from manager: {!r}".format(
-                manager_id), exc_info=True)
-            logger.debug("Message: \n{!r}\n".format(message[1]))
+            logger.warning(f"Got Exception reading message from manager: {manager_id!r}", exc_info=True)
+            logger.debug("Message:\n %r\n", message[1])
             return
 
         # perform a bit of validation on the structure of the deserialized
@@ -406,7 +405,7 @@
         # in obviously malformed cases
         if not isinstance(msg, dict) or 'type' not in msg:
             logger.error(f"JSON message was not correctly formatted from manager: {manager_id!r}")
-            logger.debug("Message: \n{!r}\n".format(message[1]))
+            logger.debug("Message:\n %r\n", message[1])
             return
 
         if msg['type'] == 'registration':
@@ -425,7 +424,7 @@
             self.connected_block_history.append(msg['block_id'])
 
             interesting_managers.add(manager_id)
-            logger.info("Adding manager: {!r} to ready queue".format(manager_id))
+            logger.info(f"Adding manager: {manager_id!r} to ready queue")
             m = self._ready_managers[manager_id]
 
             # m is a ManagerRecord, but msg is a dict[Any,Any] and so can
@@ -434,12 +433,12 @@
             # later.
             m.update(msg)  # type: ignore[typeddict-item]
 
-            logger.info("Registration info for manager {!r}: {}".format(manager_id, msg))
+            logger.info(f"Registration info for manager {manager_id!r}: {msg}")
            self._send_monitoring_info(monitoring_radio, m)
 
             if (msg['python_v'].rsplit(".", 1)[0] != self.current_platform['python_v'].rsplit(".", 1)[0] or
                 msg['parsl_v'] != self.current_platform['parsl_v']):
-                logger.error("Manager {!r} has incompatible version info with the interchange".format(manager_id))
+                logger.error(f"Manager {manager_id!r} has incompatible version info with the interchange")
                 logger.debug("Setting kill event")
                 kill_event.set()
                 e = VersionMismatch("py.v={} parsl.v={}".format(self.current_platform['python_v'].rsplit(".", 1)[0],
@@ -452,16 +451,15 @@
                 self.results_outgoing.send(pkl_package)
                 logger.error("Sent failure reports, shutting down interchange")
             else:
-                logger.info("Manager {!r} has compatible Parsl version {}".format(manager_id, msg['parsl_v']))
-                logger.info("Manager {!r} has compatible Python version {}".format(manager_id,
-                                                                                   msg['python_v'].rsplit(".", 1)[0]))
+                logger.info(f"Manager {manager_id!r} has compatible Parsl version {msg['parsl_v']}")
+                logger.info(f"Manager {manager_id!r} has compatible Python version {msg['python_v'].rsplit('.', 1)[0]}")
         elif msg['type'] == 'heartbeat':
             self._ready_managers[manager_id]['last_heartbeat'] = time.time()
-            logger.debug("Manager {!r} sent heartbeat via tasks connection".format(manager_id))
+            logger.debug("Manager %r sent heartbeat via tasks connection", manager_id)
             self.task_outgoing.send_multipart([manager_id, b'', PKL_HEARTBEAT_CODE])
         elif msg['type'] == 'drain':
             self._ready_managers[manager_id]['draining'] = True
-            logger.debug(f"Manager {manager_id!r} requested drain")
+            logger.debug("Manager %r requested drain", manager_id)
         else:
             logger.error(f"Unexpected message type received from manager: {msg['type']}")
         logger.debug("leaving task_outgoing section")
@@ -484,9 +482,11 @@
     def process_tasks_to_send(self, interesting_managers: Set[bytes]) -> None:
         # Check if there are tasks that could be sent to managers
 
-        logger.debug("Managers count (interesting/total): {interesting}/{total}".format(
-            total=len(self._ready_managers),
-            interesting=len(interesting_managers)))
+        logger.debug(
+            "Managers count (interesting/total): %d/%d",
+            len(interesting_managers),
+            len(self._ready_managers)
+        )
 
         if interesting_managers and not self.pending_task_queue.empty():
             shuffled_managers = self.manager_selector.sort_managers(self._ready_managers, interesting_managers)
@@ -497,7 +497,7 @@
                 tasks_inflight = len(m['tasks'])
                 real_capacity = m['max_capacity'] - tasks_inflight
 
-                if (real_capacity and m['active'] and not m['draining']):
+                if real_capacity and m["active"] and not m["draining"]:
                     tasks = self.get_tasks(real_capacity)
                     if tasks:
                         self.task_outgoing.send_multipart([manager_id, b'', pickle.dumps(tasks)])
@@ -506,19 +506,19 @@
                         tids = [t['task_id'] for t in tasks]
                         m['tasks'].extend(tids)
                         m['idle_since'] = None
-                        logger.debug("Sent tasks: {} to manager {!r}".format(tids, manager_id))
+                        logger.debug("Sent tasks: %s to manager %r", tids, manager_id)
                         # recompute real_capacity after sending tasks
                         real_capacity = m['max_capacity'] - tasks_inflight
                         if real_capacity > 0:
-                            logger.debug("Manager {!r} has free capacity {}".format(manager_id, real_capacity))
+                            logger.debug("Manager %r has free capacity %s", manager_id, real_capacity)
                             # ... so keep it in the interesting_managers list
                         else:
-                            logger.debug("Manager {!r} is now saturated".format(manager_id))
+                            logger.debug("Manager %r is now saturated", manager_id)
                             interesting_managers.remove(manager_id)
                 else:
                     interesting_managers.remove(manager_id)
                     # logger.debug("Nothing to send to manager {}".format(manager_id))
-            logger.debug("leaving _ready_managers section, with {} managers still interesting".format(len(interesting_managers)))
+            logger.debug("leaving _ready_managers section, with %s managers still interesting", len(interesting_managers))
         else:
             logger.debug("either no interesting managers or no tasks, so skipping manager pass")
 
@@ -528,9 +528,9 @@
             logger.debug("entering results_incoming section")
             manager_id, *all_messages = self.results_incoming.recv_multipart()
             if manager_id not in self._ready_managers:
-                logger.warning("Received a result from a un-registered manager: {!r}".format(manager_id))
+                logger.warning(f"Received a result from a un-registered manager: {manager_id!r}")
             else:
-                logger.debug(f"Got {len(all_messages)} result items in batch from manager {manager_id!r}")
+                logger.debug("Got %s result items in batch from manager %r", len(all_messages), manager_id)
 
                 b_messages = []
 
@@ -548,10 +548,10 @@
 
                         monitoring_radio.send(r['payload'])
                     elif r['type'] == 'heartbeat':
-                        logger.debug(f"Manager {manager_id!r} sent heartbeat via results connection")
+                        logger.debug("Manager %r sent heartbeat via results connection", manager_id)
                         b_messages.append((p_message, r))
                     else:
-                        logger.error("Interchange discarding result_queue message of unknown type: {}".format(r['type']))
+                        logger.error("Interchange discarding result_queue message of unknown type: %s", r["type"])
 
                 got_result = False
                 m = self._ready_managers[manager_id]
@@ -560,14 +560,16 @@
                     if r['type'] == 'result':
                         got_result = True
                         try:
-                            logger.debug(f"Removing task {r['task_id']} from manager record {manager_id!r}")
+                            logger.debug("Removing task %s from manager record %r", r["task_id"], manager_id)
                             m['tasks'].remove(r['task_id'])
                         except Exception:
                             # If we reach here, there's something very wrong.
-                            logger.exception("Ignoring exception removing task_id {} for manager {!r} with task list {}".format(
+                            logger.exception(
+                                "Ignoring exception removing task_id %s for manager %r with task list %s",
                                 r['task_id'],
                                 manager_id,
-                                m['tasks']))
+                                m["tasks"]
+                            )
 
                 b_messages_to_send = []
                 for (b_message, _) in b_messages:
@@ -578,7 +580,7 @@
                     self.results_outgoing.send_multipart(b_messages_to_send)
                     logger.debug("Sent messages on results_outgoing")
 
-                logger.debug(f"Current tasks on manager {manager_id!r}: {m['tasks']}")
+                logger.debug("Current tasks on manager %r: %s", manager_id, m["tasks"])
                 if len(m['tasks']) == 0 and m['idle_since'] is None:
                     m['idle_since'] = time.time()
 
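The interchange edits above modernise eager .format() calls: messages that are always emitted become f-strings, while debug-level messages move to logging's deferred %-style arguments so the string is only built when the level is enabled. The two idioms side by side (values illustrative):

    import logging

    logger = logging.getLogger(__name__)
    manager_id = b"abc123"  # illustrative value

    # Eager: the f-string is built even when DEBUG is disabled.
    logger.debug(f"Manager {manager_id!r} requested drain")

    # Deferred: formatting happens inside logging, only for enabled levels.
    logger.debug("Manager %r requested drain", manager_id)
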
parsl/executors/high_throughput/mpi_executor.py CHANGED
@@ -8,8 +8,13 @@ from parsl.executors.high_throughput.executor import (
     GENERAL_HTEX_PARAM_DOCS,
     HighThroughputExecutor,
 )
+from parsl.executors.high_throughput.mpi_prefix_composer import (
+    VALID_LAUNCHERS,
+    validate_resource_spec,
+)
 from parsl.executors.status_handling import BlockProviderExecutor
 from parsl.jobs.states import JobStatus
+from parsl.launchers import SimpleLauncher
 from parsl.providers import LocalProvider
 from parsl.providers.base import ExecutionProvider
@@ -30,6 +35,11 @@ class MPIExecutor(HighThroughputExecutor):
     max_workers_per_block: int
         Maximum number of MPI applications to run at once per block
 
+    mpi_launcher: str
+        Select one from the list of supported MPI launchers:
+        ("srun", "aprun", "mpiexec").
+        default: "mpiexec"
+
     {GENERAL_HTEX_PARAM_DOCS}
     """
 
@@ -60,7 +70,6 @@ class MPIExecutor(HighThroughputExecutor):
         super().__init__(
             # Hard-coded settings
             cores_per_worker=1e-9,  # Ensures there will be at least an absurd number of workers
-            enable_mpi_mode=True,
             max_workers_per_node=max_workers_per_block,
 
             # Everything else
@@ -82,9 +91,21 @@ class MPIExecutor(HighThroughputExecutor):
             poll_period=poll_period,
             address_probe_timeout=address_probe_timeout,
             worker_logdir_root=worker_logdir_root,
-            mpi_launcher=mpi_launcher,
             block_error_handler=block_error_handler,
             encrypted=encrypted
         )
+        self.enable_mpi_mode = True
+        self.mpi_launcher = mpi_launcher
 
         self.max_workers_per_block = max_workers_per_block
+
+        if not isinstance(self.provider.launcher, SimpleLauncher):
+            raise TypeError("mpi_mode requires the provider to be configured to use a SimpleLauncher")
+
+        if mpi_launcher not in VALID_LAUNCHERS:
+            raise ValueError(f"mpi_launcher set to:{mpi_launcher} must be set to one of {VALID_LAUNCHERS}")
+
+        self.mpi_launcher = mpi_launcher
+
+    def validate_resource_spec(self, resource_specification: dict):
+        return validate_resource_spec(resource_specification)
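
The MPI-specific options now live entirely on MPIExecutor, which validates mpi_launcher against VALID_LAUNCHERS and requires the provider to use a SimpleLauncher, raising ValueError or TypeError at construction instead of the old asserts. A hedged configuration sketch (the Slurm provider and all values are illustrative):

    from parsl.config import Config
    from parsl.executors import MPIExecutor
    from parsl.launchers import SimpleLauncher
    from parsl.providers import SlurmProvider

    config = Config(
        executors=[
            MPIExecutor(
                max_workers_per_block=2,  # MPI applications per batch job
                mpi_launcher="srun",      # must be one of VALID_LAUNCHERS
                provider=SlurmProvider(
                    launcher=SimpleLauncher(),  # any other launcher raises TypeError
                ),
            )
        ]
    )
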
parsl/executors/high_throughput/mpi_prefix_composer.py CHANGED
@@ -21,14 +21,15 @@ class MissingResourceSpecification(Exception):
 class InvalidResourceSpecification(Exception):
     """Exception raised when Invalid input is supplied via resource specification"""
 
-    def __init__(self, invalid_keys: Set[str]):
+    def __init__(self, invalid_keys: Set[str], message: str = ''):
         self.invalid_keys = invalid_keys
+        self.message = message
 
     def __str__(self):
-        return f"Invalid resource specification options supplied: {self.invalid_keys}"
+        return f"Invalid resource specification options supplied: {self.invalid_keys} {self.message}"
 
 
-def validate_resource_spec(resource_spec: Dict[str, str], is_mpi_enabled: bool):
+def validate_resource_spec(resource_spec: Dict[str, str]):
     """Basic validation of keys in the resource_spec
 
     Raises: InvalidResourceSpecification if the resource_spec
@@ -38,7 +39,7 @@ def validate_resource_spec(resource_spec: Dict[str, str], is_mpi_enabled: bool):
 
     # empty resource_spec when mpi_mode is set causes parsl to hang
     # ref issue #3427
-    if is_mpi_enabled and len(user_keys) == 0:
+    if len(user_keys) == 0:
         raise MissingResourceSpecification('MPI mode requires optional parsl_resource_specification keyword argument to be configured')
 
     legal_keys = set(("ranks_per_node",
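
InvalidResourceSpecification now carries an optional free-text message alongside the offending keys, which callers such as HTEX use to suggest an alternative executor. A small sketch using the new signature (example values are illustrative):

    from parsl.executors.high_throughput.mpi_prefix_composer import (
        InvalidResourceSpecification,
    )

    err = InvalidResourceSpecification({"num_ranks"}, "consider MPIExecutor instead")
    print(err)
    # Invalid resource specification options supplied: {'num_ranks'} consider MPIExecutor instead
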
parsl/executors/status_handling.py CHANGED
@@ -347,7 +347,10 @@ class BlockProviderExecutor(ParslExecutor):
         if block_ids is not None:
             new_status = {}
             for block_id in block_ids:
-                new_status[block_id] = JobStatus(JobState.CANCELLED)
-                del self._status[block_id]
+                logger.debug("Marking block %s as SCALED_IN", block_id)
+                s = JobStatus(JobState.SCALED_IN)
+                new_status[block_id] = s
+                self._status[block_id] = s
+                self._simulated_status[block_id] = s
             self.send_monitoring_info(new_status)
         return block_ids
parsl/jobs/states.py CHANGED
@@ -46,12 +46,17 @@ class JobState(IntEnum):
     bad worker environment or network connectivity issues.
     """
 
+    SCALED_IN = 9
+    """This job has been deliberately scaled in. Scaling code should not be concerned
+    that the job never ran (for example for error handling purposes).
+    """
+
     def __str__(self) -> str:
         return f"{self.__class__.__name__}.{self.name}"
 
 
 TERMINAL_STATES = [JobState.CANCELLED, JobState.COMPLETED, JobState.FAILED,
-                   JobState.TIMEOUT, JobState.MISSING]
+                   JobState.TIMEOUT, JobState.MISSING, JobState.SCALED_IN]
 
 
 class JobStatus:
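
Together with the status_handling.py change above, the new state lets deliberately scaled-in blocks reach a terminal status without later being rewritten to MISSING by the HTEX status poll (the regression exercised by test_regression_3568_scaledown_vs_MISSING.py). The distinction in brief:

    from parsl.jobs.states import TERMINAL_STATES, JobState

    # SCALED_IN is terminal, so no error handling or replacement scaling is triggered...
    assert JobState.SCALED_IN in TERMINAL_STATES

    # ...and it is distinct from MISSING, so an intentionally removed block is
    # no longer misreported as a lost one.
    assert JobState.SCALED_IN != JobState.MISSING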