parsl 2024.6.10__py3-none-any.whl → 2024.6.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. parsl/app/app.py +0 -2
  2. parsl/app/bash.py +2 -3
  3. parsl/channels/local/local.py +7 -2
  4. parsl/configs/ASPIRE1.py +3 -1
  5. parsl/configs/Azure.py +3 -1
  6. parsl/configs/ad_hoc.py +2 -0
  7. parsl/configs/bridges.py +3 -1
  8. parsl/configs/cc_in2p3.py +2 -0
  9. parsl/configs/ec2.py +2 -0
  10. parsl/configs/expanse.py +3 -1
  11. parsl/configs/frontera.py +2 -0
  12. parsl/configs/htex_local.py +2 -0
  13. parsl/configs/illinoiscluster.py +2 -0
  14. parsl/configs/kubernetes.py +3 -1
  15. parsl/configs/local_threads.py +5 -1
  16. parsl/configs/midway.py +2 -0
  17. parsl/configs/osg.py +3 -1
  18. parsl/configs/polaris.py +3 -1
  19. parsl/configs/stampede2.py +2 -0
  20. parsl/configs/summit.py +2 -0
  21. parsl/configs/toss3_llnl.py +3 -1
  22. parsl/configs/vineex_local.py +3 -1
  23. parsl/configs/wqex_local.py +3 -1
  24. parsl/executors/high_throughput/executor.py +36 -31
  25. parsl/executors/high_throughput/interchange.py +5 -8
  26. parsl/executors/workqueue/executor.py +25 -5
  27. parsl/providers/kubernetes/kube.py +3 -3
  28. parsl/tests/test_htex/test_htex.py +24 -7
  29. parsl/version.py +1 -1
  30. parsl-2024.6.24.data/scripts/interchange.py +681 -0
  31. {parsl-2024.6.10.dist-info → parsl-2024.6.24.dist-info}/METADATA +2 -2
  32. {parsl-2024.6.10.dist-info → parsl-2024.6.24.dist-info}/RECORD +39 -38
  33. {parsl-2024.6.10.data → parsl-2024.6.24.data}/scripts/exec_parsl_function.py +0 -0
  34. {parsl-2024.6.10.data → parsl-2024.6.24.data}/scripts/parsl_coprocess.py +0 -0
  35. {parsl-2024.6.10.data → parsl-2024.6.24.data}/scripts/process_worker_pool.py +0 -0
  36. {parsl-2024.6.10.dist-info → parsl-2024.6.24.dist-info}/LICENSE +0 -0
  37. {parsl-2024.6.10.dist-info → parsl-2024.6.24.dist-info}/WHEEL +0 -0
  38. {parsl-2024.6.10.dist-info → parsl-2024.6.24.dist-info}/entry_points.txt +0 -0
  39. {parsl-2024.6.10.dist-info → parsl-2024.6.24.dist-info}/top_level.txt +0 -0
parsl/app/app.py CHANGED
@@ -66,8 +66,6 @@ class AppBase(metaclass=ABCMeta):
66
66
  self.kwargs['walltime'] = params['walltime'].default
67
67
  if 'parsl_resource_specification' in params:
68
68
  self.kwargs['parsl_resource_specification'] = params['parsl_resource_specification'].default
69
- self.outputs = params['outputs'].default if 'outputs' in params else []
70
- self.inputs = params['inputs'].default if 'inputs' in params else []
71
69
 
72
70
  @abstractmethod
73
71
  def __call__(self, *args: Any, **kwargs: Any) -> AppFuture:
parsl/app/bash.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from functools import partial, update_wrapper
2
+ from functools import partial
3
3
  from inspect import Parameter, signature
4
4
 
5
5
  from parsl.app.app import AppBase
@@ -123,11 +123,10 @@ class BashApp(AppBase):
123
123
  if sig.parameters[s].default is not Parameter.empty:
124
124
  self.kwargs[s] = sig.parameters[s].default
125
125
 
126
- # update_wrapper allows remote_side_bash_executor to masquerade as self.func
127
126
  # partial is used to attach the first arg the "func" to the remote_side_bash_executor
128
127
  # this is done to avoid passing a function type in the args which parsl.serializer
129
128
  # doesn't support
130
- remote_fn = partial(update_wrapper(remote_side_bash_executor, self.func), self.func)
129
+ remote_fn = partial(remote_side_bash_executor, self.func)
131
130
  remote_fn.__name__ = self.func.__name__
132
131
  self.wrapped_remote_function = wrap_error(remote_fn)
133
132
 
@@ -55,6 +55,7 @@ class LocalChannel(Channel, RepresentationMixin):
55
55
  current_env.update(envs)
56
56
 
57
57
  try:
58
+ logger.debug("Creating process with command '%s'", cmd)
58
59
  proc = subprocess.Popen(
59
60
  cmd,
60
61
  stdout=subprocess.PIPE,
@@ -64,12 +65,16 @@ class LocalChannel(Channel, RepresentationMixin):
64
65
  shell=True,
65
66
  preexec_fn=os.setpgrp
66
67
  )
68
+ logger.debug("Created process with pid %s. Performing communicate", proc.pid)
67
69
  (stdout, stderr) = proc.communicate(timeout=walltime)
68
70
  retcode = proc.returncode
71
+ logger.debug("Process %s returned %s", proc.pid, proc.returncode)
69
72
 
70
- except Exception as e:
71
- logger.warning("Execution of command '{}' failed due to \n{}".format(cmd, e))
73
+ except Exception:
74
+ logger.exception(f"Execution of command failed:\n{cmd}")
72
75
  raise
76
+ else:
77
+ logger.debug("Execution of command in process %s completed normally", proc.pid)
73
78
 
74
79
  return (retcode, stdout.decode("utf-8"), stderr.decode("utf-8"))
75
80
 
parsl/configs/ASPIRE1.py CHANGED
@@ -4,6 +4,7 @@ from parsl.executors import HighThroughputExecutor
4
4
  from parsl.launchers import MpiRunLauncher
5
5
  from parsl.monitoring.monitoring import MonitoringHub
6
6
  from parsl.providers import PBSProProvider
7
+ from parsl.usage_tracking.levels import LEVEL_1
7
8
 
8
9
  config = Config(
9
10
  executors=[
@@ -39,5 +40,6 @@ config = Config(
39
40
  strategy='simple',
40
41
  retries=3,
41
42
  app_cache=True,
42
- checkpoint_mode='task_exit'
43
+ checkpoint_mode='task_exit',
44
+ usage_tracking=LEVEL_1,
43
45
  )
parsl/configs/Azure.py CHANGED
@@ -8,6 +8,7 @@ from parsl.data_provider.http import HTTPInTaskStaging
8
8
  from parsl.data_provider.rsync import RSyncStaging
9
9
  from parsl.executors import HighThroughputExecutor
10
10
  from parsl.providers import AzureProvider
11
+ from parsl.usage_tracking.levels import LEVEL_1
11
12
 
12
13
  vm_reference = {
13
14
  # All fields below are required
@@ -33,5 +34,6 @@ config = Config(
33
34
  FTPInTaskStaging(),
34
35
  RSyncStaging(getpass.getuser() + "@" + address_by_query())],
35
36
  )
36
- ]
37
+ ],
38
+ usage_tracking=LEVEL_1,
37
39
  )
parsl/configs/ad_hoc.py CHANGED
@@ -4,6 +4,7 @@ from parsl.channels import SSHChannel
4
4
  from parsl.config import Config
5
5
  from parsl.executors import HighThroughputExecutor
6
6
  from parsl.providers import AdHocProvider
7
+ from parsl.usage_tracking.levels import LEVEL_1
7
8
 
8
9
  user_opts: Dict[str, Dict[str, Any]]
9
10
  user_opts = {'adhoc':
@@ -33,4 +34,5 @@ config = Config(
33
34
  ],
34
35
  # AdHoc Clusters should not be setup with scaling strategy.
35
36
  strategy='none',
37
+ usage_tracking=LEVEL_1,
36
38
  )
parsl/configs/bridges.py CHANGED
@@ -3,6 +3,7 @@ from parsl.config import Config
3
3
  from parsl.executors import HighThroughputExecutor
4
4
  from parsl.launchers import SrunLauncher
5
5
  from parsl.providers import SlurmProvider
6
+ from parsl.usage_tracking.levels import LEVEL_1
6
7
 
7
8
  """ This config assumes that it is used to launch parsl tasks from the login nodes
8
9
  of Bridges at PSC. Each job submitted to the scheduler will request 2 nodes for 10 minutes.
@@ -34,5 +35,6 @@ config = Config(
34
35
  cmd_timeout=120,
35
36
  ),
36
37
  )
37
- ]
38
+ ],
39
+ usage_tracking=LEVEL_1,
38
40
  )
parsl/configs/cc_in2p3.py CHANGED
@@ -2,6 +2,7 @@ from parsl.channels import LocalChannel
2
2
  from parsl.config import Config
3
3
  from parsl.executors import HighThroughputExecutor
4
4
  from parsl.providers import GridEngineProvider
5
+ from parsl.usage_tracking.levels import LEVEL_1
5
6
 
6
7
  config = Config(
7
8
  executors=[
@@ -19,4 +20,5 @@ config = Config(
19
20
  ),
20
21
  )
21
22
  ],
23
+ usage_tracking=LEVEL_1,
22
24
  )
parsl/configs/ec2.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from parsl.config import Config
2
2
  from parsl.executors import HighThroughputExecutor
3
3
  from parsl.providers import AWSProvider
4
+ from parsl.usage_tracking.levels import LEVEL_1
4
5
 
5
6
  config = Config(
6
7
  executors=[
@@ -25,4 +26,5 @@ config = Config(
25
26
  ),
26
27
  )
27
28
  ],
29
+ usage_tracking=LEVEL_1,
28
30
  )
parsl/configs/expanse.py CHANGED
@@ -2,6 +2,7 @@ from parsl.config import Config
2
2
  from parsl.executors import HighThroughputExecutor
3
3
  from parsl.launchers import SrunLauncher
4
4
  from parsl.providers import SlurmProvider
5
+ from parsl.usage_tracking.levels import LEVEL_1
5
6
 
6
7
  config = Config(
7
8
  executors=[
@@ -24,5 +25,6 @@ config = Config(
24
25
  nodes_per_block=2,
25
26
  ),
26
27
  )
27
- ]
28
+ ],
29
+ usage_tracking=LEVEL_1,
28
30
  )
parsl/configs/frontera.py CHANGED
@@ -3,6 +3,7 @@ from parsl.config import Config
3
3
  from parsl.executors import HighThroughputExecutor
4
4
  from parsl.launchers import SrunLauncher
5
5
  from parsl.providers import SlurmProvider
6
+ from parsl.usage_tracking.levels import LEVEL_1
6
7
 
7
8
  """ This config assumes that it is used to launch parsl tasks from the login nodes
8
9
  of Frontera at TACC. Each job submitted to the scheduler will request 2 nodes for 10 minutes.
@@ -32,4 +33,5 @@ config = Config(
32
33
  ),
33
34
  )
34
35
  ],
36
+ usage_tracking=LEVEL_1,
35
37
  )
@@ -2,6 +2,7 @@ from parsl.channels import LocalChannel
2
2
  from parsl.config import Config
3
3
  from parsl.executors import HighThroughputExecutor
4
4
  from parsl.providers import LocalProvider
5
+ from parsl.usage_tracking.levels import LEVEL_1
5
6
 
6
7
  config = Config(
7
8
  executors=[
@@ -15,4 +16,5 @@ config = Config(
15
16
  ),
16
17
  )
17
18
  ],
19
+ usage_tracking=LEVEL_1,
18
20
  )
@@ -2,6 +2,7 @@ from parsl.config import Config
2
2
  from parsl.executors import HighThroughputExecutor
3
3
  from parsl.launchers import SrunLauncher
4
4
  from parsl.providers import SlurmProvider
5
+ from parsl.usage_tracking.levels import LEVEL_1
5
6
 
6
7
  """ This config assumes that it is used to launch parsl tasks from the login nodes
7
8
  of the Campus Cluster at UIUC. Each job submitted to the scheduler will request 2 nodes for 10 minutes.
@@ -25,4 +26,5 @@ config = Config(
25
26
  ),
26
27
  )
27
28
  ],
29
+ usage_tracking=LEVEL_1,
28
30
  )
@@ -2,6 +2,7 @@ from parsl.addresses import address_by_route
2
2
  from parsl.config import Config
3
3
  from parsl.executors import HighThroughputExecutor
4
4
  from parsl.providers import KubernetesProvider
5
+ from parsl.usage_tracking.levels import LEVEL_1
5
6
 
6
7
  config = Config(
7
8
  executors=[
@@ -36,5 +37,6 @@ config = Config(
36
37
  max_blocks=10,
37
38
  ),
38
39
  ),
39
- ]
40
+ ],
41
+ usage_tracking=LEVEL_1,
40
42
  )
@@ -1,4 +1,8 @@
1
1
  from parsl.config import Config
2
2
  from parsl.executors.threads import ThreadPoolExecutor
3
+ from parsl.usage_tracking.levels import LEVEL_1
3
4
 
4
- config = Config(executors=[ThreadPoolExecutor()])
5
+ config = Config(
6
+ executors=[ThreadPoolExecutor()],
7
+ usage_tracking=LEVEL_1,
8
+ )
parsl/configs/midway.py CHANGED
@@ -3,6 +3,7 @@ from parsl.config import Config
3
3
  from parsl.executors import HighThroughputExecutor
4
4
  from parsl.launchers import SrunLauncher
5
5
  from parsl.providers import SlurmProvider
6
+ from parsl.usage_tracking.levels import LEVEL_1
6
7
 
7
8
  config = Config(
8
9
  executors=[
@@ -28,4 +29,5 @@ config = Config(
28
29
  ),
29
30
  )
30
31
  ],
32
+ usage_tracking=LEVEL_1,
31
33
  )
parsl/configs/osg.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from parsl.config import Config
2
2
  from parsl.executors import HighThroughputExecutor
3
3
  from parsl.providers import CondorProvider
4
+ from parsl.usage_tracking.levels import LEVEL_1
4
5
 
5
6
  config = Config(
6
7
  executors=[
@@ -26,5 +27,6 @@ python3 -m venv parsl_env; source parsl_env/bin/activate; python3 -m pip install
26
27
  worker_logdir_root='$OSG_WN_TMP',
27
28
  worker_ports=(31000, 31001)
28
29
  )
29
- ]
30
+ ],
31
+ usage_tracking=LEVEL_1,
30
32
  )
parsl/configs/polaris.py CHANGED
@@ -3,6 +3,7 @@ from parsl.config import Config
3
3
  from parsl.executors import HighThroughputExecutor
4
4
  from parsl.launchers import MpiExecLauncher
5
5
  from parsl.providers import PBSProProvider
6
+ from parsl.usage_tracking.levels import LEVEL_1
6
7
 
7
8
  # There are three user parameters to change for the PBSProProvider:
8
9
  # YOUR_ACCOUNT: Account to charge usage
@@ -34,5 +35,6 @@ config = Config(
34
35
  cpus_per_node=64,
35
36
  ),
36
37
  ),
37
- ]
38
+ ],
39
+ usage_tracking=LEVEL_1,
38
40
  )
@@ -4,6 +4,7 @@ from parsl.data_provider.globus import GlobusStaging
4
4
  from parsl.executors import HighThroughputExecutor
5
5
  from parsl.launchers import SrunLauncher
6
6
  from parsl.providers import SlurmProvider
7
+ from parsl.usage_tracking.levels import LEVEL_1
7
8
 
8
9
  config = Config(
9
10
  executors=[
@@ -34,4 +35,5 @@ config = Config(
34
35
  )
35
36
 
36
37
  ],
38
+ usage_tracking=LEVEL_1,
37
39
  )
parsl/configs/summit.py CHANGED
@@ -3,6 +3,7 @@ from parsl.config import Config
3
3
  from parsl.executors import HighThroughputExecutor
4
4
  from parsl.launchers import JsrunLauncher
5
5
  from parsl.providers import LSFProvider
6
+ from parsl.usage_tracking.levels import LEVEL_1
6
7
 
7
8
  config = Config(
8
9
  executors=[
@@ -26,4 +27,5 @@ config = Config(
26
27
  )
27
28
 
28
29
  ],
30
+ usage_tracking=LEVEL_1,
29
31
  )
@@ -2,6 +2,7 @@ from parsl.config import Config
2
2
  from parsl.executors import FluxExecutor
3
3
  from parsl.launchers import SrunLauncher
4
4
  from parsl.providers import SlurmProvider
5
+ from parsl.usage_tracking.levels import LEVEL_1
5
6
 
6
7
  config = Config(
7
8
  executors=[
@@ -24,5 +25,6 @@ config = Config(
24
25
  cmd_timeout=120,
25
26
  ),
26
27
  )
27
- ]
28
+ ],
29
+ usage_tracking=LEVEL_1,
28
30
  )
@@ -2,6 +2,7 @@ import uuid
2
2
 
3
3
  from parsl.config import Config
4
4
  from parsl.executors.taskvine import TaskVineExecutor, TaskVineManagerConfig
5
+ from parsl.usage_tracking.levels import LEVEL_1
5
6
 
6
7
  config = Config(
7
8
  executors=[
@@ -15,5 +16,6 @@ config = Config(
15
16
  # To disable status reporting, comment out the project_name.
16
17
  manager_config=TaskVineManagerConfig(project_name="parsl-vine-" + str(uuid.uuid4())),
17
18
  )
18
- ]
19
+ ],
20
+ usage_tracking=LEVEL_1,
19
21
  )
@@ -2,6 +2,7 @@ import uuid
2
2
 
3
3
  from parsl.config import Config
4
4
  from parsl.executors import WorkQueueExecutor
5
+ from parsl.usage_tracking.levels import LEVEL_1
5
6
 
6
7
  config = Config(
7
8
  executors=[
@@ -21,5 +22,6 @@ config = Config(
21
22
  # A shared filesystem is not needed when using Work Queue.
22
23
  shared_fs=False
23
24
  )
24
- ]
25
+ ],
26
+ usage_tracking=LEVEL_1,
25
27
  )
@@ -1,13 +1,13 @@
1
1
  import logging
2
2
  import math
3
3
  import pickle
4
+ import subprocess
4
5
  import threading
5
6
  import typing
6
7
  import warnings
7
8
  from collections import defaultdict
8
9
  from concurrent.futures import Future
9
10
  from dataclasses import dataclass
10
- from multiprocessing import Process
11
11
  from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union
12
12
 
13
13
  import typeguard
@@ -18,7 +18,7 @@ from parsl.addresses import get_all_addresses
18
18
  from parsl.app.errors import RemoteExceptionWrapper
19
19
  from parsl.data_provider.staging import Staging
20
20
  from parsl.executors.errors import BadMessage, ScalingFailed
21
- from parsl.executors.high_throughput import interchange, zmq_pipes
21
+ from parsl.executors.high_throughput import zmq_pipes
22
22
  from parsl.executors.high_throughput.errors import CommandClientTimeoutError
23
23
  from parsl.executors.high_throughput.mpi_prefix_composer import (
24
24
  VALID_LAUNCHERS,
@@ -26,7 +26,6 @@ from parsl.executors.high_throughput.mpi_prefix_composer import (
26
26
  )
27
27
  from parsl.executors.status_handling import BlockProviderExecutor
28
28
  from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus
29
- from parsl.multiprocessing import ForkProcess
30
29
  from parsl.process_loggers import wrap_with_logs
31
30
  from parsl.providers import LocalProvider
32
31
  from parsl.providers.base import ExecutionProvider
@@ -305,7 +304,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
305
304
  self._task_counter = 0
306
305
  self.worker_ports = worker_ports
307
306
  self.worker_port_range = worker_port_range
308
- self.interchange_proc: Optional[Process] = None
307
+ self.interchange_proc: Optional[subprocess.Popen] = None
309
308
  self.interchange_port_range = interchange_port_range
310
309
  self.heartbeat_threshold = heartbeat_threshold
311
310
  self.heartbeat_period = heartbeat_period
@@ -520,38 +519,45 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
520
519
 
521
520
  logger.info("Queue management worker finished")
522
521
 
523
- def _start_local_interchange_process(self):
522
+ def _start_local_interchange_process(self) -> None:
524
523
  """ Starts the interchange process locally
525
524
 
526
- Starts the interchange process locally and uses an internal command queue to
525
+ Starts the interchange process locally and uses the command queue to
527
526
  get the worker task and result ports that the interchange has bound to.
528
527
  """
529
- self.interchange_proc = ForkProcess(target=interchange.starter,
530
- kwargs={"client_address": "127.0.0.1",
531
- "client_ports": (self.outgoing_q.port,
532
- self.incoming_q.port,
533
- self.command_client.port),
534
- "interchange_address": self.address,
535
- "worker_ports": self.worker_ports,
536
- "worker_port_range": self.worker_port_range,
537
- "hub_address": self.hub_address,
538
- "hub_zmq_port": self.hub_zmq_port,
539
- "logdir": self.logdir,
540
- "heartbeat_threshold": self.heartbeat_threshold,
541
- "poll_period": self.poll_period,
542
- "logging_level": logging.DEBUG if self.worker_debug else logging.INFO,
543
- "cert_dir": self.cert_dir,
544
- },
545
- daemon=True,
546
- name="HTEX-Interchange"
547
- )
548
- self.interchange_proc.start()
549
528
 
529
+ interchange_config = {"client_address": "127.0.0.1",
530
+ "client_ports": (self.outgoing_q.port,
531
+ self.incoming_q.port,
532
+ self.command_client.port),
533
+ "interchange_address": self.address,
534
+ "worker_ports": self.worker_ports,
535
+ "worker_port_range": self.worker_port_range,
536
+ "hub_address": self.hub_address,
537
+ "hub_zmq_port": self.hub_zmq_port,
538
+ "logdir": self.logdir,
539
+ "heartbeat_threshold": self.heartbeat_threshold,
540
+ "poll_period": self.poll_period,
541
+ "logging_level": logging.DEBUG if self.worker_debug else logging.INFO,
542
+ "cert_dir": self.cert_dir,
543
+ }
544
+
545
+ config_pickle = pickle.dumps(interchange_config)
546
+
547
+ self.interchange_proc = subprocess.Popen(b"interchange.py", stdin=subprocess.PIPE)
548
+ stdin = self.interchange_proc.stdin
549
+ assert stdin is not None, "Popen should have created an IO object (vs default None) because of PIPE mode"
550
+
551
+ logger.debug("Popened interchange process. Writing config object")
552
+ stdin.write(config_pickle)
553
+ stdin.flush()
554
+ logger.debug("Sent config object. Requesting worker ports")
550
555
  try:
551
556
  (self.worker_task_port, self.worker_result_port) = self.command_client.run("WORKER_PORTS", timeout_s=120)
552
557
  except CommandClientTimeoutError:
553
- logger.error("Interchange has not completed initialization in 120s. Aborting")
558
+ logger.error("Interchange has not completed initialization. Aborting")
554
559
  raise Exception("Interchange failed to start")
560
+ logger.debug("Got worker ports")
555
561
 
556
562
  def _start_queue_management_thread(self):
557
563
  """Method to start the management thread as a daemon.
@@ -810,13 +816,12 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
810
816
  logger.info("Attempting HighThroughputExecutor shutdown")
811
817
 
812
818
  self.interchange_proc.terminate()
813
- self.interchange_proc.join(timeout=timeout)
814
- if self.interchange_proc.is_alive():
819
+ try:
820
+ self.interchange_proc.wait(timeout=timeout)
821
+ except subprocess.TimeoutExpired:
815
822
  logger.info("Unable to terminate Interchange process; sending SIGKILL")
816
823
  self.interchange_proc.kill()
817
824
 
818
- self.interchange_proc.close()
819
-
820
825
  logger.info("Finished HighThroughputExecutor shutdown attempt")
821
826
 
822
827
  def get_usage_information(self):
@@ -672,13 +672,10 @@ def start_file_logger(filename: str, level: int = logging.DEBUG, format_string:
672
672
  logger.addHandler(handler)
673
673
 
674
674
 
675
- @wrap_with_logs(target="interchange")
676
- def starter(*args: Any, **kwargs: Any) -> None:
677
- """Start the interchange process
678
-
679
- The executor is expected to call this function. The args, kwargs match that of the Interchange.__init__
680
- """
675
+ if __name__ == "__main__":
681
676
  setproctitle("parsl: HTEX interchange")
682
- # logger = multiprocessing.get_logger()
683
- ic = Interchange(*args, **kwargs)
677
+
678
+ config = pickle.load(sys.stdin.buffer)
679
+
680
+ ic = Interchange(**config)
684
681
  ic.start()
@@ -215,6 +215,13 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
215
215
  This requires a version of Work Queue / cctools after commit
216
216
  874df524516441da531b694afc9d591e8b134b73 (release 7.5.0 is too early).
217
217
  Default is False.
218
+
219
+ scaling_cores_per_worker: int
220
+ When using Parsl scaling, this specifies the number of cores that a
221
+ worker is expected to have available for computation. Default 1. This
222
+ parameter can be ignored when using a fixed number of blocks, or when
223
+ using one task per worker (by omitting a ``cores`` resource
224
+ specification for each task).
218
225
  """
219
226
 
220
227
  radio_mode = "filesystem"
@@ -244,12 +251,14 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
244
251
  full_debug: bool = True,
245
252
  worker_executable: str = 'work_queue_worker',
246
253
  function_dir: Optional[str] = None,
247
- coprocess: bool = False):
254
+ coprocess: bool = False,
255
+ scaling_cores_per_worker: int = 1):
248
256
  BlockProviderExecutor.__init__(self, provider=provider,
249
257
  block_error_handler=True)
250
258
  if not _work_queue_enabled:
251
259
  raise OptionalModuleMissing(['work_queue'], "WorkQueueExecutor requires the work_queue module.")
252
260
 
261
+ self.scaling_cores_per_worker = scaling_cores_per_worker
253
262
  self.label = label
254
263
  self.task_queue = multiprocessing.Queue() # type: multiprocessing.Queue
255
264
  self.collector_queue = multiprocessing.Queue() # type: multiprocessing.Queue
@@ -469,6 +478,8 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
469
478
  # Create a Future object and have it be mapped from the task ID in the tasks dictionary
470
479
  fu = Future()
471
480
  fu.parsl_executor_task_id = executor_task_id
481
+ assert isinstance(resource_specification, dict)
482
+ fu.resource_specification = resource_specification
472
483
  logger.debug("Getting tasks_lock to set WQ-level task entry")
473
484
  with self.tasks_lock:
474
485
  logger.debug("Got tasks_lock to set WQ-level task entry")
@@ -654,20 +665,29 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
654
665
 
655
666
  @property
656
667
  def outstanding(self) -> int:
657
- """Count the number of outstanding tasks. This is inefficiently
668
+ """Count the number of outstanding slots required. This is inefficiently
658
669
  implemented and probably could be replaced with a counter.
659
670
  """
671
+ logger.debug("Calculating outstanding task slot load")
660
672
  outstanding = 0
673
+ tasks = 0 # only for log message...
661
674
  with self.tasks_lock:
662
675
  for fut in self.tasks.values():
663
676
  if not fut.done():
664
- outstanding += 1
665
- logger.debug(f"Counted {outstanding} outstanding tasks")
677
+ # if a task does not specify a core count, Work Queue will allocate an entire
678
+ # worker node to that task. That's approximated here by saying that it uses
679
+ # scaling_cores_per_worker.
680
+ resource_spec = getattr(fut, 'resource_specification', {})
681
+ cores = resource_spec.get('cores', self.scaling_cores_per_worker)
682
+
683
+ outstanding += cores
684
+ tasks += 1
685
+ logger.debug(f"Counted {tasks} outstanding tasks with {outstanding} outstanding slots")
666
686
  return outstanding
667
687
 
668
688
  @property
669
689
  def workers_per_node(self) -> Union[int, float]:
670
- return 1
690
+ return self.scaling_cores_per_worker
671
691
 
672
692
  def scale_in(self, count: int) -> List[str]:
673
693
  """Scale in method.
@@ -243,13 +243,13 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
243
243
  for jid in to_poll_job_ids:
244
244
  phase = None
245
245
  try:
246
- pod_status = self.kube_client.read_namespaced_pod_status(name=jid, namespace=self.namespace)
246
+ pod = self.kube_client.read_namespaced_pod(name=jid, namespace=self.namespace)
247
247
  except Exception:
248
248
  logger.exception("Failed to poll pod {} status, most likely because pod was terminated".format(jid))
249
249
  if self.resources[jid]['status'] is JobStatus(JobState.RUNNING):
250
250
  phase = 'Unknown'
251
251
  else:
252
- phase = pod_status.status.phase
252
+ phase = pod.status.phase
253
253
  if phase:
254
254
  status = translate_table.get(phase, JobState.UNKNOWN)
255
255
  logger.debug("Updating pod {} with status {} to parsl status {}".format(jid,
@@ -286,7 +286,7 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
286
286
  # Create the environment variables and command to initiate IPP
287
287
  environment_vars = client.V1EnvVar(name="TEST", value="SOME DATA")
288
288
 
289
- launch_args = ["-c", "{0};".format(cmd_string)]
289
+ launch_args = ["-c", "{0}".format(cmd_string)]
290
290
 
291
291
  volume_mounts = []
292
292
  # Create mount paths for the volumes