parsl 2024.6.10__py3-none-any.whl → 2024.6.24__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- parsl/app/app.py +0 -2
- parsl/app/bash.py +2 -3
- parsl/channels/local/local.py +7 -2
- parsl/configs/ASPIRE1.py +3 -1
- parsl/configs/Azure.py +3 -1
- parsl/configs/ad_hoc.py +2 -0
- parsl/configs/bridges.py +3 -1
- parsl/configs/cc_in2p3.py +2 -0
- parsl/configs/ec2.py +2 -0
- parsl/configs/expanse.py +3 -1
- parsl/configs/frontera.py +2 -0
- parsl/configs/htex_local.py +2 -0
- parsl/configs/illinoiscluster.py +2 -0
- parsl/configs/kubernetes.py +3 -1
- parsl/configs/local_threads.py +5 -1
- parsl/configs/midway.py +2 -0
- parsl/configs/osg.py +3 -1
- parsl/configs/polaris.py +3 -1
- parsl/configs/stampede2.py +2 -0
- parsl/configs/summit.py +2 -0
- parsl/configs/toss3_llnl.py +3 -1
- parsl/configs/vineex_local.py +3 -1
- parsl/configs/wqex_local.py +3 -1
- parsl/executors/high_throughput/executor.py +36 -31
- parsl/executors/high_throughput/interchange.py +5 -8
- parsl/executors/workqueue/executor.py +25 -5
- parsl/providers/kubernetes/kube.py +3 -3
- parsl/tests/test_htex/test_htex.py +24 -7
- parsl/version.py +1 -1
- parsl-2024.6.24.data/scripts/interchange.py +681 -0
- {parsl-2024.6.10.dist-info → parsl-2024.6.24.dist-info}/METADATA +2 -2
- {parsl-2024.6.10.dist-info → parsl-2024.6.24.dist-info}/RECORD +39 -38
- {parsl-2024.6.10.data → parsl-2024.6.24.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2024.6.10.data → parsl-2024.6.24.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2024.6.10.data → parsl-2024.6.24.data}/scripts/process_worker_pool.py +0 -0
- {parsl-2024.6.10.dist-info → parsl-2024.6.24.dist-info}/LICENSE +0 -0
- {parsl-2024.6.10.dist-info → parsl-2024.6.24.dist-info}/WHEEL +0 -0
- {parsl-2024.6.10.dist-info → parsl-2024.6.24.dist-info}/entry_points.txt +0 -0
- {parsl-2024.6.10.dist-info → parsl-2024.6.24.dist-info}/top_level.txt +0 -0
parsl/app/app.py
CHANGED
@@ -66,8 +66,6 @@ class AppBase(metaclass=ABCMeta):
             self.kwargs['walltime'] = params['walltime'].default
         if 'parsl_resource_specification' in params:
             self.kwargs['parsl_resource_specification'] = params['parsl_resource_specification'].default
-        self.outputs = params['outputs'].default if 'outputs' in params else []
-        self.inputs = params['inputs'].default if 'inputs' in params else []
 
     @abstractmethod
     def __call__(self, *args: Any, **kwargs: Any) -> AppFuture:
parsl/app/bash.py
CHANGED
@@ -1,5 +1,5 @@
 import logging
-from functools import partial
+from functools import partial
 from inspect import Parameter, signature
 
 from parsl.app.app import AppBase
@@ -123,11 +123,10 @@ class BashApp(AppBase):
         if sig.parameters[s].default is not Parameter.empty:
             self.kwargs[s] = sig.parameters[s].default
 
-        # update_wrapper allows remote_side_bash_executor to masquerade as self.func
         # partial is used to attach the first arg the "func" to the remote_side_bash_executor
         # this is done to avoid passing a function type in the args which parsl.serializer
         # doesn't support
-        remote_fn = partial(
+        remote_fn = partial(remote_side_bash_executor, self.func)
         remote_fn.__name__ = self.func.__name__
         self.wrapped_remote_function = wrap_error(remote_fn)
 
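The surviving pattern above pre-binds the first argument with functools.partial and then sets the wrapper's __name__ by hand. The sketch below (not part of the diff; the function names are invented for illustration) shows the same shape in isolation:

from functools import partial


def run_with_prefix(prefix: str, message: str) -> str:
    # Stand-in for remote_side_bash_executor; in the diff the bound first argument is a function.
    return f"{prefix}: {message}"


# Pre-bind the first argument, as the diff does with partial(remote_side_bash_executor, self.func).
bound = partial(run_with_prefix, "task")

# partial objects carry no __name__ of their own, so one is assigned explicitly, mirroring the diff.
bound.__name__ = run_with_prefix.__name__

print(bound("hello"))   # -> "task: hello"
print(bound.__name__)   # -> "run_with_prefix"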
parsl/channels/local/local.py
CHANGED
@@ -55,6 +55,7 @@ class LocalChannel(Channel, RepresentationMixin):
         current_env.update(envs)
 
         try:
+            logger.debug("Creating process with command '%s'", cmd)
             proc = subprocess.Popen(
                 cmd,
                 stdout=subprocess.PIPE,
@@ -64,12 +65,16 @@ class LocalChannel(Channel, RepresentationMixin):
                 shell=True,
                 preexec_fn=os.setpgrp
             )
+            logger.debug("Created process with pid %s. Performing communicate", proc.pid)
             (stdout, stderr) = proc.communicate(timeout=walltime)
             retcode = proc.returncode
+            logger.debug("Process %s returned %s", proc.pid, proc.returncode)
 
-        except Exception
-            logger.
+        except Exception:
+            logger.exception(f"Execution of command failed:\n{cmd}")
             raise
+        else:
+            logger.debug("Execution of command in process %s completed normally", proc.pid)
 
         return (retcode, stdout.decode("utf-8"), stderr.decode("utf-8"))
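The logging added here follows Python's try/except/else idiom: logger.exception records the traceback on failure, and the else branch logs only when the try block raised nothing. A standalone sketch of the same shape (not part of the diff; the command and timeout values are placeholders):

import logging
import subprocess

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

cmd = "echo hello"   # placeholder command for illustration
walltime = 10        # seconds

try:
    logger.debug("Creating process with command '%s'", cmd)
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    (stdout, stderr) = proc.communicate(timeout=walltime)
    retcode = proc.returncode
except Exception:
    # logger.exception logs the message together with the active traceback
    logger.exception("Execution of command failed:\n%s", cmd)
    raise
else:
    # the else branch runs only when no exception was raised
    logger.debug("Execution of command in process %s completed normally", proc.pid)

print(retcode, stdout.decode("utf-8"), stderr.decode("utf-8"))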
parsl/configs/ASPIRE1.py
CHANGED
@@ -4,6 +4,7 @@ from parsl.executors import HighThroughputExecutor
 from parsl.launchers import MpiRunLauncher
 from parsl.monitoring.monitoring import MonitoringHub
 from parsl.providers import PBSProProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 config = Config(
     executors=[
@@ -39,5 +40,6 @@ config = Config(
     strategy='simple',
     retries=3,
     app_cache=True,
-    checkpoint_mode='task_exit'
+    checkpoint_mode='task_exit',
+    usage_tracking=LEVEL_1,
 )
parsl/configs/Azure.py
CHANGED
@@ -8,6 +8,7 @@ from parsl.data_provider.http import HTTPInTaskStaging
 from parsl.data_provider.rsync import RSyncStaging
 from parsl.executors import HighThroughputExecutor
 from parsl.providers import AzureProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 vm_reference = {
     # All fields below are required
@@ -33,5 +34,6 @@ config = Config(
                 FTPInTaskStaging(),
                 RSyncStaging(getpass.getuser() + "@" + address_by_query())],
         )
-    ]
+    ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/ad_hoc.py
CHANGED
@@ -4,6 +4,7 @@ from parsl.channels import SSHChannel
 from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.providers import AdHocProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 user_opts: Dict[str, Dict[str, Any]]
 user_opts = {'adhoc':
@@ -33,4 +34,5 @@ config = Config(
     ],
     # AdHoc Clusters should not be setup with scaling strategy.
     strategy='none',
+    usage_tracking=LEVEL_1,
 )
parsl/configs/bridges.py
CHANGED
@@ -3,6 +3,7 @@ from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.launchers import SrunLauncher
 from parsl.providers import SlurmProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 """ This config assumes that it is used to launch parsl tasks from the login nodes
 of Bridges at PSC. Each job submitted to the scheduler will request 2 nodes for 10 minutes.
@@ -34,5 +35,6 @@ config = Config(
                 cmd_timeout=120,
             ),
         )
-    ]
+    ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/cc_in2p3.py
CHANGED
@@ -2,6 +2,7 @@ from parsl.channels import LocalChannel
 from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.providers import GridEngineProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 config = Config(
     executors=[
@@ -19,4 +20,5 @@ config = Config(
             ),
         )
     ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/ec2.py
CHANGED
@@ -1,6 +1,7 @@
 from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.providers import AWSProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 config = Config(
     executors=[
@@ -25,4 +26,5 @@ config = Config(
             ),
         )
     ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/expanse.py
CHANGED
@@ -2,6 +2,7 @@ from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.launchers import SrunLauncher
 from parsl.providers import SlurmProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 config = Config(
     executors=[
@@ -24,5 +25,6 @@ config = Config(
                 nodes_per_block=2,
             ),
         )
-    ]
+    ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/frontera.py
CHANGED
@@ -3,6 +3,7 @@ from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.launchers import SrunLauncher
 from parsl.providers import SlurmProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 """ This config assumes that it is used to launch parsl tasks from the login nodes
 of Frontera at TACC. Each job submitted to the scheduler will request 2 nodes for 10 minutes.
@@ -32,4 +33,5 @@ config = Config(
             ),
         )
     ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/htex_local.py
CHANGED
@@ -2,6 +2,7 @@ from parsl.channels import LocalChannel
 from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.providers import LocalProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 config = Config(
     executors=[
@@ -15,4 +16,5 @@ config = Config(
             ),
         )
     ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/illinoiscluster.py
CHANGED
@@ -2,6 +2,7 @@ from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.launchers import SrunLauncher
 from parsl.providers import SlurmProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 """ This config assumes that it is used to launch parsl tasks from the login nodes
 of the Campus Cluster at UIUC. Each job submitted to the scheduler will request 2 nodes for 10 minutes.
@@ -25,4 +26,5 @@ config = Config(
             ),
         )
     ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/kubernetes.py
CHANGED
@@ -2,6 +2,7 @@ from parsl.addresses import address_by_route
 from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.providers import KubernetesProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 config = Config(
     executors=[
@@ -36,5 +37,6 @@ config = Config(
                 max_blocks=10,
             ),
         ),
-    ]
+    ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/local_threads.py
CHANGED
@@ -1,4 +1,8 @@
 from parsl.config import Config
 from parsl.executors.threads import ThreadPoolExecutor
+from parsl.usage_tracking.levels import LEVEL_1
 
-config = Config(
+config = Config(
+    executors=[ThreadPoolExecutor()],
+    usage_tracking=LEVEL_1,
+)
parsl/configs/midway.py
CHANGED
@@ -3,6 +3,7 @@ from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.launchers import SrunLauncher
 from parsl.providers import SlurmProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 config = Config(
     executors=[
@@ -28,4 +29,5 @@ config = Config(
             ),
         )
     ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/osg.py
CHANGED
@@ -1,6 +1,7 @@
 from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.providers import CondorProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 config = Config(
     executors=[
@@ -26,5 +27,6 @@ python3 -m venv parsl_env; source parsl_env/bin/activate; python3 -m pip install
             worker_logdir_root='$OSG_WN_TMP',
             worker_ports=(31000, 31001)
         )
-    ]
+    ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/polaris.py
CHANGED
@@ -3,6 +3,7 @@ from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.launchers import MpiExecLauncher
 from parsl.providers import PBSProProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 # There are three user parameters to change for the PBSProProvider:
 # YOUR_ACCOUNT: Account to charge usage
@@ -34,5 +35,6 @@ config = Config(
                 cpus_per_node=64,
             ),
         ),
-    ]
+    ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/stampede2.py
CHANGED
@@ -4,6 +4,7 @@ from parsl.data_provider.globus import GlobusStaging
 from parsl.executors import HighThroughputExecutor
 from parsl.launchers import SrunLauncher
 from parsl.providers import SlurmProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 config = Config(
     executors=[
@@ -34,4 +35,5 @@ config = Config(
         )
 
     ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/summit.py
CHANGED
@@ -3,6 +3,7 @@ from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.launchers import JsrunLauncher
 from parsl.providers import LSFProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 config = Config(
     executors=[
@@ -26,4 +27,5 @@ config = Config(
         )
 
     ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/toss3_llnl.py
CHANGED
@@ -2,6 +2,7 @@ from parsl.config import Config
 from parsl.executors import FluxExecutor
 from parsl.launchers import SrunLauncher
 from parsl.providers import SlurmProvider
+from parsl.usage_tracking.levels import LEVEL_1
 
 config = Config(
     executors=[
@@ -24,5 +25,6 @@ config = Config(
                 cmd_timeout=120,
             ),
         )
-    ]
+    ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/vineex_local.py
CHANGED
@@ -2,6 +2,7 @@ import uuid
 
 from parsl.config import Config
 from parsl.executors.taskvine import TaskVineExecutor, TaskVineManagerConfig
+from parsl.usage_tracking.levels import LEVEL_1
 
 config = Config(
     executors=[
@@ -15,5 +16,6 @@ config = Config(
             # To disable status reporting, comment out the project_name.
             manager_config=TaskVineManagerConfig(project_name="parsl-vine-" + str(uuid.uuid4())),
         )
-    ]
+    ],
+    usage_tracking=LEVEL_1,
 )
parsl/configs/wqex_local.py
CHANGED
@@ -2,6 +2,7 @@ import uuid
 
 from parsl.config import Config
 from parsl.executors import WorkQueueExecutor
+from parsl.usage_tracking.levels import LEVEL_1
 
 config = Config(
     executors=[
@@ -21,5 +22,6 @@ config = Config(
             # A shared filesystem is not needed when using Work Queue.
             shared_fs=False
         )
-    ]
+    ],
+    usage_tracking=LEVEL_1,
 )
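Every bundled configuration in this release gains the same two lines: an import of LEVEL_1 from parsl.usage_tracking.levels and a usage_tracking=LEVEL_1 argument to Config. A minimal user configuration opting into the same reporting level would look like the sketch below (the executor choice is arbitrary; only LEVEL_1, which appears in these diffs, is assumed here):

from parsl.config import Config
from parsl.executors.threads import ThreadPoolExecutor
from parsl.usage_tracking.levels import LEVEL_1

# Mirrors the pattern applied to every config in this release:
# usage_tracking now takes a tracking-level constant.
config = Config(
    executors=[ThreadPoolExecutor()],
    usage_tracking=LEVEL_1,
)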
parsl/executors/high_throughput/executor.py
CHANGED
@@ -1,13 +1,13 @@
 import logging
 import math
 import pickle
+import subprocess
 import threading
 import typing
 import warnings
 from collections import defaultdict
 from concurrent.futures import Future
 from dataclasses import dataclass
-from multiprocessing import Process
 from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union
 
 import typeguard
@@ -18,7 +18,7 @@ from parsl.addresses import get_all_addresses
 from parsl.app.errors import RemoteExceptionWrapper
 from parsl.data_provider.staging import Staging
 from parsl.executors.errors import BadMessage, ScalingFailed
-from parsl.executors.high_throughput import
+from parsl.executors.high_throughput import zmq_pipes
 from parsl.executors.high_throughput.errors import CommandClientTimeoutError
 from parsl.executors.high_throughput.mpi_prefix_composer import (
     VALID_LAUNCHERS,
@@ -26,7 +26,6 @@ from parsl.executors.high_throughput.mpi_prefix_composer import (
 )
 from parsl.executors.status_handling import BlockProviderExecutor
 from parsl.jobs.states import TERMINAL_STATES, JobState, JobStatus
-from parsl.multiprocessing import ForkProcess
 from parsl.process_loggers import wrap_with_logs
 from parsl.providers import LocalProvider
 from parsl.providers.base import ExecutionProvider
@@ -305,7 +304,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
         self._task_counter = 0
         self.worker_ports = worker_ports
         self.worker_port_range = worker_port_range
-        self.interchange_proc: Optional[
+        self.interchange_proc: Optional[subprocess.Popen] = None
         self.interchange_port_range = interchange_port_range
         self.heartbeat_threshold = heartbeat_threshold
         self.heartbeat_period = heartbeat_period
@@ -520,38 +519,45 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
 
         logger.info("Queue management worker finished")
 
-    def _start_local_interchange_process(self):
+    def _start_local_interchange_process(self) -> None:
         """ Starts the interchange process locally
 
-        Starts the interchange process locally and uses
+        Starts the interchange process locally and uses the command queue to
        get the worker task and result ports that the interchange has bound to.
         """
-        self.interchange_proc = ForkProcess(target=interchange.starter,
-                                            kwargs={"client_address": "127.0.0.1",
-                                                    "client_ports": (self.outgoing_q.port,
-                                                                     self.incoming_q.port,
-                                                                     self.command_client.port),
-                                                    "interchange_address": self.address,
-                                                    "worker_ports": self.worker_ports,
-                                                    "worker_port_range": self.worker_port_range,
-                                                    "hub_address": self.hub_address,
-                                                    "hub_zmq_port": self.hub_zmq_port,
-                                                    "logdir": self.logdir,
-                                                    "heartbeat_threshold": self.heartbeat_threshold,
-                                                    "poll_period": self.poll_period,
-                                                    "logging_level": logging.DEBUG if self.worker_debug else logging.INFO,
-                                                    "cert_dir": self.cert_dir,
-                                                    },
-                                            daemon=True,
-                                            name="HTEX-Interchange"
-                                            )
-        self.interchange_proc.start()
 
+        interchange_config = {"client_address": "127.0.0.1",
+                              "client_ports": (self.outgoing_q.port,
+                                               self.incoming_q.port,
+                                               self.command_client.port),
+                              "interchange_address": self.address,
+                              "worker_ports": self.worker_ports,
+                              "worker_port_range": self.worker_port_range,
+                              "hub_address": self.hub_address,
+                              "hub_zmq_port": self.hub_zmq_port,
+                              "logdir": self.logdir,
+                              "heartbeat_threshold": self.heartbeat_threshold,
+                              "poll_period": self.poll_period,
+                              "logging_level": logging.DEBUG if self.worker_debug else logging.INFO,
+                              "cert_dir": self.cert_dir,
+                              }
+
+        config_pickle = pickle.dumps(interchange_config)
+
+        self.interchange_proc = subprocess.Popen(b"interchange.py", stdin=subprocess.PIPE)
+        stdin = self.interchange_proc.stdin
+        assert stdin is not None, "Popen should have created an IO object (vs default None) because of PIPE mode"
+
+        logger.debug("Popened interchange process. Writing config object")
+        stdin.write(config_pickle)
+        stdin.flush()
+        logger.debug("Sent config object. Requesting worker ports")
         try:
             (self.worker_task_port, self.worker_result_port) = self.command_client.run("WORKER_PORTS", timeout_s=120)
         except CommandClientTimeoutError:
-            logger.error("Interchange has not completed initialization
+            logger.error("Interchange has not completed initialization. Aborting")
             raise Exception("Interchange failed to start")
+        logger.debug("Got worker ports")
 
     def _start_queue_management_thread(self):
         """Method to start the management thread as a daemon.
@@ -810,13 +816,12 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
         logger.info("Attempting HighThroughputExecutor shutdown")
 
         self.interchange_proc.terminate()
-
-
+        try:
+            self.interchange_proc.wait(timeout=timeout)
+        except subprocess.TimeoutExpired:
             logger.info("Unable to terminate Interchange process; sending SIGKILL")
             self.interchange_proc.kill()
 
-        self.interchange_proc.close()
-
         logger.info("Finished HighThroughputExecutor shutdown attempt")
 
     def get_usage_information(self):
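The executor change above replaces an in-process fork (ForkProcess targeting interchange.starter) with an external interchange.py process that receives its configuration as a pickle written to its stdin; shutdown now follows terminate, wait with a timeout, then kill. The standalone sketch below illustrates that launch-and-shutdown pattern with an inline child script; the child source here is invented for illustration and is not parsl's interchange:

import pickle
import subprocess
import sys
import textwrap

# Hypothetical stand-in for interchange.py: it reads one pickled dict from its stdin.
CHILD_SOURCE = textwrap.dedent("""
    import pickle, sys
    config = pickle.load(sys.stdin.buffer)   # blocks until the parent writes the pickle
    print("child received:", sorted(config))
""")

config = {"client_address": "127.0.0.1", "poll_period": 10}

# Launch the child with a stdin PIPE, mirroring subprocess.Popen(..., stdin=subprocess.PIPE) above.
proc = subprocess.Popen([sys.executable, "-c", CHILD_SOURCE], stdin=subprocess.PIPE)
assert proc.stdin is not None  # PIPE mode guarantees a writable stream

proc.stdin.write(pickle.dumps(config))
proc.stdin.flush()

# In the real executor the child runs until shutdown; here we terminate immediately
# to demonstrate the new terminate / wait(timeout) / kill sequence.
proc.terminate()
try:
    proc.wait(timeout=10)
except subprocess.TimeoutExpired:
    proc.kill()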
parsl/executors/high_throughput/interchange.py
CHANGED
@@ -672,13 +672,10 @@ def start_file_logger(filename: str, level: int = logging.DEBUG, format_string:
     logger.addHandler(handler)
 
 
-
-def starter(*args: Any, **kwargs: Any) -> None:
-    """Start the interchange process
-
-    The executor is expected to call this function. The args, kwargs match that of the Interchange.__init__
-    """
+if __name__ == "__main__":
     setproctitle("parsl: HTEX interchange")
-
-
+
+    config = pickle.load(sys.stdin.buffer)
+
+    ic = Interchange(**config)
     ic.start()
parsl/executors/workqueue/executor.py
CHANGED
@@ -215,6 +215,13 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         This requires a version of Work Queue / cctools after commit
         874df524516441da531b694afc9d591e8b134b73 (release 7.5.0 is too early).
         Default is False.
+
+    scaling_cores_per_worker: int
+        When using Parsl scaling, this specifies the number of cores that a
+        worker is expected to have available for computation. Default 1. This
+        parameter can be ignored when using a fixed number of blocks, or when
+        using one task per worker (by omitting a ``cores`` resource
+        specifiation for each task).
     """
 
     radio_mode = "filesystem"
@@ -244,12 +251,14 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
                  full_debug: bool = True,
                  worker_executable: str = 'work_queue_worker',
                  function_dir: Optional[str] = None,
-                 coprocess: bool = False
+                 coprocess: bool = False,
+                 scaling_cores_per_worker: int = 1):
         BlockProviderExecutor.__init__(self, provider=provider,
                                        block_error_handler=True)
         if not _work_queue_enabled:
             raise OptionalModuleMissing(['work_queue'], "WorkQueueExecutor requires the work_queue module.")
 
+        self.scaling_cores_per_worker = scaling_cores_per_worker
         self.label = label
         self.task_queue = multiprocessing.Queue()  # type: multiprocessing.Queue
         self.collector_queue = multiprocessing.Queue()  # type: multiprocessing.Queue
@@ -469,6 +478,8 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         # Create a Future object and have it be mapped from the task ID in the tasks dictionary
         fu = Future()
         fu.parsl_executor_task_id = executor_task_id
+        assert isinstance(resource_specification, dict)
+        fu.resource_specification = resource_specification
         logger.debug("Getting tasks_lock to set WQ-level task entry")
         with self.tasks_lock:
             logger.debug("Got tasks_lock to set WQ-level task entry")
@@ -654,20 +665,29 @@ class WorkQueueExecutor(BlockProviderExecutor, putils.RepresentationMixin):
 
     @property
     def outstanding(self) -> int:
-        """Count the number of outstanding
+        """Count the number of outstanding slots required. This is inefficiently
         implemented and probably could be replaced with a counter.
         """
+        logger.debug("Calculating outstanding task slot load")
         outstanding = 0
+        tasks = 0  # only for log message...
         with self.tasks_lock:
             for fut in self.tasks.values():
                 if not fut.done():
-
-
+                    # if a task does not specify a core count, Work Queue will allocate an entire
+                    # worker node to that task. That's approximated here by saying that it uses
+                    # scaling_cores_per_worker.
+                    resource_spec = getattr(fut, 'resource_specification', {})
+                    cores = resource_spec.get('cores', self.scaling_cores_per_worker)
+
+                    outstanding += cores
+                    tasks += 1
+        logger.debug(f"Counted {tasks} outstanding tasks with {outstanding} outstanding slots")
         return outstanding
 
     @property
     def workers_per_node(self) -> Union[int, float]:
-        return
+        return self.scaling_cores_per_worker
 
     def scale_in(self, count: int) -> List[str]:
         """Scale in method.
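The new outstanding property sizes scaling demand in core slots rather than task count: each unfinished task contributes its requested cores, falling back to scaling_cores_per_worker when no cores resource is given. A minimal standalone sketch of that accounting (the helper and data below are invented for illustration; plain dictionaries stand in for the executor's Future bookkeeping):

from typing import Dict, List, Optional


def outstanding_slots(task_resource_specs: List[Optional[Dict[str, int]]],
                      scaling_cores_per_worker: int = 1) -> int:
    """Sum the core slots still needed by unfinished tasks.

    A task with no 'cores' entry is assumed to occupy a whole worker,
    approximated as scaling_cores_per_worker, as in the diff above.
    """
    total = 0
    for spec in task_resource_specs:
        spec = spec or {}
        total += spec.get("cores", scaling_cores_per_worker)
    return total


# Three unfinished tasks: one asks for 4 cores, one for 2, one gives no spec.
print(outstanding_slots([{"cores": 4}, {"cores": 2}, None], scaling_cores_per_worker=8))  # -> 14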
parsl/providers/kubernetes/kube.py
CHANGED
@@ -243,13 +243,13 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
         for jid in to_poll_job_ids:
             phase = None
             try:
-
+                pod = self.kube_client.read_namespaced_pod(name=jid, namespace=self.namespace)
             except Exception:
                 logger.exception("Failed to poll pod {} status, most likely because pod was terminated".format(jid))
                 if self.resources[jid]['status'] is JobStatus(JobState.RUNNING):
                     phase = 'Unknown'
             else:
-                phase =
+                phase = pod.status.phase
             if phase:
                 status = translate_table.get(phase, JobState.UNKNOWN)
                 logger.debug("Updating pod {} with status {} to parsl status {}".format(jid,
@@ -286,7 +286,7 @@ class KubernetesProvider(ExecutionProvider, RepresentationMixin):
         # Create the environment variables and command to initiate IPP
         environment_vars = client.V1EnvVar(name="TEST", value="SOME DATA")
 
-        launch_args = ["-c", "{0}
+        launch_args = ["-c", "{0}".format(cmd_string)]
 
         volume_mounts = []
         # Create mount paths for the volumes