parsl 2024.10.28__py3-none-any.whl → 2024.11.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/channels/base.py +6 -46
- parsl/channels/errors.py +0 -67
- parsl/channels/local/local.py +5 -56
- parsl/dataflow/dflow.py +6 -61
- parsl/executors/high_throughput/executor.py +0 -1
- parsl/executors/high_throughput/mpi_resource_management.py +0 -12
- parsl/executors/taskvine/manager.py +6 -0
- parsl/executors/taskvine/manager_config.py +5 -0
- parsl/monitoring/monitoring.py +23 -26
- parsl/monitoring/radios.py +4 -17
- parsl/monitoring/remote.py +3 -5
- parsl/providers/__init__.py +0 -2
- parsl/providers/base.py +1 -1
- parsl/providers/cluster_provider.py +1 -4
- parsl/providers/condor/condor.py +1 -4
- parsl/providers/grid_engine/grid_engine.py +1 -4
- parsl/providers/lsf/lsf.py +1 -4
- parsl/providers/pbspro/pbspro.py +1 -4
- parsl/providers/slurm/slurm.py +1 -4
- parsl/providers/torque/torque.py +1 -4
- parsl/tests/configs/user_opts.py +0 -7
- parsl/tests/conftest.py +4 -4
- parsl/tests/site_tests/site_config_selector.py +1 -6
- parsl/tests/test_bash_apps/test_basic.py +3 -0
- parsl/tests/test_bash_apps/test_error_codes.py +4 -0
- parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -0
- parsl/tests/test_bash_apps/test_memoize.py +2 -6
- parsl/tests/test_bash_apps/test_memoize_ignore_args.py +3 -0
- parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +1 -0
- parsl/tests/test_bash_apps/test_multiline.py +1 -0
- parsl/tests/test_bash_apps/test_stdout.py +2 -0
- parsl/tests/test_channels/test_local_channel.py +0 -19
- parsl/tests/test_docs/test_from_slides.py +3 -0
- parsl/tests/test_docs/test_kwargs.py +3 -0
- parsl/tests/test_monitoring/test_basic.py +13 -1
- parsl/tests/test_providers/test_local_provider.py +0 -135
- parsl/tests/test_providers/test_pbspro_template.py +2 -1
- parsl/tests/test_providers/test_slurm_template.py +2 -1
- parsl/tests/test_python_apps/test_outputs.py +1 -0
- parsl/tests/test_regression/test_226.py +1 -0
- parsl/tests/test_staging/test_docs_1.py +1 -0
- parsl/tests/test_staging/test_output_chain_filenames.py +3 -0
- parsl/tests/test_staging/test_staging_ftp.py +1 -0
- parsl/tests/test_staging/test_staging_https.py +3 -0
- parsl/tests/test_staging/test_staging_stdout.py +2 -0
- parsl/version.py +1 -1
- {parsl-2024.10.28.dist-info → parsl-2024.11.11.dist-info}/METADATA +2 -8
- {parsl-2024.10.28.dist-info → parsl-2024.11.11.dist-info}/RECORD +56 -74
- {parsl-2024.10.28.dist-info → parsl-2024.11.11.dist-info}/WHEEL +1 -1
- parsl/channels/oauth_ssh/__init__.py +0 -0
- parsl/channels/oauth_ssh/oauth_ssh.py +0 -119
- parsl/channels/ssh/__init__.py +0 -0
- parsl/channels/ssh/ssh.py +0 -295
- parsl/channels/ssh_il/__init__.py +0 -0
- parsl/channels/ssh_il/ssh_il.py +0 -85
- parsl/providers/ad_hoc/__init__.py +0 -0
- parsl/providers/ad_hoc/ad_hoc.py +0 -252
- parsl/providers/cobalt/__init__.py +0 -0
- parsl/providers/cobalt/cobalt.py +0 -236
- parsl/providers/cobalt/template.py +0 -17
- parsl/tests/configs/cooley_htex.py +0 -37
- parsl/tests/configs/local_adhoc.py +0 -18
- parsl/tests/configs/theta.py +0 -37
- parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
- parsl/tests/sites/test_local_adhoc.py +0 -62
- parsl/tests/test_channels/test_dfk_close.py +0 -26
- parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -18
- {parsl-2024.10.28.data → parsl-2024.11.11.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2024.10.28.data → parsl-2024.11.11.data}/scripts/interchange.py +0 -0
- {parsl-2024.10.28.data → parsl-2024.11.11.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2024.10.28.data → parsl-2024.11.11.data}/scripts/process_worker_pool.py +0 -0
- {parsl-2024.10.28.dist-info → parsl-2024.11.11.dist-info}/LICENSE +0 -0
- {parsl-2024.10.28.dist-info → parsl-2024.11.11.dist-info}/entry_points.txt +0 -0
- {parsl-2024.10.28.dist-info → parsl-2024.11.11.dist-info}/top_level.txt +0 -0
parsl/channels/ssh/ssh.py
DELETED
@@ -1,295 +0,0 @@
|
|
1
|
-
import errno
|
2
|
-
import logging
|
3
|
-
import os
|
4
|
-
|
5
|
-
from parsl.channels.base import Channel
|
6
|
-
from parsl.channels.errors import (
|
7
|
-
AuthException,
|
8
|
-
BadHostKeyException,
|
9
|
-
BadPermsScriptPath,
|
10
|
-
BadScriptPath,
|
11
|
-
FileCopyException,
|
12
|
-
SSHException,
|
13
|
-
)
|
14
|
-
from parsl.errors import OptionalModuleMissing
|
15
|
-
from parsl.utils import RepresentationMixin
|
16
|
-
|
17
|
-
try:
|
18
|
-
import paramiko
|
19
|
-
_ssh_enabled = True
|
20
|
-
except (ImportError, NameError, FileNotFoundError):
|
21
|
-
_ssh_enabled = False
|
22
|
-
|
23
|
-
|
24
|
-
logger = logging.getLogger(__name__)
|
25
|
-
|
26
|
-
|
27
|
-
if _ssh_enabled:
|
28
|
-
class NoAuthSSHClient(paramiko.SSHClient):
|
29
|
-
def _auth(self, username, *args):
|
30
|
-
self._transport.auth_none(username)
|
31
|
-
return
|
32
|
-
|
33
|
-
|
34
|
-
class DeprecatedSSHChannel(Channel, RepresentationMixin):
|
35
|
-
''' SSH persistent channel. This enables remote execution on sites
|
36
|
-
accessible via ssh. It is assumed that the user has setup host keys
|
37
|
-
so as to ssh to the remote host. Which goes to say that the following
|
38
|
-
test on the commandline should work:
|
39
|
-
|
40
|
-
>>> ssh <username>@<hostname>
|
41
|
-
|
42
|
-
'''
|
43
|
-
|
44
|
-
def __init__(self, hostname, username=None, password=None, script_dir=None, envs=None,
|
45
|
-
gssapi_auth=False, skip_auth=False, port=22, key_filename=None, host_keys_filename=None):
|
46
|
-
''' Initialize a persistent connection to the remote system.
|
47
|
-
We should know at this point whether ssh connectivity is possible
|
48
|
-
|
49
|
-
Args:
|
50
|
-
- hostname (String) : Hostname
|
51
|
-
|
52
|
-
KWargs:
|
53
|
-
- username (string) : Username on remote system
|
54
|
-
- password (string) : Password for remote system
|
55
|
-
- port : The port designated for the ssh connection. Default is 22.
|
56
|
-
- script_dir (string) : Full path to a script dir where
|
57
|
-
generated scripts could be sent to.
|
58
|
-
- envs (dict) : A dictionary of environment variables to be set when executing commands
|
59
|
-
- key_filename (string or list): the filename, or list of filenames, of optional private key(s)
|
60
|
-
|
61
|
-
Raises:
|
62
|
-
'''
|
63
|
-
if not _ssh_enabled:
|
64
|
-
raise OptionalModuleMissing(['ssh'],
|
65
|
-
"SSHChannel requires the ssh module and config.")
|
66
|
-
|
67
|
-
self.hostname = hostname
|
68
|
-
self.username = username
|
69
|
-
self.password = password
|
70
|
-
self.port = port
|
71
|
-
self.script_dir = script_dir
|
72
|
-
self.skip_auth = skip_auth
|
73
|
-
self.gssapi_auth = gssapi_auth
|
74
|
-
self.key_filename = key_filename
|
75
|
-
self.host_keys_filename = host_keys_filename
|
76
|
-
|
77
|
-
if self.skip_auth:
|
78
|
-
self.ssh_client = NoAuthSSHClient()
|
79
|
-
else:
|
80
|
-
self.ssh_client = paramiko.SSHClient()
|
81
|
-
self.ssh_client.load_system_host_keys(filename=host_keys_filename)
|
82
|
-
self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
83
|
-
self.sftp_client = None
|
84
|
-
|
85
|
-
self.envs = {}
|
86
|
-
if envs is not None:
|
87
|
-
self.envs = envs
|
88
|
-
|
89
|
-
def _is_connected(self):
|
90
|
-
transport = self.ssh_client.get_transport() if self.ssh_client else None
|
91
|
-
return transport and transport.is_active()
|
92
|
-
|
93
|
-
def _connect(self):
|
94
|
-
if not self._is_connected():
|
95
|
-
logger.debug(f"connecting to {self.hostname}:{self.port}")
|
96
|
-
try:
|
97
|
-
self.ssh_client.connect(
|
98
|
-
self.hostname,
|
99
|
-
username=self.username,
|
100
|
-
password=self.password,
|
101
|
-
port=self.port,
|
102
|
-
allow_agent=True,
|
103
|
-
gss_auth=self.gssapi_auth,
|
104
|
-
gss_kex=self.gssapi_auth,
|
105
|
-
key_filename=self.key_filename
|
106
|
-
)
|
107
|
-
transport = self.ssh_client.get_transport()
|
108
|
-
self.sftp_client = paramiko.SFTPClient.from_transport(transport)
|
109
|
-
|
110
|
-
except paramiko.BadHostKeyException as e:
|
111
|
-
raise BadHostKeyException(e, self.hostname)
|
112
|
-
|
113
|
-
except paramiko.AuthenticationException as e:
|
114
|
-
raise AuthException(e, self.hostname)
|
115
|
-
|
116
|
-
except paramiko.SSHException as e:
|
117
|
-
raise SSHException(e, self.hostname)
|
118
|
-
|
119
|
-
except Exception as e:
|
120
|
-
raise SSHException(e, self.hostname)
|
121
|
-
|
122
|
-
def _valid_sftp_client(self):
|
123
|
-
self._connect()
|
124
|
-
return self.sftp_client
|
125
|
-
|
126
|
-
def _valid_ssh_client(self):
|
127
|
-
self._connect()
|
128
|
-
return self.ssh_client
|
129
|
-
|
130
|
-
def prepend_envs(self, cmd, env={}):
|
131
|
-
env.update(self.envs)
|
132
|
-
|
133
|
-
if len(env.keys()) > 0:
|
134
|
-
env_vars = ' '.join(['{}={}'.format(key, value) for key, value in env.items()])
|
135
|
-
return 'env {0} {1}'.format(env_vars, cmd)
|
136
|
-
return cmd
|
137
|
-
|
138
|
-
def execute_wait(self, cmd, walltime=2, envs={}):
|
139
|
-
''' Synchronously execute a commandline string on the shell.
|
140
|
-
|
141
|
-
Args:
|
142
|
-
- cmd (string) : Commandline string to execute
|
143
|
-
- walltime (int) : walltime in seconds
|
144
|
-
|
145
|
-
Kwargs:
|
146
|
-
- envs (dict) : Dictionary of env variables
|
147
|
-
|
148
|
-
Returns:
|
149
|
-
- retcode : Return code from the execution, -1 on fail
|
150
|
-
- stdout : stdout string
|
151
|
-
- stderr : stderr string
|
152
|
-
|
153
|
-
Raises:
|
154
|
-
None.
|
155
|
-
'''
|
156
|
-
|
157
|
-
# Execute the command
|
158
|
-
stdin, stdout, stderr = self._valid_ssh_client().exec_command(
|
159
|
-
self.prepend_envs(cmd, envs), bufsize=-1, timeout=walltime
|
160
|
-
)
|
161
|
-
# Block on exit status from the command
|
162
|
-
exit_status = stdout.channel.recv_exit_status()
|
163
|
-
return exit_status, stdout.read().decode("utf-8"), stderr.read().decode("utf-8")
|
164
|
-
|
165
|
-
def push_file(self, local_source, remote_dir):
|
166
|
-
''' Transport a local file to a directory on a remote machine
|
167
|
-
|
168
|
-
Args:
|
169
|
-
- local_source (string): Path
|
170
|
-
- remote_dir (string): Remote path
|
171
|
-
|
172
|
-
Returns:
|
173
|
-
- str: Path to copied file on remote machine
|
174
|
-
|
175
|
-
Raises:
|
176
|
-
- BadScriptPath : if script path on the remote side is bad
|
177
|
-
- BadPermsScriptPath : You do not have perms to make the channel script dir
|
178
|
-
- FileCopyException : FileCopy failed.
|
179
|
-
|
180
|
-
'''
|
181
|
-
remote_dest = os.path.join(remote_dir, os.path.basename(local_source))
|
182
|
-
|
183
|
-
try:
|
184
|
-
self.makedirs(remote_dir, exist_ok=True)
|
185
|
-
except IOError as e:
|
186
|
-
logger.exception("Pushing {0} to {1} failed".format(local_source, remote_dir))
|
187
|
-
if e.errno == 2:
|
188
|
-
raise BadScriptPath(e, self.hostname)
|
189
|
-
elif e.errno == 13:
|
190
|
-
raise BadPermsScriptPath(e, self.hostname)
|
191
|
-
else:
|
192
|
-
logger.exception("File push failed due to SFTP client failure")
|
193
|
-
raise FileCopyException(e, self.hostname)
|
194
|
-
try:
|
195
|
-
self._valid_sftp_client().put(local_source, remote_dest, confirm=True)
|
196
|
-
# Set perm because some systems require the script to be executable
|
197
|
-
self._valid_sftp_client().chmod(remote_dest, 0o700)
|
198
|
-
except Exception as e:
|
199
|
-
logger.exception("File push from local source {} to remote destination {} failed".format(
|
200
|
-
local_source, remote_dest))
|
201
|
-
raise FileCopyException(e, self.hostname)
|
202
|
-
|
203
|
-
return remote_dest
|
204
|
-
|
205
|
-
def pull_file(self, remote_source, local_dir):
|
206
|
-
''' Transport file on the remote side to a local directory
|
207
|
-
|
208
|
-
Args:
|
209
|
-
- remote_source (string): remote_source
|
210
|
-
- local_dir (string): Local directory to copy to
|
211
|
-
|
212
|
-
|
213
|
-
Returns:
|
214
|
-
- str: Local path to file
|
215
|
-
|
216
|
-
Raises:
|
217
|
-
- FileCopyException : FileCopy failed.
|
218
|
-
'''
|
219
|
-
|
220
|
-
local_dest = os.path.join(local_dir, os.path.basename(remote_source))
|
221
|
-
|
222
|
-
try:
|
223
|
-
os.makedirs(local_dir)
|
224
|
-
except OSError as e:
|
225
|
-
if e.errno != errno.EEXIST:
|
226
|
-
logger.exception("Failed to create local_dir: {0}".format(local_dir))
|
227
|
-
raise BadScriptPath(e, self.hostname)
|
228
|
-
|
229
|
-
try:
|
230
|
-
self._valid_sftp_client().get(remote_source, local_dest)
|
231
|
-
except Exception as e:
|
232
|
-
logger.exception("File pull failed")
|
233
|
-
raise FileCopyException(e, self.hostname)
|
234
|
-
|
235
|
-
return local_dest
|
236
|
-
|
237
|
-
def close(self) -> None:
|
238
|
-
if self._is_connected():
|
239
|
-
transport = self.ssh_client.get_transport()
|
240
|
-
self.ssh_client.close()
|
241
|
-
|
242
|
-
# ssh_client.close calls transport.close, but transport.close does
|
243
|
-
# not always wait for the transport thread to be stopped. See impl
|
244
|
-
# of Transport.close in paramiko and issue
|
245
|
-
# https://github.com/paramiko/paramiko/issues/520
|
246
|
-
logger.debug("Waiting for transport thread to stop")
|
247
|
-
transport.join(30)
|
248
|
-
if transport.is_alive():
|
249
|
-
logger.warning("SSH transport thread did not shut down")
|
250
|
-
else:
|
251
|
-
logger.debug("SSH transport thread stopped")
|
252
|
-
|
253
|
-
def isdir(self, path):
|
254
|
-
"""Return true if the path refers to an existing directory.
|
255
|
-
|
256
|
-
Parameters
|
257
|
-
----------
|
258
|
-
path : str
|
259
|
-
Path of directory on the remote side to check.
|
260
|
-
"""
|
261
|
-
result = True
|
262
|
-
try:
|
263
|
-
self._valid_sftp_client().lstat(path)
|
264
|
-
except FileNotFoundError:
|
265
|
-
result = False
|
266
|
-
|
267
|
-
return result
|
268
|
-
|
269
|
-
def makedirs(self, path, mode=0o700, exist_ok=False):
|
270
|
-
"""Create a directory on the remote side.
|
271
|
-
|
272
|
-
If intermediate directories do not exist, they will be created.
|
273
|
-
|
274
|
-
Parameters
|
275
|
-
----------
|
276
|
-
path : str
|
277
|
-
Path of directory on the remote side to create.
|
278
|
-
mode : int
|
279
|
-
Permissions (posix-style) for the newly-created directory.
|
280
|
-
exist_ok : bool
|
281
|
-
If False, raise an OSError if the target directory already exists.
|
282
|
-
"""
|
283
|
-
if exist_ok is False and self.isdir(path):
|
284
|
-
raise OSError('Target directory {} already exists'.format(path))
|
285
|
-
|
286
|
-
self.execute_wait('mkdir -p {}'.format(path))
|
287
|
-
self._valid_sftp_client().chmod(path, mode)
|
288
|
-
|
289
|
-
@property
|
290
|
-
def script_dir(self):
|
291
|
-
return self._script_dir
|
292
|
-
|
293
|
-
@script_dir.setter
|
294
|
-
def script_dir(self, value):
|
295
|
-
self._script_dir = value
|
File without changes
|
parsl/channels/ssh_il/ssh_il.py
DELETED
@@ -1,85 +0,0 @@
|
|
1
|
-
import getpass
|
2
|
-
import logging
|
3
|
-
|
4
|
-
from parsl.channels.ssh.ssh import DeprecatedSSHChannel
|
5
|
-
from parsl.errors import OptionalModuleMissing
|
6
|
-
|
7
|
-
try:
|
8
|
-
import paramiko
|
9
|
-
_ssh_enabled = True
|
10
|
-
except (ImportError, NameError, FileNotFoundError):
|
11
|
-
_ssh_enabled = False
|
12
|
-
|
13
|
-
|
14
|
-
logger = logging.getLogger(__name__)
|
15
|
-
|
16
|
-
|
17
|
-
class DeprecatedSSHInteractiveLoginChannel(DeprecatedSSHChannel):
|
18
|
-
"""SSH persistent channel. This enables remote execution on sites
|
19
|
-
accessible via ssh. This channel supports interactive login and is appropriate when
|
20
|
-
keys are not set up.
|
21
|
-
"""
|
22
|
-
|
23
|
-
def __init__(self, hostname, username=None, password=None, script_dir=None, envs=None):
|
24
|
-
''' Initialize a persistent connection to the remote system.
|
25
|
-
We should know at this point whether ssh connectivity is possible
|
26
|
-
|
27
|
-
Args:
|
28
|
-
- hostname (String) : Hostname
|
29
|
-
|
30
|
-
KWargs:
|
31
|
-
- username (string) : Username on remote system
|
32
|
-
- password (string) : Password for remote system
|
33
|
-
- script_dir (string) : Full path to a script dir where
|
34
|
-
generated scripts could be sent to.
|
35
|
-
- envs (dict) : A dictionary of env variables to be set when executing commands
|
36
|
-
|
37
|
-
Raises:
|
38
|
-
'''
|
39
|
-
if not _ssh_enabled:
|
40
|
-
raise OptionalModuleMissing(['ssh'],
|
41
|
-
"SSHInteractiveLoginChannel requires the ssh module and config.")
|
42
|
-
|
43
|
-
self.hostname = hostname
|
44
|
-
self.username = username
|
45
|
-
self.password = password
|
46
|
-
|
47
|
-
self.ssh_client = paramiko.SSHClient()
|
48
|
-
self.ssh_client.load_system_host_keys()
|
49
|
-
self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
50
|
-
|
51
|
-
self.script_dir = script_dir
|
52
|
-
|
53
|
-
self.envs = {}
|
54
|
-
if envs is not None:
|
55
|
-
self.envs = envs
|
56
|
-
|
57
|
-
try:
|
58
|
-
self.ssh_client.connect(
|
59
|
-
hostname, username=username, password=password, allow_agent=True
|
60
|
-
)
|
61
|
-
|
62
|
-
except Exception:
|
63
|
-
logger.debug("Caught the SSHException in SSHInteractive")
|
64
|
-
pass
|
65
|
-
'''
|
66
|
-
except paramiko.BadHostKeyException as e:
|
67
|
-
raise BadHostKeyException(e, self.hostname)
|
68
|
-
|
69
|
-
except paramiko.AuthenticationException as e:
|
70
|
-
raise AuthException(e, self.hostname)
|
71
|
-
|
72
|
-
except paramiko.SSHException as e:
|
73
|
-
logger.debug("Caught the SSHException in SSHInteractive")
|
74
|
-
pass
|
75
|
-
|
76
|
-
except Exception as e:
|
77
|
-
raise SSHException(e, self.hostname)
|
78
|
-
'''
|
79
|
-
|
80
|
-
transport = self.ssh_client.get_transport()
|
81
|
-
|
82
|
-
il_password = getpass.getpass('Enter {0} Logon password :'.format(hostname))
|
83
|
-
transport.auth_password(username, il_password)
|
84
|
-
|
85
|
-
self.sftp_client = paramiko.SFTPClient.from_transport(transport)
|
File without changes
|
parsl/providers/ad_hoc/ad_hoc.py
DELETED
@@ -1,252 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
import os
|
3
|
-
import time
|
4
|
-
|
5
|
-
from parsl.channels import LocalChannel
|
6
|
-
from parsl.jobs.states import JobState, JobStatus
|
7
|
-
from parsl.launchers import SimpleLauncher
|
8
|
-
from parsl.providers.base import ExecutionProvider
|
9
|
-
from parsl.providers.errors import ScriptPathError
|
10
|
-
from parsl.utils import RepresentationMixin
|
11
|
-
|
12
|
-
logger = logging.getLogger(__name__)
|
13
|
-
|
14
|
-
|
15
|
-
class DeprecatedAdHocProvider(ExecutionProvider, RepresentationMixin):
|
16
|
-
""" Deprecated ad-hoc execution provider
|
17
|
-
|
18
|
-
The (former) AdHocProvider is deprecated. See
|
19
|
-
`issue #3515 <https://github.com/Parsl/parsl/issues/3515>`_
|
20
|
-
for further discussion.
|
21
|
-
|
22
|
-
This provider is used to provision execution resources over one or more ad hoc nodes
|
23
|
-
that are each accessible over a Channel (say, ssh) but otherwise lack a cluster scheduler.
|
24
|
-
|
25
|
-
Parameters
|
26
|
-
----------
|
27
|
-
|
28
|
-
channels : list of Channel ojects
|
29
|
-
Each channel represents a connection to a remote node
|
30
|
-
|
31
|
-
worker_init : str
|
32
|
-
Command to be run before starting a worker, such as 'module load Anaconda; source activate env'.
|
33
|
-
Since this provider calls the same worker_init across all nodes in the ad-hoc cluster, it is
|
34
|
-
recommended that a single script is made available across nodes such as ~/setup.sh that can
|
35
|
-
be invoked.
|
36
|
-
|
37
|
-
cmd_timeout : int
|
38
|
-
Duration for which the provider will wait for a command to be invoked on a remote system.
|
39
|
-
Defaults to 30s
|
40
|
-
|
41
|
-
parallelism : float
|
42
|
-
Determines the ratio of workers to tasks as managed by the strategy component
|
43
|
-
|
44
|
-
"""
|
45
|
-
|
46
|
-
def __init__(self,
|
47
|
-
channels=[],
|
48
|
-
worker_init='',
|
49
|
-
cmd_timeout=30,
|
50
|
-
parallelism=1,
|
51
|
-
move_files=None):
|
52
|
-
|
53
|
-
self.channels = channels
|
54
|
-
self._label = 'ad-hoc'
|
55
|
-
self.worker_init = worker_init
|
56
|
-
self.cmd_timeout = cmd_timeout
|
57
|
-
self.parallelism = 1
|
58
|
-
self.move_files = move_files
|
59
|
-
self.launcher = SimpleLauncher()
|
60
|
-
self.init_blocks = self.min_blocks = self.max_blocks = len(channels)
|
61
|
-
|
62
|
-
# This will be overridden by the DFK to the rundirs.
|
63
|
-
self.script_dir = "."
|
64
|
-
|
65
|
-
# In ad-hoc mode, nodes_per_block should be 1
|
66
|
-
self.nodes_per_block = 1
|
67
|
-
|
68
|
-
# Dictionary that keeps track of jobs, keyed on job_id
|
69
|
-
self.resources = {}
|
70
|
-
|
71
|
-
self.least_loaded = self._least_loaded()
|
72
|
-
logger.debug("AdHoc provider initialized")
|
73
|
-
|
74
|
-
def _write_submit_script(self, script_string, script_filename):
|
75
|
-
'''
|
76
|
-
Load the template string with config values and write the generated submit script to
|
77
|
-
a submit script file.
|
78
|
-
|
79
|
-
Parameters
|
80
|
-
----------
|
81
|
-
script_string: (string)
|
82
|
-
The template string to be used for the writing submit script
|
83
|
-
|
84
|
-
script_filename: (string)
|
85
|
-
Name of the submit script
|
86
|
-
|
87
|
-
Returns
|
88
|
-
-------
|
89
|
-
None: on success
|
90
|
-
|
91
|
-
Raises
|
92
|
-
------
|
93
|
-
ScriptPathError
|
94
|
-
Unable to write submit script out
|
95
|
-
'''
|
96
|
-
|
97
|
-
try:
|
98
|
-
with open(script_filename, 'w') as f:
|
99
|
-
f.write(script_string)
|
100
|
-
|
101
|
-
except IOError as e:
|
102
|
-
logger.error("Failed writing to submit script: %s", script_filename)
|
103
|
-
raise ScriptPathError(script_filename, e)
|
104
|
-
|
105
|
-
return None
|
106
|
-
|
107
|
-
def _least_loaded(self):
|
108
|
-
""" Find channels that are not in use
|
109
|
-
|
110
|
-
Returns
|
111
|
-
-------
|
112
|
-
channel : Channel object
|
113
|
-
None : When there are no more available channels
|
114
|
-
"""
|
115
|
-
while True:
|
116
|
-
channel_counts = {channel: 0 for channel in self.channels}
|
117
|
-
for job_id in self.resources:
|
118
|
-
channel = self.resources[job_id]['channel']
|
119
|
-
if self.resources[job_id]['status'].state == JobState.RUNNING:
|
120
|
-
channel_counts[channel] = channel_counts.get(channel, 0) + 1
|
121
|
-
else:
|
122
|
-
channel_counts[channel] = channel_counts.get(channel, 0)
|
123
|
-
|
124
|
-
logger.debug("Channel_counts : {}".format(channel_counts))
|
125
|
-
if 0 not in channel_counts.values():
|
126
|
-
yield None
|
127
|
-
|
128
|
-
for channel in channel_counts:
|
129
|
-
if channel_counts[channel] == 0:
|
130
|
-
yield channel
|
131
|
-
|
132
|
-
def submit(self, command, tasks_per_node, job_name="parsl.adhoc"):
|
133
|
-
''' Submits the command onto a channel from the list of channels
|
134
|
-
|
135
|
-
Submit returns an ID that corresponds to the task that was just submitted.
|
136
|
-
|
137
|
-
Parameters
|
138
|
-
----------
|
139
|
-
command: (String)
|
140
|
-
Commandline invocation to be made on the remote side.
|
141
|
-
|
142
|
-
tasks_per_node: (int)
|
143
|
-
command invocations to be launched per node
|
144
|
-
|
145
|
-
job_name: (String)
|
146
|
-
Name of the job. Default : parsl.adhoc
|
147
|
-
|
148
|
-
|
149
|
-
Returns
|
150
|
-
-------
|
151
|
-
None
|
152
|
-
At capacity, cannot provision more
|
153
|
-
|
154
|
-
job_id: (string)
|
155
|
-
Identifier for the job
|
156
|
-
|
157
|
-
'''
|
158
|
-
channel = next(self.least_loaded)
|
159
|
-
if channel is None:
|
160
|
-
logger.warning("All Channels in Ad-Hoc provider are in use")
|
161
|
-
return None
|
162
|
-
|
163
|
-
job_name = "{0}.{1}".format(job_name, time.time())
|
164
|
-
|
165
|
-
# Set script path
|
166
|
-
script_path = "{0}/{1}.sh".format(self.script_dir, job_name)
|
167
|
-
script_path = os.path.abspath(script_path)
|
168
|
-
|
169
|
-
wrap_command = self.worker_init + '\n' + self.launcher(command, tasks_per_node, self.nodes_per_block)
|
170
|
-
|
171
|
-
self._write_submit_script(wrap_command, script_path)
|
172
|
-
|
173
|
-
job_id = None
|
174
|
-
remote_pid = None
|
175
|
-
final_cmd = None
|
176
|
-
|
177
|
-
if (self.move_files is None and not isinstance(channel, LocalChannel)) or (self.move_files):
|
178
|
-
logger.debug("Pushing start script")
|
179
|
-
script_path = channel.push_file(script_path, channel.script_dir)
|
180
|
-
|
181
|
-
# Bash would return until the streams are closed. So we redirect to a outs file
|
182
|
-
final_cmd = 'bash {0} > {0}.out 2>&1 & \n echo "PID:$!" '.format(script_path)
|
183
|
-
retcode, stdout, stderr = channel.execute_wait(final_cmd, self.cmd_timeout)
|
184
|
-
for line in stdout.split('\n'):
|
185
|
-
if line.startswith("PID:"):
|
186
|
-
remote_pid = line.split("PID:")[1].strip()
|
187
|
-
job_id = remote_pid
|
188
|
-
if job_id is None:
|
189
|
-
logger.warning("Channel failed to start remote command/retrieve PID")
|
190
|
-
|
191
|
-
self.resources[job_id] = {'job_id': job_id,
|
192
|
-
'status': JobStatus(JobState.RUNNING),
|
193
|
-
'cmd': final_cmd,
|
194
|
-
'channel': channel,
|
195
|
-
'remote_pid': remote_pid}
|
196
|
-
|
197
|
-
return job_id
|
198
|
-
|
199
|
-
def status(self, job_ids):
|
200
|
-
""" Get status of the list of jobs with job_ids
|
201
|
-
|
202
|
-
Parameters
|
203
|
-
----------
|
204
|
-
job_ids : list of strings
|
205
|
-
List of job id strings
|
206
|
-
|
207
|
-
Returns
|
208
|
-
-------
|
209
|
-
list of JobStatus objects
|
210
|
-
"""
|
211
|
-
for job_id in job_ids:
|
212
|
-
channel = self.resources[job_id]['channel']
|
213
|
-
status_command = "ps --pid {} | grep {}".format(self.resources[job_id]['job_id'],
|
214
|
-
self.resources[job_id]['cmd'].split()[0])
|
215
|
-
retcode, stdout, stderr = channel.execute_wait(status_command)
|
216
|
-
if retcode != 0 and self.resources[job_id]['status'].state == JobState.RUNNING:
|
217
|
-
self.resources[job_id]['status'] = JobStatus(JobState.FAILED)
|
218
|
-
|
219
|
-
return [self.resources[job_id]['status'] for job_id in job_ids]
|
220
|
-
|
221
|
-
def cancel(self, job_ids):
|
222
|
-
""" Cancel a list of jobs with job_ids
|
223
|
-
|
224
|
-
Parameters
|
225
|
-
----------
|
226
|
-
job_ids : list of strings
|
227
|
-
List of job id strings
|
228
|
-
|
229
|
-
Returns
|
230
|
-
-------
|
231
|
-
list of confirmation bools: [True, False...]
|
232
|
-
"""
|
233
|
-
logger.debug("Cancelling jobs: {}".format(job_ids))
|
234
|
-
rets = []
|
235
|
-
for job_id in job_ids:
|
236
|
-
channel = self.resources[job_id]['channel']
|
237
|
-
cmd = "kill -TERM -$(ps -o pgid= {} | grep -o '[0-9]*')".format(self.resources[job_id]['job_id'])
|
238
|
-
retcode, stdout, stderr = channel.execute_wait(cmd)
|
239
|
-
if retcode == 0:
|
240
|
-
rets.append(True)
|
241
|
-
else:
|
242
|
-
rets.append(False)
|
243
|
-
self.resources[job_id]['status'] = JobStatus(JobState.COMPLETED)
|
244
|
-
return rets
|
245
|
-
|
246
|
-
@property
|
247
|
-
def label(self):
|
248
|
-
return self._label
|
249
|
-
|
250
|
-
@property
|
251
|
-
def status_polling_interval(self):
|
252
|
-
return 10
|
File without changes
|