parsl 2024.11.4__py3-none-any.whl → 2024.11.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/channels/base.py +6 -46
- parsl/channels/errors.py +0 -67
- parsl/channels/local/local.py +5 -56
- parsl/dataflow/dflow.py +1 -56
- parsl/executors/taskvine/manager.py +6 -0
- parsl/executors/taskvine/manager_config.py +5 -0
- parsl/monitoring/monitoring.py +20 -21
- parsl/monitoring/radios.py +1 -3
- parsl/providers/cluster_provider.py +1 -4
- parsl/providers/condor/condor.py +1 -4
- parsl/providers/grid_engine/grid_engine.py +1 -4
- parsl/providers/lsf/lsf.py +1 -4
- parsl/providers/pbspro/pbspro.py +1 -4
- parsl/providers/slurm/slurm.py +1 -4
- parsl/providers/torque/torque.py +1 -4
- parsl/tests/configs/user_opts.py +0 -7
- parsl/tests/conftest.py +0 -4
- parsl/tests/test_channels/test_local_channel.py +0 -19
- parsl/tests/test_providers/test_local_provider.py +0 -135
- parsl/tests/test_providers/test_pbspro_template.py +2 -1
- parsl/tests/test_providers/test_slurm_template.py +2 -1
- parsl/version.py +1 -1
- {parsl-2024.11.4.dist-info → parsl-2024.11.11.dist-info}/METADATA +2 -8
- {parsl-2024.11.4.dist-info → parsl-2024.11.11.dist-info}/RECORD +32 -43
- {parsl-2024.11.4.dist-info → parsl-2024.11.11.dist-info}/WHEEL +1 -1
- parsl/channels/oauth_ssh/__init__.py +0 -0
- parsl/channels/oauth_ssh/oauth_ssh.py +0 -119
- parsl/channels/ssh/__init__.py +0 -0
- parsl/channels/ssh/ssh.py +0 -295
- parsl/channels/ssh_il/__init__.py +0 -0
- parsl/channels/ssh_il/ssh_il.py +0 -85
- parsl/providers/ad_hoc/__init__.py +0 -0
- parsl/providers/ad_hoc/ad_hoc.py +0 -252
- parsl/tests/configs/local_adhoc.py +0 -18
- parsl/tests/sites/test_local_adhoc.py +0 -62
- parsl/tests/test_channels/test_dfk_close.py +0 -26
- {parsl-2024.11.4.data → parsl-2024.11.11.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2024.11.4.data → parsl-2024.11.11.data}/scripts/interchange.py +0 -0
- {parsl-2024.11.4.data → parsl-2024.11.11.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2024.11.4.data → parsl-2024.11.11.data}/scripts/process_worker_pool.py +0 -0
- {parsl-2024.11.4.dist-info → parsl-2024.11.11.dist-info}/LICENSE +0 -0
- {parsl-2024.11.4.dist-info → parsl-2024.11.11.dist-info}/entry_points.txt +0 -0
- {parsl-2024.11.4.dist-info → parsl-2024.11.11.dist-info}/top_level.txt +0 -0
parsl/channels/base.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
from abc import ABCMeta, abstractmethod, abstractproperty
|
2
|
-
from typing import
|
2
|
+
from typing import Tuple
|
3
3
|
|
4
4
|
|
5
5
|
class Channel(metaclass=ABCMeta):
|
@@ -8,33 +8,27 @@ class Channel(metaclass=ABCMeta):
|
|
8
8
|
|
9
9
|
For certain resources such as campus clusters or supercomputers at
|
10
10
|
research laboratories, resource requirements may require authentication.
|
11
|
-
For instance some resources may allow access to their job schedulers from
|
12
|
-
only their login-nodes which require you to authenticate through SSH, or
|
13
|
-
require two factor authentication.
|
14
11
|
|
15
|
-
The
|
16
|
-
shell
|
12
|
+
The only remaining Channel, *LocalChannel*, executes commands locally in a
|
13
|
+
shell.
|
17
14
|
|
18
15
|
Channels provide the ability to execute commands remotely, using the
|
19
16
|
execute_wait method, and manipulate the remote file system using methods
|
20
17
|
such as push_file, pull_file and makedirs.
|
21
18
|
|
22
19
|
Channels should ensure that each launched command runs in a new process
|
23
|
-
group, so that providers (such as
|
24
|
-
|
20
|
+
group, so that providers (such as LocalProvider) which terminate long
|
21
|
+
running commands using process groups can do so.
|
25
22
|
"""
|
26
23
|
|
27
24
|
@abstractmethod
|
28
|
-
def execute_wait(self, cmd: str, walltime: int = 0
|
25
|
+
def execute_wait(self, cmd: str, walltime: int = 0) -> Tuple[int, str, str]:
|
29
26
|
''' Executes the cmd, with a defined walltime.
|
30
27
|
|
31
28
|
Args:
|
32
29
|
- cmd (string): Command string to execute over the channel
|
33
30
|
- walltime (int) : Timeout in seconds
|
34
31
|
|
35
|
-
KWargs:
|
36
|
-
- envs (Dict[str, str]) : Environment variables to push to the remote side
|
37
|
-
|
38
32
|
Returns:
|
39
33
|
- (exit_code, stdout, stderr) (int, string, string)
|
40
34
|
'''
|
@@ -86,37 +80,3 @@ class Channel(metaclass=ABCMeta):
|
|
86
80
|
destination_path (string)
|
87
81
|
'''
|
88
82
|
pass
|
89
|
-
|
90
|
-
@abstractmethod
|
91
|
-
def close(self) -> None:
|
92
|
-
''' Closes the channel.
|
93
|
-
'''
|
94
|
-
pass
|
95
|
-
|
96
|
-
@abstractmethod
|
97
|
-
def makedirs(self, path: str, mode: int = 0o511, exist_ok: bool = False) -> None:
|
98
|
-
"""Create a directory.
|
99
|
-
|
100
|
-
If intermediate directories do not exist, they will be created.
|
101
|
-
|
102
|
-
Parameters
|
103
|
-
----------
|
104
|
-
path : str
|
105
|
-
Path of directory to create.
|
106
|
-
mode : int
|
107
|
-
Permissions (posix-style) for the newly-created directory.
|
108
|
-
exist_ok : bool
|
109
|
-
If False, raise an OSError if the target directory already exists.
|
110
|
-
"""
|
111
|
-
pass
|
112
|
-
|
113
|
-
@abstractmethod
|
114
|
-
def isdir(self, path: str) -> bool:
|
115
|
-
"""Return true if the path refers to an existing directory.
|
116
|
-
|
117
|
-
Parameters
|
118
|
-
----------
|
119
|
-
path : str
|
120
|
-
Path of directory to check.
|
121
|
-
"""
|
122
|
-
pass
|
parsl/channels/errors.py
CHANGED
@@ -17,73 +17,6 @@ class ChannelError(ParslError):
|
|
17
17
|
return "Hostname:{0}, Reason:{1}".format(self.hostname, self.reason)
|
18
18
|
|
19
19
|
|
20
|
-
class BadHostKeyException(ChannelError):
|
21
|
-
''' SSH channel could not be created since server's host keys could not
|
22
|
-
be verified
|
23
|
-
|
24
|
-
Contains:
|
25
|
-
reason(string)
|
26
|
-
e (paramiko exception object)
|
27
|
-
hostname (string)
|
28
|
-
'''
|
29
|
-
|
30
|
-
def __init__(self, e: Exception, hostname: str) -> None:
|
31
|
-
super().__init__("SSH channel could not be created since server's host keys could not be "
|
32
|
-
"verified", e, hostname)
|
33
|
-
|
34
|
-
|
35
|
-
class BadScriptPath(ChannelError):
|
36
|
-
''' An error raised during execution of an app.
|
37
|
-
What this exception contains depends entirely on context
|
38
|
-
Contains:
|
39
|
-
reason(string)
|
40
|
-
e (paramiko exception object)
|
41
|
-
hostname (string)
|
42
|
-
'''
|
43
|
-
|
44
|
-
def __init__(self, e: Exception, hostname: str) -> None:
|
45
|
-
super().__init__("Inaccessible remote script dir. Specify script_dir", e, hostname)
|
46
|
-
|
47
|
-
|
48
|
-
class BadPermsScriptPath(ChannelError):
|
49
|
-
''' User does not have permissions to access the script_dir on the remote site
|
50
|
-
|
51
|
-
Contains:
|
52
|
-
reason(string)
|
53
|
-
e (paramiko exception object)
|
54
|
-
hostname (string)
|
55
|
-
'''
|
56
|
-
|
57
|
-
def __init__(self, e: Exception, hostname: str) -> None:
|
58
|
-
super().__init__("User does not have permissions to access the script_dir", e, hostname)
|
59
|
-
|
60
|
-
|
61
|
-
class AuthException(ChannelError):
|
62
|
-
''' An error raised during execution of an app.
|
63
|
-
What this exception contains depends entirely on context
|
64
|
-
Contains:
|
65
|
-
reason(string)
|
66
|
-
e (paramiko exception object)
|
67
|
-
hostname (string)
|
68
|
-
'''
|
69
|
-
|
70
|
-
def __init__(self, e: Exception, hostname: str) -> None:
|
71
|
-
super().__init__("Authentication to remote server failed", e, hostname)
|
72
|
-
|
73
|
-
|
74
|
-
class SSHException(ChannelError):
|
75
|
-
''' if there was any other error connecting or establishing an SSH session
|
76
|
-
|
77
|
-
Contains:
|
78
|
-
reason(string)
|
79
|
-
e (paramiko exception object)
|
80
|
-
hostname (string)
|
81
|
-
'''
|
82
|
-
|
83
|
-
def __init__(self, e: Exception, hostname: str) -> None:
|
84
|
-
super().__init__("Error connecting or establishing an SSH session", e, hostname)
|
85
|
-
|
86
|
-
|
87
20
|
class FileCopyException(ChannelError):
|
88
21
|
''' File copy operation failed
|
89
22
|
|
parsl/channels/local/local.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
import copy
|
2
1
|
import logging
|
3
2
|
import os
|
4
3
|
import shutil
|
@@ -16,49 +15,32 @@ class LocalChannel(Channel, RepresentationMixin):
|
|
16
15
|
and done so infrequently that they do not need a persistent channel
|
17
16
|
'''
|
18
17
|
|
19
|
-
def __init__(self
|
18
|
+
def __init__(self):
|
20
19
|
''' Initialize the local channel. script_dir is required by set to a default.
|
21
20
|
|
22
21
|
KwArgs:
|
23
|
-
- userhome (string): (default='.') This is provided as a way to override and set a specific userhome
|
24
|
-
- envs (dict) : A dictionary of env variables to be set when launching the shell
|
25
22
|
- script_dir (string): Directory to place scripts
|
26
23
|
'''
|
27
|
-
self.
|
28
|
-
|
29
|
-
|
30
|
-
local_env = os.environ.copy()
|
31
|
-
self._envs = copy.deepcopy(local_env)
|
32
|
-
self._envs.update(envs)
|
33
|
-
self.script_dir = script_dir
|
34
|
-
|
35
|
-
def execute_wait(self, cmd, walltime=None, envs={}):
|
24
|
+
self.script_dir = None
|
25
|
+
|
26
|
+
def execute_wait(self, cmd, walltime=None):
|
36
27
|
''' Synchronously execute a commandline string on the shell.
|
37
28
|
|
38
29
|
Args:
|
39
30
|
- cmd (string) : Commandline string to execute
|
40
31
|
- walltime (int) : walltime in seconds
|
41
32
|
|
42
|
-
Kwargs:
|
43
|
-
- envs (dict) : Dictionary of env variables. This will be used
|
44
|
-
to override the envs set at channel initialization.
|
45
|
-
|
46
33
|
Returns:
|
47
34
|
- retcode : Return code from the execution
|
48
35
|
- stdout : stdout string
|
49
36
|
- stderr : stderr string
|
50
37
|
'''
|
51
|
-
current_env = copy.deepcopy(self._envs)
|
52
|
-
current_env.update(envs)
|
53
|
-
|
54
38
|
try:
|
55
39
|
logger.debug("Creating process with command '%s'", cmd)
|
56
40
|
proc = subprocess.Popen(
|
57
41
|
cmd,
|
58
42
|
stdout=subprocess.PIPE,
|
59
43
|
stderr=subprocess.PIPE,
|
60
|
-
cwd=self.userhome,
|
61
|
-
env=current_env,
|
62
44
|
shell=True,
|
63
45
|
preexec_fn=os.setpgrp
|
64
46
|
)
|
@@ -99,7 +81,7 @@ class LocalChannel(Channel, RepresentationMixin):
|
|
99
81
|
os.chmod(local_dest, 0o700)
|
100
82
|
|
101
83
|
except OSError as e:
|
102
|
-
raise FileCopyException(e,
|
84
|
+
raise FileCopyException(e, "localhost")
|
103
85
|
|
104
86
|
else:
|
105
87
|
os.chmod(local_dest, 0o700)
|
@@ -109,39 +91,6 @@ class LocalChannel(Channel, RepresentationMixin):
|
|
109
91
|
def pull_file(self, remote_source, local_dir):
|
110
92
|
return self.push_file(remote_source, local_dir)
|
111
93
|
|
112
|
-
def close(self) -> None:
|
113
|
-
''' There's nothing to close here, and so this doesn't do anything
|
114
|
-
'''
|
115
|
-
pass
|
116
|
-
|
117
|
-
def isdir(self, path):
|
118
|
-
"""Return true if the path refers to an existing directory.
|
119
|
-
|
120
|
-
Parameters
|
121
|
-
----------
|
122
|
-
path : str
|
123
|
-
Path of directory to check.
|
124
|
-
"""
|
125
|
-
|
126
|
-
return os.path.isdir(path)
|
127
|
-
|
128
|
-
def makedirs(self, path, mode=0o700, exist_ok=False):
|
129
|
-
"""Create a directory.
|
130
|
-
|
131
|
-
If intermediate directories do not exist, they will be created.
|
132
|
-
|
133
|
-
Parameters
|
134
|
-
----------
|
135
|
-
path : str
|
136
|
-
Path of directory to create.
|
137
|
-
mode : int
|
138
|
-
Permissions (posix-style) for the newly-created directory.
|
139
|
-
exist_ok : bool
|
140
|
-
If False, raise an OSError if the target directory already exists.
|
141
|
-
"""
|
142
|
-
|
143
|
-
return os.makedirs(path, mode, exist_ok)
|
144
|
-
|
145
94
|
@property
|
146
95
|
def script_dir(self):
|
147
96
|
return self._script_dir
|
parsl/dataflow/dflow.py
CHANGED
@@ -6,7 +6,6 @@ import datetime
|
|
6
6
|
import inspect
|
7
7
|
import logging
|
8
8
|
import os
|
9
|
-
import pathlib
|
10
9
|
import pickle
|
11
10
|
import random
|
12
11
|
import sys
|
@@ -25,7 +24,6 @@ from typeguard import typechecked
|
|
25
24
|
import parsl
|
26
25
|
from parsl.app.errors import RemoteExceptionWrapper
|
27
26
|
from parsl.app.futures import DataFuture
|
28
|
-
from parsl.channels import Channel
|
29
27
|
from parsl.config import Config
|
30
28
|
from parsl.data_provider.data_manager import DataManager
|
31
29
|
from parsl.data_provider.files import File
|
@@ -49,7 +47,6 @@ from parsl.monitoring import MonitoringHub
|
|
49
47
|
from parsl.monitoring.message_type import MessageType
|
50
48
|
from parsl.monitoring.remote import monitor_wrapper
|
51
49
|
from parsl.process_loggers import wrap_with_logs
|
52
|
-
from parsl.providers.base import ExecutionProvider
|
53
50
|
from parsl.usage_tracking.usage import UsageTracker
|
54
51
|
from parsl.utils import Timer, get_all_checkpoints, get_std_fname_mode, get_version
|
55
52
|
|
@@ -1143,36 +1140,6 @@ class DataFlowKernel:
|
|
1143
1140
|
|
1144
1141
|
logger.info("End of summary")
|
1145
1142
|
|
1146
|
-
def _create_remote_dirs_over_channel(self, provider: ExecutionProvider, channel: Channel) -> None:
|
1147
|
-
"""Create script directories across a channel
|
1148
|
-
|
1149
|
-
Parameters
|
1150
|
-
----------
|
1151
|
-
provider: Provider obj
|
1152
|
-
Provider for which scripts dirs are being created
|
1153
|
-
channel: Channel obj
|
1154
|
-
Channel over which the remote dirs are to be created
|
1155
|
-
"""
|
1156
|
-
run_dir = self.run_dir
|
1157
|
-
if channel.script_dir is None:
|
1158
|
-
|
1159
|
-
# This case will be detected as unreachable by mypy, because of
|
1160
|
-
# the type of script_dir, which is str, not Optional[str].
|
1161
|
-
# The type system doesn't represent the initialized/uninitialized
|
1162
|
-
# state of a channel so cannot represent that a channel needs
|
1163
|
-
# its script directory set or not.
|
1164
|
-
|
1165
|
-
channel.script_dir = os.path.join(run_dir, 'submit_scripts') # type: ignore[unreachable]
|
1166
|
-
|
1167
|
-
# Only create dirs if we aren't on a shared-fs
|
1168
|
-
if not channel.isdir(run_dir):
|
1169
|
-
parent, child = pathlib.Path(run_dir).parts[-2:]
|
1170
|
-
remote_run_dir = os.path.join(parent, child)
|
1171
|
-
channel.script_dir = os.path.join(remote_run_dir, 'remote_submit_scripts')
|
1172
|
-
provider.script_dir = os.path.join(run_dir, 'local_submit_scripts')
|
1173
|
-
|
1174
|
-
channel.makedirs(channel.script_dir, exist_ok=True)
|
1175
|
-
|
1176
1143
|
def add_executors(self, executors: Sequence[ParslExecutor]) -> None:
|
1177
1144
|
for executor in executors:
|
1178
1145
|
executor.run_id = self.run_id
|
@@ -1186,12 +1153,7 @@ class DataFlowKernel:
|
|
1186
1153
|
executor.provider.script_dir = os.path.join(self.run_dir, 'submit_scripts')
|
1187
1154
|
os.makedirs(executor.provider.script_dir, exist_ok=True)
|
1188
1155
|
|
1189
|
-
|
1190
|
-
logger.debug("Creating script_dir across multiple channels")
|
1191
|
-
for channel in executor.provider.channels:
|
1192
|
-
self._create_remote_dirs_over_channel(executor.provider, channel)
|
1193
|
-
else:
|
1194
|
-
self._create_remote_dirs_over_channel(executor.provider, executor.provider.channel)
|
1156
|
+
executor.provider.channel.script_dir = executor.provider.script_dir
|
1195
1157
|
|
1196
1158
|
self.executors[executor.label] = executor
|
1197
1159
|
executor.start()
|
@@ -1273,23 +1235,6 @@ class DataFlowKernel:
|
|
1273
1235
|
executor.shutdown()
|
1274
1236
|
logger.info(f"Shut down executor {executor.label}")
|
1275
1237
|
|
1276
|
-
if hasattr(executor, 'provider'):
|
1277
|
-
if hasattr(executor.provider, 'script_dir'):
|
1278
|
-
logger.info(f"Closing channel(s) for {executor.label}")
|
1279
|
-
|
1280
|
-
if hasattr(executor.provider, 'channels'):
|
1281
|
-
for channel in executor.provider.channels:
|
1282
|
-
logger.info(f"Closing channel {channel}")
|
1283
|
-
channel.close()
|
1284
|
-
logger.info(f"Closed channel {channel}")
|
1285
|
-
else:
|
1286
|
-
assert hasattr(executor.provider, 'channel'), "If provider has no .channels, it must have .channel"
|
1287
|
-
logger.info(f"Closing channel {executor.provider.channel}")
|
1288
|
-
executor.provider.channel.close()
|
1289
|
-
logger.info(f"Closed channel {executor.provider.channel}")
|
1290
|
-
|
1291
|
-
logger.info(f"Closed executor channel(s) for {executor.label}")
|
1292
|
-
|
1293
1238
|
logger.info("Terminated executors")
|
1294
1239
|
self.time_completed = datetime.datetime.now()
|
1295
1240
|
|
@@ -44,11 +44,17 @@ def _set_manager_attributes(m, config):
|
|
44
44
|
# Enable peer transfer feature between workers if specified
|
45
45
|
if config.enable_peer_transfers:
|
46
46
|
m.enable_peer_transfers()
|
47
|
+
else:
|
48
|
+
m.disable_peer_transfers()
|
47
49
|
|
48
50
|
# Set catalog report to parsl if project name exists
|
49
51
|
if m.name:
|
50
52
|
m.set_property("framework", "parsl")
|
51
53
|
|
54
|
+
if config.tune_parameters is not None:
|
55
|
+
for k, v in config.tune_parameters.items():
|
56
|
+
m.tune(k, v)
|
57
|
+
|
52
58
|
|
53
59
|
def _prepare_environment_serverless(manager_config, env_cache_dir, poncho_create_script):
|
54
60
|
# Return path to a packaged poncho environment
|
@@ -156,6 +156,10 @@ class TaskVineManagerConfig:
|
|
156
156
|
Directory to store TaskVine logging facilities.
|
157
157
|
Default is None, in which all TaskVine logs will be contained
|
158
158
|
in the Parsl logging directory.
|
159
|
+
|
160
|
+
tune_parameters: Optional[dict]
|
161
|
+
Extended vine_tune parameters, expressed in a dictionary
|
162
|
+
by { 'tune-parameter' : value }.
|
159
163
|
"""
|
160
164
|
|
161
165
|
# Connection and communication settings
|
@@ -181,6 +185,7 @@ class TaskVineManagerConfig:
|
|
181
185
|
autocategory: bool = True
|
182
186
|
enable_peer_transfers: bool = True
|
183
187
|
wait_for_workers: Optional[int] = None
|
188
|
+
tune_parameters: Optional[dict] = None
|
184
189
|
|
185
190
|
# Logging settings
|
186
191
|
vine_log_dir: Optional[str] = None
|
parsl/monitoring/monitoring.py
CHANGED
@@ -3,9 +3,10 @@ from __future__ import annotations
|
|
3
3
|
import logging
|
4
4
|
import multiprocessing.synchronize as ms
|
5
5
|
import os
|
6
|
+
import pickle
|
6
7
|
import queue
|
7
8
|
import time
|
8
|
-
from multiprocessing import Event
|
9
|
+
from multiprocessing import Event
|
9
10
|
from multiprocessing.queues import Queue
|
10
11
|
from typing import TYPE_CHECKING, Literal, Optional, Tuple, Union, cast
|
11
12
|
|
@@ -18,7 +19,6 @@ from parsl.monitoring.router import router_starter
|
|
18
19
|
from parsl.monitoring.types import TaggedMonitoringMessage
|
19
20
|
from parsl.multiprocessing import ForkProcess, SizedQueue
|
20
21
|
from parsl.process_loggers import wrap_with_logs
|
21
|
-
from parsl.serialize import deserialize
|
22
22
|
from parsl.utils import RepresentationMixin, setproctitle
|
23
23
|
|
24
24
|
_db_manager_excepts: Optional[Exception]
|
@@ -169,15 +169,15 @@ class MonitoringHub(RepresentationMixin):
|
|
169
169
|
daemon=True,
|
170
170
|
)
|
171
171
|
self.dbm_proc.start()
|
172
|
-
logger.info("Started the router process
|
172
|
+
logger.info("Started the router process %s and DBM process %s", self.router_proc.pid, self.dbm_proc.pid)
|
173
173
|
|
174
|
-
self.filesystem_proc =
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
174
|
+
self.filesystem_proc = ForkProcess(target=filesystem_receiver,
|
175
|
+
args=(self.logdir, self.resource_msgs, dfk_run_dir),
|
176
|
+
name="Monitoring-Filesystem-Process",
|
177
|
+
daemon=True
|
178
|
+
)
|
179
179
|
self.filesystem_proc.start()
|
180
|
-
logger.info(
|
180
|
+
logger.info("Started filesystem radio receiver process %s", self.filesystem_proc.pid)
|
181
181
|
|
182
182
|
self.radio = MultiprocessingQueueRadioSender(self.resource_msgs)
|
183
183
|
|
@@ -190,7 +190,7 @@ class MonitoringHub(RepresentationMixin):
|
|
190
190
|
raise MonitoringHubStartError()
|
191
191
|
|
192
192
|
if isinstance(comm_q_result, str):
|
193
|
-
logger.error(
|
193
|
+
logger.error("MonitoringRouter sent an error message: %s", comm_q_result)
|
194
194
|
raise RuntimeError(f"MonitoringRouter failed to start: {comm_q_result}")
|
195
195
|
|
196
196
|
udp_port, zmq_port = comm_q_result
|
@@ -202,7 +202,7 @@ class MonitoringHub(RepresentationMixin):
|
|
202
202
|
self.hub_zmq_port = zmq_port
|
203
203
|
|
204
204
|
def send(self, message: TaggedMonitoringMessage) -> None:
|
205
|
-
logger.debug("Sending message type
|
205
|
+
logger.debug("Sending message type %s", message[0])
|
206
206
|
self.radio.send(message)
|
207
207
|
|
208
208
|
def close(self) -> None:
|
@@ -219,10 +219,9 @@ class MonitoringHub(RepresentationMixin):
|
|
219
219
|
if exception_msgs:
|
220
220
|
for exception_msg in exception_msgs:
|
221
221
|
logger.error(
|
222
|
-
"
|
223
|
-
|
224
|
-
|
225
|
-
)
|
222
|
+
"%s process delivered an exception: %s. Terminating all monitoring processes immediately.",
|
223
|
+
exception_msg[0],
|
224
|
+
exception_msg[1]
|
226
225
|
)
|
227
226
|
self.router_proc.terminate()
|
228
227
|
self.dbm_proc.terminate()
|
@@ -259,7 +258,7 @@ class MonitoringHub(RepresentationMixin):
|
|
259
258
|
|
260
259
|
|
261
260
|
@wrap_with_logs
|
262
|
-
def filesystem_receiver(logdir: str, q:
|
261
|
+
def filesystem_receiver(logdir: str, q: Queue[TaggedMonitoringMessage], run_dir: str) -> None:
|
263
262
|
logger = set_file_logger("{}/monitoring_filesystem_radio.log".format(logdir),
|
264
263
|
name="monitoring_filesystem_radio",
|
265
264
|
level=logging.INFO)
|
@@ -269,7 +268,7 @@ def filesystem_receiver(logdir: str, q: "queue.Queue[TaggedMonitoringMessage]",
|
|
269
268
|
base_path = f"{run_dir}/monitor-fs-radio/"
|
270
269
|
tmp_dir = f"{base_path}/tmp/"
|
271
270
|
new_dir = f"{base_path}/new/"
|
272
|
-
logger.debug(
|
271
|
+
logger.debug("Creating new and tmp paths under %s", base_path)
|
273
272
|
|
274
273
|
os.makedirs(tmp_dir, exist_ok=True)
|
275
274
|
os.makedirs(new_dir, exist_ok=True)
|
@@ -280,15 +279,15 @@ def filesystem_receiver(logdir: str, q: "queue.Queue[TaggedMonitoringMessage]",
|
|
280
279
|
# iterate over files in new_dir
|
281
280
|
for filename in os.listdir(new_dir):
|
282
281
|
try:
|
283
|
-
logger.info(
|
282
|
+
logger.info("Processing filesystem radio file %s", filename)
|
284
283
|
full_path_filename = f"{new_dir}/{filename}"
|
285
284
|
with open(full_path_filename, "rb") as f:
|
286
|
-
message =
|
287
|
-
logger.debug(
|
285
|
+
message = pickle.load(f)
|
286
|
+
logger.debug("Message received is: %s", message)
|
288
287
|
assert isinstance(message, tuple)
|
289
288
|
q.put(cast(TaggedMonitoringMessage, message))
|
290
289
|
os.remove(full_path_filename)
|
291
290
|
except Exception:
|
292
|
-
logger.exception(
|
291
|
+
logger.exception("Exception processing %s - probably will be retried next iteration", filename)
|
293
292
|
|
294
293
|
time.sleep(1) # whats a good time for this poll?
|
parsl/monitoring/radios.py
CHANGED
@@ -8,8 +8,6 @@ from multiprocessing.queues import Queue
|
|
8
8
|
|
9
9
|
import zmq
|
10
10
|
|
11
|
-
from parsl.serialize import serialize
|
12
|
-
|
13
11
|
logger = logging.getLogger(__name__)
|
14
12
|
|
15
13
|
|
@@ -59,7 +57,7 @@ class FilesystemRadioSender(MonitoringRadioSender):
|
|
59
57
|
# move it into new/, so that a partially written
|
60
58
|
# file will never be observed in new/
|
61
59
|
with open(tmp_filename, "wb") as f:
|
62
|
-
|
60
|
+
pickle.dump(buffer, f)
|
63
61
|
os.rename(tmp_filename, new_filename)
|
64
62
|
|
65
63
|
|
@@ -18,10 +18,7 @@ class ClusterProvider(ExecutionProvider):
|
|
18
18
|
label : str
|
19
19
|
Label for this provider.
|
20
20
|
channel : Channel
|
21
|
-
Channel for accessing this provider.
|
22
|
-
:class:`~parsl.channels.LocalChannel` (the default),
|
23
|
-
:class:`~parsl.channels.SSHChannel`, or
|
24
|
-
:class:`~parsl.channels.SSHInteractiveLoginChannel`.
|
21
|
+
Channel for accessing this provider.
|
25
22
|
walltime : str
|
26
23
|
Walltime requested per block in HH:MM:SS.
|
27
24
|
launcher : Launcher
|
parsl/providers/condor/condor.py
CHANGED
@@ -37,10 +37,7 @@ class CondorProvider(RepresentationMixin, ClusterProvider):
|
|
37
37
|
Parameters
|
38
38
|
----------
|
39
39
|
channel : Channel
|
40
|
-
Channel for accessing this provider.
|
41
|
-
:class:`~parsl.channels.LocalChannel` (the default),
|
42
|
-
:class:`~parsl.channels.SSHChannel`, or
|
43
|
-
:class:`~parsl.channels.SSHInteractiveLoginChannel`.
|
40
|
+
Channel for accessing this provider.
|
44
41
|
nodes_per_block : int
|
45
42
|
Nodes to provision per block.
|
46
43
|
cores_per_slot : int
|
@@ -37,10 +37,7 @@ class GridEngineProvider(ClusterProvider, RepresentationMixin):
|
|
37
37
|
Parameters
|
38
38
|
----------
|
39
39
|
channel : Channel
|
40
|
-
Channel for accessing this provider.
|
41
|
-
:class:`~parsl.channels.LocalChannel` (the default),
|
42
|
-
:class:`~parsl.channels.SSHChannel`, or
|
43
|
-
:class:`~parsl.channels.SSHInteractiveLoginChannel`.
|
40
|
+
Channel for accessing this provider.
|
44
41
|
nodes_per_block : int
|
45
42
|
Nodes to provision per block.
|
46
43
|
min_blocks : int
|
parsl/providers/lsf/lsf.py
CHANGED
@@ -33,10 +33,7 @@ class LSFProvider(ClusterProvider, RepresentationMixin):
|
|
33
33
|
Parameters
|
34
34
|
----------
|
35
35
|
channel : Channel
|
36
|
-
Channel for accessing this provider.
|
37
|
-
:class:`~parsl.channels.LocalChannel` (the default),
|
38
|
-
:class:`~parsl.channels.SSHChannel`, or
|
39
|
-
:class:`~parsl.channels.SSHInteractiveLoginChannel`.
|
36
|
+
Channel for accessing this provider.
|
40
37
|
nodes_per_block : int
|
41
38
|
Nodes to provision per block.
|
42
39
|
When request_by_nodes is False, it is computed by cores_per_block / cores_per_node.
|
parsl/providers/pbspro/pbspro.py
CHANGED
@@ -18,10 +18,7 @@ class PBSProProvider(TorqueProvider):
|
|
18
18
|
Parameters
|
19
19
|
----------
|
20
20
|
channel : Channel
|
21
|
-
Channel for accessing this provider.
|
22
|
-
:class:`~parsl.channels.LocalChannel` (the default),
|
23
|
-
:class:`~parsl.channels.SSHChannel`, or
|
24
|
-
:class:`~parsl.channels.SSHInteractiveLoginChannel`.
|
21
|
+
Channel for accessing this provider.
|
25
22
|
account : str
|
26
23
|
Account the job will be charged against.
|
27
24
|
queue : str
|
parsl/providers/slurm/slurm.py
CHANGED
@@ -71,10 +71,7 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
|
|
71
71
|
constraint : str
|
72
72
|
Slurm job constraint, often used to choose cpu or gpu type. If unspecified or ``None``, no constraint slurm directive will be added.
|
73
73
|
channel : Channel
|
74
|
-
Channel for accessing this provider.
|
75
|
-
:class:`~parsl.channels.LocalChannel` (the default),
|
76
|
-
:class:`~parsl.channels.SSHChannel`, or
|
77
|
-
:class:`~parsl.channels.SSHInteractiveLoginChannel`.
|
74
|
+
Channel for accessing this provider.
|
78
75
|
nodes_per_block : int
|
79
76
|
Nodes to provision per block.
|
80
77
|
cores_per_node : int
|
parsl/providers/torque/torque.py
CHANGED
@@ -34,10 +34,7 @@ class TorqueProvider(ClusterProvider, RepresentationMixin):
|
|
34
34
|
Parameters
|
35
35
|
----------
|
36
36
|
channel : Channel
|
37
|
-
Channel for accessing this provider.
|
38
|
-
:class:`~parsl.channels.LocalChannel` (the default),
|
39
|
-
:class:`~parsl.channels.SSHChannel`, or
|
40
|
-
:class:`~parsl.channels.SSHInteractiveLoginChannel`.
|
37
|
+
Channel for accessing this provider.
|
41
38
|
account : str
|
42
39
|
Account the job will be charged against.
|
43
40
|
queue : str
|
parsl/tests/configs/user_opts.py
CHANGED
@@ -135,13 +135,6 @@ user_opts = {
|
|
135
135
|
# # For example:
|
136
136
|
# 'remote_writeable': 'globus://af7bda53-6d04-11e5-ba46-22000b92c6ec/home/bzc/'
|
137
137
|
# },
|
138
|
-
# 'adhoc': {
|
139
|
-
# # This specifies configuration parameters when testing an ad-hoc SSH based cluster
|
140
|
-
# 'username': 'fixme', # username on remote systems
|
141
|
-
# 'remote_hostnames': ['hostname1', 'hostname2'], # addresses of remote systems
|
142
|
-
# 'worker_init': 'init commands', # worker_init for remote systems
|
143
|
-
# 'script_dir': "/path" # script directory on remote systems
|
144
|
-
# }
|
145
138
|
#
|
146
139
|
} # type: Dict[str, Any]
|
147
140
|
|
parsl/tests/conftest.py
CHANGED
@@ -143,10 +143,6 @@ def pytest_configure(config):
|
|
143
143
|
'markers',
|
144
144
|
'staging_required: Marks tests that require a staging provider, when there is no sharedFS'
|
145
145
|
)
|
146
|
-
config.addinivalue_line(
|
147
|
-
'markers',
|
148
|
-
'sshd_required: Marks tests that require a SSHD'
|
149
|
-
)
|
150
146
|
config.addinivalue_line(
|
151
147
|
'markers',
|
152
148
|
'multiple_cores_required: Marks tests that require multiple cores, such as htex affinity'
|
@@ -17,22 +17,3 @@ def test_env():
|
|
17
17
|
|
18
18
|
x = [s for s in stdout if s.startswith("HOME=")]
|
19
19
|
assert x, "HOME not found"
|
20
|
-
|
21
|
-
|
22
|
-
@pytest.mark.local
|
23
|
-
def test_env_mod():
|
24
|
-
''' Testing for env update at execute time.
|
25
|
-
'''
|
26
|
-
|
27
|
-
lc = LocalChannel()
|
28
|
-
rc, stdout, stderr = lc.execute_wait("env", 1, {'TEST_ENV': 'fooo'})
|
29
|
-
|
30
|
-
stdout = stdout.split('\n')
|
31
|
-
x = [s for s in stdout if s.startswith("PATH=")]
|
32
|
-
assert x, "PATH not found"
|
33
|
-
|
34
|
-
x = [s for s in stdout if s.startswith("HOME=")]
|
35
|
-
assert x, "HOME not found"
|
36
|
-
|
37
|
-
x = [s for s in stdout if s.startswith("TEST_ENV=fooo")]
|
38
|
-
assert x, "User set env missing"
|