parsl 2024.12.2__py3-none-any.whl → 2024.12.9__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- parsl/configs/cc_in2p3.py +0 -2
- parsl/configs/frontera.py +0 -2
- parsl/configs/htex_local.py +0 -2
- parsl/dataflow/dflow.py +0 -2
- parsl/executors/base.py +1 -1
- parsl/executors/high_throughput/interchange.py +2 -1
- parsl/monitoring/monitoring.py +1 -1
- parsl/monitoring/radios/base.py +13 -0
- parsl/monitoring/radios/filesystem.py +52 -0
- parsl/monitoring/radios/htex.py +57 -0
- parsl/monitoring/radios/multiprocessing.py +17 -0
- parsl/monitoring/radios/udp.py +56 -0
- parsl/monitoring/radios/zmq.py +17 -0
- parsl/monitoring/remote.py +4 -6
- parsl/monitoring/router.py +1 -1
- parsl/providers/cluster_provider.py +2 -5
- parsl/providers/condor/condor.py +1 -8
- parsl/providers/grid_engine/grid_engine.py +1 -6
- parsl/providers/local/local.py +5 -8
- parsl/providers/lsf/lsf.py +1 -6
- parsl/providers/pbspro/pbspro.py +2 -7
- parsl/providers/slurm/slurm.py +3 -9
- parsl/providers/torque/torque.py +1 -7
- parsl/tests/configs/cc_in2p3.py +0 -2
- parsl/tests/configs/frontera.py +0 -2
- parsl/tests/configs/htex_local.py +0 -2
- parsl/tests/configs/htex_local_alternate.py +0 -2
- parsl/tests/configs/htex_local_intask_staging.py +0 -2
- parsl/tests/configs/htex_local_rsync_staging.py +0 -2
- parsl/tests/configs/slurm_local.py +0 -2
- parsl/tests/manual_tests/htex_local.py +0 -2
- parsl/tests/manual_tests/test_memory_limits.py +0 -2
- parsl/tests/scaling_tests/htex_local.py +0 -2
- parsl/tests/sites/test_affinity.py +0 -2
- parsl/tests/sites/test_worker_info.py +0 -2
- parsl/tests/test_htex/test_drain.py +0 -2
- parsl/tests/test_htex/test_manager_selector_by_block.py +0 -2
- parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +0 -2
- parsl/tests/test_providers/test_local_provider.py +1 -2
- parsl/tests/test_providers/test_pbspro_template.py +1 -3
- parsl/tests/test_providers/test_slurm_template.py +1 -3
- parsl/tests/test_scaling/test_regression_1621.py +0 -2
- parsl/tests/test_scaling/test_regression_3568_scaledown_vs_MISSING.py +0 -1
- parsl/tests/test_scaling/test_scale_down.py +0 -2
- parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +0 -2
- parsl/tests/test_scaling/test_scale_down_htex_unregistered.py +0 -2
- parsl/tests/test_scaling/test_shutdown_scalein.py +0 -2
- parsl/tests/test_scaling/test_worker_interchange_bad_messages_3262.py +0 -2
- parsl/tests/test_staging/test_zip_in.py +0 -1
- parsl/tests/test_staging/test_zip_out.py +0 -1
- parsl/tests/test_staging/test_zip_to_zip.py +0 -1
- parsl/tests/test_utils/test_execute_wait.py +35 -0
- parsl/utils.py +35 -0
- parsl/version.py +1 -1
- {parsl-2024.12.2.data → parsl-2024.12.9.data}/scripts/interchange.py +2 -1
- {parsl-2024.12.2.dist-info → parsl-2024.12.9.dist-info}/METADATA +2 -2
- {parsl-2024.12.2.dist-info → parsl-2024.12.9.dist-info}/RECORD +65 -67
- parsl/channels/__init__.py +0 -4
- parsl/channels/base.py +0 -54
- parsl/channels/errors.py +0 -30
- parsl/channels/local/local.py +0 -66
- parsl/monitoring/radios.py +0 -191
- parsl/tests/integration/test_channels/__init__.py +0 -0
- parsl/tests/test_channels/__init__.py +0 -0
- parsl/tests/test_channels/test_large_output.py +0 -22
- parsl/tests/test_channels/test_local_channel.py +0 -19
- /parsl/{channels/local → monitoring/radios}/__init__.py +0 -0
- {parsl-2024.12.2.data → parsl-2024.12.9.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2024.12.2.data → parsl-2024.12.9.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2024.12.2.data → parsl-2024.12.9.data}/scripts/process_worker_pool.py +0 -0
- {parsl-2024.12.2.dist-info → parsl-2024.12.9.dist-info}/LICENSE +0 -0
- {parsl-2024.12.2.dist-info → parsl-2024.12.9.dist-info}/WHEEL +0 -0
- {parsl-2024.12.2.dist-info → parsl-2024.12.9.dist-info}/entry_points.txt +0 -0
- {parsl-2024.12.2.dist-info → parsl-2024.12.9.dist-info}/top_level.txt +0 -0
parsl/configs/cc_in2p3.py
CHANGED
@@ -1,4 +1,3 @@
-from parsl.channels import LocalChannel
 from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.providers import GridEngineProvider
@@ -10,7 +9,6 @@ config = Config(
             label='cc_in2p3_htex',
             max_workers_per_node=2,
             provider=GridEngineProvider(
-                channel=LocalChannel(),
                 nodes_per_block=1,
                 init_blocks=2,
                 max_blocks=2,
parsl/configs/frontera.py
CHANGED
@@ -1,4 +1,3 @@
-from parsl.channels import LocalChannel
 from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.launchers import SrunLauncher
@@ -15,7 +14,6 @@ config = Config(
             max_workers_per_node=1,  # Set number of workers per node
             provider=SlurmProvider(
                 cmd_timeout=60,  # Add extra time for slow scheduler responses
-                channel=LocalChannel(),
                 nodes_per_block=2,
                 init_blocks=1,
                 min_blocks=1,
parsl/configs/htex_local.py
CHANGED
@@ -1,4 +1,3 @@
-from parsl.channels import LocalChannel
 from parsl.config import Config
 from parsl.executors import HighThroughputExecutor
 from parsl.providers import LocalProvider
@@ -10,7 +9,6 @@ config = Config(
             label="htex_local",
             cores_per_worker=1,
             provider=LocalProvider(
-                channel=LocalChannel(),
                 init_blocks=1,
                 max_blocks=1,
             ),
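Note: the three config changes above all apply the same migration: the LocalChannel import and the channel= argument are deleted, and providers are constructed without a channel. A minimal before/after sketch, using LocalProvider as in htex_local.py:

# Before (2024.12.2): providers took an explicit channel
# from parsl.channels import LocalChannel
# provider = LocalProvider(channel=LocalChannel(), init_blocks=1, max_blocks=1)

# After (2024.12.9): the channel argument is simply dropped
from parsl.providers import LocalProvider

provider = LocalProvider(init_blocks=1, max_blocks=1)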
parsl/dataflow/dflow.py
CHANGED
@@ -1151,8 +1151,6 @@ class DataFlowKernel:
             executor.provider.script_dir = os.path.join(self.run_dir, 'submit_scripts')
             os.makedirs(executor.provider.script_dir, exist_ok=True)
 
-            executor.provider.channel.script_dir = executor.provider.script_dir
-
             self.executors[executor.label] = executor
             executor.start()
         block_executors = [e for e in executors if isinstance(e, BlockProviderExecutor)]
parsl/executors/base.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Any, Callable, Dict, Optional
 
 from typing_extensions import Literal, Self
 
-from parsl.monitoring.radios import MonitoringRadioSender
+from parsl.monitoring.radios.base import MonitoringRadioSender
 
 
 class ParslExecutor(metaclass=ABCMeta):
parsl/executors/high_throughput/interchange.py
CHANGED
@@ -20,7 +20,8 @@ from parsl.executors.high_throughput.errors import ManagerLost, VersionMismatch
 from parsl.executors.high_throughput.manager_record import ManagerRecord
 from parsl.executors.high_throughput.manager_selector import ManagerSelector
 from parsl.monitoring.message_type import MessageType
-from parsl.monitoring.radios import MonitoringRadioSender
+from parsl.monitoring.radios.base import MonitoringRadioSender
+from parsl.monitoring.radios.zmq import ZMQRadioSender
 from parsl.process_loggers import wrap_with_logs
 from parsl.serialize import serialize as serialize_object
 from parsl.utils import setproctitle
parsl/monitoring/monitoring.py
CHANGED
@@ -14,7 +14,7 @@ import typeguard
 
 from parsl.log_utils import set_file_logger
 from parsl.monitoring.errors import MonitoringHubStartError
-from parsl.monitoring.radios import MultiprocessingQueueRadioSender
+from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadioSender
 from parsl.monitoring.router import router_starter
 from parsl.monitoring.types import TaggedMonitoringMessage
 from parsl.multiprocessing import ForkProcess, SizedQueue
parsl/monitoring/radios/base.py
ADDED
@@ -0,0 +1,13 @@
+import logging
+from abc import ABCMeta, abstractmethod
+from typing import Optional
+
+_db_manager_excepts: Optional[Exception]
+
+logger = logging.getLogger(__name__)
+
+
+class MonitoringRadioSender(metaclass=ABCMeta):
+    @abstractmethod
+    def send(self, message: object) -> None:
+        pass
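Note: MonitoringRadioSender is the new single-method interface that the concrete radios below implement. As an illustration only (this subclass is hypothetical, not part of the package), any sender just needs a send method:

from parsl.monitoring.radios.base import MonitoringRadioSender

class PrintRadioSender(MonitoringRadioSender):
    # Hypothetical sender for illustration: writes messages to stdout.
    def send(self, message: object) -> None:
        print(f"monitoring message: {message!r}")

PrintRadioSender().send({"example": "payload"})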
parsl/monitoring/radios/filesystem.py
ADDED
@@ -0,0 +1,52 @@
+import logging
+import os
+import pickle
+import uuid
+
+from parsl.monitoring.radios.base import MonitoringRadioSender
+
+logger = logging.getLogger(__name__)
+
+
+class FilesystemRadioSender(MonitoringRadioSender):
+    """A MonitoringRadioSender that sends messages over a shared filesystem.
+
+    The messsage directory structure is based on maildir,
+    https://en.wikipedia.org/wiki/Maildir
+
+    The writer creates a message in tmp/ and then when it is fully
+    written, moves it atomically into new/
+
+    The reader ignores tmp/ and only reads and deletes messages from
+    new/
+
+    This avoids a race condition of reading partially written messages.
+
+    This radio is likely to give higher shared filesystem load compared to
+    the UDP radio, but should be much more reliable.
+    """
+
+    def __init__(self, *, monitoring_url: str, timeout: int = 10, run_dir: str):
+        logger.info("filesystem based monitoring channel initializing")
+        self.base_path = f"{run_dir}/monitor-fs-radio/"
+        self.tmp_path = f"{self.base_path}/tmp"
+        self.new_path = f"{self.base_path}/new"
+
+        os.makedirs(self.tmp_path, exist_ok=True)
+        os.makedirs(self.new_path, exist_ok=True)
+
+    def send(self, message: object) -> None:
+        logger.info("Sending a monitoring message via filesystem")
+
+        unique_id = str(uuid.uuid4())
+
+        tmp_filename = f"{self.tmp_path}/{unique_id}"
+        new_filename = f"{self.new_path}/{unique_id}"
+        buffer = message
+
+        # this will write the message out then atomically
+        # move it into new/, so that a partially written
+        # file will never be observed in new/
+        with open(tmp_filename, "wb") as f:
+            pickle.dump(buffer, f)
+        os.rename(tmp_filename, new_filename)
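Note: only the writer half of the maildir scheme ships in this file. A sketch of the reader loop the docstring implies (the function name and its use are assumptions for illustration, not parsl API):

import os
import pickle

def drain_new_messages(base_path: str):
    # Read and delete fully-written messages from new/, ignoring tmp/,
    # so partially written files are never observed.
    new_path = os.path.join(base_path, "new")
    for name in os.listdir(new_path):
        full = os.path.join(new_path, name)
        with open(full, "rb") as f:
            message = pickle.load(f)
        os.remove(full)
        yield message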
parsl/monitoring/radios/htex.py
ADDED
@@ -0,0 +1,57 @@
+import logging
+import pickle
+
+from parsl.monitoring.radios.base import MonitoringRadioSender
+
+logger = logging.getLogger(__name__)
+
+
+class HTEXRadioSender(MonitoringRadioSender):
+
+    def __init__(self, monitoring_url: str, timeout: int = 10):
+        """
+        Parameters
+        ----------
+
+        monitoring_url : str
+            URL of the form <scheme>://<IP>:<PORT>
+        timeout : int
+            timeout, default=10s
+        """
+        logger.info("htex-based monitoring channel initialising")
+
+    def send(self, message: object) -> None:
+        """ Sends a message to the UDP receiver
+
+        Parameter
+        ---------
+
+        message: object
+            Arbitrary pickle-able object that is to be sent
+
+        Returns:
+            None
+        """
+
+        import parsl.executors.high_throughput.monitoring_info
+
+        result_queue = parsl.executors.high_throughput.monitoring_info.result_queue
+
+        # this message needs to go in the result queue tagged so that it is treated
+        # i) as a monitoring message by the interchange, and then further more treated
+        # as a RESOURCE_INFO message when received by monitoring (rather than a NODE_INFO
+        # which is the implicit default for messages from the interchange)
+
+        # for the interchange, the outer wrapper, this needs to be a dict:
+
+        interchange_msg = {
+            'type': 'monitoring',
+            'payload': message
+        }
+
+        if result_queue:
+            result_queue.put(pickle.dumps(interchange_msg))
+        else:
+            logger.error("result_queue is uninitialized - cannot put monitoring message")
+
+        return
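Note: the comments in send describe a two-layer tagging scheme: the outer dict is addressed to the interchange, the payload to monitoring. A hypothetical receiving-side dispatch consistent with that wrapper (illustration only; the real interchange logic is not shown in this diff):

import pickle

def handle_monitoring(payload: object) -> None:  # stub for illustration
    print("monitoring payload:", payload)

def handle_result(msg: object) -> None:  # stub for illustration
    print("task result:", msg)

def dispatch(raw: bytes) -> None:
    msg = pickle.loads(raw)
    if isinstance(msg, dict) and msg.get('type') == 'monitoring':
        handle_monitoring(msg['payload'])
    else:
        handle_result(msg)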
parsl/monitoring/radios/multiprocessing.py
ADDED
@@ -0,0 +1,17 @@
+from multiprocessing.queues import Queue
+
+from parsl.monitoring.radios.base import MonitoringRadioSender
+
+
+class MultiprocessingQueueRadioSender(MonitoringRadioSender):
+    """A monitoring radio which connects over a multiprocessing Queue.
+    This radio is intended to be used on the submit side, where components
+    in the submit process, or processes launched by multiprocessing, will have
+    access to a Queue shared with the monitoring database code (bypassing the
+    monitoring router).
+    """
+    def __init__(self, queue: Queue) -> None:
+        self.queue = queue
+
+    def send(self, message: object) -> None:
+        self.queue.put(message)
parsl/monitoring/radios/udp.py
ADDED
@@ -0,0 +1,56 @@
+import logging
+import pickle
+import socket
+
+from parsl.monitoring.radios.base import MonitoringRadioSender
+
+
+class UDPRadioSender(MonitoringRadioSender):
+
+    def __init__(self, monitoring_url: str, timeout: int = 10):
+        """
+        Parameters
+        ----------
+
+        monitoring_url : str
+            URL of the form <scheme>://<IP>:<PORT>
+        timeout : int
+            timeout, default=10s
+        """
+        self.monitoring_url = monitoring_url
+        self.sock_timeout = timeout
+        try:
+            self.scheme, self.ip, port = (x.strip('/') for x in monitoring_url.split(':'))
+            self.port = int(port)
+        except Exception:
+            raise Exception("Failed to parse monitoring url: {}".format(monitoring_url))
+
+        self.sock = socket.socket(socket.AF_INET,
+                                  socket.SOCK_DGRAM,
+                                  socket.IPPROTO_UDP)  # UDP
+        self.sock.settimeout(self.sock_timeout)
+
+    def send(self, message: object) -> None:
+        """ Sends a message to the UDP receiver
+
+        Parameter
+        ---------
+
+        message: object
+            Arbitrary pickle-able object that is to be sent
+
+        Returns:
+            None
+        """
+        try:
+            buffer = pickle.dumps(message)
+        except Exception:
+            logging.exception("Exception during pickling", exc_info=True)
+            return
+
+        try:
+            self.sock.sendto(buffer, (self.ip, self.port))
+        except socket.timeout:
+            logging.error("Could not send message within timeout limit")
+            return
+        return
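Note: the constructor parses monitoring_url of the form <scheme>://<IP>:<PORT> by splitting on ':' and stripping '/'. A minimal receiver sketch that pairs with this sender (an assumption for illustration; parsl's actual UDP receiver is not part of this diff):

import pickle
import socket

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("0.0.0.0", 55055))  # example port
while True:
    data, addr = sock.recvfrom(65536)  # one pickled message per datagram
    print(addr, pickle.loads(data))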
parsl/monitoring/radios/zmq.py
ADDED
@@ -0,0 +1,17 @@
+import zmq
+
+from parsl.monitoring.radios.base import MonitoringRadioSender
+
+
+class ZMQRadioSender(MonitoringRadioSender):
+    """A monitoring radio which connects over ZMQ. This radio is not
+    thread-safe, because its use of ZMQ is not thread-safe.
+    """
+
+    def __init__(self, hub_address: str, hub_zmq_port: int) -> None:
+        self._hub_channel = zmq.Context().socket(zmq.DEALER)
+        self._hub_channel.set_hwm(0)
+        self._hub_channel.connect(f"tcp://{hub_address}:{hub_zmq_port}")
+
+    def send(self, message: object) -> None:
+        self._hub_channel.send_pyobj(message)
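Note: the sender connects a DEALER socket and sends pickled objects via send_pyobj. A sketch of the conventional receive side (an assumption about the hub, not code from this package) since a DEALER that connects is normally paired with a ROUTER that binds:

import pickle
import zmq

ctx = zmq.Context()
hub = ctx.socket(zmq.ROUTER)
hub.bind("tcp://*:55056")  # example port
ident, payload = hub.recv_multipart()  # ROUTER prepends the sender identity
message = pickle.loads(payload)  # send_pyobj pickles on the DEALER side
print(message)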
parsl/monitoring/remote.py
CHANGED
@@ -7,12 +7,10 @@ from multiprocessing import Event
 from typing import Any, Callable, Dict, List, Sequence, Tuple
 
 from parsl.monitoring.message_type import MessageType
-from parsl.monitoring.radios import (
-    FilesystemRadioSender,
-    HTEXRadioSender,
-    MonitoringRadioSender,
-    UDPRadioSender,
-)
+from parsl.monitoring.radios.base import MonitoringRadioSender
+from parsl.monitoring.radios.filesystem import FilesystemRadioSender
+from parsl.monitoring.radios.htex import HTEXRadioSender
+from parsl.monitoring.radios.udp import UDPRadioSender
 from parsl.multiprocessing import ForkProcess
 from parsl.process_loggers import wrap_with_logs
 
parsl/monitoring/router.py
CHANGED
@@ -14,7 +14,7 @@ import typeguard
 import zmq
 
 from parsl.log_utils import set_file_logger
-from parsl.monitoring.radios import MultiprocessingQueueRadioSender
+from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadioSender
 from parsl.monitoring.types import TaggedMonitoringMessage
 from parsl.process_loggers import wrap_with_logs
 from parsl.utils import setproctitle
parsl/providers/cluster_provider.py
CHANGED
@@ -6,6 +6,7 @@ from parsl.launchers.base import Launcher
 from parsl.launchers.errors import BadLauncher
 from parsl.providers.base import ExecutionProvider
 from parsl.providers.errors import SchedulerMissingArgs, ScriptPathError
+from parsl.utils import execute_wait
 
 logger = logging.getLogger(__name__)
 
@@ -17,8 +18,6 @@ class ClusterProvider(ExecutionProvider):
     ----------
     label : str
         Label for this provider.
-    channel : Channel
-        Channel for accessing this provider.
     walltime : str
         Walltime requested per block in HH:MM:SS.
     launcher : Launcher
@@ -44,7 +43,6 @@ class ClusterProvider(ExecutionProvider):
 
     def __init__(self,
                  label,
-                 channel,
                  nodes_per_block,
                  init_blocks,
                  min_blocks,
@@ -55,7 +53,6 @@ class ClusterProvider(ExecutionProvider):
                  cmd_timeout=10):
 
         self._label = label
-        self.channel = channel
         self.nodes_per_block = nodes_per_block
         self.init_blocks = init_blocks
         self.min_blocks = min_blocks
@@ -76,7 +73,7 @@ class ClusterProvider(ExecutionProvider):
         t = self.cmd_timeout
         if timeout is not None:
             t = timeout
-        return self.channel.execute_wait(cmd, t)
+        return execute_wait(cmd, t)
 
     def _write_submit_script(self, template, script_filename, job_name, configs):
         """Generate submit script and write it to a file.
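Note: execute_wait replaces the old channel.execute_wait; per the file list it is added to parsl/utils.py (+35 lines) along with a new test module. Its body is not shown in this section, so the following is only a hedged sketch consistent with the call sites above, which expect execute_wait(cmd, timeout) to return (returncode, stdout, stderr):

import subprocess

def execute_wait(cmd: str, walltime=None):
    # Sketch only: run cmd in a shell, wait up to walltime seconds, and
    # return (returncode, stdout, stderr). The real implementation in
    # parsl/utils.py may differ.
    proc = subprocess.run(cmd, shell=True, capture_output=True,
                          text=True, timeout=walltime)
    return proc.returncode, proc.stdout, proc.stderr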
parsl/providers/condor/condor.py
CHANGED
@@ -5,7 +5,6 @@ import time
 
 import typeguard
 
-from parsl.channels import LocalChannel
 from parsl.jobs.states import JobState, JobStatus
 from parsl.launchers import SingleNodeLauncher
 from parsl.launchers.base import Launcher
@@ -18,8 +17,6 @@ logger = logging.getLogger(__name__)
 
 from typing import Dict, List, Optional
 
-from parsl.channels.base import Channel
-
 # See http://pages.cs.wisc.edu/~adesmet/status.html
 translate_table = {
     '1': JobState.PENDING,
@@ -36,8 +33,6 @@ class CondorProvider(RepresentationMixin, ClusterProvider):
 
     Parameters
     ----------
-    channel : Channel
-        Channel for accessing this provider.
     nodes_per_block : int
         Nodes to provision per block.
     cores_per_slot : int
@@ -79,7 +74,6 @@ class CondorProvider(RepresentationMixin, ClusterProvider):
     """
     @typeguard.typechecked
     def __init__(self,
-                 channel: Channel = LocalChannel(),
                  nodes_per_block: int = 1,
                  cores_per_slot: Optional[int] = None,
                  mem_per_slot: Optional[float] = None,
@@ -100,7 +94,6 @@ class CondorProvider(RepresentationMixin, ClusterProvider):
 
         label = 'condor'
         super().__init__(label,
-                         channel,
                          nodes_per_block,
                          init_blocks,
                          min_blocks,
@@ -226,7 +219,7 @@ class CondorProvider(RepresentationMixin, ClusterProvider):
 
         job_config = {}
        job_config["job_name"] = job_name
-        job_config["submit_script_dir"] = self.channel.script_dir
+        job_config["submit_script_dir"] = self.script_dir
        job_config["project"] = self.project
        job_config["nodes"] = self.nodes_per_block
        job_config["scheduler_options"] = scheduler_options
parsl/providers/grid_engine/grid_engine.py
CHANGED
@@ -2,7 +2,6 @@ import logging
 import os
 import time
 
-from parsl.channels import LocalChannel
 from parsl.jobs.states import JobState, JobStatus
 from parsl.launchers import SingleNodeLauncher
 from parsl.providers.cluster_provider import ClusterProvider
@@ -36,8 +35,6 @@ class GridEngineProvider(ClusterProvider, RepresentationMixin):
 
     Parameters
     ----------
-    channel : Channel
-        Channel for accessing this provider.
     nodes_per_block : int
         Nodes to provision per block.
     min_blocks : int
@@ -62,7 +59,6 @@ class GridEngineProvider(ClusterProvider, RepresentationMixin):
     """
 
     def __init__(self,
-                 channel=LocalChannel(),
                  nodes_per_block=1,
                  init_blocks=1,
                  min_blocks=0,
@@ -76,7 +72,6 @@ class GridEngineProvider(ClusterProvider, RepresentationMixin):
                  queue=None):
         label = 'grid_engine'
         super().__init__(label,
-                         channel,
                          nodes_per_block,
                          init_blocks,
                          min_blocks,
@@ -100,7 +95,7 @@ class GridEngineProvider(ClusterProvider, RepresentationMixin):
                     self.nodes_per_block, tasks_per_node))
 
         job_config = {}
-        job_config["submit_script_dir"] = self.channel.script_dir
+        job_config["submit_script_dir"] = self.script_dir
         job_config["nodes"] = self.nodes_per_block
         job_config["walltime"] = self.walltime
         job_config["scheduler_options"] = self.scheduler_options
parsl/providers/local/local.py
CHANGED
@@ -2,7 +2,6 @@ import logging
 import os
 import time
 
-from parsl.channels import LocalChannel
 from parsl.jobs.states import JobState, JobStatus
 from parsl.launchers import SingleNodeLauncher
 from parsl.providers.base import ExecutionProvider
@@ -11,7 +10,7 @@ from parsl.providers.errors import (
     ScriptPathError,
     SubmitException,
 )
-from parsl.utils import RepresentationMixin
+from parsl.utils import RepresentationMixin, execute_wait
 
 logger = logging.getLogger(__name__)
 
@@ -37,7 +36,6 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
     """
 
     def __init__(self,
-                 channel=LocalChannel(),
                  nodes_per_block=1,
                  launcher=SingleNodeLauncher(),
                  init_blocks=1,
@@ -46,7 +44,6 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
                  worker_init='',
                  cmd_timeout=30,
                  parallelism=1):
-        self.channel = channel
         self._label = 'local'
         self.nodes_per_block = nodes_per_block
         self.launcher = launcher
@@ -118,7 +115,7 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
         return [self.resources[jid]['status'] for jid in job_ids]
 
     def _is_alive(self, job_dict):
-        retcode, stdout, stderr = self.channel.execute_wait(
+        retcode, stdout, stderr = execute_wait(
            'ps -p {} > /dev/null 2> /dev/null; echo "STATUS:$?" '.format(
                job_dict['remote_pid']), self.cmd_timeout)
        for line in stdout.split('\n'):
@@ -223,11 +220,11 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
         # cancel the task later.
         #
         # We need to do the >/dev/null 2>&1 so that bash closes stdout, otherwise
-        # channel.execute_wait hangs reading the process stdout until all the
+        # execute_wait hangs reading the process stdout until all the
         # background commands complete.
         cmd = '/bin/bash -c \'echo - >{0}.ec && {{ {{ bash {0} 1>{0}.out 2>{0}.err ; ' \
              'echo $? > {0}.ec ; }} >/dev/null 2>&1 & echo "PID:$!" ; }}\''.format(script_path)
-        retcode, stdout, stderr = self.channel.execute_wait(cmd, self.cmd_timeout)
+        retcode, stdout, stderr = execute_wait(cmd, self.cmd_timeout)
         if retcode != 0:
             raise SubmitException(job_name, "Launch command exited with code {0}".format(retcode),
                                  stdout, stderr)
@@ -258,7 +255,7 @@ class LocalProvider(ExecutionProvider, RepresentationMixin):
         job_dict['cancelled'] = True
         logger.debug("Terminating job/process ID: {0}".format(job))
         cmd = "kill -- -$(ps -o pgid= {} | grep -o '[0-9]*')".format(job_dict['remote_pid'])
-        retcode, stdout, stderr = self.channel.execute_wait(cmd, self.cmd_timeout)
+        retcode, stdout, stderr = execute_wait(cmd, self.cmd_timeout)
         if retcode != 0:
             logger.warning("Failed to kill PID: {} and child processes on {}".format(job_dict['remote_pid'],
                                                                                      self.label))
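Note: the submit path above backgrounds the job inside a subshell with stdout and stderr closed, so execute_wait returns as soon as the PID:$! line is printed instead of blocking until the job finishes. A standalone sketch of the same pattern, with a placeholder sleep in place of the generated script:

import subprocess

cmd = "/bin/bash -c '{ { sleep 30 ; } >/dev/null 2>&1 & echo \"PID:$!\" ; }'"
out = subprocess.run(cmd, shell=True, capture_output=True, text=True).stdout
pid = next(line.split(':', 1)[1] for line in out.splitlines()
           if line.startswith('PID:'))
print("background PID:", pid)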
parsl/providers/lsf/lsf.py
CHANGED
@@ -3,7 +3,6 @@ import math
 import os
 import time
 
-from parsl.channels import LocalChannel
 from parsl.jobs.states import JobState, JobStatus
 from parsl.launchers import SingleNodeLauncher
 from parsl.providers.cluster_provider import ClusterProvider
@@ -32,8 +31,6 @@ class LSFProvider(ClusterProvider, RepresentationMixin):
 
     Parameters
     ----------
-    channel : Channel
-        Channel for accessing this provider.
     nodes_per_block : int
         Nodes to provision per block.
         When request_by_nodes is False, it is computed by cores_per_block / cores_per_node.
@@ -77,7 +74,6 @@ class LSFProvider(ClusterProvider, RepresentationMixin):
     """
 
     def __init__(self,
-                 channel=LocalChannel(),
                  nodes_per_block=1,
                  cores_per_block=None,
                  cores_per_node=None,
@@ -96,7 +92,6 @@ class LSFProvider(ClusterProvider, RepresentationMixin):
                  launcher=SingleNodeLauncher()):
         label = 'LSF'
         super().__init__(label,
-                         channel,
                          nodes_per_block,
                          init_blocks,
                          min_blocks,
@@ -211,7 +206,7 @@ class LSFProvider(ClusterProvider, RepresentationMixin):
         logger.debug("Requesting one block with {} nodes".format(self.nodes_per_block))
 
         job_config = {}
-        job_config["submit_script_dir"] = self.channel.script_dir
+        job_config["submit_script_dir"] = self.script_dir
         job_config["nodes"] = self.nodes_per_block
         job_config["tasks_per_node"] = tasks_per_node
         job_config["walltime"] = wtime_to_minutes(self.walltime)
parsl/providers/pbspro/pbspro.py
CHANGED
@@ -3,7 +3,6 @@ import logging
 import os
 import time
 
-from parsl.channels import LocalChannel
 from parsl.jobs.states import JobState, JobStatus
 from parsl.launchers import SingleNodeLauncher
 from parsl.providers.pbspro.template import template_string
@@ -17,8 +16,6 @@ class PBSProProvider(TorqueProvider):
 
     Parameters
     ----------
-    channel : Channel
-        Channel for accessing this provider.
     account : str
         Account the job will be charged against.
     queue : str
@@ -51,7 +48,6 @@ class PBSProProvider(TorqueProvider):
         :class:`~parsl.launchers.SingleNodeLauncher`.
     """
     def __init__(self,
-                 channel=LocalChannel(),
                  account=None,
                  queue=None,
                  scheduler_options='',
@@ -66,8 +62,7 @@ class PBSProProvider(TorqueProvider):
                  launcher=SingleNodeLauncher(),
                  walltime="00:20:00",
                  cmd_timeout=120):
-        super().__init__(channel,
-                         account,
+        super().__init__(account,
                          queue,
                          scheduler_options,
                          worker_init,
@@ -159,7 +154,7 @@ class PBSProProvider(TorqueProvider):
         )
 
         job_config = {}
-        job_config["submit_script_dir"] = self.channel.script_dir
+        job_config["submit_script_dir"] = self.script_dir
         job_config["nodes_per_block"] = self.nodes_per_block
         job_config["ncpus"] = self.cpus_per_node
         job_config["walltime"] = self.walltime
parsl/providers/slurm/slurm.py
CHANGED
@@ -3,12 +3,10 @@ import math
 import os
 import re
 import time
-from typing import Optional
+from typing import Any, Dict, Optional
 
 import typeguard
 
-from parsl.channels import LocalChannel
-from parsl.channels.base import Channel
 from parsl.jobs.states import JobState, JobStatus
 from parsl.launchers import SingleNodeLauncher
 from parsl.launchers.base import Launcher
@@ -73,8 +71,6 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
     clusters : str
         Slurm cluster name, or comma seperated cluster list, used to choose between different clusters in a federated Slurm instance.
         If unspecified or ``None``, no slurm directive for clusters will be added.
-    channel : Channel
-        Channel for accessing this provider.
     nodes_per_block : int
         Nodes to provision per block.
     cores_per_node : int
@@ -119,7 +115,6 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
                  qos: Optional[str] = None,
                  constraint: Optional[str] = None,
                  clusters: Optional[str] = None,
-                 channel: Channel = LocalChannel(),
                  nodes_per_block: int = 1,
                  cores_per_node: Optional[int] = None,
                  mem_per_node: Optional[int] = None,
@@ -136,7 +131,6 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
                  launcher: Launcher = SingleNodeLauncher()):
         label = 'slurm'
         super().__init__(label,
-                         channel,
                          nodes_per_block,
                          init_blocks,
                          min_blocks,
@@ -286,8 +280,8 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
 
         logger.debug("Requesting one block with {} nodes".format(self.nodes_per_block))
 
-        job_config = {}
-        job_config["submit_script_dir"] = self.channel.script_dir
+        job_config: Dict[str, Any] = {}
+        job_config["submit_script_dir"] = self.script_dir
         job_config["nodes"] = self.nodes_per_block
         job_config["tasks_per_node"] = tasks_per_node
         job_config["walltime"] = wtime_to_minutes(self.walltime)
|