parsl 2025.8.4__py3-none-any.whl → 2025.11.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/__init__.py +0 -4
- parsl/app/bash.py +1 -1
- parsl/benchmark/perf.py +73 -17
- parsl/concurrent/__init__.py +95 -14
- parsl/curvezmq.py +0 -16
- parsl/data_provider/globus.py +3 -1
- parsl/dataflow/dflow.py +107 -207
- parsl/dataflow/memoization.py +144 -31
- parsl/dataflow/states.py +5 -5
- parsl/executors/base.py +2 -2
- parsl/executors/execute_task.py +2 -8
- parsl/executors/flux/executor.py +4 -6
- parsl/executors/globus_compute.py +0 -4
- parsl/executors/high_throughput/executor.py +86 -25
- parsl/executors/high_throughput/interchange.py +55 -42
- parsl/executors/high_throughput/mpi_executor.py +1 -2
- parsl/executors/high_throughput/mpi_resource_management.py +7 -14
- parsl/executors/high_throughput/process_worker_pool.py +32 -7
- parsl/executors/high_throughput/zmq_pipes.py +36 -67
- parsl/executors/radical/executor.py +2 -6
- parsl/executors/radical/rpex_worker.py +2 -2
- parsl/executors/taskvine/executor.py +5 -1
- parsl/executors/threads.py +5 -2
- parsl/jobs/states.py +2 -2
- parsl/jobs/strategy.py +7 -6
- parsl/monitoring/db_manager.py +21 -23
- parsl/monitoring/monitoring.py +2 -2
- parsl/monitoring/radios/filesystem.py +2 -1
- parsl/monitoring/radios/htex.py +2 -1
- parsl/monitoring/radios/multiprocessing.py +2 -1
- parsl/monitoring/radios/udp.py +2 -1
- parsl/monitoring/radios/udp_router.py +2 -2
- parsl/monitoring/radios/zmq_router.py +2 -2
- parsl/multiprocessing.py +0 -49
- parsl/providers/base.py +24 -37
- parsl/providers/pbspro/pbspro.py +1 -1
- parsl/serialize/__init__.py +6 -9
- parsl/serialize/facade.py +0 -32
- parsl/tests/configs/local_threads_globus.py +18 -14
- parsl/tests/configs/taskvine_ex.py +1 -1
- parsl/tests/manual_tests/test_memory_limits.py +1 -1
- parsl/tests/sites/test_concurrent.py +51 -3
- parsl/tests/test_checkpointing/test_periodic.py +15 -9
- parsl/tests/test_checkpointing/test_python_checkpoint_1.py +6 -3
- parsl/tests/test_checkpointing/test_regression_233.py +0 -1
- parsl/tests/test_curvezmq.py +0 -42
- parsl/tests/test_execute_task.py +2 -11
- parsl/tests/test_htex/test_command_concurrency_regression_1321.py +54 -0
- parsl/tests/test_htex/test_htex.py +36 -1
- parsl/tests/test_htex/test_interchange_exit_bad_registration.py +2 -2
- parsl/tests/test_htex/test_priority_queue.py +26 -3
- parsl/tests/test_htex/test_zmq_binding.py +2 -1
- parsl/tests/test_mpi_apps/test_mpi_scheduler.py +18 -43
- parsl/tests/test_python_apps/test_basic.py +0 -14
- parsl/tests/test_python_apps/test_depfail_propagation.py +11 -1
- parsl/tests/test_python_apps/test_exception.py +19 -0
- parsl/tests/test_python_apps/test_garbage_collect.py +1 -6
- parsl/tests/test_python_apps/test_memoize_2.py +11 -1
- parsl/tests/test_python_apps/test_memoize_exception.py +41 -0
- parsl/tests/test_regression/test_3874.py +47 -0
- parsl/tests/test_scaling/test_regression_3696_oscillation.py +1 -0
- parsl/tests/test_staging/test_staging_globus.py +2 -2
- parsl/tests/test_utils/test_representation_mixin.py +53 -0
- parsl/tests/unit/test_globus_compute_executor.py +11 -2
- parsl/utils.py +11 -3
- parsl/version.py +1 -1
- {parsl-2025.8.4.data → parsl-2025.11.10.data}/scripts/interchange.py +55 -42
- {parsl-2025.8.4.data → parsl-2025.11.10.data}/scripts/process_worker_pool.py +32 -7
- {parsl-2025.8.4.dist-info → parsl-2025.11.10.dist-info}/METADATA +64 -50
- {parsl-2025.8.4.dist-info → parsl-2025.11.10.dist-info}/RECORD +76 -81
- {parsl-2025.8.4.dist-info → parsl-2025.11.10.dist-info}/WHEEL +1 -1
- parsl/tests/configs/local_threads_checkpoint_periodic.py +0 -11
- parsl/tests/configs/local_threads_no_cache.py +0 -11
- parsl/tests/site_tests/test_provider.py +0 -88
- parsl/tests/site_tests/test_site.py +0 -70
- parsl/tests/test_aalst_patterns.py +0 -474
- parsl/tests/test_docs/test_workflow2.py +0 -42
- parsl/tests/test_error_handling/test_rand_fail.py +0 -171
- parsl/tests/test_regression/test_854.py +0 -62
- parsl/tests/test_serialization/test_pack_resource_spec.py +0 -23
- {parsl-2025.8.4.data → parsl-2025.11.10.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2025.8.4.data → parsl-2025.11.10.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2025.8.4.dist-info → parsl-2025.11.10.dist-info}/entry_points.txt +0 -0
- {parsl-2025.8.4.dist-info → parsl-2025.11.10.dist-info/licenses}/LICENSE +0 -0
- {parsl-2025.8.4.dist-info → parsl-2025.11.10.dist-info}/top_level.txt +0 -0
parsl/jobs/strategy.py
CHANGED
@@ -185,6 +185,11 @@ class Strategy:
 
         for executor in executors:
             label = executor.label
+
+            if executor.bad_state_is_set:
+                logger.info(f"Not strategizing for executor {label} because bad state is set")
+                continue
+
             logger.debug(f"Strategizing for executor {label}")
 
             if self.executors[label]['first']:
@@ -213,12 +218,8 @@ class Strategy:
 
             logger.debug(f"Slot ratio calculation: active_slots = {active_slots}, active_tasks = {active_tasks}")
 
-            if isinstance(executor, HighThroughputExecutor):
-                logger.debug('Executor {} has {} active tasks, {}/{} running/pending blocks, and {} connected workers'.format(
-                    label, active_tasks, running, pending, executor.connected_workers))
-            else:
-                logger.debug('Executor {} has {} active tasks and {}/{} running/pending blocks'.format(
-                    label, active_tasks, running, pending))
+            logger.debug('Executor {} has {} active tasks and {}/{} running/pending blocks'.format(
+                label, active_tasks, running, pending))
 
             # reset idle timer if executor has active tasks
 
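Note: the first hunk short-circuits scaling decisions for executors that have entered a bad state. A minimal sketch of the pattern, assuming executor objects that expose a bad_state_is_set flag; plan_scaling is a hypothetical stand-in for the rest of the strategy body:

    # Skip executors whose bad_state_is_set flag indicates a fatal error:
    for executor in executors:
        if executor.bad_state_is_set:
            continue  # nothing useful to strategize for a failed executor
        plan_scaling(executor)  # hypothetical helper for the remaining logic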
parsl/monitoring/db_manager.py
CHANGED
@@ -18,7 +18,7 @@ from parsl.monitoring.types import MonitoringMessage, TaggedMonitoringMessage
 from parsl.process_loggers import wrap_with_logs
 from parsl.utils import setproctitle
 
-logger = logging.getLogger("database_manager")
+logger = logging.getLogger(__name__)
 
 X = TypeVar('X')
 
@@ -293,13 +293,10 @@ class DatabaseManager:
         self.run_dir = run_dir
         os.makedirs(self.run_dir, exist_ok=True)
 
-        logger.propagate = False
-
         set_file_logger(f"{self.run_dir}/database_manager.log", level=logging_level,
-                        format_string="%(asctime)s.%(msecs)03d %(name)s:%(lineno)d [%(levelname)s] [%(threadName)s %(thread)d] %(message)s",
-                        name="database_manager")
+                        format_string="%(asctime)s.%(msecs)03d %(name)s:%(lineno)d [%(levelname)s] [%(threadName)s %(thread)d] %(message)s")
 
-        logger.
+        logger.info("Initializing Database Manager process")
 
         self.db = Database(db_url)
         self.batching_interval = batching_interval
@@ -349,9 +346,9 @@ class DatabaseManager:
             exception_happened = False
 
             while (not self._kill_event.is_set() or
-                   self.pending_priority_queue.qsize() != 0 or self.pending_resource_queue.qsize() != 0 or
-                   self.pending_node_queue.qsize() != 0 or self.pending_block_queue.qsize() != 0 or
-                   resource_queue.qsize() != 0):
+                   not self.pending_priority_queue.empty() or not self.pending_resource_queue.empty() or
+                   not self.pending_node_queue.empty() or not self.pending_block_queue.empty() or
+                   not resource_queue.empty()):
 
                 """
                 WORKFLOW_INFO and TASK_INFO messages (i.e. priority messages)
@@ -360,9 +357,9 @@ class DatabaseManager:
             try:
                 logger.debug("""Checking STOP conditions: {}, {}, {}, {}, {}, {}""".format(
                     self._kill_event.is_set(),
-                    self.pending_priority_queue.qsize() != 0, self.pending_resource_queue.qsize() != 0,
-                    self.pending_node_queue.qsize() != 0, self.pending_block_queue.qsize() != 0,
-                    resource_queue.qsize() != 0))
+                    not self.pending_priority_queue.empty(), not self.pending_resource_queue.empty(),
+                    not self.pending_node_queue.empty(), not self.pending_block_queue.empty(),
+                    not resource_queue.empty()))
 
                 # This is the list of resource messages which can be reprocessed as if they
                 # had just arrived because the corresponding first task message has been
@@ -512,7 +509,7 @@ class DatabaseManager:
                     msg['task_status_name'] = States.running.name
                     msg['task_try_time_running'] = msg['timestamp']
 
-                    if task_try_id in inserted_tries:
+                    if task_try_id in inserted_tries:
                         reprocessable_first_resource_messages.append(msg)
                     else:
                         if task_try_id in deferred_resource_messages:
@@ -550,19 +547,20 @@ class DatabaseManager:
                         "or some other error. monitoring data may have been lost"
                     )
                     exception_happened = True
+
+            if self.external_exit_event.is_set():
+                self.close()
+
         if exception_happened:
             raise RuntimeError("An exception happened sometime during database processing and should have been logged in database_manager.log")
 
-    @wrap_with_logs
-    def _migrate_logs_to_internal(self, logs_queue:
+    @wrap_with_logs
+    def _migrate_logs_to_internal(self, logs_queue: mpq.Queue, kill_event: threading.Event) -> None:
         logger.info("Starting _migrate_logs_to_internal")
 
-        while not kill_event.is_set() or logs_queue.qsize() != 0:
+        while not kill_event.is_set() or not logs_queue.empty():
             logger.debug("Checking STOP conditions: kill event: %s, queue has entries: %s",
-                         kill_event.is_set(), logs_queue.qsize() != 0)
-
-            if self.external_exit_event.is_set():
-                self.close()
+                         kill_event.is_set(), not logs_queue.empty())
 
             try:
                 x = logs_queue.get(timeout=0.1)
@@ -583,10 +581,10 @@ class DatabaseManager:
             elif x[0] == MessageType.NODE_INFO:
                 assert len(x) == 2, "expected NODE_INFO tuple to have exactly two elements"
 
-                logger.
+                logger.debug("Will put {} to pending node queue".format(x[1]))
                 self.pending_node_queue.put(x[1])
             elif x[0] == MessageType.BLOCK_INFO:
-                logger.
+                logger.debug("Will put {} to pending block queue".format(x[1]))
                 self.pending_block_queue.put(x[-1])
             else:
                 logger.error("Discarding message of unknown type {}".format(x[0]))
@@ -682,7 +680,7 @@ class DatabaseManager:
         self._kill_event.set()
 
 
-@wrap_with_logs
+@wrap_with_logs
 @typeguard.typechecked
 def dbm_starter(resource_msgs: mpq.Queue,
                 db_url: str,
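Note: the qsize()-to-empty() rewrites in this file are portability fixes rather than behavior changes. multiprocessing.Queue.qsize() relies on sem_getvalue(), which is not implemented on macOS and raises NotImplementedError there, while empty() works on all platforms. A minimal sketch of the difference:

    import multiprocessing

    q = multiprocessing.get_context("spawn").Queue()
    q.put("message")

    if not q.empty():        # portable check
        print(q.get())

    try:
        print(q.qsize())     # raises NotImplementedError on macOS
    except NotImplementedError:
        print("qsize() is unsupported on this platform")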
parsl/monitoring/monitoring.py
CHANGED
@@ -11,9 +11,9 @@ import typeguard
 
 from parsl.monitoring.types import TaggedMonitoringMessage
 from parsl.multiprocessing import (
-    SizedQueue,
     SpawnEvent,
     SpawnProcess,
+    SpawnQueue,
     join_terminate_close_proc,
 )
 from parsl.utils import RepresentationMixin
@@ -126,7 +126,7 @@ class MonitoringHub(RepresentationMixin):
         self.monitoring_hub_active = True
 
         self.resource_msgs: Queue[TaggedMonitoringMessage]
-        self.resource_msgs = SizedQueue()
+        self.resource_msgs = SpawnQueue()
 
         self.dbm_exit_event: ms.Event
         self.dbm_exit_event = SpawnEvent()

parsl/monitoring/radios/filesystem.py
CHANGED

@@ -10,11 +10,12 @@ from parsl.monitoring.radios.base import (
     RadioConfig,
 )
 from parsl.monitoring.radios.filesystem_router import FilesystemRadioReceiver
+from parsl.utils import RepresentationMixin
 
 logger = logging.getLogger(__name__)
 
 
-class FilesystemRadio(RadioConfig):
+class FilesystemRadio(RadioConfig, RepresentationMixin):
     """A MonitoringRadioSender that sends messages over a shared filesystem.
 
     The messsage directory structure is based on maildir,
parsl/monitoring/radios/htex.py
CHANGED
@@ -7,11 +7,12 @@ from parsl.monitoring.radios.base import (
     MonitoringRadioSender,
     RadioConfig,
 )
+from parsl.utils import RepresentationMixin
 
 logger = logging.getLogger(__name__)
 
 
-class HTEXRadio(RadioConfig):
+class HTEXRadio(RadioConfig, RepresentationMixin):
     def create_sender(self) -> MonitoringRadioSender:
         return HTEXRadioSender()
 

parsl/monitoring/radios/multiprocessing.py
CHANGED

@@ -5,9 +5,10 @@ from parsl.monitoring.radios.base import (
     MonitoringRadioSender,
     RadioConfig,
 )
+from parsl.utils import RepresentationMixin
 
 
-class MultiprocessingQueueRadioSender(MonitoringRadioSender):
+class MultiprocessingQueueRadioSender(MonitoringRadioSender, RepresentationMixin):
     """A monitoring radio which connects over a multiprocessing Queue.
     This radio is intended to be used on the submit side, where components
     in the submit process, or processes launched by multiprocessing, will have
parsl/monitoring/radios/udp.py
CHANGED
@@ -13,11 +13,12 @@ from parsl.monitoring.radios.base import (
     RadioConfig,
 )
 from parsl.monitoring.radios.udp_router import start_udp_receiver
+from parsl.utils import RepresentationMixin
 
 logger = logging.getLogger(__name__)
 
 
-class UDPRadio(RadioConfig):
+class UDPRadio(RadioConfig, RepresentationMixin):
     def __init__(self, *, port: Optional[int] = None, atexit_timeout: int = 3, address: str, debug: bool = False, hmac_digest: str = 'sha512'):
         self.port = port
         self.atexit_timeout = atexit_timeout

parsl/monitoring/radios/udp_router.py
CHANGED

@@ -21,9 +21,9 @@ from parsl.monitoring.errors import MonitoringRouterStartError
 from parsl.monitoring.radios.base import MonitoringRadioReceiver
 from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadioSender
 from parsl.multiprocessing import (
-    SizedQueue,
     SpawnEvent,
     SpawnProcess,
+    SpawnQueue,
     join_terminate_close_proc,
 )
 from parsl.process_loggers import wrap_with_logs
@@ -198,7 +198,7 @@ def start_udp_receiver(*,
                        hmac_digest: str) -> UDPRadioReceiver:
 
     udp_comm_q: Queue[Union[int, str]]
-    udp_comm_q = SizedQueue(maxsize=10)
+    udp_comm_q = SpawnQueue(maxsize=10)
 
     router_exit_event = SpawnEvent()
 

parsl/monitoring/radios/zmq_router.py
CHANGED

@@ -19,9 +19,9 @@ from parsl.monitoring.errors import MonitoringRouterStartError
 from parsl.monitoring.radios.multiprocessing import MultiprocessingQueueRadioSender
 from parsl.monitoring.types import TaggedMonitoringMessage
 from parsl.multiprocessing import (
-    SizedQueue,
     SpawnEvent,
     SpawnProcess,
+    SpawnQueue,
     join_terminate_close_proc,
 )
 from parsl.process_loggers import wrap_with_logs
@@ -158,7 +158,7 @@ def start_zmq_receiver(*,
                        port_range: Tuple[int, int],
                        logdir: str,
                        worker_debug: bool) -> ZMQRadioReceiver:
-    comm_q = SizedQueue(maxsize=10)
+    comm_q = SpawnQueue(maxsize=10)
 
     router_exit_event = SpawnEvent()
 
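Note: each radio class above gains parsl's RepresentationMixin, which derives __repr__ from the arguments passed to __init__, so configured radios render readably in logs and monitoring output. A sketch of the effect, using the UDPRadio constructor shown in the diff; the printed form is approximate:

    from parsl.monitoring.radios.udp import UDPRadio

    radio = UDPRadio(address="127.0.0.1", port=55055)
    # Instead of "<...UDPRadio object at 0x...>", repr() now resembles:
    # UDPRadio(address='127.0.0.1', port=55055, atexit_timeout=3, ...)
    print(repr(radio))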
parsl/multiprocessing.py
CHANGED
@@ -4,7 +4,6 @@
 import logging
 import multiprocessing
 import multiprocessing.queues
-import platform
 from multiprocessing.context import ForkProcess as ForkProcessType
 from multiprocessing.context import SpawnProcess as SpawnProcessType
 from typing import Callable
@@ -21,54 +20,6 @@ SpawnEvent = SpawnContext.Event
 SpawnQueue = SpawnContext.Queue
 
 
-class MacSafeQueue(multiprocessing.queues.Queue):
-    """ Multiprocessing queues do not have qsize attributes on MacOS.
-    This is slower but more portable version of the multiprocessing Queue
-    that adds a explicit counter
-
-    Reference : https://github.com/keras-team/autokeras/commit/4ddd568b06b4045ace777bc0fb7bc18573b85a75
-    """
-
-    def __init__(self, *args, **kwargs):
-        if 'ctx' not in kwargs:
-            kwargs['ctx'] = multiprocessing.get_context('spawn')
-        super().__init__(*args, **kwargs)
-        self._counter = multiprocessing.Value('i', 0)
-
-    def put(self, *args, **kwargs):
-        # logger.critical("Putting item {}".format(args))
-        x = super().put(*args, **kwargs)
-        with self._counter.get_lock():
-            self._counter.value += 1
-        return x
-
-    def get(self, *args, **kwargs):
-        x = super().get(*args, **kwargs)
-        with self._counter.get_lock():
-            self._counter.value -= 1
-        # logger.critical("Getting item {}".format(x))
-        return x
-
-    def qsize(self):
-        return self._counter.value
-
-    def empty(self):
-        return not self._counter.value
-
-
-# SizedQueue should be constructable using the same calling
-# convention as multiprocessing.Queue but that entire signature
-# isn't expressible in mypy 0.790
-SizedQueue: Callable[..., multiprocessing.Queue]
-
-
-if platform.system() != 'Darwin':
-    import multiprocessing
-    SizedQueue = SpawnQueue
-else:
-    SizedQueue = MacSafeQueue
-
-
 def join_terminate_close_proc(process: SpawnProcessType, *, timeout: int = 30) -> None:
     """Increasingly aggressively terminate a process.
 
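Note: with every consumer switched from qsize() to empty(), MacSafeQueue's explicit counter is no longer needed and a plain spawn-context queue suffices everywhere. A minimal sketch of what SpawnQueue resolves to:

    import multiprocessing

    # SpawnQueue is the Queue constructor of the spawn context, so the
    # drop-in replacement for SizedQueue(maxsize=10) is simply:
    SpawnContext = multiprocessing.get_context("spawn")
    q = SpawnContext.Queue(maxsize=10)
    q.put(42)
    assert not q.empty()
    assert q.get() == 42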
parsl/providers/base.py
CHANGED
@@ -33,7 +33,28 @@ class ExecutionProvider(metaclass=ABCMeta):
                                 [cancel] <--------|----+
 
     +-------------------
-    """
+
+    In addition to the listed methods, an ExecutionProvider instance must always
+    have these attributes, which both default to `None`:
+
+    mem_per_node: Real memory to provision per node in GB.
+
+        Providers which set this attribute should ask for mem_per_node of memory
+        when provisioning resources, and set the corresponding environment
+        variable PARSL_MEMORY_GB before executing submitted commands.
+
+        If this attribute is set, executors may use it to calculate how many tasks can
+        run concurrently per node.
+
+    cores_per_node: Number of cores to provision per node.
+
+        Providers which set this attribute should ask for cores_per_node cores
+        when provisioning resources, and set the corresponding environment
+        variable PARSL_CORES before executing submitted commands.
+
+        If this attribute is set, executors may use it to calculate how many tasks can
+        run concurrently per node.
+    """
 
     @abstractmethod
     def __init__(self) -> None:
@@ -44,8 +65,8 @@ class ExecutionProvider(metaclass=ABCMeta):
         self.script_dir: Optional[str]
         self.parallelism: float
         self.resources: Dict[object, Any]
-        self._cores_per_node: Optional[int] = None
-        self._mem_per_node: Optional[float] = None
+        self.cores_per_node: Optional[int] = None
+        self.mem_per_node: Optional[float] = None
         pass
 
     @abstractmethod
@@ -111,40 +132,6 @@ class ExecutionProvider(metaclass=ABCMeta):
         ''' Provides the label for this provider '''
         pass
 
-    @property
-    def mem_per_node(self) -> Optional[float]:
-        """Real memory to provision per node in GB.
-
-        Providers which set this property should ask for mem_per_node of memory
-        when provisioning resources, and set the corresponding environment
-        variable PARSL_MEMORY_GB before executing submitted commands.
-
-        If this property is set, executors may use it to calculate how many tasks can
-        run concurrently per node.
-        """
-        return self._mem_per_node
-
-    @mem_per_node.setter
-    def mem_per_node(self, value: float) -> None:
-        self._mem_per_node = value
-
-    @property
-    def cores_per_node(self) -> Optional[int]:
-        """Number of cores to provision per node.
-
-        Providers which set this property should ask for cores_per_node cores
-        when provisioning resources, and set the corresponding environment
-        variable PARSL_CORES before executing submitted commands.
-
-        If this property is set, executors may use it to calculate how many tasks can
-        run concurrently per node.
-        """
-        return self._cores_per_node
-
-    @cores_per_node.setter
-    def cores_per_node(self, value: int) -> None:
-        self._cores_per_node = value
-
     @property
     @abstractmethod
     def status_polling_interval(self) -> int:
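Note: the property pair is collapsed into plain attributes, so subclasses now assign cores_per_node and mem_per_node directly instead of going through setters. A hypothetical provider following the new contract:

    from typing import Optional

    class ExampleProvider:  # stand-in for an ExecutionProvider subclass
        def __init__(self, cores_per_node: Optional[int] = None,
                     mem_per_node: Optional[float] = None) -> None:
            # Plain attributes, both defaulting to None, per the new docstring:
            self.cores_per_node = cores_per_node  # surfaced as PARSL_CORES
            self.mem_per_node = mem_per_node      # surfaced as PARSL_MEMORY_GB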
parsl/providers/pbspro/pbspro.py
CHANGED
@@ -96,7 +96,7 @@ class PBSProProvider(TorqueProvider):
 
         jobs_missing = list(self.resources.keys())
 
-        retcode, stdout, stderr = self.execute_wait("qstat -f -F json {0}".format(job_id_list))
+        retcode, stdout, stderr = self.execute_wait("qstat -x -f -F json {0}".format(job_id_list))
 
         # If qstat failed do not update job state
         if retcode != 0:
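Note: the added -x flag asks PBS Pro's qstat to include finished jobs, so a job that completes between polls is still reported with a terminal job_state instead of disappearing from the JSON output. A sketch of the same call outside parsl, with a hypothetical job id:

    import json
    import subprocess

    out = subprocess.run(["qstat", "-x", "-f", "-F", "json", "123.pbs01"],
                         capture_output=True, text=True, check=True)
    for job_id, info in json.loads(out.stdout).get("Jobs", {}).items():
        print(job_id, info.get("job_state"))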
parsl/serialize/__init__.py
CHANGED
@@ -1,16 +1,13 @@
 from parsl.serialize.facade import (
     deserialize,
     pack_apply_message,
-    pack_res_spec_apply_message,
     serialize,
     unpack_apply_message,
-    unpack_res_spec_apply_message,
 )
 
-__all__ = ['serialize',
-           'deserialize',
-           'pack_apply_message',
-           'unpack_apply_message',
-           'pack_res_spec_apply_message',
-           'unpack_res_spec_apply_message',
-]
+__all__ = (
+    "serialize",
+    "deserialize",
+    "pack_apply_message",
+    "unpack_apply_message",
+)
parsl/serialize/facade.py
CHANGED
@@ -62,44 +62,12 @@ def pack_apply_message(func: Any, args: Any, kwargs: Any, buffer_threshold: int
     return packed_buffer
 
 
-def pack_res_spec_apply_message(func: Any, args: Any, kwargs: Any, resource_specification: Any, buffer_threshold: int = int(128 * 1e6)) -> bytes:
-    """Serialize and pack function, parameters, and resource_specification
-
-    Parameters
-    ----------
-
-    func: Function
-        A function to ship
-
-    args: Tuple/list of objects
-        positional parameters as a list
-
-    kwargs: Dict
-        Dict containing named parameters
-
-    resource_specification: Dict
-        Dict containing application resource specification
-
-    buffer_threshold: int
-        Limits buffer to specified size in bytes. Exceeding this limit would give you
-        a warning in the log. Default is 128MB.
-    """
-    return pack_apply_message(func, args, (kwargs, resource_specification), buffer_threshold=buffer_threshold)
-
-
 def unpack_apply_message(packed_buffer: bytes) -> List[Any]:
     """ Unpack and deserialize function and parameters
     """
     return [deserialize(buf) for buf in unpack_buffers(packed_buffer)]
 
 
-def unpack_res_spec_apply_message(packed_buffer: bytes) -> List[Any]:
-    """ Unpack and deserialize function, parameters, and resource_specification
-    """
-    func, args, (kwargs, resource_spec) = unpack_apply_message(packed_buffer)
-    return [func, args, kwargs, resource_spec]
-
-
 def serialize(obj: Any, buffer_threshold: int = int(1e6)) -> bytes:
     """ Try available serialization methods one at a time
 
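Note: with the res_spec variants removed, the surviving pair still round-trips an ordinary function call. A minimal sketch using the signatures visible above:

    from parsl.serialize import pack_apply_message, unpack_apply_message

    def add(x, y=1):
        return x + y

    # pack_apply_message serializes (func, args, kwargs) into one byte buffer;
    # unpack_apply_message reverses it into the same three components.
    buf = pack_apply_message(add, (2,), {"y": 3})
    func, args, kwargs = unpack_apply_message(buf)
    assert func(*args, **kwargs) == 5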
parsl/tests/configs/local_threads_globus.py
CHANGED

@@ -1,6 +1,5 @@
 from parsl.config import Config
 from parsl.data_provider.data_manager import default_staging
-from parsl.data_provider.globus import GlobusStaging
 from parsl.executors.threads import ThreadPoolExecutor
 
 # If you are a developer running tests, make sure to update parsl/tests/configs/user_opts.py
@@ -10,19 +9,24 @@ from parsl.executors.threads import ThreadPoolExecutor
 # (i.e., user_opts['swan']['username'] -> 'your_username')
 from .user_opts import user_opts
 
-storage_access = default_staging + [GlobusStaging(
-    endpoint_uuid=user_opts['globus']['endpoint'],
-    endpoint_path=user_opts['globus']['path']
-)]
 
-config = Config(
-    executors=[
-        ThreadPoolExecutor(
-            label='local_threads_globus',
-            working_dir=user_opts['globus']['path'],
-            storage_access=storage_access
-        )
-    ]
-)
+def fresh_config():
+    from parsl.data_provider.globus import GlobusStaging
+
+    storage_access = default_staging + [GlobusStaging(
+        endpoint_uuid=user_opts['globus']['endpoint'],
+        endpoint_path=user_opts['globus']['path']
+    )]
+
+    return Config(
+        executors=[
+            ThreadPoolExecutor(
+                label='local_threads_globus',
+                working_dir=user_opts['globus']['path'],
+                storage_access=storage_access
+            )
+        ]
+    )
+
 
 remote_writeable = user_opts['globus']['remote_writeable']
parsl/tests/manual_tests/test_memory_limits.py
CHANGED

@@ -53,7 +53,7 @@ def test_simple(mem_per_worker):
     # Prime a worker
     double(5).result()
     dfk = parsl.dfk()
-    connected = dfk.executors['htex_local'].connected_workers
+    connected = dfk.executors['htex_local'].connected_workers()
     print("Connected : ", connected)
     assert expected_workers == connected, "Expected {} workers, instead got {} workers".format(expected_workers,
                                                                                                connected)
parsl/tests/sites/test_concurrent.py
CHANGED

@@ -1,7 +1,7 @@
 """Tests of the interfaces to Python's concurrent library"""
-from pytest import mark, warns
+from pytest import mark, raises, warns
 
-from parsl import Config, HighThroughputExecutor
+from parsl import Config, HighThroughputExecutor, load, python_app
 from parsl.concurrent import ParslPoolExecutor
 
 
@@ -9,21 +9,43 @@ def f(x):
     return x + 1
 
 
+def g(x):
+    return 2 * x
+
+
+@python_app
+def is_odd(x):
+    if x % 2 == 1:
+        return 1
+    else:
+        return 0
+
+
 def make_config():
     return Config(
         executors=[
             HighThroughputExecutor(
+                label='test_executor',
                 address="127.0.0.1",
                 max_workers_per_node=2,
                 heartbeat_period=2,
                 heartbeat_threshold=4,
-                encrypted=True,
+                encrypted=False,
             )
         ],
         strategy='none',
     )
 
 
+@mark.local
+def test_init_errors():
+    with load(make_config()) as dfk, raises(ValueError, match='Specify only one of config or dfk'):
+        ParslPoolExecutor(config=make_config(), dfk=dfk)
+
+    with raises(ValueError, match='Must specify one of config or dfk'):
+        ParslPoolExecutor()
+
+
 @mark.local
 def test_executor():
     my_config = make_config()
@@ -44,5 +66,31 @@ def test_executor():
     # Make sure only one function was registered
     assert exc.app_count == 1
 
+    with raises(RuntimeError, match='shut down'):
+        exc.submit(f, 1)
+
     with warns(UserWarning):
         ParslPoolExecutor(make_config()).shutdown(False, cancel_futures=True)
+
+
+@mark.local
+def test_with_dfk():
+    config = make_config()
+
+    with load(config) as dfk, ParslPoolExecutor(dfk=dfk, executors=['test_executor']) as exc:
+        future = exc.submit(f, 1)
+        assert future.result() == 2
+        assert exc.get_app(f).executors == ['test_executor']
+
+
+@mark.local
+def test_chaining():
+    """Make sure the executor functions can be chained together"""
+    config = make_config()
+
+    with ParslPoolExecutor(config) as exc:
+        future_odd = exc.submit(f, 10)
+        assert is_odd(future_odd).result()
+
+        future_even = exc.submit(g, future_odd)
+        assert not is_odd(future_even).result()
parsl/tests/test_checkpointing/test_periodic.py
CHANGED

@@ -2,11 +2,17 @@ import pytest
 
 import parsl
 from parsl.app.app import python_app
-from parsl.tests.configs.local_threads_checkpoint_periodic import fresh_config
+from parsl.config import Config
+from parsl.executors.threads import ThreadPoolExecutor
 
 
-def
-
+def fresh_config():
+    tpe = ThreadPoolExecutor(label='local_threads_checkpoint_periodic', max_threads=1)
+    return Config(
+        executors=[tpe],
+        checkpoint_mode='periodic',
+        checkpoint_period='00:00:02'
+    )
 
 
 @python_app(cache=True)
@@ -25,12 +31,12 @@ def tstamp_to_seconds(line):
 def test_periodic():
     """Test checkpointing with task_periodic behavior
     """
-    parsl.load(fresh_config())
-    h, m, s = map(int, parsl.dfk().config.checkpoint_period.split(":"))
-    assert h == 0, "Verify test setup"
-    assert m == 0, "Verify test setup"
-    assert s > 0, "Verify test setup"
-    sleep_for = s + 1
+    with parsl.load(fresh_config()):
+        h, m, s = map(int, parsl.dfk().config.checkpoint_period.split(":"))
+        assert h == 0, "Verify test setup"
+        assert m == 0, "Verify test setup"
+        assert s > 0, "Verify test setup"
+        sleep_for = s + 1
         futs = [slow_double(sleep_for) for _ in range(4)]
         [f.result() for f in futs]
         run_dir = parsl.dfk().run_dir