parsl 2024.3.11__py3-none-any.whl → 2024.3.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/addresses.py +3 -1
- parsl/config.py +4 -0
- parsl/dataflow/dflow.py +3 -1
- parsl/executors/high_throughput/executor.py +12 -0
- parsl/executors/high_throughput/interchange.py +30 -8
- parsl/executors/high_throughput/manager_record.py +1 -0
- parsl/executors/high_throughput/process_worker_pool.py +41 -5
- parsl/executors/taskvine/executor.py +3 -2
- parsl/executors/taskvine/manager.py +1 -0
- parsl/executors/taskvine/manager_config.py +3 -4
- parsl/jobs/job_status_poller.py +4 -3
- parsl/launchers/launchers.py +6 -6
- parsl/log_utils.py +8 -4
- parsl/monitoring/db_manager.py +4 -2
- parsl/monitoring/monitoring.py +9 -52
- parsl/monitoring/visualization/plots/default/workflow_plots.py +3 -0
- parsl/monitoring/visualization/views.py +2 -1
- parsl/providers/cluster_provider.py +1 -3
- parsl/tests/configs/user_opts.py +2 -1
- parsl/tests/test_htex/test_drain.py +78 -0
- parsl/tests/test_monitoring/test_app_names.py +86 -0
- parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +1 -10
- parsl/utils.py +2 -2
- parsl/version.py +1 -1
- {parsl-2024.3.11.data → parsl-2024.3.18.data}/scripts/process_worker_pool.py +41 -5
- {parsl-2024.3.11.dist-info → parsl-2024.3.18.dist-info}/METADATA +2 -2
- {parsl-2024.3.11.dist-info → parsl-2024.3.18.dist-info}/RECORD +33 -31
- {parsl-2024.3.11.data → parsl-2024.3.18.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2024.3.11.data → parsl-2024.3.18.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2024.3.11.dist-info → parsl-2024.3.18.dist-info}/LICENSE +0 -0
- {parsl-2024.3.11.dist-info → parsl-2024.3.18.dist-info}/WHEEL +0 -0
- {parsl-2024.3.11.dist-info → parsl-2024.3.18.dist-info}/entry_points.txt +0 -0
- {parsl-2024.3.11.dist-info → parsl-2024.3.18.dist-info}/top_level.txt +0 -0
parsl/addresses.py
CHANGED
@@ -81,7 +81,9 @@ def address_by_hostname() -> str:
 def address_by_interface(ifname: str) -> str:
     """Returns the IP address of the given interface name, e.g. 'eth0'
 
-    This is from a Stack Overflow answer:
+    This is taken from a Stack Overflow answer:
+    https://stackoverflow.com/questions/24196932/how-can-i-get-the-ip-address-of-eth0-in-python#24196955
+
 
     Parameters
     ----------
parsl/config.py
CHANGED
@@ -55,6 +55,8 @@ class Config(RepresentationMixin):
         or `None`.
         If 'none' or `None`, dynamic scaling will be disabled. Default is 'simple'. The literal value `None` is
         deprecated.
+    strategy_period : float or int, optional
+        How often the scaling strategy should be executed. Default is 5 seconds.
     max_idletime : float, optional
         The maximum idle time allowed for an executor before strategy could shut down unused blocks. Default is 120.0 seconds.
     usage_tracking : bool, optional
@@ -88,6 +90,7 @@ class Config(RepresentationMixin):
                  retry_handler: Optional[Callable[[Exception, TaskRecord], float]] = None,
                  run_dir: str = 'runinfo',
                  strategy: Optional[str] = 'simple',
+                 strategy_period: Union[float, int] = 5,
                  max_idletime: float = 120.0,
                  monitoring: Optional[MonitoringHub] = None,
                  usage_tracking: bool = False,
@@ -121,6 +124,7 @@ class Config(RepresentationMixin):
        self.retry_handler = retry_handler
        self.run_dir = run_dir
        self.strategy = strategy
+       self.strategy_period = strategy_period
        self.max_idletime = max_idletime
        self.usage_tracking = usage_tracking
        self.initialize_logging = initialize_logging
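Since strategy_period is the user-facing knob added here, a minimal sketch of a configuration that drives the scaling strategy faster than the 5-second default may help; the executor choice is illustrative only and not part of this diff:

from parsl.config import Config
from parsl.executors import HighThroughputExecutor

# Hypothetical config: run the scaling strategy every half second.
# strategy_period accepts float or int, per the signature added above.
config = Config(
    executors=[HighThroughputExecutor(label="htex_local")],
    strategy='simple',
    strategy_period=0.5,
    max_idletime=120.0,
)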
parsl/dataflow/dflow.py
CHANGED
@@ -178,6 +178,7 @@ class DataFlowKernel:
         # this must be set before executors are added since add_executors calls
         # job_status_poller.add_executors.
         self.job_status_poller = JobStatusPoller(strategy=self.config.strategy,
+                                                 strategy_period=self.config.strategy_period,
                                                  max_idletime=self.config.max_idletime,
                                                  dfk=self)
 
@@ -1170,7 +1171,8 @@ class DataFlowKernel:
             fut = task_record['app_fu']
             if not fut.done():
                 fut.exception()
-            # now app future is done, poll until DFK state is final: a
+            # now app future is done, poll until DFK state is final: a
+            # DFK state being final and the app future being done do not imply each other.
             while task_record['status'] not in FINAL_STATES:
                 time.sleep(0.1)
parsl/executors/high_throughput/executor.py
CHANGED
@@ -55,6 +55,7 @@ DEFAULT_LAUNCH_CMD = ("process_worker_pool.py {debug} {max_workers_per_node} "
                       "--hb_period={heartbeat_period} "
                       "{address_probe_timeout_string} "
                       "--hb_threshold={heartbeat_threshold} "
+                      "--drain_period={drain_period} "
                       "--cpu-affinity {cpu_affinity} "
                       "{enable_mpi_mode} "
                       "--mpi-launcher={mpi_launcher} "
@@ -201,6 +202,14 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin):
         Timeout period to be used by the executor components in milliseconds. Increasing poll_periods
         trades performance for cpu efficiency. Default: 10ms
 
+    drain_period : int
+        The number of seconds after start when workers will begin to drain
+        and then exit. Set this to a time that is slightly less than the
+        maximum walltime of batch jobs to avoid killing tasks while they
+        execute. For example, you could set this to the walltime minus a grace
+        period for the batch job to start the workers, minus the expected
+        maximum length of an individual task.
+
     worker_logdir_root : string
         In case of a remote file system, specify the path to where logs will be kept.
 
@@ -240,6 +249,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin):
                  prefetch_capacity: int = 0,
                  heartbeat_threshold: int = 120,
                  heartbeat_period: int = 30,
+                 drain_period: Optional[int] = None,
                  poll_period: int = 10,
                  address_probe_timeout: Optional[int] = None,
                  worker_logdir_root: Optional[str] = None,
@@ -303,6 +313,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin):
         self.interchange_port_range = interchange_port_range
         self.heartbeat_threshold = heartbeat_threshold
         self.heartbeat_period = heartbeat_period
+        self.drain_period = drain_period
         self.poll_period = poll_period
         self.run_dir = '.'
         self.worker_logdir_root = worker_logdir_root
@@ -376,6 +387,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin):
                           nodes_per_block=self.provider.nodes_per_block,
                           heartbeat_period=self.heartbeat_period,
                           heartbeat_threshold=self.heartbeat_threshold,
+                          drain_period=self.drain_period,
                           poll_period=self.poll_period,
                           cert_dir=self.cert_dir,
                           logdir=self.worker_logdir,
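Because drain_period is the headline user-facing option in this release, here is a hedged sketch of the walltime arithmetic the new docstring describes; all of the durations below are made-up illustrative values:

from parsl.executors import HighThroughputExecutor

# Illustrative numbers only: a 1-hour batch job walltime, an assumed
# 5-minute grace period for the batch job to start workers, and tasks
# expected to run for at most 10 minutes.
walltime_s = 60 * 60
startup_grace_s = 5 * 60
max_task_s = 10 * 60

htex = HighThroughputExecutor(
    label="htex_draining",
    # workers begin draining this many seconds after start, per the
    # drain_period docstring added in this release
    drain_period=walltime_s - startup_grace_s - max_task_s,
)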
parsl/executors/high_throughput/interchange.py
CHANGED
@@ -28,6 +28,7 @@ from parsl.process_loggers import wrap_with_logs
 
 
 PKL_HEARTBEAT_CODE = pickle.dumps((2 ** 32) - 1)
+PKL_DRAINED_CODE = pickle.dumps((2 ** 32) - 2)
 
 LOGGER_NAME = "interchange"
 logger = logging.getLogger(LOGGER_NAME)
@@ -101,12 +102,12 @@ class Interchange:
         This is overridden when the worker_ports option is set. Default: (54000, 55000)
 
     hub_address : str
-        The
-
+        The IP address at which the interchange can send info about managers to when monitoring is enabled.
+        Default: None (meaning monitoring disabled)
 
     hub_port : str
         The port at which the interchange can send info about managers to when monitoring is enabled.
-
+        Default: None (meaning monitoring disabled)
 
     heartbeat_threshold : int
         Number of seconds since the last heartbeat after which worker is considered lost.
@@ -244,19 +245,19 @@ class Interchange:
 
     def _create_monitoring_channel(self) -> Optional[zmq.Socket]:
         if self.hub_address and self.hub_port:
-            logger.info("Connecting to
+            logger.info("Connecting to MonitoringHub")
             # This is a one-off because monitoring is unencrypted
             hub_channel = zmq.Context().socket(zmq.DEALER)
             hub_channel.set_hwm(0)
            hub_channel.connect("tcp://{}:{}".format(self.hub_address, self.hub_port))
-            logger.info("
+            logger.info("Connected to MonitoringHub")
            return hub_channel
        else:
            return None
 
     def _send_monitoring_info(self, hub_channel: Optional[zmq.Socket], manager: ManagerRecord) -> None:
         if hub_channel:
-            logger.info("Sending message {} to
+            logger.info("Sending message {} to MonitoringHub".format(manager))
 
             d: Dict = cast(Dict, manager.copy())
             d['timestamp'] = datetime.datetime.now()
@@ -308,7 +309,8 @@ class Interchange:
                         'worker_count': m['worker_count'],
                         'tasks': len(m['tasks']),
                         'idle_duration': idle_duration,
-                        'active': m['active']
+                        'active': m['active'],
+                        'draining': m['draining']}
                 reply.append(resp)
 
         elif command_req.startswith("HOLD_WORKER"):
@@ -385,6 +387,7 @@ class Interchange:
             self.process_task_outgoing_incoming(interesting_managers, hub_channel, kill_event)
             self.process_results_incoming(interesting_managers, hub_channel)
             self.expire_bad_managers(interesting_managers, hub_channel)
+            self.expire_drained_managers(interesting_managers, hub_channel)
             self.process_tasks_to_send(interesting_managers)
 
         self.zmq_context.destroy()
@@ -431,6 +434,7 @@ class Interchange:
                        'max_capacity': 0,
                        'worker_count': 0,
                        'active': True,
+                       'draining': False,
                        'tasks': []}
                 self.connected_block_history.append(msg['block_id'])
 
@@ -469,10 +473,28 @@ class Interchange:
                     self._ready_managers[manager_id]['last_heartbeat'] = time.time()
                     logger.debug("Manager {!r} sent heartbeat via tasks connection".format(manager_id))
                     self.task_outgoing.send_multipart([manager_id, b'', PKL_HEARTBEAT_CODE])
+                elif msg['type'] == 'drain':
+                    self._ready_managers[manager_id]['draining'] = True
+                    logger.debug(f"Manager {manager_id!r} requested drain")
                 else:
                     logger.error(f"Unexpected message type received from manager: {msg['type']}")
             logger.debug("leaving task_outgoing section")
 
+    def expire_drained_managers(self, interesting_managers: Set[bytes], hub_channel: Optional[zmq.Socket]) -> None:
+
+        for manager_id in list(interesting_managers):
+            # is it always true that a draining manager will be in interesting managers?
+            # i think so because it will have outstanding capacity?
+            m = self._ready_managers[manager_id]
+            if m['draining'] and len(m['tasks']) == 0:
+                logger.info(f"Manager {manager_id!r} is drained - sending drained message to manager")
+                self.task_outgoing.send_multipart([manager_id, b'', PKL_DRAINED_CODE])
+                interesting_managers.remove(manager_id)
+                self._ready_managers.pop(manager_id)
+
+                m['active'] = False
+                self._send_monitoring_info(hub_channel, m)
+
     def process_tasks_to_send(self, interesting_managers: Set[bytes]) -> None:
         # Check if there are tasks that could be sent to managers
 
@@ -490,7 +512,7 @@ class Interchange:
             tasks_inflight = len(m['tasks'])
             real_capacity = m['max_capacity'] - tasks_inflight
 
-            if (real_capacity and m['active']):
+            if (real_capacity and m['active'] and not m['draining']):
                 tasks = self.get_tasks(real_capacity)
                 if tasks:
                     self.task_outgoing.send_multipart([manager_id, b'', pickle.dumps(tasks)])
parsl/executors/high_throughput/process_worker_pool.py
CHANGED
@@ -36,6 +36,7 @@ from parsl.executors.high_throughput.mpi_resource_management import (
 from parsl.executors.high_throughput.mpi_prefix_composer import compose_all, VALID_LAUNCHERS
 
 HEARTBEAT_CODE = (2 ** 32) - 1
+DRAINED_CODE = (2 ** 32) - 2
 
 
 class Manager:
@@ -73,7 +74,8 @@ class Manager:
                  enable_mpi_mode: bool = False,
                  mpi_launcher: str = "mpiexec",
                  available_accelerators: Sequence[str],
-                 cert_dir: Optional[str]
+                 cert_dir: Optional[str],
+                 drain_period: Optional[int]):
         """
         Parameters
         ----------
@@ -138,6 +140,9 @@ class Manager:
 
         cert_dir : str | None
             Path to the certificate directory.
+
+        drain_period: int | None
+            Number of seconds to drain after TODO: could be a nicer timespec involving m,s,h qualifiers for user friendliness?
         """
 
         logger.info("Manager initializing")
@@ -227,6 +232,14 @@ class Manager:
         self.heartbeat_period = heartbeat_period
         self.heartbeat_threshold = heartbeat_threshold
         self.poll_period = poll_period
+
+        self.drain_time: float
+        if drain_period:
+            self.drain_time = self._start_time + drain_period
+            logger.info(f"Will request drain at {self.drain_time}")
+        else:
+            self.drain_time = float('inf')
+
         self.cpu_affinity = cpu_affinity
 
         # Define accelerator available, adjust worker count accordingly
@@ -262,10 +275,19 @@ class Manager:
         """ Send heartbeat to the incoming task queue
         """
         msg = {'type': 'heartbeat'}
+        # don't need to dumps and encode this every time - could do as a global on import?
         b_msg = json.dumps(msg).encode('utf-8')
         self.task_incoming.send(b_msg)
         logger.debug("Sent heartbeat")
 
+    def drain_to_incoming(self):
+        """ Send heartbeat to the incoming task queue
+        """
+        msg = {'type': 'drain'}
+        b_msg = json.dumps(msg).encode('utf-8')
+        self.task_incoming.send(b_msg)
+        logger.debug("Sent drain")
+
     @wrap_with_logs
     def pull_tasks(self, kill_event):
         """ Pull tasks from the incoming tasks zmq pipe onto the internal
@@ -298,6 +320,7 @@ class Manager:
             # time here are correctly copy-pasted from the relevant if
             # statements.
             next_interesting_event_time = min(last_beat + self.heartbeat_period,
+                                              self.drain_time,
                                               last_interchange_contact + self.heartbeat_threshold)
             try:
                 pending_task_count = self.pending_task_queue.qsize()
@@ -312,6 +335,14 @@ class Manager:
                 self.heartbeat_to_incoming()
                 last_beat = time.time()
 
+            if self.drain_time and time.time() > self.drain_time:
+                logger.info("Requesting drain")
+                self.drain_to_incoming()
+                self.drain_time = None
+                # This will start the pool draining...
+                # Drained exit behaviour does not happen here. It will be
+                # driven by the interchange sending a DRAINED_CODE message.
+
             poll_duration_s = max(0, next_interesting_event_time - time.time())
             socks = dict(poller.poll(timeout=poll_duration_s * 1000))
 
@@ -322,7 +353,9 @@ class Manager:
 
                 if tasks == HEARTBEAT_CODE:
                     logger.debug("Got heartbeat from interchange")
-
+                elif tasks == DRAINED_CODE:
+                    logger.info("Got fulled drained message from interchange - setting kill flag")
+                    kill_event.set()
                 else:
                     task_recv_counter += len(tasks)
                     logger.debug("Got executor tasks: {}, cumulative count of tasks: {}".format([t['task_id'] for t in tasks], task_recv_counter))
@@ -490,9 +523,8 @@ class Manager:
         self._worker_watchdog_thread.start()
         self._monitoring_handler_thread.start()
 
-        logger.info("
+        logger.info("Manager threads started")
 
-        # TODO : Add mechanism in this loop to stop the worker pool
         # This might need a multiprocessing event to signal back.
         self._kill_event.wait()
         logger.critical("Received kill event, terminating worker processes")
@@ -804,6 +836,8 @@ if __name__ == "__main__":
                         help="Heartbeat period in seconds. Uses manager default unless set")
     parser.add_argument("--hb_threshold", default=120,
                         help="Heartbeat threshold in seconds. Uses manager default unless set")
+    parser.add_argument("--drain_period", default=None,
+                        help="Drain this pool after specified number of seconds. By default, does not drain.")
     parser.add_argument("--address_probe_timeout", default=30,
                         help="Timeout to probe for viable address to interchange. Default: 30s")
     parser.add_argument("--poll", default=10,
@@ -824,7 +858,7 @@ if __name__ == "__main__":
                         required=True,
                         help="Whether/how workers should control CPU affinity.")
     parser.add_argument("--available-accelerators", type=str, nargs="*",
-                        help="Names of available accelerators")
+                        help="Names of available accelerators, if not given assumed to be zero accelerators available", default=[])
     parser.add_argument("--enable_mpi_mode", action='store_true',
                         help="Enable MPI mode")
     parser.add_argument("--mpi-launcher", type=str, choices=VALID_LAUNCHERS,
@@ -856,6 +890,7 @@ if __name__ == "__main__":
     logger.info("Prefetch capacity: {}".format(args.prefetch_capacity))
     logger.info("Heartbeat threshold: {}".format(args.hb_threshold))
     logger.info("Heartbeat period: {}".format(args.hb_period))
+    logger.info("Drain period: {}".format(args.drain_period))
     logger.info("CPU affinity: {}".format(args.cpu_affinity))
     logger.info("Accelerators: {}".format(" ".join(args.available_accelerators)))
     logger.info("enable_mpi_mode: {}".format(args.enable_mpi_mode))
@@ -876,6 +911,7 @@ if __name__ == "__main__":
                       prefetch_capacity=int(args.prefetch_capacity),
                       heartbeat_threshold=int(args.hb_threshold),
                       heartbeat_period=int(args.hb_period),
+                      drain_period=None if args.drain_period == "None" else int(args.drain_period),
                       poll_period=int(args.poll),
                       cpu_affinity=args.cpu_affinity,
                       enable_mpi_mode=args.enable_mpi_mode,
parsl/executors/taskvine/executor.py
CHANGED
@@ -196,8 +196,9 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
         if self.manager_config.port == 0 and self.manager_config.project_name is None:
             self.manager_config.project_name = "parsl-vine-" + str(uuid.uuid4())
 
-        # guess the host name if the project name is not given
-
+        # guess the host name if the project name is not given and none has been supplied
+        # explicitly in the manager config.
+        if not self.manager_config.project_name and self.manager_config.address is None:
             self.manager_config.address = get_any_address()
 
         # Factory communication settings are overridden by manager communication settings.
parsl/executors/taskvine/manager.py
CHANGED
@@ -376,6 +376,7 @@ def _taskvine_submit_wait(ready_task_queue=None,
                 task_out_file = parsl_file_name_to_vine_file[spec.parsl_name]
             else:
                 task_out_file = m.declare_file(spec.parsl_name, cache=spec.cache, peer_transfer=True)
+                parsl_file_name_to_vine_file[spec.parsl_name] = task_out_file
             t.add_output(task_out_file, spec.parsl_name)
 
         # Submit the task to the TaskVine object
parsl/executors/taskvine/manager_config.py
CHANGED
@@ -1,4 +1,3 @@
-import socket
 from dataclasses import dataclass
 from typing import Optional
 
@@ -23,9 +22,9 @@ class TaskVineManagerConfig:
         A value of 0 means TaskVine chooses any available port.
         Default is VINE_DEFAULT_PORT.
 
-    address: str
+    address: Optional[str]
         Address of the local machine.
-
+        If None, socket.gethostname() will be used to determine the address.
 
     project_name: Optional[str]
         If given, TaskVine will periodically report its status and performance
@@ -161,7 +160,7 @@ class TaskVineManagerConfig:
 
     # Connection and communication settings
     port: int = VINE_DEFAULT_PORT
-    address: str =
+    address: Optional[str] = None
     project_name: Optional[str] = None
    project_password_file: Optional[str] = None
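With address now optional, a configuration can simply omit it; a minimal sketch follows (the import path is an assumption about how the taskvine package exports this class, and TaskVine itself must be installed for it to work):

from parsl.executors.taskvine import TaskVineManagerConfig

# address now defaults to None; when no project name is set either,
# the executor fills the address in itself (see the executor.py hunk above).
config = TaskVineManagerConfig(port=0, project_name="my-vine-project")
assert config.address is None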
parsl/jobs/job_status_poller.py
CHANGED
@@ -2,7 +2,7 @@ import logging
 import parsl
 import time
 import zmq
-from typing import Dict, List, Sequence, Optional
+from typing import Dict, List, Sequence, Optional, Union
 
 from parsl.jobs.states import JobStatus, JobState
 from parsl.jobs.strategy import Strategy
@@ -106,13 +106,14 @@ class PollItem:
 
 
 class JobStatusPoller(Timer):
-    def __init__(self, strategy: Optional[str]
+    def __init__(self, *, strategy: Optional[str], max_idletime: float,
+                 strategy_period: Union[float, int],
                  dfk: Optional["parsl.dataflow.dflow.DataFlowKernel"] = None) -> None:
         self._poll_items = []  # type: List[PollItem]
         self.dfk = dfk
         self._strategy = Strategy(strategy=strategy,
                                   max_idletime=max_idletime)
-        super().__init__(self.poll, interval=
+        super().__init__(self.poll, interval=strategy_period, name="JobStatusPoller")
 
     def poll(self) -> None:
         self._update_state()
parsl/launchers/launchers.py
CHANGED
@@ -8,16 +8,16 @@ logger = logging.getLogger(__name__)
 class SimpleLauncher(Launcher):
     """ Does no wrapping. Just returns the command as-is
     """
-    def
+    def __init__(self, debug: bool = True) -> None:
         super().__init__(debug=debug)
 
     def __call__(self, command: str, tasks_per_node: int, nodes_per_block: int) -> str:
-        """
-        Args:
-        - command (string): The command string to be launched
-        - task_block (string) : bash evaluated string.
 
-        """
+        if nodes_per_block > 1:
+            logger.warning('Simple Launcher only supports single node per block. '
+                           f'Requested nodes: {nodes_per_block}. '
+                           'You may be getting fewer workers than expected')
+
         return command
 
 
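For reference, a small sketch of what the revised __call__ now does for a multi-node block, matching the behaviour in the hunk above (the command string here is a placeholder):

from parsl.launchers import SimpleLauncher

launcher = SimpleLauncher()
# With nodes_per_block > 1 this logs the new warning and still returns
# the command unwrapped, so only one node actually runs workers.
cmd = launcher("process_worker_pool.py ...", tasks_per_node=1, nodes_per_block=4)
assert cmd == "process_worker_pool.py ..."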
parsl/log_utils.py
CHANGED
@@ -28,7 +28,7 @@ DEFAULT_FORMAT = (
 def set_stream_logger(name: str = 'parsl',
                       level: int = logging.DEBUG,
                       format_string: Optional[str] = None,
-                      stream: Optional[io.TextIOWrapper] = None) ->
+                      stream: Optional[io.TextIOWrapper] = None) -> logging.Logger:
     """Add a stream log handler.
 
     Args:
@@ -39,7 +39,7 @@ def set_stream_logger(name: str = 'parsl',
          If not specified, the default stream for logging.StreamHandler is used.
 
     Returns:
-         -
+         - logger for specified name
     """
     if format_string is None:
         # format_string = "%(asctime)s %(name)s [%(levelname)s] Thread:%(thread)d %(message)s"
@@ -59,12 +59,14 @@ def set_stream_logger(name: str = 'parsl',
     futures_logger = logging.getLogger("concurrent.futures")
     futures_logger.addHandler(handler)
 
+    return logger
+
 
 @typeguard.typechecked
 def set_file_logger(filename: str,
                     name: str = 'parsl',
                     level: int = logging.DEBUG,
-                    format_string: Optional[str] = None) ->
+                    format_string: Optional[str] = None) -> logging.Logger:
     """Add a file log handler.
 
     Args:
@@ -74,7 +76,7 @@ def set_file_logger(filename: str,
         - format_string (string): Set the format string
 
     Returns:
-         -
+         - logger for specified name
     """
     if format_string is None:
         format_string = DEFAULT_FORMAT
@@ -91,3 +93,5 @@ def set_file_logger(filename: str,
     # concurrent.futures
     futures_logger = logging.getLogger("concurrent.futures")
     futures_logger.addHandler(handler)
+
+    return logger
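Both helpers now return the logger they configure, which lets callers drop a separate logging.getLogger lookup; a minimal sketch (the file path is illustrative):

import logging
from parsl.log_utils import set_file_logger

# set_file_logger now returns the configured logger directly
logger = set_file_logger("/tmp/example.log", name="example", level=logging.INFO)
logger.info("this line goes to /tmp/example.log")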
parsl/monitoring/db_manager.py
CHANGED
@@ -633,7 +633,8 @@ class DatabaseManager:
                 # if retried - for example, the database being locked because someone else is readying
                 # the tables we are trying to write to. If that assumption is wrong, then this loop
                 # may go on forever.
-                logger.warning("Got a database OperationalError.
+                logger.warning("Got a database OperationalError. "
+                               "Ignoring and retrying on the assumption that it is recoverable: {}".format(e))
                 self.db.rollback()
                 time.sleep(1)  # hard coded 1s wait - this should be configurable or exponential backoff or something
 
@@ -660,7 +661,8 @@ class DatabaseManager:
                     done = True
                 except sa.exc.OperationalError as e:
                     # hoping that this is a database locked error during _update, not some other problem
-                    logger.warning("Got a database OperationalError.
+                    logger.warning("Got a database OperationalError. "
+                                   "Ignoring and retrying on the assumption that it is recoverable: {}".format(e))
                     self.db.rollback()
                     time.sleep(1)  # hard coded 1s wait - this should be configurable or exponential backoff or something
                 except KeyboardInterrupt:
parsl/monitoring/monitoring.py
CHANGED
@@ -15,6 +15,7 @@ import parsl.monitoring.remote
 from parsl.multiprocessing import ForkProcess, SizedQueue
 from multiprocessing import Process
 from multiprocessing.queues import Queue
+from parsl.log_utils import set_file_logger
 from parsl.utils import RepresentationMixin
 from parsl.process_loggers import wrap_with_logs
 from parsl.utils import setproctitle
@@ -38,40 +39,6 @@ else:
 logger = logging.getLogger(__name__)
 
 
-def start_file_logger(filename: str, name: str = 'monitoring', level: int = logging.DEBUG, format_string: Optional[str] = None) -> logging.Logger:
-    """Add a stream log handler.
-
-    Parameters
-    ---------
-
-    filename: string
-        Name of the file to write logs to. Required.
-    name: string
-        Logger name.
-    level: logging.LEVEL
-        Set the logging level. Default=logging.DEBUG
-    - format_string (string): Set the format string
-    format_string: string
-        Format string to use.
-
-    Returns
-    -------
-        None.
-    """
-    if format_string is None:
-        format_string = "%(asctime)s.%(msecs)03d %(name)s:%(lineno)d [%(levelname)s] %(message)s"
-
-    logger = logging.getLogger(name)
-    logger.setLevel(level)
-    logger.propagate = False
-    handler = logging.FileHandler(filename)
-    handler.setLevel(level)
-    formatter = logging.Formatter(format_string, datefmt='%Y-%m-%d %H:%M:%S')
-    handler.setFormatter(formatter)
-    logger.addHandler(handler)
-    return logger
-
-
 @typeguard.typechecked
 class MonitoringHub(RepresentationMixin):
     def __init__(self,
@@ -79,9 +46,6 @@ class MonitoringHub(RepresentationMixin):
                  hub_port: Optional[int] = None,
                  hub_port_range: Tuple[int, int] = (55050, 56000),
 
-                 client_address: str = "127.0.0.1",
-                 client_port_range: Tuple[int, int] = (55000, 56000),
-
                  workflow_name: Optional[str] = None,
                  workflow_version: Optional[str] = None,
                  logging_endpoint: Optional[str] = None,
@@ -106,11 +70,6 @@ class MonitoringHub(RepresentationMixin):
             to deliver monitoring messages to the monitoring router.
             Note that despite the similar name, this is not related to hub_port.
             Default: (55050, 56000)
-        client_address : str
-            The ip address at which the dfk will be able to reach Hub. Default: "127.0.0.1"
-        client_port_range : tuple(int, int)
-            The MonitoringHub picks ports at random from the range which will be used by Hub.
-            Default: (55000, 56000)
         workflow_name : str
             The name for the workflow. Default to the name of the parsl script
         workflow_version : str
@@ -145,9 +104,6 @@ class MonitoringHub(RepresentationMixin):
         if _db_manager_excepts:
             raise _db_manager_excepts
 
-        self.client_address = client_address
-        self.client_port_range = client_port_range
-
         self.hub_address = hub_address
         self.hub_port = hub_port
         self.hub_port_range = hub_port_range
@@ -337,9 +293,9 @@ class MonitoringHub(RepresentationMixin):
 
 @wrap_with_logs
 def filesystem_receiver(logdir: str, q: "queue.Queue[AddressedMonitoringMessage]", run_dir: str) -> None:
-    logger =
-
-
+    logger = set_file_logger("{}/monitoring_filesystem_radio.log".format(logdir),
+                             name="monitoring_filesystem_radio",
+                             level=logging.INFO)
 
     logger.info("Starting filesystem radio receiver")
     setproctitle("parsl: monitoring filesystem receiver")
@@ -405,9 +361,9 @@ class MonitoringRouter:
 
         """
         os.makedirs(logdir, exist_ok=True)
-        self.logger =
-
-
+        self.logger = set_file_logger("{}/monitoring_router.log".format(logdir),
+                                      name="monitoring_router",
+                                      level=logging_level)
         self.logger.debug("Monitoring router starting")
 
         self.hub_address = hub_address
@@ -493,7 +449,8 @@ class MonitoringRouter:
                         # but there is no verification that the message
                         # received from ic_channel.recv_pyobj() is actually
                         # of that type.
-                        self.logger.error(
+                        self.logger.error("Discarding message "  # type: ignore[unreachable]
+                                          f"from interchange with unknown type {msg[0].value}")
                 except zmq.Again:
                     pass
                 except Exception:
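After this change, passing client_address or client_port_range to MonitoringHub raises a TypeError; configurations should construct it with only the hub-side options that remain in the constructor hunk above. A hedged sketch, with an illustrative address:

from parsl.monitoring import MonitoringHub

# client_address / client_port_range are gone; hub_address and the
# hub port options remain, per the constructor hunk above.
hub = MonitoringHub(
    hub_address="127.0.0.1",
    hub_port_range=(55050, 56000),
)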
parsl/monitoring/visualization/plots/default/workflow_plots.py
CHANGED
@@ -27,6 +27,9 @@ gantt_colors = {'unsched': 'rgb(240, 240, 240)',
 
 def task_gantt_plot(df_task, df_status, time_completed=None):
 
+    if df_task.empty:
+        return None
+
     # if the workflow is not recorded as completed, then assume
     # that tasks should continue in their last state until now,
     # rather than the workflow end time.
parsl/monitoring/visualization/views.py
CHANGED
@@ -8,7 +8,8 @@ from parsl.monitoring.visualization.models import Workflow, Task, Status, db
 
 from parsl.monitoring.visualization.plots.default.workflow_plots import task_gantt_plot, task_per_app_plot, workflow_dag_plot
 from parsl.monitoring.visualization.plots.default.task_plots import time_series_memory_per_task_plot
-from parsl.monitoring.visualization.plots.default.workflow_resource_plots import resource_distribution_plot,
+from parsl.monitoring.visualization.plots.default.workflow_resource_plots import (resource_distribution_plot,
+                                                                                  resource_efficiency, worker_efficiency)
 
 dummy = True
 
parsl/providers/cluster_provider.py
CHANGED
@@ -91,7 +91,7 @@ class ClusterProvider(ExecutionProvider):
               - configs (dict) : configs that get pushed into the template
 
         Returns:
-              -
+              - None
 
         Raises:
               SchedulerMissingArgs : If template is missing args
@@ -117,8 +117,6 @@ class ClusterProvider(ExecutionProvider):
                 logger.error("Uncategorized error: %s", e)
                 raise e
 
-        return True
-
     @abstractmethod
     def _status(self):
         pass
parsl/tests/configs/user_opts.py
CHANGED
@@ -60,7 +60,8 @@ user_opts = {
     #     'username': OSG_USERNAME,
     #     'script_dir': '/home/{}/parsl_scripts'.format(OSG_USERNAME),
     #     'scheduler_options': "",
-    #     'worker_init' : 'module load python/3.5.2; python3 -m venv parsl_env; source parsl_env/bin/activate; python3 -m pip install parsl==0.5.2'
+    #     'worker_init' : 'module load python/3.5.2; python3 -m venv parsl_env;
+    #                      source parsl_env/bin/activate; python3 -m pip install parsl==0.5.2'
     # },
     # 'swan': {
     #     'username': SWAN_USERNAME,
parsl/tests/test_htex/test_drain.py
ADDED
@@ -0,0 +1,78 @@
+import parsl
+import pytest
+import time
+
+from parsl.providers import LocalProvider
+from parsl.channels import LocalChannel
+from parsl.launchers import SimpleLauncher
+
+from parsl.config import Config
+from parsl.executors import HighThroughputExecutor
+
+# this constant is used to scale some durations that happen
+# based around the expected drain period: the drain period
+# is TIME_CONST seconds, and the single executed task will
+# last twice that many number of seconds.
+TIME_CONST = 1
+
+
+def local_config():
+    return Config(
+        executors=[
+            HighThroughputExecutor(
+                label="htex_local",
+                drain_period=TIME_CONST,
+                worker_debug=True,
+                cores_per_worker=1,
+                encrypted=True,
+                provider=LocalProvider(
+                    channel=LocalChannel(),
+                    init_blocks=1,
+                    min_blocks=0,
+                    max_blocks=0,
+                    launcher=SimpleLauncher(),
+                ),
+            )
+        ],
+        strategy='none',
+    )
+
+
+@parsl.python_app
+def f(n):
+    import time
+    time.sleep(n)
+
+
+@pytest.mark.local
+def test_drain(try_assert):
+
+    htex = parsl.dfk().executors['htex_local']
+
+    # wait till we have a block running...
+
+    try_assert(lambda: len(htex.connected_managers()) == 1)
+
+    managers = htex.connected_managers()
+    assert managers[0]['active'], "The manager should be active"
+    assert not managers[0]['draining'], "The manager should not be draining"
+
+    fut = f(TIME_CONST * 2)
+
+    time.sleep(TIME_CONST)
+
+    # this assert should happen *very fast* after the above delay...
+    try_assert(lambda: htex.connected_managers()[0]['draining'], timeout_ms=500)
+
+    # and the test task should still be running...
+    assert not fut.done(), "The test task should still be running"
+
+    fut.result()
+
+    # and now we should see the manager disappear...
+    # ... with strategy='none', this should be coming from draining but
+    # that information isn't immediately obvious from the absence in
+    # connected managers.
+    # As with the above draining assert, this should happen very fast after
+    # the task ends.
+    try_assert(lambda: len(htex.connected_managers()) == 0, timeout_ms=500)
parsl/tests/test_monitoring/test_app_names.py
ADDED
@@ -0,0 +1,86 @@
+"""Tests monitoring records app name under various decoration patterns.
+"""
+
+import os
+import parsl
+import pytest
+import time
+
+from parsl.tests.configs.htex_local_alternate import fresh_config
+
+
+@parsl.python_app
+def regular_decorated_app():
+    return 5
+
+
+@pytest.mark.local
+def get_regular_decorated_app():
+    return regular_decorated_app
+
+
+def for_decoration_later():
+    return 77
+
+
+def get_for_decoration_later():
+    return parsl.python_app(for_decoration_later)
+
+
+def get_decorated_closure():
+
+    r = 53
+
+    @parsl.python_app
+    def decorated_closure():
+        return r
+
+    return decorated_closure
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("get_app,expected_name,expected_result",
+                         [(get_regular_decorated_app, "regular_decorated_app", 5),
+                          (get_for_decoration_later, "for_decoration_later", 77),
+                          (get_decorated_closure, "decorated_closure", 53)
+                          ])
+def test_app_name(get_app, expected_name, expected_result, tmpd_cwd):
+
+    # this is imported here rather than at module level because
+    # it isn't available in a plain parsl install, so this module
+    # would otherwise fail to import and break even a basic test
+    # run.
+    import sqlalchemy
+
+    c = fresh_config()
+    c.run_dir = tmpd_cwd
+    c.monitoring.logging_endpoint = f"sqlite:///{tmpd_cwd}/monitoring.db"
+    parsl.load(c)
+
+    app = get_app()
+    assert app().result() == expected_result
+
+    parsl.dfk().cleanup()
+    parsl.clear()
+
+    engine = sqlalchemy.create_engine(c.monitoring.logging_endpoint)
+    with engine.begin() as connection:
+
+        def count_rows(table: str):
+            result = connection.execute(f"SELECT COUNT(*) FROM {table}")
+            (c, ) = result.first()
+            return c
+
+        # one workflow...
+        assert count_rows("workflow") == 1
+
+        # ... with one task ...
+        assert count_rows("task") == 1
+
+        # ... that was tried once ...
+        assert count_rows("try") == 1
+
+        # ... and has the expected name.
+        result = connection.execute("SELECT task_func_name FROM task")
+        (c, ) = result.first()
+        assert c == expected_name
parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py
CHANGED
@@ -37,6 +37,7 @@ def local_config():
         ],
         max_idletime=0.5,
         strategy='htex_auto_scale',
+        strategy_period=0.1
     )
 
 
@@ -62,16 +63,6 @@ def waiting_app(ident: int, outputs=(), inputs=()):
 def test_scale_out(tmpd_cwd, try_assert):
     dfk = parsl.dfk()
 
-    # reconfigure scaling strategy to run faster than usual. This allows
-    # this test to complete faster - at time of writing 27s with default
-    # 5s strategy, vs XXXX with 0.5s strategy.
-
-    # check this attribute still exists, in the presence of ongoing
-    # development, so we have some belief that setting it will not be
-    # setting a now-ignored parameter.
-    assert hasattr(dfk.job_status_poller, 'interval')
-    dfk.job_status_poller.interval = 0.1
-
     num_managers = len(dfk.executors['htex_local'].connected_managers())
 
     assert num_managers == 0, "Expected 0 managers at start"
parsl/utils.py
CHANGED
@@ -296,12 +296,12 @@ class Timer:
 
     """
 
-    def __init__(self, callback: Callable, *args: Any, interval: int = 5, name: Optional[str] = None) -> None:
+    def __init__(self, callback: Callable, *args: Any, interval: Union[float, int] = 5, name: Optional[str] = None) -> None:
         """Initialize the Timer object.
         We start the timer thread here
 
         KWargs:
-             - interval (int) : number of seconds between callback events
+             - interval (int or float) : number of seconds between callback events
             - name (str) : a base name to use when naming the started thread
         """
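The widened interval type is what lets JobStatusPoller pass a float strategy_period straight through to Timer. A small sketch of Timer with a sub-second interval; the close() call assumes Timer's existing shutdown method:

import time
from parsl.utils import Timer

def tick() -> None:
    print("tick")

# interval may now be a float, e.g. five callbacks per second;
# the timer thread starts inside the constructor
t = Timer(tick, interval=0.2, name="demo-timer")
time.sleep(1)
t.close()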
parsl/version.py
CHANGED
-VERSION = '2024.03.11'
+VERSION = '2024.03.18'
{parsl-2024.3.11.data → parsl-2024.3.18.data}/scripts/process_worker_pool.py
CHANGED
Identical to the changes in parsl/executors/high_throughput/process_worker_pool.py shown above; the wheel ships a second copy of this script.
{parsl-2024.3.11.dist-info → parsl-2024.3.18.dist-info}/METADATA
CHANGED
@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: parsl
-Version: 2024.3.11
+Version: 2024.3.18
 Summary: Simple data dependent workflows in Python
 Home-page: https://github.com/Parsl/parsl
-Download-URL: https://github.com/Parsl/parsl/archive/2024.03.11.tar.gz
+Download-URL: https://github.com/Parsl/parsl/archive/2024.03.18.tar.gz
 Author: The Parsl Team
 Author-email: parsl@googlegroups.com
 License: Apache 2.0
{parsl-2024.3.11.dist-info → parsl-2024.3.18.dist-info}/RECORD
CHANGED
@@ -1,14 +1,14 @@
 parsl/__init__.py,sha256=hq8rJmP59wzd9-yxaGcmq5gPpshOopH-Y1K0BkUBNY0,1843
-parsl/addresses.py,sha256=
-parsl/config.py,sha256=
+parsl/addresses.py,sha256=mO4u1kVxAnBHHIIUyqdzsOxT9aUsGeBP5PfAPkz3sug,4819
+parsl/config.py,sha256=E90pKPeagHpIdk9XYifHqSpTAaKdDQN59NPDi8PrTAc,7038
 parsl/curvezmq.py,sha256=FtZEYP1IWDry39cH-tOKUm9TnaR1U7krOmvVYpATcOk,6939
 parsl/errors.py,sha256=SzINzQFZDBDbj9l-DPQznD0TbGkNhHIRAPkcBCogf_A,1019
-parsl/log_utils.py,sha256=
+parsl/log_utils.py,sha256=Ckeb7YiIoK0FA8dA5CsWJDe28i9Sf4sxhFwp__VsD3o,3274
 parsl/multiprocessing.py,sha256=hakfdg-sgxEjwloZeDrt6EhzwdzecvjJhkPHHxh8lII,1938
 parsl/process_loggers.py,sha256=1G3Rfrh5wuZNo2X03grG4kTYPGOxz7hHCyG6L_A3b0A,1137
 parsl/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-parsl/utils.py,sha256=
-parsl/version.py,sha256=
+parsl/utils.py,sha256=A3WDMGaNB4ajVx_jCuc-74W6PFy4zswJy-pLE7u8Dz0,10979
+parsl/version.py,sha256=dhmXAlQKRlQPsExiUS22Ca2clSFcL-0irfA1G-7kIyo,131
 parsl/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/app/app.py,sha256=wAHchJetgnicT1pn0NJKDeDX0lV3vDFlG8cQd_Ciax4,8522
 parsl/app/bash.py,sha256=bx9x1XFwkOTpZZD3CPwnVL9SyNRDjbUGtOnuGLvxN_8,5396
@@ -60,7 +60,7 @@ parsl/data_provider/http.py,sha256=nDHTW7XmJqAukWJjPRQjyhUXt8r6GsQ36mX9mv_wOig,2
 parsl/data_provider/rsync.py,sha256=2-ZxqrT-hBj39x082NusJaBqsGW4Jd2qCW6JkVPpEl0,4254
 parsl/data_provider/staging.py,sha256=l-mAXFburs3BWPjkSmiQKuAgJpsxCG62yATPDbrafYI,4523
 parsl/dataflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-parsl/dataflow/dflow.py,sha256=
+parsl/dataflow/dflow.py,sha256=1xvZKXDleb1GMS7x4dg8BGhKJfD0wPq4bjbAtFbkkc4,64256
 parsl/dataflow/errors.py,sha256=w2vOt_ymzG2dOqJUO4IDcmTlrCIHlMZL8nBVyVq0O_8,2176
 parsl/dataflow/futures.py,sha256=aVfEUTzp4-EdunDAtNcqVQf8l_A7ArDi2c82KZMwxfY,5256
 parsl/dataflow/memoization.py,sha256=AsJO6c6cRp2ac6H8uGn2USlEi78_nX3QWvpxYt4XdYE,9583
@@ -78,14 +78,14 @@ parsl/executors/flux/executor.py,sha256=0omXRPvykdW5VZb8mwgBJjxVk4H6G8xoL5D_R9pu
 parsl/executors/flux/flux_instance_manager.py,sha256=tTEOATClm9SwdgLeBRWPC6D55iNDuh0YxqJOw3c3eQ4,2036
 parsl/executors/high_throughput/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/executors/high_throughput/errors.py,sha256=vl69wLuVOplbKxHI9WphEGBExHWkTn5n8T9QhBXuNH0,380
-parsl/executors/high_throughput/executor.py,sha256=
-parsl/executors/high_throughput/interchange.py,sha256=
-parsl/executors/high_throughput/manager_record.py,sha256=
+parsl/executors/high_throughput/executor.py,sha256=mSxDizbw79zmnW4yMDuRflZfGwZKciWmZ8XW9rAy4gI,37591
+parsl/executors/high_throughput/interchange.py,sha256=Rt6HyFvQYFuqUJ1ytXmUFTDIK9wOBm4l96IHoL6OFRc,31491
+parsl/executors/high_throughput/manager_record.py,sha256=w5EwzVqPtsLOyOW8jP44U3uaogt8H--tkwp7FNyKN_o,385
 parsl/executors/high_throughput/monitoring_info.py,sha256=3gQpwQjjNDEBz0cQqJZB6hRiwLiWwXs83zkQDmbOwxY,297
 parsl/executors/high_throughput/mpi_prefix_composer.py,sha256=GPSejuNL407gvzw9f7dTWArTLn6heTi-erJjgcM-_8Y,4273
 parsl/executors/high_throughput/mpi_resource_management.py,sha256=geLYmp2teKYgTnzATAR_JPtjAa0ysu6pHpXs90vwkds,7975
 parsl/executors/high_throughput/probe.py,sha256=lvnuf-vBv57tHvFh-J51F9sDYBES7jCgs6KYgWvmKRs,2749
-parsl/executors/high_throughput/process_worker_pool.py,sha256=
+parsl/executors/high_throughput/process_worker_pool.py,sha256=hUKno44b3hzPQHKnF91K_BQxusDoapT8K8y-2E0DlDo,41109
 parsl/executors/high_throughput/zmq_pipes.py,sha256=TEIr1PcBDVbchBukzPaEsku2lbIIFCYYjeUq5zw_VBA,6514
 parsl/executors/radical/__init__.py,sha256=CKbtV2numw5QvgIBq1htMUrt9TqDCIC2zifyf2svTNU,186
 parsl/executors/radical/executor.py,sha256=5r9WZkOr0clg79zm35E7nC7zNv0DpbyM8iTC2B6d4N0,21024
@@ -95,11 +95,11 @@ parsl/executors/radical/rpex_worker.py,sha256=1M1df-hzFdmZMWbRZlUzIX7uAWMKJ_SkxL
 parsl/executors/taskvine/__init__.py,sha256=sWIJdvSLgQKul9dlSjIkNat7yBDgU3SrBF3X2yhT86E,293
 parsl/executors/taskvine/errors.py,sha256=MNS_NjpvHjwevQXOjqjSEBFroqEWi-LT1ZEVZ2C5Dx0,652
 parsl/executors/taskvine/exec_parsl_function.py,sha256=oUAKbPWwpbzWwQ47bZQlVDxS8txhnhPsonMf3AOEMGQ,7085
-parsl/executors/taskvine/executor.py,sha256=
+parsl/executors/taskvine/executor.py,sha256=KHiRGcLWHyVBm_6E4IowCV20Z15lsf2ZUnTR49IQxNg,31544
 parsl/executors/taskvine/factory.py,sha256=sHhfGv7xRFrWkQclzRXuFEAHuSXhsZu2lR5LJ81aucA,2638
 parsl/executors/taskvine/factory_config.py,sha256=AbE2fN2snrF5ITYrrS4DnGn2XkJHUFr_17DYHDHIwq0,3693
-parsl/executors/taskvine/manager.py,sha256=
-parsl/executors/taskvine/manager_config.py,sha256=
+parsl/executors/taskvine/manager.py,sha256=VxVN2L5zFVPNfSAJrGgq87MRJKpcxf-BHdO5QWxB4TU,25822
+parsl/executors/taskvine/manager_config.py,sha256=LzHc-bXqxixPlxMG5TdxxZ5E46D_hnn1tiPqLRPvUHY,7457
 parsl/executors/taskvine/utils.py,sha256=iSrIogeiauL3UNy_9tiZp1cBSNn6fIJkMYQRVi1n_r8,4156
 parsl/executors/workqueue/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/executors/workqueue/errors.py,sha256=ghB93Ptb_QbOAvgLe7siV_snRRkU_T-cFHv3AR6Ziwo,541
@@ -110,17 +110,17 @@ parsl/executors/workqueue/parsl_coprocess_stub.py,sha256=_bJmpPIgL42qM6bVzeEKt1M
 parsl/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/jobs/error_handlers.py,sha256=dvfZFqTwAcFXrIx3sjFTcjtwOB0-xGn6QnPwJEG-kAI,2311
 parsl/jobs/errors.py,sha256=cpSQXCrlKtuHsQf7usjF-lX8XsDkFnE5kWpmFjiN6OU,178
-parsl/jobs/job_status_poller.py,sha256=
+parsl/jobs/job_status_poller.py,sha256=30W-KvzYGQfblFwe_6ZpRVvzerwZD76IC4Xvt9FBw1c,5349
 parsl/jobs/states.py,sha256=rPBoAEEudKngWFijlwvXXhAagDs_9DCXvQP9rwzVgCM,4855
 parsl/jobs/strategy.py,sha256=a-W3vxEHHCfe521LMfSoZLpJjdTtwCfTgdn1ChxzUuI,12959
 parsl/launchers/__init__.py,sha256=k8zAB3IBP-brfqXUptKwGkvsIRaXjAJZNBJa2XVtY1A,546
 parsl/launchers/base.py,sha256=CblcvPTJiu-MNLWaRtFe29SZQ0BpTOlaY8CGcHdlHIE,538
 parsl/launchers/errors.py,sha256=v5i460H_rovzukSccQetxQBVtd92jLQz-NbuDe2TdGI,467
-parsl/launchers/launchers.py,sha256=
+parsl/launchers/launchers.py,sha256=VB--fiVv_IQne3DydTMSdGUY0o0g69puAs-Hd3mJ2vo,15464
 parsl/monitoring/__init__.py,sha256=0ywNz6i0lM1xo_7_BIxhETDGeVd2C_0wwD7qgeaMR4c,83
-parsl/monitoring/db_manager.py,sha256=
+parsl/monitoring/db_manager.py,sha256=hdmmXSTXp8Wwhr7vLpQalD_ahRl3SNxKYVsplnThRk8,37021
 parsl/monitoring/message_type.py,sha256=Khn88afNxcOIciKiCK4GLnn90I5BlRTiOL3zK-P07yQ,401
-parsl/monitoring/monitoring.py,sha256=
+parsl/monitoring/monitoring.py,sha256=N_g3ijGJK46y0k2CEWMGvGR4Ym1mR13yeBkW6TiHHMo,23488
 parsl/monitoring/radios.py,sha256=T2_6QuUjC-dd_7qMnIk6WHQead1iWz7m_P6ZC4QAqdA,5265
 parsl/monitoring/remote.py,sha256=OcIgudujtPO_DsY-YV36x92skeiNdGt-6aEOqaCU8T0,13900
 parsl/monitoring/types.py,sha256=SO6Fjjbb83sv_MtbutoxGssiWh6oXKkEEsD4EvwOnZ4,629
@@ -131,11 +131,11 @@ parsl/monitoring/visualization/app.py,sha256=_RZNBHWJYKwadusPE5p0uFMTqKqI3gI2fBz
 parsl/monitoring/visualization/models.py,sha256=-pzynw6PCh-7_Kat4cqDFt9wAKbo9lWjF4O2muBqDDc,5139
 parsl/monitoring/visualization/utils.py,sha256=IIKnDdHSkvk6e8q-1O9cE-DclPMdrhXbum04EDDYI-0,314
 parsl/monitoring/visualization/version.py,sha256=mjxUkRW-XaF1dhu2IEUPPviGpfOvbxhWfTOYl250lVs,134
-parsl/monitoring/visualization/views.py,sha256=
+parsl/monitoring/visualization/views.py,sha256=o6mfKSPv85kSPb6fgLG5v3OEKGj3efpngE3UoiyTkDY,8256
 parsl/monitoring/visualization/plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/monitoring/visualization/plots/default/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/monitoring/visualization/plots/default/task_plots.py,sha256=j-0PZ5MsK3CU83kQVWDIuGHGFeXF5Z0se-wlRRd_z9s,1654
-parsl/monitoring/visualization/plots/default/workflow_plots.py,sha256=
+parsl/monitoring/visualization/plots/default/workflow_plots.py,sha256=zd2k6O4YMKGe69c56EtGKgGalAKYxbbGpoxIE0iB_yE,11985
 parsl/monitoring/visualization/plots/default/workflow_resource_plots.py,sha256=Z7LK5atAWn-2zwRYmCks3OKbYX80UKXHOmoK6Mec1TI,10269
 parsl/monitoring/visualization/static/parsl-logo-white.png,sha256=LAua5kwDq0Ngt2D3yk9Vh2dBNSIsjj6cqiHKaP2Rn08,14199
 parsl/monitoring/visualization/static/parsl-monitor.css,sha256=9j2mW1IOXBcuaT6-i0feIftuQLMV6gpJ1mpFhxUE3VU,337
@@ -149,7 +149,7 @@ parsl/monitoring/visualization/templates/workflow.html,sha256=QCSHAPHK_2C3gNcZ3N
 parsl/monitoring/visualization/templates/workflows_summary.html,sha256=7brKKNsxcT4z-l10BKJlgTxQtGL033ZS5jEDdSmsPEE,891
 parsl/providers/__init__.py,sha256=jd-1_vd-HtWYDHzwO30lNW5GMw6nfeTyNn3tI8CG7L4,1207
 parsl/providers/base.py,sha256=LvSMClsbCQI_7geGdNDpKZ6vWCl1EpD73o0xkxilqJ4,5702
-parsl/providers/cluster_provider.py,sha256=
+parsl/providers/cluster_provider.py,sha256=7eM2i9bVkNRDhrggFbnlQBxzzC9wz_BmNNqxtFM8VJ4,4668
 parsl/providers/errors.py,sha256=7ig0DEqyFIwjVx1QVXfeutz1Ek16IEAS4doaqg-BTfQ,2270
 parsl/providers/ad_hoc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/providers/ad_hoc/ad_hoc.py,sha256=jeYMxMT_ox7banr8Db_UeT2qer6XTGLZOZvC307S54U,8302
@@ -240,7 +240,7 @@ parsl/tests/configs/summit.py,sha256=_Dd8r5mod30QRCSp16BlzsQYSKbdM71Rt4azvz9Pzfk
 parsl/tests/configs/swan_htex.py,sha256=5mIYnzwOFEQ8fNsepP9QjFj9ZMW2m8u4pJluZ5BZGpw,1501
 parsl/tests/configs/taskvine_ex.py,sha256=e_sXe3d-uSL8cli19-xrBloqQcEQQ4tKVa_x_aDcYGM,632
 parsl/tests/configs/theta.py,sha256=d5fLCW7SpgKkH3K8iRzj6IhsDozlKr4qgDrQ6Dv1bYA,1298
-parsl/tests/configs/user_opts.py,sha256=
+parsl/tests/configs/user_opts.py,sha256=fNO1OxISFPP7IyJ_iwf8dQ6EagVr2StXtOWmGnA9MeI,6265
 parsl/tests/configs/workqueue_ex.py,sha256=5gbjlGECkUd0p9PJbq-IBYARlEbEBiPpXWgHn1GC1d8,514
 parsl/tests/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/tests/integration/latency.py,sha256=kWYkXsbnVnpwS6rHsdm7a1FsUOJWHhuXDsRPlApw6iE,3289
@@ -340,6 +340,7 @@ parsl/tests/test_htex/test_basic.py,sha256=GIOF3cr6A87QDVMxeN0LrvJnXO2Nap0c-TH46
 parsl/tests/test_htex/test_connected_blocks.py,sha256=0628oJ_0_aVsda5xuFwG3_3q8ZiEAM-sfIOINkUHQrk,1639
 parsl/tests/test_htex/test_cpu_affinity_explicit.py,sha256=tv12ojw4DdymlVBjVNnrFX7_mhwix2jnBLkRbKOQRao,1422
 parsl/tests/test_htex/test_disconnected_blocks.py,sha256=HQhtX757t4CdWbtu-VT3MttLHQowGyHPOp9vX0TN_a4,1890
+parsl/tests/test_htex/test_drain.py,sha256=bnbQfoEQi9EIlDJsDMp7adR45gsAScTNPoOZ2vc12HY,2260
 parsl/tests/test_htex/test_htex.py,sha256=4dXtcthZQvgEDtMc00g6Pw7FnqNWB_0j8fuJqHKO-IE,3896
 parsl/tests/test_htex/test_manager_failure.py,sha256=gemQopZoDEoZLOvep5JZkY6tQlZoko8Z0Kmpj1-Gbws,1161
 parsl/tests/test_htex/test_missing_worker.py,sha256=oiDN3ylsf-72jmX-Y5OWA2kQWpbVbvmoSLnu2vnyZeY,976
@@ -347,6 +348,7 @@ parsl/tests/test_htex/test_multiple_disconnected_blocks.py,sha256=L4vw_Mo-upp2p9
 parsl/tests/test_htex/test_worker_failure.py,sha256=Uz-RHI-LK78FMjXUvrUFmo4iYfmpDVBUcBxxRb3UG9M,603
 parsl/tests/test_htex/test_zmq_binding.py,sha256=MQWAL7XxaI1P2k_VUgm5DKD4GVEn5c2Y8MEyOSABRA4,2937
 parsl/tests/test_monitoring/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+parsl/tests/test_monitoring/test_app_names.py,sha256=4Ziggxv0JLP0UGAd5jjXdivUdZQLlMvVVMfiTStjxRk,2191
 parsl/tests/test_monitoring/test_basic.py,sha256=uXWx2O2Y2gfSO4e8zTjyj5bucKHG9OVzMxQNnq9abeY,2776
 parsl/tests/test_monitoring/test_db_locks.py,sha256=PGoRmvqA6AYPXTPHOZPLH38Z4D6EEgSb6ZgNfZtwIGk,2910
 parsl/tests/test_monitoring/test_fuzz_zmq.py,sha256=Xi08Drt_DZKbo3Ihl83cgsTTMRok1bBpyrVOQQWlbf4,3145
@@ -411,7 +413,7 @@ parsl/tests/test_scaling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
 parsl/tests/test_scaling/test_block_error_handler.py,sha256=VFKs_jq7yd7bpdfYva3Sa_TBS8VcjGUS6YJ9Y34RbyI,6050
 parsl/tests/test_scaling/test_regression_1621.py,sha256=iRu3GFsg2l9J61AVZKWLc6zJcvI2JYD0WvtTYDSv22I,1770
 parsl/tests/test_scaling/test_scale_down.py,sha256=T8NVmoIebdpSjrNJCdgDHumpz9eKLkJrpeW7Kwi8cBg,2821
-parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py,sha256=
+parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py,sha256=1vP2a8qygnxuUji7B3kJOUgwjmmIC1fDPhDdqzs5YFA,4597
 parsl/tests/test_serialization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/tests/test_serialization/test_2555_caching_deserializer.py,sha256=J8__b4djA5tErd8FUSXGkGcdXlW2KHbBWRbCTAnV08Q,767
 parsl/tests/test_serialization/test_basic.py,sha256=51KshqIk2RNr7S2iSkl5tZo40CJBb0h6uby8YPgOGlg,543
@@ -436,12 +438,12 @@ parsl/tests/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
 parsl/tests/test_utils/test_representation_mixin.py,sha256=kUZeIDwA2rlbJ3-beGzLLwf3dOplTMCrWJN87etHcyY,1633
 parsl/usage_tracking/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsl/usage_tracking/usage.py,sha256=pSADeogWqvkYI_n2pojv4IWDEFAQ3KwXNx6LDTohMHQ,6684
-parsl-2024.3.11.data/scripts/exec_parsl_function.py,sha256=
-parsl-2024.3.11.data/scripts/parsl_coprocess.py,sha256=
-parsl-2024.3.11.data/scripts/process_worker_pool.py,sha256=
-parsl-2024.3.11.dist-info/LICENSE,sha256=
-parsl-2024.3.11.dist-info/METADATA,sha256=
-parsl-2024.3.11.dist-info/WHEEL,sha256=
-parsl-2024.3.11.dist-info/entry_points.txt,sha256=
-parsl-2024.3.11.dist-info/top_level.txt,sha256=
-parsl-2024.3.11.dist-info/RECORD,,
+parsl-2024.3.18.data/scripts/exec_parsl_function.py,sha256=NtWNeBvRqksej38eRPw8zPBJ1CeW6vgaitve0tfz_qc,7801
+parsl-2024.3.18.data/scripts/parsl_coprocess.py,sha256=Y7Tc-h9WGui-YDe3w_h91w2Sm1JNL1gJ9kAV4PE_gw8,5722
+parsl-2024.3.18.data/scripts/process_worker_pool.py,sha256=iVrw160CpTAVuX9PH-ezU4ebm9C1_U6IMrkcdyTQJ58,41095
+parsl-2024.3.18.dist-info/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
+parsl-2024.3.18.dist-info/METADATA,sha256=HywLgkSikpur2yGd42LiMsFytWwnayht8sQasJ6n5j8,3960
+parsl-2024.3.18.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+parsl-2024.3.18.dist-info/entry_points.txt,sha256=XqnsWDYoEcLbsMcpnYGKLEnSBmaIe1YoM5YsBdJG2tI,176
+parsl-2024.3.18.dist-info/top_level.txt,sha256=PIheYoUFQtF2icLsgOykgU-Cjuwr2Oi6On2jo5RYgRM,6
+parsl-2024.3.18.dist-info/RECORD,,
{parsl-2024.3.11.data → parsl-2024.3.18.data}/scripts/exec_parsl_function.py
File without changes
{parsl-2024.3.11.data → parsl-2024.3.18.data}/scripts/parsl_coprocess.py
File without changes
{parsl-2024.3.11.dist-info → parsl-2024.3.18.dist-info}/LICENSE
File without changes
{parsl-2024.3.11.dist-info → parsl-2024.3.18.dist-info}/WHEEL
File without changes
{parsl-2024.3.11.dist-info → parsl-2024.3.18.dist-info}/entry_points.txt
File without changes
{parsl-2024.3.11.dist-info → parsl-2024.3.18.dist-info}/top_level.txt
File without changes