parsl 2024.2.26__py3-none-any.whl → 2024.3.11__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in those public registries.
- parsl/addresses.py +1 -1
- parsl/configs/ASPIRE1.py +1 -1
- parsl/configs/ad_hoc.py +1 -1
- parsl/configs/bridges.py +1 -1
- parsl/configs/cc_in2p3.py +1 -1
- parsl/configs/expanse.py +1 -1
- parsl/configs/frontera.py +1 -1
- parsl/configs/kubernetes.py +1 -1
- parsl/configs/midway.py +1 -1
- parsl/configs/osg.py +1 -1
- parsl/configs/stampede2.py +1 -1
- parsl/dataflow/dflow.py +11 -6
- parsl/dataflow/taskrecord.py +3 -1
- parsl/executors/high_throughput/executor.py +69 -37
- parsl/executors/high_throughput/interchange.py +78 -59
- parsl/executors/high_throughput/process_worker_pool.py +40 -28
- parsl/executors/taskvine/executor.py +3 -1
- parsl/executors/workqueue/executor.py +5 -2
- parsl/executors/workqueue/parsl_coprocess.py +107 -95
- parsl/jobs/job_status_poller.py +9 -3
- parsl/jobs/strategy.py +4 -3
- parsl/monitoring/db_manager.py +25 -5
- parsl/monitoring/monitoring.py +6 -2
- parsl/monitoring/remote.py +29 -0
- parsl/monitoring/visualization/models.py +7 -0
- parsl/providers/slurm/slurm.py +13 -2
- parsl/tests/configs/ad_hoc_cluster_htex.py +1 -1
- parsl/tests/configs/bluewaters.py +1 -1
- parsl/tests/configs/bridges.py +1 -1
- parsl/tests/configs/cc_in2p3.py +1 -1
- parsl/tests/configs/comet.py +1 -1
- parsl/tests/configs/frontera.py +1 -1
- parsl/tests/configs/midway.py +1 -1
- parsl/tests/configs/nscc_singapore.py +1 -1
- parsl/tests/configs/osg_htex.py +1 -1
- parsl/tests/configs/petrelkube.py +1 -1
- parsl/tests/configs/summit.py +1 -1
- parsl/tests/configs/theta.py +1 -1
- parsl/tests/configs/user_opts.py +3 -1
- parsl/tests/manual_tests/test_ad_hoc_htex.py +1 -1
- parsl/tests/scaling_tests/htex_local.py +1 -1
- parsl/tests/sites/test_affinity.py +1 -1
- parsl/tests/sites/test_concurrent.py +1 -1
- parsl/tests/sites/test_dynamic_executor.py +1 -1
- parsl/tests/sites/test_worker_info.py +1 -1
- parsl/tests/test_htex/test_basic.py +1 -1
- parsl/tests/test_htex/test_connected_blocks.py +1 -1
- parsl/tests/test_htex/test_cpu_affinity_explicit.py +1 -1
- parsl/tests/test_htex/test_disconnected_blocks.py +1 -1
- parsl/tests/test_htex/test_htex.py +13 -0
- parsl/tests/test_htex/test_manager_failure.py +1 -1
- parsl/tests/test_htex/test_missing_worker.py +1 -1
- parsl/tests/test_htex/test_multiple_disconnected_blocks.py +1 -1
- parsl/tests/test_htex/test_worker_failure.py +1 -1
- parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +1 -1
- parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +1 -1
- parsl/tests/test_mpi_apps/test_resource_spec.py +1 -1
- parsl/tests/test_scaling/test_scale_down.py +2 -2
- parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +159 -0
- parsl/usage_tracking/usage.py +5 -9
- parsl/version.py +1 -1
- parsl-2024.3.11.data/scripts/parsl_coprocess.py +166 -0
- {parsl-2024.2.26.data → parsl-2024.3.11.data}/scripts/process_worker_pool.py +40 -28
- {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/METADATA +2 -2
- {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/RECORD +70 -70
- {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/WHEEL +1 -1
- parsl/configs/bluewaters.py +0 -28
- parsl-2024.2.26.data/scripts/parsl_coprocess.py +0 -154
- {parsl-2024.2.26.data → parsl-2024.3.11.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/LICENSE +0 -0
- {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/entry_points.txt +0 -0
- {parsl-2024.2.26.dist-info → parsl-2024.3.11.dist-info}/top_level.txt +0 -0
parsl/monitoring/remote.py
CHANGED
@@ -201,6 +201,8 @@ def monitor(pid: int,
 
     children_user_time = {}  # type: Dict[int, float]
     children_system_time = {}  # type: Dict[int, float]
+    children_num_ctx_switches_voluntary = {}  # type: Dict[int, float]
+    children_num_ctx_switches_involuntary = {}  # type: Dict[int, float]
 
     def accumulate_and_prepare() -> Dict[str, Any]:
         d = {"psutil_process_" + str(k): v for k, v in pm.as_dict().items() if k in simple}
@@ -218,6 +220,15 @@ def monitor(pid: int,
         logging.debug("got children")
 
         d["psutil_cpu_count"] = psutil.cpu_count()
+
+        # note that this will be the CPU number of the base process, not anything launched by it
+        d["psutil_cpu_num"] = pm.cpu_num()
+
+        pctxsw = pm.num_ctx_switches()
+
+        d["psutil_process_num_ctx_switches_voluntary"] = pctxsw.voluntary
+        d["psutil_process_num_ctx_switches_involuntary"] = pctxsw.involuntary
+
         d['psutil_process_memory_virtual'] = pm.memory_info().vms
         d['psutil_process_memory_resident'] = pm.memory_info().rss
         d['psutil_process_time_user'] = pm.cpu_times().user
@@ -238,6 +249,11 @@ def monitor(pid: int,
             child_system_time = child.cpu_times().system
             children_user_time[child.pid] = child_user_time
             children_system_time[child.pid] = child_system_time
+
+            pctxsw = child.num_ctx_switches()
+            children_num_ctx_switches_voluntary[child.pid] = pctxsw.voluntary
+            children_num_ctx_switches_involuntary[child.pid] = pctxsw.involuntary
+
             d['psutil_process_memory_virtual'] += child.memory_info().vms
             d['psutil_process_memory_resident'] += child.memory_info().rss
             try:
@@ -248,14 +264,27 @@ def monitor(pid: int,
                 logging.exception("Exception reading IO counters for child {k}. Recorded IO usage may be incomplete".format(k=k), exc_info=True)
                 d['psutil_process_disk_write'] += 0
                 d['psutil_process_disk_read'] += 0
+
         total_children_user_time = 0.0
         for child_pid in children_user_time:
             total_children_user_time += children_user_time[child_pid]
+
         total_children_system_time = 0.0
         for child_pid in children_system_time:
             total_children_system_time += children_system_time[child_pid]
+
+        total_children_num_ctx_switches_voluntary = 0.0
+        for child_pid in children_num_ctx_switches_voluntary:
+            total_children_num_ctx_switches_voluntary += children_num_ctx_switches_voluntary[child_pid]
+
+        total_children_num_ctx_switches_involuntary = 0.0
+        for child_pid in children_num_ctx_switches_involuntary:
+            total_children_num_ctx_switches_involuntary += children_num_ctx_switches_involuntary[child_pid]
+
         d['psutil_process_time_user'] += total_children_user_time
         d['psutil_process_time_system'] += total_children_system_time
+        d['psutil_process_num_ctx_switches_voluntary'] += total_children_num_ctx_switches_voluntary
+        d['psutil_process_num_ctx_switches_involuntary'] += total_children_num_ctx_switches_involuntary
         logging.debug("sending message")
         return d
 
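The new monitoring fields above come straight from psutil. Below is a minimal standalone sketch of those calls (not parsl's monitor() loop itself); note that psutil.Process.cpu_num() is not available on every platform.

```python
import psutil

pm = psutil.Process()                     # the process being sampled
print("running on CPU:", pm.cpu_num())    # CPU number of this process only

# Sum context switches over the process and its children, mirroring the diff.
pctxsw = pm.num_ctx_switches()
voluntary, involuntary = pctxsw.voluntary, pctxsw.involuntary
for child in pm.children(recursive=True):
    child_ctxsw = child.num_ctx_switches()
    voluntary += child_ctxsw.voluntary
    involuntary += child_ctxsw.involuntary

print("context switches:", voluntary, "voluntary /", involuntary, "involuntary")
```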
parsl/monitoring/visualization/models.py
CHANGED
@@ -102,5 +102,12 @@ class Resource(db.Model):
         'psutil_process_disk_write', db.Float, nullable=True)
     psutil_process_status = db.Column(
         'psutil_process_status', db.Text, nullable=True)
+    psutil_cpu_num = db.Column(
+        'psutil_cpu_num', db.Text, nullable=True)
+    psutil_process_num_ctx_switches_voluntary = db.Column(
+        'psutil_process_num_ctx_switches_voluntary', db.Float, nullable=True)
+    psutil_process_num_ctx_switches_involuntary = db.Column(
+        'psutil_process_num_ctx_switches_involuntary', db.Float, nullable=True)
+
     __table_args__ = (
         db.PrimaryKeyConstraint('task_id', 'run_id', 'timestamp'),)
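These columns become part of the monitoring database's resource records, so the new values can be read back after a run. A hedged example of inspecting them with plain sqlite3; the runinfo/monitoring.db path and the resource table name are assumptions here, not taken from this diff.

```python
import sqlite3

# Inspect the new per-task resource columns after a monitored run.
conn = sqlite3.connect("runinfo/monitoring.db")
rows = conn.execute(
    "SELECT task_id, psutil_cpu_num, "
    "psutil_process_num_ctx_switches_voluntary, "
    "psutil_process_num_ctx_switches_involuntary "
    "FROM resource ORDER BY timestamp"
).fetchall()
for task_id, cpu_num, vol, invol in rows:
    print(task_id, cpu_num, vol, invol)
conn.close()
```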
parsl/providers/slurm/slurm.py
CHANGED
@@ -280,11 +280,22 @@ class SlurmProvider(ClusterProvider, RepresentationMixin):
             else:
                 logger.error("Could not read job ID from submit command standard output.")
                 logger.error("Retcode:%s STDOUT:%s STDERR:%s", retcode, stdout.strip(), stderr.strip())
-                raise SubmitException(
+                raise SubmitException(
+                    job_name,
+                    "Could not read job ID from submit command standard output",
+                    stdout=stdout,
+                    stderr=stderr,
+                    retcode=retcode
+                )
         else:
             logger.error("Submit command failed")
             logger.error("Retcode:%s STDOUT:%s STDERR:%s", retcode, stdout.strip(), stderr.strip())
-            raise SubmitException(
+            raise SubmitException(
+                job_name, "Could not read job ID from submit command standard output",
+                stdout=stdout,
+                stderr=stderr,
+                retcode=retcode
+            )
 
     def cancel(self, job_ids):
         ''' Cancels the jobs specified by a list of job ids
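Both submit-failure paths now pass the job name, standard output, standard error and return code into SubmitException rather than raising it with less context. A sketch of how calling code might surface that detail; the provider arguments, the errors import path and the exception attribute names are assumptions for this example, not taken from the diff.

```python
from parsl.providers import SlurmProvider
from parsl.providers.errors import SubmitException  # import path assumed

# Illustrative only: requires a reachable Slurm cluster.
provider = SlurmProvider(partition="debug")
try:
    provider.submit("sleep 60", tasks_per_node=1, job_name="parsl.example")
except SubmitException as e:
    # Attribute names assumed to mirror the keyword arguments above.
    print("submit failed:", e)
    print("retcode:", getattr(e, "retcode", None))
    print("stderr:", getattr(e, "stderr", None))
```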
parsl/tests/configs/bridges.py
CHANGED
@@ -13,7 +13,7 @@ def fresh_config():
             # This is the network interface on the login node to
             # which compute nodes can communicate
             # address=address_by_interface('bond0.144'),
-
+            max_workers_per_node=1,
             encrypted=True,
             provider=SlurmProvider(
                 user_opts['bridges']['partition'],  # Partition / QOS
parsl/tests/configs/cc_in2p3.py
CHANGED
parsl/tests/configs/comet.py
CHANGED
parsl/tests/configs/frontera.py
CHANGED
parsl/tests/configs/midway.py
CHANGED
parsl/tests/configs/osg_htex.py
CHANGED
parsl/tests/configs/summit.py
CHANGED
@@ -20,7 +20,7 @@ def fresh_config():
 
             # address=address_by_interface('ib0'),  # This assumes Parsl is running on login node
             worker_port_range=(50000, 55000),
-
+            max_workers_per_node=1,
             encrypted=True,
             provider=LSFProvider(
                 launcher=JsrunLauncher(),
parsl/tests/configs/theta.py
CHANGED
parsl/tests/configs/user_opts.py
CHANGED
@@ -52,7 +52,9 @@ user_opts = {
     #     'username': MIDWAY_USERNAME,
     #     'script_dir': '/scratch/midway2/{}/parsl_scripts'.format(MIDWAY_USERNAME),
     #     'scheduler_options': "",
-    #     'worker_init': 'cd /scratch/midway2/{}/parsl_scripts;
+    #     'worker_init': 'cd /scratch/midway2/{}/parsl_scripts; '
+    #                    'module load Anaconda3/5.1.0; source activate parsl_testing;'
+    #                    .format(MIDWAY_USERNAME),
     # },
     # 'osg': {
     #     'username': OSG_USERNAME,
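The commented-out worker_init example is re-wrapped using Python's implicit concatenation of adjacent string literals, so the command stays a single string even though it now spans several lines. A small illustration (the path here is a placeholder, not the Midway one):

```python
# Adjacent string literals inside parentheses are joined at compile time.
worker_init = (
    'cd /tmp/parsl_scripts; '
    'module load Anaconda3/5.1.0; source activate parsl_testing;'
)
assert worker_init == ('cd /tmp/parsl_scripts; '
                       'module load Anaconda3/5.1.0; source activate parsl_testing;')
```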
parsl/tests/test_htex/test_cpu_affinity_explicit.py
CHANGED
@@ -34,7 +34,7 @@ def test_cpu_affinity_explicit():
 
     config = fresh_config()
     config.executors[0].cpu_affinity = affinity
-    config.executors[0].
+    config.executors[0].max_workers_per_node = 1
 
     logger.debug(f"config: {config}")
     # TODO: is there a `with` style for this, to properly deal with exceptions?
parsl/tests/test_htex/test_htex.py
CHANGED
@@ -1,4 +1,5 @@
 import pathlib
+import warnings
 from unittest import mock
 
 import pytest
@@ -107,3 +108,15 @@ def test_htex_shutdown(
     assert not mock_ix_proc.terminate.called
     assert not mock_ix_proc.join.called
     assert "has not started" in mock_logs[0][0][0]
+
+
+@pytest.mark.local
+def test_max_workers_per_node():
+    with pytest.warns(DeprecationWarning) as record:
+        htex = HighThroughputExecutor(max_workers_per_node=1, max_workers=2)
+
+    warning_msg = "max_workers is deprecated"
+    assert any(warning_msg in str(warning.message) for warning in record)
+
+    # Ensure max_workers_per_node takes precedence
+    assert htex.max_workers_per_node == htex.max_workers == 1
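The new test exercises the rename of max_workers to max_workers_per_node on HighThroughputExecutor, which is also what the many one-line config changes in this release reflect. A minimal sketch of the two spellings (label names and values here are illustrative):

```python
import warnings

from parsl.executors import HighThroughputExecutor

# New spelling: configures worker count per node directly.
htex = HighThroughputExecutor(label="htex_new_style", max_workers_per_node=4)

# Old spelling: still accepted, but emits a DeprecationWarning at construction.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    HighThroughputExecutor(label="htex_old_style", max_workers=4)

print([str(w.message) for w in caught if issubclass(w.category, DeprecationWarning)])
```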
@@ -13,7 +13,7 @@ from parsl.tests.configs.htex_local import fresh_config
 def load_config():
     config = fresh_config()
     config.executors[0].poll_period = 1
-    config.executors[0].
+    config.executors[0].max_workers_per_node = 1
     config.executors[0].heartbeat_period = 1
 
     parsl.load(config)
@@ -8,7 +8,7 @@ from parsl.tests.configs.htex_local import fresh_config
 def local_setup():
     config = fresh_config()
     config.executors[0].poll_period = 1
-    config.executors[0].
+    config.executors[0].max_workers_per_node = 1
     config.executors[0].launch_cmd = "executable_that_hopefully_does_not_exist_1030509.py"
     parsl.load(config)
 
@@ -8,7 +8,7 @@ def local_config():
     from parsl.tests.configs.htex_local import fresh_config
     config = fresh_config()
     config.executors[0].poll_period = 1
-    config.executors[0].
+    config.executors[0].max_workers_per_node = 1
     config.executors[0].heartbeat_period = 1
     return config
 
parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py
CHANGED
@@ -11,7 +11,7 @@ EXECUTOR_LABEL = "MPI_TEST"
 def local_setup():
     config = fresh_config()
     config.executors[0].label = EXECUTOR_LABEL
-    config.executors[0].
+    config.executors[0].max_workers_per_node = 1
     config.executors[0].enable_mpi_mode = False
     parsl.load(config)
 
parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py
CHANGED
@@ -14,7 +14,7 @@ EXECUTOR_LABEL = "MPI_TEST"
 def local_setup():
     config = fresh_config()
     config.executors[0].label = EXECUTOR_LABEL
-    config.executors[0].
+    config.executors[0].max_workers_per_node = 2
     config.executors[0].enable_mpi_mode = True
     config.executors[0].mpi_launcher = "mpiexec"
 
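Both MPI-mode tests only flip executor attributes on a fresh config. Below is a sketch of the same knobs expressed as a standalone config; the values are illustrative, and the SimpleLauncher/LocalProvider choices are assumptions made for this sketch rather than taken from the diff.

```python
from parsl.config import Config
from parsl.executors import HighThroughputExecutor
from parsl.launchers import SimpleLauncher
from parsl.providers import LocalProvider

# Illustrative MPI-mode executor configuration (not a recommended setup).
config = Config(
    executors=[
        HighThroughputExecutor(
            label="MPI_TEST",
            max_workers_per_node=2,
            enable_mpi_mode=True,
            mpi_launcher="mpiexec",
            provider=LocalProvider(launcher=SimpleLauncher()),
        )
    ]
)
```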
parsl/tests/test_scaling/test_scale_down.py
CHANGED
@@ -27,7 +27,7 @@ def local_config():
                 poll_period=100,
                 label="htex_local",
                 address="127.0.0.1",
-
+                max_workers_per_node=1,
                 encrypted=True,
                 provider=LocalProvider(
                     channel=LocalChannel(),
@@ -39,7 +39,7 @@ def local_config():
             )
         ],
         max_idletime=0.5,
-        strategy='
+        strategy='simple',
     )
 
 
parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py
ADDED
@@ -0,0 +1,159 @@
+import pytest
+
+import parsl
+
+from parsl import File, python_app
+from parsl.providers import LocalProvider
+from parsl.channels import LocalChannel
+from parsl.launchers import SingleNodeLauncher
+from parsl.config import Config
+from parsl.executors import HighThroughputExecutor
+
+from threading import Event
+
+_max_blocks = 5
+_min_blocks = 0
+
+
+def local_config():
+    return Config(
+        executors=[
+            HighThroughputExecutor(
+                heartbeat_period=1,
+                heartbeat_threshold=2,
+                poll_period=100,
+                label="htex_local",
+                address="127.0.0.1",
+                max_workers=1,
+                encrypted=True,
+                provider=LocalProvider(
+                    channel=LocalChannel(),
+                    init_blocks=0,
+                    max_blocks=_max_blocks,
+                    min_blocks=_min_blocks,
+                    launcher=SingleNodeLauncher(),
+                ),
+            )
+        ],
+        max_idletime=0.5,
+        strategy='htex_auto_scale',
+    )
+
+
+@python_app
+def waiting_app(ident: int, outputs=(), inputs=()):
+    import pathlib
+    import time
+
+    # Approximate an Event by writing to files; the test logic will poll this file
+    with open(outputs[0], "a") as f:
+        f.write(f"Ready: {ident}\n")
+
+    # Similarly, use Event approximation (file check!) by polling.
+    may_finish_file = pathlib.Path(inputs[0])
+    while not may_finish_file.exists():
+        time.sleep(0.01)
+
+
+# see issue #1885 for details of failures of this test.
+# at the time of issue #1885 this test was failing frequently
+# in CI.
+@pytest.mark.local
+def test_scale_out(tmpd_cwd, try_assert):
+    dfk = parsl.dfk()
+
+    # reconfigure scaling strategy to run faster than usual. This allows
+    # this test to complete faster - at time of writing 27s with default
+    # 5s strategy, vs XXXX with 0.5s strategy.
+
+    # check this attribute still exists, in the presence of ongoing
+    # development, so we have some belief that setting it will not be
+    # setting a now-ignored parameter.
+    assert hasattr(dfk.job_status_poller, 'interval')
+    dfk.job_status_poller.interval = 0.1
+
+    num_managers = len(dfk.executors['htex_local'].connected_managers())
+
+    assert num_managers == 0, "Expected 0 managers at start"
+    assert dfk.executors['htex_local'].outstanding == 0, "Expected 0 tasks at start"
+
+    ntasks = _max_blocks * 2
+    ready_path = tmpd_cwd / "workers_ready"
+    finish_path = tmpd_cwd / "stage1_workers_may_continue"
+    ready_path.touch()
+    inputs = [File(finish_path)]
+    outputs = [File(ready_path)]
+
+    futs = [waiting_app(i, outputs=outputs, inputs=inputs) for i in range(ntasks)]
+
+    try_assert(lambda: ready_path.read_text().count("\n") == _max_blocks, "Wait for _max_blocks tasks to be running", timeout_ms=15000)
+
+    # This should be true immediately, because the previous try_assert should
+    # wait until there are max_blocks tasks running, and his test should be
+    # configured to use 1 worker per block.
+    assert len(dfk.executors['htex_local'].connected_managers()) == _max_blocks
+
+    finish_path.touch()  # Approximation of Event, via files
+    [x.result() for x in futs]
+
+    assert dfk.executors['htex_local'].outstanding == 0
+
+    # now we can launch one "long" task -
+    # and what should happen is that the connected_managers count "eventually" (?) converges to 1 and stays there.
+
+    finish_path = tmpd_cwd / "stage2_workers_may_continue"
+
+    fut = waiting_app(0, outputs=outputs, inputs=[File(finish_path)])
+
+    def check_one_block():
+        return len(dfk.executors['htex_local'].connected_managers()) == 1
+
+    try_assert(
+        check_one_block,
+        fail_msg="Expected 1 managers during a single long task",
+    )
+
+    # the task should not have finished by the time we end up with 1 manager
+    assert not fut.done()
+
+    # This section wait for the strategy to run again, with the above single
+    # task outstanding, and check that the strategy has not scaled up or
+    # down more on those subsequent iterations.
+
+    # It does this by hooking the callback of the job status poller, and
+    # waiting until it has run.
+
+    old_cb = dfk.job_status_poller.callback
+
+    strategy_iterated = Event()
+
+    def hook_cb(*args, **kwargs):
+        r = old_cb(*args, **kwargs)
+        strategy_iterated.set()
+        return r
+
+    dfk.job_status_poller.callback = hook_cb
+
+    # hack strategies to run more frequently. this allo
+    # dfk.job_status_poller.
+
+    try_assert(
+        strategy_iterated.is_set,
+        fail_msg="Expected strategy to have run within this period",
+    )
+
+    assert check_one_block()
+
+    finish_path.touch()  # now we can end the single stage-2 task
+
+    fut.result()
+
+    # now we should expect min_blocks scale down
+
+    def check_min_blocks():
+        return len(dfk.executors['htex_local'].connected_managers()) == _min_blocks
+
+    try_assert(
+        check_min_blocks,
+        fail_msg=f"Expected {_min_blocks} managers when no tasks (min_blocks)",
+    )
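The new test leans on a try_assert fixture and a file-based stand-in for threading.Event. The helper below is an illustrative equivalent of that polling pattern, not parsl's actual try_assert implementation; the function and parameter names are hypothetical.

```python
import time

def poll_until(condition, fail_msg="condition not met", timeout_ms=5000, period_s=0.05):
    """Poll `condition` until it returns True or the timeout expires."""
    deadline = time.monotonic() + timeout_ms / 1000
    while time.monotonic() < deadline:
        if condition():
            return
        time.sleep(period_s)
    raise AssertionError(fail_msg)

# Example, mirroring the Event-by-file pattern used in the test:
# poll_until(lambda: pathlib.Path("workers_ready").exists(), "workers never became ready")
```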
parsl/usage_tracking/usage.py
CHANGED
@@ -109,7 +109,6 @@ class UsageTracker:
                              sys.version_info.micro)
         self.tracking_enabled = self.check_tracking_enabled()
         logger.debug("Tracking status: {}".format(self.tracking_enabled))
-        self.initialized = False  # Once first message is sent this will be True
 
     def check_tracking_enabled(self):
         """Check if tracking is enabled.
@@ -176,15 +175,12 @@ class UsageTracker:
         except Exception as e:
             logger.debug("Usage tracking failed: {}".format(e))
 
-    def
-
-
-        if not self.initialized:
-            message = self.construct_start_message()
-            self.initialized = True
-        else:
-            message = self.construct_end_message()
+    def send_start_message(self) -> None:
+        message = self.construct_start_message()
+        self.send_UDP_message(message)
 
+    def send_end_message(self) -> None:
+        message = self.construct_end_message()
         self.send_UDP_message(message)
 
     def close(self, timeout: float = 10.0) -> None:
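The single send method that toggled on an initialized flag is split into explicit send_start_message() and send_end_message(). A hypothetical sketch of driving the split API directly; in normal use parsl's DataFlowKernel owns the tracker and makes these calls around a workflow run, and the one-argument constructor call here is an assumption.

```python
from parsl.usage_tracking.usage import UsageTracker

def report_usage(dfk):
    # `dfk` is assumed to be an already-constructed DataFlowKernel.
    tracker = UsageTracker(dfk)
    tracker.send_start_message()
    # ... workflow runs here ...
    tracker.send_end_message()
    tracker.close()
```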
parsl/version.py
CHANGED