parsl 2024.2.12__py3-none-any.whl → 2024.2.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsl/channels/errors.py +1 -4
- parsl/configs/{comet.py → expanse.py} +5 -5
- parsl/dataflow/dflow.py +12 -12
- parsl/executors/flux/executor.py +5 -3
- parsl/executors/high_throughput/executor.py +56 -10
- parsl/executors/high_throughput/mpi_prefix_composer.py +137 -0
- parsl/executors/high_throughput/mpi_resource_management.py +217 -0
- parsl/executors/high_throughput/process_worker_pool.py +65 -9
- parsl/executors/radical/executor.py +6 -3
- parsl/executors/radical/rpex_worker.py +2 -2
- parsl/jobs/states.py +5 -5
- parsl/monitoring/db_manager.py +2 -1
- parsl/monitoring/monitoring.py +7 -4
- parsl/multiprocessing.py +3 -4
- parsl/providers/cobalt/cobalt.py +6 -0
- parsl/providers/pbspro/pbspro.py +18 -4
- parsl/providers/pbspro/template.py +2 -2
- parsl/providers/slurm/slurm.py +17 -4
- parsl/providers/slurm/template.py +2 -2
- parsl/serialize/__init__.py +7 -2
- parsl/serialize/facade.py +32 -1
- parsl/tests/test_error_handling/test_resource_spec.py +6 -0
- parsl/tests/test_htex/test_htex.py +66 -3
- parsl/tests/test_monitoring/test_incomplete_futures.py +65 -0
- parsl/tests/test_mpi_apps/__init__.py +0 -0
- parsl/tests/test_mpi_apps/test_bad_mpi_config.py +41 -0
- parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +51 -0
- parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +171 -0
- parsl/tests/test_mpi_apps/test_mpi_prefix.py +71 -0
- parsl/tests/test_mpi_apps/test_mpi_scheduler.py +158 -0
- parsl/tests/test_mpi_apps/test_resource_spec.py +145 -0
- parsl/tests/test_providers/test_cobalt_deprecation_warning.py +16 -0
- parsl/tests/test_providers/test_pbspro_template.py +28 -0
- parsl/tests/test_providers/test_slurm_template.py +29 -0
- parsl/tests/test_radical/test_mpi_funcs.py +1 -0
- parsl/tests/test_scaling/test_scale_down.py +6 -5
- parsl/tests/test_serialization/test_htex_code_cache.py +57 -0
- parsl/tests/test_serialization/test_pack_resource_spec.py +22 -0
- parsl/usage_tracking/usage.py +29 -55
- parsl/utils.py +12 -35
- parsl/version.py +1 -1
- {parsl-2024.2.12.data → parsl-2024.2.26.data}/scripts/process_worker_pool.py +65 -9
- {parsl-2024.2.12.dist-info → parsl-2024.2.26.dist-info}/METADATA +2 -2
- {parsl-2024.2.12.dist-info → parsl-2024.2.26.dist-info}/RECORD +50 -37
- parsl/configs/cooley.py +0 -29
- parsl/configs/theta.py +0 -33
- {parsl-2024.2.12.data → parsl-2024.2.26.data}/scripts/exec_parsl_function.py +0 -0
- {parsl-2024.2.12.data → parsl-2024.2.26.data}/scripts/parsl_coprocess.py +0 -0
- {parsl-2024.2.12.dist-info → parsl-2024.2.26.dist-info}/LICENSE +0 -0
- {parsl-2024.2.12.dist-info → parsl-2024.2.26.dist-info}/WHEEL +0 -0
- {parsl-2024.2.12.dist-info → parsl-2024.2.26.dist-info}/entry_points.txt +0 -0
- {parsl-2024.2.12.dist-info → parsl-2024.2.26.dist-info}/top_level.txt +0 -0
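The headline change in this release is MPI-application support in the HighThroughputExecutor: the new mpi_prefix_composer and mpi_resource_management modules plus the test_mpi_apps suite below. As a rough, hedged sketch of how the pieces fit together — the resource-specification keys come from the tests reproduced below, while the executor keyword names enable_mpi_mode and mpi_launcher are assumptions this diff does not itself confirm:

    # Sketch only: num_nodes / ranks_per_node come from the tests below;
    # enable_mpi_mode and mpi_launcher are assumed kwarg names not shown here.
    import parsl
    from parsl.config import Config
    from parsl.executors import HighThroughputExecutor

    config = Config(executors=[HighThroughputExecutor(
        enable_mpi_mode=True,    # assumed: switches on the new MPI scheduling path
        mpi_launcher="mpiexec",  # assumed: selects the launcher used to build prefixes
    )])

    @parsl.python_app
    def report(parsl_resource_specification={}):
        # the worker pool exports the spec as PARSL_* environment variables
        # (asserted by test_resource_spec_env_vars below)
        import os
        return {k: v for k, v in os.environ.items() if k.startswith("PARSL_")}

    parsl.load(config)
    print(report(parsl_resource_specification={"num_nodes": 2, "ranks_per_node": 2}).result())
    parsl.dfk().cleanup()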
parsl/tests/test_mpi_apps/test_resource_spec.py
ADDED
@@ -0,0 +1,145 @@
+import contextlib
+import logging
+import os
+import typing
+
+
+import pytest
+import unittest
+
+import parsl
+from parsl.app.app import python_app
+from parsl.tests.configs.htex_local import fresh_config
+from typing import Dict
+from parsl.executors.high_throughput.mpi_resource_management import (
+    get_pbs_hosts_list,
+    get_slurm_hosts_list,
+    get_nodes_in_batchjob,
+    identify_scheduler,
+)
+from parsl.executors.high_throughput.mpi_prefix_composer import (
+    validate_resource_spec,
+    InvalidResourceSpecification
+)
+
+EXECUTOR_LABEL = "MPI_TEST"
+
+
+def local_setup():
+    config = fresh_config()
+    config.executors[0].label = EXECUTOR_LABEL
+    config.executors[0].max_workers = 1
+    parsl.load(config)
+
+
+def local_teardown():
+    logging.warning("Exiting")
+    parsl.dfk().cleanup()
+    parsl.clear()
+
+
+@python_app
+def double(x, resource_spec=None):
+    return x * 2
+
+
+@python_app
+def get_env_vars(parsl_resource_specification: Dict = {}) -> Dict:
+    import os
+
+    parsl_vars = {}
+    for key in os.environ:
+        if key.startswith("PARSL_"):
+            parsl_vars[key] = os.environ[key]
+    return parsl_vars
+
+
+@pytest.mark.local
+def test_resource_spec_env_vars():
+    resource_spec = {
+        "num_nodes": 4,
+        "ranks_per_node": 2,
+    }
+
+    assert double(5).result() == 10
+
+    future = get_env_vars(parsl_resource_specification=resource_spec)
+
+    result = future.result()
+    assert isinstance(result, Dict)
+    assert result["PARSL_NUM_NODES"] == str(resource_spec["num_nodes"])
+    assert result["PARSL_RANKS_PER_NODE"] == str(resource_spec["ranks_per_node"])
+
+
+@pytest.mark.local
+@unittest.mock.patch("subprocess.check_output", return_value=b"c203-031\nc203-032\n")
+def test_slurm_mocked_mpi_fetch(subprocess_check):
+    nodeinfo = get_slurm_hosts_list()
+    assert isinstance(nodeinfo, list)
+    assert len(nodeinfo) == 2
+
+
+@contextlib.contextmanager
+def add_to_path(path: os.PathLike) -> typing.Generator[None, None, None]:
+    old_path = os.environ["PATH"]
+    try:
+        os.environ["PATH"] += str(path)
+        yield
+    finally:
+        os.environ["PATH"] = old_path
+
+
+@pytest.mark.local
+@pytest.mark.skip
+def test_slurm_mpi_fetch():
+    logging.warning(f"Current pwd : {os.path.dirname(__file__)}")
+    with add_to_path(os.path.dirname(__file__)):
+        logging.warning(f"PATH: {os.environ['PATH']}")
+        nodeinfo = get_slurm_hosts_list()
+        logging.warning(f"Got : {nodeinfo}")
+
+
+@contextlib.contextmanager
+def mock_pbs_nodefile(nodefile: str = "pbs_nodefile") -> typing.Generator[None, None, None]:
+    cwd = os.path.abspath(os.path.dirname(__file__))
+    filename = os.path.join(cwd, "mocks", nodefile)
+    try:
+        os.environ["PBS_NODEFILE"] = filename
+        yield
+    finally:
+        del os.environ["PBS_NODEFILE"]
+
+
+@pytest.mark.local
+def test_get_pbs_hosts_list():
+    with mock_pbs_nodefile():
+        nodelist = get_pbs_hosts_list()
+        assert nodelist
+        assert len(nodelist) == 4
+
+
+@pytest.mark.local
+def test_top_level():
+    with mock_pbs_nodefile():
+        scheduler = identify_scheduler()
+        nodelist = get_nodes_in_batchjob(scheduler)
+        assert len(nodelist) > 0
+
+
+@pytest.mark.local
+@pytest.mark.parametrize(
+    "resource_spec, exception",
+    (
+        ({"num_nodes": 2, "ranks_per_node": 1}, None),
+        ({"launcher_options": "--debug_foo"}, None),
+        ({"num_nodes": 2, "BAD_OPT": 1}, InvalidResourceSpecification),
+        ({}, None),
+    )
+)
+def test_resource_spec(resource_spec: Dict, exception):
+    if exception:
+        with pytest.raises(exception):
+            validate_resource_spec(resource_spec)
+    else:
+        result = validate_resource_spec(resource_spec)
+        assert result is None
parsl/tests/test_providers/test_cobalt_deprecation_warning.py
ADDED
@@ -0,0 +1,16 @@
+import warnings
+import pytest
+from parsl.providers import CobaltProvider
+
+
+@pytest.mark.local
+def test_deprecation_warning():
+
+    with warnings.catch_warnings(record=True) as w:
+        warnings.simplefilter("always")
+
+        CobaltProvider()
+
+    assert len(w) == 1
+    assert issubclass(w[-1].category, DeprecationWarning)
+    assert "CobaltProvider" in str(w[-1].message)
parsl/tests/test_providers/test_pbspro_template.py
ADDED
@@ -0,0 +1,28 @@
+import random
+
+from unittest import mock
+import pytest
+
+from parsl.channels import LocalChannel
+from parsl.providers import PBSProProvider
+
+
+@pytest.mark.local
+def test_submit_script_basic(tmp_path):
+    """Test slurm resources table"""
+
+    provider = PBSProProvider(
+        queue="debug", channel=LocalChannel(script_dir=tmp_path)
+    )
+    provider.script_dir = tmp_path
+    job_id = str(random.randint(55000, 59000))
+    provider.execute_wait = mock.Mock(spec=PBSProProvider.execute_wait)
+    provider.execute_wait.return_value = (0, job_id, "")
+    result_job_id = provider.submit("test", tasks_per_node=1)
+    assert job_id == result_job_id
+    provider.execute_wait.assert_called()
+    assert job_id in provider.resources
+
+    job_info = provider.resources[job_id]
+    assert "job_stdout_path" in job_info
+    assert "job_stderr_path" in job_info
parsl/tests/test_providers/test_slurm_template.py
ADDED
@@ -0,0 +1,29 @@
+import logging
+import random
+
+from unittest import mock
+import pytest
+
+from parsl.channels import LocalChannel
+from parsl.providers import SlurmProvider
+
+
+@pytest.mark.local
+def test_submit_script_basic(tmp_path):
+    """Test slurm resources table"""
+
+    provider = SlurmProvider(
+        partition="debug", channel=LocalChannel(script_dir=tmp_path)
+    )
+    provider.script_dir = tmp_path
+    job_id = str(random.randint(55000, 59000))
+    provider.execute_wait = mock.MagicMock(spec=SlurmProvider.execute_wait)
+    provider.execute_wait.return_value = (0, f"Submitted batch job {job_id}", "")
+    result_job_id = provider.submit("test", tasks_per_node=1)
+    assert job_id == result_job_id
+    provider.execute_wait.assert_called()
+    assert job_id in provider.resources
+
+    job_info = provider.resources[job_id]
+    assert "job_stdout_path" in job_info
+    assert "job_stderr_path" in job_info
parsl/tests/test_scaling/test_scale_down.py
CHANGED
@@ -44,16 +44,16 @@ def local_config():
 
 
 @python_app
-def waiting_app(ident: int, inputs=()):
+def waiting_app(ident: int, inputs=(), outputs=()):
     import pathlib
     import time
 
     # Approximate an Event by writing to files; the test logic will poll this file
-    with open(
+    with open(outputs[0], "a") as f:
        f.write(f"Ready: {ident}\n")
 
     # Similarly, use Event approximation (file check!) by polling.
-    may_finish_file = pathlib.Path(inputs[
+    may_finish_file = pathlib.Path(inputs[0])
     while not may_finish_file.exists():
         time.sleep(0.01)
 
@@ -74,9 +74,10 @@ def test_scale_out(tmpd_cwd, try_assert):
     ready_path = tmpd_cwd / "workers_ready"
     finish_path = tmpd_cwd / "workers_may_continue"
     ready_path.touch()
-    inputs = [File(
+    inputs = [File(finish_path)]
+    outputs = [File(ready_path)]
 
-    futs = [waiting_app(i, inputs=inputs) for i in range(ntasks)]
+    futs = [waiting_app(i, outputs=outputs, inputs=inputs) for i in range(ntasks)]
 
     while ready_path.read_text().count("\n") < _max_blocks:
         time.sleep(0.5)
parsl/tests/test_serialization/test_htex_code_cache.py
ADDED
@@ -0,0 +1,57 @@
+import parsl
+import pytest
+
+from typing import Any
+
+from parsl.serialize.facade import methods_for_code
+
+from parsl.tests.configs.htex_local import fresh_config as local_config
+
+
+@parsl.python_app
+def f(x):
+    return x + 1
+
+
+@pytest.mark.local
+def test_caching() -> None:
+    # for future serializer devs: if this is failing because you added another
+    # code serializer, you'll also probably need to re-think what is being tested
+    # about serialization caching here.
+    assert len(methods_for_code) == 1
+
+    serializer = methods_for_code[b'C2']
+
+    # force type to Any here because a serializer method coming from
+    # methods_for_code doesn't statically have any cache management
+    # methods on itself such as cache_clear or cache_info.
+    serialize_method: Any = serializer.serialize
+
+    serialize_method.cache_clear()
+
+    assert serialize_method.cache_info().hits == 0
+    assert serialize_method.cache_info().misses == 0
+    assert serialize_method.cache_info().currsize == 0
+
+    assert f(7).result() == 8
+
+    # the code serializer cache should now contain only a (probably wrapped) f ...
+    assert serialize_method.cache_info().currsize == 1
+
+    # ... which was not already in the cache.
+    assert serialize_method.cache_info().misses == 1
+    assert serialize_method.cache_info().hits == 0
+
+    assert f(100).result() == 101
+
+    # this time round, we should have got a single cache hit...
+    assert serialize_method.cache_info().hits == 1
+    assert serialize_method.cache_info().misses == 1
+    assert serialize_method.cache_info().currsize == 1
+
+    assert f(200).result() == 201
+
+    # this time round, we should have got another single cache hit...
+    assert serialize_method.cache_info().hits == 2
+    assert serialize_method.cache_info().misses == 1
+    assert serialize_method.cache_info().currsize == 1
parsl/tests/test_serialization/test_pack_resource_spec.py
ADDED
@@ -0,0 +1,22 @@
+import pytest
+from parsl.serialize import unpack_res_spec_apply_message, pack_res_spec_apply_message
+
+
+def double(x: int, y: int = 2) -> int:
+    return x * y
+
+
+@pytest.mark.local
+def test_pack_and_unpack():
+    args = (5,)
+    kwargs = {'y': 10}
+    resource_spec = {'num_nodes': 4}
+    packed = pack_res_spec_apply_message(double, args, kwargs, resource_specification=resource_spec)
+
+    unpacked = unpack_res_spec_apply_message(packed)
+    assert len(unpacked) == 4
+    u_fn, u_args, u_kwargs, u_res_spec = unpacked
+    assert u_fn == double
+    assert u_args == args
+    assert u_kwargs == kwargs
+    assert u_res_spec == resource_spec
parsl/usage_tracking/usage.py
CHANGED
@@ -14,8 +14,13 @@ from parsl.version import VERSION as PARSL_VERSION
 
 logger = logging.getLogger(__name__)
 
+from typing import Callable
+from typing_extensions import ParamSpec
 
-def async_process(fn):
+P = ParamSpec("P")
+
+
+def async_process(fn: Callable[P, None]) -> Callable[P, None]:
     """ Decorator function to launch a function as a separate process """
 
     def run(*args, **kwargs):
@@ -27,41 +32,23 @@ def async_process(fn):
 
 
 @async_process
-def udp_messenger(domain_name
+def udp_messenger(domain_name: str, UDP_PORT: int, sock_timeout: int, message: str) -> None:
     """Send UDP messages to usage tracker asynchronously
 
     This multiprocessing based messenger was written to overcome the limitations
-    of signalling/terminating a thread that is blocked on a system call.
-    messenger is created as a separate process, and initialized with 2 queues,
-    to_send to receive messages to be sent to the internet.
+    of signalling/terminating a thread that is blocked on a system call.
 
     Args:
          - domain_name (str) : Domain name string
-         - UDP_IP (str) : IP address YYY.YYY.YYY.YYY
          - UDP_PORT (int) : UDP port to send out on
         - sock_timeout (int) : Socket timeout
-         - to_send (multiprocessing.Queue) : Queue of outgoing messages to internet
    """
    setproctitle("parsl: Usage tracking")
 
    try:
-        if message is None:
-            raise ValueError("message was none")
-
        encoded_message = bytes(message, "utf-8")
 
-
-            raise ValueError("utf-8 encoding of message failed")
-
-        if domain_name:
-            try:
-                UDP_IP = socket.gethostbyname(domain_name)
-            except Exception:
-                # (False, "Domain lookup failed, defaulting to {0}".format(UDP_IP))
-                pass
-
-        if UDP_IP is None:
-            raise Exception("UDP_IP is None")
+        UDP_IP = socket.gethostbyname(domain_name)
 
        if UDP_PORT is None:
            raise Exception("UDP_PORT is None")
@@ -88,14 +75,14 @@ class UsageTracker:
 
     """
 
-    def __init__(self, dfk,
+    def __init__(self, dfk, port=50077,
                  domain_name='tracking.parsl-project.org'):
         """Initialize usage tracking unless the user has opted-out.
 
         We will try to resolve the hostname specified in kwarg:domain_name
         and if that fails attempt to use the kwarg:ip. Determining the
-        IP and sending message
-        initialization.
+        IP and sending message happens in an asynchronous processs to avoid
+        slowing down DFK initialization.
 
         Tracks usage stats by inspecting the internal state of the dfk.
 
@@ -103,18 +90,15 @@ class UsageTracker:
             - dfk (DFK object) : Data Flow Kernel object
 
         KWargs:
-            - ip (string) : IP address
             - port (int) : Port number, Default:50077
             - domain_name (string) : Domain name, will override IP
                   Default: tracking.parsl-project.org
         """
 
         self.domain_name = domain_name
-        self.ip = ip
         # The sock timeout will only apply to UDP send and not domain resolution
         self.sock_timeout = 5
         self.UDP_PORT = port
-        self.UDP_IP = None
         self.procs = []
         self.dfk = dfk
         self.config = self.dfk.config
@@ -147,16 +131,13 @@ class UsageTracker:
 
         return track
 
-    def construct_start_message(self):
+    def construct_start_message(self) -> str:
         """Collect preliminary run info at the start of the DFK.
 
         Returns :
               - Message dict dumped as json string, ready for UDP
         """
         message = {'uuid': self.uuid,
-                   'test': False,  # this field previously indicated if parsl
-                                   # was being run in test mode, and is
-                                   # retained for protocol compatibility
                    'parsl_v': self.parsl_version,
                    'python_v': self.python_version,
                    'os': platform.system(),
@@ -165,7 +146,7 @@ class UsageTracker:
 
         return json.dumps(message)
 
-    def construct_end_message(self):
+    def construct_end_message(self) -> str:
         """Collect the final run information at the time of DFK cleanup.
 
         Returns:
@@ -181,35 +162,23 @@ class UsageTracker:
                    'end': time.time(),
                    't_apps': app_count,
                    'sites': site_count,
-                   '
-                   'failed': app_fails,
-                   'test': False,  # see comment in construct_start_message
+                   'failed': app_fails
                    }
 
         return json.dumps(message)
 
-    def send_UDP_message(self, message):
+    def send_UDP_message(self, message: str) -> None:
         """Send UDP message."""
-        x = 0
         if self.tracking_enabled:
             try:
-                proc = udp_messenger(self.domain_name, self.
+                proc = udp_messenger(self.domain_name, self.UDP_PORT, self.sock_timeout, message)
                 self.procs.append(proc)
             except Exception as e:
                 logger.debug("Usage tracking failed: {}".format(e))
-        else:
-            x = -1
-
-        return x
 
-    def send_message(self) ->
+    def send_message(self) -> None:
         """Send message over UDP.
-
-        Returns:
-            time taken
         """
-        start = time.time()
-        message = None
         if not self.initialized:
             message = self.construct_start_message()
             self.initialized = True
@@ -217,11 +186,16 @@ class UsageTracker:
             message = self.construct_end_message()
 
         self.send_UDP_message(message)
-        end = time.time()
-
-        return end - start
 
-    def close(self):
-        """
+    def close(self, timeout: float = 10.0) -> None:
+        """First give each process one timeout period to finish what it is
+        doing, then kill it (SIGKILL). There's no softer SIGTERM step,
+        because that adds one join period of delay for what is almost
+        definitely either: going to behave broadly the same as to SIGKILL,
+        or won't respond to SIGTERM.
+        """
         for proc in self.procs:
-            proc.
+            proc.join(timeout=timeout)
+            if proc.is_alive():
+                logger.info("Usage tracking process did not end itself; sending SIGKILL")
+                proc.kill()
parsl/utils.py
CHANGED
@@ -305,59 +305,36 @@ class Timer:
         - name (str) : a base name to use when naming the started thread
         """
 
-        self.interval = interval
+        self.interval = max(0, interval)
         self.cb_args = args
         self.callback = callback
-        self._wake_up_time = time.time() + 1
 
         self._kill_event = threading.Event()
-
-
-
-
-
-
+        tname = f"Timer-Thread-{id(self)}"
+        if name:
+            tname = f"{name}-{tname}"
+        self._thread = threading.Thread(
+            target=self._wake_up_timer, name=tname, daemon=True
+        )
         self._thread.start()
 
-    def _wake_up_timer(self
-
-
-
-        Args:
-            - kill_event (threading.Event) : Event to wait on
-        """
-
-        # Sleep till time to wake up
-        while True:
-            prev = self._wake_up_time
-
-            # Waiting for the event returns True only when the event
-            # is set, usually by the parent thread
-            time_to_die = kill_event.wait(float(max(prev - time.time(), 0)))
-
-            if time_to_die:
-                return
-
-            if prev == self._wake_up_time:
-                self.make_callback()
-            else:
-                print("Sleeping a bit more")
+    def _wake_up_timer(self) -> None:
+        while not self._kill_event.wait(self.interval):
+            self.make_callback()
 
     def make_callback(self) -> None:
         """Makes the callback and resets the timer.
         """
-        self._wake_up_time = time.time() + self.interval
-
         try:
             self.callback(*self.cb_args)
         except Exception:
             logger.error("Callback threw an exception - logging and proceeding anyway", exc_info=True)
 
-    def close(self) -> None:
+    def close(self, timeout: Optional[float] = None) -> None:
         """Merge the threads and terminate.
         """
         self._kill_event.set()
-        self._thread.join()
+        self._thread.join(timeout=timeout)
 
 
 class AutoCancelTimer(threading.Timer):
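The rewritten _wake_up_timer swaps a computed wake-up-time loop for the Event.wait(interval) idiom: wait() blocks for at most the interval but returns True as soon as the event is set, so the thread ticks periodically yet shuts down promptly. A standalone illustration:

    import threading

    stop = threading.Event()

    def ticker(interval: float) -> None:
        # wait() returns False on timeout (time to tick) and True once stop is set
        while not stop.wait(interval):
            print("tick")

    thread = threading.Thread(target=ticker, args=(0.1,), daemon=True)
    thread.start()
    stop.set()      # wakes wait() immediately; the loop exits without a full interval
    thread.join()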
parsl/version.py
CHANGED