parsl 2024.3.11__py3-none-any.whl → 2024.3.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. parsl/addresses.py +3 -1
  2. parsl/config.py +4 -0
  3. parsl/dataflow/dflow.py +14 -5
  4. parsl/executors/base.py +10 -0
  5. parsl/executors/high_throughput/executor.py +12 -0
  6. parsl/executors/high_throughput/interchange.py +30 -8
  7. parsl/executors/high_throughput/manager_record.py +1 -0
  8. parsl/executors/high_throughput/process_worker_pool.py +41 -5
  9. parsl/executors/status_handling.py +2 -9
  10. parsl/executors/taskvine/executor.py +24 -3
  11. parsl/executors/taskvine/manager.py +1 -0
  12. parsl/executors/taskvine/manager_config.py +3 -4
  13. parsl/executors/workqueue/executor.py +19 -0
  14. parsl/jobs/error_handlers.py +1 -1
  15. parsl/jobs/job_status_poller.py +8 -7
  16. parsl/launchers/launchers.py +6 -6
  17. parsl/log_utils.py +8 -4
  18. parsl/monitoring/db_manager.py +4 -2
  19. parsl/monitoring/monitoring.py +30 -264
  20. parsl/monitoring/router.py +208 -0
  21. parsl/monitoring/visualization/plots/default/workflow_plots.py +3 -0
  22. parsl/monitoring/visualization/views.py +2 -1
  23. parsl/providers/cluster_provider.py +1 -3
  24. parsl/tests/configs/user_opts.py +2 -1
  25. parsl/tests/test_htex/test_drain.py +78 -0
  26. parsl/tests/test_monitoring/test_app_names.py +86 -0
  27. parsl/tests/test_monitoring/test_fuzz_zmq.py +2 -2
  28. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +82 -0
  29. parsl/tests/test_python_apps/test_context_manager.py +40 -0
  30. parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py +1 -10
  31. parsl/tests/test_shutdown/__init__.py +0 -0
  32. parsl/tests/test_shutdown/test_kill_monitoring.py +65 -0
  33. parsl/utils.py +2 -2
  34. parsl/version.py +1 -1
  35. {parsl-2024.3.11.data → parsl-2024.3.25.data}/scripts/process_worker_pool.py +41 -5
  36. {parsl-2024.3.11.dist-info → parsl-2024.3.25.dist-info}/METADATA +4 -4
  37. {parsl-2024.3.11.dist-info → parsl-2024.3.25.dist-info}/RECORD +43 -36
  38. {parsl-2024.3.11.data → parsl-2024.3.25.data}/scripts/exec_parsl_function.py +0 -0
  39. {parsl-2024.3.11.data → parsl-2024.3.25.data}/scripts/parsl_coprocess.py +0 -0
  40. {parsl-2024.3.11.dist-info → parsl-2024.3.25.dist-info}/LICENSE +0 -0
  41. {parsl-2024.3.11.dist-info → parsl-2024.3.25.dist-info}/WHEEL +0 -0
  42. {parsl-2024.3.11.dist-info → parsl-2024.3.25.dist-info}/entry_points.txt +0 -0
  43. {parsl-2024.3.11.dist-info → parsl-2024.3.25.dist-info}/top_level.txt +0 -0
parsl/monitoring/router.py ADDED
@@ -0,0 +1,208 @@
+from __future__ import annotations
+
+import os
+import socket
+import time
+import pickle
+import logging
+import zmq
+
+import queue
+
+from parsl.log_utils import set_file_logger
+from parsl.process_loggers import wrap_with_logs
+from parsl.utils import setproctitle
+
+from parsl.monitoring.message_type import MessageType
+from parsl.monitoring.types import AddressedMonitoringMessage, TaggedMonitoringMessage
+from typing import Optional, Tuple, Union
+
+
+logger = logging.getLogger(__name__)
+
+
+class MonitoringRouter:
+
+    def __init__(self,
+                 *,
+                 hub_address: str,
+                 udp_port: Optional[int] = None,
+                 zmq_port_range: Tuple[int, int] = (55050, 56000),
+
+                 monitoring_hub_address: str = "127.0.0.1",
+                 logdir: str = ".",
+                 run_id: str,
+                 logging_level: int = logging.INFO,
+                 atexit_timeout: int = 3  # in seconds
+                 ):
+        """ Initializes a monitoring configuration class.
+
+        Parameters
+        ----------
+        hub_address : str
+             The ip address at which the workers will be able to reach the Hub.
+        udp_port : int
+             The specific port at which workers will be able to reach the Hub via UDP. Default: None
+        zmq_port_range : tuple(int, int)
+             The MonitoringHub picks ports at random from the range which will be used by Hub.
+             Default: (55050, 56000)
+        logdir : str
+             Parsl log directory paths. Logs and temp files go here. Default: '.'
+        logging_level : int
+             Logging level as defined in the logging module. Default: logging.INFO
+        atexit_timeout : float, optional
+            The amount of time in seconds to terminate the hub without receiving any messages, after the last dfk workflow message is received.
+
+        """
+        os.makedirs(logdir, exist_ok=True)
+        self.logger = set_file_logger("{}/monitoring_router.log".format(logdir),
+                                      name="monitoring_router",
+                                      level=logging_level)
+        self.logger.debug("Monitoring router starting")
+
+        self.hub_address = hub_address
+        self.atexit_timeout = atexit_timeout
+        self.run_id = run_id
+
+        self.loop_freq = 10.0  # milliseconds
+
+        # Initialize the UDP socket
+        self.udp_sock = socket.socket(socket.AF_INET,
+                                      socket.SOCK_DGRAM,
+                                      socket.IPPROTO_UDP)
+
+        # We are trying to bind to all interfaces with 0.0.0.0
+        if not udp_port:
+            self.udp_sock.bind(('0.0.0.0', 0))
+            self.udp_port = self.udp_sock.getsockname()[1]
+        else:
+            self.udp_port = udp_port
+            try:
+                self.udp_sock.bind(('0.0.0.0', self.udp_port))
+            except Exception as e:
+                raise RuntimeError(f"Could not bind to udp_port {udp_port} because: {e}")
+        self.udp_sock.settimeout(self.loop_freq / 1000)
+        self.logger.info("Initialized the UDP socket on 0.0.0.0:{}".format(self.udp_port))
+
+        self._context = zmq.Context()
+        self.zmq_receiver_channel = self._context.socket(zmq.DEALER)
+        self.zmq_receiver_channel.setsockopt(zmq.LINGER, 0)
+        self.zmq_receiver_channel.set_hwm(0)
+        self.zmq_receiver_channel.RCVTIMEO = int(self.loop_freq)  # in milliseconds
+        self.logger.debug("hub_address: {}. zmq_port_range {}".format(hub_address, zmq_port_range))
+        self.zmq_receiver_port = self.zmq_receiver_channel.bind_to_random_port("tcp://*",
+                                                                               min_port=zmq_port_range[0],
+                                                                               max_port=zmq_port_range[1])
+
+    def start(self,
+              priority_msgs: "queue.Queue[AddressedMonitoringMessage]",
+              node_msgs: "queue.Queue[AddressedMonitoringMessage]",
+              block_msgs: "queue.Queue[AddressedMonitoringMessage]",
+              resource_msgs: "queue.Queue[AddressedMonitoringMessage]") -> None:
+        try:
+            router_keep_going = True
+            while router_keep_going:
+                try:
+                    data, addr = self.udp_sock.recvfrom(2048)
+                    resource_msg = pickle.loads(data)
+                    self.logger.debug("Got UDP Message from {}: {}".format(addr, resource_msg))
+                    resource_msgs.put((resource_msg, addr))
+                except socket.timeout:
+                    pass
+
+                try:
+                    dfk_loop_start = time.time()
+                    while time.time() - dfk_loop_start < 1.0:  # TODO make configurable
+                        # note that nothing checks that msg really is of the annotated type
+                        msg: TaggedMonitoringMessage
+                        msg = self.zmq_receiver_channel.recv_pyobj()
+
+                        assert isinstance(msg, tuple), "ZMQ Receiver expects only tuples, got {}".format(msg)
+                        assert len(msg) >= 1, "ZMQ Receiver expects tuples of length at least 1, got {}".format(msg)
+                        assert len(msg) == 2, "ZMQ Receiver expects message tuples of exactly length 2, got {}".format(msg)
+
+                        msg_0: AddressedMonitoringMessage
+                        msg_0 = (msg, 0)
+
+                        if msg[0] == MessageType.NODE_INFO:
+                            msg[1]['run_id'] = self.run_id
+                            node_msgs.put(msg_0)
+                        elif msg[0] == MessageType.RESOURCE_INFO:
+                            resource_msgs.put(msg_0)
+                        elif msg[0] == MessageType.BLOCK_INFO:
+                            block_msgs.put(msg_0)
+                        elif msg[0] == MessageType.TASK_INFO:
+                            priority_msgs.put(msg_0)
+                        elif msg[0] == MessageType.WORKFLOW_INFO:
+                            priority_msgs.put(msg_0)
+                            if 'exit_now' in msg[1] and msg[1]['exit_now']:
+                                router_keep_going = False
+                        else:
+                            # There is a type: ignore here because if msg[0]
+                            # is of the correct type, this code is unreachable,
+                            # but there is no verification that the message
+                            # received from zmq_receiver_channel.recv_pyobj() is actually
+                            # of that type.
+                            self.logger.error("Discarding message "  # type: ignore[unreachable]
+                                              f"from interchange with unknown type {msg[0].value}")
+                except zmq.Again:
+                    pass
+                except Exception:
+                    # This will catch malformed messages. What happens if the
+                    # channel is broken in such a way that it always raises
+                    # an exception? Looping on this would maybe be the wrong
+                    # thing to do.
+                    self.logger.warning("Failure processing a ZMQ message", exc_info=True)
+
+            self.logger.info("Monitoring router draining")
+            last_msg_received_time = time.time()
+            while time.time() - last_msg_received_time < self.atexit_timeout:
+                try:
+                    data, addr = self.udp_sock.recvfrom(2048)
+                    msg = pickle.loads(data)
+                    self.logger.debug("Got UDP Message from {}: {}".format(addr, msg))
+                    resource_msgs.put((msg, addr))
+                    last_msg_received_time = time.time()
+                except socket.timeout:
+                    pass
+
+            self.logger.info("Monitoring router finishing normally")
+        finally:
+            self.logger.info("Monitoring router finished")
+
+
+@wrap_with_logs
+def router_starter(comm_q: "queue.Queue[Union[Tuple[int, int], str]]",
+                   exception_q: "queue.Queue[Tuple[str, str]]",
+                   priority_msgs: "queue.Queue[AddressedMonitoringMessage]",
+                   node_msgs: "queue.Queue[AddressedMonitoringMessage]",
+                   block_msgs: "queue.Queue[AddressedMonitoringMessage]",
+                   resource_msgs: "queue.Queue[AddressedMonitoringMessage]",
+
+                   hub_address: str,
+                   udp_port: Optional[int],
+                   zmq_port_range: Tuple[int, int],
+
+                   logdir: str,
+                   logging_level: int,
+                   run_id: str) -> None:
+    setproctitle("parsl: monitoring router")
+    try:
+        router = MonitoringRouter(hub_address=hub_address,
+                                  udp_port=udp_port,
+                                  zmq_port_range=zmq_port_range,
+                                  logdir=logdir,
+                                  logging_level=logging_level,
+                                  run_id=run_id)
+    except Exception as e:
+        logger.error("MonitoringRouter construction failed.", exc_info=True)
+        comm_q.put(f"Monitoring router construction failed: {e}")
+    else:
+        comm_q.put((router.udp_port, router.zmq_receiver_port))
+
+        router.logger.info("Starting MonitoringRouter in router_starter")
+        try:
+            router.start(priority_msgs, node_msgs, block_msgs, resource_msgs)
+        except Exception as e:
+            router.logger.exception("router.start exception")
+            exception_q.put(('Hub', str(e)))
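
For orientation, here is a minimal sketch of how router_starter could be driven from a parent process, based only on the signature above. The queue and process wiring is illustrative (in parsl itself this wiring lives in MonitoringHub in parsl/monitoring/monitoring.py, which uses a fork-based process), and the hub address, log directory and run_id values are placeholders.

import logging
import multiprocessing

from parsl.monitoring.router import router_starter

if __name__ == "__main__":
    comm_q = multiprocessing.Queue()
    exception_q = multiprocessing.Queue()
    priority_msgs = multiprocessing.Queue()
    node_msgs = multiprocessing.Queue()
    block_msgs = multiprocessing.Queue()
    resource_msgs = multiprocessing.Queue()

    router_proc = multiprocessing.Process(
        target=router_starter,
        kwargs=dict(comm_q=comm_q,
                    exception_q=exception_q,
                    priority_msgs=priority_msgs,
                    node_msgs=node_msgs,
                    block_msgs=block_msgs,
                    resource_msgs=resource_msgs,
                    hub_address="127.0.0.1",
                    udp_port=None,                 # let the router pick an ephemeral UDP port
                    zmq_port_range=(55050, 56000),
                    logdir=".",
                    logging_level=logging.INFO,
                    run_id="example-run-id"),      # placeholder run id
        daemon=True,
    )
    router_proc.start()

    # router_starter reports (udp_port, zmq_receiver_port) on comm_q on
    # success, or an error string if construction failed.
    reply = comm_q.get(timeout=120)
    if isinstance(reply, str):
        raise RuntimeError(reply)
    udp_port, zmq_port = reply
    print(f"router listening on udp:{udp_port} zmq:{zmq_port}")

    router_proc.terminate()
    router_proc.join()

Consumers would normally drain the four message queues from further processes (as the database manager does in parsl); the sketch stops at port discovery.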
parsl/monitoring/visualization/plots/default/workflow_plots.py CHANGED
@@ -27,6 +27,9 @@ gantt_colors = {'unsched': 'rgb(240, 240, 240)',
 
 def task_gantt_plot(df_task, df_status, time_completed=None):
 
+    if df_task.empty:
+        return None
+
     # if the workflow is not recorded as completed, then assume
     # that tasks should continue in their last state until now,
     # rather than the workflow end time.
parsl/monitoring/visualization/views.py CHANGED
@@ -8,7 +8,8 @@ from parsl.monitoring.visualization.models import Workflow, Task, Status, db
 
 from parsl.monitoring.visualization.plots.default.workflow_plots import task_gantt_plot, task_per_app_plot, workflow_dag_plot
 from parsl.monitoring.visualization.plots.default.task_plots import time_series_memory_per_task_plot
-from parsl.monitoring.visualization.plots.default.workflow_resource_plots import resource_distribution_plot, resource_efficiency, worker_efficiency
+from parsl.monitoring.visualization.plots.default.workflow_resource_plots import (resource_distribution_plot,
+                                                                                  resource_efficiency, worker_efficiency)
 
 dummy = True
 
parsl/providers/cluster_provider.py CHANGED
@@ -91,7 +91,7 @@ class ClusterProvider(ExecutionProvider):
               - configs (dict) : configs that get pushed into the template
 
         Returns:
-              - True: on success
+              - None
 
         Raises:
             SchedulerMissingArgs : If template is missing args
@@ -117,8 +117,6 @@ class ClusterProvider(ExecutionProvider):
             logger.error("Uncategorized error: %s", e)
             raise e
 
-        return True
-
    @abstractmethod
    def _status(self):
        pass
parsl/tests/configs/user_opts.py CHANGED
@@ -60,7 +60,8 @@ user_opts = {
 #     'username': OSG_USERNAME,
 #     'script_dir': '/home/{}/parsl_scripts'.format(OSG_USERNAME),
 #     'scheduler_options': "",
-#     'worker_init' : 'module load python/3.5.2; python3 -m venv parsl_env; source parsl_env/bin/activate; python3 -m pip install parsl==0.5.2'
+#     'worker_init' : 'module load python/3.5.2; python3 -m venv parsl_env;
+#                      source parsl_env/bin/activate; python3 -m pip install parsl==0.5.2'
 # },
 # 'swan': {
 #     'username': SWAN_USERNAME,
parsl/tests/test_htex/test_drain.py ADDED
@@ -0,0 +1,78 @@
+import parsl
+import pytest
+import time
+
+from parsl.providers import LocalProvider
+from parsl.channels import LocalChannel
+from parsl.launchers import SimpleLauncher
+
+from parsl.config import Config
+from parsl.executors import HighThroughputExecutor
+
+# this constant is used to scale some durations that happen
+# based around the expected drain period: the drain period
+# is TIME_CONST seconds, and the single executed task will
+# last twice that many number of seconds.
+TIME_CONST = 1
+
+
+def local_config():
+    return Config(
+        executors=[
+            HighThroughputExecutor(
+                label="htex_local",
+                drain_period=TIME_CONST,
+                worker_debug=True,
+                cores_per_worker=1,
+                encrypted=True,
+                provider=LocalProvider(
+                    channel=LocalChannel(),
+                    init_blocks=1,
+                    min_blocks=0,
+                    max_blocks=0,
+                    launcher=SimpleLauncher(),
+                ),
+            )
+        ],
+        strategy='none',
+    )
+
+
+@parsl.python_app
+def f(n):
+    import time
+    time.sleep(n)
+
+
+@pytest.mark.local
+def test_drain(try_assert):
+
+    htex = parsl.dfk().executors['htex_local']
+
+    # wait till we have a block running...
+
+    try_assert(lambda: len(htex.connected_managers()) == 1)
+
+    managers = htex.connected_managers()
+    assert managers[0]['active'], "The manager should be active"
+    assert not managers[0]['draining'], "The manager should not be draining"
+
+    fut = f(TIME_CONST * 2)
+
+    time.sleep(TIME_CONST)
+
+    # this assert should happen *very fast* after the above delay...
+    try_assert(lambda: htex.connected_managers()[0]['draining'], timeout_ms=500)
+
+    # and the test task should still be running...
+    assert not fut.done(), "The test task should still be running"
+
+    fut.result()
+
+    # and now we should see the manager disappear...
+    # ... with strategy='none', this should be coming from draining but
+    # that information isn't immediately obvious from the absence in
+    # connected managers.
+    # As with the above draining assert, this should happen very fast after
+    # the task ends.
+    try_assert(lambda: len(htex.connected_managers()) == 0, timeout_ms=500)
parsl/tests/test_monitoring/test_app_names.py ADDED
@@ -0,0 +1,86 @@
+"""Tests monitoring records app name under various decoration patterns.
+"""
+
+import os
+import parsl
+import pytest
+import time
+
+from parsl.tests.configs.htex_local_alternate import fresh_config
+
+
+@parsl.python_app
+def regular_decorated_app():
+    return 5
+
+
+@pytest.mark.local
+def get_regular_decorated_app():
+    return regular_decorated_app
+
+
+def for_decoration_later():
+    return 77
+
+
+def get_for_decoration_later():
+    return parsl.python_app(for_decoration_later)
+
+
+def get_decorated_closure():
+
+    r = 53
+
+    @parsl.python_app
+    def decorated_closure():
+        return r
+
+    return decorated_closure
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("get_app,expected_name,expected_result",
+                         [(get_regular_decorated_app, "regular_decorated_app", 5),
+                          (get_for_decoration_later, "for_decoration_later", 77),
+                          (get_decorated_closure, "decorated_closure", 53)
+                          ])
+def test_app_name(get_app, expected_name, expected_result, tmpd_cwd):
+
+    # this is imported here rather than at module level because
+    # it isn't available in a plain parsl install, so this module
+    # would otherwise fail to import and break even a basic test
+    # run.
+    import sqlalchemy
+
+    c = fresh_config()
+    c.run_dir = tmpd_cwd
+    c.monitoring.logging_endpoint = f"sqlite:///{tmpd_cwd}/monitoring.db"
+    parsl.load(c)
+
+    app = get_app()
+    assert app().result() == expected_result
+
+    parsl.dfk().cleanup()
+    parsl.clear()
+
+    engine = sqlalchemy.create_engine(c.monitoring.logging_endpoint)
+    with engine.begin() as connection:
+
+        def count_rows(table: str):
+            result = connection.execute(f"SELECT COUNT(*) FROM {table}")
+            (c, ) = result.first()
+            return c
+
+        # one workflow...
+        assert count_rows("workflow") == 1
+
+        # ... with one task ...
+        assert count_rows("task") == 1
+
+        # ... that was tried once ...
+        assert count_rows("try") == 1
+
+        # ... and has the expected name.
+        result = connection.execute("SELECT task_func_name FROM task")
+        (c, ) = result.first()
+        assert c == expected_name
parsl/tests/test_monitoring/test_fuzz_zmq.py CHANGED
@@ -41,11 +41,11 @@ def test_row_counts():
 
     # dig out the interchange port...
    hub_address = parsl.dfk().hub_address
-    hub_interchange_port = parsl.dfk().hub_interchange_port
+    hub_zmq_port = parsl.dfk().hub_zmq_port
 
     # this will send a string to a new socket connection
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-        s.connect((hub_address, hub_interchange_port))
+        s.connect((hub_address, hub_zmq_port))
        s.sendall(b'fuzzing\r')
 
     # this will send a non-object down the DFK's existing ZMQ connection
parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py ADDED
@@ -0,0 +1,82 @@
+import logging
+import os
+import parsl
+import pytest
+import time
+
+from parsl.providers import LocalProvider
+from parsl.channels import LocalChannel
+from parsl.launchers import SimpleLauncher
+
+from parsl.config import Config
+from parsl.executors import HighThroughputExecutor
+from parsl.monitoring import MonitoringHub
+
+
+def fresh_config(run_dir, strategy, db_url):
+    return Config(
+        run_dir=os.fspath(run_dir),
+        executors=[
+            HighThroughputExecutor(
+                label="htex_local",
+                cores_per_worker=1,
+                encrypted=True,
+                provider=LocalProvider(
+                    channel=LocalChannel(),
+                    init_blocks=1,
+                    # min and max are set to 0 to ensure that we don't get
+                    # a block from ongoing strategy scaling, only from
+                    # init_blocks
+                    min_blocks=0,
+                    max_blocks=0,
+                    launcher=SimpleLauncher(),
+                ),
+            )
+        ],
+        strategy=strategy,
+        strategy_period=0.1,
+        monitoring=MonitoringHub(
+            hub_address="localhost",
+            hub_port=55055,
+            logging_endpoint=db_url
+        )
+    )
+
+
+@parsl.python_app
+def this_app():
+    pass
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("strategy", ('none', 'simple', 'htex_auto_scale'))
+def test_row_counts(tmpd_cwd, strategy):
+    # this is imported here rather than at module level because
+    # it isn't available in a plain parsl install, so this module
+    # would otherwise fail to import and break even a basic test
+    # run.
+    import sqlalchemy
+    from sqlalchemy import text
+
+    db_url = f"sqlite:///{tmpd_cwd}/monitoring.db"
+    parsl.load(fresh_config(tmpd_cwd, strategy, db_url))
+
+    this_app().result()
+
+    parsl.dfk().cleanup()
+    parsl.clear()
+
+    engine = sqlalchemy.create_engine(db_url)
+    with engine.begin() as connection:
+
+        result = connection.execute(text("SELECT COUNT(DISTINCT block_id) FROM block"))
+        (c, ) = result.first()
+        assert c == 1, "We should see a single block in this database"
+
+        result = connection.execute(text("SELECT COUNT(*) FROM block WHERE block_id = 0 AND status = 'PENDING'"))
+        (c, ) = result.first()
+        assert c == 1, "There should be a single pending status"
+
+        result = connection.execute(text("SELECT COUNT(*) FROM block WHERE block_id = 0 AND status = 'CANCELLED'"))
+        (c, ) = result.first()
+        assert c == 1, "There should be a single cancelled status"
parsl/tests/test_python_apps/test_context_manager.py ADDED
@@ -0,0 +1,40 @@
+import parsl
+from parsl.tests.configs.local_threads import fresh_config
+import pytest
+from parsl.errors import NoDataFlowKernelError
+
+
+@parsl.python_app
+def square(x):
+    return x * x
+
+
+@parsl.bash_app
+def foo(x, stdout='foo.stdout'):
+    return f"echo {x + 1}"
+
+
+def local_setup():
+    pass
+
+
+def local_teardown():
+    parsl.clear()
+
+
+@pytest.mark.local
+def test_within_context_manger():
+    config = fresh_config()
+    with parsl.load(config=config):
+        py_future = square(2)
+        assert py_future.result() == 4
+
+        bash_future = foo(1)
+        assert bash_future.result() == 0
+
+        with open('foo.stdout', 'r') as f:
+            assert f.read() == "2\n"
+
+    with pytest.raises(NoDataFlowKernelError) as excinfo:
+        square(2).result()
+    assert str(excinfo.value) == "Cannot submit to a DFK that has been cleaned up"
parsl/tests/test_scaling/test_scale_down_htex_auto_scale.py CHANGED
@@ -37,6 +37,7 @@ def local_config():
         ],
         max_idletime=0.5,
         strategy='htex_auto_scale',
+        strategy_period=0.1
     )
 
 
@@ -62,16 +63,6 @@ def waiting_app(ident: int, outputs=(), inputs=()):
 def test_scale_out(tmpd_cwd, try_assert):
     dfk = parsl.dfk()
 
-    # reconfigure scaling strategy to run faster than usual. This allows
-    # this test to complete faster - at time of writing 27s with default
-    # 5s strategy, vs XXXX with 0.5s strategy.
-
-    # check this attribute still exists, in the presence of ongoing
-    # development, so we have some belief that setting it will not be
-    # setting a now-ignored parameter.
-    assert hasattr(dfk.job_status_poller, 'interval')
-    dfk.job_status_poller.interval = 0.1
-
     num_managers = len(dfk.executors['htex_local'].connected_managers())
 
     assert num_managers == 0, "Expected 0 managers at start"
parsl/tests/test_shutdown/__init__.py ADDED (empty file, no content to show)
parsl/tests/test_shutdown/test_kill_monitoring.py ADDED
@@ -0,0 +1,65 @@
+import os
+import parsl
+import pytest
+import signal
+import time
+
+from parsl.tests.configs.htex_local_alternate import fresh_config
+
+# This is a very generous upper bound on expected shutdown time of target
+# process after receiving a signal, measured in seconds.
+PERMITTED_SHUTDOWN_TIME_S = 60
+
+
+@parsl.python_app
+def simple_app():
+    return True
+
+
+@pytest.mark.local
+def test_no_kills():
+    """This tests that we can create a monitoring-enabled DFK and shut it down."""
+
+    parsl.load(fresh_config())
+
+    assert parsl.dfk().monitoring is not None, "This test requires monitoring"
+
+    parsl.dfk().cleanup()
+    parsl.clear()
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("sig", [signal.SIGINT, signal.SIGTERM, signal.SIGKILL, signal.SIGQUIT])
+@pytest.mark.parametrize("process_attr", ["router_proc", "dbm_proc"])
+def test_kill_monitoring_helper_process(sig, process_attr, try_assert):
+    """This tests that we can kill a monitoring process and still have successful shutdown.
+    SIGINT emulates some racy behaviour when ctrl-C is pressed: that
+    monitoring processes receive a ctrl-C too, and so the other processes
+    need to be tolerant to monitoring processes arbitrarily exiting.
+    """
+
+    parsl.load(fresh_config())
+
+    dfk = parsl.dfk()
+
+    assert dfk.monitoring is not None, "Monitoring required"
+
+    target_proc = getattr(dfk.monitoring, process_attr)
+
+    assert target_proc is not None, "prereq: target process must exist"
+    assert target_proc.is_alive(), "prereq: target process must be alive"
+
+    target_pid = target_proc.pid
+    assert target_pid is not None, "prereq: target process must have a pid"
+
+    os.kill(target_pid, sig)
+
+    try_assert(lambda: not target_proc.is_alive(), timeout_ms=PERMITTED_SHUTDOWN_TIME_S * 1000)
+
+    # now we have broken one piece of the monitoring system, do some app
+    # execution and then shut down.
+
+    simple_app().result()
+
+    parsl.dfk().cleanup()
+    parsl.clear()
parsl/utils.py CHANGED
@@ -296,12 +296,12 @@ class Timer:
 
     """
 
-    def __init__(self, callback: Callable, *args: Any, interval: int = 5, name: Optional[str] = None) -> None:
+    def __init__(self, callback: Callable, *args: Any, interval: Union[float, int] = 5, name: Optional[str] = None) -> None:
        """Initialize the Timer object.
        We start the timer thread here
 
        KWargs:
-             - interval (int) : number of seconds between callback events
+             - interval (int or float) : number of seconds between callback events
             - name (str) : a base name to use when naming the started thread
        """
 
parsl/version.py CHANGED
@@ -3,4 +3,4 @@
 Year.Month.Day[alpha/beta/..]
 Alphas will be numbered like this -> 2024.12.10a0
 """
-VERSION = '2024.03.11'
+VERSION = '2024.03.25'