parsl 2024.5.20__py3-none-any.whl → 2024.5.27__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (55)
  1. parsl/config.py +7 -1
  2. parsl/dataflow/dependency_resolvers.py +115 -0
  3. parsl/dataflow/dflow.py +44 -38
  4. parsl/executors/high_throughput/errors.py +10 -0
  5. parsl/executors/high_throughput/executor.py +2 -1
  6. parsl/executors/high_throughput/mpi_executor.py +1 -1
  7. parsl/executors/high_throughput/mpi_prefix_composer.py +18 -2
  8. parsl/executors/high_throughput/zmq_pipes.py +36 -2
  9. parsl/executors/radical/rpex_resources.py +3 -7
  10. parsl/tests/conftest.py +2 -2
  11. parsl/tests/sites/test_dynamic_executor.py +0 -1
  12. parsl/tests/test_bash_apps/test_std_uri.py +0 -6
  13. parsl/tests/test_checkpointing/test_periodic.py +2 -7
  14. parsl/tests/test_checkpointing/test_python_checkpoint_2.py +0 -1
  15. parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -1
  16. parsl/tests/test_checkpointing/test_task_exit.py +0 -1
  17. parsl/tests/test_htex/test_basic.py +0 -1
  18. parsl/tests/test_htex/test_command_client_timeout.py +69 -0
  19. parsl/tests/test_htex/test_cpu_affinity_explicit.py +1 -8
  20. parsl/tests/test_htex/test_manager_failure.py +0 -1
  21. parsl/tests/test_htex/test_managers_command.py +2 -7
  22. parsl/tests/test_htex/test_missing_worker.py +2 -8
  23. parsl/tests/test_monitoring/test_app_names.py +0 -1
  24. parsl/tests/test_monitoring/test_basic.py +0 -2
  25. parsl/tests/test_monitoring/test_db_locks.py +0 -1
  26. parsl/tests/test_monitoring/test_fuzz_zmq.py +0 -1
  27. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +0 -2
  28. parsl/tests/test_monitoring/test_incomplete_futures.py +0 -1
  29. parsl/tests/test_monitoring/test_memoization_representation.py +0 -1
  30. parsl/tests/test_monitoring/test_stdouterr.py +0 -2
  31. parsl/tests/test_mpi_apps/test_mpi_mode_disabled.py +2 -7
  32. parsl/tests/test_mpi_apps/test_mpi_mode_enabled.py +10 -1
  33. parsl/tests/test_mpi_apps/test_resource_spec.py +14 -9
  34. parsl/tests/test_python_apps/test_context_manager.py +1 -9
  35. parsl/tests/test_python_apps/test_lifted.py +10 -6
  36. parsl/tests/test_python_apps/test_pluggable_future_resolution.py +161 -0
  37. parsl/tests/test_scaling/test_regression_1621.py +0 -2
  38. parsl/tests/test_scaling/test_shutdown_scalein.py +0 -2
  39. parsl/tests/test_serialization/test_proxystore_configured.py +0 -1
  40. parsl/tests/test_shutdown/test_kill_monitoring.py +0 -2
  41. parsl/tests/test_staging/test_1316.py +0 -2
  42. parsl/tests/test_staging/test_elaborate_noop_file.py +0 -1
  43. parsl/tests/test_summary.py +0 -1
  44. parsl/tests/test_threads/test_configs.py +0 -1
  45. parsl/tests/test_threads/test_lazy_errors.py +0 -1
  46. parsl/version.py +1 -1
  47. {parsl-2024.5.20.dist-info → parsl-2024.5.27.dist-info}/METADATA +6 -6
  48. {parsl-2024.5.20.dist-info → parsl-2024.5.27.dist-info}/RECORD +55 -52
  49. {parsl-2024.5.20.data → parsl-2024.5.27.data}/scripts/exec_parsl_function.py +0 -0
  50. {parsl-2024.5.20.data → parsl-2024.5.27.data}/scripts/parsl_coprocess.py +0 -0
  51. {parsl-2024.5.20.data → parsl-2024.5.27.data}/scripts/process_worker_pool.py +0 -0
  52. {parsl-2024.5.20.dist-info → parsl-2024.5.27.dist-info}/LICENSE +0 -0
  53. {parsl-2024.5.20.dist-info → parsl-2024.5.27.dist-info}/WHEEL +0 -0
  54. {parsl-2024.5.20.dist-info → parsl-2024.5.27.dist-info}/entry_points.txt +0 -0
  55. {parsl-2024.5.20.dist-info → parsl-2024.5.27.dist-info}/top_level.txt +0 -0
parsl/config.py CHANGED
@@ -5,6 +5,7 @@ from typing import Callable, Iterable, Optional, Sequence, Union
 from typing_extensions import Literal
 
 from parsl.utils import RepresentationMixin
+from parsl.dataflow.dependency_resolvers import DependencyResolver
 from parsl.executors.base import ParslExecutor
 from parsl.executors.threads import ThreadPoolExecutor
 from parsl.errors import ConfigurationError
@@ -35,6 +36,8 @@ class Config(RepresentationMixin, UsageInformation):
     checkpoint_period : str, optional
         Time interval (in "HH:MM:SS") at which to checkpoint completed tasks. Only has an effect if
         ``checkpoint_mode='periodic'``.
+    dependency_resolver: plugin point for custom dependency resolvers. Default: only resolve Futures,
+        using the `SHALLOW_DEPENDENCY_RESOLVER`.
     garbage_collect : bool. optional.
         Delete task records from DFK when tasks have completed. Default: True
     internal_tasks_max_threads : int, optional
@@ -88,6 +91,7 @@ class Config(RepresentationMixin, UsageInformation):
                         Literal['dfk_exit'],
                         Literal['manual']] = None,
                  checkpoint_period: Optional[str] = None,
+                 dependency_resolver: Optional[DependencyResolver] = None,
                  garbage_collect: bool = True,
                  internal_tasks_max_threads: int = 10,
                  retries: int = 0,
@@ -123,6 +127,7 @@ class Config(RepresentationMixin, UsageInformation):
         if checkpoint_mode == 'periodic' and checkpoint_period is None:
             checkpoint_period = "00:30:00"
         self.checkpoint_period = checkpoint_period
+        self.dependency_resolver = dependency_resolver
         self.garbage_collect = garbage_collect
         self.internal_tasks_max_threads = internal_tasks_max_threads
         self.retries = retries
@@ -152,4 +157,5 @@
                              ', '.join(['label={}'.format(repr(d)) for d in duplicates])))
 
     def get_usage_information(self):
-        return {"executors_len": len(self.executors)}
+        return {"executors_len": len(self.executors),
+                "dependency_resolver": self.dependency_resolver is not None}
parsl/dataflow/dependency_resolvers.py ADDED
@@ -0,0 +1,115 @@
+from concurrent.futures import Future
+from dataclasses import dataclass
+from functools import singledispatch
+from typing import Callable, Sequence
+
+
+@dataclass
+class DependencyResolver:
+    """A DependencyResolver describes how app dependencies can be resolved.
+    It is specified as two functions: `traverse_to_gather` which turns an
+    app parameter into a sequence of futures which must be waited for before
+    the task can be executed (for example, in the case of
+    `DEEP_DEPENDENCY_RESOLVER` this traverses structures such as lists to
+    find every contained ``Future``), and `traverse_to_unwrap` which turns an
+    app parameter into its value to be passed to the app on execution
+    (for example in the case of `DEEP_DEPENDENCY_RESOLVER` this replaces a
+    list containing futures with a new list containing the values of those
+    resolved futures).
+
+    By default, Parsl will use `SHALLOW_DEPENDENCY_RESOLVER` which only
+    resolves Futures passed directly as arguments.
+    """
+    traverse_to_gather: Callable[[object], Sequence[Future]]
+    traverse_to_unwrap: Callable[[object], object]
+
+
+@singledispatch
+def shallow_traverse_to_gather(o):
+    # objects in general do not expose futures that we can see
+    return []
+
+
+@singledispatch
+def shallow_traverse_to_unwrap(o):
+    # objects in general unwrap to themselves
+    return o
+
+
+@shallow_traverse_to_gather.register
+def _(fut: Future):
+    return [fut]
+
+
+@shallow_traverse_to_unwrap.register
+@singledispatch
+def _(fut: Future):
+    assert fut.done()
+    return fut.result()
+
+
+@singledispatch
+def deep_traverse_to_gather(o):
+    # objects in general do not expose futures that we can see
+    return []
+
+
+@singledispatch
+def deep_traverse_to_unwrap(o):
+    # objects in general unwrap to themselves
+    return o
+
+
+@deep_traverse_to_gather.register
+def _(fut: Future):
+    return [fut]
+
+
+@deep_traverse_to_unwrap.register
+@singledispatch
+def _(fut: Future):
+    assert fut.done()
+    return fut.result()
+
+
+@deep_traverse_to_gather.register(tuple)
+@deep_traverse_to_gather.register(list)
+@deep_traverse_to_gather.register(set)
+def _(iterable):
+    return [e for v in iterable for e in deep_traverse_to_gather(v)]
+
+
+@deep_traverse_to_unwrap.register(tuple)
+@deep_traverse_to_unwrap.register(list)
+@deep_traverse_to_unwrap.register(set)
+@singledispatch
+def _(iterable):
+
+    type_ = type(iterable)
+    return type_(map(deep_traverse_to_unwrap, iterable))
+
+
+@deep_traverse_to_gather.register(dict)
+def _(dictionary):
+    futures = []
+    for key, value in dictionary.items():
+        futures.extend(deep_traverse_to_gather(key))
+        futures.extend(deep_traverse_to_gather(value))
+    return futures
+
+
+@deep_traverse_to_unwrap.register(dict)
+def _(dictionary):
+    unwrapped_dict = {}
+    for key, value in dictionary.items():
+        key = deep_traverse_to_unwrap(key)
+        value = deep_traverse_to_unwrap(value)
+        unwrapped_dict[key] = value
+    return unwrapped_dict
+
+
+DEEP_DEPENDENCY_RESOLVER = DependencyResolver(traverse_to_gather=deep_traverse_to_gather,
+                                              traverse_to_unwrap=deep_traverse_to_unwrap)
+
+SHALLOW_DEPENDENCY_RESOLVER = DependencyResolver(traverse_to_gather=shallow_traverse_to_gather,
+                                                 traverse_to_unwrap=shallow_traverse_to_unwrap)
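
Note: this new module is the substantive addition of the release, and it is wired up through the new `Config(dependency_resolver=...)` option shown above. A minimal sketch of selecting the deep resolver — the app, executor choice, and values here are illustrative, not taken from the diff:

    # Sketch: opting in to deep dependency resolution. With the default
    # SHALLOW_DEPENDENCY_RESOLVER only bare Future arguments are treated as
    # dependencies; DEEP_DEPENDENCY_RESOLVER also traverses tuples, lists,
    # sets and dicts to gather and unwrap any Futures they contain.
    import parsl
    from parsl.config import Config
    from parsl.dataflow.dependency_resolvers import DEEP_DEPENDENCY_RESOLVER
    from parsl.executors.threads import ThreadPoolExecutor

    @parsl.python_app
    def total(values):
        return sum(values)

    config = Config(executors=[ThreadPoolExecutor()],
                    dependency_resolver=DEEP_DEPENDENCY_RESOLVER)

    with parsl.load(config):
        parts = [total([1, 2]), total([3, 4])]
        # A list containing Futures is now itself a resolvable dependency:
        # both inner futures are waited on and replaced by their results.
        assert total(parts).result() == 10

With the default shallow resolver, the same `total(parts)` call would hand the app a list of raw Future objects rather than their results.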
parsl/dataflow/dflow.py CHANGED
@@ -26,6 +26,7 @@ from parsl.channels import Channel
 from parsl.config import Config
 from parsl.data_provider.data_manager import DataManager
 from parsl.data_provider.files import File
+from parsl.dataflow.dependency_resolvers import SHALLOW_DEPENDENCY_RESOLVER
 from parsl.dataflow.errors import BadCheckpoint, DependencyError, JoinError
 from parsl.dataflow.futures import AppFuture
 from parsl.dataflow.memoization import Memoizer
@@ -203,6 +204,9 @@ class DataFlowKernel:
         self.tasks: Dict[int, TaskRecord] = {}
         self.submitter_lock = threading.Lock()
 
+        self.dependency_resolver = self.config.dependency_resolver if self.config.dependency_resolver is not None \
+            else SHALLOW_DEPENDENCY_RESOLVER
+
         atexit.register(self.atexit_cleanup)
 
     def __enter__(self):
@@ -852,8 +856,11 @@ class DataFlowKernel:
         depends: List[Future] = []
 
         def check_dep(d: Any) -> None:
-            if isinstance(d, Future):
-                depends.extend([d])
+            try:
+                depends.extend(self.dependency_resolver.traverse_to_gather(d))
+            except Exception:
+                logger.exception("Exception in dependency_resolver.traverse_to_gather")
+                raise
 
         # Check the positional args
         for dep in args:
@@ -870,7 +877,8 @@
 
         return depends
 
-    def _unwrap_futures(self, args, kwargs):
+    def _unwrap_futures(self, args: Sequence[Any], kwargs: Dict[str, Any]) \
+            -> Tuple[Sequence[Any], Dict[str, Any], Sequence[Tuple[Exception, str]]]:
         """This function should be called when all dependencies have completed.
 
         It will rewrite the arguments for that task, replacing each Future
@@ -891,53 +899,40 @@
         """
         dep_failures = []
 
+        def append_failure(e: Exception, dep: Future) -> None:
+            # If this Future is associated with a task inside this DFK,
+            # then refer to the task ID.
+            # Otherwise make a repr of the Future object.
+            if hasattr(dep, 'task_record') and dep.task_record['dfk'] == self:
+                tid = "task " + repr(dep.task_record['id'])
+            else:
+                tid = repr(dep)
+            dep_failures.extend([(e, tid)])
+
         # Replace item in args
         new_args = []
         for dep in args:
-            if isinstance(dep, Future):
-                try:
-                    new_args.extend([dep.result()])
-                except Exception as e:
-                    # If this Future is associated with a task inside this DFK,
-                    # then refer to the task ID.
-                    # Otherwise make a repr of the Future object.
-                    if hasattr(dep, 'task_record') and dep.task_record['dfk'] == self:
-                        tid = "task " + repr(dep.task_record['id'])
-                    else:
-                        tid = repr(dep)
-                    dep_failures.extend([(e, tid)])
-            else:
-                new_args.extend([dep])
+            try:
+                new_args.extend([self.dependency_resolver.traverse_to_unwrap(dep)])
+            except Exception as e:
+                append_failure(e, dep)
 
         # Check for explicit kwargs ex, fu_1=<fut>
         for key in kwargs:
             dep = kwargs[key]
-            if isinstance(dep, Future):
-                try:
-                    kwargs[key] = dep.result()
-                except Exception as e:
-                    if hasattr(dep, 'task_record'):
-                        tid = dep.task_record['id']
-                    else:
-                        tid = None
-                    dep_failures.extend([(e, tid)])
+            try:
+                kwargs[key] = self.dependency_resolver.traverse_to_unwrap(dep)
+            except Exception as e:
+                append_failure(e, dep)
 
         # Check for futures in inputs=[<fut>...]
         if 'inputs' in kwargs:
             new_inputs = []
             for dep in kwargs['inputs']:
-                if isinstance(dep, Future):
-                    try:
-                        new_inputs.extend([dep.result()])
-                    except Exception as e:
-                        if hasattr(dep, 'task_record'):
-                            tid = dep.task_record['id']
-                        else:
-                            tid = None
-                        dep_failures.extend([(e, tid)])
-
-                else:
-                    new_inputs.extend([dep])
+                try:
+                    new_inputs.extend([self.dependency_resolver.traverse_to_unwrap(dep)])
+                except Exception as e:
+                    append_failure(e, dep)
             kwargs['inputs'] = new_inputs
 
         return new_args, kwargs, dep_failures
@@ -1042,6 +1037,8 @@
 
         func = self._add_output_deps(executor, app_args, app_kwargs, app_fu, func)
 
+        logger.debug("Added output dependencies")
+
         # Replace the function invocation in the TaskRecord with whatever file-staging
         # substitutions have been made.
         task_record.update({
@@ -1053,8 +1050,10 @@
 
         self.tasks[task_id] = task_record
 
+        logger.debug("Gathering dependencies")
         # Get the list of dependencies for the task
         depends = self._gather_all_deps(app_args, app_kwargs)
+        logger.debug("Gathered dependencies")
         task_record['depends'] = depends
 
         depend_descs = []
@@ -1271,6 +1270,13 @@
         atexit.unregister(self.atexit_cleanup)
         logger.info("Unregistered atexit hook")
 
+        if DataFlowKernelLoader._dfk is self:
+            logger.info("Unregistering default DFK")
+            parsl.clear()
+            logger.info("Unregistered default DFK")
+        else:
+            logger.debug("Cleaning up non-default DFK - not unregistering")
+
         logger.info("DFK cleanup complete")
 
     def checkpoint(self, tasks: Optional[Sequence[TaskRecord]] = None) -> str:
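
This cleanup() change explains most of the test churn later in the diff: cleaning up the default DFK now also unregisters it, so the explicit `parsl.clear()` calls removed below are redundant. A small sketch of the new contract (the config helper is an illustrative choice, not part of this diff):

    import parsl
    from parsl.dataflow.dflow import DataFlowKernelLoader
    from parsl.tests.configs.local_threads import fresh_config  # illustrative config

    parsl.load(fresh_config())
    parsl.dfk().cleanup()  # now also runs parsl.clear() for the default DFK
    assert DataFlowKernelLoader._dfk is None  # mirrors the new conftest.py assertion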
parsl/executors/high_throughput/errors.py CHANGED
@@ -10,3 +10,13 @@ class WorkerLost(Exception):
 
     def __str__(self):
         return self.__repr__()
+
+
+class CommandClientTimeoutError(Exception):
+    """Raised when the command client times out waiting for a response.
+    """
+
+
+class CommandClientBadError(Exception):
+    """Raised when the command client is bad from an earlier timeout.
+    """
parsl/executors/high_throughput/executor.py CHANGED
@@ -645,7 +645,8 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
         Returns:
               Future
         """
-        validate_resource_spec(resource_specification)
+
+        validate_resource_spec(resource_specification, self.enable_mpi_mode)
 
         if self.bad_state_is_set:
             raise self.executor_exception
parsl/executors/high_throughput/mpi_executor.py CHANGED
@@ -20,7 +20,7 @@ class MPIExecutor(HighThroughputExecutor):
     to spawn multi-node tasks.
 
     Specify the maximum number of multi-node tasks to run at once using ``max_workers_per_block``.
-    The maximum number should be smaller than the ``nodes_per_block`` in the Provider.
+    The value should be less than or equal to the ``nodes_per_block`` in the Provider.
 
     Parameters
     ----------
parsl/executors/high_throughput/mpi_prefix_composer.py CHANGED
@@ -8,8 +8,18 @@ VALID_LAUNCHERS = ('srun',
                    'mpiexec')
 
 
+class MissingResourceSpecification(Exception):
+    """Exception raised when input is not supplied a resource specification"""
+
+    def __init__(self, reason: str):
+        self.reason = reason
+
+    def __str__(self):
+        return f"Missing resource specification: {self.reason}"
+
+
 class InvalidResourceSpecification(Exception):
-    """Exception raised when Invalid keys are supplied via resource specification"""
+    """Exception raised when Invalid input is supplied via resource specification"""
 
     def __init__(self, invalid_keys: Set[str]):
         self.invalid_keys = invalid_keys
@@ -18,13 +28,19 @@ class InvalidResourceSpecification(Exception):
         return f"Invalid resource specification options supplied: {self.invalid_keys}"
 
 
-def validate_resource_spec(resource_spec: Dict[str, str]):
+def validate_resource_spec(resource_spec: Dict[str, str], is_mpi_enabled: bool):
     """Basic validation of keys in the resource_spec
 
     Raises: InvalidResourceSpecification if the resource_spec
     is invalid (e.g, contains invalid keys)
    """
     user_keys = set(resource_spec.keys())
+
+    # empty resource_spec when mpi_mode is set causes parsl to hang
+    # ref issue #3427
+    if is_mpi_enabled and len(user_keys) == 0:
+        raise MissingResourceSpecification('MPI mode requires optional parsl_resource_specification keyword argument to be configured')
+
     legal_keys = set(("ranks_per_node",
                       "num_nodes",
                       "num_ranks",
parsl/executors/high_throughput/zmq_pipes.py CHANGED
@@ -3,8 +3,11 @@
 import zmq
 import logging
 import threading
+import time
 
 from parsl import curvezmq
+from parsl.errors import InternalConsistencyError
+from parsl.executors.high_throughput.errors import CommandClientBadError, CommandClientTimeoutError
 
 logger = logging.getLogger(__name__)
 
@@ -31,6 +34,7 @@ class CommandClient:
         self.port = None
         self.create_socket_and_bind()
         self._lock = threading.Lock()
+        self.ok = True
 
     def create_socket_and_bind(self):
         """ Creates socket and binds to a port.
@@ -46,7 +50,7 @@
         else:
             self.zmq_socket.bind("tcp://{}:{}".format(self.ip_address, self.port))
 
-    def run(self, message, max_retries=3):
+    def run(self, message, max_retries=3, timeout_s=None):
         """ This function needs to be fast at the same time aware of the possibility of
         ZMQ pipes overflowing.
 
@@ -54,13 +58,43 @@
         in ZMQ sockets reaching a broken state once there are ~10k tasks in flight.
         This issue can be magnified if each the serialized buffer itself is larger.
        """
+        if not self.ok:
+            raise CommandClientBadError()
+
+        start_time_s = time.monotonic()
+
         reply = '__PARSL_ZMQ_PIPES_MAGIC__'
         with self._lock:
             for _ in range(max_retries):
                 try:
                     logger.debug("Sending command client command")
+
+                    if timeout_s is not None:
+                        remaining_time_s = start_time_s + timeout_s - time.monotonic()
+                        poll_result = self.zmq_socket.poll(timeout=remaining_time_s * 1000, flags=zmq.POLLOUT)
+                        if poll_result == zmq.POLLOUT:
+                            pass  # this is OK, so continue
+                        elif poll_result == 0:
+                            raise CommandClientTimeoutError("Waiting for command channel to be ready for a command")
+                        else:
+                            raise InternalConsistencyError(f"ZMQ poll returned unexpected value: {poll_result}")
+
                     self.zmq_socket.send_pyobj(message, copy=True)
-                    logger.debug("Waiting for command client response")
+
+                    if timeout_s is not None:
+                        logger.debug("Polling for command client response or timeout")
+                        remaining_time_s = start_time_s + timeout_s - time.monotonic()
+                        poll_result = self.zmq_socket.poll(timeout=remaining_time_s * 1000, flags=zmq.POLLIN)
+                        if poll_result == zmq.POLLIN:
+                            pass  # this is OK, so continue
+                        elif poll_result == 0:
+                            logger.error("Command timed-out - command client is now bad forever")
+                            self.ok = False
+                            raise CommandClientTimeoutError("Waiting for a reply from command channel")
+                        else:
+                            raise InternalConsistencyError(f"ZMQ poll returned unexpected value: {poll_result}")
+
+                    logger.debug("Receiving command client response")
                     reply = self.zmq_socket.recv_pyobj()
                     logger.debug("Received command client response")
                 except zmq.ZMQError:
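
Callers opt in per call via the new `timeout_s` keyword: both the send and the reply get bounded waits, and a reply timeout permanently poisons the client. A sketch of the resulting error handling, where `command_client` and "SOME_COMMAND" are illustrative placeholders for an already-connected CommandClient and an interchange command:

    from parsl.executors.high_throughput.errors import (
        CommandClientBadError,
        CommandClientTimeoutError,
    )

    try:
        reply = command_client.run("SOME_COMMAND", timeout_s=10)
    except CommandClientTimeoutError:
        # No reply within 10s. Send-side timeouts leave the client usable;
        # a reply timeout sets command_client.ok = False permanently.
        ...
    except CommandClientBadError:
        ...  # raised immediately once an earlier reply timeout marked it bad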
parsl/executors/radical/rpex_resources.py CHANGED
@@ -3,15 +3,11 @@ import json
 
 from typing import List
 
-_setup_paths: List[str]
+_setup_paths: List[str] = []
 try:
     import radical.pilot as rp
-    import radical.utils as ru
 except ImportError:
-    _setup_paths = []
-else:
-    _setup_paths = [rp.sdist_path,
-                    ru.sdist_path]
+    pass
 
 
 MPI = "mpi"
@@ -77,7 +73,7 @@ class ResourceConfig:
 
     pilot_env_setup : list
         List of setup commands/packages for the pilot environment.
-        Default setup includes "parsl", rp.sdist_path, and ru.sdist_path.
+        Default is an empty list.
 
     python_v : str
         The Python version to be used in the pilot environment.
parsl/tests/conftest.py CHANGED
@@ -201,7 +201,7 @@ def load_dfk_session(request, pytestconfig, tmpd_cwd_session):
         if parsl.dfk() != dfk:
             raise RuntimeError("DFK changed unexpectedly during test")
         dfk.cleanup()
-        parsl.clear()
+        assert DataFlowKernelLoader._dfk is None
     else:
         yield
 
@@ -253,7 +253,7 @@ def load_dfk_local_module(request, pytestconfig, tmpd_cwd_session):
         if parsl.dfk() != dfk:
             raise RuntimeError("DFK changed unexpectedly during test")
         dfk.cleanup()
-        parsl.clear()
+        assert DataFlowKernelLoader._dfk is None
 
     else:
         yield
parsl/tests/sites/test_dynamic_executor.py CHANGED
@@ -75,4 +75,3 @@ def test_dynamic_executor():
     print("Done testing")
 
     dfk.cleanup()
-    parsl.clear()
parsl/tests/test_bash_apps/test_std_uri.py CHANGED
@@ -35,8 +35,6 @@ def const_with_cpath(autopath_specifier, content_path, caplog):
     for record in caplog.records:
         assert record.levelno < logging.ERROR
 
-    parsl.clear()
-
 
 @pytest.mark.local
 def test_std_autopath_const_str(caplog, tmpd_cwd):
@@ -74,8 +72,6 @@ def test_std_autopath_fail(caplog):
     with pytest.raises(URIFailError):
         app_stdout()
 
-    parsl.clear()
-
 
 @parsl.bash_app
 def app_both(stdout=parsl.AUTO_LOGNAME, stderr=parsl.AUTO_LOGNAME):
@@ -124,5 +120,3 @@ def test_std_autopath_zip(caplog, tmpd_cwd):
 
     for record in caplog.records:
         assert record.levelno < logging.ERROR
-
-    parsl.clear()
parsl/tests/test_checkpointing/test_periodic.py CHANGED
@@ -9,12 +9,6 @@ def local_setup():
     parsl.load(fresh_config())
 
 
-def local_teardown():
-    # explicit clear without dfk.cleanup here, because the
-    # test does that already
-    parsl.clear()
-
-
 @python_app(cache=True)
 def slow_double(x, sleep_dur=1):
     import time
@@ -39,9 +33,10 @@ def test_periodic():
     with parsl.dfk():
         futs = [slow_double(sleep_for) for _ in range(4)]
         [f.result() for f in futs]
+        run_dir = parsl.dfk().run_dir
 
     # Here we will check if the loglines came back with 5 seconds deltas
-    with open("{}/parsl.log".format(parsl.dfk().run_dir)) as f:
+    with open("{}/parsl.log".format(run_dir)) as f:
         log_lines = f.readlines()
         expected_msg = " Done checkpointing"
         expected_msg2 = " No tasks checkpointed in this pass"
parsl/tests/test_checkpointing/test_python_checkpoint_2.py CHANGED
@@ -19,7 +19,6 @@ def parsl_configured(run_dir, **kw):
     yield dfk
 
     parsl.dfk().cleanup()
-    parsl.clear()
 
 
 @python_app(cache=True)
parsl/tests/test_checkpointing/test_python_checkpoint_3.py CHANGED
@@ -14,7 +14,6 @@ def local_setup():
 
 def local_teardown():
     parsl.dfk().cleanup()
-    parsl.clear()
 
 
 @python_app
parsl/tests/test_checkpointing/test_task_exit.py CHANGED
@@ -16,7 +16,6 @@ def local_setup():
 
 def local_teardown():
     parsl.dfk().cleanup()
-    parsl.clear()
 
 
 @python_app(cache=True)
parsl/tests/test_htex/test_basic.py CHANGED
@@ -14,7 +14,6 @@ def local_setup():
 
 def local_teardown():
     parsl.dfk().cleanup()
-    parsl.clear()
 
 
 @python_app
parsl/tests/test_htex/test_command_client_timeout.py ADDED
@@ -0,0 +1,69 @@
+import pytest
+import threading
+import time
+import zmq
+from parsl import curvezmq
+from parsl.executors.high_throughput.zmq_pipes import CommandClient
+from parsl.executors.high_throughput.errors import CommandClientTimeoutError, CommandClientBadError
+
+
+# Time constant used for timeout tests: various delays and
+# timeouts will be appropriate multiples of this, but the
+# value of T itself should not matter too much as long as
+# it is big enough for zmq connections to happen successfully.
+T = 0.25
+
+
+@pytest.mark.local
+def test_command_not_sent() -> None:
+    """Tests timeout on command send.
+    """
+    ctx = curvezmq.ClientContext(None)
+
+    # RFC6335 ephemeral port range
+    cc = CommandClient(ctx, "127.0.0.1", (49152, 65535))
+
+    # cc will now wait for a connection, but we won't do anything to make the
+    # other side of the connection exist, so any command given to cc should
+    # timeout.
+
+    with pytest.raises(CommandClientTimeoutError):
+        cc.run("SOMECOMMAND", timeout_s=T)
+
+    cc.close()
+
+
+@pytest.mark.local
+def test_command_ignored() -> None:
+    """Tests timeout on command response.
+    Tests that we timeout after a response and that the command client
+    sets itself into a bad state.
+
+    This only tests sequential access to the command client, even though
+    htex makes multithreaded use of the command client: see issue #3376 about
+    that lack of thread safety.
+    """
+    ctx = curvezmq.ClientContext(None)
+
+    # RFC6335 ephemeral port range
+    cc = CommandClient(ctx, "127.0.0.1", (49152, 65535))
+
+    ic_ctx = curvezmq.ServerContext(None)
+    ic_channel = ic_ctx.socket(zmq.REP)
+    ic_channel.connect(f"tcp://127.0.0.1:{cc.port}")
+
+    with pytest.raises(CommandClientTimeoutError):
+        cc.run("SLOW_COMMAND", timeout_s=T)
+
+    req = ic_channel.recv_pyobj()
+    assert req == "SLOW_COMMAND", "Should have received command on interchange side"
+    assert not cc.ok, "CommandClient should have set itself to bad"
+
+    with pytest.raises(CommandClientBadError):
+        cc.run("ANOTHER_COMMAND")
+
+    cc.close()
+    ctx.term()
+
+    ic_channel.close()
+    ic_ctx.term()
parsl/tests/test_htex/test_cpu_affinity_explicit.py CHANGED
@@ -37,16 +37,9 @@ def test_cpu_affinity_explicit():
     config.executors[0].max_workers_per_node = 1
 
     logger.debug(f"config: {config}")
-    # TODO: is there a `with` style for this, to properly deal with exceptions?
-
-    parsl.load(config)
-    try:
 
+    with parsl.load(config):
         worker_affinity = my_affinity().result()
         logger.debug(f"worker reported this affinity: {worker_affinity}")
         assert len(worker_affinity) == 1
         assert worker_affinity == set((single_core,))
-
-    finally:
-        parsl.dfk().cleanup()
-        parsl.clear()