parsl 2025.9.8__py3-none-any.whl → 2025.11.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. parsl/app/bash.py +1 -1
  2. parsl/benchmark/perf.py +73 -17
  3. parsl/concurrent/__init__.py +95 -14
  4. parsl/curvezmq.py +0 -16
  5. parsl/data_provider/globus.py +3 -1
  6. parsl/dataflow/dflow.py +106 -204
  7. parsl/dataflow/memoization.py +146 -19
  8. parsl/dataflow/states.py +5 -5
  9. parsl/executors/base.py +2 -2
  10. parsl/executors/execute_task.py +2 -8
  11. parsl/executors/flux/executor.py +4 -6
  12. parsl/executors/globus_compute.py +0 -4
  13. parsl/executors/high_throughput/executor.py +86 -24
  14. parsl/executors/high_throughput/interchange.py +39 -20
  15. parsl/executors/high_throughput/mpi_executor.py +1 -2
  16. parsl/executors/high_throughput/mpi_resource_management.py +7 -14
  17. parsl/executors/high_throughput/process_worker_pool.py +32 -7
  18. parsl/executors/high_throughput/zmq_pipes.py +36 -67
  19. parsl/executors/radical/executor.py +2 -6
  20. parsl/executors/radical/rpex_worker.py +2 -2
  21. parsl/executors/taskvine/executor.py +5 -1
  22. parsl/executors/threads.py +5 -2
  23. parsl/jobs/states.py +2 -2
  24. parsl/jobs/strategy.py +7 -6
  25. parsl/monitoring/monitoring.py +2 -2
  26. parsl/monitoring/radios/filesystem.py +2 -1
  27. parsl/monitoring/radios/htex.py +2 -1
  28. parsl/monitoring/radios/multiprocessing.py +2 -1
  29. parsl/monitoring/radios/udp.py +2 -1
  30. parsl/multiprocessing.py +0 -49
  31. parsl/providers/base.py +24 -37
  32. parsl/providers/pbspro/pbspro.py +1 -1
  33. parsl/serialize/__init__.py +6 -9
  34. parsl/serialize/facade.py +0 -32
  35. parsl/tests/configs/local_threads_globus.py +18 -14
  36. parsl/tests/configs/taskvine_ex.py +1 -1
  37. parsl/tests/sites/test_concurrent.py +51 -3
  38. parsl/tests/test_checkpointing/test_periodic.py +15 -9
  39. parsl/tests/test_checkpointing/test_regression_233.py +0 -1
  40. parsl/tests/test_curvezmq.py +0 -42
  41. parsl/tests/test_execute_task.py +2 -11
  42. parsl/tests/test_htex/test_command_concurrency_regression_1321.py +54 -0
  43. parsl/tests/test_htex/test_htex.py +36 -1
  44. parsl/tests/test_htex/test_interchange_exit_bad_registration.py +2 -2
  45. parsl/tests/test_htex/test_priority_queue.py +26 -3
  46. parsl/tests/test_htex/test_zmq_binding.py +2 -1
  47. parsl/tests/test_mpi_apps/test_mpi_scheduler.py +18 -43
  48. parsl/tests/test_python_apps/test_basic.py +0 -14
  49. parsl/tests/test_python_apps/test_depfail_propagation.py +11 -1
  50. parsl/tests/test_python_apps/test_exception.py +19 -0
  51. parsl/tests/test_python_apps/test_garbage_collect.py +1 -6
  52. parsl/tests/test_python_apps/test_memoize_2.py +11 -1
  53. parsl/tests/test_regression/test_3874.py +47 -0
  54. parsl/tests/test_scaling/test_regression_3696_oscillation.py +1 -0
  55. parsl/tests/test_staging/test_staging_globus.py +2 -2
  56. parsl/tests/unit/test_globus_compute_executor.py +11 -2
  57. parsl/utils.py +8 -3
  58. parsl/version.py +1 -1
  59. {parsl-2025.9.8.data → parsl-2025.11.10.data}/scripts/interchange.py +39 -20
  60. {parsl-2025.9.8.data → parsl-2025.11.10.data}/scripts/process_worker_pool.py +32 -7
  61. {parsl-2025.9.8.dist-info → parsl-2025.11.10.dist-info}/METADATA +64 -50
  62. {parsl-2025.9.8.dist-info → parsl-2025.11.10.dist-info}/RECORD +68 -74
  63. {parsl-2025.9.8.dist-info → parsl-2025.11.10.dist-info}/WHEEL +1 -1
  64. parsl/tests/configs/local_threads_checkpoint_periodic.py +0 -11
  65. parsl/tests/configs/local_threads_no_cache.py +0 -11
  66. parsl/tests/site_tests/test_provider.py +0 -88
  67. parsl/tests/site_tests/test_site.py +0 -70
  68. parsl/tests/test_aalst_patterns.py +0 -474
  69. parsl/tests/test_docs/test_workflow2.py +0 -42
  70. parsl/tests/test_error_handling/test_rand_fail.py +0 -171
  71. parsl/tests/test_regression/test_854.py +0 -62
  72. parsl/tests/test_serialization/test_pack_resource_spec.py +0 -23
  73. {parsl-2025.9.8.data → parsl-2025.11.10.data}/scripts/exec_parsl_function.py +0 -0
  74. {parsl-2025.9.8.data → parsl-2025.11.10.data}/scripts/parsl_coprocess.py +0 -0
  75. {parsl-2025.9.8.dist-info → parsl-2025.11.10.dist-info}/entry_points.txt +0 -0
  76. {parsl-2025.9.8.dist-info → parsl-2025.11.10.dist-info/licenses}/LICENSE +0 -0
  77. {parsl-2025.9.8.dist-info → parsl-2025.11.10.dist-info}/top_level.txt +0 -0
parsl/tests/test_python_apps/test_basic.py CHANGED
@@ -14,12 +14,6 @@ def import_square(x):
14
14
  return math.pow(x, 2)
15
15
 
16
16
 
17
- @python_app
18
- def custom_exception():
19
- from globus_sdk import GlobusError
20
- raise GlobusError('foobar')
21
-
22
-
23
17
  def test_simple(n=2):
24
18
  x = double(n)
25
19
  assert x.result() == n * 2
@@ -38,11 +32,3 @@ def test_parallel_for(n):
38
32
 
39
33
  for i in d:
40
34
  assert d[i].result() == 2 * i
41
-
42
-
43
- def test_custom_exception():
44
- from globus_sdk import GlobusError
45
-
46
- x = custom_exception()
47
- with pytest.raises(GlobusError):
48
- x.result()
parsl/tests/test_python_apps/test_depfail_propagation.py CHANGED
@@ -1,5 +1,7 @@
1
+ import parsl
1
2
  from parsl import python_app
2
3
  from parsl.dataflow.errors import DependencyError
4
+ from parsl.dataflow.states import States
3
5
 
4
6
 
5
7
  @python_app
@@ -14,6 +16,7 @@ def depends(parent):
14
16
 
15
17
  def test_depfail_once():
16
18
  """Test the simplest dependency failure case"""
19
+ start_dep_fail_count = parsl.dfk().task_state_counts[States.dep_fail]
17
20
  f1 = fails()
18
21
  f2 = depends(f1)
19
22
 
@@ -25,9 +28,12 @@ def test_depfail_once():
25
28
  # in the DependencyError message
26
29
  assert ("task " + str(f1.task_record['id'])) in str(f2.exception())
27
30
 
31
+ assert parsl.dfk().task_state_counts[States.dep_fail] == start_dep_fail_count + 1
32
+
28
33
 
29
34
  def test_depfail_chain():
30
35
  """Test that dependency failures chain"""
36
+ start_dep_fail_count = parsl.dfk().task_state_counts[States.dep_fail]
31
37
  f1 = fails()
32
38
  f2 = depends(f1)
33
39
  f3 = depends(f2)
@@ -39,11 +45,13 @@ def test_depfail_chain():
39
45
  assert isinstance(f3.exception(), DependencyError)
40
46
  assert isinstance(f4.exception(), DependencyError)
41
47
 
48
+ assert parsl.dfk().task_state_counts[States.dep_fail] == start_dep_fail_count + 3
49
+
42
50
 
43
51
  def test_depfail_branches():
44
52
  """Test that dependency failures propagate in the
45
53
  presence of multiple downstream tasks."""
46
-
54
+ start_dep_fail_count = parsl.dfk().task_state_counts[States.dep_fail]
47
55
  f1 = fails()
48
56
  f2 = depends(f1)
49
57
  f3 = depends(f1)
@@ -52,3 +60,5 @@ def test_depfail_branches():
52
60
  assert not isinstance(f1.exception(), DependencyError)
53
61
  assert isinstance(f2.exception(), DependencyError)
54
62
  assert isinstance(f3.exception(), DependencyError)
63
+
64
+ assert parsl.dfk().task_state_counts[States.dep_fail] == start_dep_fail_count + 2
parsl/tests/test_python_apps/test_exception.py ADDED
@@ -0,0 +1,19 @@
1
+ import pytest
2
+
3
+ from parsl.app.app import python_app
4
+
5
+
6
+ class CustomException(Exception):
7
+ pass
8
+
9
+
10
+ @python_app
11
+ def custom_exception():
12
+ from parsl.tests.test_python_apps.test_exception import CustomException
13
+ raise CustomException('foobar')
14
+
15
+
16
+ def test_custom_exception():
17
+ x = custom_exception()
18
+ with pytest.raises(CustomException):
19
+ x.result()
parsl/tests/test_python_apps/test_garbage_collect.py CHANGED
@@ -27,10 +27,5 @@ def test_garbage_collect():
27
27
 
28
28
  evt.set()
29
29
  assert x.result() == 10 * 4
30
- if parsl.dfk().checkpoint_mode is not None:
31
- # We explicit call checkpoint if checkpoint_mode is enabled covering
32
- # cases like manual/periodic where checkpointing may be deferred.
33
- parsl.dfk().checkpoint()
34
-
35
- time.sleep(0.01) # Give enough time for task wipes to work
30
+ time.sleep(0.01) # Give enough time for task wipes to work - see issue #1279
36
31
  assert x.tid not in parsl.dfk().tasks, "Task record should be wiped after task completion"
parsl/tests/test_python_apps/test_memoize_2.py CHANGED
@@ -4,7 +4,17 @@ import pytest
4
4
 
5
5
  import parsl
6
6
  from parsl.app.app import python_app
7
- from parsl.tests.configs.local_threads_no_cache import fresh_config as local_config
7
+ from parsl.config import Config
8
+ from parsl.executors.threads import ThreadPoolExecutor
9
+
10
+
11
+ def local_config():
12
+ return Config(
13
+ executors=[
14
+ ThreadPoolExecutor(max_threads=4),
15
+ ],
16
+ app_cache=False
17
+ )
8
18
 
9
19
 
10
20
  @python_app
parsl/tests/test_regression/test_3874.py ADDED
@@ -0,0 +1,47 @@
1
+ import shutil
2
+
3
+ import pytest
4
+
5
+ import parsl
6
+ from parsl.app.app import python_app
7
+ from parsl.config import Config
8
+ from parsl.executors import HighThroughputExecutor
9
+
10
+
11
+ @python_app
12
+ def noop():
13
+ pass
14
+
15
+
16
+ @pytest.mark.local
17
+ def test_regression_3874(tmpd_cwd_session):
18
+ # HTEX run 1
19
+
20
+ rundir_1 = str(tmpd_cwd_session / "1")
21
+
22
+ config = Config(executors=[HighThroughputExecutor()], strategy_period=0.5)
23
+ config.run_dir = rundir_1
24
+
25
+ with parsl.load(config):
26
+ noop().result()
27
+
28
+ # It is necessary to delete this rundir to exercise the bug. Otherwise,
29
+ # the next run will be able to continue looking at this directory - the
30
+ # bug manifests when it cannot.
31
+
32
+ shutil.rmtree(rundir_1)
33
+
34
+ # HTEX run 2
35
+ # In the case of issue 3874, this run hangs (rather than failing) as the
36
+ # JobStatusPoller fails to collect status of all of its managed tasks
37
+ # every iteration, without converging towards failure.
38
+
39
+ rundir_2 = str(tmpd_cwd_session / "2")
40
+
41
+ config = Config(executors=[HighThroughputExecutor()], strategy_period=0.5)
42
+ config.run_dir = rundir_2
43
+
44
+ with parsl.load(config):
45
+ noop().result()
46
+
47
+ shutil.rmtree(rundir_2)
parsl/tests/test_scaling/test_regression_3696_oscillation.py CHANGED
@@ -51,6 +51,7 @@ def test_htex_strategy_does_not_oscillate(ns):
51
51
  executor.outstanding = lambda: n_tasks
52
52
  executor.status_facade = statuses
53
53
  executor.workers_per_node = n_workers
54
+ executor.bad_state_is_set = False
54
55
 
55
56
  provider.parallelism = 1
56
57
  provider.init_blocks = 0
parsl/tests/test_staging/test_staging_globus.py CHANGED
@@ -3,9 +3,9 @@ import pytest
3
3
  import parsl
4
4
  from parsl.app.app import python_app
5
5
  from parsl.data_provider.files import File
6
- from parsl.tests.configs.local_threads_globus import config, remote_writeable
6
+ from parsl.tests.configs.local_threads_globus import fresh_config, remote_writeable
7
7
 
8
- local_config = config
8
+ local_config = fresh_config
9
9
 
10
10
 
11
11
  @python_app
parsl/tests/unit/test_globus_compute_executor.py CHANGED
@@ -2,18 +2,21 @@ import random
2
2
  from unittest import mock
3
3
 
4
4
  import pytest
5
- from globus_compute_sdk import Executor
6
5
 
7
6
  from parsl.executors import GlobusComputeExecutor
8
7
 
9
8
 
10
9
  @pytest.fixture
11
10
  def mock_ex():
12
- # Not Parsl's job to test GC's Executor
11
+ # Not Parsl's job to test GC's Executor, although it
12
+ # still needs to be importable for these test cases.
13
+ from globus_compute_sdk import Executor
14
+
13
15
  yield mock.Mock(spec=Executor)
14
16
 
15
17
 
16
18
  @pytest.mark.local
19
+ @pytest.mark.globus_compute
17
20
  def test_gc_executor_mock_spec(mock_ex):
18
21
  # a test of tests -- make sure we're using spec= in the mock
19
22
  with pytest.raises(AttributeError):
@@ -21,12 +24,14 @@ def test_gc_executor_mock_spec(mock_ex):
21
24
 
22
25
 
23
26
  @pytest.mark.local
27
+ @pytest.mark.globus_compute
24
28
  def test_gc_executor_label_default(mock_ex):
25
29
  gce = GlobusComputeExecutor(mock_ex)
26
30
  assert gce.label == type(gce).__name__, "Expect reasonable default label"
27
31
 
28
32
 
29
33
  @pytest.mark.local
34
+ @pytest.mark.globus_compute
30
35
  def test_gc_executor_label(mock_ex, randomstring):
31
36
  exp_label = randomstring()
32
37
  gce = GlobusComputeExecutor(mock_ex, label=exp_label)
@@ -34,6 +39,7 @@ def test_gc_executor_label(mock_ex, randomstring):
34
39
 
35
40
 
36
41
  @pytest.mark.local
42
+ @pytest.mark.globus_compute
37
43
  def test_gc_executor_resets_spec_after_submit(mock_ex, randomstring):
38
44
  submit_res = {randomstring(): "some submit res"}
39
45
  res = {"some": randomstring(), "spec": randomstring()}
@@ -57,6 +63,7 @@ def test_gc_executor_resets_spec_after_submit(mock_ex, randomstring):
57
63
 
58
64
 
59
65
  @pytest.mark.local
66
+ @pytest.mark.globus_compute
60
67
  def test_gc_executor_resets_uep_after_submit(mock_ex, randomstring):
61
68
  uep_conf = randomstring()
62
69
  res = {"some": randomstring()}
@@ -79,6 +86,7 @@ def test_gc_executor_resets_uep_after_submit(mock_ex, randomstring):
79
86
 
80
87
 
81
88
  @pytest.mark.local
89
+ @pytest.mark.globus_compute
82
90
  def test_gc_executor_happy_path(mock_ex, randomstring):
83
91
  mock_fn = mock.Mock()
84
92
  args = tuple(randomstring() for _ in range(random.randint(0, 3)))
@@ -95,6 +103,7 @@ def test_gc_executor_happy_path(mock_ex, randomstring):
95
103
 
96
104
 
97
105
  @pytest.mark.local
106
+ @pytest.mark.globus_compute
98
107
  def test_gc_executor_shuts_down_asynchronously(mock_ex):
99
108
  gce = GlobusComputeExecutor(mock_ex)
100
109
  gce.shutdown()
parsl/utils.py CHANGED
@@ -11,7 +11,6 @@ from types import TracebackType
11
11
  from typing import (
12
12
  IO,
13
13
  Any,
14
- AnyStr,
15
14
  Callable,
16
15
  Dict,
17
16
  Generator,
@@ -132,7 +131,13 @@ def get_std_fname_mode(
132
131
  mode = 'a+'
133
132
  elif isinstance(stdfspec, tuple):
134
133
  if len(stdfspec) != 2:
135
- msg = (f"std descriptor {fdname} has incorrect tuple length "
134
+ # this is annotated as unreachable because the type annotation says
135
+ # it cannot be reached. Earlier versions of typeguard did not enforce
136
+ # that type annotation at runtime, though, and the parameters to this
137
+ # function come from the user.
138
+ # When typeguard lower bound is raised to around version 4, this
139
+ # unreachable can be removed.
140
+ msg = (f"std descriptor {fdname} has incorrect tuple length " # type: ignore[unreachable]
136
141
  f"{len(stdfspec)}")
137
142
  raise pe.BadStdStreamFile(msg)
138
143
  fname, mode = stdfspec
@@ -157,7 +162,7 @@ def wait_for_file(path: str, seconds: int = 10) -> Generator[None, None, None]:
157
162
 
158
163
 
159
164
  @contextmanager
160
- def time_limited_open(path: str, mode: str, seconds: int = 1) -> Generator[IO[AnyStr], None, None]:
165
+ def time_limited_open(path: str, mode: str, seconds: int = 1) -> Generator[IO, None, None]:
161
166
  with wait_for_file(path, seconds):
162
167
  logger.debug("wait_for_file yielded")
163
168
  f = open(path, mode)
parsl/version.py CHANGED
@@ -3,4 +3,4 @@
3
3
  Year.Month.Day[alpha/beta/..]
4
4
  Alphas will be numbered like this -> 2024.12.10a0
5
5
  """
6
- VERSION = '2025.09.08'
6
+ VERSION = '2025.11.10'
{parsl-2025.9.8.data → parsl-2025.11.10.data}/scripts/interchange.py CHANGED
@@ -23,7 +23,6 @@ from parsl.monitoring.radios.base import MonitoringRadioSender
23
23
  from parsl.monitoring.radios.zmq import ZMQRadioSender
24
24
  from parsl.process_loggers import wrap_with_logs
25
25
  from parsl.serialize import serialize as serialize_object
26
- from parsl.utils import setproctitle
27
26
  from parsl.version import VERSION as PARSL_VERSION
28
27
 
29
28
  PKL_HEARTBEAT_CODE = pickle.dumps((2 ** 32) - 1)
@@ -56,6 +55,7 @@ class Interchange:
56
55
  cert_dir: Optional[str],
57
56
  manager_selector: ManagerSelector,
58
57
  run_id: str,
58
+ _check_python_mismatch: bool,
59
59
  ) -> None:
60
60
  """
61
61
  Parameters
@@ -99,6 +99,11 @@ class Interchange:
99
99
 
100
100
  cert_dir : str | None
101
101
  Path to the certificate directory.
102
+
103
+ _check_python_mismatch : bool
104
+ If True, the interchange and worker managers must run the same version of
105
+ Python. Running different versions can cause inter-process communication
106
+ errors, so proceed with caution.
102
107
  """
103
108
  self.cert_dir = cert_dir
104
109
  self.logdir = logdir
@@ -126,15 +131,13 @@ class Interchange:
126
131
  logger.info("Connected to client")
127
132
 
128
133
  self.run_id = run_id
134
+ self._check_python_mismatch = _check_python_mismatch
129
135
 
130
136
  self.hub_address = hub_address
131
137
  self.hub_zmq_port = hub_zmq_port
132
138
 
133
139
  self.pending_task_queue: SortedList[Any] = SortedList(key=lambda tup: (tup[0], tup[1]))
134
140
 
135
- # count of tasks that have been received from the submit side
136
- self.task_counter = 0
137
-
138
141
  # count of tasks that have been sent out to worker pools
139
142
  self.count = 0
140
143
 
@@ -157,6 +160,7 @@ class Interchange:
157
160
  logger.info(f"Bound to port {worker_port} for incoming worker connections")
158
161
 
159
162
  self._ready_managers: Dict[bytes, ManagerRecord] = {}
163
+ self._logged_manager_count_token: object = None
160
164
  self.connected_block_history: List[str] = []
161
165
 
162
166
  self.heartbeat_threshold = heartbeat_threshold
@@ -213,7 +217,7 @@ class Interchange:
213
217
 
214
218
  reply: Any # the type of reply depends on the command_req received (aka this needs dependent types...)
215
219
 
216
- if self.command_channel in self.socks and self.socks[self.command_channel] == zmq.POLLIN:
220
+ if self.socks.get(self.command_channel) == zmq.POLLIN:
217
221
  logger.debug("entering command_server section")
218
222
 
219
223
  command_req = self.command_channel.recv_pyobj()
@@ -310,6 +314,7 @@ class Interchange:
310
314
  self.process_manager_socket_message(interesting_managers, monitoring_radio, kill_event)
311
315
  self.expire_bad_managers(interesting_managers, monitoring_radio)
312
316
  self.expire_drained_managers(interesting_managers, monitoring_radio)
317
+ self.log_manager_counts(interesting_managers)
313
318
  self.process_tasks_to_send(interesting_managers, monitoring_radio)
314
319
 
315
320
  self.zmq_context.destroy()
@@ -321,20 +326,20 @@ class Interchange:
321
326
  """Process incoming task message(s).
322
327
  """
323
328
 
324
- if self.task_incoming in self.socks and self.socks[self.task_incoming] == zmq.POLLIN:
329
+ if self.socks.get(self.task_incoming) == zmq.POLLIN:
325
330
  logger.debug("start task_incoming section")
326
331
  msg = self.task_incoming.recv_pyobj()
327
332
 
328
333
  # Process priority, higher number = lower priority
329
- resource_spec = msg.get('resource_spec', {})
334
+ task_id = msg['task_id']
335
+ resource_spec = msg['context'].get('resource_spec', {})
330
336
  priority = resource_spec.get('priority', float('inf'))
331
- queue_entry = (-priority, -self.task_counter, msg)
337
+ queue_entry = (-priority, -task_id, msg)
332
338
 
333
- logger.debug("putting message onto pending_task_queue")
339
+ logger.debug("Putting task %s onto pending_task_queue", task_id)
334
340
 
335
341
  self.pending_task_queue.add(queue_entry)
336
- self.task_counter += 1
337
- logger.debug(f"Fetched {self.task_counter} tasks so far")
342
+ logger.debug("Put task %s onto pending_task_queue", task_id)
338
343
 
339
344
  def process_manager_socket_message(
340
345
  self,
@@ -354,9 +359,10 @@ class Interchange:
354
359
  mtype = meta['type']
355
360
  except Exception as e:
356
361
  logger.warning(
357
- f'Failed to read manager message ([{type(e).__name__}] {e})'
362
+ 'Failed to read manager message; ignoring message'
363
+ f' (Exception: [{type(e).__name__}] {e})'
358
364
  )
359
- logger.debug('Message:\n %r\n', msg_parts, exc_info=e)
365
+ logger.debug('Raw message bytes:\n %r\n', msg_parts, exc_info=e)
360
366
  return
361
367
 
362
368
  logger.debug(
@@ -396,7 +402,9 @@ class Interchange:
396
402
  logger.info(f'Registration info for manager {manager_id!r}: {meta}')
397
403
  self._send_monitoring_info(monitoring_radio, new_rec)
398
404
 
399
- if (mgr_minor_py, mgr_parsl_v) != (ix_minor_py, ix_parsl_v):
405
+ python_mismatch: bool = ix_minor_py != mgr_minor_py
406
+ parsl_mismatch: bool = ix_parsl_v != mgr_parsl_v
407
+ if parsl_mismatch or (self._check_python_mismatch and python_mismatch):
400
408
  kill_event.set()
401
409
  vm_exc = VersionMismatch(
402
410
  f"py.v={ix_minor_py} parsl.v={ix_parsl_v}",
@@ -517,15 +525,24 @@ class Interchange:
517
525
  m['active'] = False
518
526
  self._send_monitoring_info(monitoring_radio, m)
519
527
 
528
+ def log_manager_counts(self, interesting_managers: Set[bytes]) -> None:
529
+ count_interesting = len(interesting_managers)
530
+ count_ready = len(self._ready_managers)
531
+
532
+ new_logged_manager_count_token = (count_interesting, count_ready)
533
+
534
+ if self._logged_manager_count_token != new_logged_manager_count_token:
535
+
536
+ logger.debug(
537
+ "Managers count (interesting/total): %d/%d",
538
+ count_interesting,
539
+ count_ready
540
+ )
541
+ self._logged_manager_count_token = new_logged_manager_count_token
542
+
520
543
  def process_tasks_to_send(self, interesting_managers: Set[bytes], monitoring_radio: Optional[MonitoringRadioSender]) -> None:
521
544
  # Check if there are tasks that could be sent to managers
522
545
 
523
- logger.debug(
524
- "Managers count (interesting/total): %d/%d",
525
- len(interesting_managers),
526
- len(self._ready_managers)
527
- )
528
-
529
546
  if interesting_managers and self.pending_task_queue:
530
547
  shuffled_managers = self.manager_selector.sort_managers(self._ready_managers, interesting_managers)
531
548
 
@@ -618,6 +635,8 @@ def start_file_logger(filename: str, level: int = logging.DEBUG, format_string:
618
635
 
619
636
 
620
637
  if __name__ == "__main__":
638
+ from parsl.utils import setproctitle
639
+
621
640
  setproctitle("parsl: HTEX interchange")
622
641
 
623
642
  config = pickle.load(sys.stdin.buffer)
{parsl-2025.9.8.data → parsl-2025.11.10.data}/scripts/process_worker_pool.py CHANGED
@@ -1,6 +1,7 @@
1
1
  #!python
2
2
 
3
3
  import argparse
4
+ import importlib
4
5
  import logging
5
6
  import math
6
7
  import multiprocessing
@@ -17,7 +18,7 @@ from importlib.metadata import distributions
17
18
  from multiprocessing.context import SpawnProcess
18
19
  from multiprocessing.managers import DictProxy
19
20
  from multiprocessing.sharedctypes import Synchronized
20
- from typing import Dict, List, Optional, Sequence
21
+ from typing import Callable, Dict, List, Optional, Sequence
21
22
 
22
23
  import psutil
23
24
  import zmq
@@ -348,7 +349,7 @@ class Manager:
348
349
 
349
350
  logger.debug(
350
351
  'ready workers: %d, pending tasks: %d',
351
- self.ready_worker_count.value, # type: ignore[attr-defined]
352
+ self.ready_worker_count.value,
352
353
  pending_task_count,
353
354
  )
354
355
 
@@ -373,10 +374,12 @@ class Manager:
373
374
  if socks.get(ix_sock) == zmq.POLLIN:
374
375
  pkl_msg = ix_sock.recv()
375
376
  tasks = pickle.loads(pkl_msg)
377
+ del pkl_msg
378
+
376
379
  last_interchange_contact = time.time()
377
380
 
378
381
  if tasks == HEARTBEAT_CODE:
379
- logger.debug("Got heartbeat from interchange")
382
+ logger.debug("Got heartbeat response from interchange")
380
383
  elif tasks == DRAINED_CODE:
381
384
  logger.info("Got fully drained message from interchange - setting kill flag")
382
385
  self._stop_event.set()
@@ -454,6 +457,7 @@ class Manager:
454
457
  'exception': serialize(RemoteExceptionWrapper(*sys.exc_info()))}
455
458
  pkl_package = pickle.dumps(result_package)
456
459
  self.pending_result_queue.put(pkl_package)
460
+ del pkl_package
457
461
  except KeyError:
458
462
  logger.info("Worker {} was not busy when it died".format(worker_id))
459
463
 
@@ -603,6 +607,10 @@ def update_resource_spec_env_vars(mpi_launcher: str, resource_spec: Dict, node_i
603
607
 
604
608
 
605
609
  def _init_mpi_env(mpi_launcher: str, resource_spec: Dict):
610
+ for varname in resource_spec:
611
+ envname = "PARSL_" + str(varname).upper()
612
+ os.environ[envname] = str(resource_spec[varname])
613
+
606
614
  node_list = resource_spec.get("MPI_NODELIST")
607
615
  if node_list is None:
608
616
  return
@@ -753,8 +761,8 @@ def worker(
753
761
  worker_enqueued = True
754
762
 
755
763
  try:
756
- # The worker will receive {'task_id':<tid>, 'buffer':<buf>}
757
764
  req = task_queue.get(timeout=task_queue_timeout)
765
+ # req is {'task_id':<tid>, 'buffer':<buf>, 'resource_spec':<dict>}
758
766
  except queue.Empty:
759
767
  continue
760
768
 
@@ -766,17 +774,33 @@ def worker(
766
774
  ready_worker_count.value -= 1
767
775
  worker_enqueued = False
768
776
 
769
- _init_mpi_env(mpi_launcher=mpi_launcher, resource_spec=req["resource_spec"])
777
+ ctxt = req["context"]
778
+ res_spec = ctxt.get("resource_spec", {})
779
+
780
+ _init_mpi_env(mpi_launcher=mpi_launcher, resource_spec=res_spec)
781
+
782
+ exec_func: Callable = execute_task
783
+ exec_args = ()
784
+ exec_kwargs = {}
770
785
 
771
786
  try:
772
- result = execute_task(req['buffer'])
787
+ if task_executor := ctxt.get("task_executor", None):
788
+ mod_name, _, fn_name = task_executor["f"].rpartition(".")
789
+ exec_mod = importlib.import_module(mod_name)
790
+ exec_func = getattr(exec_mod, fn_name)
791
+
792
+ exec_args = task_executor.get("a", ())
793
+ exec_kwargs = task_executor.get("k", {})
794
+
795
+ result = exec_func(req['buffer'], *exec_args, **exec_kwargs)
773
796
  serialized_result = serialize(result, buffer_threshold=1000000)
774
797
  except Exception as e:
775
798
  logger.info('Caught an exception: {}'.format(e))
776
799
  result_package = {'type': 'result', 'task_id': tid, 'exception': serialize(RemoteExceptionWrapper(*sys.exc_info()))}
777
800
  else:
778
801
  result_package = {'type': 'result', 'task_id': tid, 'result': serialized_result}
779
- # logger.debug("Result: {}".format(result))
802
+ del serialized_result
803
+ del req
780
804
 
781
805
  logger.info("Completed executor task {}".format(tid))
782
806
  try:
@@ -788,6 +812,7 @@ def worker(
788
812
  })
789
813
 
790
814
  result_queue.put(pkl_package)
815
+ del pkl_package, result_package
791
816
  tasks_in_progress.pop(worker_id)
792
817
  logger.info("All processing finished for executor task {}".format(tid))
793
818