parsl 2024.11.18__py3-none-any.whl → 2024.12.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. parsl/addresses.py +19 -1
  2. parsl/channels/base.py +0 -28
  3. parsl/channels/local/local.py +0 -36
  4. parsl/configs/ASPIRE1.py +0 -1
  5. parsl/curvezmq.py +4 -0
  6. parsl/executors/execute_task.py +37 -0
  7. parsl/executors/flux/execute_parsl_task.py +1 -1
  8. parsl/executors/high_throughput/executor.py +29 -7
  9. parsl/executors/high_throughput/interchange.py +8 -7
  10. parsl/executors/high_throughput/mpi_executor.py +2 -0
  11. parsl/executors/high_throughput/mpi_resource_management.py +2 -3
  12. parsl/executors/high_throughput/probe.py +4 -4
  13. parsl/executors/high_throughput/process_worker_pool.py +15 -43
  14. parsl/executors/high_throughput/zmq_pipes.py +18 -8
  15. parsl/executors/radical/rpex_worker.py +2 -2
  16. parsl/executors/workqueue/exec_parsl_function.py +1 -1
  17. parsl/providers/condor/condor.py +3 -5
  18. parsl/providers/grid_engine/grid_engine.py +2 -3
  19. parsl/providers/local/local.py +1 -15
  20. parsl/providers/lsf/lsf.py +2 -12
  21. parsl/providers/pbspro/pbspro.py +1 -3
  22. parsl/providers/slurm/slurm.py +1 -11
  23. parsl/providers/torque/torque.py +1 -3
  24. parsl/serialize/facade.py +3 -3
  25. parsl/tests/configs/htex_local.py +1 -0
  26. parsl/tests/configs/htex_local_alternate.py +0 -1
  27. parsl/tests/configs/local_threads_monitoring.py +0 -1
  28. parsl/tests/manual_tests/test_udp_simple.py +0 -1
  29. parsl/tests/test_execute_task.py +29 -0
  30. parsl/tests/test_htex/test_zmq_binding.py +3 -2
  31. parsl/tests/test_monitoring/test_htex_init_blocks_vs_monitoring.py +0 -1
  32. parsl/tests/test_monitoring/test_stdouterr.py +0 -1
  33. parsl/tests/unit/test_address.py +20 -0
  34. parsl/version.py +1 -1
  35. {parsl-2024.11.18.data → parsl-2024.12.2.data}/scripts/exec_parsl_function.py +1 -1
  36. {parsl-2024.11.18.data → parsl-2024.12.2.data}/scripts/interchange.py +8 -7
  37. {parsl-2024.11.18.data → parsl-2024.12.2.data}/scripts/process_worker_pool.py +15 -43
  38. {parsl-2024.11.18.dist-info → parsl-2024.12.2.dist-info}/METADATA +2 -2
  39. {parsl-2024.11.18.dist-info → parsl-2024.12.2.dist-info}/RECORD +44 -41
  40. {parsl-2024.11.18.dist-info → parsl-2024.12.2.dist-info}/WHEEL +1 -1
  41. {parsl-2024.11.18.data → parsl-2024.12.2.data}/scripts/parsl_coprocess.py +0 -0
  42. {parsl-2024.11.18.dist-info → parsl-2024.12.2.dist-info}/LICENSE +0 -0
  43. {parsl-2024.11.18.dist-info → parsl-2024.12.2.dist-info}/entry_points.txt +0 -0
  44. {parsl-2024.11.18.dist-info → parsl-2024.12.2.dist-info}/top_level.txt +0 -0
parsl/addresses.py CHANGED
@@ -6,6 +6,7 @@ The helper to use depends on the network environment around the submitter,
6
6
  so some experimentation will probably be needed to choose the correct one.
7
7
  """
8
8
 
9
+ import ipaddress
9
10
  import logging
10
11
  import platform
11
12
  import socket
@@ -17,7 +18,7 @@ try:
17
18
  except ImportError:
18
19
  fcntl = None # type: ignore[assignment]
19
20
  import struct
20
- from typing import Callable, List, Set
21
+ from typing import Callable, List, Set, Union
21
22
 
22
23
  import psutil
23
24
  import typeguard
@@ -156,3 +157,20 @@ def get_any_address() -> str:
156
157
  if addr == '':
157
158
  raise Exception('Cannot find address of the local machine.')
158
159
  return addr
160
+
161
+
162
+ def tcp_url(address: str, port: Union[str, int, None] = None) -> str:
163
+ """Construct a tcp url safe for IPv4 and IPv6"""
164
+ if address == "*":
165
+ return "tcp://*"
166
+
167
+ ip_addr = ipaddress.ip_address(address)
168
+
169
+ port_suffix = f":{port}" if port else ""
170
+
171
+ if ip_addr.version == 6 and port_suffix:
172
+ url = f"tcp://[{address}]{port_suffix}"
173
+ else:
174
+ url = f"tcp://{address}{port_suffix}"
175
+
176
+ return url
parsl/channels/base.py CHANGED
@@ -52,31 +52,3 @@ class Channel(metaclass=ABCMeta):
52
52
  @script_dir.setter
53
53
  def script_dir(self, value: str) -> None:
54
54
  pass
55
-
56
- @abstractmethod
57
- def push_file(self, source: str, dest_dir: str) -> str:
58
- ''' Channel will take care of moving the file from source to the destination
59
- directory
60
-
61
- Args:
62
- source (string) : Full filepath of the file to be moved
63
- dest_dir (string) : Absolute path of the directory to move to
64
-
65
- Returns:
66
- destination_path (string)
67
- '''
68
- pass
69
-
70
- @abstractmethod
71
- def pull_file(self, remote_source: str, local_dir: str) -> str:
72
- ''' Transport file on the remote side to a local directory
73
-
74
- Args:
75
- remote_source (string): remote_source
76
- local_dir (string): Local directory to copy to
77
-
78
-
79
- Returns:
80
- destination_path (string)
81
- '''
82
- pass
parsl/channels/local/local.py CHANGED
@@ -1,10 +1,8 @@
1
1
  import logging
2
2
  import os
3
- import shutil
4
3
  import subprocess
5
4
 
6
5
  from parsl.channels.base import Channel
7
- from parsl.channels.errors import FileCopyException
8
6
  from parsl.utils import RepresentationMixin
9
7
 
10
8
  logger = logging.getLogger(__name__)
@@ -57,40 +55,6 @@ class LocalChannel(Channel, RepresentationMixin):
57
55
 
58
56
  return (retcode, stdout.decode("utf-8"), stderr.decode("utf-8"))
59
57
 
60
- def push_file(self, source, dest_dir):
61
- ''' If the source files dirpath is the same as dest_dir, a copy
62
- is not necessary, and nothing is done. Else a copy is made.
63
-
64
- Args:
65
- - source (string) : Path to the source file
66
- - dest_dir (string) : Path to the directory to which the files is to be copied
67
-
68
- Returns:
69
- - destination_path (String) : Absolute path of the destination file
70
-
71
- Raises:
72
- - FileCopyException : If file copy failed.
73
- '''
74
-
75
- local_dest = os.path.join(dest_dir, os.path.basename(source))
76
-
77
- # Only attempt to copy if the target dir and source dir are different
78
- if os.path.dirname(source) != dest_dir:
79
- try:
80
- shutil.copyfile(source, local_dest)
81
- os.chmod(local_dest, 0o700)
82
-
83
- except OSError as e:
84
- raise FileCopyException(e, "localhost")
85
-
86
- else:
87
- os.chmod(local_dest, 0o700)
88
-
89
- return local_dest
90
-
91
- def pull_file(self, remote_source, local_dir):
92
- return self.push_file(remote_source, local_dir)
93
-
94
58
  @property
95
59
  def script_dir(self):
96
60
  return self._script_dir
parsl/configs/ASPIRE1.py CHANGED
@@ -34,7 +34,6 @@ config = Config(
34
34
  ],
35
35
  monitoring=MonitoringHub(
36
36
  hub_address=address_by_interface('ib0'),
37
- hub_port=55055,
38
37
  resource_monitoring_interval=10,
39
38
  ),
40
39
  strategy='simple',
parsl/curvezmq.py CHANGED
@@ -160,6 +160,9 @@ class ServerContext(BaseContext):
160
160
  except zmq.ZMQError as e:
161
161
  raise ValueError("Invalid CurveZMQ key format") from e
162
162
  sock.setsockopt(zmq.CURVE_SERVER, True) # Must come before bind
163
+
164
+ # This flag enables IPV6 in addition to IPV4
165
+ sock.setsockopt(zmq.IPV6, True)
163
166
  return sock
164
167
 
165
168
  def term(self):
@@ -202,4 +205,5 @@ class ClientContext(BaseContext):
202
205
  sock.setsockopt(zmq.CURVE_SERVERKEY, server_public_key)
203
206
  except zmq.ZMQError as e:
204
207
  raise ValueError("Invalid CurveZMQ key format") from e
208
+ sock.setsockopt(zmq.IPV6, True)
205
209
  return sock
parsl/executors/execute_task.py ADDED
@@ -0,0 +1,37 @@
1
+ import os
2
+
3
+ from parsl.serialize import unpack_res_spec_apply_message
4
+
5
+
6
+ def execute_task(bufs: bytes):
7
+ """Deserialize the buffer and execute the task.
8
+ Returns the result or throws exception.
9
+ """
10
+ f, args, kwargs, resource_spec = unpack_res_spec_apply_message(bufs)
11
+
12
+ for varname in resource_spec:
13
+ envname = "PARSL_" + str(varname).upper()
14
+ os.environ[envname] = str(resource_spec[varname])
15
+
16
+ # We might need to look into callability of the function from itself
17
+ # since we change it's name in the new namespace
18
+ prefix = "parsl_"
19
+ fname = prefix + "f"
20
+ argname = prefix + "args"
21
+ kwargname = prefix + "kwargs"
22
+ resultname = prefix + "result"
23
+
24
+ code = "{0} = {1}(*{2}, **{3})".format(resultname, fname,
25
+ argname, kwargname)
26
+
27
+ user_ns = locals()
28
+ user_ns.update({
29
+ '__builtins__': __builtins__,
30
+ fname: f,
31
+ argname: args,
32
+ kwargname: kwargs,
33
+ resultname: resultname
34
+ })
35
+
36
+ exec(code, user_ns, user_ns)
37
+ return user_ns.get(resultname)
parsl/executors/flux/execute_parsl_task.py CHANGED
@@ -4,8 +4,8 @@ import argparse
4
4
  import logging
5
5
  import os
6
6
 
7
+ from parsl.executors.execute_task import execute_task
7
8
  from parsl.executors.flux import TaskResult
8
- from parsl.executors.high_throughput.process_worker_pool import execute_task
9
9
  from parsl.serialize import serialize
10
10
 
11
11
 
parsl/executors/high_throughput/executor.py CHANGED
@@ -86,7 +86,7 @@ GENERAL_HTEX_PARAM_DOCS = """provider : :class:`~parsl.providers.base.ExecutionP
86
86
 
87
87
  address : string
88
88
  An address to connect to the main Parsl process which is reachable from the network in which
89
- workers will be running. This field expects an IPv4 address (xxx.xxx.xxx.xxx).
89
+ workers will be running. This field expects an IPv4 or IPv6 address.
90
90
  Most login nodes on clusters have several network interfaces available, only some of which
91
91
  can be reached from the compute nodes. This field can be used to limit the executor to listen
92
92
  only on a specific interface, and limiting connections to the internal network.
@@ -94,6 +94,11 @@ GENERAL_HTEX_PARAM_DOCS = """provider : :class:`~parsl.providers.base.ExecutionP
94
94
  Setting an address here overrides the default behavior.
95
95
  default=None
96
96
 
97
+ loopback_address: string
98
+ Specify address used for internal communication between executor and interchange.
99
+ Supports IPv4 and IPv6 addresses
100
+ default=127.0.0.1
101
+
97
102
  worker_ports : (int, int)
98
103
  Specify the ports to be used by workers to connect to Parsl. If this option is specified,
99
104
  worker_port_range will not be honored.
@@ -224,6 +229,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
224
229
  Parsl will create names as integers starting with 0.
225
230
 
226
231
  default: empty list
232
+
227
233
  """
228
234
 
229
235
  @typeguard.typechecked
@@ -233,6 +239,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
233
239
  launch_cmd: Optional[str] = None,
234
240
  interchange_launch_cmd: Optional[Sequence[str]] = None,
235
241
  address: Optional[str] = None,
242
+ loopback_address: str = "127.0.0.1",
236
243
  worker_ports: Optional[Tuple[int, int]] = None,
237
244
  worker_port_range: Optional[Tuple[int, int]] = (54000, 55000),
238
245
  interchange_port_range: Optional[Tuple[int, int]] = (55000, 56000),
@@ -268,6 +275,8 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
268
275
  self.address = address
269
276
  self.address_probe_timeout = address_probe_timeout
270
277
  self.manager_selector = manager_selector
278
+ self.loopback_address = loopback_address
279
+
271
280
  if self.address:
272
281
  self.all_addresses = address
273
282
  else:
@@ -322,6 +331,9 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
322
331
  interchange_launch_cmd = DEFAULT_INTERCHANGE_LAUNCH_CMD
323
332
  self.interchange_launch_cmd = interchange_launch_cmd
324
333
 
334
+ self._result_queue_thread_exit = threading.Event()
335
+ self._result_queue_thread: Optional[threading.Thread] = None
336
+
325
337
  radio_mode = "htex"
326
338
  enable_mpi_mode: bool = False
327
339
  mpi_launcher: str = "mpiexec"
@@ -408,13 +420,13 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
408
420
  )
409
421
 
410
422
  self.outgoing_q = zmq_pipes.TasksOutgoing(
411
- "127.0.0.1", self.interchange_port_range, self.cert_dir
423
+ self.loopback_address, self.interchange_port_range, self.cert_dir
412
424
  )
413
425
  self.incoming_q = zmq_pipes.ResultsIncoming(
414
- "127.0.0.1", self.interchange_port_range, self.cert_dir
426
+ self.loopback_address, self.interchange_port_range, self.cert_dir
415
427
  )
416
428
  self.command_client = zmq_pipes.CommandClient(
417
- "127.0.0.1", self.interchange_port_range, self.cert_dir
429
+ self.loopback_address, self.interchange_port_range, self.cert_dir
418
430
  )
419
431
 
420
432
  self._result_queue_thread = None
@@ -446,9 +458,11 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
446
458
  """
447
459
  logger.debug("Result queue worker starting")
448
460
 
449
- while not self.bad_state_is_set:
461
+ while not self.bad_state_is_set and not self._result_queue_thread_exit.is_set():
450
462
  try:
451
- msgs = self.incoming_q.get()
463
+ msgs = self.incoming_q.get(timeout_ms=self.poll_period)
464
+ if msgs is None: # timeout
465
+ continue
452
466
 
453
467
  except IOError as e:
454
468
  logger.exception("Caught broken queue with exception code {}: {}".format(e.errno, e))
@@ -506,6 +520,8 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
506
520
  else:
507
521
  raise BadMessage("Message received with unknown type {}".format(msg['type']))
508
522
 
523
+ logger.info("Closing result ZMQ pipe")
524
+ self.incoming_q.close()
509
525
  logger.info("Result queue worker finished")
510
526
 
511
527
  def _start_local_interchange_process(self) -> None:
@@ -515,7 +531,7 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
515
531
  get the worker task and result ports that the interchange has bound to.
516
532
  """
517
533
 
518
- interchange_config = {"client_address": "127.0.0.1",
534
+ interchange_config = {"client_address": self.loopback_address,
519
535
  "client_ports": (self.outgoing_q.port,
520
536
  self.incoming_q.port,
521
537
  self.command_client.port),
@@ -808,6 +824,8 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
808
824
 
809
825
  logger.info("Attempting HighThroughputExecutor shutdown")
810
826
 
827
+ logger.info("Terminating interchange and result queue thread")
828
+ self._result_queue_thread_exit.set()
811
829
  self.interchange_proc.terminate()
812
830
  try:
813
831
  self.interchange_proc.wait(timeout=timeout)
@@ -832,6 +850,10 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin, UsageIn
832
850
  logger.info("Closing command client")
833
851
  self.command_client.close()
834
852
 
853
+ logger.info("Waiting for result queue thread exit")
854
+ if self._result_queue_thread:
855
+ self._result_queue_thread.join()
856
+
835
857
  logger.info("Finished HighThroughputExecutor shutdown attempt")
836
858
 
837
859
  def get_usage_information(self):
parsl/executors/high_throughput/interchange.py CHANGED
@@ -14,6 +14,7 @@ from typing import Any, Dict, List, NoReturn, Optional, Sequence, Set, Tuple, ca
14
14
  import zmq
15
15
 
16
16
  from parsl import curvezmq
17
+ from parsl.addresses import tcp_url
17
18
  from parsl.app.errors import RemoteExceptionWrapper
18
19
  from parsl.executors.high_throughput.errors import ManagerLost, VersionMismatch
19
20
  from parsl.executors.high_throughput.manager_record import ManagerRecord
@@ -115,13 +116,13 @@ class Interchange:
115
116
  self.zmq_context = curvezmq.ServerContext(self.cert_dir)
116
117
  self.task_incoming = self.zmq_context.socket(zmq.DEALER)
117
118
  self.task_incoming.set_hwm(0)
118
- self.task_incoming.connect("tcp://{}:{}".format(client_address, client_ports[0]))
119
+ self.task_incoming.connect(tcp_url(client_address, client_ports[0]))
119
120
  self.results_outgoing = self.zmq_context.socket(zmq.DEALER)
120
121
  self.results_outgoing.set_hwm(0)
121
- self.results_outgoing.connect("tcp://{}:{}".format(client_address, client_ports[1]))
122
+ self.results_outgoing.connect(tcp_url(client_address, client_ports[1]))
122
123
 
123
124
  self.command_channel = self.zmq_context.socket(zmq.REP)
124
- self.command_channel.connect("tcp://{}:{}".format(client_address, client_ports[2]))
125
+ self.command_channel.connect(tcp_url(client_address, client_ports[2]))
125
126
  logger.info("Connected to client")
126
127
 
127
128
  self.run_id = run_id
@@ -144,14 +145,14 @@ class Interchange:
144
145
  self.worker_task_port = self.worker_ports[0]
145
146
  self.worker_result_port = self.worker_ports[1]
146
147
 
147
- self.task_outgoing.bind(f"tcp://{self.interchange_address}:{self.worker_task_port}")
148
- self.results_incoming.bind(f"tcp://{self.interchange_address}:{self.worker_result_port}")
148
+ self.task_outgoing.bind(tcp_url(self.interchange_address, self.worker_task_port))
149
+ self.results_incoming.bind(tcp_url(self.interchange_address, self.worker_result_port))
149
150
 
150
151
  else:
151
- self.worker_task_port = self.task_outgoing.bind_to_random_port(f"tcp://{self.interchange_address}",
152
+ self.worker_task_port = self.task_outgoing.bind_to_random_port(tcp_url(self.interchange_address),
152
153
  min_port=worker_port_range[0],
153
154
  max_port=worker_port_range[1], max_tries=100)
154
- self.worker_result_port = self.results_incoming.bind_to_random_port(f"tcp://{self.interchange_address}",
155
+ self.worker_result_port = self.results_incoming.bind_to_random_port(tcp_url(self.interchange_address),
155
156
  min_port=worker_port_range[0],
156
157
  max_port=worker_port_range[1], max_tries=100)
157
158
 
parsl/executors/high_throughput/mpi_executor.py CHANGED
@@ -50,6 +50,7 @@ class MPIExecutor(HighThroughputExecutor):
50
50
  launch_cmd: Optional[str] = None,
51
51
  interchange_launch_cmd: Optional[str] = None,
52
52
  address: Optional[str] = None,
53
+ loopback_address: str = "127.0.0.1",
53
54
  worker_ports: Optional[Tuple[int, int]] = None,
54
55
  worker_port_range: Optional[Tuple[int, int]] = (54000, 55000),
55
56
  interchange_port_range: Optional[Tuple[int, int]] = (55000, 56000),
@@ -78,6 +79,7 @@ class MPIExecutor(HighThroughputExecutor):
78
79
  launch_cmd=launch_cmd,
79
80
  interchange_launch_cmd=interchange_launch_cmd,
80
81
  address=address,
82
+ loopback_address=loopback_address,
81
83
  worker_ports=worker_ports,
82
84
  worker_port_range=worker_port_range,
83
85
  interchange_port_range=interchange_port_range,
parsl/executors/high_throughput/mpi_resource_management.py CHANGED
@@ -160,9 +160,7 @@ class MPITaskScheduler(TaskScheduler):
160
160
  """Schedule task if resources are available otherwise backlog the task"""
161
161
  user_ns = locals()
162
162
  user_ns.update({"__builtins__": __builtins__})
163
- _f, _args, _kwargs, resource_spec = unpack_res_spec_apply_message(
164
- task_package["buffer"], user_ns, copy=False
165
- )
163
+ _f, _args, _kwargs, resource_spec = unpack_res_spec_apply_message(task_package["buffer"])
166
164
 
167
165
  nodes_needed = resource_spec.get("num_nodes")
168
166
  if nodes_needed:
@@ -177,6 +175,7 @@ class MPITaskScheduler(TaskScheduler):
177
175
  self._map_tasks_to_nodes[task_package["task_id"]] = allocated_nodes
178
176
  buffer = pack_res_spec_apply_message(_f, _args, _kwargs, resource_spec)
179
177
  task_package["buffer"] = buffer
178
+ task_package["resource_spec"] = resource_spec
180
179
 
181
180
  self.pending_task_q.put(task_package)
182
181
 
parsl/executors/high_throughput/probe.py CHANGED
@@ -6,7 +6,7 @@ import uuid
6
6
  import zmq
7
7
  from zmq.utils.monitor import recv_monitor_message
8
8
 
9
- from parsl.addresses import get_all_addresses
9
+ from parsl.addresses import get_all_addresses, tcp_url
10
10
 
11
11
  logger = logging.getLogger(__name__)
12
12
 
@@ -32,7 +32,8 @@ def probe_addresses(addresses, task_port, timeout=120):
32
32
  for addr in addresses:
33
33
  socket = context.socket(zmq.DEALER)
34
34
  socket.setsockopt(zmq.LINGER, 0)
35
- url = "tcp://{}:{}".format(addr, task_port)
35
+ socket.setsockopt(zmq.IPV6, True)
36
+ url = tcp_url(addr, task_port)
36
37
  logger.debug("Trying to connect back on {}".format(url))
37
38
  socket.connect(url)
38
39
  addr_map[addr] = {'sock': socket,
@@ -71,8 +72,7 @@ class TestWorker:
71
72
 
72
73
  address = probe_addresses(addresses, port)
73
74
  print("Viable address :", address)
74
- self.task_incoming.connect("tcp://{}:{}".format(address, port))
75
- print("Here")
75
+ self.task_incoming.connect(tcp_url(address, port))
76
76
 
77
77
  def heartbeat(self):
78
78
  """ Send heartbeat to the incoming task queue
parsl/executors/high_throughput/process_worker_pool.py CHANGED
@@ -22,7 +22,9 @@ import psutil
22
22
  import zmq
23
23
 
24
24
  from parsl import curvezmq
25
+ from parsl.addresses import tcp_url
25
26
  from parsl.app.errors import RemoteExceptionWrapper
27
+ from parsl.executors.execute_task import execute_task
26
28
  from parsl.executors.high_throughput.errors import WorkerLost
27
29
  from parsl.executors.high_throughput.mpi_prefix_composer import (
28
30
  VALID_LAUNCHERS,
@@ -35,7 +37,7 @@ from parsl.executors.high_throughput.mpi_resource_management import (
35
37
  from parsl.executors.high_throughput.probe import probe_addresses
36
38
  from parsl.multiprocessing import SpawnContext
37
39
  from parsl.process_loggers import wrap_with_logs
38
- from parsl.serialize import serialize, unpack_res_spec_apply_message
40
+ from parsl.serialize import serialize
39
41
  from parsl.version import VERSION as PARSL_VERSION
40
42
 
41
43
  HEARTBEAT_CODE = (2 ** 32) - 1
@@ -158,8 +160,8 @@ class Manager:
158
160
  raise Exception("No viable address found")
159
161
  else:
160
162
  logger.info("Connection to Interchange successful on {}".format(ix_address))
161
- task_q_url = "tcp://{}:{}".format(ix_address, task_port)
162
- result_q_url = "tcp://{}:{}".format(ix_address, result_port)
163
+ task_q_url = tcp_url(ix_address, task_port)
164
+ result_q_url = tcp_url(ix_address, result_port)
163
165
  logger.info("Task url : {}".format(task_q_url))
164
166
  logger.info("Result url : {}".format(result_q_url))
165
167
  except Exception:
@@ -590,45 +592,13 @@ def update_resource_spec_env_vars(mpi_launcher: str, resource_spec: Dict, node_i
590
592
  os.environ[key] = prefix_table[key]
591
593
 
592
594
 
593
- def execute_task(bufs, mpi_launcher: Optional[str] = None):
594
- """Deserialize the buffer and execute the task.
595
-
596
- Returns the result or throws exception.
597
- """
598
- user_ns = locals()
599
- user_ns.update({'__builtins__': __builtins__})
600
-
601
- f, args, kwargs, resource_spec = unpack_res_spec_apply_message(bufs, user_ns, copy=False)
602
-
603
- for varname in resource_spec:
604
- envname = "PARSL_" + str(varname).upper()
605
- os.environ[envname] = str(resource_spec[varname])
606
-
607
- if resource_spec.get("MPI_NODELIST"):
608
- worker_id = os.environ['PARSL_WORKER_RANK']
609
- nodes_for_task = resource_spec["MPI_NODELIST"].split(',')
610
- logger.info(f"Launching task on provisioned nodes: {nodes_for_task}")
611
- assert mpi_launcher
612
- update_resource_spec_env_vars(mpi_launcher,
613
- resource_spec=resource_spec,
614
- node_info=nodes_for_task)
615
- # We might need to look into callability of the function from itself
616
- # since we change it's name in the new namespace
617
- prefix = "parsl_"
618
- fname = prefix + "f"
619
- argname = prefix + "args"
620
- kwargname = prefix + "kwargs"
621
- resultname = prefix + "result"
622
-
623
- user_ns.update({fname: f,
624
- argname: args,
625
- kwargname: kwargs,
626
- resultname: resultname})
627
-
628
- code = "{0} = {1}(*{2}, **{3})".format(resultname, fname,
629
- argname, kwargname)
630
- exec(code, user_ns, user_ns)
631
- return user_ns.get(resultname)
595
+ def _init_mpi_env(mpi_launcher: str, resource_spec: Dict):
596
+ node_list = resource_spec.get("MPI_NODELIST")
597
+ if node_list is None:
598
+ return
599
+ nodes_for_task = node_list.split(',')
600
+ logger.info(f"Launching task on provisioned nodes: {nodes_for_task}")
601
+ update_resource_spec_env_vars(mpi_launcher=mpi_launcher, resource_spec=resource_spec, node_info=nodes_for_task)
632
602
 
633
603
 
634
604
  @wrap_with_logs(target="worker_log")
@@ -786,8 +756,10 @@ def worker(
786
756
  ready_worker_count.value -= 1
787
757
  worker_enqueued = False
788
758
 
759
+ _init_mpi_env(mpi_launcher=mpi_launcher, resource_spec=req["resource_spec"])
760
+
789
761
  try:
790
- result = execute_task(req['buffer'], mpi_launcher=mpi_launcher)
762
+ result = execute_task(req['buffer'])
791
763
  serialized_result = serialize(result, buffer_threshold=1000000)
792
764
  except Exception as e:
793
765
  logger.info('Caught an exception: {}'.format(e))
parsl/executors/high_throughput/zmq_pipes.py CHANGED
@@ -8,6 +8,7 @@ from typing import Optional
8
8
  import zmq
9
9
 
10
10
  from parsl import curvezmq
11
+ from parsl.addresses import tcp_url
11
12
  from parsl.errors import InternalConsistencyError
12
13
  from parsl.executors.high_throughput.errors import (
13
14
  CommandClientBadError,
@@ -52,11 +53,11 @@ class CommandClient:
52
53
  self.zmq_socket = self.zmq_context.socket(zmq.REQ)
53
54
  self.zmq_socket.setsockopt(zmq.LINGER, 0)
54
55
  if self.port is None:
55
- self.port = self.zmq_socket.bind_to_random_port("tcp://{}".format(self.ip_address),
56
+ self.port = self.zmq_socket.bind_to_random_port(tcp_url(self.ip_address),
56
57
  min_port=self.port_range[0],
57
58
  max_port=self.port_range[1])
58
59
  else:
59
- self.zmq_socket.bind("tcp://{}:{}".format(self.ip_address, self.port))
60
+ self.zmq_socket.bind(tcp_url(self.ip_address, self.port))
60
61
 
61
62
  def run(self, message, max_retries=3, timeout_s=None):
62
63
  """ This function needs to be fast at the same time aware of the possibility of
@@ -146,7 +147,7 @@ class TasksOutgoing:
146
147
  self.zmq_context = curvezmq.ClientContext(cert_dir)
147
148
  self.zmq_socket = self.zmq_context.socket(zmq.DEALER)
148
149
  self.zmq_socket.set_hwm(0)
149
- self.port = self.zmq_socket.bind_to_random_port("tcp://{}".format(ip_address),
150
+ self.port = self.zmq_socket.bind_to_random_port(tcp_url(ip_address),
150
151
  min_port=port_range[0],
151
152
  max_port=port_range[1])
152
153
  self.poller = zmq.Poller()
@@ -202,15 +203,24 @@ class ResultsIncoming:
202
203
  self.zmq_context = curvezmq.ClientContext(cert_dir)
203
204
  self.results_receiver = self.zmq_context.socket(zmq.DEALER)
204
205
  self.results_receiver.set_hwm(0)
205
- self.port = self.results_receiver.bind_to_random_port("tcp://{}".format(ip_address),
206
+ self.port = self.results_receiver.bind_to_random_port(tcp_url(ip_address),
206
207
  min_port=port_range[0],
207
208
  max_port=port_range[1])
209
+ self.poller = zmq.Poller()
210
+ self.poller.register(self.results_receiver, zmq.POLLIN)
208
211
 
209
- def get(self):
212
+ def get(self, timeout_ms=None):
213
+ """Get a message from the queue, returning None if timeout expires
214
+ without a message. timeout is measured in milliseconds.
215
+ """
210
216
  logger.debug("Waiting for ResultsIncoming message")
211
- m = self.results_receiver.recv_multipart()
212
- logger.debug("Received ResultsIncoming message")
213
- return m
217
+ socks = dict(self.poller.poll(timeout=timeout_ms))
218
+ if self.results_receiver in socks and socks[self.results_receiver] == zmq.POLLIN:
219
+ m = self.results_receiver.recv_multipart()
220
+ logger.debug("Received ResultsIncoming message")
221
+ return m
222
+ else:
223
+ return None
214
224
 
215
225
  def close(self):
216
226
  self.results_receiver.close()
parsl/executors/radical/rpex_worker.py CHANGED
@@ -4,7 +4,7 @@ import radical.pilot as rp
4
4
 
5
5
  import parsl.app.errors as pe
6
6
  from parsl.app.bash import remote_side_bash_executor
7
- from parsl.executors.high_throughput.process_worker_pool import execute_task
7
+ from parsl.executors.execute_task import execute_task
8
8
  from parsl.serialize import serialize, unpack_res_spec_apply_message
9
9
 
10
10
 
@@ -33,7 +33,7 @@ class ParslWorker:
33
33
 
34
34
  try:
35
35
  buffer = rp.utils.deserialize_bson(task['description']['executable'])
36
- func, args, kwargs, _resource_spec = unpack_res_spec_apply_message(buffer, {}, copy=False)
36
+ func, args, kwargs, _resource_spec = unpack_res_spec_apply_message(buffer)
37
37
  ret = remote_side_bash_executor(func, *args, **kwargs)
38
38
  exc = (None, None)
39
39
  val = None
parsl/executors/workqueue/exec_parsl_function.py CHANGED
@@ -94,7 +94,7 @@ def unpack_source_code_function(function_info, user_namespace):
94
94
 
95
95
  def unpack_byte_code_function(function_info, user_namespace):
96
96
  from parsl.serialize import unpack_apply_message
97
- func, args, kwargs = unpack_apply_message(function_info["byte code"], user_namespace, copy=False)
97
+ func, args, kwargs = unpack_apply_message(function_info["byte code"])
98
98
  return (func, 'parsl_function_name', args, kwargs)
99
99
 
100
100
 
parsl/providers/condor/condor.py CHANGED
@@ -245,16 +245,14 @@ class CondorProvider(RepresentationMixin, ClusterProvider):
245
245
  with open(userscript_path, 'w') as f:
246
246
  f.write(job_config["worker_init"] + '\n' + wrapped_command)
247
247
 
248
- user_script_path = self.channel.push_file(userscript_path, self.channel.script_dir)
249
- the_input_files = [user_script_path] + self.transfer_input_files
248
+ the_input_files = [userscript_path] + self.transfer_input_files
250
249
  job_config["input_files"] = ','.join(the_input_files)
251
- job_config["job_script"] = os.path.basename(user_script_path)
250
+ job_config["job_script"] = os.path.basename(userscript_path)
252
251
 
253
252
  # Construct and move the submit script
254
253
  self._write_submit_script(template_string, script_path, job_name, job_config)
255
- channel_script_path = self.channel.push_file(script_path, self.channel.script_dir)
256
254
 
257
- cmd = "condor_submit {0}".format(channel_script_path)
255
+ cmd = "condor_submit {0}".format(script_path)
258
256
  try:
259
257
  retcode, stdout, stderr = self.execute_wait(cmd)
260
258
  except Exception as e:
parsl/providers/grid_engine/grid_engine.py CHANGED
@@ -142,11 +142,10 @@ class GridEngineProvider(ClusterProvider, RepresentationMixin):
142
142
  logger.debug("Writing submit script")
143
143
  self._write_submit_script(template_string, script_path, job_name, job_config)
144
144
 
145
- channel_script_path = self.channel.push_file(script_path, self.channel.script_dir)
146
145
  if self.queue is not None:
147
- cmd = "qsub -q {0} -terse {1}".format(self.queue, channel_script_path)
146
+ cmd = "qsub -q {0} -terse {1}".format(self.queue, script_path)
148
147
  else:
149
- cmd = "qsub -terse {0}".format(channel_script_path)
148
+ cmd = "qsub -terse {0}".format(script_path)
150
149
  retcode, stdout, stderr = self.execute_wait(cmd)
151
150
 
152
151
  if retcode == 0: