parsl 2024.10.28__py3-none-any.whl → 2024.11.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. parsl/channels/base.py +6 -46
  2. parsl/channels/errors.py +0 -67
  3. parsl/channels/local/local.py +5 -56
  4. parsl/dataflow/dflow.py +6 -61
  5. parsl/executors/high_throughput/executor.py +0 -1
  6. parsl/executors/high_throughput/mpi_resource_management.py +0 -12
  7. parsl/executors/taskvine/manager.py +6 -0
  8. parsl/executors/taskvine/manager_config.py +5 -0
  9. parsl/monitoring/monitoring.py +23 -26
  10. parsl/monitoring/radios.py +4 -17
  11. parsl/monitoring/remote.py +3 -5
  12. parsl/providers/__init__.py +0 -2
  13. parsl/providers/base.py +1 -1
  14. parsl/providers/cluster_provider.py +1 -4
  15. parsl/providers/condor/condor.py +1 -4
  16. parsl/providers/grid_engine/grid_engine.py +1 -4
  17. parsl/providers/lsf/lsf.py +1 -4
  18. parsl/providers/pbspro/pbspro.py +1 -4
  19. parsl/providers/slurm/slurm.py +1 -4
  20. parsl/providers/torque/torque.py +1 -4
  21. parsl/tests/configs/user_opts.py +0 -7
  22. parsl/tests/conftest.py +4 -4
  23. parsl/tests/site_tests/site_config_selector.py +1 -6
  24. parsl/tests/test_bash_apps/test_basic.py +3 -0
  25. parsl/tests/test_bash_apps/test_error_codes.py +4 -0
  26. parsl/tests/test_bash_apps/test_kwarg_storage.py +1 -0
  27. parsl/tests/test_bash_apps/test_memoize.py +2 -6
  28. parsl/tests/test_bash_apps/test_memoize_ignore_args.py +3 -0
  29. parsl/tests/test_bash_apps/test_memoize_ignore_args_regr.py +1 -0
  30. parsl/tests/test_bash_apps/test_multiline.py +1 -0
  31. parsl/tests/test_bash_apps/test_stdout.py +2 -0
  32. parsl/tests/test_channels/test_local_channel.py +0 -19
  33. parsl/tests/test_docs/test_from_slides.py +3 -0
  34. parsl/tests/test_docs/test_kwargs.py +3 -0
  35. parsl/tests/test_monitoring/test_basic.py +13 -1
  36. parsl/tests/test_providers/test_local_provider.py +0 -135
  37. parsl/tests/test_providers/test_pbspro_template.py +2 -1
  38. parsl/tests/test_providers/test_slurm_template.py +2 -1
  39. parsl/tests/test_python_apps/test_outputs.py +1 -0
  40. parsl/tests/test_regression/test_226.py +1 -0
  41. parsl/tests/test_staging/test_docs_1.py +1 -0
  42. parsl/tests/test_staging/test_output_chain_filenames.py +3 -0
  43. parsl/tests/test_staging/test_staging_ftp.py +1 -0
  44. parsl/tests/test_staging/test_staging_https.py +3 -0
  45. parsl/tests/test_staging/test_staging_stdout.py +2 -0
  46. parsl/version.py +1 -1
  47. {parsl-2024.10.28.dist-info → parsl-2024.11.11.dist-info}/METADATA +2 -8
  48. {parsl-2024.10.28.dist-info → parsl-2024.11.11.dist-info}/RECORD +56 -74
  49. {parsl-2024.10.28.dist-info → parsl-2024.11.11.dist-info}/WHEEL +1 -1
  50. parsl/channels/oauth_ssh/__init__.py +0 -0
  51. parsl/channels/oauth_ssh/oauth_ssh.py +0 -119
  52. parsl/channels/ssh/__init__.py +0 -0
  53. parsl/channels/ssh/ssh.py +0 -295
  54. parsl/channels/ssh_il/__init__.py +0 -0
  55. parsl/channels/ssh_il/ssh_il.py +0 -85
  56. parsl/providers/ad_hoc/__init__.py +0 -0
  57. parsl/providers/ad_hoc/ad_hoc.py +0 -252
  58. parsl/providers/cobalt/__init__.py +0 -0
  59. parsl/providers/cobalt/cobalt.py +0 -236
  60. parsl/providers/cobalt/template.py +0 -17
  61. parsl/tests/configs/cooley_htex.py +0 -37
  62. parsl/tests/configs/local_adhoc.py +0 -18
  63. parsl/tests/configs/theta.py +0 -37
  64. parsl/tests/manual_tests/test_fan_in_out_htex_remote.py +0 -88
  65. parsl/tests/sites/test_local_adhoc.py +0 -62
  66. parsl/tests/test_channels/test_dfk_close.py +0 -26
  67. parsl/tests/test_providers/test_cobalt_deprecation_warning.py +0 -18
  68. {parsl-2024.10.28.data → parsl-2024.11.11.data}/scripts/exec_parsl_function.py +0 -0
  69. {parsl-2024.10.28.data → parsl-2024.11.11.data}/scripts/interchange.py +0 -0
  70. {parsl-2024.10.28.data → parsl-2024.11.11.data}/scripts/parsl_coprocess.py +0 -0
  71. {parsl-2024.10.28.data → parsl-2024.11.11.data}/scripts/process_worker_pool.py +0 -0
  72. {parsl-2024.10.28.dist-info → parsl-2024.11.11.dist-info}/LICENSE +0 -0
  73. {parsl-2024.10.28.dist-info → parsl-2024.11.11.dist-info}/entry_points.txt +0 -0
  74. {parsl-2024.10.28.dist-info → parsl-2024.11.11.dist-info}/top_level.txt +0 -0
parsl/channels/base.py CHANGED
@@ -1,5 +1,5 @@
  from abc import ABCMeta, abstractmethod, abstractproperty
- from typing import Dict, Tuple
+ from typing import Tuple


  class Channel(metaclass=ABCMeta):
@@ -8,33 +8,27 @@ class Channel(metaclass=ABCMeta):

      For certain resources such as campus clusters or supercomputers at
      research laboratories, resource requirements may require authentication.
-     For instance some resources may allow access to their job schedulers from
-     only their login-nodes which require you to authenticate through SSH, or
-     require two factor authentication.

-     The simplest Channel, *LocalChannel*, executes commands locally in a
-     shell, while the *SSHChannel* authenticates you to remote systems.
+     The only remaining Channel, *LocalChannel*, executes commands locally in a
+     shell.

      Channels provide the ability to execute commands remotely, using the
      execute_wait method, and manipulate the remote file system using methods
      such as push_file, pull_file and makedirs.

      Channels should ensure that each launched command runs in a new process
-     group, so that providers (such as AdHocProvider and LocalProvider) which
-     terminate long running commands using process groups can do so.
+     group, so that providers (such as LocalProvider) which terminate long
+     running commands using process groups can do so.
      """

      @abstractmethod
-     def execute_wait(self, cmd: str, walltime: int = 0, envs: Dict[str, str] = {}) -> Tuple[int, str, str]:
+     def execute_wait(self, cmd: str, walltime: int = 0) -> Tuple[int, str, str]:
          ''' Executes the cmd, with a defined walltime.

          Args:
              - cmd (string): Command string to execute over the channel
              - walltime (int) : Timeout in seconds

-         KWargs:
-             - envs (Dict[str, str]) : Environment variables to push to the remote side
-
          Returns:
              - (exit_code, stdout, stderr) (int, string, string)
          '''
@@ -86,37 +80,3 @@ class Channel(metaclass=ABCMeta):
          destination_path (string)
          '''
          pass
-
-     @abstractmethod
-     def close(self) -> None:
-         ''' Closes the channel.
-         '''
-         pass
-
-     @abstractmethod
-     def makedirs(self, path: str, mode: int = 0o511, exist_ok: bool = False) -> None:
-         """Create a directory.
-
-         If intermediate directories do not exist, they will be created.
-
-         Parameters
-         ----------
-         path : str
-             Path of directory to create.
-         mode : int
-             Permissions (posix-style) for the newly-created directory.
-         exist_ok : bool
-             If False, raise an OSError if the target directory already exists.
-         """
-         pass
-
-     @abstractmethod
-     def isdir(self, path: str) -> bool:
-         """Return true if the path refers to an existing directory.
-
-         Parameters
-         ----------
-         path : str
-             Path of directory to check.
-         """
-         pass
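With close(), makedirs() and isdir() removed from the abstract interface, third-party Channel implementations shrink accordingly. A minimal sketch of a post-change subclass, assuming execute_wait, push_file, pull_file and the script_dir property are what remain abstract; the class name and trivial bodies are hypothetical, for illustration only:

from typing import Tuple

from parsl.channels.base import Channel


class EchoChannel(Channel):
    # Hypothetical subclass: note there is no longer any close(),
    # makedirs() or isdir() to implement.

    _script_dir: str = ""

    def execute_wait(self, cmd: str, walltime: int = 0) -> Tuple[int, str, str]:
        # Pretend every command succeeds and echoes itself back
        return (0, cmd, "")

    def push_file(self, source: str, dest_dir: str) -> str:
        return source  # no-op: files are already local

    def pull_file(self, remote_source: str, local_dir: str) -> str:
        return remote_source

    @property
    def script_dir(self) -> str:
        return self._script_dir

    @script_dir.setter
    def script_dir(self, value: str) -> None:
        self._script_dir = value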
parsl/channels/errors.py CHANGED
@@ -17,73 +17,6 @@ class ChannelError(ParslError):
          return "Hostname:{0}, Reason:{1}".format(self.hostname, self.reason)


- class BadHostKeyException(ChannelError):
-     ''' SSH channel could not be created since server's host keys could not
-     be verified
-
-     Contains:
-     reason(string)
-     e (paramiko exception object)
-     hostname (string)
-     '''
-
-     def __init__(self, e: Exception, hostname: str) -> None:
-         super().__init__("SSH channel could not be created since server's host keys could not be "
-                          "verified", e, hostname)
-
-
- class BadScriptPath(ChannelError):
-     ''' An error raised during execution of an app.
-     What this exception contains depends entirely on context
-     Contains:
-     reason(string)
-     e (paramiko exception object)
-     hostname (string)
-     '''
-
-     def __init__(self, e: Exception, hostname: str) -> None:
-         super().__init__("Inaccessible remote script dir. Specify script_dir", e, hostname)
-
-
- class BadPermsScriptPath(ChannelError):
-     ''' User does not have permissions to access the script_dir on the remote site
-
-     Contains:
-     reason(string)
-     e (paramiko exception object)
-     hostname (string)
-     '''
-
-     def __init__(self, e: Exception, hostname: str) -> None:
-         super().__init__("User does not have permissions to access the script_dir", e, hostname)
-
-
- class AuthException(ChannelError):
-     ''' An error raised during execution of an app.
-     What this exception contains depends entirely on context
-     Contains:
-     reason(string)
-     e (paramiko exception object)
-     hostname (string)
-     '''
-
-     def __init__(self, e: Exception, hostname: str) -> None:
-         super().__init__("Authentication to remote server failed", e, hostname)
-
-
- class SSHException(ChannelError):
-     ''' if there was any other error connecting or establishing an SSH session
-
-     Contains:
-     reason(string)
-     e (paramiko exception object)
-     hostname (string)
-     '''
-
-     def __init__(self, e: Exception, hostname: str) -> None:
-         super().__init__("Error connecting or establishing an SSH session", e, hostname)
-
-
  class FileCopyException(ChannelError):
      ''' File copy operation failed

parsl/channels/local/local.py CHANGED
@@ -1,4 +1,3 @@
- import copy
  import logging
  import os
  import shutil
@@ -16,49 +15,32 @@ class LocalChannel(Channel, RepresentationMixin):
      and done so infrequently that they do not need a persistent channel
      '''

-     def __init__(self, userhome=".", envs={}, script_dir=None):
+     def __init__(self):
          ''' Initialize the local channel. script_dir is required by set to a default.

          KwArgs:
-             - userhome (string): (default='.') This is provided as a way to override and set a specific userhome
-             - envs (dict) : A dictionary of env variables to be set when launching the shell
              - script_dir (string): Directory to place scripts
          '''
-         self.userhome = os.path.abspath(userhome)
-         self.hostname = "localhost"
-         self.envs = envs
-         local_env = os.environ.copy()
-         self._envs = copy.deepcopy(local_env)
-         self._envs.update(envs)
-         self.script_dir = script_dir
-
-     def execute_wait(self, cmd, walltime=None, envs={}):
+         self.script_dir = None
+
+     def execute_wait(self, cmd, walltime=None):
          ''' Synchronously execute a commandline string on the shell.

          Args:
              - cmd (string) : Commandline string to execute
              - walltime (int) : walltime in seconds

-         Kwargs:
-             - envs (dict) : Dictionary of env variables. This will be used
-               to override the envs set at channel initialization.
-
          Returns:
              - retcode : Return code from the execution
              - stdout : stdout string
              - stderr : stderr string
          '''
-         current_env = copy.deepcopy(self._envs)
-         current_env.update(envs)
-
          try:
              logger.debug("Creating process with command '%s'", cmd)
              proc = subprocess.Popen(
                  cmd,
                  stdout=subprocess.PIPE,
                  stderr=subprocess.PIPE,
-                 cwd=self.userhome,
-                 env=current_env,
                  shell=True,
                  preexec_fn=os.setpgrp
              )
@@ -99,7 +81,7 @@ class LocalChannel(Channel, RepresentationMixin):
              os.chmod(local_dest, 0o700)

          except OSError as e:
-             raise FileCopyException(e, self.hostname)
+             raise FileCopyException(e, "localhost")

          else:
              os.chmod(local_dest, 0o700)
@@ -109,39 +91,6 @@ class LocalChannel(Channel, RepresentationMixin):
      def pull_file(self, remote_source, local_dir):
          return self.push_file(remote_source, local_dir)

-     def close(self) -> None:
-         ''' There's nothing to close here, and so this doesn't do anything
-         '''
-         pass
-
-     def isdir(self, path):
-         """Return true if the path refers to an existing directory.
-
-         Parameters
-         ----------
-         path : str
-             Path of directory to check.
-         """
-
-         return os.path.isdir(path)
-
-     def makedirs(self, path, mode=0o700, exist_ok=False):
-         """Create a directory.
-
-         If intermediate directories do not exist, they will be created.
-
-         Parameters
-         ----------
-         path : str
-             Path of directory to create.
-         mode : int
-             Permissions (posix-style) for the newly-created directory.
-         exist_ok : bool
-             If False, raise an OSError if the target directory already exists.
-         """
-
-         return os.makedirs(path, mode, exist_ok)
-
      @property
      def script_dir(self):
          return self._script_dir
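For callers, the practical effect of the LocalChannel changes above is that working-directory and environment overrides are gone: anything previously passed through userhome= or envs= now has to be expressed in the command line itself. A minimal sketch of the post-change call shape:

from parsl.channels import LocalChannel

channel = LocalChannel()  # previously LocalChannel(userhome=".", envs={...})

# Environment variables are now set inline rather than via envs=
retcode, stdout, stderr = channel.execute_wait("MYVAR=hello echo $MYVAR", walltime=60)
print(retcode, stdout.strip())  # expected: 0 hello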
parsl/dataflow/dflow.py CHANGED
@@ -6,7 +6,6 @@ import datetime
  import inspect
  import logging
  import os
- import pathlib
  import pickle
  import random
  import sys
@@ -25,7 +24,6 @@ from typeguard import typechecked
  import parsl
  from parsl.app.errors import RemoteExceptionWrapper
  from parsl.app.futures import DataFuture
- from parsl.channels import Channel
  from parsl.config import Config
  from parsl.data_provider.data_manager import DataManager
  from parsl.data_provider.files import File
@@ -49,7 +47,6 @@ from parsl.monitoring import MonitoringHub
  from parsl.monitoring.message_type import MessageType
  from parsl.monitoring.remote import monitor_wrapper
  from parsl.process_loggers import wrap_with_logs
- from parsl.providers.base import ExecutionProvider
  from parsl.usage_tracking.usage import UsageTracker
  from parsl.utils import Timer, get_all_checkpoints, get_std_fname_mode, get_version

@@ -162,8 +159,8 @@ class DataFlowKernel:
          }

          if self.monitoring:
-             self.monitoring.send(MessageType.WORKFLOW_INFO,
-                                  workflow_info)
+             self.monitoring.send((MessageType.WORKFLOW_INFO,
+                                   workflow_info))

          if config.checkpoint_files is not None:
              checkpoints = self.load_checkpoints(config.checkpoint_files)
@@ -238,7 +235,7 @@ class DataFlowKernel:
      def _send_task_log_info(self, task_record: TaskRecord) -> None:
          if self.monitoring:
              task_log_info = self._create_task_log_info(task_record)
-             self.monitoring.send(MessageType.TASK_INFO, task_log_info)
+             self.monitoring.send((MessageType.TASK_INFO, task_log_info))

      def _create_task_log_info(self, task_record: TaskRecord) -> Dict[str, Any]:
          """
@@ -1143,36 +1140,6 @@ class DataFlowKernel:

          logger.info("End of summary")

-     def _create_remote_dirs_over_channel(self, provider: ExecutionProvider, channel: Channel) -> None:
-         """Create script directories across a channel
-
-         Parameters
-         ----------
-         provider: Provider obj
-             Provider for which scripts dirs are being created
-         channel: Channel obj
-             Channel over which the remote dirs are to be created
-         """
-         run_dir = self.run_dir
-         if channel.script_dir is None:
-
-             # This case will be detected as unreachable by mypy, because of
-             # the type of script_dir, which is str, not Optional[str].
-             # The type system doesn't represent the initialized/uninitialized
-             # state of a channel so cannot represent that a channel needs
-             # its script directory set or not.
-
-             channel.script_dir = os.path.join(run_dir, 'submit_scripts')  # type: ignore[unreachable]
-
-         # Only create dirs if we aren't on a shared-fs
-         if not channel.isdir(run_dir):
-             parent, child = pathlib.Path(run_dir).parts[-2:]
-             remote_run_dir = os.path.join(parent, child)
-             channel.script_dir = os.path.join(remote_run_dir, 'remote_submit_scripts')
-             provider.script_dir = os.path.join(run_dir, 'local_submit_scripts')
-
-         channel.makedirs(channel.script_dir, exist_ok=True)
-
      def add_executors(self, executors: Sequence[ParslExecutor]) -> None:
          for executor in executors:
              executor.run_id = self.run_id
@@ -1186,12 +1153,7 @@ class DataFlowKernel:
                  executor.provider.script_dir = os.path.join(self.run_dir, 'submit_scripts')
                  os.makedirs(executor.provider.script_dir, exist_ok=True)

-                 if hasattr(executor.provider, 'channels'):
-                     logger.debug("Creating script_dir across multiple channels")
-                     for channel in executor.provider.channels:
-                         self._create_remote_dirs_over_channel(executor.provider, channel)
-                 else:
-                     self._create_remote_dirs_over_channel(executor.provider, executor.provider.channel)
+                 executor.provider.channel.script_dir = executor.provider.script_dir

              self.executors[executor.label] = executor
              executor.start()
@@ -1273,34 +1235,17 @@ class DataFlowKernel:
                  executor.shutdown()
                  logger.info(f"Shut down executor {executor.label}")

-                 if hasattr(executor, 'provider'):
-                     if hasattr(executor.provider, 'script_dir'):
-                         logger.info(f"Closing channel(s) for {executor.label}")
-
-                         if hasattr(executor.provider, 'channels'):
-                             for channel in executor.provider.channels:
-                                 logger.info(f"Closing channel {channel}")
-                                 channel.close()
-                                 logger.info(f"Closed channel {channel}")
-                         else:
-                             assert hasattr(executor.provider, 'channel'), "If provider has no .channels, it must have .channel"
-                             logger.info(f"Closing channel {executor.provider.channel}")
-                             executor.provider.channel.close()
-                             logger.info(f"Closed channel {executor.provider.channel}")
-
-                         logger.info(f"Closed executor channel(s) for {executor.label}")
-
          logger.info("Terminated executors")
          self.time_completed = datetime.datetime.now()

          if self.monitoring:
              logger.info("Sending final monitoring message")
-             self.monitoring.send(MessageType.WORKFLOW_INFO,
+             self.monitoring.send((MessageType.WORKFLOW_INFO,
                                   {'tasks_failed_count': self.task_state_counts[States.failed],
                                    'tasks_completed_count': self.task_state_counts[States.exec_done],
                                    "time_began": self.time_began,
                                    'time_completed': self.time_completed,
-                                   'run_id': self.run_id, 'rundir': self.run_dir})
+                                   'run_id': self.run_id, 'rundir': self.run_dir}))

          logger.info("Terminating monitoring")
          self.monitoring.close()
parsl/executors/high_throughput/executor.py CHANGED
@@ -63,7 +63,6 @@ DEFAULT_INTERCHANGE_LAUNCH_CMD = ["interchange.py"]

  GENERAL_HTEX_PARAM_DOCS = """provider : :class:`~parsl.providers.base.ExecutionProvider`
      Provider to access computation resources. Can be one of :class:`~parsl.providers.aws.aws.EC2Provider`,
-     :class:`~parsl.providers.cobalt.cobalt.Cobalt`,
      :class:`~parsl.providers.condor.condor.Condor`,
      :class:`~parsl.providers.googlecloud.googlecloud.GoogleCloud`,
      :class:`~parsl.providers.gridEngine.gridEngine.GridEngine`,
parsl/executors/high_throughput/mpi_resource_management.py CHANGED
@@ -17,7 +17,6 @@ class Scheduler(Enum):
      Unknown = 0
      Slurm = 1
      PBS = 2
-     Cobalt = 3


  def get_slurm_hosts_list() -> List[str]:
@@ -37,13 +36,6 @@ def get_pbs_hosts_list() -> List[str]:
          return [line.strip() for line in f.readlines()]


- def get_cobalt_hosts_list() -> List[str]:
-     """Get list of COBALT hosts from envvar: COBALT_NODEFILE"""
-     nodefile_name = os.environ["COBALT_NODEFILE"]
-     with open(nodefile_name) as f:
-         return [line.strip() for line in f.readlines()]
-
-
  def get_nodes_in_batchjob(scheduler: Scheduler) -> List[str]:
      """Get nodelist from all supported schedulers"""
      nodelist = []
@@ -51,8 +43,6 @@ def get_nodes_in_batchjob(scheduler: Scheduler) -> List[str]:
          nodelist = get_slurm_hosts_list()
      elif scheduler == Scheduler.PBS:
          nodelist = get_pbs_hosts_list()
-     elif scheduler == Scheduler.Cobalt:
-         nodelist = get_cobalt_hosts_list()
      else:
          raise RuntimeError(f"mpi_mode does not support scheduler:{scheduler}")
      return nodelist
@@ -64,8 +54,6 @@ def identify_scheduler() -> Scheduler:
          return Scheduler.Slurm
      elif os.environ.get("PBS_NODEFILE"):
          return Scheduler.PBS
-     elif os.environ.get("COBALT_NODEFILE"):
-         return Scheduler.Cobalt
      else:
          return Scheduler.Unknown

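A short sketch of how the trimmed-down detection path behaves after this change, assuming only the functions visible in the diff above: Slurm and PBS are the sole recognised schedulers, and a set COBALT_NODEFILE now falls through to Unknown.

from parsl.executors.high_throughput.mpi_resource_management import (
    Scheduler,
    get_nodes_in_batchjob,
    identify_scheduler,
)

scheduler = identify_scheduler()
if scheduler in (Scheduler.Slurm, Scheduler.PBS):
    print(get_nodes_in_batchjob(scheduler))
else:
    # Cobalt jobs land here now that Scheduler.Cobalt is gone
    print(f"unsupported scheduler: {scheduler}")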
parsl/executors/taskvine/manager.py CHANGED
@@ -44,11 +44,17 @@ def _set_manager_attributes(m, config):
      # Enable peer transfer feature between workers if specified
      if config.enable_peer_transfers:
          m.enable_peer_transfers()
+     else:
+         m.disable_peer_transfers()

      # Set catalog report to parsl if project name exists
      if m.name:
          m.set_property("framework", "parsl")

+     if config.tune_parameters is not None:
+         for k, v in config.tune_parameters.items():
+             m.tune(k, v)
+

  def _prepare_environment_serverless(manager_config, env_cache_dir, poncho_create_script):
      # Return path to a packaged poncho environment
parsl/executors/taskvine/manager_config.py CHANGED
@@ -156,6 +156,10 @@ class TaskVineManagerConfig:
          Directory to store TaskVine logging facilities.
          Default is None, in which all TaskVine logs will be contained
          in the Parsl logging directory.
+
+     tune_parameters: Optional[dict]
+         Extended vine_tune parameters, expressed in a dictionary
+         by { 'tune-parameter' : value }.
      """

      # Connection and communication settings
@@ -181,6 +185,7 @@
      autocategory: bool = True
      enable_peer_transfers: bool = True
      wait_for_workers: Optional[int] = None
+     tune_parameters: Optional[dict] = None

      # Logging settings
      vine_log_dir: Optional[str] = None
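Taken together with the manager.py change above, each entry of tune_parameters is forwarded verbatim to m.tune(k, v) on the TaskVine manager. A hedged configuration sketch; the tune key below is purely illustrative, and valid names come from the TaskVine vine_tune documentation rather than from Parsl:

from parsl.executors.taskvine import TaskVineExecutor, TaskVineManagerConfig

executor = TaskVineExecutor(
    manager_config=TaskVineManagerConfig(
        enable_peer_transfers=False,  # now calls m.disable_peer_transfers() explicitly
        tune_parameters={"example-tune-parameter": 1},  # hypothetical key, passed to m.tune(k, v)
    ),
)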
parsl/monitoring/monitoring.py CHANGED
@@ -3,23 +3,22 @@ from __future__ import annotations
  import logging
  import multiprocessing.synchronize as ms
  import os
+ import pickle
  import queue
  import time
- from multiprocessing import Event, Process
+ from multiprocessing import Event
  from multiprocessing.queues import Queue
- from typing import TYPE_CHECKING, Any, Literal, Optional, Tuple, Union, cast
+ from typing import TYPE_CHECKING, Literal, Optional, Tuple, Union, cast

  import typeguard

  from parsl.log_utils import set_file_logger
  from parsl.monitoring.errors import MonitoringHubStartError
- from parsl.monitoring.message_type import MessageType
  from parsl.monitoring.radios import MultiprocessingQueueRadioSender
  from parsl.monitoring.router import router_starter
  from parsl.monitoring.types import TaggedMonitoringMessage
  from parsl.multiprocessing import ForkProcess, SizedQueue
  from parsl.process_loggers import wrap_with_logs
- from parsl.serialize import deserialize
  from parsl.utils import RepresentationMixin, setproctitle

  _db_manager_excepts: Optional[Exception]
@@ -170,15 +169,15 @@ class MonitoringHub(RepresentationMixin):
                                    daemon=True,
                                    )
          self.dbm_proc.start()
-         logger.info("Started the router process {} and DBM process {}".format(self.router_proc.pid, self.dbm_proc.pid))
+         logger.info("Started the router process %s and DBM process %s", self.router_proc.pid, self.dbm_proc.pid)

-         self.filesystem_proc = Process(target=filesystem_receiver,
-                                        args=(self.logdir, self.resource_msgs, dfk_run_dir),
-                                        name="Monitoring-Filesystem-Process",
-                                        daemon=True
-                                        )
+         self.filesystem_proc = ForkProcess(target=filesystem_receiver,
+                                            args=(self.logdir, self.resource_msgs, dfk_run_dir),
+                                            name="Monitoring-Filesystem-Process",
+                                            daemon=True
+                                            )
          self.filesystem_proc.start()
-         logger.info(f"Started filesystem radio receiver process {self.filesystem_proc.pid}")
+         logger.info("Started filesystem radio receiver process %s", self.filesystem_proc.pid)

          self.radio = MultiprocessingQueueRadioSender(self.resource_msgs)

@@ -191,7 +190,7 @@
              raise MonitoringHubStartError()

          if isinstance(comm_q_result, str):
-             logger.error(f"MonitoringRouter sent an error message: {comm_q_result}")
+             logger.error("MonitoringRouter sent an error message: %s", comm_q_result)
              raise RuntimeError(f"MonitoringRouter failed to start: {comm_q_result}")

          udp_port, zmq_port = comm_q_result
@@ -202,10 +201,9 @@

          self.hub_zmq_port = zmq_port

-     # TODO: tighten the Any message format
-     def send(self, mtype: MessageType, message: Any) -> None:
-         logger.debug("Sending message type {}".format(mtype))
-         self.radio.send((mtype, message))
+     def send(self, message: TaggedMonitoringMessage) -> None:
+         logger.debug("Sending message type %s", message[0])
+         self.radio.send(message)

      def close(self) -> None:
          logger.info("Terminating Monitoring Hub")
@@ -221,10 +219,9 @@
          if exception_msgs:
              for exception_msg in exception_msgs:
                  logger.error(
-                     "{} process delivered an exception: {}. Terminating all monitoring processes immediately.".format(
-                         exception_msg[0],
-                         exception_msg[1]
-                     )
+                     "%s process delivered an exception: %s. Terminating all monitoring processes immediately.",
+                     exception_msg[0],
+                     exception_msg[1]
                  )
              self.router_proc.terminate()
              self.dbm_proc.terminate()
@@ -261,7 +258,7 @@


  @wrap_with_logs
- def filesystem_receiver(logdir: str, q: "queue.Queue[TaggedMonitoringMessage]", run_dir: str) -> None:
+ def filesystem_receiver(logdir: str, q: Queue[TaggedMonitoringMessage], run_dir: str) -> None:
      logger = set_file_logger("{}/monitoring_filesystem_radio.log".format(logdir),
                               name="monitoring_filesystem_radio",
                               level=logging.INFO)
@@ -271,7 +268,7 @@ def filesystem_receiver(logdir: str, q: "queue.Queue[TaggedMonitoringMessage]",
      base_path = f"{run_dir}/monitor-fs-radio/"
      tmp_dir = f"{base_path}/tmp/"
      new_dir = f"{base_path}/new/"
-     logger.debug(f"Creating new and tmp paths under {base_path}")
+     logger.debug("Creating new and tmp paths under %s", base_path)

      os.makedirs(tmp_dir, exist_ok=True)
      os.makedirs(new_dir, exist_ok=True)
@@ -282,15 +279,15 @@ def filesystem_receiver(logdir: str, q: "queue.Queue[TaggedMonitoringMessage]",

      # iterate over files in new_dir
      for filename in os.listdir(new_dir):
          try:
-             logger.info(f"Processing filesystem radio file {filename}")
+             logger.info("Processing filesystem radio file %s", filename)
              full_path_filename = f"{new_dir}/{filename}"
              with open(full_path_filename, "rb") as f:
-                 message = deserialize(f.read())
-                 logger.debug(f"Message received is: {message}")
+                 message = pickle.load(f)
+                 logger.debug("Message received is: %s", message)
                  assert isinstance(message, tuple)
              q.put(cast(TaggedMonitoringMessage, message))
              os.remove(full_path_filename)
          except Exception:
-             logger.exception(f"Exception processing {filename} - probably will be retried next iteration")
+             logger.exception("Exception processing %s - probably will be retried next iteration", filename)

          time.sleep(1)  # whats a good time for this poll?
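The send() change ripples out to the call sites in dflow.py shown earlier: the method now takes a single, already-tagged tuple instead of a separate message type and payload. A sketch of the two call shapes, assuming hub is a started MonitoringHub and info is a payload dict:

from parsl.monitoring.message_type import MessageType

# Before 2024.11.11: two arguments
#     hub.send(MessageType.WORKFLOW_INFO, info)
# From 2024.11.11: one TaggedMonitoringMessage tuple
hub.send((MessageType.WORKFLOW_INFO, info))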