parsl 2024.11.4__py3-none-any.whl → 2024.11.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. parsl/channels/base.py +6 -46
  2. parsl/channels/errors.py +0 -67
  3. parsl/channels/local/local.py +5 -56
  4. parsl/dataflow/dflow.py +1 -58
  5. parsl/executors/taskvine/manager.py +6 -0
  6. parsl/executors/taskvine/manager_config.py +5 -0
  7. parsl/monitoring/db_manager.py +6 -6
  8. parsl/monitoring/monitoring.py +27 -33
  9. parsl/monitoring/radios.py +1 -3
  10. parsl/monitoring/router.py +11 -11
  11. parsl/providers/cluster_provider.py +1 -4
  12. parsl/providers/condor/condor.py +1 -4
  13. parsl/providers/grid_engine/grid_engine.py +1 -4
  14. parsl/providers/lsf/lsf.py +1 -4
  15. parsl/providers/pbspro/pbspro.py +1 -4
  16. parsl/providers/slurm/slurm.py +26 -7
  17. parsl/providers/torque/torque.py +1 -4
  18. parsl/tests/configs/user_opts.py +0 -7
  19. parsl/tests/conftest.py +0 -4
  20. parsl/tests/test_channels/test_local_channel.py +0 -19
  21. parsl/tests/test_providers/test_local_provider.py +0 -135
  22. parsl/tests/test_providers/test_pbspro_template.py +2 -1
  23. parsl/tests/test_providers/test_slurm_template.py +2 -1
  24. parsl/version.py +1 -1
  25. {parsl-2024.11.4.dist-info → parsl-2024.11.18.dist-info}/METADATA +2 -8
  26. {parsl-2024.11.4.dist-info → parsl-2024.11.18.dist-info}/RECORD +34 -45
  27. {parsl-2024.11.4.dist-info → parsl-2024.11.18.dist-info}/WHEEL +1 -1
  28. parsl/channels/oauth_ssh/__init__.py +0 -0
  29. parsl/channels/oauth_ssh/oauth_ssh.py +0 -119
  30. parsl/channels/ssh/__init__.py +0 -0
  31. parsl/channels/ssh/ssh.py +0 -295
  32. parsl/channels/ssh_il/__init__.py +0 -0
  33. parsl/channels/ssh_il/ssh_il.py +0 -85
  34. parsl/providers/ad_hoc/__init__.py +0 -0
  35. parsl/providers/ad_hoc/ad_hoc.py +0 -252
  36. parsl/tests/configs/local_adhoc.py +0 -18
  37. parsl/tests/sites/test_local_adhoc.py +0 -62
  38. parsl/tests/test_channels/test_dfk_close.py +0 -26
  39. {parsl-2024.11.4.data → parsl-2024.11.18.data}/scripts/exec_parsl_function.py +0 -0
  40. {parsl-2024.11.4.data → parsl-2024.11.18.data}/scripts/interchange.py +0 -0
  41. {parsl-2024.11.4.data → parsl-2024.11.18.data}/scripts/parsl_coprocess.py +0 -0
  42. {parsl-2024.11.4.data → parsl-2024.11.18.data}/scripts/process_worker_pool.py +0 -0
  43. {parsl-2024.11.4.dist-info → parsl-2024.11.18.dist-info}/LICENSE +0 -0
  44. {parsl-2024.11.4.dist-info → parsl-2024.11.18.dist-info}/entry_points.txt +0 -0
  45. {parsl-2024.11.4.dist-info → parsl-2024.11.18.dist-info}/top_level.txt +0 -0
parsl/channels/base.py CHANGED
@@ -1,5 +1,5 @@
 from abc import ABCMeta, abstractmethod, abstractproperty
-from typing import Dict, Tuple
+from typing import Tuple


 class Channel(metaclass=ABCMeta):
@@ -8,33 +8,27 @@ class Channel(metaclass=ABCMeta):

     For certain resources such as campus clusters or supercomputers at
     research laboratories, resource requirements may require authentication.
-    For instance some resources may allow access to their job schedulers from
-    only their login-nodes which require you to authenticate through SSH, or
-    require two factor authentication.

-    The simplest Channel, *LocalChannel*, executes commands locally in a
-    shell, while the *SSHChannel* authenticates you to remote systems.
+    The only remaining Channel, *LocalChannel*, executes commands locally in a
+    shell.

     Channels provide the ability to execute commands remotely, using the
     execute_wait method, and manipulate the remote file system using methods
     such as push_file, pull_file and makedirs.

     Channels should ensure that each launched command runs in a new process
-    group, so that providers (such as AdHocProvider and LocalProvider) which
-    terminate long running commands using process groups can do so.
+    group, so that providers (such as LocalProvider) which terminate long
+    running commands using process groups can do so.
     """

     @abstractmethod
-    def execute_wait(self, cmd: str, walltime: int = 0, envs: Dict[str, str] = {}) -> Tuple[int, str, str]:
+    def execute_wait(self, cmd: str, walltime: int = 0) -> Tuple[int, str, str]:
         ''' Executes the cmd, with a defined walltime.

         Args:
             - cmd (string): Command string to execute over the channel
             - walltime (int) : Timeout in seconds

-        KWargs:
-            - envs (Dict[str, str]) : Environment variables to push to the remote side
-
         Returns:
             - (exit_code, stdout, stderr) (int, string, string)
         '''
@@ -86,37 +80,3 @@ class Channel(metaclass=ABCMeta):
             destination_path (string)
         '''
         pass
-
-    @abstractmethod
-    def close(self) -> None:
-        ''' Closes the channel.
-        '''
-        pass
-
-    @abstractmethod
-    def makedirs(self, path: str, mode: int = 0o511, exist_ok: bool = False) -> None:
-        """Create a directory.
-
-        If intermediate directories do not exist, they will be created.
-
-        Parameters
-        ----------
-        path : str
-            Path of directory to create.
-        mode : int
-            Permissions (posix-style) for the newly-created directory.
-        exist_ok : bool
-            If False, raise an OSError if the target directory already exists.
-        """
-        pass
-
-    @abstractmethod
-    def isdir(self, path: str) -> bool:
-        """Return true if the path refers to an existing directory.
-
-        Parameters
-        ----------
-        path : str
-            Path of directory to check.
-        """
-        pass
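Note: with the envs keyword gone from the abstract execute_wait signature, per-command environment variables now have to be encoded in the command string itself. A minimal sketch of that workaround (not part of the diff), using the surviving LocalChannel:

from parsl.channels import LocalChannel

# Sketch only: env(1) sets FOO for this one command, replacing the
# removed envs={"FOO": "bar"} keyword argument.
channel = LocalChannel()
rc, out, err = channel.execute_wait("env FOO=bar printenv FOO", walltime=30)
print(rc, out.strip())  # expected: 0 bar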
parsl/channels/errors.py CHANGED
@@ -17,73 +17,6 @@ class ChannelError(ParslError):
         return "Hostname:{0}, Reason:{1}".format(self.hostname, self.reason)


-class BadHostKeyException(ChannelError):
-    ''' SSH channel could not be created since server's host keys could not
-    be verified
-
-    Contains:
-    reason(string)
-    e (paramiko exception object)
-    hostname (string)
-    '''
-
-    def __init__(self, e: Exception, hostname: str) -> None:
-        super().__init__("SSH channel could not be created since server's host keys could not be "
-                         "verified", e, hostname)
-
-
-class BadScriptPath(ChannelError):
-    ''' An error raised during execution of an app.
-    What this exception contains depends entirely on context
-    Contains:
-    reason(string)
-    e (paramiko exception object)
-    hostname (string)
-    '''
-
-    def __init__(self, e: Exception, hostname: str) -> None:
-        super().__init__("Inaccessible remote script dir. Specify script_dir", e, hostname)
-
-
-class BadPermsScriptPath(ChannelError):
-    ''' User does not have permissions to access the script_dir on the remote site
-
-    Contains:
-    reason(string)
-    e (paramiko exception object)
-    hostname (string)
-    '''
-
-    def __init__(self, e: Exception, hostname: str) -> None:
-        super().__init__("User does not have permissions to access the script_dir", e, hostname)
-
-
-class AuthException(ChannelError):
-    ''' An error raised during execution of an app.
-    What this exception contains depends entirely on context
-    Contains:
-    reason(string)
-    e (paramiko exception object)
-    hostname (string)
-    '''
-
-    def __init__(self, e: Exception, hostname: str) -> None:
-        super().__init__("Authentication to remote server failed", e, hostname)
-
-
-class SSHException(ChannelError):
-    ''' if there was any other error connecting or establishing an SSH session
-
-    Contains:
-    reason(string)
-    e (paramiko exception object)
-    hostname (string)
-    '''
-
-    def __init__(self, e: Exception, hostname: str) -> None:
-        super().__init__("Error connecting or establishing an SSH session", e, hostname)
-
-
 class FileCopyException(ChannelError):
     ''' File copy operation failed

parsl/channels/local/local.py CHANGED
@@ -1,4 +1,3 @@
-import copy
 import logging
 import os
 import shutil
@@ -16,49 +15,32 @@ class LocalChannel(Channel, RepresentationMixin):
     and done so infrequently that they do not need a persistent channel
     '''

-    def __init__(self, userhome=".", envs={}, script_dir=None):
+    def __init__(self):
         ''' Initialize the local channel. script_dir is required by set to a default.

         KwArgs:
-            - userhome (string): (default='.') This is provided as a way to override and set a specific userhome
-            - envs (dict) : A dictionary of env variables to be set when launching the shell
             - script_dir (string): Directory to place scripts
         '''
-        self.userhome = os.path.abspath(userhome)
-        self.hostname = "localhost"
-        self.envs = envs
-        local_env = os.environ.copy()
-        self._envs = copy.deepcopy(local_env)
-        self._envs.update(envs)
-        self.script_dir = script_dir
-
-    def execute_wait(self, cmd, walltime=None, envs={}):
+        self.script_dir = None
+
+    def execute_wait(self, cmd, walltime=None):
         ''' Synchronously execute a commandline string on the shell.

         Args:
             - cmd (string) : Commandline string to execute
             - walltime (int) : walltime in seconds

-        Kwargs:
-            - envs (dict) : Dictionary of env variables. This will be used
-              to override the envs set at channel initialization.
-
         Returns:
             - retcode : Return code from the execution
             - stdout : stdout string
             - stderr : stderr string
         '''
-        current_env = copy.deepcopy(self._envs)
-        current_env.update(envs)
-
         try:
             logger.debug("Creating process with command '%s'", cmd)
             proc = subprocess.Popen(
                 cmd,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
-                cwd=self.userhome,
-                env=current_env,
                 shell=True,
                 preexec_fn=os.setpgrp
             )
@@ -99,7 +81,7 @@ class LocalChannel(Channel, RepresentationMixin):
             os.chmod(local_dest, 0o700)

         except OSError as e:
-            raise FileCopyException(e, self.hostname)
+            raise FileCopyException(e, "localhost")

         else:
             os.chmod(local_dest, 0o700)
@@ -109,39 +91,6 @@ class LocalChannel(Channel, RepresentationMixin):
     def pull_file(self, remote_source, local_dir):
         return self.push_file(remote_source, local_dir)

-    def close(self) -> None:
-        ''' There's nothing to close here, and so this doesn't do anything
-        '''
-        pass
-
-    def isdir(self, path):
-        """Return true if the path refers to an existing directory.
-
-        Parameters
-        ----------
-        path : str
-            Path of directory to check.
-        """
-
-        return os.path.isdir(path)
-
-    def makedirs(self, path, mode=0o700, exist_ok=False):
-        """Create a directory.
-
-        If intermediate directories do not exist, they will be created.
-
-        Parameters
-        ----------
-        path : str
-            Path of directory to create.
-        mode : int
-            Permissions (posix-style) for the newly-created directory.
-        exist_ok : bool
-            If False, raise an OSError if the target directory already exists.
-        """
-
-        return os.makedirs(path, mode, exist_ok)
-
     @property
     def script_dir(self):
         return self._script_dir
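Note: the constructor no longer accepts userhome, envs or script_dir, and commands now inherit the parent process's environment and working directory. A minimal usage sketch against the new API:

from parsl.channels import LocalChannel

ch = LocalChannel()                    # userhome/envs/script_dir kwargs are gone
ch.script_dir = "/tmp/parsl_scripts"   # assigned after construction instead
rc, stdout, stderr = ch.execute_wait("hostname", walltime=10)
assert rc == 0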
parsl/dataflow/dflow.py CHANGED
@@ -6,7 +6,6 @@ import datetime
 import inspect
 import logging
 import os
-import pathlib
 import pickle
 import random
 import sys
@@ -25,7 +24,6 @@ from typeguard import typechecked
 import parsl
 from parsl.app.errors import RemoteExceptionWrapper
 from parsl.app.futures import DataFuture
-from parsl.channels import Channel
 from parsl.config import Config
 from parsl.data_provider.data_manager import DataManager
 from parsl.data_provider.files import File
@@ -49,7 +47,6 @@ from parsl.monitoring import MonitoringHub
 from parsl.monitoring.message_type import MessageType
 from parsl.monitoring.remote import monitor_wrapper
 from parsl.process_loggers import wrap_with_logs
-from parsl.providers.base import ExecutionProvider
 from parsl.usage_tracking.usage import UsageTracker
 from parsl.utils import Timer, get_all_checkpoints, get_std_fname_mode, get_version

@@ -114,8 +111,6 @@ class DataFlowKernel:
         self.monitoring = config.monitoring

         if self.monitoring:
-            if self.monitoring.logdir is None:
-                self.monitoring.logdir = self.run_dir
             self.monitoring.start(self.run_dir, self.config.run_dir)

         self.time_began = datetime.datetime.now()
@@ -1143,36 +1138,6 @@ class DataFlowKernel:

         logger.info("End of summary")

-    def _create_remote_dirs_over_channel(self, provider: ExecutionProvider, channel: Channel) -> None:
-        """Create script directories across a channel
-
-        Parameters
-        ----------
-        provider: Provider obj
-            Provider for which scripts dirs are being created
-        channel: Channel obj
-            Channel over which the remote dirs are to be created
-        """
-        run_dir = self.run_dir
-        if channel.script_dir is None:
-
-            # This case will be detected as unreachable by mypy, because of
-            # the type of script_dir, which is str, not Optional[str].
-            # The type system doesn't represent the initialized/uninitialized
-            # state of a channel so cannot represent that a channel needs
-            # its script directory set or not.
-
-            channel.script_dir = os.path.join(run_dir, 'submit_scripts')  # type: ignore[unreachable]
-
-        # Only create dirs if we aren't on a shared-fs
-        if not channel.isdir(run_dir):
-            parent, child = pathlib.Path(run_dir).parts[-2:]
-            remote_run_dir = os.path.join(parent, child)
-            channel.script_dir = os.path.join(remote_run_dir, 'remote_submit_scripts')
-            provider.script_dir = os.path.join(run_dir, 'local_submit_scripts')
-
-        channel.makedirs(channel.script_dir, exist_ok=True)
-
     def add_executors(self, executors: Sequence[ParslExecutor]) -> None:
         for executor in executors:
             executor.run_id = self.run_id
@@ -1186,12 +1151,7 @@ class DataFlowKernel:
                 executor.provider.script_dir = os.path.join(self.run_dir, 'submit_scripts')
                 os.makedirs(executor.provider.script_dir, exist_ok=True)

-                if hasattr(executor.provider, 'channels'):
-                    logger.debug("Creating script_dir across multiple channels")
-                    for channel in executor.provider.channels:
-                        self._create_remote_dirs_over_channel(executor.provider, channel)
-                else:
-                    self._create_remote_dirs_over_channel(executor.provider, executor.provider.channel)
+                executor.provider.channel.script_dir = executor.provider.script_dir

             self.executors[executor.label] = executor
             executor.start()
@@ -1273,23 +1233,6 @@ class DataFlowKernel:
                 executor.shutdown()
                 logger.info(f"Shut down executor {executor.label}")

-                if hasattr(executor, 'provider'):
-                    if hasattr(executor.provider, 'script_dir'):
-                        logger.info(f"Closing channel(s) for {executor.label}")
-
-                        if hasattr(executor.provider, 'channels'):
-                            for channel in executor.provider.channels:
-                                logger.info(f"Closing channel {channel}")
-                                channel.close()
-                                logger.info(f"Closed channel {channel}")
-                        else:
-                            assert hasattr(executor.provider, 'channel'), "If provider has no .channels, it must have .channel"
-                            logger.info(f"Closing channel {executor.provider.channel}")
-                            executor.provider.channel.close()
-                            logger.info(f"Closed channel {executor.provider.channel}")
-
-                logger.info(f"Closed executor channel(s) for {executor.label}")
-
         logger.info("Terminated executors")
         self.time_completed = datetime.datetime.now()

parsl/executors/taskvine/manager.py CHANGED
@@ -44,11 +44,17 @@ def _set_manager_attributes(m, config):
     # Enable peer transfer feature between workers if specified
     if config.enable_peer_transfers:
         m.enable_peer_transfers()
+    else:
+        m.disable_peer_transfers()

     # Set catalog report to parsl if project name exists
     if m.name:
         m.set_property("framework", "parsl")

+    if config.tune_parameters is not None:
+        for k, v in config.tune_parameters.items():
+            m.tune(k, v)
+

 def _prepare_environment_serverless(manager_config, env_cache_dir, poncho_create_script):
     # Return path to a packaged poncho environment
parsl/executors/taskvine/manager_config.py CHANGED
@@ -156,6 +156,10 @@ class TaskVineManagerConfig:
         Directory to store TaskVine logging facilities.
         Default is None, in which all TaskVine logs will be contained
         in the Parsl logging directory.
+
+    tune_parameters: Optional[dict]
+        Extended vine_tune parameters, expressed in a dictionary
+        by { 'tune-parameter' : value }.
     """

     # Connection and communication settings
@@ -181,6 +185,7 @@ class TaskVineManagerConfig:
     autocategory: bool = True
     enable_peer_transfers: bool = True
     wait_for_workers: Optional[int] = None
+    tune_parameters: Optional[dict] = None

     # Logging settings
     vine_log_dir: Optional[str] = None
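Note: together, these two TaskVine changes add a tune_parameters pass-through (each key/value pair is forwarded to the manager via m.tune(k, v)) and make enable_peer_transfers=False explicitly call disable_peer_transfers(). A configuration sketch; the tune key shown is illustrative, so consult the TaskVine documentation for valid parameter names:

from parsl.executors.taskvine import TaskVineManagerConfig

config = TaskVineManagerConfig(
    enable_peer_transfers=False,             # now actively disables peer transfers
    tune_parameters={"hungry-minimum": 2},   # hypothetical key, forwarded to m.tune(k, v)
)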
parsl/monitoring/db_manager.py CHANGED
@@ -279,7 +279,7 @@ class Database:
 class DatabaseManager:
     def __init__(self,
                  db_url: str = 'sqlite:///runinfo/monitoring.db',
-                 logdir: str = '.',
+                 run_dir: str = '.',
                  logging_level: int = logging.INFO,
                  batching_interval: float = 1,
                  batching_threshold: float = 99999,
@@ -287,12 +287,12 @@ class DatabaseManager:

         self.workflow_end = False
         self.workflow_start_message: Optional[MonitoringMessage] = None
-        self.logdir = logdir
-        os.makedirs(self.logdir, exist_ok=True)
+        self.run_dir = run_dir
+        os.makedirs(self.run_dir, exist_ok=True)

         logger.propagate = False

-        set_file_logger("{}/database_manager.log".format(self.logdir), level=logging_level,
+        set_file_logger(f"{self.run_dir}/database_manager.log", level=logging_level,
                         format_string="%(asctime)s.%(msecs)03d %(name)s:%(lineno)d [%(levelname)s] [%(threadName)s %(thread)d] %(message)s",
                         name="database_manager")

@@ -681,7 +681,7 @@ class DatabaseManager:
 def dbm_starter(exception_q: mpq.Queue,
                 resource_msgs: mpq.Queue,
                 db_url: str,
-                logdir: str,
+                run_dir: str,
                 logging_level: int) -> None:
     """Start the database manager process

@@ -692,7 +692,7 @@ def dbm_starter(exception_q: mpq.Queue,

     try:
         dbm = DatabaseManager(db_url=db_url,
-                              logdir=logdir,
+                              run_dir=run_dir,
                               logging_level=logging_level)
         logger.info("Starting dbm in dbm starter")
         dbm.start(resource_msgs)
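Note: this is a pure rename of logdir to run_dir through DatabaseManager and dbm_starter; the log file still lands in that directory as database_manager.log. A hedged call-site sketch (the path shown is illustrative):

import logging

from parsl.monitoring.db_manager import DatabaseManager

# Sketch only: callers now pass run_dir, not logdir.
dbm = DatabaseManager(db_url="sqlite:///runinfo/monitoring.db",
                      run_dir="runinfo/000",
                      logging_level=logging.INFO)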
parsl/monitoring/monitoring.py CHANGED
@@ -3,9 +3,10 @@ from __future__ import annotations
 import logging
 import multiprocessing.synchronize as ms
 import os
+import pickle
 import queue
 import time
-from multiprocessing import Event, Process
+from multiprocessing import Event
 from multiprocessing.queues import Queue
 from typing import TYPE_CHECKING, Literal, Optional, Tuple, Union, cast

@@ -18,7 +19,6 @@ from parsl.monitoring.router import router_starter
 from parsl.monitoring.types import TaggedMonitoringMessage
 from parsl.multiprocessing import ForkProcess, SizedQueue
 from parsl.process_loggers import wrap_with_logs
-from parsl.serialize import deserialize
 from parsl.utils import RepresentationMixin, setproctitle

 _db_manager_excepts: Optional[Exception]
@@ -44,7 +44,6 @@ class MonitoringHub(RepresentationMixin):
                 workflow_name: Optional[str] = None,
                 workflow_version: Optional[str] = None,
                 logging_endpoint: Optional[str] = None,
-                logdir: Optional[str] = None,
                 monitoring_debug: bool = False,
                 resource_monitoring_enabled: bool = True,
                 resource_monitoring_interval: float = 30):  # in seconds
@@ -73,8 +72,6 @@ class MonitoringHub(RepresentationMixin):
             The database connection url for monitoring to log the information.
             These URLs follow RFC-1738, and can include username, password, hostname, database name.
             Default: sqlite, in the configured run_dir.
-        logdir : str
-            Parsl log directory paths. Logs and temp files go here. Default: '.'
         monitoring_debug : Bool
             Enable monitoring debug logging. Default: False
         resource_monitoring_enabled : boolean
@@ -96,7 +93,6 @@ class MonitoringHub(RepresentationMixin):
         self.hub_port_range = hub_port_range

         self.logging_endpoint = logging_endpoint
-        self.logdir = logdir
         self.monitoring_debug = monitoring_debug

         self.workflow_name = workflow_name
@@ -109,13 +105,10 @@ class MonitoringHub(RepresentationMixin):

         logger.debug("Starting MonitoringHub")

-        if self.logdir is None:
-            self.logdir = "."
-
         if self.logging_endpoint is None:
             self.logging_endpoint = f"sqlite:///{os.fspath(config_run_dir)}/monitoring.db"

-        os.makedirs(self.logdir, exist_ok=True)
+        os.makedirs(dfk_run_dir, exist_ok=True)

         self.monitoring_hub_active = True

@@ -151,7 +144,7 @@ class MonitoringHub(RepresentationMixin):
                 "hub_address": self.hub_address,
                 "udp_port": self.hub_port,
                 "zmq_port_range": self.hub_port_range,
-                "logdir": self.logdir,
+                "run_dir": dfk_run_dir,
                 "logging_level": logging.DEBUG if self.monitoring_debug else logging.INFO,
             },
             name="Monitoring-Router-Process",
@@ -161,7 +154,7 @@ class MonitoringHub(RepresentationMixin):

         self.dbm_proc = ForkProcess(target=dbm_starter,
                                     args=(self.exception_q, self.resource_msgs,),
-                                    kwargs={"logdir": self.logdir,
+                                    kwargs={"run_dir": dfk_run_dir,
                                             "logging_level": logging.DEBUG if self.monitoring_debug else logging.INFO,
                                             "db_url": self.logging_endpoint,
                                             },
@@ -169,15 +162,15 @@ class MonitoringHub(RepresentationMixin):
                                     daemon=True,
                                     )
         self.dbm_proc.start()
-        logger.info("Started the router process {} and DBM process {}".format(self.router_proc.pid, self.dbm_proc.pid))
+        logger.info("Started the router process %s and DBM process %s", self.router_proc.pid, self.dbm_proc.pid)

-        self.filesystem_proc = Process(target=filesystem_receiver,
-                                       args=(self.logdir, self.resource_msgs, dfk_run_dir),
-                                       name="Monitoring-Filesystem-Process",
-                                       daemon=True
-                                       )
+        self.filesystem_proc = ForkProcess(target=filesystem_receiver,
+                                           args=(self.resource_msgs, dfk_run_dir),
+                                           name="Monitoring-Filesystem-Process",
+                                           daemon=True
+                                           )
         self.filesystem_proc.start()
-        logger.info(f"Started filesystem radio receiver process {self.filesystem_proc.pid}")
+        logger.info("Started filesystem radio receiver process %s", self.filesystem_proc.pid)

         self.radio = MultiprocessingQueueRadioSender(self.resource_msgs)

@@ -190,7 +183,7 @@ class MonitoringHub(RepresentationMixin):
             raise MonitoringHubStartError()

         if isinstance(comm_q_result, str):
-            logger.error(f"MonitoringRouter sent an error message: {comm_q_result}")
+            logger.error("MonitoringRouter sent an error message: %s", comm_q_result)
             raise RuntimeError(f"MonitoringRouter failed to start: {comm_q_result}")

         udp_port, zmq_port = comm_q_result
@@ -202,7 +195,7 @@ class MonitoringHub(RepresentationMixin):
         self.hub_zmq_port = zmq_port

     def send(self, message: TaggedMonitoringMessage) -> None:
-        logger.debug("Sending message type {}".format(message[0]))
+        logger.debug("Sending message type %s", message[0])
         self.radio.send(message)

     def close(self) -> None:
@@ -219,10 +212,9 @@ class MonitoringHub(RepresentationMixin):
         if exception_msgs:
             for exception_msg in exception_msgs:
                 logger.error(
-                    "{} process delivered an exception: {}. Terminating all monitoring processes immediately.".format(
-                        exception_msg[0],
-                        exception_msg[1]
-                    )
+                    "%s process delivered an exception: %s. Terminating all monitoring processes immediately.",
+                    exception_msg[0],
+                    exception_msg[1]
                 )
             self.router_proc.terminate()
             self.dbm_proc.terminate()
@@ -259,8 +251,8 @@ class MonitoringHub(RepresentationMixin):


 @wrap_with_logs
-def filesystem_receiver(logdir: str, q: "queue.Queue[TaggedMonitoringMessage]", run_dir: str) -> None:
-    logger = set_file_logger("{}/monitoring_filesystem_radio.log".format(logdir),
+def filesystem_receiver(q: Queue[TaggedMonitoringMessage], run_dir: str) -> None:
+    logger = set_file_logger(f"{run_dir}/monitoring_filesystem_radio.log",
                              name="monitoring_filesystem_radio",
                              level=logging.INFO)

@@ -269,7 +261,9 @@ def filesystem_receiver(logdir: str, q: "queue.Queue[TaggedMonitoringMessage]",
     base_path = f"{run_dir}/monitor-fs-radio/"
     tmp_dir = f"{base_path}/tmp/"
     new_dir = f"{base_path}/new/"
-    logger.debug(f"Creating new and tmp paths under {base_path}")
+    logger.debug("Creating new and tmp paths under %s", base_path)
+
+    target_radio = MultiprocessingQueueRadioSender(q)

     os.makedirs(tmp_dir, exist_ok=True)
     os.makedirs(new_dir, exist_ok=True)
@@ -280,15 +274,15 @@ def filesystem_receiver(logdir: str, q: "queue.Queue[TaggedMonitoringMessage]",
         # iterate over files in new_dir
         for filename in os.listdir(new_dir):
             try:
-                logger.info(f"Processing filesystem radio file {filename}")
+                logger.info("Processing filesystem radio file %s", filename)
                 full_path_filename = f"{new_dir}/{filename}"
                 with open(full_path_filename, "rb") as f:
-                    message = deserialize(f.read())
-                logger.debug(f"Message received is: {message}")
+                    message = pickle.load(f)
+                logger.debug("Message received is: %s", message)
                 assert isinstance(message, tuple)
-                q.put(cast(TaggedMonitoringMessage, message))
+                target_radio.send(cast(TaggedMonitoringMessage, message))
                 os.remove(full_path_filename)
             except Exception:
-                logger.exception(f"Exception processing {filename} - probably will be retried next iteration")
+                logger.exception("Exception processing %s - probably will be retried next iteration", filename)

         time.sleep(1)  # whats a good time for this poll?
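Note: MonitoringHub no longer takes a logdir argument; monitoring logs now follow the run directory handed to start() by the DataFlowKernel. A minimal configuration sketch reflecting the removal:

from parsl.config import Config
from parsl.monitoring import MonitoringHub

config = Config(
    run_dir="runinfo",                                   # monitoring logs land here now
    monitoring=MonitoringHub(hub_address="127.0.0.1"),   # no logdir= kwarg any more
)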
parsl/monitoring/radios.py CHANGED
@@ -8,8 +8,6 @@ from multiprocessing.queues import Queue

 import zmq

-from parsl.serialize import serialize
-
 logger = logging.getLogger(__name__)


@@ -59,7 +57,7 @@ class FilesystemRadioSender(MonitoringRadioSender):
         # move it into new/, so that a partially written
         # file will never be observed in new/
         with open(tmp_filename, "wb") as f:
-            f.write(serialize(buffer))
+            pickle.dump(buffer, f)
         os.rename(tmp_filename, new_filename)

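Note: sender and receiver now agree on plain pickle instead of parsl.serialize, keeping the write-to-tmp-then-rename handoff that prevents the receiver from ever observing a partially written file. A self-contained sketch of that protocol outside Parsl (payload is illustrative):

import os
import pickle
import tempfile

base = tempfile.mkdtemp()
tmp_dir = os.path.join(base, "tmp")
new_dir = os.path.join(base, "new")
os.makedirs(tmp_dir)
os.makedirs(new_dir)

message = ("RESOURCE_INFO", {"task_id": 0})   # illustrative payload

# Sender side: pickle into tmp/, then rename into new/
# (atomic when both live on the same filesystem).
tmp_path = os.path.join(tmp_dir, "msg.pkl")
with open(tmp_path, "wb") as f:
    pickle.dump(message, f)
os.rename(tmp_path, os.path.join(new_dir, "msg.pkl"))

# Receiver side: files in new/ are always complete, so pickle.load is safe.
with open(os.path.join(new_dir, "msg.pkl"), "rb") as f:
    assert pickle.load(f) == message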