asyncmd 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,81 +12,117 @@
12
12
  #
13
13
  # You should have received a copy of the GNU General Public License
14
14
  # along with asyncmd. If not, see <https://www.gnu.org/licenses/>.
15
- import os
15
+ """
16
+ This module contains the implementation of the gromacs engine classes.
17
+
18
+ The two classes GmxEngine and SlurmGmxEngine share most of their methods, the
19
+ slurm-enabled subclass only overrides a few methods to submit gmx mdrun via slurm.
20
+ """
21
+ import asyncio
16
22
  import copy
17
- import shlex
23
+ import dataclasses
24
+ import logging
25
+ import os
18
26
  import random
19
- import string
27
+ import shlex
20
28
  import shutil
29
+ import string
21
30
  import typing
22
- import asyncio
23
- import logging
31
+
24
32
  import aiofiles
25
33
  import aiofiles.os
26
34
  import aiofiles.ospath
27
35
 
28
- from .._config import _SEMAPHORES
29
- from ..mdengine import MDEngine, EngineError, EngineCrashedError
30
- from ..trajectory.trajectory import Trajectory
31
36
  from .. import slurm
37
+ from .._config import _OPT_SEMAPHORES, _SEMAPHORES
38
+ from ..mdengine import EngineCrashedError, MDEngine
39
+ from ..tools import (
40
+ ensure_executable_available,
41
+ attach_kwargs_to_object as _attach_kwargs_to_object,
42
+ DescriptorWithDefaultOnInstanceAndClass as _DescriptorWithDefaultOnInstanceAndClass,
43
+ DescriptorOutputTrajType as _DescriptorOutputTrajType,
44
+ )
45
+ from ..trajectory.trajectory import Trajectory
32
46
  from .mdconfig import MDP
33
- from .utils import nstout_from_mdp, get_all_traj_parts
34
- from ..tools import ensure_executable_available
47
+ from .utils import get_all_traj_parts, nstout_from_mdp
48
+
49
+
50
+ if typing.TYPE_CHECKING: # pragma: no cover
51
+ from asyncio.subprocess import Process
35
52
 
36
53
 
37
54
  logger = logging.getLogger(__name__)
38
55
 
39
56
 
40
- class _descriptor_on_instance_and_class:
41
- # a descriptor that makes the (default) value of the private attribute
42
- # "_name" accessible as "name" on both the class and the instance level
43
- # Accessing the default value works from the class-level, i.e. without
44
- # instantiating the object, but note that setting on the class level
45
- # overwrites the descriptor and does not call __set__
46
- # Setting from an instance calls __set__ and therefore only sets
47
- # the attribute for the given instance (and also runs our checks)
48
- # also see the python docs:
49
- # https://docs.python.org/3/howto/descriptor.html#customized-names
50
- def __set_name__(self, owner, name):
51
- self.public_name = name
52
- self.private_name = "_" + name
53
-
54
- def __get__(self, obj, objtype=None):
55
- if obj is None:
56
- # I (hejung) think if obj is None objtype will always be set
57
- # to the class of the obj
58
- obj = objtype
59
- val = getattr(obj, self.private_name)
60
- return val
61
-
62
- def __set__(self, obj, val):
63
- setattr(obj, self.private_name, val)
64
-
65
-
66
- class _descriptor_output_traj_type(_descriptor_on_instance_and_class):
67
- # Check the output_traj_type for consistency before setting
68
- def __set__(self, obj, val):
69
- allowed_values = ["trr", "xtc"]
70
- val = val.lower()
71
- if val not in allowed_values:
72
- raise ValueError("output_traj_type must be one of "
73
- + f"{allowed_values}, but was {val}."
74
- )
75
- return super().__set__(obj, val)
76
-
77
-
78
- class _descriptor_check_executable(_descriptor_on_instance_and_class):
79
- # check if a given value is a valid executable when setting it
80
- # we use this to make sure gmx grompp and gmx mdrun are available as set
81
- def __set__(self, obj, val):
57
+ # pylint: disable-next=too-few-public-methods
58
+ class _DescriptorCheckExecutable(_DescriptorWithDefaultOnInstanceAndClass):
59
+ """
60
+ Check if the given value is a valid (gmx) executable when setting it.
61
+
62
+ We use this to make sure gmx grompp and gmx mdrun are available as set.
63
+ It therefore is specifically tailored towards them and uses only the first
64
+ part of the executable until the first space.
65
+ """
66
+ def __set__(self, obj, val: str) -> None:
82
67
  # split because mdrun and grompp can be both subcommands of gmx
83
68
  test_exe = val.split(" ")[0]
84
69
  ensure_executable_available(test_exe)
85
- return super().__set__(obj, val)
70
+ super().__set__(obj, val)
71
+
72
+
73
+ # pylint: disable-next=too-few-public-methods
74
+ class _DescriptorOutputTrajTypeGmx(_DescriptorOutputTrajType):
75
+ # only need to set allowed values to work, default type is set via
76
+ # engine._output_traj_type
77
+ ALLOWED_VALUES = {"trr", "xtc"}
78
+
79
+
80
+ # pylint: disable-next=too-few-public-methods
81
+ class _DescriptorMdrunTimeConversionFactor(_DescriptorWithDefaultOnInstanceAndClass):
82
+ """
83
+ Check that the given time conversion factor is 0 < factor <= 1 when setting.
84
+ """
85
+ def __set__(self, obj, val: float) -> None:
86
+ if val > 1.:
87
+ raise ValueError("`mdrun_time_conversion_factor` must be <= 1.")
88
+ if val <= 0:
89
+ raise ValueError("`mdrun_time_conversion_factor` must be > 0.")
90
+ super().__set__(obj, val)
91
+
92
+
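
For readers unfamiliar with the descriptor protocol these helper classes rely on, here is a minimal, self-contained sketch of the same pattern (a default readable on class and instance, validation on assignment); the names are illustrative and not part of asyncmd's API:

    class ValidatedDefault:
        """Expose "_name" as "name" on class and instance, validating on set."""
        def __set_name__(self, owner, name):
            self.private_name = "_" + name

        def __get__(self, obj, objtype=None):
            # fall back to the class when accessed without an instance
            return getattr(obj if obj is not None else objtype, self.private_name)

        def __set__(self, obj, val):
            if not 0 < val <= 1:  # same kind of range check as above
                raise ValueError("value must be in (0, 1].")
            setattr(obj, self.private_name, val)

    class Engine:
        _factor = 1.0                 # class-level default
        factor = ValidatedDefault()   # public, validated access

    print(Engine.factor)   # 1.0, readable without instantiating
    engine = Engine()
    engine.factor = 0.99   # runs the check, stores engine._factor
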
93
+ @dataclasses.dataclass
94
+ class _GmxInputFiles:
95
+ """
96
+ Dataclass to bundle/store all info related to the input files gromacs needs/gets.
97
+ """
98
+ mdp: MDP
99
+ gro_file: str
100
+ top_file: str
101
+ ndx_file: str | None = None
102
+ tpr_file: str | None = None
103
+
104
+
105
+ @dataclasses.dataclass
106
+ class _GmxEngineState:
107
+ """
108
+ Dataclass to bundle/store all engine-state related data.
109
+
110
+ This includes, e.g., the total number of integration steps or the total
111
+ integration time the GmxEngine this is attached to has performed.
112
+ """
113
+ frames_done: int = 0
114
+ steps_done: int = 0
115
+ time_done: float = 0.
116
+ simulation_part: int = 0
117
+ workdir: str = "."
118
+ deffnm: str | None = None
86
119
 
87
120
 
88
- # NOTE: with tra we usually mean trr, i.e. a full precision trajectory with velocities
89
121
  class GmxEngine(MDEngine):
122
+ # The way we (re)set our descriptor attributes in __init__ throws off pylint's counting
123
+ # pylint: disable=too-many-instance-attributes
124
+ # and this class just has a lot of properties (which all count as public methods)
125
+ # pylint: disable=too-many-public-methods
90
126
  """
91
127
  Steer gromacs molecular dynamics simulation from python.
92
128
 
@@ -107,50 +143,52 @@ class GmxEngine(MDEngine):
107
143
  grompp_extra_args : str
108
144
  Can be used to pass extra command line arguments to grompp calls,
109
145
  e.g. "-maxwarn 1".
146
+ Will simply be appended to the end of the command after a separating space.
110
147
  mdrun_extra_args : str
111
148
  Can be used to pass extra command line arguments to mdrun calls,
112
149
  e.g. "-ntomp 8".
150
+ Will simply be appended to the end of the command after a separating space.
113
151
  output_traj_type : str
114
152
  Sets the trajectory type (ending) this engine returns/looks for.
115
153
  Note that we simply ignore all other trajectories, i.e. depending on
116
154
  the MDP settings we will still write xtc and trr, but return only the
117
155
  trajectories with matching ending.
156
+ mdrun_time_conversion_factor : float
157
+ When running gmx mdrun with a given `time_limit`, run it for
158
+ `mdrun_time_conversion_factor * time_limit`.
159
+ This option is relevant only for the :class:`SlurmGmxEngine` and here
160
+ ensures that gmx mdrun finishes during the slurm time limit (which will
161
+ be set to `time_limit`).
162
+ The default value for the :class:`SlurmGmxEngine` is 0.99.
118
163
  """
119
164
 
120
- # local prepare and option to run a local gmx (mainly for testing)
121
165
  _grompp_executable = "gmx grompp"
122
- grompp_executable = _descriptor_check_executable()
166
+ grompp_executable = _DescriptorCheckExecutable()
123
167
  _mdrun_executable = "gmx mdrun"
124
- mdrun_executable = _descriptor_check_executable()
125
- # extra_args are expected to be str and will be appended to the end of the
126
- # respective commands after a separating space,
127
- # i.e. cmd = base_cmd + " " + extra_args
168
+ mdrun_executable = _DescriptorCheckExecutable()
169
+ # extra_args are expected to be str and will be appended to the end of the respective command
128
170
  grompp_extra_args = ""
129
171
  mdrun_extra_args = ""
130
- # file ending of the returned output trajectories,
131
- # exposed as property output_traj_type
132
- # NOTE: this will be the traj we count frames for and check the mdp, etc.
133
- # However this does not mean that no other trajs will/can be written,
134
- # we simply ignore them
172
+ # file ending of the returned output trajectories, exposed as output_traj_type
135
173
  _output_traj_type = "xtc"
136
- output_traj_type = _descriptor_output_traj_type()
174
+ output_traj_type = _DescriptorOutputTrajTypeGmx()
137
175
  # See the notes below for the SlurmGmxEngine on why this conversion factor
138
176
  # is needed (there), here we have it only for consistency
139
177
  _mdrun_time_conversion_factor = 1. # run mdrun for 1. * time-limit
140
-
178
+ mdrun_time_conversion_factor = _DescriptorMdrunTimeConversionFactor()
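
As the docstring above notes, each of these class-level defaults can also be overridden per instance by passing it as a keyword argument at construction time. A hedged usage sketch (file names are placeholders, the import path is assumed to be asyncmd.gromacs, and gromacs must be installed for the executable checks in __init__ to pass):

    from asyncmd.gromacs import GmxEngine, MDP  # import path assumed

    engine = GmxEngine(
        mdconfig=MDP("md.mdp"),        # placeholder input files
        gro_file="conf.gro",
        top_file="topol.top",
        # any attribute documented above can be set via a keyword argument:
        mdrun_extra_args="-ntomp 8",
        output_traj_type="trr",
    )
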
141
179
 
142
180
  def __init__(self,
143
181
  mdconfig: MDP,
144
182
  gro_file: str,
145
- top_file: str,
183
+ top_file: str, *,
146
184
  ndx_file: str | None = None,
147
185
  **kwargs) -> None:
148
186
  """
149
187
  Initialize a :class:`GmxEngine`.
150
188
 
151
- Note that all attributes can be set at intialization by passing keyword
152
- arguments with their name, e.g. mdrun_extra_args="-ntomp 2" to instruct
153
- gromacs to use 2 openMP threads.
189
+ Note that all attributes can be set at initialization by passing keyword
190
+ arguments with their name, e.g. ``mdrun_extra_args="-ntomp 2"`` to
191
+ instruct gromacs to use 2 openMP threads.
154
192
 
155
193
  Parameters
156
194
  ----------
@@ -159,67 +197,31 @@ class GmxEngine(MDEngine):
159
197
  gro_file: str
160
198
  Absolute or relative path to a gromacs structure file.
161
199
  top_file: str
162
- Absolute or relative path to a gromacs topolgy (.top) file.
200
+ Absolute or relative path to a gromacs topology (.top) file.
163
201
  ndx_file: str or None
164
202
  Optional, absolute or relative path to a gromacs index file.
165
203
  """
166
- # make it possible to set any attribute via kwargs
167
- # check the type for attributes with default values
168
- dval = object()
169
- for kwarg, value in kwargs.items():
170
- cval = getattr(self, kwarg, dval)
171
- if cval is not dval:
172
- if isinstance(value, type(cval)):
173
- # value is of same type as default so set it
174
- setattr(self, kwarg, value)
175
- else:
176
- raise TypeError(f"Setting attribute {kwarg} with "
177
- + f"mismatching type ({type(value)}). "
178
- + f" Default type is {type(cval)}."
179
- )
180
- else:
181
- # not previously defined, so warn that we ignore it
182
- logger.warning("Ignoring unknown keyword-argument %s.", kwarg)
183
- # NOTE: after the kwargs setting to be sure they are what we set/expect
184
- # TODO: store a hash/the file contents for gro, top, ndx?
185
- # to check against when we load from storage/restart?
186
- # if we do this do it in the property!
187
- # (but still write one hashfunc for all!)
188
- self.gro_file = gro_file # sets self._gro_file
189
- self.top_file = top_file # sets self._top_file
190
- self.ndx_file = ndx_file # sets self._ndx_file
191
- # dirty hack to make sure we also check for our defaults if they are
192
- # available + executable
193
- self.mdrun_executable = self.mdrun_executable
194
- self.grompp_executable = self.grompp_executable
204
+ # make it possible to set any attribute via kwargs, check them when setting
205
+ _attach_kwargs_to_object(obj=self, logger=logger, **kwargs)
206
+ # give it only the required arguments, we reset below anyway using the
207
+ # properties to use the checks implemented in them
208
+ self._input_files = _GmxInputFiles(mdp=mdconfig,
209
+ gro_file=gro_file,
210
+ top_file=top_file,
211
+ )
212
+ self._engine_state = _GmxEngineState()
213
+ # TODO: store a hash/the file contents for gro, top, ndx to check against
214
+ # when we load from storage/restart? if we do this, do it in the property!
215
+ self.gro_file = gro_file
216
+ self.top_file = top_file
217
+ self.ndx_file = ndx_file
195
218
  # basic checks for mdp are done in the property-setter, e.g. if the
196
219
  # output_traj_type is actually written with current mdp-settings
197
220
  self.mdp = mdconfig
198
- # initialize internal state variables
199
- self._workdir = None
200
- self._prepared = False
201
- # NOTE: frames_done and steps_done do not have an easy relation!
202
- # See the steps_done property docstring for more!
203
- # number of frames produced since last call to prepare
204
- self._frames_done = 0
205
- # number of integration steps done since last call to prepare
206
- self._steps_done = 0
207
- # integration time since last call to prepare in ps
208
- self._time_done = 0.
209
- self._nstout = None # get this from the mdp only when we need it
210
- # Popen handle for gmx mdrun, used to check if we are running
211
- self._proc = None
212
- # these are set by prepare() and used by run_XX()
213
- self._simulation_part = None
214
- self._deffnm = None
215
- # tpr for trajectory (part), will become the structure/topology file
216
- self._tpr = None
217
-
218
- def __getstate__(self) -> dict:
219
- state = self.__dict__.copy()
220
- # cant pickle the process, + its probably dead when we unpickle :)
221
- state["_proc"] = None
222
- return state
221
+ # also (re)-set our descriptors to trigger __set__ and make sure that
222
+ # also our (class) defaults are available + executable
223
+ self.mdrun_executable = self.mdrun_executable
224
+ self.grompp_executable = self.grompp_executable
223
225
 
224
226
  @property
225
227
  def current_trajectory(self) -> Trajectory | None:
@@ -231,142 +233,157 @@ class GmxEngine(MDEngine):
231
233
  Trajectory
232
234
  Last complete trajectory produced by this engine.
233
235
  """
234
- if self._simulation_part == 0:
235
- # we could check if self_proc is set (which prepare sets to None)
236
- # this should make sure that calling current trajectory after
237
- # calling prepare does not return a traj, as soon as we called
238
- # run self._proc will be set, i.e. there is still no gurantee that
239
- # the traj is done, but it will be started always
240
- # (even when accessing simulataneous to the call to run),
241
- # i.e. it is most likely done
242
- # we can also check for simulation part, since it seems
243
- # gmx ignores that if no checkpoint is passed, i.e. we will
244
- # **always** start with part0001 anyways!
245
- # but checking for self._simulation_part == 0 also just makes sure
246
- # we never started a run (i.e. same as checking self._proc)
247
- return None
248
- if (all(v is not None for v in [self._tpr, self._deffnm])
249
- and not self.running):
250
- # self._tpr and self._deffnm are set in prepare, i.e. having them
236
+ if (
237
+ self.tpr_file is not None
238
+ and self.deffnm is not None
239
+ and self.simulation_part > 0
240
+ ):
241
+ # tpr_file and deffnm are set in prepare, i.e. having them
251
242
  # set makes sure that we have at least prepared running the traj
252
243
  # but it might not be done yet
244
+ # also check if we ever started a run, i.e. if there might be a
245
+ # trajectory to return. If simulation_part == 0 we never executed a
246
+ # run method (where it is increased) and also did not (re)start a run
253
247
  traj = Trajectory(
254
- trajectory_files=os.path.join(
255
- self.workdir,
256
- (f"{self._deffnm}"
257
- + f"{self._num_suffix(self._simulation_part)}"
258
- + f".{self.output_traj_type}")
259
- ),
260
- # NOTE: self._tpr already contains the path to workdir
261
- structure_file=self._tpr,
262
- nstout=self.nstout,
263
- )
248
+ trajectory_files=os.path.join(
249
+ # prepend engine workdir to make traj file paths relative to python workdir
250
+ self.workdir,
251
+ (f"{self.deffnm}"
252
+ f"{self._num_suffix(self.simulation_part)}"
253
+ f".{self.output_traj_type}"
254
+ ),
255
+ ),
256
+ # NOTE: tpr_file is already relative to the workdir of the python interpreter
257
+ structure_file=self.tpr_file,
258
+ nstout=self.nstout,
259
+ )
264
260
  return traj
265
261
  return None
266
262
 
267
- @property
268
- def ready_for_run(self) -> bool:
269
- """Whether this engine is ready to run, i.e. generate a trajectory."""
270
- return self._prepared and not self.running
271
-
272
- @property
273
- def running(self) -> bool:
274
- """Whether this engine is currently running/generating a trajectory."""
275
- if self._proc is None:
276
- # this happens when we did not call run() yet
277
- return False
278
- if self._proc.returncode is None:
279
- # no return code means it is still running
280
- return True
281
- # dont care for the value of the exit code,
282
- # we are not running anymore if we crashed ;)
283
- return False
284
-
285
263
  @property
286
264
  def workdir(self) -> str:
287
- """The current woring directory of the engine."""
288
- return self._workdir
265
+ """The current working directory of the engine."""
266
+ return self._engine_state.workdir
289
267
 
290
268
  @workdir.setter
291
269
  def workdir(self, value: str) -> None:
292
270
  if not os.path.isdir(value):
293
271
  raise TypeError(f"Not a directory ({value}).")
294
272
  value = os.path.relpath(value)
295
- self._workdir = value
273
+ self._engine_state.workdir = value
296
274
 
297
275
  @property
298
276
  def gro_file(self) -> str:
299
277
  """The (path to the) gro file this engine uses/used to call grompp."""
300
- return self._gro_file
278
+ return self._input_files.gro_file
301
279
 
302
280
  @gro_file.setter
303
- def gro_file(self, val: str) -> str:
281
+ def gro_file(self, val: str) -> None:
304
282
  if not os.path.isfile(val):
305
283
  raise FileNotFoundError(f"gro file not found: {val}")
306
284
  val = os.path.relpath(val)
307
- self._gro_file = val
285
+ self._input_files.gro_file = val
308
286
 
309
287
  @property
310
288
  def top_file(self) -> str:
311
289
  """The (path to the) top file this engine uses/used to call grompp."""
312
- return self._top_file
290
+ return self._input_files.top_file
313
291
 
314
292
  @top_file.setter
315
293
  def top_file(self, val: str) -> None:
316
294
  if not os.path.isfile(val):
317
295
  raise FileNotFoundError(f"top file not found: {val}")
318
296
  val = os.path.relpath(val)
319
- self._top_file = val
297
+ self._input_files.top_file = val
320
298
 
321
299
  @property
322
300
  def ndx_file(self) -> str | None:
323
301
  """The (path to the) ndx file this engine uses/used to call grompp."""
324
- return self._ndx_file
302
+ return self._input_files.ndx_file
325
303
 
326
304
  @ndx_file.setter
327
305
  def ndx_file(self, val: str | None) -> None:
328
306
  if val is not None:
329
- # GMX does not require an ndx file, so we accept None
330
307
  if not os.path.isfile(val):
331
308
  raise FileNotFoundError(f"ndx file not found: {val}")
332
309
  val = os.path.relpath(val)
333
- # set it anyway (even if it is None)
334
- self._ndx_file = val
310
+ # GMX does not require an ndx file, so we accept None
311
+ self._input_files.ndx_file = val
312
+
313
+ # NOTE: This does not have a setter on purpose, only prepare methods must
314
+ # set this (and there we can be bothered to access via _input_files)
315
+ @property
316
+ def tpr_file(self) -> str | None:
317
+ """
318
+ The (path to the) tpr file this engine uses to call gmx mdrun.
319
+
320
+ None before a call to any prepare method.
321
+ """
322
+ return self._input_files.tpr_file
335
323
 
336
324
  @property
337
325
  def mdp(self) -> MDP:
338
326
  """The configuration of this engine as a :class:`MDP` object."""
339
- return self._mdp
327
+ return self._input_files.mdp
340
328
 
341
329
  @mdp.setter
342
330
  def mdp(self, val: MDP) -> None:
343
331
  if not isinstance(val, MDP):
344
332
  raise TypeError(f"Value must be of type {MDP}.")
345
333
  try:
346
- if val["nsteps"] != -1:
347
- logger.info("Changing nsteps from %s to -1 (infinte), the run "
348
- "length is controlled via arguments of the run "
349
- "method.",
350
- val['nsteps'])
351
- val["nsteps"] = -1
334
+ nsteps = val["nsteps"]
352
335
  except KeyError:
353
336
  # nsteps not defined
354
337
  logger.info("Setting previously undefined nsteps to -1 (infinite).")
338
+ else:
339
+ if nsteps != -1:
340
+ logger.info("Changing nsteps from %s to -1 (infinite), the run "
341
+ "length is controlled via arguments of the run "
342
+ "method.", nsteps)
343
+ finally:
355
344
  val["nsteps"] = -1
356
345
  # check that we get a trajectory of the format we expect with our
357
346
  # current mdp, we do this by using nstout_from_mdp since it throws a
358
347
  # nice error if the mdp does not generate output for given traj-format
359
- # TODO: ensure that x-out and v-out/f-out are the same (if applicable)?
360
348
  _ = nstout_from_mdp(mdp=val, traj_type=self.output_traj_type)
361
- self._mdp = val
349
+ # check if we do an energy minimization: in this case gromacs writes no
350
+ # compressed trajectory (even if so requested by the mdp-file), so we
351
+ # check that self.output_traj_type == trr and generate an error if not
352
+ try:
353
+ integrator = val["integrator"]
354
+ except KeyError:
355
+ # integrator not defined, although this probably seldom happens,
356
+ # gmx grompp does use the (implicit) default "integrator=md" in
357
+ # that case
358
+ integrator = "md"
359
+ if any(integrator == em_algo for em_algo in ("steep", "cg", "l-bfgs")):
360
+ if not self.output_traj_type.lower() == "trr":
361
+ raise ValueError("Gromacs only writes full precision (trr) "
362
+ "trajectories when performing an energy "
363
+ "minimization.")
364
+ self._input_files.mdp = val
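
The restructured setter above leans on Python's full try/except/else/finally semantics: the else branch only runs when no KeyError was raised, and the finally branch always forces nsteps to -1. A small standalone sketch of that control flow on a plain dict:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("mdp-sketch")

    def force_infinite_nsteps(options: dict) -> None:
        """Set nsteps to -1, logging whether it was previously defined."""
        try:
            nsteps = options["nsteps"]
        except KeyError:
            logger.info("Setting previously undefined nsteps to -1 (infinite).")
        else:
            if nsteps != -1:
                logger.info("Changing nsteps from %s to -1 (infinite).", nsteps)
        finally:
            options["nsteps"] = -1

    opts = {"nsteps": 50000}
    force_infinite_nsteps(opts)
    assert opts["nsteps"] == -1
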
362
365
 
363
366
  # alias for mdp to mdconfig (since some users may expect mdconfig)
364
367
  mdconfig = mdp
365
368
 
369
+ # NOTE: This does not have a setter on purpose, only prepare methods must
370
+ # set this (and there we can be bothered to access via _engine_state)
371
+ @property
372
+ def deffnm(self) -> str | None:
373
+ """The ``deffnm`` this engine uses. None before a call to any prepare method."""
374
+ return self._engine_state.deffnm
375
+
376
+ # NOTE: This does not have a setter on purpose, only prepare and run methods
377
+ # must set this (and there we can be bothered to access via _engine_state)
378
+ @property
379
+ def simulation_part(self) -> int:
380
+ """Return the current ``simulation_part`` number."""
381
+ return self._engine_state.simulation_part
382
+
366
383
  @property
367
384
  def dt(self) -> float:
368
385
  """Integration timestep in ps."""
369
- return self._mdp["dt"]
386
+ return self.mdp["dt"]
370
387
 
371
388
  @property
372
389
  def time_done(self) -> float:
@@ -376,22 +393,16 @@ class GmxEngine(MDEngine):
376
393
  Takes into account 'tinit' from the .mdp file if set.
377
394
  """
378
395
  try:
379
- tinit = self._mdp["tinit"]
396
+ tinit = self.mdp["tinit"]
380
397
  except KeyError:
381
398
  tinit = 0.
382
- return self._time_done - tinit
399
+ return self._engine_state.time_done - tinit
383
400
 
384
- # TODO/FIXME: we assume that all output frequencies are multiples of the
385
- # smallest when determing the number of frames etc
386
- # TODO: check that nstxout == nstvout?!
387
401
  @property
388
402
  def nstout(self) -> int:
389
403
  """Smallest output frequency for current output_traj_type."""
390
- if self._nstout is None:
391
- nstout = nstout_from_mdp(self._mdp,
392
- traj_type=self.output_traj_type)
393
- self._nstout = nstout
394
- return self._nstout
404
+ return nstout_from_mdp(self.mdp,
405
+ traj_type=self.output_traj_type)
395
406
 
396
407
  @property
397
408
  def steps_done(self) -> int:
@@ -400,7 +411,7 @@ class GmxEngine(MDEngine):
400
411
 
401
412
  NOTE: steps != frames * nstout
402
413
  Some remarks on the relation between frames_done and steps_done:
403
- Usually (when passing `nsteps` to `run()`) frames_done will be equal to
414
+ Usually (when passing ``nsteps`` to ``run()``) frames_done will be equal to
404
415
  steps_done/nstout + 1 because the initial/final configuration will be
405
416
  written twice (since then the first/last step is always an output step)
406
417
  However as soon as we run for a specific walltime (without specifying
@@ -410,11 +421,11 @@ class GmxEngine(MDEngine):
410
421
  to the traj and then the plus 1 rule for the double written
411
422
  initial/final configuration is off (since it will then be a 'normal'
412
423
  configuration written just once).
413
- If however the neighbor search and trajectory output fall togehter on
424
+ If however the neighbor search and trajectory output fall together on
414
425
  the same step the configuration will be written twice (as with `nsteps`
415
426
  specified).
416
427
  """
417
- return self._steps_done
428
+ return self._engine_state.steps_done
418
429
 
419
430
  @property
420
431
  def frames_done(self) -> int:
@@ -424,15 +435,16 @@ class GmxEngine(MDEngine):
424
435
  NOTE: frames != steps / nstout
425
436
  See the steps_done docstring for more.
426
437
  """
427
- return self._frames_done
438
+ return self._engine_state.frames_done
428
439
 
429
- async def apply_constraints(self, conf_in, conf_out_name, wdir="."):
440
+ async def apply_constraints(self, conf_in: Trajectory, conf_out_name: str, *,
441
+ wdir: str = ".") -> Trajectory:
430
442
  """
431
443
  Apply constraints to given configuration.
432
444
 
433
445
  Parameters
434
446
  ----------
435
- conf_in : asyncmd.Trajectory
447
+ conf_in : Trajectory
436
448
  A (one-frame) trajectory, only the first frame will be used.
437
449
  conf_out_name : str
438
450
  Output path for the constrained configuration.
@@ -452,14 +464,15 @@ class GmxEngine(MDEngine):
452
464
  generate_velocities=False,
453
465
  )
454
466
 
455
- async def generate_velocities(self, conf_in, conf_out_name, wdir=".",
456
- constraints=True):
467
+ async def generate_velocities(self, conf_in: Trajectory, conf_out_name: str, *,
468
+ wdir: str = ".", constraints: bool = True,
469
+ ) -> Trajectory:
457
470
  """
458
471
  Generate random Maxwell-Boltzmann velocities for given configuration.
459
472
 
460
473
  Parameters
461
474
  ----------
462
- conf_in : asyncmd.Trajectory
475
+ conf_in : Trajectory
463
476
  A (one-frame) trajectory, only the first frame will be used.
464
477
  conf_out_name : str
465
478
  Output path for the velocity randomized configuration.
@@ -482,11 +495,9 @@ class GmxEngine(MDEngine):
482
495
  generate_velocities=True,
483
496
  )
484
497
 
485
- async def _0step_md(self, conf_in, conf_out_name, wdir,
486
- constraints: bool, generate_velocities: bool):
487
- if (self.workdir is not None) and (wdir == "."):
488
- # use own working directory if know/set
489
- wdir = self.workdir
498
+ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *,
499
+ wdir: str, constraints: bool, generate_velocities: bool,
500
+ ) -> Trajectory:
490
501
  if not os.path.isabs(conf_out_name):
491
502
  # assume conf_out is to be meant relative to wdir if not an abspath
492
503
  conf_out_name = os.path.join(wdir, conf_out_name)
@@ -501,7 +512,7 @@ class GmxEngine(MDEngine):
501
512
  )
502
513
  swdir = os.path.join(wdir, run_name)
503
514
  await aiofiles.os.mkdir(swdir)
504
- constraints_mdp = copy.deepcopy(self._mdp)
515
+ constraints_mdp = copy.deepcopy(self.mdp)
505
516
  constraints_mdp["continuation"] = "no" if constraints else "yes"
506
517
  constraints_mdp["gen-vel"] = "yes" if generate_velocities else "no"
507
518
  # make sure we write a trr and a xtc to read the final configuration
@@ -518,41 +529,30 @@ class GmxEngine(MDEngine):
518
529
  trr_in=conf_in.trajectory_files[0],
519
530
  tpr_out=os.path.join(swdir, f"{run_name}.tpr"),
520
531
  mdp_obj=constraints_mdp)
521
- # TODO: this is a bit hacky, and should probably not be necessary?
522
- # we keep a ref to the 'old' self._proc to reset it after we are
523
- # done, because the gmx_mdrun method set self._proc to the running
524
- # constraints engine
525
- # and it is probably not necessary since no engine should be able
526
- # to be runing when/if we are able to call apply_constraints?
527
- old_proc_val = self._proc
528
532
  cmd_str = self._mdrun_cmd(tpr=os.path.join(swdir, f"{run_name}.tpr"),
529
533
  workdir=swdir,
530
534
  deffnm=run_name)
531
535
  logger.debug("About to execute gmx mdrun command for constraints and"
532
536
  "/or velocity generation: %s",
533
537
  cmd_str)
534
- returncode = None
535
538
  stderr = bytes()
536
539
  stdout = bytes()
537
540
  await self._acquire_resources_gmx_mdrun()
541
+ mdrun_proc = await self._start_gmx_mdrun(
542
+ cmd_str=cmd_str, workdir=swdir,
543
+ run_name=run_name,
544
+ # TODO: we hardcode that the 0step MD runs can not be longer than 15 min
545
+ # (but i think this should be fine for randomizing velocities and/or
546
+ # applying constraints?!)
547
+ walltime=0.25,
548
+ )
538
549
  try:
539
- await self._start_gmx_mdrun(cmd_str=cmd_str, workdir=swdir,
540
- run_name=run_name,
541
- # TODO/FIXME: we hardcode that the runs
542
- # can not be longer than 15 min here
543
- # (but i think this should be fine for
544
- # randomizing velocities and/or
545
- # applying constraints?!)
546
- walltime=0.25,
547
- )
548
- # self._proc is set by _start_gmx_mdrun!
549
- stdout, stderr = await self._proc.communicate()
550
- returncode = self._proc.returncode
550
+ stdout, stderr = await mdrun_proc.communicate()
551
551
  except asyncio.CancelledError:
552
- self._proc.kill()
552
+ mdrun_proc.kill()
553
553
  raise # reraise the error for encompassing coroutines
554
554
  else:
555
- if returncode != 0:
555
+ if (returncode := mdrun_proc.returncode):
556
556
  raise EngineCrashedError(
557
557
  f"Non-zero (or no) exit code from mdrun (= {returncode}).\n"
558
558
  + "\n--------\n"
@@ -575,24 +575,29 @@ class GmxEngine(MDEngine):
575
575
  )
576
576
  finally:
577
577
  await self._cleanup_gmx_mdrun(workdir=swdir, run_name=run_name)
578
- self._proc = old_proc_val
579
578
 
580
- async def prepare(self, starting_configuration, workdir, deffnm):
579
+ async def prepare(self, starting_configuration: Trajectory | None | str,
580
+ workdir: str, deffnm: str) -> None:
581
581
  """
582
582
  Prepare a fresh simulation (starting with part0001).
583
583
 
584
584
  Can also be used to continue a simulation from a checkpoint file with
585
- matching name ('deffnm.cpt'). In that case, the `simulation-part` mdp
585
+ matching name ('deffnm.cpt'). In that case, the 'simulation-part' mdp
586
586
  option must match the number of the next part to be generated, e.g. it
587
587
  must be 2 if the last part generated was part0001. The previously
588
588
  generated trajectory files do not need to exist.
589
+ If 'simulation-part' is not set and previous trajectories are found an
590
+ error is raised.
589
591
 
590
592
  Parameters
591
593
  ----------
592
- starting_configuration : asyncmd.Trajectory or None
594
+ starting_configuration : Trajectory or None or str
593
595
  A (trr) trajectory of which we take the first frame as starting
594
596
  configuration (including velocities) or None, then the initial
595
597
  configuration is the gro-file.
598
+ Can also be a str, then it is assumed to be the path to a trr, cpt,
599
+ or tng (i.e. a full precision trajectory) and will be passed directly
600
+ to grompp.
596
601
  workdir : str
597
602
  Absolute or relative path to an existing directory to use as
598
603
  working directory.
@@ -600,8 +605,8 @@ class GmxEngine(MDEngine):
600
605
  The name (prefix) to use for all files.
601
606
  """
602
607
  # deffnm is the default name/prefix for all outfiles (as in gmx)
603
- self._deffnm = deffnm
604
- self.workdir = workdir # sets to abspath and check if it is a dir
608
+ self._engine_state.deffnm = deffnm
609
+ self.workdir = workdir # sets to relpath and check if it is a dir
605
610
  # check 'simulation-part' option in mdp file / MDP options
606
611
  # it decides at which .partXXXX the gmx numbering starts,
607
612
  # however gromacs ignores it if there is no -cpi [CheckPointIn]
@@ -611,13 +616,13 @@ class GmxEngine(MDEngine):
611
616
  # the mdp - 1 (we increase *before* each simulation part)
612
617
  cpt_fname = os.path.join(self.workdir, f"{deffnm}.cpt")
613
618
  try:
614
- sim_part = self._mdp["simulation-part"]
619
+ sim_part = self.mdp["simulation-part"]
615
620
  except KeyError:
616
621
  # the gmx mdp default is 1, it starts at part0001
617
622
  # we add one at the start of each run, i.e. the numberings match up
618
623
  # and we will have tra=`...part0001.trr` from gmx
619
624
  # and confout=`...part0001.gro` from our naming
620
- self._simulation_part = 0
625
+ self._engine_state.simulation_part = 0
621
626
  else:
622
627
  if sim_part > 1:
623
628
  if not os.path.isfile(cpt_fname):
@@ -631,8 +636,8 @@ class GmxEngine(MDEngine):
631
636
  "Using the checkpoint file as "
632
637
  "`starting_configuration`.",
633
638
  sim_part, cpt_fname)
634
- # always substract one from sim_part so we get 0 if it was 1
635
- self._simulation_part = sim_part - 1
639
+ # always subtract one from sim_part so we get 0 if it was 1
640
+ self._engine_state.simulation_part = sim_part - 1
636
641
  # check for previous runs with the same deffnm in workdir
637
642
  # NOTE: we only check for checkpoint files and trajectory parts as gmx
638
643
  # will move everything and only the checkpoint and trajs let us
@@ -646,51 +651,50 @@ class GmxEngine(MDEngine):
646
651
  # starting simulation_part, because we assume that if we find a
647
652
  # checkpoint file (above) and simulation_part > 0 that the
648
653
  # checkpoint file matches the correct part-number
649
- if len(trajs_with_same_deffnm) > self._simulation_part:
654
+ if len(trajs_with_same_deffnm) > self.simulation_part:
650
655
  raise ValueError(f"There are files in workdir ({self.workdir}) "
651
656
  + f"with the same deffnm ({deffnm}). Use the "
652
- + "`prepare_from_files()` method to continue an "
657
+ + "``prepare_from_files()`` method to continue an "
653
658
  + "existing MD run or change the workdir and/or "
654
659
  + "deffnm.")
655
- # actucal preparation of MDrun: sort out starting configuration...
656
- if ((starting_configuration is None)
660
+ # actual preparation of MD run: sort out starting configuration...
661
+ if (
657
662
  # None enables start from the initial structure file ('-c' option)
658
- or isinstance(starting_configuration, str)
663
+ starting_configuration is None
659
664
  # str enables passing the path to the full precision trajectory
660
665
  # directly, i.e. trr, cpt, or tng
661
- ):
666
+ or isinstance(starting_configuration, str)
667
+ ):
662
668
  trr_in = starting_configuration
663
669
  elif isinstance(starting_configuration, Trajectory):
664
670
  # enable passing of asyncmd.Trajectories as starting_configuration
665
671
  trr_in = starting_configuration.trajectory_files[0]
666
672
  else:
667
673
  raise TypeError("Starting_configuration must be None, a wrapped "
668
- "full precission trajectrtory, or the path to a "
669
- "full precission trajectory (trr, cpt, or tng).")
674
+ "full precision trajectory, or the path to a "
675
+ "full precision trajectory (trr, cpt, or tng).")
670
676
  # ...and call grompp to get a tpr
671
677
  # remember the path to use as structure file for out trajs
672
- self._tpr = os.path.join(self.workdir, deffnm + ".tpr")
673
- await self._run_grompp(workdir=self.workdir, deffnm=self._deffnm,
674
- trr_in=trr_in, tpr_out=self._tpr,
675
- mdp_obj=self._mdp)
676
- if not await aiofiles.ospath.isfile(self._tpr):
678
+ self._input_files.tpr_file = os.path.join(self.workdir, deffnm + ".tpr")
679
+ await self._run_grompp(workdir=self.workdir, deffnm=self.deffnm,
680
+ trr_in=trr_in, tpr_out=self.tpr_file,
681
+ mdp_obj=self.mdp)
682
+ if not await aiofiles.ospath.isfile(self.tpr_file):
677
683
  # better be save than sorry :)
678
684
  raise RuntimeError("Something went wrong generating the tpr. "
679
- f"{self._tpr} does not seem to be a file.")
680
- # make sure we can not mistake a previous Popen for current mdrun
681
- self._proc = None
682
- self._frames_done = 0 # (re-)set how many frames we did
683
- self._steps_done = 0
684
- self._time_done = 0.
685
- self._prepared = True
686
-
687
- async def _run_grompp(self, workdir, deffnm, trr_in, tpr_out, mdp_obj):
685
+ f"{self.tpr_file} does not seem to be a file.")
686
+ self._engine_state.frames_done = 0 # (re-)set how many frames we did
687
+ self._engine_state.steps_done = 0
688
+ self._engine_state.time_done = 0.
689
+
690
+ async def _run_grompp(self, *, workdir: str, deffnm: str, trr_in: str | None,
691
+ tpr_out: str, mdp_obj: MDP) -> None:
688
692
  # NOTE: file paths from workdir and deffnm
689
693
  mdp_in = os.path.join(workdir, deffnm + ".mdp")
690
694
  # write the mdp file (always overwriting existing mdps)
691
695
  # I (hejung) think this is what we want as the prepare methods check
692
696
  # for leftover files with the same deffnm, so if only the mdp is there
693
- # we can (and want to) just ovewrite it without raising an err
697
+ # we can (and want to) just overwrite it without raising an err
694
698
  async with _SEMAPHORES["MAX_FILES_OPEN"]:
695
699
  mdp_obj.write(mdp_in, overwrite=True)
696
700
  mdp_out = os.path.join(workdir, deffnm + "_mdout.mdp")
@@ -702,20 +706,20 @@ class GmxEngine(MDEngine):
702
706
  # NOTE: The max open files semaphores counts for 3 open files, so we
703
707
  # only need it once
704
708
  await _SEMAPHORES["MAX_FILES_OPEN"].acquire()
705
- try:
706
- grompp_proc = await asyncio.create_subprocess_exec(
709
+ grompp_proc = await asyncio.create_subprocess_exec(
707
710
  *shlex.split(cmd_str),
708
711
  stdout=asyncio.subprocess.PIPE,
709
712
  stderr=asyncio.subprocess.PIPE,
710
713
  cwd=workdir,
711
- )
714
+ )
715
+ try:
712
716
  stdout, stderr = await grompp_proc.communicate()
717
+ except asyncio.CancelledError as e:
718
+ grompp_proc.kill() # kill grompp
719
+ raise e from None # and reraise the cancellation
720
+ else:
713
721
  return_code = grompp_proc.returncode
714
- logger.debug("gmx grompp command returned return code %s.",
715
- str(return_code) if return_code is not None else "not available")
716
- #logger.debug("grompp stdout:\n%s", stdout.decode())
717
- #logger.debug("grompp stderr:\n%s", stderr.decode())
718
- if return_code != 0:
722
+ if (return_code := grompp_proc.returncode):
719
723
  # this assumes POSIX
720
724
  raise RuntimeError("grompp had non-zero return code "
721
725
  + f"({return_code}).\n"
@@ -724,14 +728,13 @@ class GmxEngine(MDEngine):
724
728
  + "\n--------\n"
725
729
  + f"stdout: \n--------\n {stdout.decode()}"
726
730
  )
727
- except asyncio.CancelledError as e:
728
- grompp_proc.kill() # kill grompp
729
- raise e from None # and reraise the cancelation
731
+ logger.debug("gmx grompp command returned return code %s.",
732
+ str(return_code) if return_code is not None else "not available")
730
733
  finally:
731
734
  # release the semaphore
732
735
  _SEMAPHORES["MAX_FILES_OPEN"].release()
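
The surrounding pattern (create the subprocess, kill it if the encompassing task gets cancelled, raise on a non-zero return code, and always release the limiting semaphore in a finally block) is plain asyncio; a minimal standalone sketch with a placeholder command:

    import asyncio
    import shlex

    async def run_checked(cmd_str: str, sem: asyncio.Semaphore) -> bytes:
        """Run cmd_str, return stdout; raise on non-zero exit or cancellation."""
        await sem.acquire()
        proc = await asyncio.create_subprocess_exec(
            *shlex.split(cmd_str),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout, stderr = await proc.communicate()
        except asyncio.CancelledError:
            proc.kill()       # do not leave the child process running
            raise             # propagate the cancellation to the caller
        else:
            if proc.returncode:
                raise RuntimeError(f"non-zero exit ({proc.returncode}): "
                                   f"{stderr.decode()}")
            return stdout
        finally:
            sem.release()     # always give the resource slot back

    async def main():
        sem = asyncio.Semaphore(3)  # illustrative limit on concurrent processes
        print((await run_checked("echo hello", sem)).decode())

    asyncio.run(main())
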
733
736
 
734
- async def prepare_from_files(self, workdir: str, deffnm: str):
737
+ async def prepare_from_files(self, workdir: str, deffnm: str) -> None:
735
738
  """
736
739
  Prepare continuation run starting from the last part found in workdir.
737
740
 
@@ -741,7 +744,7 @@ class GmxEngine(MDEngine):
741
744
  Parameters
742
745
  ----------
743
746
  workdir : str
744
- Absolute or relative path to an exisiting directory to use as
747
+ Absolute or relative path to an existing directory to use as
745
748
  working directory.
746
749
  deffnm : str
747
750
  The name (prefix) to use for all files.
@@ -760,51 +763,65 @@ class GmxEngine(MDEngine):
760
763
  )
761
764
  # load the 'old' mdp_in
762
765
  async with _SEMAPHORES["MAX_FILES_OPEN"]:
763
- self._mdp = MDP(os.path.join(self.workdir, f"{deffnm}.mdp"))
764
- self._deffnm = deffnm
766
+ self.mdp = MDP(os.path.join(self.workdir, f"{deffnm}.mdp"))
767
+ self._engine_state.deffnm = deffnm
765
768
  # Note that we dont need to explicitly check for the tpr existing,
766
769
  # if it does not exist we will err when getting the traj lengths
767
- self._tpr = os.path.join(self.workdir, deffnm + ".tpr")
768
- self._simulation_part = last_partnum
770
+ self._input_files.tpr_file = os.path.join(self.workdir, deffnm + ".tpr")
771
+ self._engine_state.simulation_part = last_partnum
769
772
  # len(t), because for frames we do not care if first frame is in traj
770
- self._frames_done = sum(len(t) for t in previous_trajs)
773
+ self._engine_state.frames_done = sum(len(t) for t in previous_trajs)
771
774
  # steps done is the more reliable info if we want to know how many
772
775
  # integration steps we did
773
- self._steps_done = previous_trajs[-1].last_step
774
- self._time_done = previous_trajs[-1].last_time
775
- self._proc = None
776
- self._prepared = True
776
+ self._engine_state.steps_done = previous_trajs[-1].last_step
777
+ self._engine_state.time_done = previous_trajs[-1].last_time
777
778
 
778
779
  # NOTE: this enables us to reuse run and prepare methods in SlurmGmxEngine,
779
- # i.e. we only need to overwite the next 3 functions to write out the slurm
780
+ # i.e. we only need to overwrite the next 3 functions to write out the slurm
780
781
  # submission script, submit the job and allocate/release different resources
781
- async def _start_gmx_mdrun(self, cmd_str, workdir, **kwargs):
782
- proc = await asyncio.create_subprocess_exec(
782
+ async def _start_gmx_mdrun(self, *, cmd_str: str, workdir: str,
783
+ # the next two arguments are only used by SlurmGmxEngine
784
+ # but we rather make them explicit here already
785
+ # pylint: disable-next=unused-argument
786
+ walltime: float | None,
787
+ # pylint: disable-next=unused-argument
788
+ run_name: str | None = None,
789
+ ) -> "Process | slurm.SlurmProcess":
790
+ return await asyncio.create_subprocess_exec(
783
791
  *shlex.split(cmd_str),
784
792
  stdout=asyncio.subprocess.PIPE,
785
793
  stderr=asyncio.subprocess.PIPE,
786
794
  cwd=workdir,
787
795
  )
788
- self._proc = proc
789
796
 
790
- async def _acquire_resources_gmx_mdrun(self, **kwargs):
797
+ async def _acquire_resources_gmx_mdrun(self) -> None:
791
798
  # *always* called before any gmx_mdrun, used to reserve resources
792
799
  # for local gmx we need 3 file descriptors: stdin, stdout, stderr
793
800
  # (one max files semaphore counts for 3 open files)
794
801
  await _SEMAPHORES["MAX_FILES_OPEN"].acquire()
795
802
 
796
- async def _cleanup_gmx_mdrun(self, **kwargs):
803
+ async def _cleanup_gmx_mdrun(self,
804
+ # the next two arguments are only used by SlurmGmxEngine
805
+ # but we rather make them explicit here already
806
+ # pylint: disable-next=unused-argument
807
+ workdir: str, run_name: str | None = None,
808
+ ) -> None:
797
809
  # *always* called after any gmx_mdrun, use to release resources
798
810
  # release the semaphore for the 3 file descriptors
799
811
  _SEMAPHORES["MAX_FILES_OPEN"].release()
800
812
 
801
- async def run(self, nsteps=None, walltime=None, steps_per_part=False):
813
+ async def run(self, nsteps: int | None = None, walltime: float | None = None,
814
+ steps_per_part: bool = False,
815
+ ) -> Trajectory | None:
802
816
  """
803
817
  Run simulation for specified number of steps or/and a given walltime.
804
818
 
805
819
  Note that you can pass both nsteps and walltime and the simulation will
806
820
  stop on the condition that is reached first.
807
821
 
822
+ Return None if no integration is needed because nsteps integration steps
823
+ have already been performed.
824
+
808
825
  Parameters
809
826
  ----------
810
827
  nsteps : int or None
@@ -819,67 +836,47 @@ class GmxEngine(MDEngine):
819
836
  counted, default False.
820
837
  """
821
838
  # generic run method is actually easier to implement for gmx :D
822
- if not self.ready_for_run:
823
- raise RuntimeError("Engine not ready for run. Call self.prepare() "
824
- + "and/or check if it is still running.")
825
- if all(kwarg is None for kwarg in [nsteps, walltime]):
839
+ if self.tpr_file is None or self.deffnm is None:
840
+ raise RuntimeError(
841
+ "Engine not ready for run. Call self.prepare() before calling a run method."
842
+ )
843
+ if all(kwarg is None for kwarg in (nsteps, walltime)):
826
844
  raise ValueError("Neither steps nor walltime given.")
827
845
  if nsteps is not None:
828
846
  nsteps = int(nsteps)
829
- if nsteps % self.nstout != 0:
847
+ if nsteps % self.nstout:
830
848
  raise ValueError(f"nsteps ({nsteps}) must be a multiple of "
831
849
  + f"nstout ({self.nstout}).")
832
850
  if not steps_per_part:
833
- nsteps = nsteps - self.steps_done
834
- if nsteps == 0:
851
+ nsteps -= self.steps_done
852
+ if not nsteps:
835
853
  # Return None instead of raising an error, this makes it nicer
836
854
  # to use the run method with walltime and total nsteps inside
837
855
  # while loops, i.e. we can just call traj = e.run(...) and then
838
856
  # while traj is not None: traj = e.run()
839
- # TODO: this will make it complicated to ever use the GmxEngine
840
- # for zero-step simulations to only apply constraints,
841
- # but we do have the _0_step_md methods for that...?!
842
857
  return None
843
- elif nsteps < 0:
844
- raise ValueError(f"nsteps is too small ({nsteps} steps for "
845
- + "this part). Can not travel backwards in "
846
- + "time...")
847
-
848
- self._simulation_part += 1
849
- cmd_str = self._mdrun_cmd(tpr=self._tpr, workdir=self.workdir,
850
- deffnm=self._deffnm,
851
- # TODO: use more/any other kwargs?
858
+ if nsteps < 0:
859
+ raise ValueError(f"nsteps is too small ({nsteps} steps for this part). "
860
+ "Can not travel backwards in time...")
861
+
862
+ self._engine_state.simulation_part += 1
863
+ cmd_str = self._mdrun_cmd(tpr=self.tpr_file, workdir=self.workdir,
864
+ deffnm=self.deffnm,
852
865
  maxh=walltime, nsteps=nsteps)
853
866
  logger.debug("About to execute gmx mdrun command: %s", cmd_str)
854
867
  returncode = None
855
868
  stderr = bytes()
856
869
  stdout = bytes()
857
870
  await self._acquire_resources_gmx_mdrun()
871
+ mdrun_proc = await self._start_gmx_mdrun(cmd_str=cmd_str, workdir=self.workdir,
872
+ walltime=walltime,)
858
873
  try:
859
- await self._start_gmx_mdrun(cmd_str=cmd_str, workdir=self.workdir,
860
- walltime=walltime,)
861
- # self._proc is set by _start_gmx_mdrun!
862
- stdout, stderr = await self._proc.communicate()
863
- returncode = self._proc.returncode
874
+ stdout, stderr = await mdrun_proc.communicate()
864
875
  except asyncio.CancelledError as e:
865
- if self._proc is not None:
866
- # make sure _proc is set, it can still be None if we get
867
- # canceled while _start_gmx_mdrun is setting up the process
868
- self._proc.kill()
876
+ mdrun_proc.kill()
869
877
  raise e from None # reraise the error for encompassing coroutines
870
878
  else:
871
- logger.debug("gmx mdrun command returned return code %s.",
872
- str(returncode) if returncode is not None else "not available")
873
- #logger.debug("gmx mdrun stdout:\n%s", stdout.decode())
874
- #logger.debug("gmx mdrun stderr:\n%s", stderr.decode())
875
- if returncode == 0:
876
- self._frames_done += len(self.current_trajectory)
877
- # dont care if we did a little more and only the checkpoint knows
878
- # we will only find out with the next trajectory part anyways
879
- self._steps_done = self.current_trajectory.last_step
880
- self._time_done = self.current_trajectory.last_time
881
- return self.current_trajectory
882
- else:
879
+ if (returncode := mdrun_proc.returncode):
883
880
  raise EngineCrashedError(
884
881
  f"Non-zero (or no) exit code from mdrun (= {returncode}).\n"
885
882
  + "\n--------\n"
@@ -887,13 +884,25 @@ class GmxEngine(MDEngine):
887
884
  + "\n--------\n"
888
885
  + f"stdout: \n--------\n {stdout.decode()}"
889
886
  )
887
+ logger.debug("gmx mdrun command returned return code %s.",
888
+ str(returncode) if returncode is not None else "not available")
889
+ self._engine_state.frames_done += len(self.current_trajectory)
890
+ # dont care if we did a little more and only the checkpoint knows
891
+ # we will only find out with the next trajectory part anyways
892
+ self._engine_state.steps_done = self.current_trajectory.last_step
893
+ self._engine_state.time_done = self.current_trajectory.last_time
894
+ return self.current_trajectory
890
895
  finally:
891
896
  await self._cleanup_gmx_mdrun(workdir=self.workdir)
892
897
 
893
- async def run_steps(self, nsteps, steps_per_part=False):
898
+ async def run_steps(self, nsteps: int, steps_per_part: bool = False
899
+ ) -> Trajectory | None:
894
900
  """
895
901
  Run simulation for specified number of steps.
896
902
 
903
+ Return None if no integration is needed because nsteps integration steps
904
+ have already been performed.
905
+
897
906
  Parameters
898
907
  ----------
899
908
  nsteps : int or None
@@ -907,73 +916,82 @@ class GmxEngine(MDEngine):
907
916
  """
908
917
  return await self.run(nsteps=nsteps, steps_per_part=steps_per_part)
909
918
 
910
- async def run_walltime(self, walltime):
919
+ async def run_walltime(self, walltime: float, max_steps: int | None = None,
920
+ ) -> Trajectory | None:
911
921
  """
912
922
  Run simulation for a given walltime.
913
923
 
924
+ Return None if no integration is needed because max_steps integration
925
+ steps have already been performed.
926
+
914
927
  Parameters
915
928
  ----------
916
929
  walltime : float or None
917
- (Maximum) walltime in hours, `None` means unlimited.
930
+ (Maximum) walltime in hours.
931
+ max_steps : int | None, optional
932
+ If not None, terminate when max_steps integration steps are reached
933
+ in total, even if this happens before walltime is reached.
934
+ By default None.
918
935
  """
919
- return await self.run(walltime=walltime)
936
+ return await self.run(walltime=walltime, nsteps=max_steps,
937
+ steps_per_part=False)
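
Putting prepare and the run methods together: because run() returns None once the requested total number of steps is done (instead of raising), run_walltime with max_steps drops into a simple while loop. A hedged workflow sketch (paths are placeholders, the import path is assumed, and total_steps must be a multiple of the mdp output frequency):

    import asyncio
    from asyncmd.gromacs import GmxEngine, MDP  # import path assumed

    async def produce(total_steps: int = 1_000_000):
        engine = GmxEngine(mdconfig=MDP("md.mdp"), gro_file="conf.gro",
                           top_file="topol.top")
        await engine.prepare(None, workdir=".", deffnm="run")
        # run in 0.5 h chunks until total_steps integration steps are done;
        # run_walltime returns None once max_steps have been reached
        traj = await engine.run_walltime(walltime=0.5, max_steps=total_steps)
        while traj is not None:
            print(f"finished a part, {engine.steps_done} steps done so far")
            traj = await engine.run_walltime(walltime=0.5, max_steps=total_steps)

    asyncio.run(produce())
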
920
938
 
921
939
  def _num_suffix(self, sim_part: int) -> str:
922
940
  # construct gromacs num part suffix from simulation_part
923
941
  num_suffix = f".part{sim_part:04d}"
924
942
  return num_suffix
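
For reference, this suffix is what current_trajectory combines with deffnm and output_traj_type to locate trajectory parts; a tiny illustration (names are placeholders):

    deffnm, simulation_part, output_traj_type = "run", 1, "xtc"
    traj_name = f"{deffnm}.part{simulation_part:04d}.{output_traj_type}"
    assert traj_name == "run.part0001.xtc"
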
925
943
 
926
- def _grompp_cmd(self, mdp_in, tpr_out, workdir, trr_in=None, mdp_out=None):
944
+ def _grompp_cmd(self, *, mdp_in: str, tpr_out: str, workdir: str,
945
+ trr_in: str | None = None, mdp_out: str | None = None,
946
+ ) -> str:
927
947
  # all args are expected to be file paths
928
- # make sure we use the right ones, i.e. relative to workdir
929
- if workdir is not None:
930
- mdp_in = os.path.relpath(mdp_in, start=workdir)
931
- tpr_out = os.path.relpath(tpr_out, start=workdir)
932
- gro_file = os.path.relpath(self.gro_file, start=workdir)
933
- top_file = os.path.relpath(self.top_file, start=workdir)
948
+ # make sure we use the right ones, i.e. relative to workdir of the engine
949
+ # because they will be relative to workdir of the python interpreter
950
+ mdp_in = os.path.relpath(mdp_in, start=workdir)
951
+ tpr_out = os.path.relpath(tpr_out, start=workdir)
952
+ gro_file = os.path.relpath(self.gro_file, start=workdir)
953
+ top_file = os.path.relpath(self.top_file, start=workdir)
934
954
  cmd = f"{self.grompp_executable} -f {mdp_in} -c {gro_file}"
935
955
  cmd += f" -p {top_file}"
936
956
  if self.ndx_file is not None:
937
- if workdir is not None:
938
- ndx_file = os.path.relpath(self.ndx_file, start=workdir)
939
- else:
940
- ndx_file = self.ndx_file
957
+ ndx_file = os.path.relpath(self.ndx_file, start=workdir)
941
958
  cmd += f" -n {ndx_file}"
942
959
  if trr_in is not None:
943
960
  # input trr is optional
944
- # TODO/FIXME?!
945
- # TODO/NOTE: currently we do not pass '-time', i.e. we just use the
961
+ # TODO /NOTE: currently we do not pass '-time', i.e. we just use the
946
962
  # gmx default frame selection: last frame from trr
947
- if workdir is not None:
948
- trr_in = os.path.relpath(trr_in, start=workdir)
963
+ trr_in = os.path.relpath(trr_in, start=workdir)
949
964
  cmd += f" -t {trr_in}"
950
965
  if mdp_out is None:
951
966
  # find out the name and dir of the tpr to put the mdp next to it
952
967
  head, tail = os.path.split(tpr_out)
953
968
  name = tail.split(".")[0]
954
969
  mdp_out = os.path.join(head, name + ".mdout.mdp")
955
- if workdir is not None:
956
- mdp_out = os.path.relpath(mdp_out, start=workdir)
970
+ mdp_out = os.path.relpath(mdp_out, start=workdir)
957
971
  cmd += f" -o {tpr_out} -po {mdp_out}"
958
- if self.grompp_extra_args != "":
972
+ if self.grompp_extra_args:
959
973
  # add extra args string if it is not empty
960
974
  cmd += f" {self.grompp_extra_args}"
961
975
  return cmd
962
976
 
963
- def _mdrun_cmd(self, tpr, workdir, deffnm=None, maxh=None, nsteps=None):
977
+ def _mdrun_cmd(self, *, tpr: str, workdir: str, deffnm: str | None = None,
978
+ maxh: float | None = None, nsteps: int | None = None,
979
+ ) -> str:
964
980
  # use "-noappend" to avoid appending to the trajectories when starting
965
981
  # from checkpoints, instead let gmx create new files with .partXXXX suffix
966
- if workdir is not None:
967
- tpr = os.path.relpath(tpr, start=workdir)
982
+ tpr = os.path.relpath(tpr, start=workdir)
968
983
  if deffnm is None:
969
984
  # find out the name of the tpr and use that as deffnm
970
- head, tail = os.path.split(tpr)
985
+ _, tail = os.path.split(tpr)
971
986
  deffnm = tail.split(".")[0]
972
- #cmd = f"{self.mdrun_executable} -noappend -deffnm {deffnm} -cpi"
987
+ # cmd = f"{self.mdrun_executable} -noappend -deffnm {deffnm} -cpi"
973
988
  # NOTE: the line above does the same as the four below before the if-clauses
974
989
  # however gromacs -deffnm is deprecated (and buggy),
975
990
  # so we just make our own 'deffnm', i.e. we name all files the same
976
991
  # except for the ending but do so explicitly
992
+ # TODO /FIXME: we dont specify the names for e.g. pull outputfiles,
993
+ # so they will have their default names and will collide
994
+ # when running multiple engines in the same folder!
977
995
  cmd = f"{self.mdrun_executable} -noappend -s {tpr}"
978
996
  # always add the -cpi option, this lets gmx figure out if it wants
979
997
  # to start from a checkpoint (if there is one with deffnm)
@@ -983,37 +1001,29 @@ class GmxEngine(MDEngine):
983
1001
  cmd += f" -o {deffnm}.trr -x {deffnm}.xtc -c {deffnm}.confout.gro"
984
1002
  cmd += f" -e {deffnm}.edr -g {deffnm}.log"
985
1003
  if maxh is not None:
986
- maxh = self._mdrun_time_conversion_factor * maxh
1004
+ maxh = self.mdrun_time_conversion_factor * maxh
987
1005
  cmd += f" -maxh {maxh}"
988
1006
  if nsteps is not None:
989
1007
  cmd += f" -nsteps {nsteps}"
990
- if self.mdrun_extra_args != "":
1008
+ if self.mdrun_extra_args:
991
1009
  cmd += f" {self.mdrun_extra_args}"
992
1010
  return cmd
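
For orientation, the flavor of command strings these two helpers assemble with the default executables; the paths, the checkpoint name, and the limits below are purely illustrative and depend on the engine's configuration:

    # grompp: mdp, structure, and topology in, tpr (and processed mdp) out
    grompp_cmd = ("gmx grompp -f run.mdp -c conf.gro -p topol.top"
                  " -o run.tpr -po run.mdout.mdp")
    # mdrun: -noappend plus explicit output names instead of the deprecated
    # -deffnm, -cpi for transparent restarts, optional -maxh/-nsteps limits
    mdrun_cmd = ("gmx mdrun -noappend -s run.tpr -cpi run.cpt"
                 " -o run.trr -x run.xtc -c run.confout.gro"
                 " -e run.edr -g run.log -maxh 1.0 -nsteps 100000")
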
993
1011
 
994
1012
 
995
1013
  class SlurmGmxEngine(GmxEngine):
996
1014
  __doc__ = GmxEngine.__doc__
997
- # use local prepare (i.e. grompp) of GmxEngine then submit run to slurm
998
- # we reuse the `GmxEngine._proc` to keep a reference to a `SlurmProcess`
1015
+ # Use local prepare (i.e. grompp) of GmxEngine then submit run to slurm.
1016
+ # Take submit script as str/file, use pythons .format to insert stuff.
1017
+ # We overwrite the `GmxEngine._start_gmx_mdrun` to instead return a `SlurmProcess`,
999
1018
  # which emulates the API of `asyncio.subprocess.Process` and can (for our
1000
- # purposes) be used as a drop-in replacement, therefore we only need to
1019
+ # purposes) be used as a drop-in replacement. Therefore we only need to
1001
1020
  # reimplement `_start_gmx_mdrun()`, `_acquire_resources_gmx_mdrun()` and
1002
- # `_cleanup_gmx_mdrun()` to have a working SlurmGmxEngine
1003
- # take submit script as str/file, use pythons .format to insert stuff!
1004
- # TODO: use SLURM also for grompp?! (would make stuff faster?)
1021
+ # `_cleanup_gmx_mdrun()` to have a working SlurmGmxEngine.
1022
+ # TODO: use SLURM also for grompp?! (would it make stuff faster?)
1005
1023
  # I (hejung) think probably not by much because we already use
1006
- # asyncios subprocess for grompp (i.e. do it asyncronous) and grompp
1024
+ # asyncio's subprocess for grompp (i.e. do it asynchronously) and grompp
1007
1025
  # will most likely not use many resources on the login (local) node
1008
1026
 
1009
- # NOTE: these are possible options, but they result in added dependencies
1010
- # - jinja2 templates for slurm submission scripts?
1011
- # (does not look like we gain flexibility but we get more work,
1012
- # so probably not?!)
1013
- # - pyslurm for job status checks?!
1014
- # (it seems submission is frickly/impossible in pyslurm,
1015
- # so also probably not?!)
1016
-
1017
1027
  _mdrun_executable = "gmx_mpi mdrun" # MPI as default for clusters
1018
1028
  _mdrun_time_conversion_factor = 0.99 # run mdrun for 0.99 * time-limit
1019
1029
  # NOTE: The rationale behind the (slightly) reduced mdrun time compared to
@@ -1022,11 +1032,14 @@ class SlurmGmxEngine(GmxEngine):
1022
1032
  # environments, etc.) and this can result in jobs that are cancelled
1023
1033
  # due to reaching the maximum time limit in slurm. This in turn means
1024
1034
  # that we would believe the job failed because it got cancelled
1025
- # although the mdrun was successfull.
1035
+ # although the mdrun was successful.
1026
1036
 
1027
- def __init__(self, mdconfig, gro_file, top_file, sbatch_script, ndx_file=None,
1028
- sbatch_options: dict | None = None,
1029
- **kwargs):
1037
+ # pylint: disable-next=too-many-arguments
1038
+ def __init__(self, mdconfig: MDP, gro_file: str, top_file: str, *,
1039
+ ndx_file: str | None = None,
1040
+ sbatch_script: str,
1041
+ sbatch_options: dict[str, str] | None = None,
1042
+ **kwargs) -> None:
1030
1043
  """
1031
1044
  Initialize a :class:`SlurmGmxEngine`.
1032
1045
 
@@ -1037,7 +1050,7 @@ class SlurmGmxEngine(GmxEngine):
1037
1050
  gro_file: str
1038
1051
  Absolute or relative path to a gromacs structure file.
1039
1052
  top_file: str
1040
- Absolute or relative path to a gromacs topolgy (.top) file.
1053
+ Absolute or relative path to a gromacs topology (.top) file.
1041
1054
  sbatch_script : str
1042
1055
  Absolute or relative path to a slurm sbatch script or a string with
1043
1056
  the content of the sbatch script. Note that the submission script
@@ -1050,19 +1063,19 @@ class SlurmGmxEngine(GmxEngine):
1050
1063
  Optional, absolute or relative path to a gromacs index file.
1051
1064
  sbatch_options : dict or None
1052
1065
  Dictionary of sbatch options, keys are long names for options,
1053
- values are the correponding values. The keys/long names are given
1054
- without the dashes, e.g. to specify "--mem=1024" the dictionary
1055
- needs to be {"mem": "1024"}. To specify options without values use
1056
- keys with empty strings as values, e.g. to specify "--contiguous"
1057
- the dictionary needs to be {"contiguous": ""}.
1066
+ values are the corresponding values. The keys/long names are given
1067
+ without the dashes, e.g. to specify ``--mem=1024`` the dictionary
1068
+ needs to be ``{"mem": "1024"}``. To specify options without values
1069
+ use keys with empty strings as values, e.g. to specify
1070
+ ``--contiguous`` the dictionary needs to be ``{"contiguous": ""}``.
1058
1071
  See the SLURM documentation for a full list of sbatch options
1059
1072
  (https://slurm.schedmd.com/sbatch.html).
1060
- Note: This argument is passed as is to the `SlurmProcess` in which
1073
+ Note: This argument is passed as is to the ``SlurmProcess`` in which
1061
1074
  the computation is performed. Each call to the engine's `run` method
1062
- triggers the creation of a new `SlurmProcess` and will use the then
1063
- current `sbatch_options`.
1075
+ triggers the creation of a new :class:`asyncmd.slurm.SlurmProcess`
1076
+ and will use the then current ``sbatch_options``.
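
A concrete example of the sbatch_options mapping described above (the option names chosen here are hypothetical; any long-form sbatch option works the same way):

    sbatch_options = {
        "partition": "some-partition",  # rendered as --partition=some-partition
        "mem": "1024",                  # rendered as --mem=1024 (as in the docstring)
        "contiguous": "",               # value-less flag, rendered as --contiguous
    }
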
1064
1077
 
1065
- Note that all attributes can be set at intialization by passing keyword
1078
+ Note that all attributes can be set at initialization by passing keyword
1066
1079
  arguments with their name, e.g. mdrun_extra_args="-ntomp 2" to instruct
1067
1080
  gromacs to use 2 openMP threads.
1068
1081
  """
@@ -1074,21 +1087,23 @@ class SlurmGmxEngine(GmxEngine):
1074
1087
  # we decide what it is by checking for the shebang
1075
1088
  if not sbatch_script.startswith("#!"):
1076
1089
  # probably a path to a file, let's try to read it
1077
- with open(sbatch_script, 'r') as f:
1090
+ with open(sbatch_script, 'r', encoding="locale") as f:
1078
1091
  sbatch_script = f.read()
1079
1092
  self.sbatch_script = sbatch_script
1080
1093
  self.sbatch_options = sbatch_options
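
A hypothetical sketch of the two accepted forms of sbatch_script handled above: either a path to a script file or the script content itself (recognized by its shebang). The only hard requirement visible in this class is the {mdrun_cmd} placeholder, which _start_gmx_mdrun fills in via str.format; the file names and SBATCH directives below are made up for illustration, and it is assumed here that MDP is constructed from the path of the .mdp file:

    # 1) path to an existing submission script
    engine = SlurmGmxEngine(mdconfig=MDP("md.mdp"), gro_file="conf.gro",
                            top_file="topol.top", sbatch_script="mdrun.slurm")

    # 2) inline script content, detected via the "#!" shebang check above;
    #    {mdrun_cmd} is replaced with the assembled gmx mdrun command at run time
    inline_script = "#!/bin/bash -l\n#SBATCH --ntasks=8\nsrun {mdrun_cmd}\n"
    engine = SlurmGmxEngine(mdconfig=MDP("md.mdp"), gro_file="conf.gro",
                            top_file="topol.top", sbatch_script=inline_script)
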
1081
1094
 
1082
- def _name_from_name_or_none(self, run_name: typing.Optional[str]) -> str:
1095
+ def _name_from_name_or_none(self, run_name: str | None) -> str:
1083
1096
  if run_name is not None:
1084
1097
  name = run_name
1085
1098
  else:
1086
1099
  # create a name from deffnm and partnum
1087
- name = self._deffnm + self._num_suffix(sim_part=self._simulation_part)
1100
+ name = self.deffnm + self._num_suffix(sim_part=self.simulation_part)
1088
1101
  return name
1089
1102
 
1090
- async def _start_gmx_mdrun(self, cmd_str, workdir, walltime=None,
1091
- run_name=None, **kwargs):
1103
+ async def _start_gmx_mdrun(self, *, cmd_str: str, workdir: str,
1104
+ walltime: float | None,
1105
+ run_name: str | None = None,
1106
+ ) -> slurm.SlurmProcess:
1092
1107
  name = self._name_from_name_or_none(run_name=run_name)
1093
1108
  # substitute placeholders in submit script
1094
1109
  script = self.sbatch_script.format(mdrun_cmd=cmd_str)
@@ -1103,7 +1118,7 @@ class SlurmGmxEngine(GmxEngine):
1103
1118
  async with _SEMAPHORES["MAX_FILES_OPEN"]:
1104
1119
  async with aiofiles.open(fname, 'w') as f:
1105
1120
  await f.write(script)
1106
- self._proc = await slurm.create_slurmprocess_submit(
1121
+ return await slurm.create_slurmprocess_submit(
1107
1122
  jobname=name,
1108
1123
  sbatch_script=fname,
1109
1124
  workdir=workdir,
@@ -1113,17 +1128,18 @@ class SlurmGmxEngine(GmxEngine):
1113
1128
  stdin=None,
1114
1129
  )
1115
1130
 
1116
- async def _acquire_resources_gmx_mdrun(self, **kwargs):
1117
- if _SEMAPHORES["SLURM_MAX_JOB"] is not None:
1131
+ async def _acquire_resources_gmx_mdrun(self) -> None:
1132
+ if _OPT_SEMAPHORES["SLURM_MAX_JOB"] is not None:
1118
1133
  logger.debug("SLURM_MAX_JOB semaphore is %s before acquiring.",
1119
- _SEMAPHORES['SLURM_MAX_JOB'])
1120
- await _SEMAPHORES["SLURM_MAX_JOB"].acquire()
1134
+ _OPT_SEMAPHORES['SLURM_MAX_JOB'])
1135
+ await _OPT_SEMAPHORES["SLURM_MAX_JOB"].acquire()
1121
1136
  else:
1122
1137
  logger.debug("SLURM_MAX_JOB semaphore is None")
1123
1138
 
1124
- async def _cleanup_gmx_mdrun(self, workdir, run_name=None, **kwargs):
1125
- if _SEMAPHORES["SLURM_MAX_JOB"] is not None:
1126
- _SEMAPHORES["SLURM_MAX_JOB"].release()
1139
+ async def _cleanup_gmx_mdrun(self, workdir: str, run_name: str | None = None,
1140
+ ) -> None:
1141
+ if _OPT_SEMAPHORES["SLURM_MAX_JOB"] is not None:
1142
+ _OPT_SEMAPHORES["SLURM_MAX_JOB"].release()
1127
1143
  # remove the sbatch script
1128
1144
  name = self._name_from_name_or_none(run_name=run_name)
1129
1145
  fname = os.path.join(workdir, name + ".slurm")
@@ -1133,11 +1149,3 @@ class SlurmGmxEngine(GmxEngine):
1133
1149
  await aiofiles.os.remove(fname)
1134
1150
  except FileNotFoundError:
1135
1151
  pass
1136
-
1137
- # TODO: do we even need/want this?
1138
- @property
1139
- def slurm_job_state(self) -> str | None:
1140
- """The state of the slurm job as reported by slurm."""
1141
- if self._proc is None:
1142
- return None
1143
- return self._proc.slurm_job_state
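
The acquire/release pair in _acquire_resources_gmx_mdrun and _cleanup_gmx_mdrun above follows a simple "optional semaphore" pattern: throttle only if a job limit is configured, otherwise do nothing. A generic, self-contained sketch of that pattern (independent of asyncmd's actual _OPT_SEMAPHORES bookkeeping):

    import asyncio

    # None means "no limit configured", mirroring the SLURM_MAX_JOB entry above
    MAX_JOBS: asyncio.Semaphore | None = asyncio.Semaphore(10)

    async def submit_throttled(job_coro):
        # acquire only if a cap is configured (cf. _acquire_resources_gmx_mdrun)
        if MAX_JOBS is not None:
            await MAX_JOBS.acquire()
        try:
            return await job_coro
        finally:
            # always release again during cleanup (cf. _cleanup_gmx_mdrun)
            if MAX_JOBS is not None:
                MAX_JOBS.release()
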