imdclient 0.1.3__py3-none-any.whl → 0.2.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. imdclient/IMDClient.py +43 -12
  2. imdclient/IMDProtocol.py +1 -0
  3. imdclient/__init__.py +0 -5
  4. imdclient/data/gromacs/md/gromacs_v3_nst1.mdp +3 -3
  5. imdclient/data/namd/md/namd3 +0 -0
  6. imdclient/data/namd/md/namd_v3_nst_1.namd +1 -1
  7. imdclient/tests/base.py +108 -83
  8. imdclient/tests/conftest.py +0 -39
  9. imdclient/tests/datafiles.py +16 -1
  10. imdclient/tests/docker_testing/docker.md +1 -1
  11. imdclient/tests/hpc_testing/gromacs/README.md +112 -0
  12. imdclient/tests/hpc_testing/gromacs/gmx_gpu_test.mdp +58 -0
  13. imdclient/tests/hpc_testing/gromacs/gmx_gpu_test.top +11764 -0
  14. imdclient/tests/hpc_testing/gromacs/struct.gro +21151 -0
  15. imdclient/tests/hpc_testing/gromacs/validate_gmx.sh +90 -0
  16. imdclient/tests/hpc_testing/lammps/README.md +62 -0
  17. imdclient/tests/hpc_testing/lammps/lammps_v3_nst_1.in +71 -0
  18. imdclient/tests/hpc_testing/lammps/topology_after_min.data +8022 -0
  19. imdclient/tests/hpc_testing/lammps/validate_lmp.sh +66 -0
  20. imdclient/tests/hpc_testing/namd/README.md +147 -0
  21. imdclient/tests/hpc_testing/namd/alanin.params +402 -0
  22. imdclient/tests/hpc_testing/namd/alanin.pdb +77 -0
  23. imdclient/tests/hpc_testing/namd/alanin.psf +206 -0
  24. imdclient/tests/hpc_testing/namd/namd_v3_nst_1.namd +59 -0
  25. imdclient/tests/hpc_testing/namd/validate_namd.sh +71 -0
  26. imdclient/tests/minimalreader.py +86 -0
  27. imdclient/tests/server.py +6 -14
  28. imdclient/tests/test_gromacs.py +15 -3
  29. imdclient/tests/test_imdclient.py +26 -7
  30. imdclient/tests/test_lammps.py +22 -19
  31. imdclient/tests/test_manual.py +224 -66
  32. imdclient/tests/test_namd.py +39 -16
  33. imdclient/tests/test_utils.py +31 -0
  34. imdclient/utils.py +50 -17
  35. {imdclient-0.1.3.dist-info → imdclient-0.2.0b0.dist-info}/METADATA +60 -39
  36. imdclient-0.2.0b0.dist-info/RECORD +53 -0
  37. {imdclient-0.1.3.dist-info → imdclient-0.2.0b0.dist-info}/WHEEL +1 -1
  38. {imdclient-0.1.3.dist-info → imdclient-0.2.0b0.dist-info/licenses}/AUTHORS.md +4 -1
  39. {imdclient-0.1.3.dist-info → imdclient-0.2.0b0.dist-info/licenses}/LICENSE +3 -1
  40. imdclient/IMD.py +0 -130
  41. imdclient/backends.py +0 -352
  42. imdclient/results.py +0 -332
  43. imdclient/streamanalysis.py +0 -1056
  44. imdclient/streambase.py +0 -199
  45. imdclient/tests/test_imdreader.py +0 -658
  46. imdclient/tests/test_stream_analysis.py +0 -61
  47. imdclient-0.1.3.dist-info/RECORD +0 -42
  48. {imdclient-0.1.3.dist-info → imdclient-0.2.0b0.dist-info}/top_level.txt +0 -0
@@ -1,1056 +0,0 @@
1
- # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
2
- # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
3
- #
4
- # MDAnalysis --- https://www.mdanalysis.org
5
- # Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
6
- # (see the file AUTHORS for the full list of names)
7
- #
8
- # Released under the GNU Public Licence, v2 or any higher version
9
- #
10
- # Please cite your use of MDAnalysis in published work:
11
- #
12
- # R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
13
- # D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
14
- # MDAnalysis: A Python package for the rapid analysis of molecular dynamics
15
- # simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
16
- # Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
17
- # doi: 10.25080/majora-629e541a-00e
18
- #
19
- # N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
20
- # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
21
- # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
22
- #
23
- """Analysis building blocks --- :mod:`MDAnalysis.analysis.base`
24
- ============================================================
25
-
26
- MDAnalysis provides building blocks for creating analysis classes. One can
27
- think of each analysis class as a "tool" that performs a specific analysis over
28
- the trajectory frames and stores the results in the tool.
29
-
30
- Analysis classes are derived from :class:`AnalysisBase` by subclassing. This
31
- inheritance provides a common workflow and API for users and makes many
32
- additional features automatically available (such as frame selections and a
33
- verbose progressbar). The important points for analysis classes are:
34
-
35
- #. Analysis tools are Python classes derived from :class:`AnalysisBase`.
36
- #. When instantiating an analysis, the :class:`Universe` or :class:`AtomGroup`
37
- that the analysis operates on is provided together with any other parameters
38
- that are kept fixed for the specific analysis.
39
- #. The analysis is performed with :meth:`~AnalysisBase.run` method. It has a
40
- common set of arguments such as being able to select the frames the analysis
41
- is performed on. The `verbose` keyword argument enables additional output. A
42
- progressbar is shown by default that also shows an estimate for the
43
- remaining time until the end of the analysis.
44
- #. Results are always stored in the attribute :attr:`AnalysisBase.results`,
45
- which is an instance of :class:`Results`, a kind of dictionary that allows
46
- item access via attributes. Each analysis class decides what and how
47
- to store in :class:`Results` and needs to document it. For time series, the
48
- :attr:`AnalysisBase.times` contains the time stamps of the analyzed frames.
49
-
50
-
51
- Example of using a standard analysis tool
52
- -----------------------------------------
53
-
54
- For example, the :class:`MDAnalysis.analysis.rms.RMSD` performs a
55
- root-mean-square distance analysis in the following way:
56
-
57
- .. code-block:: python
58
-
59
- import MDAnalysis as mda
60
- from MDAnalysisTests.datafiles import TPR, XTC
61
-
62
- from MDAnalysis.analysis import rms
63
-
64
- u = mda.Universe(TPR, XTC)
65
-
66
- # (2) instantiate analysis
67
- rmsd = rms.RMSD(u, select='name CA')
68
-
69
- # (3) the run() method can select frames in different ways
70
- # run on all frames (with progressbar)
71
- rmsd.run(verbose=True)
72
-
73
- # or start, stop, and step can be used
74
- rmsd.run(start=2, stop=8, step=2)
75
-
76
- # a list of frames to run the analysis on can be passed
77
- rmsd.run(frames=[0,2,3,6,9])
78
-
79
- # a list of booleans the same length of the trajectory can be used
80
- rmsd.run(frames=[True, False, True, True, False, False, True, False,
81
- False, True])
82
-
83
- # (4) analyze the results, e.g., plot
84
- t = rmsd.times
85
- y = rmsd.results.rmsd[:, 2] # RMSD at column index 2, see docs
86
-
87
- import matplotlib.pyplot as plt
88
- plt.plot(t, y)
89
- plt.xlabel("time (ps)")
90
- plt.ylabel("RMSD (Å)")
91
-
92
-
93
- Writing new analysis tools
94
- --------------------------
95
-
96
- In order to write new analysis tools, derive a class from :class:`AnalysisBase`
97
- and define at least the :meth:`_single_frame` method, as described in
98
- :class:`AnalysisBase`.
99
-
100
- .. SeeAlso::
101
-
102
- The chapter `Writing your own trajectory analysis`_ in the *User Guide*
103
- contains a step-by-step example for writing analysis tools with
104
- :class:`AnalysisBase`.
105
-
106
- .. _`Writing your own trajectory analysis`:
107
- https://userguide.mdanalysis.org/stable/examples/analysis/custom_trajectory_analysis.html
108
-
109
-
110
- If your analysis is operating independently on each frame, you might consider
111
- making it **parallelizable** via adding a :meth:`get_supported_backends` method,
112
- and appropriate aggregation function for each of its results. For example, if
113
- you have your :meth:`_single_frame` method storing important values under
114
- :attr:`self.results.timeseries`, you will write:
115
-
116
- .. code-block:: python
117
-
118
- class MyAnalysis(AnalysisBase):
119
- _analysis_algorithm_is_parallelizable = True
120
-
121
- @classmethod
122
- def get_supported_backends(cls):
123
- return ('serial', 'multiprocessing', 'dask',)
124
-
125
-
126
- def _get_aggregator(self):
127
- return ResultsGroup(lookup={'timeseries': ResultsGroup.ndarray_vstack})
128
-
129
- See :mod:`MDAnalysis.analysis.results` for more on aggregating results.
130
-
131
- .. SeeAlso::
132
-
133
- :ref:`parallel-analysis`
134
-
135
-
136
-
137
- Classes
138
- -------
139
-
140
- The :class:`MDAnalysis.results.Results` and :class:`AnalysisBase` classes
141
- are the essential building blocks for almost all MDAnalysis tools in the
142
- :mod:`MDAnalysis.analysis` module. They aim to be easily useable and
143
- extendable.
144
-
145
- :class:`AnalysisFromFunction` and the :func:`analysis_class` functions are
146
- simple wrappers that make it even easier to create fully-featured analysis
147
- tools if only the single-frame analysis function needs to be written.
148
-
149
- """
150
- import inspect
151
- import itertools
152
- import logging
153
- import warnings
154
- from functools import partial
155
- from typing import Iterable, Union
156
-
157
- import numpy as np
158
- from MDAnalysis import coordinates
159
- from MDAnalysis.core.groups import AtomGroup
160
- from MDAnalysis.lib.log import ProgressBar
161
-
162
- from .backends import (
163
- BackendDask,
164
- BackendMultiprocessing,
165
- BackendSerial,
166
- BackendBase,
167
- )
168
- from .results import Results, ResultsGroup
169
-
170
- from .streambase import StreamReaderBase
171
-
172
- logger = logging.getLogger(__name__)
173
-
174
-
175
- class AnalysisBase(object):
176
- r"""Base class for defining multi-frame analysis
177
-
178
- The class is designed as a template for creating multi-frame analyses.
179
- This class will automatically take care of setting up the trajectory
180
- reader for iterating, and it offers to show a progress meter.
181
- Computed results are stored inside the :attr:`results` attribute.
182
-
183
- To define a new Analysis, :class:`AnalysisBase` needs to be subclassed
184
- and :meth:`_single_frame` must be defined. It is also possible to define
185
- :meth:`_prepare` and :meth:`_conclude` for pre- and post-processing.
186
- All results should be stored as attributes of the
187
- :class:`MDAnalysis.analysis.results.Results` container.
188
-
189
- Parameters
190
- ----------
191
- trajectory : MDAnalysis.coordinates.base.ReaderBase
192
- A trajectory Reader
193
- verbose : bool, optional
194
- Turn on more logging and debugging
195
-
196
- Attributes
197
- ----------
198
- times: numpy.ndarray
199
- array of Timestep times. Only exists after calling
200
- :meth:`AnalysisBase.run`
201
- frames: numpy.ndarray
202
- array of Timestep frame indices. Only exists after calling
203
- :meth:`AnalysisBase.run`
204
- results: :class:`Results`
205
- results of calculation are stored after call
206
- to :meth:`AnalysisBase.run`
207
-
208
-
209
- Example
210
- -------
211
- .. code-block:: python
212
-
213
- from MDAnalysis.analysis.base import AnalysisBase
214
-
215
- class NewAnalysis(AnalysisBase):
216
- def __init__(self, atomgroup, parameter, **kwargs):
217
- super(NewAnalysis, self).__init__(atomgroup.universe.trajectory,
218
- **kwargs)
219
- self._parameter = parameter
220
- self._ag = atomgroup
221
-
222
- def _prepare(self):
223
- # OPTIONAL
224
- # Called before iteration on the trajectory has begun.
225
- # Data structures can be set up at this time
226
- self.results.example_result = []
227
-
228
- def _single_frame(self):
229
- # REQUIRED
230
- # Called after the trajectory is moved onto each new frame.
231
- # store an example_result of `some_function` for a single frame
232
- self.results.example_result.append(some_function(self._ag,
233
- self._parameter))
234
-
235
- def _conclude(self):
236
- # OPTIONAL
237
- # Called once iteration on the trajectory is finished.
238
- # Apply normalisation and averaging to results here.
239
- self.results.example_result = np.asarray(self.results.example_result)
240
- self.results.example_result /= np.sum(self.results.example_result)
241
-
242
- Afterwards the new analysis can be run like this
243
-
244
- .. code-block:: python
245
-
246
- import MDAnalysis as mda
247
- from MDAnalysisTests.datafiles import PSF, DCD
248
-
249
- u = mda.Universe(PSF, DCD)
250
-
251
- na = NewAnalysis(u.select_atoms('name CA'), 35)
252
- na.run(start=10, stop=20)
253
- print(na.results.example_result)
254
- # results can also be accessed by key
255
- print(na.results["example_result"])
256
-
257
-
258
- .. versionchanged:: 1.0.0
259
- Support for setting `start`, `stop`, and `step` has been removed. These
260
- should now be directly passed to :meth:`AnalysisBase.run`.
261
-
262
- .. versionchanged:: 2.0.0
263
- Added :attr:`results`
264
-
265
- .. versionchanged:: 2.8.0
266
- Added ability to run analysis in parallel using either a
267
- built-in backend (`multiprocessing` or `dask`) or a custom
268
- `backends.BackendBase` instance with an implemented `apply` method
269
- that is used to run the computations.
270
- """
271
-
272
- @classmethod
273
- def get_supported_backends(cls):
274
- """Tuple with backends supported by the core library for a given class.
275
- User can pass either one of these values as ``backend=...`` to
276
- :meth:`run()` method, or a custom object that has ``apply`` method
277
- (see documentation for :meth:`run()`):
278
-
279
- - 'serial': no parallelization
280
- - 'multiprocessing': parallelization using `multiprocessing.Pool`
281
- - 'dask': parallelization using `dask.delayed.compute()`. Requires
282
- installation of `mdanalysis[dask]`
283
-
284
- If you want to add your own backend to an existing class, pass a
285
- :class:`backends.BackendBase` subclass (see its documentation to learn
286
- how to implement it properly), and specify ``unsupported_backend=True``.
287
-
288
- Returns
289
- -------
290
- tuple
291
- names of built-in backends that can be used in :meth:`run(backend=...)`
292
-
293
-
294
- .. versionadded:: 2.8.0
295
- """
296
- return ("serial",)
297
-
298
- # class authors: override _analysis_algorithm_is_parallelizable
299
- # in derived classes and only set to True if you have confirmed
300
- # that your algorithm works reliably when parallelized with
301
- # the split-apply-combine approach (see docs)
302
- _analysis_algorithm_is_parallelizable = False
303
- _analysis_algorithm_is_streamable = True
304
-
305
- @property
306
- def parallelizable(self):
307
- """Boolean mark showing that a given class can be parallelizable with
308
- split-apply-combine procedure. Namely, if we can safely distribute
309
- :meth:`_single_frame` to multiple workers and then combine them with a
310
- proper :meth:`_conclude` call. If set to ``False``, no backends except
311
- for ``serial`` are supported.
312
-
313
- .. note:: If you want to check parallelizability of the whole class, without
314
- explicitly creating an instance of the class, see
315
- :attr:`_analysis_algorithm_is_parallelizable`. Note that
316
- setting it to another value will break things if the algorithm
317
- behind the analysis is not trivially parallelizable.
318
-
319
-
320
- Returns
321
- -------
322
- bool
323
- if a given ``AnalysisBase`` subclass instance
324
- is parallelizable with split-apply-combine, or not
325
-
326
-
327
- .. versionadded:: 2.8.0
328
- """
329
- return self._analysis_algorithm_is_parallelizable
330
-
331
- def __init__(self, trajectory, verbose=False, **kwargs):
332
- self._streamed = False
333
- if isinstance(trajectory, StreamReaderBase):
334
- self._streamed = True
335
- self._trajectory = trajectory
336
- self._verbose = verbose
337
- self.results = Results()
338
-
339
- def _define_run_frames(
340
- self, trajectory, start=None, stop=None, step=None, frames=None
341
- ) -> Union[slice, np.ndarray]:
342
- """Defines limits for the whole run, as passed by self.run() arguments
343
-
344
- Parameters
345
- ----------
346
- trajectory : mda.Reader
347
- a trajectory Reader
348
- start : int, optional
349
- start frame of analysis, by default None
350
- stop : int, optional
351
- stop frame of analysis, by default None
352
- step : int, optional
353
- number of frames to skip between each analysed frame, by default None
354
- frames : array_like, optional
355
- array of integers or booleans to slice trajectory; cannot be
356
- combined with ``start``, ``stop``, ``step``; by default None
357
-
358
- Returns
359
- -------
360
- Union[slice, np.ndarray]
361
- Appropriate slicer for the trajectory that would give correct iteration
362
- order via trajectory[slicer]
363
-
364
- Raises
365
- ------
366
- ValueError
367
- if *both* `frames` and at least one of ``start``, ``stop``, or ``step``
368
- is provided (i.e. set to not ``None`` value).
369
-
370
-
371
- .. versionadded:: 2.8.0
372
- """
373
- self._trajectory = trajectory
374
- if frames is not None:
375
- if not all(opt is None for opt in [start, stop, step]):
376
- raise ValueError(
377
- "start/stop/step cannot be combined with frames"
378
- )
379
- slicer = frames
380
- else:
381
- start, stop, step = trajectory.check_slice_indices(
382
- start, stop, step
383
- )
384
- slicer = slice(start, stop, step)
385
- self.start, self.stop, self.step = start, stop, step
386
- return slicer
387
-
388
- def _prepare_sliced_trajectory(self, slicer: Union[slice, np.ndarray]):
389
- """Prepares sliced trajectory for use in subsequent parallel computations:
390
- namely, assigns self._sliced_trajectory and its appropriate attributes,
391
- self.n_frames, self.frames and self.times.
392
-
393
- Parameters
394
- ----------
395
- slicer : Union[slice, np.ndarray]
396
- appropriate slicer for the trajectory
397
-
398
-
399
- .. versionadded:: 2.8.0
400
- """
401
- self._sliced_trajectory = self._trajectory[slicer]
402
-
403
- self.n_frames = len(self._sliced_trajectory)
404
- self.frames = np.zeros(self.n_frames, dtype=int)
405
- self.times = np.zeros(self.n_frames)
406
-
407
- def _setup_frames(
408
- self, trajectory, start=None, stop=None, step=None, frames=None
409
- ):
410
- """Pass a Reader object and define the desired iteration pattern
411
- through the trajectory
412
-
413
- Parameters
414
- ----------
415
- trajectory : mda.Reader
416
- A trajectory Reader
417
- start : int, optional
418
- start frame of analysis
419
- stop : int, optional
420
- stop frame of analysis
421
- step : int, optional
422
- number of frames to skip between each analysed frame
423
- frames : array_like, optional
424
- array of integers or booleans to slice trajectory; cannot be
425
- combined with ``start``, ``stop``, ``step``
426
-
427
- .. versionadded:: 2.2.0
428
-
429
- Raises
430
- ------
431
- ValueError
432
- if *both* `frames` and at least one of ``start``, ``stop``, or
433
- ``step`` is provided (i.e., set to another value than ``None``)
434
-
435
-
436
- .. versionchanged:: 1.0.0
437
- Added .frames and .times arrays as attributes
438
-
439
- .. versionchanged:: 2.2.0
440
- Added ability to iterate through trajectory by passing a list of
441
- frame indices in the `frames` keyword argument
442
-
443
- .. versionchanged:: 2.8.0
444
- Split function into two: :meth:`_define_run_frames` and
445
- :meth:`_prepare_sliced_trajectory`: first one defines the limits
446
- for the whole run and is executed once during :meth:`run` in
447
- :meth:`_setup_frames`, second one prepares sliced trajectory for
448
- each of the workers and gets executed twice: one time in
449
- :meth:`_setup_frames` for the whole trajectory, second time in
450
- :meth:`_compute` for each of the computation groups.
451
- """
452
- slicer = self._define_run_frames(trajectory, start, stop, step, frames)
453
- self._prepare_sliced_trajectory(slicer)
454
-
455
- def _single_frame(self):
456
- """Calculate data from a single frame of trajectory
457
-
458
- Don't worry about normalising, just deal with a single frame.
459
- Attributes accessible during your calculations:
460
-
461
- - ``self._frame_index``: index of the frame in results array
462
- - ``self._ts`` -- Timestep instance
463
- - ``self._sliced_trajectory`` -- trajectory that you're iterating over
464
- - ``self.results`` -- :class:`MDAnalysis.analysis.results.Results` instance
465
- holding run results initialized in :meth:`_prepare`.
466
- """
467
- raise NotImplementedError("Only implemented in child classes")
468
-
469
- def _prepare(self):
470
- """
471
- Set things up before the analysis loop begins.
472
-
473
- Notes
474
- -----
475
- ``self.results`` is initialized already in :meth:`self.__init__` with an
476
- empty instance of :class:`MDAnalysis.analysis.results.Results` object.
477
- You can still call your attributes as if they were usual ones,
478
- ``Results`` just keeps track of that to be able to run a proper
479
- aggregation after a parallel run, if necessary.
480
- """
481
- pass # pylint: disable=unnecessary-pass
482
-
483
- def _conclude(self):
484
- """Finalize the results you've gathered.
485
-
486
- Called at the end of the :meth:`run` method to finish everything up.
487
-
488
- Notes
489
- -----
490
- Aggregation of results from individual workers happens in
491
- :meth:`self.run()`, so here you have to implement everything as if you
492
- had a non-parallel run. If you want to enable proper aggregation for
493
- parallel runs for your analysis class, implement ``self._get_aggregator``
494
- and check :mod:`MDAnalysis.analysis.results` for how to use it.
495
- """
496
- pass # pylint: disable=unnecessary-pass
497
-
498
- def _compute(
499
- self,
500
- indexed_frames: np.ndarray,
501
- verbose: bool = None,
502
- *,
503
- progressbar_kwargs={},
504
- ) -> "AnalysisBase":
505
- """Perform the calculation on a balanced slice of frames
506
- that have been set up prior to that using _setup_computation_groups()
507
-
508
- Parameters
509
- ----------
510
- indexed_frames : np.ndarray
511
- np.ndarray of (n, 2) shape, where first column is frame iteration
512
- indices and second is frame numbers
513
-
514
- verbose : bool, optional
515
- Turn on verbosity
516
-
517
- progressbar_kwargs : dict, optional
518
- ProgressBar keywords with custom parameters regarding progress bar
519
- position, etc; see :class:`MDAnalysis.lib.log.ProgressBar`
520
- for full list.
521
-
522
-
523
- .. versionadded:: 2.8.0
524
- """
525
- logger.info("Choosing frames to analyze")
526
- # if verbose unchanged, use class default
527
- verbose = (
528
- getattr(self, "_verbose", False) if verbose is None else verbose
529
- )
530
-
531
- frames = indexed_frames[:, 1]
532
-
533
- logger.info("Starting preparation")
534
- self._prepare_sliced_trajectory(slicer=frames)
535
- self._prepare()
536
- if len(frames) == 0: # if `frames` were empty in `run` or `stop=0`
537
- return self
538
-
539
- for idx, ts in enumerate(
540
- ProgressBar(
541
- self._sliced_trajectory, verbose=verbose, **progressbar_kwargs
542
- )
543
- ):
544
- self._frame_index = idx # accessed later by subclasses
545
- self._ts = ts
546
- self.frames[idx] = ts.frame
547
- self.times[idx] = ts.time
548
- self._single_frame()
549
- logger.info("Finishing up")
550
- return self
551
-
552
- def _setup_computation_groups(
553
- self,
554
- n_parts: int,
555
- start: int = None,
556
- stop: int = None,
557
- step: int = None,
558
- frames: Union[slice, np.ndarray] = None,
559
- ) -> list[np.ndarray]:
560
- """
561
- Splits the trajectory frames, defined by ``start/stop/step`` or
562
- ``frames``, into ``n_parts`` even groups, preserving their indices.
563
-
564
- Parameters
565
- ----------
566
- n_parts : int
567
- number of parts to split the workload into
568
- start : int, optional
569
- start frame
570
- stop : int, optional
571
- stop frame
572
- step : int, optional
573
- step size for analysis (1 means to read every frame)
574
- frames : array_like, optional
575
- array of integers or booleans to slice trajectory; ``frames`` can
576
- only be used *instead* of ``start``, ``stop``, and ``step``. Setting
577
- *both* ``frames`` and at least one of ``start``, ``stop``, ``step``
578
- to a non-default value will raise a :exc:`ValueError`.
579
-
580
- Raises
581
- ------
582
- ValueError
583
- if *both* ``frames`` and at least one of ``start``, ``stop``, or
584
- ``step`` is provided (i.e., set to another value than ``None``)
585
-
586
- Returns
587
- -------
588
- computation_groups : list[np.ndarray]
589
- list of (n, 2) shaped np.ndarrays with frame indices and numbers
590
-
591
-
592
- .. versionadded:: 2.8.0
593
- """
594
- if frames is None:
595
- start, stop, step = self._trajectory.check_slice_indices(
596
- start, stop, step
597
- )
598
- used_frames = np.arange(start, stop, step)
599
- elif not all(opt is None for opt in [start, stop, step]):
600
- raise ValueError("start/stop/step cannot be combined with frames")
601
- else:
602
- used_frames = frames
603
-
604
- if all(isinstance(obj, bool) for obj in used_frames):
605
- arange = np.arange(len(used_frames))
606
- used_frames = arange[used_frames]
607
-
608
- # similar to list(enumerate(frames))
609
- enumerated_frames = np.vstack(
610
- [np.arange(len(used_frames)), used_frames]
611
- ).T
612
- if len(enumerated_frames) == 0:
613
- return [np.empty((0, 2), dtype=np.int64)]
614
- elif len(enumerated_frames) < n_parts:
615
- # Issue #4685
616
- n_parts = len(enumerated_frames)
617
- warnings.warn(
618
- f"Set `n_parts` to {n_parts} to match the total "
619
- "number of frames being analyzed"
620
- )
621
-
622
- return np.array_split(enumerated_frames, n_parts)
623
-
624
- def _configure_backend(
625
- self,
626
- backend: Union[str, BackendBase],
627
- n_workers: int,
628
- unsupported_backend: bool = False,
629
- ) -> BackendBase:
630
- """Matches a passed backend string value with class attributes
631
- :attr:`parallelizable` and :meth:`get_supported_backends()`
632
- to check if downstream calculations can be performed.
633
-
634
- Parameters
635
- ----------
636
- backend : Union[str, BackendBase]
637
- backend to be used:
638
- - ``str`` is matched to a builtin backend (one of "serial",
639
- "multiprocessing" and "dask")
640
- - ``BackendBase`` subclass is checked for the presence of
641
- an :meth:`apply` method
642
- n_workers : int
643
- positive integer with number of workers (processes, in case of
644
- built-in backends) to split the work between
645
- unsupported_backend : bool, optional
646
- if you want to run your custom backend on a parallelizable class
647
- that has not been tested by developers, by default ``False``
648
-
649
- Returns
650
- -------
651
- BackendBase
652
- instance of a ``BackendBase`` class that will be used for computations
653
-
654
- Raises
655
- ------
656
- ValueError
657
- if :attr:`parallelizable` is set to ``False`` but backend is
658
- not ``serial``
659
- ValueError
660
- if :attr:`parallelizable` is ``True`` and custom backend instance is used
661
- without specifying ``unsupported_backend=True``
662
- ValueError
663
- if your trajectory has associated unparallelizable transformations
664
- but backend is not serial
665
- ValueError
666
- if ``n_workers`` was specified twice -- in the run() method and during
667
- ``__init__`` of a custom backend
668
- ValueError
669
- if your backend object instance doesn't have an ``apply`` method
670
-
671
-
672
- .. versionadded:: 2.8.0
673
- """
674
- builtin_backends = {
675
- "serial": BackendSerial,
676
- "multiprocessing": BackendMultiprocessing,
677
- "dask": BackendDask,
678
- }
679
-
680
- backend_class = builtin_backends.get(backend, backend)
681
- supported_backend_classes = [
682
- builtin_backends.get(b) for b in self.get_supported_backends()
683
- ]
684
-
685
- # check for serial-only classes
686
- if not self.parallelizable and backend_class is not BackendSerial:
687
- raise ValueError(f"Can not parallelize class {self.__class__}")
688
-
689
- # make sure user enabled 'unsupported_backend=True' for custom classes
690
- if (
691
- not unsupported_backend
692
- and self.parallelizable
693
- and backend_class not in supported_backend_classes
694
- ):
695
- raise ValueError(
696
- (
697
- f"Must specify 'unsupported_backend=True'"
698
- f"if you want to use a custom {backend_class=} for {self.__class__}"
699
- )
700
- )
701
-
702
- # check for the presence of parallelizable transformations
703
- if backend_class is not BackendSerial and any(
704
- not t.parallelizable for t in self._trajectory.transformations
705
- ):
706
- raise ValueError(
707
- (
708
- "Trajectory should not have "
709
- "associated unparallelizable transformations"
710
- )
711
- )
712
-
713
- # conclude mapping from string to backend class if it's a builtin backend
714
- if isinstance(backend, str):
715
- return backend_class(n_workers=n_workers)
716
-
717
- # make sure we haven't specified n_workers twice
718
- if (
719
- isinstance(backend, BackendBase)
720
- and n_workers is not None
721
- and hasattr(backend, "n_workers")
722
- and backend.n_workers != n_workers
723
- ):
724
- raise ValueError(
725
- (
726
- f"n_workers specified twice: in {backend.n_workers=}"
727
- f"and in run({n_workers=}). Remove it from run()"
728
- )
729
- )
730
-
731
- # or pass along an instance of the class itself
732
- # after ensuring it has apply method
733
- if not isinstance(backend, BackendBase) or not hasattr(
734
- backend, "apply"
735
- ):
736
- raise ValueError(
737
- (
738
- f"{backend=} is invalid: should have 'apply' method "
739
- "and be instance of MDAnalysis.analysis.backends.BackendBase"
740
- )
741
- )
742
- return backend
743
-
744
- def run(
745
- self,
746
- start: int = None,
747
- stop: int = None,
748
- step: int = None,
749
- frames: Iterable = None,
750
- verbose: bool = None,
751
- n_workers: int = None,
752
- n_parts: int = None,
753
- backend: Union[str, BackendBase] = None,
754
- *,
755
- unsupported_backend: bool = False,
756
- progressbar_kwargs=None,
757
- ):
758
- """Perform the calculation
759
-
760
- Parameters
761
- ----------
762
- start : int, optional
763
- start frame of analysis
764
- stop : int, optional
765
- stop frame of analysis
766
- step : int, optional
767
- number of frames to skip between each analysed frame
768
- frames : array_like, optional
769
- array of integers or booleans to slice trajectory; ``frames`` can
770
- only be used *instead* of ``start``, ``stop``, and ``step``. Setting
771
- *both* ``frames`` and at least one of ``start``, ``stop``, ``step``
772
- to a non-default value will raise a :exc:`ValueError`.
773
-
774
- .. versionadded:: 2.2.0
775
- verbose : bool, optional
776
- Turn on verbosity
777
-
778
- progressbar_kwargs : dict, optional
779
- ProgressBar keywords with custom parameters regarding progress bar
780
- position, etc; see :class:`MDAnalysis.lib.log.ProgressBar`
781
- for full list. Available only for ``backend='serial'``
782
- backend : Union[str, BackendBase], optional
783
- By default, performs calculations in a serial fashion.
784
- Otherwise, user can choose a backend: ``str`` is matched to a
785
- builtin backend (one of ``serial``, ``multiprocessing`` and
786
- ``dask``), or a :class:`MDAnalysis.analysis.backends.BackendBase`
787
- subclass.
788
-
789
- .. versionadded:: 2.8.0
790
- n_workers : int
791
- positive integer with number of workers (processes, in case of
792
- built-in backends) to split the work between
793
-
794
- .. versionadded:: 2.8.0
795
- n_parts : int, optional
796
- number of parts to split computations across. Can be more than
797
- number of workers.
798
-
799
- .. versionadded:: 2.8.0
800
- unsupported_backend : bool, optional
801
- if you want to run your custom backend on a parallelizable class
802
- that has not been tested by developers, by default False
803
-
804
- .. versionadded:: 2.8.0
805
-
806
-
807
- .. versionchanged:: 2.2.0
808
- Added ability to analyze arbitrary frames by passing a list of
809
- frame indices in the `frames` keyword argument.
810
-
811
- .. versionchanged:: 2.5.0
812
- Add `progressbar_kwargs` parameter,
813
- allowing to modify description, position etc of tqdm progressbars
814
-
815
- .. versionchanged:: 2.8.0
816
- Introduced ``backend``, ``n_workers``, ``n_parts`` and
817
- ``unsupported_backend`` keywords, and refactored the method logic to
818
- support parallelizable execution.
819
- """
820
- # default to serial execution
821
- backend = "serial" if backend is None else backend
822
-
823
- progressbar_kwargs = (
824
- {} if progressbar_kwargs is None else progressbar_kwargs
825
- )
826
- if (progressbar_kwargs or verbose) and not (
827
- backend == "serial" or isinstance(backend, BackendSerial)
828
- ):
829
- raise ValueError(
830
- "Can not display progressbar with non-serial backend"
831
- )
832
-
833
- if self._streamed:
834
- if backend != "serial":
835
- raise ValueError(
836
- "Can not run streamed analysis with non-serial backend"
837
- )
838
- if frames is not None:
839
- raise ValueError(
840
- "Can not run streamed analysis with frames argument"
841
- )
842
- if start is not None or stop is not None:
843
- raise ValueError(
844
- "Can not run streamed analysis with start/stop arguments"
845
- )
846
- self._streamed_run(
847
- step=step,
848
- verbose=verbose,
849
- progressbar_kwargs=progressbar_kwargs,
850
- )
851
- return self
852
-
853
- # if number of workers not specified, try getting the number from
854
- # the backend instance if possible, or set to 1
855
- if n_workers is None:
856
- n_workers = (
857
- backend.n_workers
858
- if isinstance(backend, BackendBase)
859
- and hasattr(backend, "n_workers")
860
- else 1
861
- )
862
-
863
- # set n_parts and check that is has a reasonable value
864
- n_parts = n_workers if n_parts is None else n_parts
865
-
866
- # do this as early as possible to check client parameters
867
- # before any computations occur
868
- executor = self._configure_backend(
869
- backend=backend,
870
- n_workers=n_workers,
871
- unsupported_backend=unsupported_backend,
872
- )
873
- if (
874
- hasattr(executor, "n_workers") and n_parts < executor.n_workers
875
- ): # using executor's value here for non-default executors
876
- warnings.warn(
877
- (
878
- f"Analysis not making use of all workers: "
879
- f"{executor.n_workers=} is greater than {n_parts=}"
880
- )
881
- )
882
-
883
- # start preparing the run
884
- worker_func = partial(
885
- self._compute,
886
- progressbar_kwargs=progressbar_kwargs,
887
- verbose=verbose,
888
- )
889
- self._setup_frames(
890
- trajectory=self._trajectory,
891
- start=start,
892
- stop=stop,
893
- step=step,
894
- frames=frames,
895
- )
896
-
897
- computation_groups = self._setup_computation_groups(
898
- start=start, stop=stop, step=step, frames=frames, n_parts=n_parts
899
- )
900
-
901
- # get all results from workers in other processes.
902
- # we need `AnalysisBase` classes
903
- # since they hold `frames`, `times` and `results` attributes
904
- remote_objects: list["AnalysisBase"] = executor.apply(
905
- worker_func, computation_groups
906
- )
907
- self.frames = np.hstack([obj.frames for obj in remote_objects])
908
- self.times = np.hstack([obj.times for obj in remote_objects])
909
-
910
- # aggregate results from results obtained in remote workers
911
- remote_results = [obj.results for obj in remote_objects]
912
- results_aggregator = self._get_aggregator()
913
- self.results = results_aggregator.merge(remote_results)
914
-
915
- self._conclude()
916
- return self
917
-
918
def _get_aggregator(self) -> ResultsGroup:
    """Build the default aggregator used to merge per-worker results.

    The aggregator is created with ``lookup=None``, i.e. without any
    per-attribute aggregation functions.

    NOTE(review): presumably such an aggregator passes the results through
    when there is a single object and raises :exc:`ValueError` otherwise --
    confirm against ``ResultsGroup``.

    Returns
    -------
    ResultsGroup
        aggregating object


    .. versionadded:: 2.8.0
    """
    default_aggregator = ResultsGroup(lookup=None)
    return default_aggregator
931
-
932
def _streamed_run(self, step=None, verbose=False, progressbar_kwargs=None):
    """Analyse a streamed trajectory frame by frame until it is exhausted.

    Parameters
    ----------
    step : int, optional
        if given, the trajectory is sliced as ``trajectory[::step]``;
        otherwise the trajectory is iterated as is
    verbose : bool, optional
        forwarded to ``ProgressBar``
    progressbar_kwargs : dict, optional
        extra keywords forwarded to ``ProgressBar``; ``None`` (the
        default) means no extra keywords
    """
    # ``None`` replaces the former shared mutable ``{}`` default and makes
    # an explicit ``progressbar_kwargs=None`` safe to unpack below.
    extra_kwargs = {} if progressbar_kwargs is None else progressbar_kwargs

    self._sliced_trajectory = (
        self._trajectory[::step] if step is not None else self._trajectory
    )
    self._prepare()

    # Collected in growable lists while iterating and converted to arrays
    # once the stream ends.
    self.frames = []
    self.times = []

    for idx, ts in enumerate(
        ProgressBar(
            self._sliced_trajectory,
            verbose=verbose,
            total=float("inf"),
            **extra_kwargs,
        )
    ):
        self._frame_index = idx  # accessed later by subclasses
        self._ts = ts
        self.frames.append(ts.frame)
        self.times.append(ts.time)
        self._single_frame()

    logger.info("Finishing up")
    self.frames = np.array(self.frames)
    self.times = np.array(self.times)
    self._conclude()
959
-
960
-
961
- class StackableAnalysis(AnalysisBase):
962
-
963
def __init__(self, trajectory, analyses, verbose=False, **kwargs):
    """Set up a composite analysis that drives several analyses together.

    Parameters
    ----------
    trajectory
        trajectory handed to the parent initialiser
    analyses
        sized collection of analysis objects; each must expose a
        ``_trajectory`` attribute that is the very same object as this
        instance's ``_trajectory``
    verbose : bool, optional
        forwarded to the parent initialiser
    **kwargs
        forwarded to the parent initialiser

    Raises
    ------
    ValueError
        if ``analyses`` is empty, or if any analysis was built on a
        different trajectory object
    """
    super().__init__(trajectory, verbose=verbose, **kwargs)
    self._analyses = analyses

    if len(analyses) == 0:
        raise ValueError("No analyses provided")

    # Identity (not equality) check: all analyses must share this
    # instance's trajectory object.
    if any(
        member._trajectory is not self._trajectory for member in analyses
    ):
        raise ValueError("All analyses must use the same trajectory")
972
-
973
def _compute(self, indexed_frames, verbose=None, progressbar_kwargs=None):
    """Run every stacked analysis over one group of frames.

    Parameters
    ----------
    indexed_frames
        two-dimensional array; its column 1 is used as the slicer for the
        trajectory (presumably rows are ``(index, frame)`` pairs -- TODO
        confirm against the caller)
    verbose : bool, optional
        progress bar verbosity; ``None`` falls back to ``self._verbose``
        (or ``False`` when that attribute is missing)
    progressbar_kwargs : dict, optional
        extra keywords forwarded to ``ProgressBar``; ``None`` (the
        default) means no extra keywords

    Returns
    -------
    StackableAnalysis
        ``self``
    """
    logger.info("Choosing frames to analyze")
    # ``None`` replaces the former shared mutable ``{}`` default.
    progressbar_kwargs = (
        {} if progressbar_kwargs is None else progressbar_kwargs
    )
    # if verbose unchanged, use class default
    verbose = (
        getattr(self, "_verbose", False) if verbose is None else verbose
    )

    frames = indexed_frames[:, 1]

    logger.info("Starting preparation")
    self._prepare_sliced_trajectory(slicer=frames)
    # Hand the same ``frames``/``times`` containers to every sub-analysis
    # before ``_prepare`` runs, so they observe the values filled in below.
    for analysis in self._analyses:
        analysis.frames = self.frames
        analysis.times = self.times
    self._prepare()

    if len(frames) == 0:  # if `frames` were empty in `run` or `stop=0`
        return self

    for idx, ts in enumerate(
        ProgressBar(
            self._sliced_trajectory, verbose=verbose, **progressbar_kwargs
        )
    ):
        self._frame_index = idx  # accessed later by subclasses
        self._ts = ts
        self.frames[idx] = ts.frame
        self.times[idx] = ts.time
        # Sub-analyses read the current timestep and index from their own
        # attributes, so mirror them before delegating.
        for analysis in self._analyses:
            analysis._ts = ts
            analysis._frame_index = self._frame_index
        self._single_frame()

    # NOTE(review): the inherited ``run`` also calls ``self._conclude()``
    # after the worker function returns, so with an in-process backend the
    # sub-analyses are presumably concluded twice -- confirm this is
    # intended.
    self._conclude()

    logger.info("Finishing up")
    return self
1011
-
1012
- def _single_frame(self):
1013
- for analysis in self._analyses:
1014
- analysis._single_frame()
1015
-
1016
- def _prepare(self):
1017
- for analysis in self._analyses:
1018
- analysis._prepare()
1019
-
1020
- def _conclude(self):
1021
- for analysis in self._analyses:
1022
- analysis._conclude()
1023
-
1024
def _streamed_run(self, step=None, verbose=False, progressbar_kwargs=None):
    """Drive all stacked analyses over a streamed trajectory.

    Parameters
    ----------
    step : int, optional
        if given, the trajectory is sliced as ``trajectory[::step]``;
        otherwise the trajectory is iterated as is
    verbose : bool, optional
        forwarded to ``ProgressBar``
    progressbar_kwargs : dict, optional
        extra keywords forwarded to ``ProgressBar``; ``None`` (the
        default) means no extra keywords
    """
    # ``None`` replaces the former shared mutable ``{}`` default and makes
    # an explicit ``progressbar_kwargs=None`` safe to unpack below.
    extra_kwargs = {} if progressbar_kwargs is None else progressbar_kwargs

    self._sliced_trajectory = (
        self._trajectory[::step] if step is not None else self._trajectory
    )
    self._prepare()

    # Collected in growable lists while iterating and converted to arrays
    # once the stream ends.
    self.frames = []
    self.times = []

    # Sub-analyses share the very same list objects, so every append below
    # is visible to them while frames are being processed.
    for analysis in self._analyses:
        analysis.frames = self.frames
        analysis.times = self.times

    for idx, ts in enumerate(
        ProgressBar(
            self._sliced_trajectory,
            verbose=verbose,
            total=float("inf"),
            **extra_kwargs,
        )
    ):
        self._frame_index = idx  # accessed later by subclasses
        self._ts = ts
        self.frames.append(ts.frame)
        self.times.append(ts.time)
        for analysis in self._analyses:
            analysis._ts = ts
            analysis._frame_index = self._frame_index
        self._single_frame()

    logger.info("Finishing up")
    self.frames = np.array(self.frames)
    self.times = np.array(self.times)
    # The conversion above rebinds ``self.frames``/``self.times`` to new
    # array objects; without this loop the sub-analyses would still hold
    # the plain lists when their ``_conclude`` runs.
    for analysis in self._analyses:
        analysis.frames = self.frames
        analysis.times = self.times
    self._conclude()