asyncmd 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- asyncmd/__init__.py +7 -0
- asyncmd/_config.py +16 -9
- asyncmd/_version.py +22 -36
- asyncmd/config.py +66 -33
- asyncmd/gromacs/__init__.py +3 -0
- asyncmd/gromacs/mdconfig.py +7 -17
- asyncmd/gromacs/mdengine.py +448 -424
- asyncmd/gromacs/utils.py +40 -23
- asyncmd/mdconfig.py +55 -165
- asyncmd/mdengine.py +120 -39
- asyncmd/slurm.py +210 -77
- asyncmd/tools.py +284 -5
- asyncmd/trajectory/__init__.py +19 -1
- asyncmd/trajectory/convert.py +133 -97
- asyncmd/trajectory/functionwrapper.py +211 -159
- asyncmd/trajectory/propagate.py +308 -260
- asyncmd/trajectory/trajectory.py +498 -755
- asyncmd/trajectory/trajectory_cache.py +365 -0
- asyncmd/utils.py +18 -13
- asyncmd-0.4.0.dist-info/METADATA +90 -0
- asyncmd-0.4.0.dist-info/RECORD +24 -0
- {asyncmd-0.3.2.dist-info → asyncmd-0.4.0.dist-info}/WHEEL +1 -1
- asyncmd-0.3.2.dist-info/METADATA +0 -179
- asyncmd-0.3.2.dist-info/RECORD +0 -23
- {asyncmd-0.3.2.dist-info → asyncmd-0.4.0.dist-info/licenses}/LICENSE +0 -0
- {asyncmd-0.3.2.dist-info → asyncmd-0.4.0.dist-info}/top_level.txt +0 -0
asyncmd/trajectory/trajectory.py
CHANGED
@@ -12,20 +12,36 @@
|
|
12
12
|
#
|
13
13
|
# You should have received a copy of the GNU General Public License
|
14
14
|
# along with asyncmd. If not, see <https://www.gnu.org/licenses/>.
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
15
|
+
"""
|
16
|
+
This module contains the implementation of the asyncmd.Trajectory class.
|
17
|
+
|
18
|
+
It also contains some helper functions related to the global Trajectory registry
|
19
|
+
used for trajectory function value caching.
|
20
|
+
The actual :class:`TrajectoryFunctionValueCache` classes can be found in the
|
21
|
+
``trajectory_cache`` module.
|
22
|
+
"""
|
19
23
|
import asyncio
|
24
|
+
import collections
|
25
|
+
import dataclasses
|
20
26
|
import hashlib
|
27
|
+
import io
|
21
28
|
import logging
|
22
|
-
import
|
23
|
-
import
|
24
|
-
import numpy as np
|
25
|
-
import MDAnalysis as mda
|
29
|
+
import os
|
30
|
+
import typing
|
26
31
|
|
32
|
+
import MDAnalysis as mda
|
33
|
+
import numpy as np
|
27
34
|
|
28
35
|
from .._config import _GLOBALS
|
36
|
+
from .trajectory_cache import (TrajectoryFunctionValueCache,
|
37
|
+
TrajectoryFunctionValueCacheInH5PY,
|
38
|
+
TrajectoryFunctionValueCacheInMemory,
|
39
|
+
TrajectoryFunctionValueCacheInNPZ,
|
40
|
+
ValuesAlreadyStoredError)
|
41
|
+
|
42
|
+
if typing.TYPE_CHECKING: # pragma: no cover
|
43
|
+
# only import for typing to avoid circular imports
|
44
|
+
from .functionwrapper import TrajectoryFunctionWrapper
|
29
45
|
|
30
46
|
|
31
47
|
logger = logging.getLogger(__name__)
|
@@ -34,20 +50,58 @@ logger = logging.getLogger(__name__)
|
|
34
50
|
# dictionary in which we keep track of trajectory objects
|
35
51
|
# we use it to always return the *same* object for the same trajectory (by hash)
|
36
52
|
# this makes it easy to ensure that we never calculate CV functions twice
|
37
|
-
_TRAJECTORIES_BY_HASH = {}
|
53
|
+
_TRAJECTORIES_BY_HASH: dict[int, "Trajectory"] = {}
|
54
|
+
|
55
|
+
|
56
|
+
def clear_all_cache_values_for_all_trajectories() -> None:
|
57
|
+
"""
|
58
|
+
Clear all function values cached for each :class:`Trajectory` currently in existence.
|
59
|
+
|
60
|
+
For file-based caches, this also removes the associated cache files.
|
61
|
+
"""
|
62
|
+
for traj in _TRAJECTORIES_BY_HASH.values():
|
63
|
+
traj.clear_all_cache_values()
|
64
|
+
|
65
|
+
|
66
|
+
def _update_cache_type_for_all_trajectories(copy_content: bool = True,
|
67
|
+
clear_old_cache: bool = False,
|
68
|
+
) -> None:
|
69
|
+
"""
|
70
|
+
Update the cache type for each :class:`Trajectory` currently in existence.
|
71
|
+
|
72
|
+
By default the content of the current caches is copied to the new caches.
|
73
|
+
This will only have an effect if the globally set ``cache_type`` differs
|
74
|
+
from what each `Trajectory` currently uses.
|
75
|
+
See :func:`asyncmd.config.set_trajectory_cache_type` to set the ``cache_type``.
|
76
|
+
To clear the old/previously set caches (after copying their values), pass
|
77
|
+
``clear_old_cache=True``.
|
78
|
+
|
79
|
+
Parameters
|
80
|
+
----------
|
81
|
+
copy_content : bool, optional
|
82
|
+
Whether to copy the current cache content to the new cache,
|
83
|
+
by default True
|
84
|
+
clear_old_cache : bool, optional
|
85
|
+
Whether to clear the old/previously set cache, by default False.
|
86
|
+
"""
|
87
|
+
for traj in _TRAJECTORIES_BY_HASH.values():
|
88
|
+
traj.update_cache_type(copy_content=copy_content,
|
89
|
+
clear_old_cache=clear_old_cache,
|
90
|
+
)
|
38
91
|
|
39
92
|
|
40
93
|
def _forget_all_trajectories() -> None:
|
41
94
|
"""
|
42
95
|
Forget about the existence of all :class:`Trajectory` objects.
|
43
96
|
|
44
|
-
This will result in new :class:`Trajectory` objects
|
45
|
-
the same underlying trajectory_files.
|
46
|
-
results in
|
97
|
+
This will result in new :class:`Trajectory` objects being created even for
|
98
|
+
the same underlying trajectory_files. Usually you do not want this as it
|
99
|
+
results in unnecessary calculations if the same wrapped and cached function
|
47
100
|
is applied to both objects. This function exists as a hidden function as it
|
48
101
|
is used in the tests and it might be helpful under certain circumstances.
|
49
102
|
Use only if you know why you are using it!
|
50
103
|
"""
|
104
|
+
# pylint: disable-next=global-variable-not-assigned
|
51
105
|
global _TRAJECTORIES_BY_HASH
|
52
106
|
all_keys = set(_TRAJECTORIES_BY_HASH.keys())
|
53
107
|
for key in all_keys:
|
@@ -58,9 +112,9 @@ def _forget_trajectory(traj_hash: int) -> None:
|
|
58
112
|
"""
|
59
113
|
Forget about the existence of a given :class:`Trajectory` object.
|
60
114
|
|
61
|
-
This will result in new :class:`Trajectory` objects
|
62
|
-
the same underlying trajectory_files.
|
63
|
-
results in
|
115
|
+
This will result in new :class:`Trajectory` objects being created even for
|
116
|
+
the same underlying trajectory_files. Usually you do not want this as it
|
117
|
+
results in unnecessary calculations if the same wrapped and cached function
|
64
118
|
is applied to both objects. This function exists as a hidden function as it
|
65
119
|
is used when deleting a :class:`Trajectory` (i.e. calling its `__del__`
|
66
120
|
method) and it might be helpful under certain circumstances. Use only if
|
@@ -71,6 +125,7 @@ def _forget_trajectory(traj_hash: int) -> None:
|
|
71
125
|
traj_hash : int
|
72
126
|
The hash of the :class:`Trajectory` to forget about.
|
73
127
|
"""
|
128
|
+
# pylint: disable-next=global-variable-not-assigned
|
74
129
|
global _TRAJECTORIES_BY_HASH
|
75
130
|
try:
|
76
131
|
del _TRAJECTORIES_BY_HASH[traj_hash]
|
@@ -79,35 +134,89 @@ def _forget_trajectory(traj_hash: int) -> None:
|
|
79
134
|
pass
|
80
135
|
|
81
136
|
|
137
|
+
@dataclasses.dataclass(frozen=True)
|
138
|
+
class _TrajectoryPropertyData:
|
139
|
+
"""
|
140
|
+
Dataclass to store/bundle all information that is read from the trajectory
|
141
|
+
and made available as :class:`Trajectory` properties.
|
142
|
+
|
143
|
+
All data are immutable (we use ``frozen=True``), because the data are read
|
144
|
+
from the underlying trajectory file(s) only once and if they change the hash
|
145
|
+
(i.e. the :class:`Trajectory` object the data is tied to) will also change.
|
146
|
+
"""
|
147
|
+
length: int
|
148
|
+
dt: float
|
149
|
+
first_time: float
|
150
|
+
last_time: float
|
151
|
+
first_step: int | None
|
152
|
+
last_step: int | None
|
153
|
+
|
154
|
+
|
155
|
+
@dataclasses.dataclass(frozen=True)
|
156
|
+
class _TrajectoryFileData:
|
157
|
+
"""
|
158
|
+
Dataclass to store/bundle all information related to the file-paths and
|
159
|
+
trajectory hash for :class:`Trajectory` objects.
|
160
|
+
|
161
|
+
All of this is set in :meth:`Trajectory.__new__` and must not be overridden
|
162
|
+
or set again in :meth:`Trajectory.__init__`!
|
163
|
+
"""
|
164
|
+
trajectory_files: list[str]
|
165
|
+
structure_file: str
|
166
|
+
workdir: str
|
167
|
+
trajectory_hash: int
|
168
|
+
|
169
|
+
|
82
170
|
class Trajectory:
|
83
171
|
"""
|
84
172
|
Represent a trajectory.
|
85
173
|
|
86
174
|
Keep track of the paths of the trajectory and the structure files.
|
87
175
|
Caches values for (wrapped) functions acting on the trajectory.
|
88
|
-
Supports pickling and unpickling with the cached values restored,
|
89
|
-
|
176
|
+
Supports pickling and unpickling with the cached values restored, if a
|
177
|
+
non-persistent cache is used when pickling, the values will be written to a
|
178
|
+
hidden numpy npz file next to the trajectory and will be read at unpickling.
|
90
179
|
Supports equality checks with other :class:`Trajectory`.
|
91
180
|
Also makes available (and caches) a number of useful attributes, e.g.
|
92
|
-
``first_step`` and ``last_step`` (the first and last
|
93
|
-
the trajectory), ``dt``, ``first_time``, ``last_time``,
|
94
|
-
|
181
|
+
``first_step`` and ``last_step`` (the first and last integration step in
|
182
|
+
the trajectory), ``dt``, ``first_time``, ``last_time``, and ``length`` (in
|
183
|
+
frames). All properties are read-only (for the simple reason that they
|
184
|
+
depend only on the underlying trajectory files).
|
185
|
+
A special case is ``nstout``, the output frequency in integration steps.
|
186
|
+
Since it can not be reliably read/inferred from the trajectory files alone,
|
187
|
+
it can be set by the user (at initialization or later via the property).
|
95
188
|
|
96
189
|
Notes
|
97
190
|
-----
|
98
191
|
``first_step`` and ``last_step`` are only useful for trajectories that come
|
99
192
|
directly from a :class:`asyncmd.mdengine.MDEngine`.
|
100
|
-
As soon as the
|
193
|
+
As soon as the trajectory has been concatenated using MDAnalysis (e.g. with
|
101
194
|
the ``TrajectoryConcatenator``) the step information is just the frame
|
102
195
|
number in the trajectory part that became first/last frame in the
|
103
196
|
concatenated trajectory.
|
104
197
|
"""
|
105
198
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
199
|
+
_CACHE_CLASS_FOR_TYPE: dict[str, type[TrajectoryFunctionValueCache]] = {
|
200
|
+
"h5py": TrajectoryFunctionValueCacheInH5PY,
|
201
|
+
"npz": TrajectoryFunctionValueCacheInNPZ,
|
202
|
+
"memory": TrajectoryFunctionValueCacheInMemory,
|
203
|
+
}
|
204
|
+
_file_data: _TrajectoryFileData # type annotation for stuff we set in __new__
|
205
|
+
|
206
|
+
# Note: We want __init__ and __new__ to have the same call signature
|
207
|
+
# (at least for users, __new__ takes `old_workdir`...).
|
208
|
+
# So we will have unused arguments in __init__ (for the stuff we set
|
209
|
+
# in __new__) and we will have unused arguments in __new__ (for the
|
210
|
+
# stuff we set in __init__).
|
211
|
+
# The __new__/__init__ implementation is needed to get the global
|
212
|
+
# trajectory registry to work (to make each traj unique for the same
|
213
|
+
# hash), but pylint can not know that, so we silence its unused-argument check.
|
214
|
+
def __init__(
|
215
|
+
self,
|
216
|
+
# pylint: disable-next=unused-argument
|
217
|
+
trajectory_files: list[str] | str, structure_file: str,
|
218
|
+
nstout: int | None = None,
|
219
|
+
) -> None:
|
111
220
|
"""
|
112
221
|
Initialize a :class:`Trajectory`.
|
113
222
|
|
@@ -121,12 +230,6 @@ class Trajectory:
|
|
121
230
|
nstout : int or None, optional
|
122
231
|
The output frequency used when creating the trajectory,
|
123
232
|
by default None
|
124
|
-
cache_type : str or None, optional
|
125
|
-
The cache type for the CV values cached for this trajectory,
|
126
|
-
must be one of 'h5py', 'npz' or 'memory'.
|
127
|
-
If None we will use 'h5py' if a h5py cache has been registered and
|
128
|
-
if not fallback to 'npz'.
|
129
|
-
See also the ``asyncmd.config.register_h5py_cache()`` function.
|
130
233
|
|
131
234
|
Raises
|
132
235
|
------
|
@@ -134,65 +237,35 @@ class Trajectory:
|
|
134
237
|
If the ``trajectory_files`` or the ``structure_file`` are not
|
135
238
|
accessible.
|
136
239
|
"""
|
137
|
-
# NOTE:
|
138
|
-
#
|
139
|
-
#
|
140
|
-
#
|
141
|
-
#
|
142
|
-
#
|
143
|
-
#
|
144
|
-
#
|
145
|
-
#
|
146
|
-
# # value is of same type as default so set it
|
147
|
-
# setattr(self, kwarg, value)
|
148
|
-
# else:
|
149
|
-
# logger.warn(f"Setting attribute {kwarg} with "
|
150
|
-
# + f"mismatching type ({type(value)}). "
|
151
|
-
# + f" Default type is {type(cval)}."
|
152
|
-
# )
|
153
|
-
# else:
|
154
|
-
# # not previously defined, so warn that we ignore it
|
155
|
-
# logger.warning("Ignoring unknown keyword-argument %s.", kwarg)
|
156
|
-
# NOTE: self._trajectory_files is set in __new__ because we otherwise
|
157
|
-
# would sanitize the files twice, but we need to check in __new__
|
158
|
-
# to make pickling work
|
159
|
-
# self._structure_file is also set in __new__ together with the
|
160
|
-
# trajectory_files as we also sanitize its path
|
161
|
-
# self._traj_hash and self._workdir are also set by __new__!
|
162
|
-
# self._trajectory_files
|
163
|
-
# self._structure_file
|
164
|
-
# self._workdir
|
165
|
-
# self._traj_hash
|
240
|
+
# NOTE: We expect that anything which works for mdanalysis as
|
241
|
+
# traj and struct should also work here as traj and struct
|
242
|
+
# NOTE: self._file_data is set in __new__ because we otherwise would:
|
243
|
+
# - calculate the hash twice (need it in __new__),
|
244
|
+
# - sanitize the files twice, but we need to check in __new__
|
245
|
+
# to make pickling work
|
246
|
+
# The _TrajectoryFileData dataclass therefore contains everything
|
247
|
+
# (and only those things) we need in __new__
|
248
|
+
# self._file_data
|
166
249
|
# properties
|
167
250
|
self.nstout = nstout # use the setter to make basic sanity checks
|
168
|
-
|
169
|
-
self.
|
170
|
-
|
171
|
-
self.
|
172
|
-
self._first_time = None
|
173
|
-
self._last_time = None
|
174
|
-
# stuff for caching of functions applied to this traj
|
175
|
-
self._memory_cache = None
|
176
|
-
self._npz_cache = None
|
177
|
-
self._h5py_cache = None
|
178
|
-
self._cache_type = None
|
179
|
-
# remember if we use the global default value,
|
180
|
-
# if yes we use the (possibly changed) global default when unpickling
|
181
|
-
self._using_default_cache_type = True
|
182
|
-
# use our property logic for checking the value
|
183
|
-
# (Note that self._trajectory_hash has already been set by __new__)
|
184
|
-
self.cache_type = cache_type
|
251
|
+
# store for all (immutable) properties we read from the trajectory files
|
252
|
+
self._property_data: None | _TrajectoryPropertyData = None
|
253
|
+
# setup cache for functions applied to this traj
|
254
|
+
self._cache = self._setup_cache()
|
185
255
|
# Locking mechanism such that only one application of a specific
|
186
256
|
# CV func can run at any given time on this trajectory
|
187
|
-
self._semaphores_by_func_id
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
257
|
+
self._semaphores_by_func_id: collections.defaultdict[
|
258
|
+
str,
|
259
|
+
asyncio.BoundedSemaphore,
|
260
|
+
] = collections.defaultdict(asyncio.BoundedSemaphore)
|
261
|
+
|
262
|
+
def __new__(cls,
|
263
|
+
trajectory_files: list[str] | str, structure_file: str,
|
264
|
+
# (see above note for __init__ why its ok to ignore this)
|
265
|
+
# pylint: disable-next=unused-argument
|
266
|
+
nstout: int | None = None,
|
267
|
+
**kwargs) -> "Trajectory":
|
268
|
+
# pylint: disable-next=global-variable-not-assigned
|
196
269
|
global _TRAJECTORIES_BY_HASH # our global traj registry
|
197
270
|
# see if old_workdir is given to sanitize file paths
|
198
271
|
old_workdir = kwargs.get("old_workdir", None)
|
@@ -208,13 +281,6 @@ class Trajectory:
|
|
208
281
|
try:
|
209
282
|
# see if we (i.e. a traj with the same hash) are already existing
|
210
283
|
other_traj = _TRAJECTORIES_BY_HASH[traj_hash]
|
211
|
-
# if yes return 'ourself'
|
212
|
-
# (but make sure that the filepaths match even after a potential
|
213
|
-
# change of workdir)
|
214
|
-
other_traj._trajectory_files = trajectory_files
|
215
|
-
other_traj._structure_file = structure_file
|
216
|
-
other_traj._workdir = current_workdir
|
217
|
-
return other_traj
|
218
284
|
except KeyError:
|
219
285
|
# not yet in there, so need to create us
|
220
286
|
# we just create cls so that we will be "created" by init or
|
@@ -222,100 +288,144 @@ class Trajectory:
|
|
222
288
|
# NOTE: we need to make sure that every attribute we set
|
223
289
|
# below is not overwritten by setstate and/or init!
|
224
290
|
obj = super().__new__(cls)
|
225
|
-
#
|
226
|
-
|
227
|
-
# and set self._trajectory_files so we dont sanitize twice
|
228
|
-
obj._trajectory_files = trajectory_files
|
229
|
-
# also set self._structure_file
|
230
|
-
obj._structure_file = structure_file
|
231
|
-
# and set self._workdir to the new value
|
291
|
+
# we directly set hash, files and friends so we dont recalculate
|
292
|
+
# the hash and dont sanitize the file paths twice
|
232
293
|
# Note:
|
233
294
|
# we remember the current workdir to be able to unpickle as long as
|
234
295
|
# either the relpath between traj and old/new workdir does not change
|
235
296
|
# or the trajectory did not change its location but we changed workdir
|
236
297
|
# (we need the workdir only for the second option)
|
237
|
-
obj.
|
298
|
+
obj._file_data = _TrajectoryFileData(
|
299
|
+
trajectory_files=trajectory_files,
|
300
|
+
structure_file=structure_file,
|
301
|
+
workdir=current_workdir,
|
302
|
+
trajectory_hash=traj_hash,
|
303
|
+
)
|
238
304
|
# and add us to the global trajectory registry
|
239
305
|
_TRAJECTORIES_BY_HASH[traj_hash] = obj
|
240
306
|
return obj
|
241
307
|
|
242
|
-
|
243
|
-
#
|
244
|
-
#
|
245
|
-
#
|
246
|
-
|
247
|
-
|
308
|
+
# we already exist (a traj object for the same traj files/hash),
|
309
|
+
# so return 'ourself'
|
310
|
+
# (but make sure that the filepaths match even after a potential
|
311
|
+
# change of workdir)
|
312
|
+
other_traj._file_data = _TrajectoryFileData(
|
313
|
+
trajectory_files=trajectory_files,
|
314
|
+
structure_file=structure_file,
|
315
|
+
workdir=current_workdir,
|
316
|
+
trajectory_hash=traj_hash,
|
317
|
+
)
|
318
|
+
return other_traj
|
319
|
+
|
320
|
+
# def __del__(self):
|
321
|
+
# NOTE: Running 'del traj' does not call this function,
|
322
|
+
# it only decreases the reference count by one.
|
323
|
+
# But since we still have the traj in the traj by hash dictionary
|
324
|
+
# i.e. we still have a reference, it will not call __del__ which
|
325
|
+
# is only called when the reference count reaches zero.
|
326
|
+
# So implementing it is quite pointless and misleading!
|
248
327
|
# _forget_trajectory(traj_hash=self.trajectory_hash)
|
249
328
|
|
250
329
|
@classmethod
|
251
|
-
def _sanitize_file_paths(cls,
|
252
|
-
trajectory_files:
|
330
|
+
def _sanitize_file_paths(cls, *,
|
331
|
+
trajectory_files: list[str] | str,
|
253
332
|
structure_file: str,
|
254
|
-
current_workdir:
|
255
|
-
old_workdir:
|
256
|
-
) ->
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
333
|
+
current_workdir: str,
|
334
|
+
old_workdir: str | None = None,
|
335
|
+
) -> tuple[list[str], str]:
|
336
|
+
"""
|
337
|
+
Return relpath for all files if no old_workdir is given and the trajectory
|
338
|
+
and structure files are accessible.
|
339
|
+
|
340
|
+
If old_workdir is given (and the traj is not accessible) it tries to find
|
341
|
+
the trajs/struct by assuming the files did not change place and we just
|
342
|
+
need to add the "path_diff" from old to new workdir to the path, if the
|
343
|
+
file is then still not there it raises a FileNotFoundError.
|
344
|
+
|
345
|
+
Note: The file-path treatment here makes it possible to either change
|
346
|
+
the workdir of the python session OR change the location of the
|
347
|
+
trajectories as long as the relative path between trajectory
|
348
|
+
and python workdir does not change!
|
349
|
+
|
350
|
+
Parameters
|
351
|
+
----------
|
352
|
+
trajectory_files : list[str] | str
|
353
|
+
Absolute or relative path(s) to the trajectory file(s),
|
354
|
+
e.g. trr, xtc, dcd, ...
|
355
|
+
Can be one str (one file) or a list of str (multiple traj files).
|
356
|
+
structure_file : str
|
357
|
+
Absolute or relative path to the structure file (e.g. tpr, gro).
|
358
|
+
current_workdir : str
|
359
|
+
The current working directory to use for "path_diff" calculations.
|
360
|
+
old_workdir : str | None, optional
|
361
|
+
The old working directory (e.g. at pickling time), by default None.
|
362
|
+
If None, no "path_diff" calculations will be performed, i.e. it is
|
363
|
+
assumed the working directory did not change or we are not unpickling.
|
364
|
+
|
365
|
+
Returns
|
366
|
+
-------
|
367
|
+
tuple[list[str], str]
|
368
|
+
trajectory_files, structure_file
|
369
|
+
Sanitized file-paths if the files exist, trajectory_files is always
|
370
|
+
a list[str], even if it is only one file.
|
371
|
+
|
372
|
+
Raises
|
373
|
+
------
|
374
|
+
FileNotFoundError
|
375
|
+
When the trajectory or structure files can not be found.
|
376
|
+
"""
|
269
377
|
def sanitize_path(f, pathdiff=None):
|
270
378
|
if os.path.isfile(f):
|
271
379
|
return os.path.relpath(f)
|
272
|
-
|
380
|
+
if pathdiff is not None:
|
273
381
|
f_diff = os.path.join(pathdiff, f)
|
274
382
|
if os.path.isfile(f_diff):
|
275
383
|
return os.path.relpath(f_diff)
|
276
384
|
# if we get until here we cant find the file
|
277
385
|
err_msg = f"File {f} is not accessible"
|
278
|
-
if pathdiff is not None
|
279
|
-
err_msg += f" (we also tried {f_diff})."
|
280
|
-
else:
|
281
|
-
err_msg += "."
|
386
|
+
err_msg += f" (we also tried {f_diff})." if pathdiff is not None else "."
|
282
387
|
raise FileNotFoundError(err_msg)
|
283
388
|
|
284
389
|
if old_workdir is not None:
|
285
|
-
if current_workdir is None:
|
286
|
-
raise ValueError("'old_workdir' given but 'current_workdir' "
|
287
|
-
"was None.")
|
288
390
|
path_diff = os.path.relpath(old_workdir, current_workdir)
|
289
391
|
else:
|
290
392
|
path_diff = None
|
291
|
-
|
292
393
|
if isinstance(trajectory_files, str):
|
293
394
|
trajectory_files = [trajectory_files]
|
294
|
-
|
295
395
|
traj_files_sanitized = [sanitize_path(f=traj_f, pathdiff=path_diff)
|
296
396
|
for traj_f in trajectory_files
|
297
397
|
]
|
298
|
-
struct_file_sanitized = sanitize_path(f=structure_file,
|
299
|
-
pathdiff=path_diff,
|
300
|
-
)
|
301
|
-
|
398
|
+
struct_file_sanitized = sanitize_path(f=structure_file, pathdiff=path_diff)
|
302
399
|
return traj_files_sanitized, struct_file_sanitized
|
303
400
|
|
304
401
|
@classmethod
|
305
|
-
def _calc_traj_hash(cls, trajectory_files):
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
402
|
+
def _calc_traj_hash(cls, trajectory_files: list[str]) -> int:
|
403
|
+
"""
|
404
|
+
Calculate a hash over the first and last part of the traj files.
|
405
|
+
|
406
|
+
We use it to make sure the cached CV values match the traj.
|
407
|
+
Note that we do not include the structure file on purpose because
|
408
|
+
that allows for changing .gro <-> .tpr or similar (which we expect to
|
409
|
+
not change the calculated CV values).
|
410
|
+
|
411
|
+
Parameters
|
412
|
+
----------
|
413
|
+
trajectory_files : list[str]
|
414
|
+
Path(s) to the trajectory file(s).
|
415
|
+
|
416
|
+
Returns
|
417
|
+
-------
|
418
|
+
int
|
419
|
+
The hash calculated over the trajectory files.
|
420
|
+
"""
|
421
|
+
# TODO: how much should we read to calculate the hash?
|
312
422
|
# (I [hejung] think the first and last .5 MB are enough)
|
313
423
|
data = bytes()
|
314
424
|
for traj_f in trajectory_files:
|
315
|
-
#data += traj_f.encode("utf-8") # DONT include filepaths!...
|
425
|
+
# data += traj_f.encode("utf-8") # DONT include filepaths!...
|
316
426
|
fsize = os.stat(traj_f).st_size
|
317
427
|
data += str(fsize).encode("utf-8")
|
318
|
-
if fsize
|
428
|
+
if not fsize:
|
319
429
|
# Note: we could also just warn as long as we do not do the
|
320
430
|
# negative seek below if filesize == 0. However,
|
321
431
|
# mdanalysis throws errors for empty trajectories anyway
|
@@ -326,7 +436,7 @@ class Trajectory:
|
|
326
436
|
# read the first bit of each file
|
327
437
|
data += traj_file.read(max_to_read)
|
328
438
|
# and read the last bit of each file
|
329
|
-
# Note that the last bit potentially
|
439
|
+
# Note that the last bit potentially overlaps with the first
|
330
440
|
traj_file.seek(-max_to_read, io.SEEK_END)
|
331
441
|
data += traj_file.read(max_to_read)
|
332
442
|
# calculate one hash over all traj_files
|
@@ -339,171 +449,161 @@ class Trajectory:
|
|
339
449
|
)
|
340
450
|
return traj_hash
|
341
451
|
|
342
|
-
|
343
|
-
def cache_type(self):
|
452
|
+
def _setup_cache(self) -> TrajectoryFunctionValueCache:
|
344
453
|
"""
|
345
|
-
|
454
|
+
Initialize and return a cache with the cache type/class set by _GLOBALS/config.
|
455
|
+
|
456
|
+
If the initialized cache is empty, this also checks for any npz cache
|
457
|
+
files and tries to append them to the new cache (irrespective of the
|
458
|
+
cache type).
|
346
459
|
"""
|
347
|
-
|
460
|
+
cache = self._CACHE_CLASS_FOR_TYPE[
|
461
|
+
_GLOBALS["TRAJECTORY_FUNCTION_CACHE_TYPE"]
|
462
|
+
](traj_hash=self.trajectory_hash,
|
463
|
+
traj_files=self.trajectory_files,
|
464
|
+
)
|
465
|
+
# only try to read npz files if our cache is empty and not already npz
|
466
|
+
if not cache and _GLOBALS["TRAJECTORY_FUNCTION_CACHE_TYPE"] != "npz":
|
467
|
+
# cache is empty at initialization
|
468
|
+
# check if we can find a npz-cache to populate from
|
469
|
+
if os.path.isfile(
|
470
|
+
TrajectoryFunctionValueCacheInNPZ.get_cache_filename(
|
471
|
+
traj_files=self.trajectory_files
|
472
|
+
)
|
473
|
+
):
|
474
|
+
logger.info("Initialized %s with an empty cache, but found "
|
475
|
+
"a (probably) matching npz cache file. Populating "
|
476
|
+
"our cache with the values stored there.",
|
477
|
+
self,
|
478
|
+
)
|
479
|
+
cache_to_copy = TrajectoryFunctionValueCacheInNPZ(
|
480
|
+
traj_hash=self.trajectory_hash,
|
481
|
+
traj_files=self.trajectory_files,
|
482
|
+
)
|
483
|
+
for func_id, values in cache_to_copy.items():
|
484
|
+
cache.append(func_id=func_id, values=values)
|
485
|
+
return cache
|
348
486
|
|
349
|
-
|
350
|
-
|
487
|
+
def update_cache_type(self, copy_content: bool = True,
|
488
|
+
clear_old_cache: bool = False) -> None:
|
351
489
|
"""
|
352
|
-
|
490
|
+
Update the :class:`TrajectoryFunctionValueCache` this :class:`Trajectory` uses.
|
491
|
+
|
492
|
+
By default the content of the current cache is copied to the new cache.
|
493
|
+
This will only have an effect if the globally set ``cache_type`` differs
|
494
|
+
from what this `Trajectory` currently uses.
|
495
|
+
See :func:`asyncmd.config.set_trajectory_cache_type` to set the ``cache_type``.
|
496
|
+
To clear the old/previously set cache (after copying its values), pass
|
497
|
+
``clear_old_cache=True``.
|
353
498
|
|
354
499
|
Parameters
|
355
500
|
----------
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
Raises
|
362
|
-
------
|
363
|
-
ValueError
|
364
|
-
Raised if value is not one of the available cache types.
|
501
|
+
copy_content : bool, optional
|
502
|
+
Whether to copy the current cache content to the new cache,
|
503
|
+
by default True
|
504
|
+
clear_old_cache : bool, optional
|
505
|
+
Whether to clear the old/previously set cache, by default False.
|
365
506
|
"""
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
self._using_default_cache_type = use_default_cache_type
|
384
|
-
self._setup_cache()
|
385
|
-
|
386
|
-
def _setup_cache(self) -> None:
|
387
|
-
# set up the cache indicated by self.cache_type and all others to None
|
388
|
-
# also makes sure that all previously cached values are transfered
|
389
|
-
# to the newly setup cache
|
390
|
-
# NOTE: we setup an npz cache to see if there are any saved values
|
391
|
-
# that we would want to add to the newly setup cache
|
392
|
-
# We do this because upon pickling we save everything to npz
|
393
|
-
# Note that we can just set self._npz to this cache because it is
|
394
|
-
# stateless (in the sense that if it existed it be exactly the same)
|
395
|
-
self._npz_cache = TrajectoryFunctionValueCacheNPZ(
|
396
|
-
fname_trajs=self.trajectory_files,
|
397
|
-
hash_traj=self._traj_hash,
|
398
|
-
)
|
399
|
-
if self._cache_type == "memory":
|
400
|
-
if self._memory_cache is None:
|
401
|
-
self._memory_cache = TrajectoryFunctionValueCacheMEMORY()
|
402
|
-
else:
|
403
|
-
# we already have a mem cache so just try to use it
|
404
|
-
pass
|
405
|
-
if self._h5py_cache is not None:
|
406
|
-
self._cache_content_to_new_cache(
|
407
|
-
old_cache=self._h5py_cache,
|
408
|
-
new_cache=self._memory_cache,
|
409
|
-
)
|
410
|
-
self._h5py_cache = None
|
411
|
-
self._cache_content_to_new_cache(
|
412
|
-
old_cache=self._npz_cache,
|
413
|
-
new_cache=self._memory_cache,
|
414
|
-
)
|
415
|
-
self._npz_cache = None
|
416
|
-
elif self._cache_type == "h5py":
|
417
|
-
try:
|
418
|
-
h5py_cache = _GLOBALS["H5PY_CACHE"]
|
419
|
-
except KeyError as exc:
|
420
|
-
raise ValueError(
|
421
|
-
"No h5py cache file registered yet. Try calling "
|
422
|
-
+ "``asyncmd.config.register_h5py_cache_file()``"
|
423
|
-
+ " with the appropriate arguments first") from exc
|
424
|
-
if self._h5py_cache is None:
|
425
|
-
# dont have one yet so setup the cache
|
426
|
-
self._h5py_cache = TrajectoryFunctionValueCacheH5PY(
|
427
|
-
h5py_cache=h5py_cache,
|
428
|
-
hash_traj=self._traj_hash,
|
429
|
-
)
|
430
|
-
else:
|
431
|
-
# we already have a h5py cache...
|
432
|
-
if self._h5py_cache.h5py_cache is h5py_cache:
|
433
|
-
# and it is in the same file/group location
|
434
|
-
# so we do nothing but making sure that all values from
|
435
|
-
# other caches are transfered
|
507
|
+
cache_type = _GLOBALS["TRAJECTORY_FUNCTION_CACHE_TYPE"]
|
508
|
+
if isinstance(self._cache, self._CACHE_CLASS_FOR_TYPE[cache_type]):
|
509
|
+
logger.info("Cache type is already %s. Not doing anything.", cache_type)
|
510
|
+
return
|
511
|
+
# init the new cache
|
512
|
+
cache = self._CACHE_CLASS_FOR_TYPE[cache_type](
|
513
|
+
traj_hash=self.trajectory_hash,
|
514
|
+
traj_files=self.trajectory_files,
|
515
|
+
)
|
516
|
+
if copy_content:
|
517
|
+
# and copy/append everything from current cache to the new one
|
518
|
+
for func_id, values in self._cache.items():
|
519
|
+
try:
|
520
|
+
cache.append(func_id=func_id, values=values)
|
521
|
+
except ValuesAlreadyStoredError:
|
522
|
+
# if we just initialized a non-empty cache we might already
|
523
|
+
# have some of the values cached there, ignore them
|
436
524
|
pass
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
self._h5py_cache = TrajectoryFunctionValueCacheH5PY(
|
441
|
-
h5py_cache=h5py_cache,
|
442
|
-
hash_traj=self._traj_hash,
|
443
|
-
)
|
444
|
-
self._cache_content_to_new_cache(
|
445
|
-
old_cache=old_h5py_cache,
|
446
|
-
new_cache=self._h5py_cache,
|
447
|
-
)
|
448
|
-
# transfer all values from other cache types and empty them
|
449
|
-
if self._memory_cache is not None:
|
450
|
-
self._cache_content_to_new_cache(
|
451
|
-
old_cache=self._memory_cache,
|
452
|
-
new_cache=self._h5py_cache,
|
453
|
-
)
|
454
|
-
self._memory_cache = None
|
455
|
-
self._cache_content_to_new_cache(
|
456
|
-
old_cache=self._npz_cache,
|
457
|
-
new_cache=self._h5py_cache,
|
458
|
-
)
|
459
|
-
self._npz_cache = None
|
460
|
-
elif self._cache_type == "npz":
|
461
|
-
if self._h5py_cache is not None:
|
462
|
-
self._cache_content_to_new_cache(
|
463
|
-
old_cache=self._h5py_cache,
|
464
|
-
new_cache=self._npz_cache,
|
465
|
-
)
|
466
|
-
self._h5py_cache = None
|
467
|
-
if self._memory_cache is not None:
|
468
|
-
self._cache_content_to_new_cache(
|
469
|
-
old_cache=self._memory_cache,
|
470
|
-
new_cache=self._npz_cache,
|
471
|
-
)
|
472
|
-
self._memory_cache = None
|
473
|
-
else:
|
474
|
-
raise RuntimeError("This should never happen. self._cache_type "
|
475
|
-
+ "must be one of 'memory', 'h5py', 'npz' when "
|
476
|
-
+ "self._setup_cache is called. "
|
477
|
-
+ f"Was {self._cache_type}.")
|
525
|
+
if clear_old_cache:
|
526
|
+
self._cache.clear_all_values()
|
527
|
+
self._cache = cache
|
478
528
|
|
479
|
-
def
|
529
|
+
def clear_all_cache_values(self) -> None:
|
480
530
|
"""
|
481
|
-
|
531
|
+
Clear all function values cached for this :class:`Trajectory`.
|
532
|
+
|
533
|
+
For file-based caches, this also removes the associated cache files.
|
534
|
+
Note that this just calls the underlying :class:`TrajectoryFunctionValueCache`
|
535
|
+
classes ``clear_all_values`` method.
|
536
|
+
"""
|
537
|
+
self._cache.clear_all_values()
|
538
|
+
|
539
|
+
def _retrieve_cached_values(self, func_wrapper: "TrajectoryFunctionWrapper",
|
540
|
+
) -> np.ndarray | None:
|
541
|
+
"""
|
542
|
+
Retrieve values cached for given :class:`TrajectoryFunctionWrapper`.
|
543
|
+
|
544
|
+
Return ``None`` if no values are cached (yet).
|
545
|
+
|
546
|
+
Parameters
|
547
|
+
----------
|
548
|
+
func_wrapper : TrajectoryFunctionWrapper
|
549
|
+
The TrajectoryFunctionWrapper for which we (try to) retrieve cached values.
|
550
|
+
|
551
|
+
Returns
|
552
|
+
-------
|
553
|
+
np.ndarray | None
|
554
|
+
Cached function values or None if none are found.
|
555
|
+
"""
|
556
|
+
try:
|
557
|
+
values = self._cache[func_wrapper.id]
|
558
|
+
except KeyError:
|
559
|
+
values = None
|
560
|
+
return values
|
561
|
+
|
562
|
+
def _register_cached_values(self, values: np.ndarray,
|
563
|
+
func_wrapper: "TrajectoryFunctionWrapper",
|
564
|
+
) -> None:
|
565
|
+
"""
|
566
|
+
Add values to cache for given TrajectoryFunctionWrapper.
|
567
|
+
|
568
|
+
Parameters
|
569
|
+
----------
|
570
|
+
values : np.ndarray
|
571
|
+
The values to add.
|
572
|
+
func_wrapper : TrajectoryFunctionWrapper
|
573
|
+
The TrajectoryFunctionWrapper this values belong to.
|
574
|
+
"""
|
575
|
+
self._cache.append(func_id=func_wrapper.id, values=values)
|
576
|
+
|
577
|
+
def _populate_property_data(self) -> _TrajectoryPropertyData:
|
578
|
+
"""
|
579
|
+
Populate and return cached properties from the underlying trajectory.
|
580
|
+
|
581
|
+
Returns a :class:`_TrajectoryPropertyData` class.
|
482
582
|
"""
|
483
583
|
# create/open a mdanalysis universe to get...
|
484
584
|
u = mda.Universe(self.structure_file, *self.trajectory_files)
|
485
585
|
# ...the number of frames
|
486
|
-
|
586
|
+
length = len(u.trajectory)
|
487
587
|
# ...the first integration step and time
|
488
588
|
ts = u.trajectory[0]
|
489
|
-
|
490
|
-
|
491
|
-
# does not have step data!
|
492
|
-
# TODO: which traj formats have step data set in MDAnalysis?
|
493
|
-
# XTC and TRR have it for sure (with the wraparound issue)
|
494
|
-
self._first_step = ts.data.get("step", None)
|
495
|
-
self._first_time = ts.time
|
589
|
+
first_step = ts.data.get("step", None)
|
590
|
+
first_time = ts.time
|
496
591
|
# ...the time diff between subsequent **frames** (not steps)
|
497
|
-
|
592
|
+
dt = ts.dt
|
498
593
|
# ...the last integration step and time
|
499
594
|
ts = u.trajectory[-1]
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
if all(
|
505
|
-
|
506
|
-
|
595
|
+
last_step = ts.data.get("step", None)
|
596
|
+
last_time = ts.time
|
597
|
+
# See if we apply the wraparound issue fix
|
598
|
+
# Note: we are using some of the info we just read here (all explicitly passed)!
|
599
|
+
if all(
|
600
|
+
t.lower().endswith((".xtc", ".trr")) for t in self.trajectory_files
|
601
|
+
):
|
602
|
+
first_step, last_step = self._fix_trr_xtc_step_wraparound(
|
603
|
+
universe=u,
|
604
|
+
first_time=first_time, last_time=last_time,
|
605
|
+
first_step=first_step, last_step=last_step,
|
606
|
+
)
|
507
607
|
else:
|
508
608
|
# bail out if traj is not an XTC or TRR
|
509
609
|
logger.info("%s is not of type XTC or TRR. Not applying "
|
@@ -511,9 +611,23 @@ class Trajectory:
|
|
511
611
|
# make sure the trajectory is closed by MDAnalysis
|
512
612
|
u.trajectory.close()
|
513
613
|
del u
|
514
|
-
|
515
|
-
|
614
|
+
# finally populate and return the dataclass with what we just read
|
615
|
+
# (and possibly corrected)
|
616
|
+
return _TrajectoryPropertyData(
|
617
|
+
length=length, dt=dt,
|
618
|
+
first_time=first_time, last_time=last_time,
|
619
|
+
first_step=first_step, last_step=last_step,
|
620
|
+
)
|
621
|
+
|
622
|
+
def _fix_trr_xtc_step_wraparound(self, *,
|
623
|
+
universe: mda.Universe,
|
624
|
+
first_time: float, last_time: float,
|
625
|
+
first_step: int, last_step: int,
|
626
|
+
) -> tuple[int, int]:
|
516
627
|
# check/correct for wraparounds in the integration step numbers
|
628
|
+
# return (corrected or not) first_step, last_step
|
629
|
+
# I.e. it is save to always set first_step, last_step with the return
|
630
|
+
# of this method.
|
517
631
|
# NOTE: fails if the trajectory has length = 1!
|
518
632
|
# NOTE: strictly spoken we should not assume wraparound behavior,
|
519
633
|
# but it seems reasonable for the stepnum,
|
@@ -525,52 +639,46 @@ class Trajectory:
|
|
525
639
|
# dividing the times by integrator_dt, this should be reasonably
|
526
640
|
# save for normal MD settings where integrator_dt should be on the
|
527
641
|
# order of 1-10 fs
|
528
|
-
if
|
642
|
+
if (n_frames := len(universe.trajectory)) == 1:
|
529
643
|
# bail out if the trajectory has length=1
|
530
644
|
# as we can not calculate dt if we only have one frame
|
531
645
|
logger.info("%s has only one frame. Can not correct for "
|
532
646
|
"potential wraparound of the integration step.",
|
533
647
|
self)
|
534
|
-
return # bail out
|
648
|
+
return first_step, last_step # bail out
|
535
649
|
# get the time offset for first and last frame, they need to match for
|
536
650
|
# our wraparound fix to work
|
537
|
-
|
538
|
-
|
539
|
-
ts = universe.trajectory[-1]
|
540
|
-
if ts.data.get("time_offset", 0) != time_offset:
|
651
|
+
time_offset = universe.trajectory[0].data.get("time_offset", 0)
|
652
|
+
if universe.trajectory[-1].data.get("time_offset", 0) != time_offset:
|
541
653
|
logger.info("Time offset of the first and last time in "
|
542
654
|
"%s do not match. Not correcting for potential "
|
543
655
|
"wraparound of the integration step.",
|
544
656
|
self)
|
545
|
-
return # bail out
|
546
|
-
delta_s =
|
547
|
-
delta_t = round(
|
548
|
-
# first make sure traj is
|
549
|
-
#
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
step_nums = [ts.data["step"] for ts in universe.trajectory[::skip]]
|
557
|
-
step_diffs = np.diff(step_nums)
|
558
|
-
first_diff = step_diffs[0]
|
559
|
-
if first_diff < 0:
|
657
|
+
return first_step, last_step # bail out
|
658
|
+
delta_s = last_step - first_step
|
659
|
+
delta_t = round(last_time - first_time, ndigits=6)
|
660
|
+
# first make sure traj is continuous (i.e. not a concatenation where we
|
661
|
+
# carried over the time and step data from the original trajs).
|
662
|
+
# Use at most 100 (equally spaced) frames to see if it is continuous.
|
663
|
+
skip = n_frames // 100 if n_frames > 100 else 1
|
664
|
+
step_diffs = np.diff([ts.data["step"]
|
665
|
+
for ts in universe.trajectory[::skip]]
|
666
|
+
)
|
667
|
+
if (first_diff := step_diffs[0]) < 0:
|
560
668
|
# we possibly wrapped around at the first step
|
561
669
|
first_diff += 2**32
|
562
670
|
for diff in step_diffs[1:]:
|
563
671
|
if diff != first_diff:
|
564
|
-
# bail out because traj is not
|
565
|
-
logger.debug("%s is not from one
|
672
|
+
# bail out because traj is not continuous in time
|
673
|
+
logger.debug("%s is not from one continuous propagation, i.e. "
|
566
674
|
"the step difference between subsequent steps is "
|
567
675
|
"not constant. Not applying TRR/XTC step "
|
568
676
|
"wraparound fix and using step as read from the "
|
569
677
|
"underlying trajectory.",
|
570
678
|
self)
|
571
|
-
return
|
679
|
+
return first_step, last_step
|
572
680
|
# now the actual fix
|
573
|
-
if delta_s != 0
|
681
|
+
if delta_s: # delta_s != 0
|
574
682
|
if delta_s > 0:
|
575
683
|
# both (last and first) wrapped around the same number of times
|
576
684
|
integrator_dt = round(delta_t / delta_s, ndigits=6)
|
@@ -580,17 +688,16 @@ class Trajectory:
|
|
580
688
|
# NOTE: should we round or floor? I (hejung) think round is what we
|
581
689
|
# want, it will get us to the nearest int, which is good if
|
582
690
|
# we e.g. have 0.99999999999 instead of 1
|
583
|
-
first_step = round((
|
584
|
-
last_step = round((
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
raise RuntimeError("This should not be possible?!")
|
691
|
+
first_step = round((first_time - time_offset) / integrator_dt)
|
692
|
+
last_step = round((last_time - time_offset) / integrator_dt)
|
693
|
+
return first_step, last_step
|
694
|
+
# delta_s == 0
|
695
|
+
# can only end up here if we have more than one frame in trajectory
|
696
|
+
# **and** the first and last frame have the same integration step
|
697
|
+
# which should be very rare and we can not correct anyway as the
|
698
|
+
# trajectory can not be from a continuous propagation, so we can not
|
699
|
+
# end up here at all?
|
700
|
+
raise RuntimeError("This should not be possible?!")
|
594
701
|
|
595
702
|
def __len__(self) -> int:
|
596
703
|
"""
|
@@ -601,9 +708,9 @@ class Trajectory:
|
|
601
708
|
int
|
602
709
|
The number of frames in the trajectory.
|
603
710
|
"""
|
604
|
-
if self.
|
605
|
-
self.
|
606
|
-
return self.
|
711
|
+
if self._property_data is None:
|
712
|
+
self._property_data = self._populate_property_data()
|
713
|
+
return self._property_data.length
|
607
714
|
|
608
715
|
def __repr__(self) -> str:
|
609
716
|
if len(self.trajectory_files) == 1:
|
@@ -614,6 +721,9 @@ class Trajectory:
|
|
614
721
|
+ f" structure_file={self.structure_file})"
|
615
722
|
)
|
616
723
|
|
724
|
+
def __hash__(self) -> int:
|
725
|
+
return self.trajectory_hash
|
726
|
+
|
617
727
|
def __eq__(self, other: object) -> bool:
|
618
728
|
if not isinstance(other, Trajectory):
|
619
729
|
# if its not a trajectory it cant be equal
|
@@ -621,37 +731,35 @@ class Trajectory:
|
|
621
731
|
if self.trajectory_hash != other.trajectory_hash:
|
622
732
|
# if it has a different hash it cant be equal
|
623
733
|
return False
|
624
|
-
# TODO: check for cached CV values? I (hejung) think it does not really
|
625
|
-
# make sense...
|
626
734
|
|
627
|
-
# if we got until here the two
|
735
|
+
# if we got until here the two trajectories are equal
|
628
736
|
return True
|
629
737
|
|
630
738
|
def __ne__(self, other: object) -> bool:
|
631
|
-
return not self.__eq__(other
|
739
|
+
return not self.__eq__(other)
|
632
740
|
|
633
741
|
@property
|
634
742
|
def structure_file(self) -> str:
|
635
743
|
"""Return relative path to the structure file."""
|
636
|
-
return
|
744
|
+
return self._file_data.structure_file
|
637
745
|
|
638
746
|
@property
|
639
|
-
def trajectory_files(self) -> str:
|
747
|
+
def trajectory_files(self) -> list[str]:
|
640
748
|
"""Return relative path to the trajectory files."""
|
641
|
-
return
|
749
|
+
return self._file_data.trajectory_files
|
642
750
|
|
643
751
|
@property
|
644
752
|
def trajectory_hash(self) -> int:
|
645
|
-
"""Return hash over the
|
646
|
-
return
|
753
|
+
"""Return hash over the trajectory files"""
|
754
|
+
return self._file_data.trajectory_hash
|
647
755
|
|
648
756
|
@property
|
649
|
-
def nstout(self) ->
|
757
|
+
def nstout(self) -> int | None:
|
650
758
|
"""Output frequency between subsequent frames in integration steps."""
|
651
759
|
return self._nstout
|
652
760
|
|
653
761
|
@nstout.setter
|
654
|
-
def nstout(self, val:
|
762
|
+
def nstout(self, val: int | None) -> None:
|
655
763
|
if val is not None:
|
656
764
|
# ensure that it is an int
|
657
765
|
val = int(val)
|
@@ -659,445 +767,80 @@ class Trajectory:
|
|
659
767
|
self._nstout = val
|
660
768
|
|
661
769
|
@property
|
662
|
-
def first_step(self) -> int:
|
770
|
+
def first_step(self) -> int | None:
|
663
771
|
"""Return the integration step of the first frame in the trajectory."""
|
664
|
-
if self.
|
665
|
-
self.
|
666
|
-
return self.
|
772
|
+
if self._property_data is None:
|
773
|
+
self._property_data = self._populate_property_data()
|
774
|
+
return self._property_data.first_step
|
667
775
|
|
668
776
|
@property
|
669
|
-
def last_step(self) -> int:
|
777
|
+
def last_step(self) -> int | None:
|
670
778
|
"""Return the integration step of the last frame in the trajectory."""
|
671
|
-
if self.
|
672
|
-
self.
|
673
|
-
return self.
|
779
|
+
if self._property_data is None:
|
780
|
+
self._property_data = self._populate_property_data()
|
781
|
+
return self._property_data.last_step
|
674
782
|
|
675
783
|
@property
|
676
784
|
def dt(self) -> float:
|
677
|
-
"""The time
|
678
|
-
if self.
|
679
|
-
self.
|
680
|
-
return self.
|
785
|
+
"""The time interval between subsequent *frames* (not steps) in ps."""
|
786
|
+
if self._property_data is None:
|
787
|
+
self._property_data = self._populate_property_data()
|
788
|
+
return self._property_data.dt
|
681
789
|
|
682
790
|
@property
|
683
791
|
def first_time(self) -> float:
|
684
792
|
"""Return the integration timestep of the first frame in ps."""
|
685
|
-
if self.
|
686
|
-
self.
|
687
|
-
return self.
|
793
|
+
if self._property_data is None:
|
794
|
+
self._property_data = self._populate_property_data()
|
795
|
+
return self._property_data.first_time
|
688
796
|
|
689
797
|
@property
|
690
798
|
def last_time(self) -> float:
|
691
799
|
"""Return the integration timestep of the last frame in ps."""
|
692
|
-
if self.
|
693
|
-
self.
|
694
|
-
return self.
|
695
|
-
|
696
|
-
|
697
|
-
async with self._semaphores_by_func_id[func_id]:
|
698
|
-
# sort out which cache we use
|
699
|
-
# NOTE: only one cache should ever be not None, so order should not
|
700
|
-
# matter here
|
701
|
-
# anyway I (hejung) think this order is even what we want:
|
702
|
-
# 1.) use h5py cache if registered
|
703
|
-
# 2.) use npz cache (the default since h5py is not registered
|
704
|
-
# if not set by the user)
|
705
|
-
# 3.) use memory/local cache (only if set on traj creation
|
706
|
-
# or if set as default cache)
|
707
|
-
if self._h5py_cache is not None:
|
708
|
-
return await self._apply_wrapped_func_cached(
|
709
|
-
func_id=func_id,
|
710
|
-
wrapped_func=wrapped_func,
|
711
|
-
cache=self._h5py_cache,
|
712
|
-
)
|
713
|
-
if self._npz_cache is not None:
|
714
|
-
return await self._apply_wrapped_func_cached(
|
715
|
-
func_id=func_id,
|
716
|
-
wrapped_func=wrapped_func,
|
717
|
-
cache=self._npz_cache
|
718
|
-
)
|
719
|
-
if self._memory_cache is not None:
|
720
|
-
return await self._apply_wrapped_func_cached(
|
721
|
-
func_id=func_id,
|
722
|
-
wrapped_func=wrapped_func,
|
723
|
-
cache=self._memory_cache,
|
724
|
-
)
|
725
|
-
# if we get until here we have no cache!
|
726
|
-
logger.warning("No cache associated with %s. Returning calculated "
|
727
|
-
"function values anyway but no caching can/will be "
|
728
|
-
"performed!",
|
729
|
-
self,
|
730
|
-
)
|
731
|
-
return await wrapped_func.get_values_for_trajectory(self)
|
732
|
-
|
733
|
-
async def _apply_wrapped_func_cached(
|
734
|
-
self, func_id: str, wrapped_func,
|
735
|
-
cache: collections.abc.Mapping[str, np.ndarray],
|
736
|
-
):
|
737
|
-
try:
|
738
|
-
# see if it is in cache
|
739
|
-
return copy.copy(cache[func_id])
|
740
|
-
except KeyError:
|
741
|
-
# if not calculate, store and return
|
742
|
-
# send function application to seperate process and wait
|
743
|
-
# until it finishes
|
744
|
-
vals = await wrapped_func.get_values_for_trajectory(self)
|
745
|
-
cache.append(func_id=func_id, vals=vals)
|
746
|
-
return vals
|
747
|
-
|
748
|
-
def _cache_content_to_new_cache(
|
749
|
-
self,
|
750
|
-
old_cache: collections.abc.Mapping[str, np.ndarray],
|
751
|
-
new_cache: collections.abc.Mapping[str, np.ndarray],
|
752
|
-
):
|
753
|
-
for func_id, values in old_cache.items():
|
754
|
-
if func_id in new_cache:
|
755
|
-
continue # dont try to add what is already in there
|
756
|
-
new_cache.append(func_id=func_id, vals=values)
|
757
|
-
|
758
|
-
def __getstate__(self):
|
800
|
+
if self._property_data is None:
|
801
|
+
self._property_data = self._populate_property_data()
|
802
|
+
return self._property_data.last_time
|
803
|
+
|
804
|
+
def __getstate__(self) -> dict[str, typing.Any]:
|
759
805
|
# enable pickling of Trajectory
|
760
806
|
# this should make it possible to pass it into a ProcessPoolExecutor
|
761
|
-
# and lets us calculate TrajectoryFunction values
|
807
|
+
# and lets us calculate TrajectoryFunction values asynchronously
|
762
808
|
state = self.__dict__.copy()
|
763
|
-
#
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
)
|
779
|
-
# and set npz cache back to None since we have not been using it
|
780
|
-
self._npz_cache = None
|
781
|
-
state["_h5py_cache"] = None
|
782
|
-
state["_npz_cache"] = None
|
783
|
-
state["_memory_cache"] = None
|
809
|
+
# special handling for case of function values cached in memory
|
810
|
+
if isinstance(self._cache, TrajectoryFunctionValueCacheInMemory):
|
811
|
+
# write it to npz so we can unpickle with values for any cache type
|
812
|
+
# (if we unpickle with an empty cache we will [try to] read the npz)
|
813
|
+
npz_cache = TrajectoryFunctionValueCacheInNPZ(
|
814
|
+
traj_hash=self.trajectory_hash,
|
815
|
+
traj_files=self.trajectory_files,
|
816
|
+
)
|
817
|
+
for func_id, values in self._cache.items():
|
818
|
+
try:
|
819
|
+
npz_cache.append(func_id=func_id, values=values)
|
820
|
+
except ValuesAlreadyStoredError:
|
821
|
+
# ignore if we already have them
|
822
|
+
pass
|
823
|
+
state["_cache"] = None
|
784
824
|
state["_semaphores_by_func_id"] = collections.defaultdict(
|
785
825
|
asyncio.BoundedSemaphore
|
786
826
|
)
|
787
827
|
return state
|
788
828
|
|
789
|
-
def __setstate__(self, d: dict):
|
829
|
+
def __setstate__(self, d: dict) -> None:
|
790
830
|
# remove the attributes we set in __new__ from dict
|
791
831
|
# (otherwise we would overwrite what we set in __new__)
|
792
|
-
del d["
|
793
|
-
|
794
|
-
del d["_traj_hash"]
|
795
|
-
try:
|
796
|
-
del d["_workdir"]
|
797
|
-
except KeyError:
|
798
|
-
# 'old' trajectory objects dont have a _workdir attribute
|
799
|
-
pass
|
800
|
-
# now we can update without overwritting what we set in __new__
|
832
|
+
del d["_file_data"]
|
833
|
+
# now we can update without overwriting what we set in __new__
|
801
834
|
self.__dict__.update(d)
|
802
|
-
#
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
# no h5py cache has been registered but it is set as default
|
807
|
-
# (which is intended because it is the same behavior as when
|
808
|
-
# initializing a new trajectory in the same situation)
|
809
|
-
self.cache_type = None # this calls _setup_cache
|
810
|
-
return # get out of here, no need to setup the cache twice
|
811
|
-
if self.cache_type == "h5py":
|
812
|
-
# make sure h5py cache is set before trying to unpickle with it
|
813
|
-
try:
|
814
|
-
_ = _GLOBALS["H5PY_CACHE"]
|
815
|
-
except KeyError:
|
816
|
-
# this will (probably) fallback to npz but I (hejung) think it
|
817
|
-
# is nice if we use the possibly set global default?
|
818
|
-
# Note that this will not err but just emit the warning to log
|
819
|
-
# when we change the cache but it will err when the global
|
820
|
-
# default cache is set to h5py (as above)
|
821
|
-
logger.warning("Trying to unpickle %s with cache_type "
|
822
|
-
"'h5py' not possible without a registered "
|
823
|
-
"cache. Falling back to global default type."
|
824
|
-
"See 'asyncmd.config.register_h5py_cache' and"
|
825
|
-
" 'asyncmd.config.set_default_cache_type'.",
|
826
|
-
self
|
827
|
-
)
|
828
|
-
self.cache_type = None # this calls _setup_cache
|
829
|
-
return # get out of here, no need to setup the cache twice
|
830
|
-
# setup the cache for all cases where we are not using default cache
|
831
|
-
# (or had "h5py" but could not unpickle with "h5py" now [and are
|
832
|
-
# therefore also using the default])
|
833
|
-
self._setup_cache()
|
834
|
-
|
835
|
-
def __getnewargs_ex__(self):
|
835
|
+
# and finally setup the cache according to what the global config says
|
836
|
+
self._cache = self._setup_cache()
|
837
|
+
|
838
|
+
def __getnewargs_ex__(self) -> tuple[tuple, dict[str, typing.Any]]:
|
836
839
|
# new needs the trajectory_files to be able to calculate the traj_hash
|
837
840
|
# and since we want __new__ to have the same call signature as __init__
|
838
841
|
# we also add all the init args here too
|
839
842
|
return ((), {"trajectory_files": self.trajectory_files,
|
840
843
|
"structure_file": self.structure_file,
|
841
844
|
"nstout": self.nstout,
|
842
|
-
"
|
843
|
-
"old_workdir": self._workdir,
|
845
|
+
"old_workdir": self._file_data.workdir,
|
844
846
|
})
|
845
|
-
|
846
|
-
|
847
|
-
class TrajectoryFunctionValueCacheMEMORY(collections.abc.Mapping):
|
848
|
-
"""
|
849
|
-
Interface for caching trajectory function values in memory in a dict.
|
850
|
-
"""
|
851
|
-
|
852
|
-
def __init__(self, *args, **kwargs) -> None:
|
853
|
-
"""Initialize a `TrajectoryFunctionValueCacheMEMORY`."""
|
854
|
-
self._func_values_by_id = {}
|
855
|
-
|
856
|
-
def __len__(self) -> int:
|
857
|
-
return len(self._func_values_by_id)
|
858
|
-
|
859
|
-
def __iter__(self):
|
860
|
-
return self._func_values_by_id.__iter__()
|
861
|
-
|
862
|
-
def __getitem__(self, key: str) -> np.ndarray:
|
863
|
-
if not isinstance(key, str):
|
864
|
-
raise TypeError("Keys must be of type str.")
|
865
|
-
return self._func_values_by_id[key]
|
866
|
-
|
867
|
-
def append(self, func_id: str, vals: np.ndarray) -> None:
|
868
|
-
if not isinstance(func_id, str):
|
869
|
-
raise TypeError("func_id must be of type str.")
|
870
|
-
if func_id in self._func_values_by_id:
|
871
|
-
# first check if it already in there
|
872
|
-
raise ValueError("There are already values stored for func_id "
|
873
|
-
+ f"{func_id}. Changing the stored values is not "
|
874
|
-
+ "supported.")
|
875
|
-
self._func_values_by_id[func_id] = vals
|
876
|
-
|
877
|
-
|
878
|
-
class TrajectoryFunctionValueCacheNPZ(collections.abc.Mapping):
|
879
|
-
"""
|
880
|
-
Interface for caching trajectory function values in a numpy npz file.
|
881
|
-
|
882
|
-
Drop-in replacement for the dictionary that is used for in-memory caching.
|
883
|
-
"""
|
884
|
-
|
885
|
-
_hash_traj_npz_key = "hash_of_trajs" # key of hash_traj in npz file
|
886
|
-
|
887
|
-
# NOTE: this is written with the assumption that stored trajectories are
|
888
|
-
# immutable (except for adding additional stored function values)
|
889
|
-
# but we assume that the actual underlying trajectory stays the same,
|
890
|
-
# i.e. it is not extended after first storing it
|
891
|
-
# If it changes between two npz-cache initializiations, it will have
|
892
|
-
# a different traj-hash and all cached CV values will be recalculated
|
893
|
-
|
894
|
-
# NOTE: npz appending inspired by: https://stackoverflow.com/a/66618141
|
895
|
-
|
896
|
-
# NOTE/FIXME: It would be nice to use the MAX_FILES_OPEN semaphore
|
897
|
-
# but then we need async/await and then we need to go to a 'create'
|
898
|
-
# classmethod that is async and required for initialization
|
899
|
-
# (because __init__ cant be async)
|
900
|
-
# but since we (have to) open the npz file in the other magic methods
|
901
|
-
# too it does not really matter (as they can not be async either)?
|
902
|
-
# ...and as we also leave some room for non-semaphored file openings anyway
|
903
|
-
|
904
|
-
def __init__(self, fname_trajs: list[str], hash_traj: int) -> None:
|
905
|
-
"""
|
906
|
-
Initialize a `TrajectoryFunctionValueCacheNPZ`.
|
907
|
-
|
908
|
-
Parameters
|
909
|
-
----------
|
910
|
-
fname_trajs : list[str]
|
911
|
-
Absolute filenames to the trajectories for which we cache CV values.
|
912
|
-
hash_traj : int
|
913
|
-
Hash over the first part of the trajectory file,
|
914
|
-
used to make sure we cache only for the right trajectory
|
915
|
-
(and not any trajectories with the same filename).
|
916
|
-
"""
|
917
|
-
self.fname_npz = self._get_cache_filename(fname_trajs=fname_trajs,
|
918
|
-
trajectory_hash=hash_traj,
|
919
|
-
)
|
920
|
-
self._hash_traj = hash_traj
|
921
|
-
self._func_ids = []
|
922
|
-
# sort out if we have an associated npz file already
|
923
|
-
# and if it is from/for the "right" trajectory file
|
924
|
-
self._ensure_consistent_npz()
|
925
|
-
|
926
|
-
def _ensure_consistent_npz(self):
|
927
|
-
# next line makes sure we only remember func_ids from the current npz
|
928
|
-
self._func_ids = []
|
929
|
-
if not os.path.isfile(self.fname_npz):
|
930
|
-
# no npz so nothing to do except making sure we have no func_ids
|
931
|
-
return
|
932
|
-
existing_npz_matches = False
|
933
|
-
with np.load(self.fname_npz, allow_pickle=False) as npzfile:
|
934
|
-
try:
|
935
|
-
saved_hash_traj = npzfile[self._hash_traj_npz_key][0]
|
936
|
-
except KeyError:
|
937
|
-
# we probably tripped over an old formatted npz
|
938
|
-
# so we will just rewrite it completely with hash
|
939
|
-
pass
|
940
|
-
else:
|
941
|
-
# old hash found, lets compare the two hashes
|
942
|
-
existing_npz_matches = (self._hash_traj == saved_hash_traj)
|
943
|
-
if existing_npz_matches:
|
944
|
-
# if they do populate self with the func_ids we have
|
945
|
-
# cached values for
|
946
|
-
for k in npzfile.keys():
|
947
|
-
if k != self._hash_traj_npz_key:
|
948
|
-
self._func_ids.append(str(k))
|
949
|
-
# now if the old npz did not match we should remove it
|
950
|
-
# then we will rewrite it with the first cached CV values
|
951
|
-
if not existing_npz_matches:
|
952
|
-
logger.debug("Found existing npz file (%s) but the"
|
953
|
-
" trajectory hash does not match."
|
954
|
-
" Recreating the npz cache from scratch.",
|
955
|
-
self.fname_npz
|
956
|
-
)
|
957
|
-
os.unlink(self.fname_npz)
|
958
|
-
|
959
|
-
@classmethod
|
960
|
-
def _get_cache_filename(cls, fname_trajs: list[str],
|
961
|
-
trajectory_hash: int) -> str:
|
962
|
-
"""
|
963
|
-
Construct cachefilename from trajectory fname.
|
964
|
-
|
965
|
-
Parameters
|
966
|
-
----------
|
967
|
-
fname_trajs : list[str]
|
968
|
-
Path to the trajectory for which we cache.
|
969
|
-
trajectory_hash : int
|
970
|
-
Hash of the trajectory (files).
|
971
|
-
|
972
|
-
Returns
|
973
|
-
-------
|
974
|
-
str
|
975
|
-
Path to the cachefile associated with trajectory.
|
976
|
-
"""
|
977
|
-
head, tail = os.path.split(fname_trajs[0])
|
978
|
-
return os.path.join(head,
|
979
|
-
f".{tail}{'_MULTIPART' if len(fname_trajs) > 1 else ''}_asyncmd_cv_cache.npz"
|
980
|
-
)
|
981
|
-
|
982
|
-
def __len__(self) -> int:
|
983
|
-
return len(self._func_ids)
|
984
|
-
|
985
|
-
def __iter__(self):
|
986
|
-
for func_id in self._func_ids:
|
987
|
-
yield func_id
|
988
|
-
|
989
|
-
def __getitem__(self, key: str) -> np.ndarray:
|
990
|
-
if not isinstance(key, str):
|
991
|
-
raise TypeError("Keys must be of type str.")
|
992
|
-
if key in self._func_ids:
|
993
|
-
with np.load(self.fname_npz, allow_pickle=False) as npzfile:
|
994
|
-
return npzfile[key]
|
995
|
-
else:
|
996
|
-
raise KeyError(f"No values for {key} cached (yet).")
|
997
|
-
|
998
|
-
def append(self, func_id: str, vals: np.ndarray) -> None:
|
999
|
-
"""
|
1000
|
-
Append values for given func_id.
|
1001
|
-
|
1002
|
-
Parameters
|
1003
|
-
----------
|
1004
|
-
func_id : str
|
1005
|
-
Function identifier.
|
1006
|
-
vals : np.ndarray
|
1007
|
-
Values of application of function with given func_id.
|
1008
|
-
|
1009
|
-
Raises
|
1010
|
-
------
|
1011
|
-
TypeError
|
1012
|
-
If ``func_id`` is not a string.
|
1013
|
-
ValueError
|
1014
|
-
If there are already values stored for ``func_id`` in self.
|
1015
|
-
"""
|
1016
|
-
if not isinstance(func_id, str):
|
1017
|
-
raise TypeError("func_id must be of type str.")
|
1018
|
-
if func_id in self._func_ids:
|
1019
|
-
# first check if it already in there
|
1020
|
-
raise ValueError("There are already values stored for func_id "
|
1021
|
-
+ f"{func_id}. Changing the stored values is not "
|
1022
|
-
+ "supported.")
|
1023
|
-
if len(self) == 0:
|
1024
|
-
# these are the first cached CV values for this traj
|
1025
|
-
# so we just create the (empty) npz file
|
1026
|
-
np.savez(self.fname_npz)
|
1027
|
-
# and write the trajectory hash
|
1028
|
-
self._append_data_to_npz(name=self._hash_traj_npz_key,
|
1029
|
-
value=np.array([self._hash_traj]),
|
1030
|
-
)
|
1031
|
-
# now we can append either way
|
1032
|
-
# either already something cached, or freshly created empty file
|
1033
|
-
self._append_data_to_npz(name=func_id, value=vals)
|
1034
|
-
# add func_id to list of func_ids that we know are cached in npz
|
1035
|
-
self._func_ids.append(func_id)
|
1036
|
-
|
1037
|
-
def _append_data_to_npz(self, name: str, value: np.ndarray) -> None:
|
1038
|
-
# npz files are just zipped together collections of npy files
|
1039
|
-
# so we just make a npy file saved into a BytesIO and then write that
|
1040
|
-
# to the end of the npz file
|
1041
|
-
bio = io.BytesIO()
|
1042
|
-
np.save(bio, value)
|
1043
|
-
with zipfile.ZipFile(file=self.fname_npz,
|
1044
|
-
mode="a", # append!
|
1045
|
-
# uncompressed (but) zip archive member
|
1046
|
-
compression=zipfile.ZIP_STORED,
|
1047
|
-
) as zfile:
|
1048
|
-
zfile.writestr(f"{name}.npy", data=bio.getvalue())
|
1049
|
-
|
1050
|
-
|
1051
|
-
class TrajectoryFunctionValueCacheH5PY(collections.abc.Mapping):
|
1052
|
-
"""
|
1053
|
-
Interface for caching trajectory function values in a given h5py group.
|
1054
|
-
|
1055
|
-
Drop-in replacement for the dictionary that is used for in-memory caching.
|
1056
|
-
"""
|
1057
|
-
|
1058
|
-
# NOTE: this is written with the assumption that stored trajectories are
|
1059
|
-
# immutable (except for adding additional stored function values)
|
1060
|
-
# but we assume that the actual underlying trajectory stays the same,
|
1061
|
-
# i.e. it is not extended after first storing it
|
1062
|
-
|
1063
|
-
def __init__(self, h5py_cache, hash_traj: int):
|
1064
|
-
self.h5py_cache = h5py_cache
|
1065
|
-
self._hash_traj = hash_traj
|
1066
|
-
self._h5py_paths = {"ids": "FunctionIDs",
|
1067
|
-
"vals": "FunctionValues"
|
1068
|
-
}
|
1069
|
-
self._root_grp = h5py_cache.require_group(
|
1070
|
-
"asyncmd/"
|
1071
|
-
+ "TrajectoryFunctionValueCache/"
|
1072
|
-
+ f"{self._hash_traj}"
|
1073
|
-
)
|
1074
|
-
self._ids_grp = self._root_grp.require_group(self._h5py_paths["ids"])
|
1075
|
-
self._vals_grp = self._root_grp.require_group(self._h5py_paths["vals"])
|
1076
|
-
|
1077
|
-
def __len__(self):
|
1078
|
-
return len(self._ids_grp.keys())
|
1079
|
-
|
1080
|
-
def __iter__(self):
|
1081
|
-
for idx in range(len(self)):
|
1082
|
-
yield self._ids_grp[str(idx)].asstr()[()]
|
1083
|
-
|
1084
|
-
def __getitem__(self, key):
|
1085
|
-
if not isinstance(key, str):
|
1086
|
-
raise TypeError("Keys must be of type str.")
|
1087
|
-
for idx, k_val in enumerate(self):
|
1088
|
-
if key == k_val:
|
1089
|
-
return self._vals_grp[str(idx)][:]
|
1090
|
-
# if we got until here the key is not in there
|
1091
|
-
raise KeyError("Key not found.")
|
1092
|
-
|
1093
|
-
def append(self, func_id, vals):
|
1094
|
-
if not isinstance(func_id, str):
|
1095
|
-
raise TypeError("Keys (func_id) must be of type str.")
|
1096
|
-
if func_id in self:
|
1097
|
-
raise ValueError("There are already values stored for func_id "
|
1098
|
-
+ f"{func_id}. Changing the stored values is not "
|
1099
|
-
+ "supported.")
|
1100
|
-
# TODO: do we also want to check vals for type?
|
1101
|
-
name = str(len(self))
|
1102
|
-
_ = self._ids_grp.create_dataset(name, data=func_id)
|
1103
|
-
_ = self._vals_grp.create_dataset(name, data=vals)
|