asyncmd 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
asyncmd/__init__.py ADDED
@@ -0,0 +1,18 @@
1
+ # This file is part of asyncmd.
2
+ #
3
+ # asyncmd is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # asyncmd is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with asyncmd. If not, see <https://www.gnu.org/licenses/>.
15
+ from ._version import __version__, __git_hash__
16
+
17
+ from . import config
18
+ from .trajectory.trajectory import Trajectory
asyncmd/_config.py ADDED
@@ -0,0 +1,26 @@
1
+ # This file is part of asyncmd.
2
+ #
3
+ # asyncmd is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # asyncmd is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with asyncmd. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+
17
+ # NOTE: This file **only** contains the dictionaries with the values
18
+ # and **no** functions to set them, the funcs all live in 'config.py'.
19
+ # The idea here is that we can then without any issues import additional
20
+ # stuff (like the config functions from 'slurm.py') in 'config.py'
21
+ # without risking circular imports because all asyncmd files should only
22
+ # need to import the _GLOBALS and _SEMAPHORES dicts from '_config.py'.
23
+
24
+
25
# Global configuration values; the entries are set by the functions living
# in 'config.py' (e.g. the default trajectory function cache type).
_GLOBALS = {}
# Semaphores used to limit resource usage; the entries are set by the
# functions living in 'config.py' (e.g. "MAX_PROCESS", "MAX_FILES_OPEN").
_SEMAPHORES = {}
asyncmd/_version.py ADDED
@@ -0,0 +1,75 @@
1
+ # This file is part of asyncmd.
2
+ #
3
+ # asyncmd is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # asyncmd is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with asyncmd. If not, see <https://www.gnu.org/licenses/>.
15
+ import os
16
+ import subprocess
17
+
18
+
19
+ def _get_version_from_pyproject():
20
+ """Get version string from pyproject.toml file."""
21
+ pyproject_toml = os.path.join(os.path.dirname(__file__),
22
+ "../../pyproject.toml")
23
+ with open(pyproject_toml) as f:
24
+ line = f.readline()
25
+ while line:
26
+ if line.startswith("version ="):
27
+ version_line = line
28
+ break
29
+ line = f.readline()
30
+ version = version_line.strip().split(" = ")[1]
31
+ version = version.replace('"', '').replace("'", "")
32
+ return version
33
+
34
+
35
+ def _get_git_hash_and_tag():
36
+ """Get git hash, date, and tag from git log."""
37
+ git_hash = ""
38
+ git_date = ""
39
+ git_tag = ""
40
+ p = subprocess.Popen(
41
+ ["git", "log", "-1", "--format='%H || %as || %(describe:tags=true,match=v*)'"],
42
+ stdout=subprocess.PIPE,
43
+ stderr=subprocess.PIPE,
44
+ cwd=os.path.dirname(__file__),
45
+ )
46
+ stdout, stderr = p.communicate()
47
+ if p.returncode == 0:
48
+ git_hash, git_date, git_describe = (stdout.decode("utf-8")
49
+ .replace("'", "").replace('"', '')
50
+ .strip().split("||"))
51
+ git_date = git_date.strip().replace("-", "")
52
+ git_describe = git_describe.strip()
53
+ if "-" not in git_describe and git_describe != "":
54
+ # git-describe returns either the git-tag or (if we are not exactly
55
+ # at a tag) something like
56
+ # $GITTAG-$NUM_COMMITS_DISTANCE-$CURRENT_COMMIT_HASH
57
+ git_tag = git_describe[1:] # strip of the 'v'
58
+ return git_hash, git_date, git_tag
59
+
60
try:
    _version = _get_version_from_pyproject()
except FileNotFoundError:
    # No pyproject.toml at the expected place: take the version from the
    # metadata of the installed package and report no git hash.
    import importlib.metadata
    __version__ = importlib.metadata.version("asyncmd")
    __git_hash__ = ""
else:
    _git_hash, _git_date, _git_tag = _get_git_hash_and_tag()
    __git_hash__ = _git_hash
    # Only append the git date and short hash to the version if we have a
    # git hash at all (it is empty e.g. if git is installed but we are not
    # in a repo) and if this is not the version-tagged commit itself.
    if _git_hash != "" and _version != _git_tag:
        __version__ = f"{_version}+git{_git_date}.{_git_hash[:7]}"
    else:
        __version__ = _version
asyncmd/config.py ADDED
@@ -0,0 +1,203 @@
1
+ # This file is part of asyncmd.
2
+ #
3
+ # asyncmd is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # asyncmd is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with asyncmd. If not, see <https://www.gnu.org/licenses/>.
15
+ import os
16
+ import asyncio
17
+ import logging
18
+ import resource
19
+ import typing
20
+
21
+
22
+ from ._config import _GLOBALS, _SEMAPHORES
23
+ from .slurm import set_slurm_settings, set_all_slurm_settings
24
+ # TODO: Do we want to set the _GLOBALS defaults here? E.g. CACHE_TYPE="npz"?
25
+
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ # can be called by the user to (re) set maximum number of processes used
31
def set_max_process(num=None, max_num=None):
    """
    Set the maximum number of concurrent python processes.

    If num is None, default to os.cpu_count() / 4 (rounded down, but never
    less than 1).

    Parameters
    ----------
    num : int, optional
        Number of processes, if None will default to 1/4 of the CPU count
        (at least 1), or to 2 if the CPU count can not be determined.
    max_num : int, optional
        If given the number of processes can not exceed this number independent
        of the value of CPU count. Useful mostly for code that runs on multiple
        different machines (with different CPU counts) but still wants to avoid
        spawning hundreds of processes.
    """
    # NOTE: I think we should use a conservative default, e.g. 0.25*cpu_count()
    # TODO: limit to 30-40?, i.e never higher even if we have 1111 cores?
    if num is None:
        logical_cpu_count = os.cpu_count()
        if logical_cpu_count is not None:
            # Never go below 1: on machines with less than 4 CPUs the plain
            # integer division is 0 and a semaphore with value 0 can never
            # be acquired, i.e. everything waiting for it would hang.
            num = max(1, logical_cpu_count // 4)
        else:
            # fallback if os.cpu_count() can not determine the number of cpus
            # play it safe and not have more than 2?
            # TODO: think about a good number!
            num = 2
    if max_num is not None:
        num = min(num, max_num)
    # _SEMAPHORES is only mutated (never rebound), so no 'global' is needed
    _SEMAPHORES["MAX_PROCESS"] = asyncio.BoundedSemaphore(num)
62
+
63
+
64
+ set_max_process()
65
+
66
+
67
def set_max_files_open(num: typing.Optional[int] = None, margin: int = 30):
    """
    Set the maximum number of concurrently opened files.

    By default use the systems soft resource limit.

    Parameters
    ----------
    num : int, optional
        Maximum number of open files, if None use systems (soft) resourcelimit,
        by default None. A value above the soft limit is reduced to the soft
        limit and a warning is logged.
    margin : int, optional
        Safe margin to keep, i.e. we will only ever open `num - margin` files,
        by default 30

    Raises
    ------
    ValueError
        If num <= margin.
    """
    # ensure that we do not open too many files
    # resource.getrlimit returns a tuple (soft, hard); we take the soft-limit
    # and to be sure 30 less (the reason being that we can not use the
    # semaphores from non-async code, but sometimes use the sync subprocess.run
    # and subprocess.check_call [which also need files/pipes to work])
    # also maybe we need other open files like a storage :)
    rlim_soft = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
    if num is None:
        num = rlim_soft
    elif num > rlim_soft:
        logger.warning("Passed a wanted number of open files that is larger "
                       "than the systems soft resource limit (%d > %d). "
                       "Will be using num=%d instead. To set a higher number "
                       "increase your systems limit on the number of open "
                       "files and call this function again.",
                       num, rlim_soft, rlim_soft,
                       )
        num = rlim_soft
    if num - margin <= 0:
        raise ValueError("num must be larger than margin. "
                         f"Was num={num}, margin={margin}."
                         )
    # NOTE: Each MAX_FILES_OPEN semaphore counts for 3 open files!
    #       The reason is that we open 3 files at the same time for each
    #       subprocess (stdin, stdout, stderr), but semaphores can only be
    #       decreased (awaited) once at a time. The problem with just awaiting
    #       it three times in a row is that we can get deadlocked by getting
    #       1-2 semaphores and waiting for the next (last) semaphore in all
    #       threads. The problem is that this semaphore will never be freed
    #       without any process getting a semaphore...
    # NOTE: We use at least 1, because for (num - margin) < 3 the division
    #       gives 0 and a semaphore with value 0 can never be acquired. In
    #       that corner case we use up to 2 files from the margin.
    semaval = max(1, (num - margin) // 3)
    # _SEMAPHORES is only mutated (never rebound), so no 'global' is needed
    _SEMAPHORES["MAX_FILES_OPEN"] = asyncio.BoundedSemaphore(semaval)
120
+
121
+
122
+ set_max_files_open()
123
+
124
+
125
+ # SLURM semaphore stuff:
126
+ # TODO: move this to slurm.py? and initialize only if slurm is available?
127
+ # slurm max job semaphore, if the user sets it it will be used,
128
+ # otherwise we can use an unlimited number of simultaneous slurm-jobs
129
+ # (if the simulation requires that much)
130
+ # TODO: document that somewhere, bc usually clusters have a job number limit?!
131
def set_slurm_max_jobs(num: typing.Union[int, None]):
    """
    Limit how many SLURM jobs may be submitted at the same time.

    Parameters
    ----------
    num : int or None
        Upper bound for the number of simultaneous SLURM jobs for this
        invocation of python/asyncmd. Pass `None` to have no limit.
    """
    # store either a semaphore with `num` slots or None (meaning unlimited)
    limiter = None if num is None else asyncio.BoundedSemaphore(num)
    _SEMAPHORES["SLURM_MAX_JOB"] = limiter
146
+
147
+
148
+ set_slurm_max_jobs(num=None)
149
+
150
+
151
+ # Trajectory function value config
152
def set_default_trajectory_cache_type(cache_type: str):
    """
    Choose the cache type TrajectoryFunctionValues use by default.

    The value is matched case-insensitively and stored in lower case. It can
    still be overwritten for single trajectories by passing ``cache_type``
    to ``Trajectory.__init__``.

    Parameters
    ----------
    cache_type : str
        One of "h5py", "npz", "memory".

    Raises
    ------
    ValueError
        Raised if ``cache_type`` is not one of the allowed values.
    """
    allowed_values = ["h5py", "npz", "memory"]
    normalized = cache_type.lower()
    if normalized in allowed_values:
        _GLOBALS["TRAJECTORY_FUNCTION_CACHE_TYPE"] = normalized
        return
    raise ValueError(f"Given cache type must be one of {allowed_values}."
                     f" Was: {normalized}.")
176
+
177
+
178
def register_h5py_cache(h5py_group, make_default: bool = False):
    """
    Register a h5py file or group for CV value caching.

    Note that this only changes the default cache type to "h5py" if
    ``make_default=True`` is passed, in that case it calls
    :func:`set_default_trajectory_cache_type` with ``cache_type="h5py"``.

    Note that a ``h5py.File`` is just a slightly special ``h5py.Group``, so you
    can pass either. :mod:`asyncmd` will use either the file or the group as
    the root of its own stored values.
    E.g. you will have ``h5py_group["asyncmd/TrajectoryFunctionValueCache"]``
    always pointing to the cached trajectory values and if ``h5py_group`` is
    the top-level group (i.e. the file) you also have
    ``(file["/asyncmd/TrajectoryFunctionValueCache"]
    == h5py_group["asyncmd/TrajectoryFunctionValueCache"])``.

    Parameters
    ----------
    h5py_group : h5py.Group or h5py.File
        The file or group to use for caching.
    make_default : bool, optional
        Whether we should also make "h5py" the default trajectory function
        cache type. By default False.
    """
    if make_default:
        set_default_trajectory_cache_type(cache_type="h5py")
    # _GLOBALS is only mutated (never rebound), so no 'global' is needed
    _GLOBALS["H5PY_CACHE"] = h5py_group
@@ -0,0 +1,16 @@
1
+ # This file is part of asyncmd.
2
+ #
3
+ # asyncmd is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # asyncmd is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with asyncmd. If not, see <https://www.gnu.org/licenses/>.
15
+ from .mdconfig import MDP
16
+ from .mdengine import GmxEngine, SlurmGmxEngine