sawnergy 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sawnergy might be problematic. Click here for more details.
- sawnergy/__init__.py +13 -0
- sawnergy/embedding/SGNS_pml.py +135 -0
- sawnergy/embedding/SGNS_torch.py +177 -0
- sawnergy/embedding/__init__.py +34 -0
- sawnergy/embedding/embedder.py +578 -0
- sawnergy/logging_util.py +54 -0
- sawnergy/rin/__init__.py +9 -0
- sawnergy/rin/rin_builder.py +936 -0
- sawnergy/rin/rin_util.py +391 -0
- sawnergy/sawnergy_util.py +1182 -0
- sawnergy/visual/__init__.py +42 -0
- sawnergy/visual/visualizer.py +690 -0
- sawnergy/visual/visualizer_util.py +387 -0
- sawnergy/walks/__init__.py +16 -0
- sawnergy/walks/walker.py +795 -0
- sawnergy/walks/walker_util.py +384 -0
- sawnergy-1.0.0.dist-info/METADATA +290 -0
- sawnergy-1.0.0.dist-info/RECORD +22 -0
- sawnergy-1.0.0.dist-info/WHEEL +5 -0
- sawnergy-1.0.0.dist-info/licenses/LICENSE +201 -0
- sawnergy-1.0.0.dist-info/licenses/NOTICE +4 -0
- sawnergy-1.0.0.dist-info/top_level.txt +1 -0
sawnergy/rin/rin_util.py
ADDED
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
# built-in
|
|
4
|
+
import logging
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
import os, shutil, subprocess
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
import re
|
|
9
|
+
# local
|
|
10
|
+
from .. import sawnergy_util
|
|
11
|
+
|
|
12
|
+
# *----------------------------------------------------*
|
|
13
|
+
# GLOBALS
|
|
14
|
+
# *----------------------------------------------------*
|
|
15
|
+
|
|
16
|
+
# Module-level logger named after this module.
_logger = logging.getLogger(__name__)
# Forward declarations (annotation only, no value bound here); the actual
# objects are assigned further down, after `CpptrajScript` is defined.
PAIRWISE_STDOUT: CpptrajScript
# NOTE(review): COM_STDOUT is later bound to a callable taking `mol_id` and
# returning a CpptrajScript, not to a CpptrajScript instance.
COM_STDOUT: CpptrajScript
|
|
19
|
+
|
|
20
|
+
# *----------------------------------------------------*
|
|
21
|
+
# CLASSES
|
|
22
|
+
# *----------------------------------------------------*
|
|
23
|
+
|
|
24
|
+
class CpptrajNotFound(RuntimeError):
    """Raised when no usable `cpptraj` executable could be located.

    The message enumerates every candidate path that was examined and reminds
    the user how to make `cpptraj` discoverable: install AmberTools, put
    `cpptraj` on PATH, or point the CPPTRAJ environment variable at it.
    """

    def __init__(self, candidates: list[Path]):
        """Build the error message from the paths that were tried.

        Args:
            candidates: Paths that were checked for a working `cpptraj`
                executable, in the order they were tried.
        """
        # One bullet line per candidate, in the order given.
        tried = "\n".join(f" - {path}" for path in candidates)
        headline = "Could not locate a working `cpptraj` executable.\n"
        listing = "Tried the following locations:\n" + tried
        hint = (
            "\nEnsure that AmberTools is installed and `cpptraj` is on your PATH, "
            "or set the CPPTRAJ environment variable to its location."
        )
        super().__init__(headline + listing + hint)
|
|
46
|
+
|
|
47
|
+
@dataclass(frozen=True)
class CpptrajScript:
    """Immutable builder for composing cpptraj input scripts.

    Instances hold a tuple of command strings. You can:
      - Append a command with `+ "cmd"`.
      - Concatenate two scripts with `+ other_script`.
      - Redirect the last command to a file with the overloaded `>` operator.
      - Render the final script text with `render()`, which ensures a trailing
        newline and injects a `run` command if one is not already present.

    Every operation returns a new `CpptrajScript`; the receiver is never
    modified.

    Attributes:
        commands: Ordered tuple of cpptraj command lines (without trailing
            newlines).
    """
    # Variable-length tuple of command lines (`tuple[str]` would mean a 1-tuple).
    commands: tuple[str, ...] = field(default_factory=tuple)

    @classmethod
    def from_cmd(cls, cmd: str) -> CpptrajScript:
        """Create a script containing a single command.

        Args:
            cmd: A single cpptraj command line (no trailing newline required).

        Returns:
            CpptrajScript: A new script with exactly one command.
        """
        return cls((cmd,))

    def __add__(self, other: str | CpptrajScript) -> CpptrajScript:
        """Concatenate a command or another script.

        Args:
            other: Either a command string to append as a new line, or another
                `CpptrajScript` whose commands will be appended in order.

        Returns:
            CpptrajScript: A new script with `other` appended.

        Raises:
            TypeError: Raised by Python when `other` is neither a `str` nor a
                `CpptrajScript` (this method returns `NotImplemented`).
        """
        if isinstance(other, str):
            return CpptrajScript(self.commands + (other,))
        if isinstance(other, CpptrajScript):
            return CpptrajScript(self.commands + other.commands)
        return NotImplemented

    def __gt__(self, file_name: str | CpptrajScript) -> CpptrajScript:  # >
        """Overload `>` to add an `out <file>` target to the last command.

        If `file_name` is a `CpptrajScript`, this is plain concatenation (same
        effect as `self + file_name`). Otherwise ` out <file_name>` is appended
        to the last command of the script.

        Args:
            file_name: Output filename to attach to the last command, or
                another script to concatenate.

        Returns:
            CpptrajScript: A new script with modified/concatenated commands.

        Raises:
            IndexError: If the script has no command to attach the output
                target to.
        """
        if isinstance(file_name, CpptrajScript):
            return self + file_name
        if not self.commands:
            # Same exception type as the bare tuple lookup, but with a message
            # that says what went wrong.
            raise IndexError("Cannot redirect output: the script has no commands")
        *head, last = self.commands
        return CpptrajScript((*head, f"{last} out {file_name}"))

    def render(self) -> str:
        """Render the script to text, auto-inserting `run` if missing.

        A command counts as `run` when it equals `run` after stripping
        surrounding whitespace, so a padded `run` line does not trigger a
        second, injected one.

        Returns:
            str: The commands joined by newlines with a trailing newline. If
            no `run` command is present, a `run` line is added at the end.
        """
        has_run = any(cmd.strip() == "run" for cmd in self.commands)
        # The empty last element yields the trailing newline after the join.
        tail = ("",) if has_run else ("run", "")
        return "\n".join(self.commands + tail)
|
|
125
|
+
|
|
126
|
+
# Script fragment consisting of a `run` command followed by two `printdata`
# commands for the PW[EMAP] and PW[VMAP] datasets (square2d, no header).
# No `out` target is attached; presumably the data is meant to be read from
# cpptraj's standard output, as the name suggests.
PAIRWISE_STDOUT = (
    CpptrajScript.from_cmd("run")
    + "printdata PW[EMAP] square2d noheader"
    + "printdata PW[VMAP] square2d noheader"
)
|
|
131
|
+
|
|
132
|
+
def COM_STDOUT(mol_id) -> CpptrajScript:
    """Build the script fragment that prints per-residue COM coordinates.

    The fragment starts with `run`, then loops over the residues selected by
    the mask `^<mol_id>` with a counter `i` starting at 1. Inside the loop it
    issues a `dataset legend` command for `COM$i` and three
    `dataset vectorcoord` commands that name the X, Y and Z components
    `COMX$i`, `COMY$i` and `COMZ$i`. After the loop it prints every dataset
    matching `COMX*`, `COMY*` and `COMZ*`.

    Args:
        mol_id: Molecule identifier interpolated into the `^<mol_id>` mask.

    Returns:
        CpptrajScript: A new script holding the commands described above.
    """
    return CpptrajScript((
        "run",
        f"for residues R inmask ^{mol_id} i=1;i++",
        "dataset legend $R COM$i",
        "dataset vectorcoord X COM$i name COMX$i",
        "dataset vectorcoord Y COM$i name COMY$i",
        "dataset vectorcoord Z COM$i name COMZ$i",
        "done",
        "printdata COMX* COMY* COMZ*",
    ))
|
|
142
|
+
|
|
143
|
+
# *----------------------------------------------------*
|
|
144
|
+
# FUNCTIONS
|
|
145
|
+
# *----------------------------------------------------*
|
|
146
|
+
|
|
147
|
+
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= #
|
|
148
|
+
# WRAPPERS AND HELPERS FOR THE CPPTRAJ EXECUTABLE
|
|
149
|
+
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= #
|
|
150
|
+
|
|
151
|
+
def locate_cpptraj(explicit: Path | None = None, verify: bool = True) -> str:
    """Locate a working `cpptraj` executable.

    Candidates are collected in the following order:

      1. An explicitly provided path.
      2. The `CPPTRAJ` environment variable.
      3. System PATH (via `shutil.which`, for `cpptraj` and `cpptraj.exe`).
      4. The `AMBERHOME/bin` directory.
      5. The `CONDA_PREFIX/bin` directory.

    Each candidate must be an existing regular file with execute permission.
    If `verify=True`, the binary is additionally run with the `-h` flag and is
    accepted when it exits with status 0 or 1. A candidate that times out,
    cannot be launched, or exits with another status is skipped and the next
    one is tried.

    Args:
        explicit (Path | None): An explicit path to the `cpptraj` executable.
            If provided, this is the first candidate tested. A plain string is
            accepted as well.
        verify (bool): If True, run `cpptraj -h` to confirm functionality
            of the executable. If False, only existence and executability
            are checked. Defaults to True.

    Returns:
        str: The resolved path to the first accepted `cpptraj` executable.

    Raises:
        CpptrajNotFound: If no functional `cpptraj` instance can be located.
    """
    _logger.info("Attempting to locate a `cpptraj` executable")

    candidates: list[Path] = []

    if explicit:
        # Convert before calling Path methods so that string input works too.
        explicit_path = Path(explicit)
        _logger.info(f"An explicit path was provided: {explicit_path.resolve()}")
        candidates.append(explicit_path)
    else:
        _logger.info("No explicit path was provided")

    env_cpptraj = os.getenv("CPPTRAJ")
    if env_cpptraj:
        candidates.append(Path(env_cpptraj))

    for name in ("cpptraj", "cpptraj.exe"):
        exe = shutil.which(name)
        if exe:
            candidates.append(Path(exe))

    for variable in ("AMBERHOME", "CONDA_PREFIX"):
        root = os.getenv(variable)
        if root:
            candidates.append(Path(root) / "bin" / "cpptraj")

    _logger.info(f"Checking the following paths for cpptraj presence: {candidates}")
    for p in candidates:
        # `is_file` rejects directories, which can also carry the execute bit.
        if not (p.is_file() and os.access(p, os.X_OK)):
            continue

        _logger.info(f"Found a `cpptraj` instance at {p}")

        if not verify:
            _logger.info(f"No verification was prompted. Returning the path {p}")
            return str(p.resolve())

        _logger.info("Attempting to verify that it works")
        try:
            # cpptraj -h prints a help message
            proc = subprocess.run([str(p), "-h"], capture_output=True, text=True, timeout=15)  # 15 sec timeout
        except subprocess.TimeoutExpired:
            _logger.warning(f"The instance at {p} hung during verification (timeout). Skipping.")
            continue
        except OSError as exc:
            # The file could not be launched at all; try the next candidate
            # instead of aborting the whole search.
            _logger.warning(f"The instance at {p} could not be executed: {exc}. Skipping.")
            continue

        if proc.returncode in (0, 1):
            _logger.info(f"The instance is functional. Returning the path {p}")
            return str(p.resolve())

        _logger.warning(f"The instance is not functional: {proc.stderr}")

    _logger.error("No functional `cpptraj` instance was found")
    raise CpptrajNotFound(candidates)
|
|
224
|
+
|
|
225
|
+
def run_cpptraj(cpptraj: str,
                script: str | None = None,
                argv: list[str] | None = None,
                timeout: float | None = None,
                *,
                env: dict | None = None) -> str:
    """Run `cpptraj` and return its standard output.

    If `script` text is provided, it is sent to cpptraj via STDIN. A `quit`
    line is appended automatically unless the last non-blank line of the
    script already is `quit`. Alternatively, you can pass command-line
    arguments via `argv` (e.g., `["-i", "script.in"]`) and leave `script=None`.

    Args:
        cpptraj: Path to the `cpptraj` executable.
        script: Complete cpptraj script to feed on STDIN, or `None` to send
            nothing.
        argv: Additional command-line arguments to pass to `cpptraj`.
        timeout: Maximum wall time in seconds for the subprocess.
        env: Environment variables for the child process. Values must be strings.

    Returns:
        str: Captured `stdout` produced by `cpptraj`.

    Raises:
        subprocess.CalledProcessError: If cpptraj exits with a non-zero status.
        Exception: Any other error raised while running the subprocess
            (including a timeout) is logged and re-raised unchanged.
    """
    if script is not None:
        stripped = script.rstrip()
        last_line = stripped.rsplit("\n", 1)[-1].strip().lower() if stripped else ""
        if last_line != "quit":
            # Put `quit` on its own line; without the separator it would be
            # glued onto the final command of a script lacking a trailing newline.
            if script and not script.endswith("\n"):
                script += "\n"
            script += "quit\n"

    args = [cpptraj] + (argv or [])
    try:
        _logger.debug(f"Running cpptraj command: {script} with args: {args}")
        proc = subprocess.run(
            args,
            input=script,
            text=True,
            capture_output=True,
            check=True,
            timeout=timeout,
            env=env,
        )
        return proc.stdout
    except subprocess.CalledProcessError as e:
        stderr = (e.stderr or "").strip()
        _logger.error(f"Cpptraj execution failed (code {e.returncode}). Stderr:\n{stderr}")
        raise
    except Exception as e:
        _logger.error(f"Unexpected error while running cpptraj: {e}")
        raise
|
|
277
|
+
|
|
278
|
+
# *----------------------------------------------------*
|
|
279
|
+
# CPPTRAJ OUTPUT PARSERS
|
|
280
|
+
# *----------------------------------------------------*
|
|
281
|
+
|
|
282
|
+
# MD ITEMS (atoms, residues, molecules)
|
|
283
|
+
class CpptrajMaskParser:
    """Static helpers for turning a cpptraj mask table into a nested mapping.

    The class serves as a namespace: all of its members are class-level
    constants or static methods.
    """
    __slots__ = ()  # instances get no per-instance attributes

    # ---------- REGEX ----------
    _spaces_pattern = re.compile(r"\s+")
    # Captures the word inside square brackets, e.g. [AtNum], [Rnum], [Mnum].
    _items_pattern = re.compile(r"\[(\w+)\]")

    # --------- HELPERS ---------
    @staticmethod
    def _id2item_map(header: str) -> dict[str, int]:
        """Return {column name: 0-based position} for bracketed tokens in `header`.

        Positions count bracketed tokens only, in order of appearance. If a
        name occurs more than once, the last position is kept.
        """
        index_of: dict[str, int] = {}
        for position, match in enumerate(CpptrajMaskParser._items_pattern.finditer(header)):
            index_of[match.group(1)] = position
        return index_of

    @staticmethod
    def _collapse_spaces(s: str) -> str:
        """Replace every whitespace run with one space and trim both ends."""
        squeezed = CpptrajMaskParser._spaces_pattern.sub(" ", s)
        return squeezed.strip()

    @staticmethod
    def _get_row_items(row: str, header_map: dict[str, int]) -> tuple[int, int, int]:
        """Return `(molecule_id, residue_id, atom_id)` parsed from a data row.

        Args:
            row: One data line of the table.
            header_map: Column-name-to-position mapping for the table.

        Raises:
            ValueError: If a required column is absent from `header_map`, the
                row has too few fields, or a field is not an integer.
        """
        fields = CpptrajMaskParser._collapse_spaces(row).split()
        try:
            # Evaluated in the order molecule, residue, atom.
            return tuple(
                int(fields[header_map[column]])
                for column in ("Mnum", "Rnum", "AtNum")
            )
        except KeyError as ke:
            raise ValueError(f"Required column missing in header: {ke}") from ke
        except IndexError as ie:
            raise ValueError(f"Row has fewer fields than expected: {row!r}") from ie

    # --------- PUBLIC ----------
    @staticmethod
    def hierarchize_molecular_composition(mol_compositions_file: str) -> dict[int, dict[int, set[int]]]:
        """Build {molecule_id: {residue_id: {atom_id, ...}, ...}} from a mask table.

        The first line of the file is treated as the header and must contain
        the bracketed labels [AtNum], [Rnum] and [Mnum]. Every following
        non-blank line is parsed as one atom record.

        Args:
            mol_compositions_file: Path to the text file holding the table.

        Returns:
            dict[int, dict[int, set[int]]]: Nested mapping from molecule ID to
            residue ID to the set of atom IDs.

        Raises:
            RuntimeError: If no lines were read from the file.
            ValueError: If required columns are missing or a row is malformed.
        """
        rows = sawnergy_util.read_lines(mol_compositions_file, skip_header=False)
        if not rows:
            raise RuntimeError(f"0 lines were read from {mol_compositions_file}")

        header_map = CpptrajMaskParser._id2item_map(rows[0])

        missing = {"Mnum", "Rnum", "AtNum"} - header_map.keys()
        if missing:
            raise ValueError(f"Missing required columns in header: {sorted(missing)}")

        hierarchy: dict[int, dict[int, set[int]]] = {}
        for raw in rows[1:]:
            if raw.strip():
                molecule_id, residue_id, atom_id = CpptrajMaskParser._get_row_items(raw, header_map)
                hierarchy.setdefault(molecule_id, {}).setdefault(residue_id, set()).add(atom_id)

        return hierarchy
|
|
363
|
+
|
|
364
|
+
# CENTER OF MASS
|
|
365
|
+
def com_parser(line: str) -> str:
    """Convert one whitespace-separated COM output line into a CSV line.

    The first four whitespace-separated fields are taken as `frame`, `x`, `y`
    and `z`; any further fields on the line are ignored. The values are copied
    as text, without numeric conversion.

    Args:
        line: A single line from cpptraj's COM output.

    Returns:
        str: The line `frame,x,y,z` followed by a newline.

    Raises:
        ValueError: If the input line does not contain at least four fields.
    """
    fields = line.split()
    if len(fields) < 4:
        raise ValueError(
            f"Expected at least 4 whitespace-separated fields (frame x y z), "
            f"got {len(fields)}: {line!r}"
        )
    frame, x, y, z = fields[:4]
    return f"{frame},{x},{y},{z}\n"
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
# Names exported by a star-import of this module.
__all__ = [
    "run_cpptraj",
    "CpptrajScript"
]

# Running the module as a script performs no action.
if __name__ == "__main__":
    pass
|