bentopy 0.2.0a10__cp313-cp313-manylinux_2_34_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bentopy-0.2.0a10.data/scripts/bentopy-init +0 -0
- bentopy-0.2.0a10.data/scripts/bentopy-pack +0 -0
- bentopy-0.2.0a10.data/scripts/bentopy-render +0 -0
- bentopy-0.2.0a10.data/scripts/bentopy-solvate +0 -0
- bentopy-0.2.0a10.dist-info/METADATA +358 -0
- bentopy-0.2.0a10.dist-info/RECORD +58 -0
- bentopy-0.2.0a10.dist-info/WHEEL +5 -0
- bentopy-0.2.0a10.dist-info/entry_points.txt +4 -0
- bentopy-0.2.0a10.dist-info/licenses/LICENSE.txt +13 -0
- bentopy-0.2.0a10.dist-info/top_level.txt +8 -0
- check/check.py +128 -0
- core/config/bent/lexer.rs +338 -0
- core/config/bent/parser.rs +1180 -0
- core/config/bent/writer.rs +205 -0
- core/config/bent.rs +149 -0
- core/config/compartment_combinations.rs +300 -0
- core/config/legacy.rs +768 -0
- core/config.rs +362 -0
- core/mod.rs +4 -0
- core/placement.rs +100 -0
- core/utilities.rs +1 -0
- core/version.rs +32 -0
- init/example.bent +74 -0
- init/main.rs +235 -0
- mask/config.py +153 -0
- mask/mask.py +308 -0
- mask/utilities.py +38 -0
- merge/merge.py +175 -0
- pack/args.rs +77 -0
- pack/main.rs +121 -0
- pack/mask.rs +940 -0
- pack/session.rs +176 -0
- pack/state/combinations.rs +31 -0
- pack/state/compartment.rs +44 -0
- pack/state/mask.rs +196 -0
- pack/state/pack.rs +187 -0
- pack/state/segment.rs +72 -0
- pack/state/space.rs +98 -0
- pack/state.rs +440 -0
- pack/structure.rs +185 -0
- pack/voxelize.rs +85 -0
- render/args.rs +109 -0
- render/limits.rs +73 -0
- render/main.rs +12 -0
- render/render.rs +393 -0
- render/structure.rs +264 -0
- solvate/args.rs +324 -0
- solvate/convert.rs +25 -0
- solvate/cookies.rs +185 -0
- solvate/main.rs +177 -0
- solvate/placement.rs +380 -0
- solvate/solvate.rs +244 -0
- solvate/structure.rs +160 -0
- solvate/substitute.rs +113 -0
- solvate/water/martini.rs +409 -0
- solvate/water/models.rs +150 -0
- solvate/water/tip3p.rs +658 -0
- solvate/water.rs +115 -0
render/render.rs
ADDED
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
use std::io::{self, Read};
|
|
2
|
+
use std::path::PathBuf;
|
|
3
|
+
|
|
4
|
+
use anyhow::{Context, Result};
|
|
5
|
+
use bentopy::core::placement::{Placement, PlacementList};
|
|
6
|
+
use bentopy::core::utilities::CLEAR_LINE;
|
|
7
|
+
use glam::Vec3;
|
|
8
|
+
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
|
|
9
|
+
|
|
10
|
+
use crate::args::{Args, Mode, ResnumMode};
|
|
11
|
+
use crate::limits::Limits;
|
|
12
|
+
use crate::structure::{self, Atom, Molecule, rotate_molecule};
|
|
13
|
+
|
|
14
|
+
/// Read a placement list to return [`Placements`].
|
|
15
|
+
fn read_placement_list(
|
|
16
|
+
placement_list: &str,
|
|
17
|
+
root: Option<PathBuf>,
|
|
18
|
+
) -> serde_json::Result<PlacementList> {
|
|
19
|
+
let mut placements: PlacementList = serde_json::from_str(placement_list)?;
|
|
20
|
+
if let Some(root) = root {
|
|
21
|
+
for p in &mut placements.placements {
|
|
22
|
+
if p.path.is_relative() {
|
|
23
|
+
let mut path = root.clone();
|
|
24
|
+
path.push(&p.path);
|
|
25
|
+
p.path = path;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
Ok(placements)
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
impl Mode {
|
|
33
|
+
/// Based on the [`Mode`], prepare the list of molecules.
|
|
34
|
+
///
|
|
35
|
+
/// If necessary, a molecule is loaded from its structure path, centered, possible its atoms
|
|
36
|
+
/// are filtered down to a subset, or its residues or the whole molecule itself is represented
|
|
37
|
+
/// by single particles.
|
|
38
|
+
///
|
|
39
|
+
/// The goal is to do as little work as necessary for `Mode`s that aim to make lightweight
|
|
40
|
+
/// representations for inspection.
|
|
41
|
+
fn prepare_molecules(
|
|
42
|
+
&self,
|
|
43
|
+
placements: &[Placement],
|
|
44
|
+
ignore_tags: bool,
|
|
45
|
+
verbose: bool,
|
|
46
|
+
) -> Result<Vec<Molecule>> {
|
|
47
|
+
let load_molecule = |path| -> Result<_> {
|
|
48
|
+
let prefix = if verbose { "" } else { CLEAR_LINE };
|
|
49
|
+
let suffix = if verbose { "\n" } else { "\r" };
|
|
50
|
+
eprint!("{prefix}\tLoading {path:?}...{suffix}");
|
|
51
|
+
let mut molecule = structure::load_molecule(path)?;
|
|
52
|
+
molecule.translate_to_center();
|
|
53
|
+
Ok(molecule)
|
|
54
|
+
};
|
|
55
|
+
let apply_tag = |molecule: &mut Molecule, tag: Option<&str>| {
|
|
56
|
+
if !ignore_tags && let Some(tag) = tag {
|
|
57
|
+
molecule.apply_tag(tag)
|
|
58
|
+
}
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
match self {
|
|
62
|
+
Mode::Full => placements
|
|
63
|
+
.iter()
|
|
64
|
+
.map(|p| {
|
|
65
|
+
let mut molecule = load_molecule(&p.path)?;
|
|
66
|
+
apply_tag(&mut molecule, p.tag());
|
|
67
|
+
Ok(molecule)
|
|
68
|
+
})
|
|
69
|
+
.collect(),
|
|
70
|
+
Mode::Backbone => placements
|
|
71
|
+
.iter()
|
|
72
|
+
.map(|p| {
|
|
73
|
+
let mut molecule = load_molecule(&p.path)?;
|
|
74
|
+
apply_tag(&mut molecule, p.tag());
|
|
75
|
+
molecule
|
|
76
|
+
.atoms
|
|
77
|
+
.retain(|a| ["N", "CA", "C", "O"].contains(&a.name.as_str()));
|
|
78
|
+
Ok(molecule)
|
|
79
|
+
})
|
|
80
|
+
.collect(),
|
|
81
|
+
Mode::Alpha => placements
|
|
82
|
+
.iter()
|
|
83
|
+
.map(|p| {
|
|
84
|
+
let mut molecule = load_molecule(&p.path)?;
|
|
85
|
+
apply_tag(&mut molecule, p.tag());
|
|
86
|
+
molecule.atoms.retain(|a| a.name.as_str() == "CA");
|
|
87
|
+
Ok(molecule)
|
|
88
|
+
})
|
|
89
|
+
.collect(),
|
|
90
|
+
Mode::Residue => placements
|
|
91
|
+
.iter()
|
|
92
|
+
.map(|p| {
|
|
93
|
+
let mut molecule = load_molecule(&p.path)?;
|
|
94
|
+
let mut residues = Vec::new();
|
|
95
|
+
let mut residue_atoms = Vec::new();
|
|
96
|
+
for atom in molecule.atoms {
|
|
97
|
+
let Some(last) = residue_atoms.last() else {
|
|
98
|
+
residue_atoms.push(atom);
|
|
99
|
+
continue;
|
|
100
|
+
};
|
|
101
|
+
if last.resnum == atom.resnum {
|
|
102
|
+
residue_atoms.push(atom);
|
|
103
|
+
} else {
|
|
104
|
+
// The residue we just collected is complete.
|
|
105
|
+
let avg_pos =
|
|
106
|
+
residue_atoms.iter().fold(Vec3::ZERO, |acc, a| acc + a.pos)
|
|
107
|
+
/ residue_atoms.len() as f32;
|
|
108
|
+
residues.push(Atom {
|
|
109
|
+
name: "DUMMY".into(),
|
|
110
|
+
num: residues.len() as u32,
|
|
111
|
+
pos: avg_pos,
|
|
112
|
+
..last.clone()
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
// Prepare for the next resdue.
|
|
116
|
+
residue_atoms.clear();
|
|
117
|
+
residue_atoms.push(atom);
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
molecule.atoms = residues;
|
|
121
|
+
apply_tag(&mut molecule, p.tag());
|
|
122
|
+
molecule.translate_to_center();
|
|
123
|
+
Ok(molecule)
|
|
124
|
+
})
|
|
125
|
+
.collect(),
|
|
126
|
+
Mode::Instance => Ok(placements
|
|
127
|
+
.iter()
|
|
128
|
+
.map(|p| {
|
|
129
|
+
let mut atom = Atom::dummy();
|
|
130
|
+
if let Some(tag) = p.tag() {
|
|
131
|
+
atom.resname = tag.into();
|
|
132
|
+
}
|
|
133
|
+
Molecule { atoms: vec![atom] }
|
|
134
|
+
})
|
|
135
|
+
.collect()),
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
/// Write out [`Placements`] as a `gro` file.
|
|
141
|
+
///
|
|
142
|
+
/// See the Gromacs manual entry on the [gro file][gro_manual].
|
|
143
|
+
///
|
|
144
|
+
/// Note that each [`Placement`] (i.e., kind of structure) is given a different `resnum`, such that
|
|
145
|
+
/// the different kinds of structures can be targeted individually in programmes that use this
|
|
146
|
+
/// `gro` file output. For example, each kind of molecule may be drawn in a different color in a
|
|
147
|
+
/// molecular visualization program.
|
|
148
|
+
///
|
|
149
|
+
/// [gro_manual]: https://manual.gromacs.org/archive/5.0.3/online/gro.html
|
|
150
|
+
fn write_gro(
|
|
151
|
+
writer: &mut impl io::Write,
|
|
152
|
+
placements: &PlacementList,
|
|
153
|
+
limits: Limits,
|
|
154
|
+
mode: Mode,
|
|
155
|
+
resnum_mode: ResnumMode,
|
|
156
|
+
ignore_tags: bool,
|
|
157
|
+
verbose: bool,
|
|
158
|
+
) -> Result<Box<[(String, usize)]>> {
|
|
159
|
+
let min_limits = Vec3::new(
|
|
160
|
+
limits.minx.unwrap_or_default(),
|
|
161
|
+
limits.miny.unwrap_or_default(),
|
|
162
|
+
limits.minz.unwrap_or_default(),
|
|
163
|
+
);
|
|
164
|
+
let size = limits.box_size(Vec3::from(placements.size));
|
|
165
|
+
let placements = &placements.placements;
|
|
166
|
+
// Load the molecules and center them with respect to themselves.
|
|
167
|
+
let molecules = mode.prepare_molecules(placements, ignore_tags, verbose)?;
|
|
168
|
+
writeln!(
|
|
169
|
+
writer,
|
|
170
|
+
"{} (v{})",
|
|
171
|
+
env!("CARGO_BIN_NAME"),
|
|
172
|
+
bentopy::core::version::VERSION
|
|
173
|
+
)?;
|
|
174
|
+
let placed = placements.iter().zip(&molecules).map(|(p, m)| {
|
|
175
|
+
let n_placed = p
|
|
176
|
+
.batches
|
|
177
|
+
.iter()
|
|
178
|
+
.map(|b| {
|
|
179
|
+
b.positions
|
|
180
|
+
.iter()
|
|
181
|
+
.filter(|&&pos| limits.is_inside(pos))
|
|
182
|
+
.count()
|
|
183
|
+
})
|
|
184
|
+
.sum::<usize>();
|
|
185
|
+
let n_atoms = n_placed * m.atoms.len();
|
|
186
|
+
let name = p.name.clone();
|
|
187
|
+
(name, n_placed, n_atoms)
|
|
188
|
+
});
|
|
189
|
+
let n_atoms: usize = placed.clone().map(|(_, _, n)| n).sum();
|
|
190
|
+
let placed = placed.map(|(name, n_placed, _)| (name, n_placed)).collect();
|
|
191
|
+
let mut instance_resnum = 0;
|
|
192
|
+
writeln!(writer, "{n_atoms}")?;
|
|
193
|
+
for (segidx, (placement, molecule)) in placements.iter().zip(molecules).enumerate() {
|
|
194
|
+
{
|
|
195
|
+
let prefix = if verbose { "" } else { CLEAR_LINE };
|
|
196
|
+
let suffix = if verbose { "\n" } else { "\r" };
|
|
197
|
+
let name = &placement.name;
|
|
198
|
+
let tag = placement
|
|
199
|
+
.tag()
|
|
200
|
+
.map(|tag| format!(":{tag}"))
|
|
201
|
+
.unwrap_or_default();
|
|
202
|
+
let path = &placement.path;
|
|
203
|
+
eprint!(
|
|
204
|
+
"{prefix}\tNow writing '{name}{tag}' from {path:?} data to output file.{suffix}",
|
|
205
|
+
);
|
|
206
|
+
}
|
|
207
|
+
// Make sure that the printed resnum cannot exceed the 5 allotted columns.
|
|
208
|
+
let segidx = segidx % 100000;
|
|
209
|
+
|
|
210
|
+
let mut inserts = Vec::new();
|
|
211
|
+
for atom in &molecule.atoms {
|
|
212
|
+
// HACK: For some reason, converting to string here is necessary for getting correct
|
|
213
|
+
// alignment of ArrayString<U5>.
|
|
214
|
+
let resnum = atom.resnum;
|
|
215
|
+
let resname = atom.resname.to_string();
|
|
216
|
+
let atomname = atom.name.to_string();
|
|
217
|
+
let atomnum = atom.num;
|
|
218
|
+
let insert = match resnum_mode {
|
|
219
|
+
ResnumMode::Instance => format!("{resname:<5}{atomname:>5}{atomnum:>5}"),
|
|
220
|
+
ResnumMode::Segment => format!("{segidx:>5}{resname:<5}{atomname:>5}{atomnum:>5}"),
|
|
221
|
+
ResnumMode::Keep => format!("{resnum:>5}{resname:<5}{atomname:>5}{atomnum:>5}"),
|
|
222
|
+
};
|
|
223
|
+
inserts.push(insert)
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
let mut output = String::new();
|
|
227
|
+
let t0 = std::time::Instant::now();
|
|
228
|
+
for batch in &placement.batches {
|
|
229
|
+
// This tends to take around half a ms.
|
|
230
|
+
let molecule = rotate_molecule(&molecule, &batch.rotation());
|
|
231
|
+
|
|
232
|
+
// Move over the positions so that they do not float somewhere in space, far from the
|
|
233
|
+
// origin, in case the limits are set. This puts them within the specified limits.
|
|
234
|
+
let min = molecule.min() + min_limits;
|
|
235
|
+
let included_positions: Vec<_> = batch
|
|
236
|
+
.positions
|
|
237
|
+
.iter()
|
|
238
|
+
.filter(|&&position| limits.is_inside(position))
|
|
239
|
+
.collect();
|
|
240
|
+
let n_included_instances = included_positions.len();
|
|
241
|
+
let out: String = included_positions
|
|
242
|
+
.par_iter()
|
|
243
|
+
.enumerate()
|
|
244
|
+
.map(|(batch_instance_resnum, position)| {
|
|
245
|
+
let mut output = String::new();
|
|
246
|
+
for (atom, insert) in molecule.atoms.iter().zip(&inserts) {
|
|
247
|
+
let mut pos = atom.pos;
|
|
248
|
+
pos.x += position[0] - min.x;
|
|
249
|
+
pos.y += position[1] - min.y;
|
|
250
|
+
pos.z += position[2] - min.z;
|
|
251
|
+
|
|
252
|
+
match resnum_mode {
|
|
253
|
+
ResnumMode::Instance => {
|
|
254
|
+
let resnum = instance_resnum + batch_instance_resnum;
|
|
255
|
+
// Make sure that the printed resnum cannot exceed the 5 allotted columns.
|
|
256
|
+
let resnum = resnum % 100000;
|
|
257
|
+
output.push_str(&format!("{resnum:>5}"))
|
|
258
|
+
}
|
|
259
|
+
ResnumMode::Segment => {}
|
|
260
|
+
ResnumMode::Keep => {}
|
|
261
|
+
};
|
|
262
|
+
output.push_str(insert);
|
|
263
|
+
output.push_str(&format!("{:8.3}{:8.3}{:8.3}\n", pos.x, pos.y, pos.z));
|
|
264
|
+
}
|
|
265
|
+
output
|
|
266
|
+
})
|
|
267
|
+
.collect();
|
|
268
|
+
instance_resnum += n_included_instances;
|
|
269
|
+
output.push_str(&out);
|
|
270
|
+
}
|
|
271
|
+
if verbose {
|
|
272
|
+
eprintln!("\t\tFormatting took {:.3} s.", t0.elapsed().as_secs_f32());
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
let t0 = std::time::Instant::now();
|
|
276
|
+
writer.write_all(output.as_bytes())?;
|
|
277
|
+
writer.flush()?;
|
|
278
|
+
if verbose {
|
|
279
|
+
eprintln!("\t\tWriting took {:.3} s.", t0.elapsed().as_secs_f32());
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
// Write the box vectors.
|
|
284
|
+
let [v1x, v2y, v3z] = size.to_array();
|
|
285
|
+
writeln!(writer, "{v1x:.3} {v2y:.3} {v3z:.3}")?;
|
|
286
|
+
|
|
287
|
+
Ok(placed)
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
/// Write out the topology as a `top` file.
|
|
291
|
+
///
|
|
292
|
+
/// See the Gromacs manual entry on the [top file][top_manual].
|
|
293
|
+
///
|
|
294
|
+
/// [top_manual]: https://manual.gromacs.org/archive/5.0/online/top.html
|
|
295
|
+
fn write_top(
|
|
296
|
+
writer: &mut impl io::Write,
|
|
297
|
+
includes: &[String],
|
|
298
|
+
title: &str,
|
|
299
|
+
placed: &[(String, usize)],
|
|
300
|
+
) -> io::Result<()> {
|
|
301
|
+
// Write some meta stuff.
|
|
302
|
+
writeln!(
|
|
303
|
+
writer,
|
|
304
|
+
"; This topology file was generated by {} (v{}).",
|
|
305
|
+
env!("CARGO_BIN_NAME"),
|
|
306
|
+
bentopy::core::version::VERSION
|
|
307
|
+
)?;
|
|
308
|
+
writeln!(writer, "; {}", env!("CARGO_PKG_AUTHORS"))?;
|
|
309
|
+
writeln!(writer, "; {}", env!("CARGO_PKG_REPOSITORY"))?;
|
|
310
|
+
writeln!(writer)?;
|
|
311
|
+
|
|
312
|
+
// Dump the includes.
|
|
313
|
+
for include in includes {
|
|
314
|
+
writeln!(writer, "#include \"{include}\"")?;
|
|
315
|
+
}
|
|
316
|
+
writeln!(writer)?;
|
|
317
|
+
|
|
318
|
+
// Write the system title.
|
|
319
|
+
writeln!(writer, "[ system ]")?;
|
|
320
|
+
writeln!(writer, "{title}")?;
|
|
321
|
+
writeln!(writer)?;
|
|
322
|
+
|
|
323
|
+
// Write out the name of each structure and how many of them were written to the output file.
|
|
324
|
+
writeln!(writer, "[ molecules ]")?;
|
|
325
|
+
for (name, count) in placed {
|
|
326
|
+
if *count > 0 {
|
|
327
|
+
writeln!(writer, "{name}\t{count}")?;
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
Ok(())
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
/// Render a placement list to a gro file.
|
|
335
|
+
///
|
|
336
|
+
/// If `input_path` is "-", the placement list is read from stdin.
|
|
337
|
+
pub fn render(args: Args) -> anyhow::Result<()> {
|
|
338
|
+
let Args {
|
|
339
|
+
input: input_path,
|
|
340
|
+
output: output_path,
|
|
341
|
+
topol: topol_path,
|
|
342
|
+
root,
|
|
343
|
+
limits,
|
|
344
|
+
mode,
|
|
345
|
+
resnum_mode,
|
|
346
|
+
ignore_tags,
|
|
347
|
+
verbose,
|
|
348
|
+
} = args;
|
|
349
|
+
|
|
350
|
+
eprint!("Reading from {input_path:?}... ");
|
|
351
|
+
let t0 = std::time::Instant::now();
|
|
352
|
+
let mut placement_list = String::new();
|
|
353
|
+
if input_path.to_str() == Some("-") {
|
|
354
|
+
std::io::stdin().read_to_string(&mut placement_list)?;
|
|
355
|
+
} else {
|
|
356
|
+
std::fs::File::open(&input_path)?.read_to_string(&mut placement_list)?;
|
|
357
|
+
}
|
|
358
|
+
let placements = read_placement_list(&placement_list, root)
|
|
359
|
+
.with_context(|| format!("Failed to read placement list from {input_path:?}"))?;
|
|
360
|
+
eprintln!("Done in {:.3} ms.", t0.elapsed().as_millis());
|
|
361
|
+
|
|
362
|
+
eprintln!("Writing structure to {output_path:?}... ");
|
|
363
|
+
let t0 = std::time::Instant::now();
|
|
364
|
+
let outfile = std::fs::File::create(&output_path)?;
|
|
365
|
+
let placed = write_gro(
|
|
366
|
+
&mut std::io::BufWriter::new(outfile),
|
|
367
|
+
&placements,
|
|
368
|
+
limits.unwrap_or_default(),
|
|
369
|
+
mode,
|
|
370
|
+
resnum_mode,
|
|
371
|
+
ignore_tags,
|
|
372
|
+
verbose,
|
|
373
|
+
)?;
|
|
374
|
+
eprintln!("{CLEAR_LINE}Done in {:.3} s.", t0.elapsed().as_secs_f32());
|
|
375
|
+
eprintln!(
|
|
376
|
+
"Wrote {} placements of {} different kinds of structures.",
|
|
377
|
+
placed.iter().map(|(_, c)| c).sum::<usize>(),
|
|
378
|
+
placed.iter().filter(|(_, c)| *c > 0).count(),
|
|
379
|
+
);
|
|
380
|
+
|
|
381
|
+
if let Some(topol_path) = topol_path {
|
|
382
|
+
let mut top = std::fs::File::create(&topol_path)?;
|
|
383
|
+
write_top(
|
|
384
|
+
&mut top,
|
|
385
|
+
&placements.topol_includes,
|
|
386
|
+
&placements.title,
|
|
387
|
+
&placed,
|
|
388
|
+
)?;
|
|
389
|
+
eprintln!("Wrote topology to {topol_path:?}.");
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
Ok(())
|
|
393
|
+
}
|
render/structure.rs
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
use std::{io::Read, path::Path};
|
|
2
|
+
|
|
3
|
+
use anyhow::{Context, Result};
|
|
4
|
+
use eightyseven::structure::{AtomName, AtomNum, ResName, ResNum};
|
|
5
|
+
use glam::{Mat3, Vec3};
|
|
6
|
+
|
|
7
|
+
/// A molecule as derived from PDB 'ATOM' records.
|
|
8
|
+
#[derive(Clone)]
|
|
9
|
+
pub struct Molecule {
|
|
10
|
+
pub atoms: Vec<Atom>,
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
impl Molecule {
|
|
14
|
+
/// Create a new molecule from a PDB file.
|
|
15
|
+
fn from_pdb(pdb: &str) -> Result<Self> {
|
|
16
|
+
let mut atoms = Vec::new();
|
|
17
|
+
for (ln, line) in pdb.lines().enumerate() {
|
|
18
|
+
if line.starts_with("ATOM") || line.starts_with("HETATM") {
|
|
19
|
+
let ln = ln + 1;
|
|
20
|
+
let atom = Atom::from_pdb_atom_line(line)
|
|
21
|
+
.context(format!("could not parse atom on line {ln}"))?;
|
|
22
|
+
atoms.push(atom);
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
Ok(Self { atoms })
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/// Create a new molecule from a gro file.
|
|
30
|
+
fn from_gro(gro: &str) -> Result<Self> {
|
|
31
|
+
let mut atoms = Vec::new();
|
|
32
|
+
let mut lines = gro.lines().enumerate();
|
|
33
|
+
let (_ln, _title) = lines.next().context("expected a title line")?;
|
|
34
|
+
let n_atoms: usize = lines
|
|
35
|
+
.next()
|
|
36
|
+
.context("expected the number of atoms")?
|
|
37
|
+
.1
|
|
38
|
+
.trim()
|
|
39
|
+
.parse()
|
|
40
|
+
.context("could not parse the number of atoms")?;
|
|
41
|
+
// Read the atoms.
|
|
42
|
+
for (ln, line) in lines.take(n_atoms) {
|
|
43
|
+
let ln = ln + 1;
|
|
44
|
+
let atom = Atom::from_gro_atom_line(line)
|
|
45
|
+
.context(format!("could not parse atom on line {ln}"))?;
|
|
46
|
+
atoms.push(atom);
|
|
47
|
+
}
|
|
48
|
+
// We don't check for the presence and correctness of the box vectors, even though they
|
|
49
|
+
// should be there. We just don't care here.
|
|
50
|
+
|
|
51
|
+
Ok(Self { atoms })
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/// Apply a `tag` to the atoms in this [`Molecule`].
|
|
55
|
+
///
|
|
56
|
+
/// This will replace the `resname` for each atom with the provided `tag`.
|
|
57
|
+
pub fn apply_tag(&mut self, tag: impl Into<ResName>) {
|
|
58
|
+
let tag = tag.into();
|
|
59
|
+
for Atom { resname, .. } in &mut self.atoms {
|
|
60
|
+
*resname = tag;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/// Returns the center of this [`Molecule`].
|
|
65
|
+
///
|
|
66
|
+
/// Here, center refers to mean position. This can be understood as a center of mass where all
|
|
67
|
+
/// atoms are considered to have the same weight.
|
|
68
|
+
pub fn center(&self) -> Vec3 {
|
|
69
|
+
if self.atoms.is_empty() {
|
|
70
|
+
return Vec3::ZERO;
|
|
71
|
+
}
|
|
72
|
+
let mut mean = Vec3::ZERO;
|
|
73
|
+
for atom in &self.atoms {
|
|
74
|
+
mean += atom.pos
|
|
75
|
+
}
|
|
76
|
+
mean / self.atoms.len() as f32
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/// Translate all atoms such that their center lies at the origin.
|
|
80
|
+
///
|
|
81
|
+
/// Here, center refers to mean position. This can be understood as a center of mass where all
|
|
82
|
+
/// atoms are considered to have the same weight.
|
|
83
|
+
pub fn translate_to_center(&mut self) {
|
|
84
|
+
let center = self.center();
|
|
85
|
+
for atom in &mut self.atoms {
|
|
86
|
+
atom.pos -= center;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/// Returns the minimum _x_, _y_, and _z_ values of this [`Molecule`] as a [`Vec3`].
|
|
91
|
+
///
|
|
92
|
+
/// Note that this is a virtual point, not an actual point in the atoms point cloud.
|
|
93
|
+
/// In other words, it is not the lowest point in a sorted list of existing positions.
|
|
94
|
+
pub fn min(&self) -> Vec3 {
|
|
95
|
+
let mut min = Vec3::ZERO;
|
|
96
|
+
for atom in &self.atoms {
|
|
97
|
+
let pos = atom.pos;
|
|
98
|
+
if pos.x < min.x {
|
|
99
|
+
min.x = pos.x
|
|
100
|
+
}
|
|
101
|
+
if pos.y < min.y {
|
|
102
|
+
min.y = pos.y
|
|
103
|
+
}
|
|
104
|
+
if pos.z < min.z {
|
|
105
|
+
min.z = pos.z
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
min
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
/// Assumes the [Molecule]'s atoms have been centered.
|
|
113
|
+
pub fn rotate_molecule(molecule: &Molecule, rotmat: &Mat3) -> Molecule {
|
|
114
|
+
let mut molecule = molecule.clone();
|
|
115
|
+
for atom in &mut molecule.atoms {
|
|
116
|
+
atom.pos = rotmat.mul_vec3(atom.pos)
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
molecule
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/// The representation of an atom.
|
|
123
|
+
#[derive(Clone)]
|
|
124
|
+
pub struct Atom {
|
|
125
|
+
/// Atom name.
|
|
126
|
+
pub name: AtomName,
|
|
127
|
+
/// Residue name.
|
|
128
|
+
pub resname: ResName,
|
|
129
|
+
/// Residue sequence number.
|
|
130
|
+
pub resnum: ResNum,
|
|
131
|
+
/// Number of the atom within its structure.
|
|
132
|
+
///
|
|
133
|
+
/// Also understood as its 'serial' number.
|
|
134
|
+
pub num: AtomNum,
|
|
135
|
+
/// Position in nanometers.
|
|
136
|
+
pub pos: Vec3,
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
impl Atom {
|
|
140
|
+
// COLUMNS DATA TYPE FIELD DEFINITION
|
|
141
|
+
// -------------------------------------------------------------------------------------
|
|
142
|
+
// 1 - 6 Record name "ATOM "
|
|
143
|
+
// 7 - 11 Integer serial Atom serial number.
|
|
144
|
+
// 13 - 16 Atom name Atom name.
|
|
145
|
+
// 17 Character altLoc Alternate location indicator.
|
|
146
|
+
// 18 - 20 Residue name resName Residue name.
|
|
147
|
+
// 22 Character chainID Chain identifier.
|
|
148
|
+
// 23 - 26 Integer resSeq Residue sequence number.
|
|
149
|
+
// 27 AChar iCode Code for insertion of residues.
|
|
150
|
+
// 31 - 38 Real(8.3) x Orthogonal coordinates for X in Angstroms.
|
|
151
|
+
// 39 - 46 Real(8.3) y Orthogonal coordinates for Y in Angstroms.
|
|
152
|
+
// 47 - 54 Real(8.3) z Orthogonal coordinates for Z in Angstroms.
|
|
153
|
+
// 55 - 60 Real(6.2) occupancy Occupancy.
|
|
154
|
+
// 61 - 66 Real(6.2) tempFactor Temperature factor.
|
|
155
|
+
// 77 - 78 LString(2) element Element symbol, right-justified.
|
|
156
|
+
// 79 - 80 LString(2) charge Charge on the atom.
|
|
157
|
+
//
|
|
158
|
+
// Examples:
|
|
159
|
+
// ATOM 1 N ALA A 1 11.104 6.134 -6.504
|
|
160
|
+
// ATOM 1 N MET 1 48.048 69.220 58.803
|
|
161
|
+
// ATOM 2 C13 DPPCM 367 31.671 -46.874 39.426 1.00 0.00 MEMB
|
|
162
|
+
/// Read a single "ATOM" or "HETATM" record from a PDB and return an Atom.
|
|
163
|
+
fn from_pdb_atom_line(line: &str) -> Result<Atom> {
|
|
164
|
+
let serial = line[6..11]
|
|
165
|
+
.trim()
|
|
166
|
+
.parse()
|
|
167
|
+
.context("could not parse atom serial")?;
|
|
168
|
+
let name = line[12..16].trim().into();
|
|
169
|
+
// NOTE: Even though the PDB specification only regards columns 18..21 as constituting the
|
|
170
|
+
// resname, in practice the character directly after that is also included. This column is
|
|
171
|
+
// not defined by the spec. Especially for telling apart lipids like DPPC and DPPG, it's
|
|
172
|
+
// quite important to include that by-convention resname character. Thanks Abby!
|
|
173
|
+
let resname = line[17..21].trim().into();
|
|
174
|
+
let resnum = line[22..26]
|
|
175
|
+
.trim()
|
|
176
|
+
.parse()
|
|
177
|
+
.context("could not parse atom resnum")?;
|
|
178
|
+
let x = line[30..38]
|
|
179
|
+
.trim()
|
|
180
|
+
.parse()
|
|
181
|
+
.context("could not parse x coordinate")?;
|
|
182
|
+
let y = line[38..46]
|
|
183
|
+
.trim()
|
|
184
|
+
.parse()
|
|
185
|
+
.context("could not parse y coordinate")?;
|
|
186
|
+
let z = line[46..54]
|
|
187
|
+
.trim()
|
|
188
|
+
.parse()
|
|
189
|
+
.context("could not parse z coordinate")?;
|
|
190
|
+
Ok(Atom {
|
|
191
|
+
name,
|
|
192
|
+
resname,
|
|
193
|
+
resnum,
|
|
194
|
+
num: serial,
|
|
195
|
+
pos: Vec3::new(x, y, z) / 10.0, // Convert from Å to nm.
|
|
196
|
+
})
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// ` 2WATER HW3 6 1.326 0.120 0.568 1.9427 -0.8216 -0.0244`
|
|
200
|
+
/// Read a single atom line from a gro file and return an Atom.
|
|
201
|
+
fn from_gro_atom_line(line: &str) -> Result<Atom> {
|
|
202
|
+
let resnum = line[0..5]
|
|
203
|
+
.trim()
|
|
204
|
+
.parse()
|
|
205
|
+
.context("could not parse resnum")?;
|
|
206
|
+
let resname = line[5..10].trim().into();
|
|
207
|
+
let name = line[10..15].trim().into(); // Atom name.
|
|
208
|
+
let num = line[15..20]
|
|
209
|
+
.trim()
|
|
210
|
+
.parse()
|
|
211
|
+
.context("could not parse atomnum")?; // Atom number.
|
|
212
|
+
let x = line[20..28]
|
|
213
|
+
.trim()
|
|
214
|
+
.parse()
|
|
215
|
+
.context("could not parse x coordinate")?;
|
|
216
|
+
let y = line[28..36]
|
|
217
|
+
.trim()
|
|
218
|
+
.parse()
|
|
219
|
+
.context("could not parse y coordinate")?;
|
|
220
|
+
let z = line[36..44]
|
|
221
|
+
.trim()
|
|
222
|
+
.parse()
|
|
223
|
+
.context("could not parse z coordinate")?;
|
|
224
|
+
Ok(Atom {
|
|
225
|
+
name,
|
|
226
|
+
resname,
|
|
227
|
+
resnum,
|
|
228
|
+
num,
|
|
229
|
+
pos: Vec3::new(x, y, z), // Values are already in nm.
|
|
230
|
+
})
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
/// Create a dummy atom.
|
|
234
|
+
pub fn dummy() -> Self {
|
|
235
|
+
Self {
|
|
236
|
+
name: "DUMMY".into(),
|
|
237
|
+
resname: "DUMMY".into(),
|
|
238
|
+
resnum: 0,
|
|
239
|
+
num: 0,
|
|
240
|
+
pos: Vec3::ZERO,
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
/// Load a [`Molecule`] from a pdb file.
|
|
246
|
+
pub fn load_molecule<P: AsRef<Path> + std::fmt::Debug>(path: P) -> Result<Molecule> {
|
|
247
|
+
let mut data = String::new();
|
|
248
|
+
std::fs::File::open(&path)?.read_to_string(&mut data)?;
|
|
249
|
+
|
|
250
|
+
let molecule =
|
|
251
|
+
match path.as_ref().extension().and_then(|s| s.to_str()) {
|
|
252
|
+
Some("gro") => Molecule::from_gro(&data)
|
|
253
|
+
.context(format!("could not parse .gro file at {path:?}"))?,
|
|
254
|
+
Some("pdb") => Molecule::from_pdb(&data)
|
|
255
|
+
.context(format!("could not parse .pdb file at {path:?}"))?,
|
|
256
|
+
None | Some(_) => {
|
|
257
|
+
eprintln!("WARNING: Assuming {path:?} is a pdb file.");
|
|
258
|
+
Molecule::from_pdb(&data)
|
|
259
|
+
.context(format!("could not parse file at {path:?} as .pdb"))?
|
|
260
|
+
}
|
|
261
|
+
};
|
|
262
|
+
|
|
263
|
+
Ok(molecule)
|
|
264
|
+
}
|