samgen 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
samgen/__init__.py ADDED
@@ -0,0 +1,17 @@
1
+ """samgen — build self-assembled monolayer (SAM) surfaces for GROMACS.
2
+
3
+ Three independently runnable stages:
4
+ generate_geometry() -> tiled surface .gro (no force field needed)
5
+ assemble_topology() -> integrated topol.top + reordered .gro
6
+ build() -> geometry then topology in one shot
7
+ """
8
+
9
+ from .geometry import generate_geometry, build_twosided_strand, generate_twosided
10
+ from .topology import assemble_topology
11
+ from ._build import build
12
+
13
+ __all__ = [
14
+ "generate_geometry", "build_twosided_strand", "generate_twosided",
15
+ "assemble_topology", "build",
16
+ ]
17
+ __version__ = "0.0.1"
samgen/_build.py ADDED
@@ -0,0 +1,51 @@
1
+ """Stage 3: `build` = geometry then topology in one shot.
2
+
3
+ Convenience path for one-sided surfaces (and two-sided where the parameterized
4
+ strand already exists). Loads components from the config, runs geometry, then
5
+ assembles the topology against the generated surface.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ from typing import Dict, Optional
12
+
13
+ from .core.molecule import Molecule
14
+ from .geometry import generate_geometry
15
+ from .topology import assemble_topology
16
+
17
+
18
def load_components(config: dict, root: str = ".") -> Dict[str, Molecule]:
    """Build one Molecule per entry of ``config['components']``.

    Each entry must provide ``gro`` and ``resname``; ``itp`` is optional. Paths
    are joined onto ``root``. As a side effect the orientation-related keys
    present on each entry are copied into ``config['components_meta']``.

    Returns a dict mapping the component key to its Molecule.
    """
    orientation_keys = ("anchor", "canonicalize",
                        "backbone_carbons", "allow_anchor_autodetect")
    molecules: Dict[str, Molecule] = {}
    orientation: Dict[str, dict] = {}

    for label, entry in config["components"].items():
        gro_path = os.path.join(root, entry["gro"])
        if entry.get("itp"):
            itp_path = os.path.join(root, entry["itp"])
        else:
            itp_path = None
        molecules[label] = Molecule.from_files(
            name=entry["resname"], gro=gro_path, itp=itp_path
        )

        # keep only the orientation keys the entry actually defines
        picked = {}
        for field_name in orientation_keys:
            if field_name in entry:
                picked[field_name] = entry[field_name]
        orientation[label] = picked

    config["components_meta"] = orientation
    return molecules
35
+
36
+
37
def build(config: dict, root: str = ".", out_gro: str = "sam.gro",
          out_top: str = "topol.top", out_reordered: Optional[str] = "sam-reordered.gro",
          input_fn=input, is_tty=None):
    """Run the geometry stage, then the topology stage.

    Returns a tuple ``(surface_gro, out_top, counts)`` where ``surface_gro``
    comes from the geometry result and ``counts`` is whatever
    ``assemble_topology`` returns.
    """
    molecules = load_components(config, root)
    output_cfg = config.get("output", {})

    manifest = out_gro + ".manifest.json"
    geometry = generate_geometry(
        config,
        molecules,
        out_gro=out_gro,
        manifest_path=manifest,
        root=root,
        input_fn=input_fn,
        is_tty=is_tty,
    )

    # Residue order: explicit config value wins, else component order.
    resnames = []
    itp_by_resname = {}
    for mol in molecules.values():
        resnames.append(mol.name)
        itp_by_resname[mol.name] = mol.itp_path
    ordering = output_cfg.get("order") or resnames

    counts = assemble_topology(
        out_gro,
        itp_map=itp_by_resname,
        order=ordering,
        out_top=out_top,
        out_gro=out_reordered,
    )
    return geometry.surface_gro, out_top, counts
samgen/cli.py ADDED
@@ -0,0 +1,118 @@
1
+ """Command-line interface: `samgen geometry|topology|build CONFIG`.
2
+
3
+ Mirrors the three independently runnable stages. Anchor handling follows the
4
+ prompt-first / consent-to-guess policy: prompt in interactive mode, never
5
+ silently auto-detect in batch mode without explicit consent.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import os
12
+ import sys
13
+
14
+ try:
15
+ import yaml
16
+ except ImportError: # keep import-light; yaml only needed for the CLI
17
+ yaml = None
18
+
19
+ from ._build import build, load_components
20
+ from .geometry import generate_geometry, generate_twosided
21
+ from .topology import assemble_topology
22
+ from .interactive import resolve_anchor_interactive
23
+
24
+
25
def _load_config(path: str) -> dict:
    """Load the YAML config at ``path`` and return it as a dict.

    Exits (via ``sys.exit``) with a readable message when PyYAML is missing or
    when the document is not a mapping (e.g. an empty file, which
    ``yaml.safe_load`` parses to ``None``), instead of letting a later
    subscript fail with an opaque ``TypeError``.
    """
    if yaml is None:
        sys.exit("PyYAML is required for the CLI: pip install pyyaml")
    # Explicit encoding so parsing does not depend on the platform locale.
    with open(path, encoding="utf-8") as fh:
        cfg = yaml.safe_load(fh)
    if not isinstance(cfg, dict):
        sys.exit(f"{path}: expected a YAML mapping at the top level, "
                 f"got {type(cfg).__name__}")
    return cfg
30
+
31
+
32
def cmd_geometry(args):
    """`samgen geometry`: tile the surface and report the atom count.

    Component paths are resolved relative to the config file's directory.
    """
    config = _load_config(args.config)
    config_dir = os.path.dirname(os.path.abspath(args.config))
    molecules = load_components(config, config_dir)
    manifest = args.out + ".manifest.json"
    result = generate_geometry(config, molecules, out_gro=args.out,
                               manifest_path=manifest, root=config_dir)
    natoms = result.manifest['natoms']
    print(f"wrote {result.surface_gro} ({natoms} atoms)")
39
+
40
+
41
def cmd_topology(args):
    """`samgen topology`: assemble a topology for an existing surface .gro.

    Residue order comes from ``output.order`` in the config when set,
    otherwise from the order in which components were loaded.
    """
    config = _load_config(args.config)
    config_dir = os.path.dirname(os.path.abspath(args.config))
    molecules = load_components(config, config_dir)

    loaded = list(molecules.values())
    ordering = config.get("output", {}).get("order") or [mol.name for mol in loaded]
    itp_by_resname = {mol.name: mol.itp_path for mol in loaded}

    counts = assemble_topology(
        args.gro,
        itp_map=itp_by_resname,
        order=ordering,
        out_top=args.out,
        out_gro=args.reordered,
        validate=args.validate,
    )
    print(f"wrote {args.out}: {counts}")
51
+
52
+
53
def cmd_build(args):
    """`samgen build`: run geometry and topology in one go and report outputs."""
    config = _load_config(args.config)
    config_dir = os.path.dirname(os.path.abspath(args.config))
    outputs = build(config, config_dir, out_gro=args.out, out_top=args.top)
    surface_gro, topology, counts = outputs
    print(f"wrote {surface_gro} and {topology}: {counts}")
58
+
59
+
60
def cmd_twosided(args):
    """`samgen twosided`: build a two-sided (shared-S) strand and its surface.

    Picks the component named by ``--component`` (default: the first one in the
    config), resolves its anchor, and exits with a message when the anchor has
    no methyl cap to strip.

    Auto-detection consent is read from the component's own
    ``allow_anchor_autodetect`` key when present (the key ``load_components``
    collects per component), falling back to the top-level key otherwise.
    """
    cfg = _load_config(args.config)
    root = os.path.dirname(os.path.abspath(args.config))
    comps = load_components(cfg, root)
    if not comps:
        # next(iter({})) would otherwise surface as a bare StopIteration.
        sys.exit("config defines no components; nothing to build")

    key = args.component or next(iter(comps))
    if key not in comps:
        sys.exit(f"component {key!r} not in config (have: {list(comps)})")
    mol = comps[key]

    spec = cfg["components"][key]
    specified = spec.get("anchor")
    # A per-component setting overrides the global default.
    allow = spec.get("allow_anchor_autodetect",
                     cfg.get("allow_anchor_autodetect", False))

    res = resolve_anchor_interactive(mol, specified, allow)
    if res.cap_carbon_idx is None:
        sys.exit(f"anchor {mol.struct.atoms[res.anchor_idx].atomname!r} has no "
                 "methyl cap to strip; cannot build a shared-S two-sided strand")

    tsr, geom = generate_twosided(mol, res.anchor_idx, res.cap_carbon_idx, cfg,
                                  out_strand=args.out, out_surface=args.surface)
    print(f"wrote two-sided strand {tsr.strand_gro} ({tsr.natoms} atoms) and "
          f"geometry-only surface {args.surface} ({geom.manifest['natoms']} atoms). "
          "Parameterize the strand, then build the surface from its .itp.")
79
+
80
+
81
def main(argv=None):
    """Parse the command line and dispatch to the chosen sub-command.

    ``argv`` is forwarded to ``argparse``; ``None`` means use ``sys.argv``.
    Each sub-parser stores its handler under ``func``.
    """
    parser = argparse.ArgumentParser(prog="samgen", description="Build SAM surfaces for GROMACS")
    commands = parser.add_subparsers(dest="cmd", required=True)

    # geometry
    geometry = commands.add_parser("geometry", help="tile a surface (no force field)")
    geometry.add_argument("config")
    geometry.add_argument("-o", "--out", default="sam.gro")
    geometry.set_defaults(func=cmd_geometry)

    # topology
    topology = commands.add_parser("topology", help="assemble topol.top for an existing .gro")
    topology.add_argument("config")
    topology.add_argument("--gro", required=True, help="existing surface .gro")
    topology.add_argument("-o", "--out", default="topol.top")
    topology.add_argument("--reordered", default="sam-reordered.gro")
    topology.add_argument("--validate", action="store_true", help="run gmx grompp gate")
    topology.set_defaults(func=cmd_topology)

    # build
    both = commands.add_parser("build", help="geometry + topology in one shot")
    both.add_argument("config")
    both.add_argument("-o", "--out", default="sam.gro")
    both.add_argument("--top", default="topol.top")
    both.set_defaults(func=cmd_build)

    # twosided
    twosided = commands.add_parser(
        "twosided", help="build a two-sided (shared-S) strand + SAM geometry"
    )
    twosided.add_argument("config")
    twosided.add_argument("--component", help="which component to fuse (default: first)")
    twosided.add_argument("-o", "--out", default="twosided-strand.gro",
                          help="output strand .gro (parameterize this)")
    twosided.add_argument("--surface", default="twosided-sam.gro",
                          help="output full two-sided SAM geometry (geometry-only)")
    twosided.set_defaults(func=cmd_twosided)

    parsed = parser.parse_args(argv)
    parsed.func(parsed)
115
+
116
+
117
+ if __name__ == "__main__":
118
+ main()
@@ -0,0 +1 @@
1
+ """Core building blocks: gro I/O, molecules, lattice, orientation, anchors."""
samgen/core/anchor.py ADDED
@@ -0,0 +1,140 @@
1
+ """Anchor (gold-facing S) and methyl-cap detection.
2
+
3
+ Policy: prompt the user first; auto-detect ONLY with explicit consent, and
4
+ always report what was picked and why. Detection never silently guesses.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import warnings
10
+ from dataclasses import dataclass
11
+ from typing import Optional
12
+ import numpy as np
13
+
14
+ from .molecule import Molecule
15
+
16
+ # Carbon mass (GAFF C2 united-atom CH2 ~14.03, c3 ~12.01). A methyl cap carbon
17
+ # in these models is a carbon bonded to the anchor S and to nothing else heavy.
18
+ _H_MASS = (1.0, 1.1)
19
+
20
+
21
@dataclass
class AnchorResult:
    """Outcome of anchor resolution.

    anchor_idx      -- 0-indexed S atom chosen as the anchor
    cap_carbon_idx  -- methyl cap carbon bonded to that S, or None
    reason          -- human-readable explanation (shown in the consent prompt)
    """

    anchor_idx: int
    cap_carbon_idx: Optional[int]
    reason: str
26
+
27
+
28
def resolve_anchor(
    mol: Molecule,
    specified: Optional[str | int],
    allow_autodetect: bool,
) -> AnchorResult:
    """Pick the anchor atom for ``mol``.

    When ``specified`` (an atom name such as "S41", or a 1-based index) is
    given it is used directly. Otherwise auto-detection runs only if
    ``allow_autodetect`` is true; without consent a ValueError is raised so
    batch runs fail clearly.
    """
    if specified is None:
        if allow_autodetect:
            return autodetect_anchor(mol)
        raise ValueError(
            f"{mol.name}: no anchor specified. Set it in the config, or pass "
            "allow_anchor_autodetect: true to consent to auto-detection."
        )

    anchor = _resolve_specified(mol, specified)
    cap = _find_cap(mol, anchor)
    return AnchorResult(anchor, cap, f"specified ({specified})")
48
+
49
+
50
def autodetect_anchor(mol: Molecule) -> AnchorResult:
    """Guess the anchor sulfur. Call only after the user has consented.

    Preference order:
      1. the single sulfur that carries a methyl cap;
      2. if no sulfur is capped, the sulfur with the lowest z coordinate.

    Raises ValueError when the molecule has no sulfur, or when more than one
    sulfur is capped (ambiguous).
    """
    sulfurs = mol.sulfur_indices()
    if not sulfurs:
        raise ValueError(f"{mol.name}: no sulfur found; specify the anchor manually")

    # Pair every capped sulfur with its cap carbon.
    capped = []
    for sulfur in sulfurs:
        cap = _find_cap(mol, sulfur)
        if cap is not None:
            capped.append((sulfur, cap))

    atoms = mol.struct.atoms

    if len(capped) > 1:
        names = ", ".join(atoms[sulfur].atomname for sulfur, _ in capped)
        raise ValueError(
            f"{mol.name}: ambiguous anchor — multiple capped sulfurs ({names}). "
            "Specify the anchor manually."
        )

    if len(capped) == 1:
        sulfur, cap = capped[0]
        name = atoms[sulfur].atomname
        return AnchorResult(sulfur, cap, f"S {name!r} bonded to methyl cap "
                                         f"{atoms[cap].atomname!r}")

    # No capped sulfur: fall back to the lowest-z one (assumes a pre-oriented,
    # S-down strand).
    positions = mol.coords
    lowest = min(sulfurs, key=lambda idx: positions[idx][2])
    name = atoms[lowest].atomname
    return AnchorResult(lowest, None, f"lowest-z sulfur {name!r} (no methyl cap found)")
78
+
79
+
80
+ def _resolve_specified(mol: Molecule, specified: str | int) -> int:
81
+ if isinstance(specified, int):
82
+ return specified - 1 # 1-based -> 0-based
83
+ s = str(specified)
84
+ if s.isdigit():
85
+ return int(s) - 1
86
+ for i, atom in enumerate(mol.struct.atoms):
87
+ if atom.atomname == s:
88
+ return i
89
+ raise ValueError(f"{mol.name}: anchor atom {specified!r} not found")
90
+
91
+
92
def backbone_head(mol: Molecule, anchor_idx: int, n_carbons: int = 9) -> int:
    """Return the index of the ``n_carbons``-th chain atom counted from the anchor.

    Starting at the first heavy neighbour of the anchor that is not its methyl
    cap, the walk follows heavy (mass >= 11) neighbours one at a time. It stops
    early, with a warning, if the chain ends or branches before ``n_carbons``
    atoms were visited, and returns the last atom reached.

    The mass test is an approximation: in-chain O/N would pass it as well
    (fine for thiol alkyl SAMs).

    Raises ValueError when the molecule has no bond graph/masses or when no
    chain atom is bonded to the anchor.
    """
    if mol.bonds is None or mol.masses is None:
        raise ValueError(f"{mol.name}: backbone detection needs an .itp bond graph")

    cap = _find_cap(mol, anchor_idx)

    first_links = []
    for nb in mol.neighbors(anchor_idx):
        if mol.masses[nb] >= 11.0 and nb != cap:
            first_links.append(nb)
    if not first_links:
        raise ValueError(f"{mol.name}: no alkyl chain carbon bonded to the anchor")

    behind = anchor_idx
    here = first_links[0]
    walked = 1
    while walked < n_carbons:
        ahead = []
        for nb in mol.neighbors(here):
            if mol.masses[nb] >= 11.0 and nb != behind:
                ahead.append(nb)

        if not ahead:
            warnings.warn(f"{mol.name}: alkyl chain ends after {walked} carbons "
                          f"(< {n_carbons}); orienting on the shorter segment")
            break
        if len(ahead) > 1:
            warnings.warn(f"{mol.name}: alkyl chain branches at carbon {walked}; "
                          f"orienting on the segment up to the branch")
            break

        behind, here = here, ahead[0]
        walked += 1
    return here
126
+
127
+
128
+ def _find_cap(mol: Molecule, sulfur_idx: int) -> Optional[int]:
129
+ """A methyl cap carbon: bonded to the S and to 3 H and no other heavy atom."""
130
+ if mol.bonds is None or mol.masses is None:
131
+ return None
132
+ for nb in mol.neighbors(sulfur_idx):
133
+ if mol.masses[nb] < 11.0: # not a carbon
134
+ continue
135
+ heavy = [x for x in mol.neighbors(nb) if mol.masses[x] >= 11.0]
136
+ hyd = [x for x in mol.neighbors(nb) if mol.masses[x] < 11.0]
137
+ # carbon's only heavy neighbour is the sulfur, plus ~3 hydrogens
138
+ if heavy == [sulfur_idx] and len(hyd) >= 2:
139
+ return nb
140
+ return None
samgen/core/gro.py ADDED
@@ -0,0 +1,98 @@
1
+ """Robust GROMACS .gro reader/writer.
2
+
3
+ The .gro format is fixed-width by column. We centralise that layout here so the
4
+ rest of the package never touches raw columns.
5
+
6
+ .gro fixed layout (1-indexed columns), positions in nm:
7
+ 1-5 residue number
8
+ 6-10 residue name
9
+ 11-15 atom name
10
+ 16-20 atom number
11
+ 21-28 x (%8.3f)
12
+ 29-36 y
13
+ 37-44 z
14
+ [45-52 53-60 61-68] optional vx vy vz (%8.4f)
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from dataclasses import dataclass, field
20
+ from typing import List, Optional, Tuple
21
+ import numpy as np
22
+
23
+
24
@dataclass
class GroAtom:
    """One atom record of a .gro file (positions in nm)."""

    # residue number / name
    resid: int
    resname: str
    # atom name / number
    atomname: str
    atomnum: int
    # position
    x: float
    y: float
    z: float
    # Optional (vx, vy, vz); kept when the file has them so data is never
    # silently dropped.
    vel: Optional[Tuple[float, float, float]] = None
35
+
36
+
37
@dataclass
class GroStructure:
    """A parsed .gro file: title line, atom records and box line."""

    title: str
    atoms: List[GroAtom]
    box: Tuple[float, ...]  # 3 (or 9, for triclinic) box values, nm

    @property
    def coords(self) -> np.ndarray:
        """N x 3 float array of positions in nm.

        The reshape keeps the result two-dimensional, shape (0, 3), even when
        the structure has no atoms; a bare ``np.array([])`` would be 1-D.
        """
        rows = [[a.x, a.y, a.z] for a in self.atoms]
        return np.array(rows, dtype=float).reshape(-1, 3)

    def set_coords(self, coords: np.ndarray) -> None:
        """Overwrite every atom position from an N x 3 array-like.

        Raises ValueError when ``coords`` does not have exactly one xyz row
        per atom, rather than silently updating only a prefix of the atoms.
        """
        arr = np.asarray(coords, dtype=float)
        expected = (len(self.atoms), 3)
        if arr.shape != expected and not (arr.size == 0 and not self.atoms):
            raise ValueError(
                f"set_coords: expected coordinates of shape {expected}, "
                f"got {arr.shape}"
            )
        for atom, (x, y, z) in zip(self.atoms, arr):
            atom.x, atom.y, atom.z = float(x), float(y), float(z)

    @property
    def natoms(self) -> int:
        """Number of atom records."""
        return len(self.atoms)
55
+
56
+
57
def read_gro(path: str) -> GroStructure:
    """Parse the fixed-column .gro file at ``path``.

    Layout: title line, atom-count line, one fixed-width record per atom, then
    the box line. Velocities (columns 45-68) are read when present.

    Raises ValueError, naming the file and, for atom records, the line, when
    the file is too short for its declared atom count, when a record cannot be
    parsed, or when the box line does not hold 3 or 9 values.
    """
    with open(path, encoding="utf-8") as fh:
        lines = fh.read().splitlines()

    if len(lines) < 3:
        raise ValueError(
            f"{path}: not a .gro file (needs title, atom count and box lines)"
        )

    title = lines[0]
    try:
        natoms = int(lines[1].strip())
    except ValueError as err:
        raise ValueError(
            f"{path}: line 2 must be the atom count, got {lines[1]!r}"
        ) from err
    if natoms < 0:
        raise ValueError(f"{path}: negative atom count {natoms}")
    if len(lines) < natoms + 3:
        raise ValueError(
            f"{path}: header declares {natoms} atoms but the file is truncated "
            f"({len(lines)} lines, need {natoms + 3})"
        )

    atoms: List[GroAtom] = []
    for lineno, line in enumerate(lines[2 : 2 + natoms], start=3):
        try:
            resid = int(line[0:5])
            resname = line[5:10].strip()
            atomname = line[10:15].strip()
            atomnum = int(line[15:20])
            x = float(line[20:28])
            y = float(line[28:36])
            z = float(line[36:44])
            vel = None
            # Only treat columns 45-68 as velocities when they hold something;
            # a record merely padded with blanks has none.
            if len(line) >= 68 and line[44:68].strip():
                vel = (float(line[44:52]), float(line[52:60]), float(line[60:68]))
        except ValueError as err:
            raise ValueError(
                f"{path}:{lineno}: malformed atom record {line!r}"
            ) from err
        atoms.append(GroAtom(resid, resname, atomname, atomnum, x, y, z, vel))

    try:
        box = tuple(float(v) for v in lines[2 + natoms].split())
    except ValueError as err:
        raise ValueError(
            f"{path}: malformed box line {lines[2 + natoms]!r}"
        ) from err
    if len(box) not in (3, 9):
        raise ValueError(
            f"{path}: box line must hold 3 or 9 values, got {len(box)}"
        )
    return GroStructure(title=title, atoms=atoms, box=box)
80
+
81
+
82
def write_gro(struct: GroStructure, path: str) -> None:
    """Serialise ``struct`` to ``path`` in fixed-column .gro layout.

    Residue and atom numbers are written modulo 100000 so they always fit the
    5-character columns (matching how GROMACS wraps them on large systems).
    Velocity columns are appended for atoms that carry a ``vel`` tuple.
    """

    def render(atom) -> str:
        # identity columns, wrapped to 5 digits
        ident = "{:5d}{:<5.5s}{:>5.5s}{:5d}".format(
            atom.resid % 100000, atom.resname, atom.atomname, atom.atomnum % 100000
        )
        position = "{:8.3f}{:8.3f}{:8.3f}".format(atom.x, atom.y, atom.z)
        if atom.vel is None:
            return ident + position
        velocity = "{:8.4f}{:8.4f}{:8.4f}".format(atom.vel[0], atom.vel[1], atom.vel[2])
        return ident + position + velocity

    header = [struct.title, f"{struct.natoms:5d}"]
    records = [render(atom) for atom in struct.atoms]
    box_line = " ".join(f"{value:.5f}" for value in struct.box)

    text = "\n".join(header + records + [box_line]) + "\n"
    with open(path, "w") as fh:
        fh.write(text)
samgen/core/lattice.py ADDED
@@ -0,0 +1,107 @@
1
+ """Hexagonal Au(111) lattice for SAM tiling.
2
+
3
+ Geometry follows Love et al. 2005 (alkanethiolate
4
+ (sqrt(3) x sqrt(3))R30 overlayer on Au(111)), lattice constant a = 0.288 nm:
5
+
6
+ colsep = sqrt(3) * a (spacing along a row)
7
+ rowsep = 1.5 * a (spacing between rows)
8
+ offset = sqrt(3)/2 * a (x-shift applied to alternate rows -> hex packing)
9
+
10
+ Reference: Love, Estroff, Kriebel, Nuzzo & Whitesides, Chem. Rev. 105 (2005)
11
+ 1103-1169.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import math
17
+ from dataclasses import dataclass
18
+ from typing import Iterator, Tuple
19
+
20
+
21
@dataclass
class Lattice:
    """Row/column site grid with alternate rows shifted for hexagonal packing.

    Unless overridden, the three spacings derive from the lattice constant
    ``a`` (nm):

        colsep = sqrt(3) * a      spacing along a row
        rowsep = 1.5 * a          spacing between rows
        offset = sqrt(3)/2 * a    x-shift of every second row

    Each ``*_override`` field, when not None, replaces the derived value.
    """

    a: float = 0.288  # Au(111) lattice constant, nm
    colsep_override: float | None = None
    rowsep_override: float | None = None
    offset_override: float | None = None

    @classmethod
    def rounded(cls) -> "Lattice":
        """Lattice for a = 0.288 nm using pre-rounded spacing constants.

        colsep/rowsep/offset are fixed at 0.499 / 0.432 / 0.2494 instead of
        being computed from ``a``.
        """
        return cls(
            a=0.288,
            colsep_override=0.499,
            rowsep_override=0.432,
            offset_override=0.2494,
        )

    @property
    def colsep(self) -> float:
        """Spacing between neighbouring sites within a row."""
        if self.colsep_override is None:
            return math.sqrt(3.0) * self.a
        return self.colsep_override

    @property
    def rowsep(self) -> float:
        """Spacing between consecutive rows."""
        if self.rowsep_override is None:
            return 1.5 * self.a
        return self.rowsep_override

    @property
    def offset(self) -> float:
        """x-shift applied to odd-numbered rows."""
        if self.offset_override is None:
            return math.sqrt(3.0) / 2.0 * self.a
        return self.offset_override

    # Subtracted from the box length before counting, so a site that would
    # land exactly on the far box edge is not counted.
    _EPS = 1e-9

    def dimensions(self, boxx: float, boxy: float,
                   even_cols: bool = False) -> Tuple[int, int]:
        """Return ``(ncols, nrows)`` for a periodic tile bounded by boxx x boxy.

        * ``ncols`` is computed once from the box width and is meant for every
          row, so shifted rows reach slightly past ``boxx``.
        * ``nrows`` is always rounded up to an even number so the tile ends on
          a complete unshifted/shifted row pair.
        * ``even_cols=True`` rounds ``ncols`` up to an even number as well.
        """
        ncols = int((boxx - self._EPS) // self.colsep) + 1
        nrows = int((boxy - self._EPS) // self.rowsep) + 1
        if even_cols:
            ncols += ncols % 2
        nrows += nrows % 2  # complete the A/B row pair
        return ncols, nrows

    def sites_for(self, ncols: int, nrows: int) -> Iterator[Tuple[int, int, float, float]]:
        """Yield ``(row, col, x, y)`` for an explicit ncols x nrows tile.

        Rows are emitted in order; odd rows start at ``offset`` instead of 0.
        """
        for row in range(nrows):
            shifted = row % 2 == 1
            xstart = self.offset if shifted else 0.0
            y = row * self.rowsep
            for col in range(ncols):
                x = xstart + col * self.colsep
                yield row, col, x, y

    def sites(self, boxx: float, boxy: float,
              even_cols: bool = False) -> Iterator[Tuple[int, int, float, float]]:
        """Yield the sites of the tile that ``dimensions`` picks for the box."""
        shape = self.dimensions(boxx, boxy, even_cols)
        yield from self.sites_for(*shape)

    def site_density(self) -> float:
        """Sites per nm^2, i.e. ``1 / (colsep * rowsep)``."""
        cell_area = self.colsep * self.rowsep
        return 1.0 / cell_area

    def final_box(self, ncols: int, nrows: int, boxz: float) -> Tuple[float, float, float]:
        """Box matching the sites actually placed.

        x and y are whole multiples of colsep/rowsep (so they may differ
        slightly from the requested box); z is passed through unchanged.
        """
        width = ncols * self.colsep
        depth = nrows * self.rowsep
        return (width, depth, boxz)
@@ -0,0 +1,81 @@
1
+ """A SAM component: coordinates plus (optional) force-field knowledge.
2
+
3
+ When an .itp is supplied we parse just enough of it to support anchor detection
4
+ and topology assembly: per-atom mass (to find sulfur by element, not by name)
5
+ and the bond graph (to tell a terminal/anchor S from an in-chain thioether).
6
+ We deliberately do NOT parse the full force field here — topology.py owns that.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass, field
12
+ from typing import Dict, List, Optional, Tuple
13
+ import numpy as np
14
+
15
+ from .gro import GroStructure, read_gro
16
+
17
+
18
@dataclass
class Molecule:
    """A SAM component: a structure plus optional .itp-derived data.

    ``masses`` and ``bonds`` are filled only when an .itp was supplied; the
    mass- and bond-based queries raise ValueError without them.
    """

    name: str  # residue name as it appears in the .gro / [molecules]
    struct: GroStructure
    itp_path: Optional[str] = None
    masses: Optional[List[float]] = None  # per-atom, in itp atom order
    bonds: Optional[List[Tuple[int, int]]] = None  # 0-indexed atom pairs

    @property
    def coords(self) -> np.ndarray:
        """Coordinates of the underlying structure."""
        return self.struct.coords

    @classmethod
    def from_files(cls, name: str, gro: str, itp: Optional[str] = None) -> "Molecule":
        """Read ``gro`` and, when ``itp`` is given, its masses and bonds."""
        struct = read_gro(gro)
        if itp is None:
            masses = None
            bonds = None
        else:
            masses, bonds = _parse_itp(itp)
        return cls(name=name, struct=struct, itp_path=itp, masses=masses, bonds=bonds)

    def sulfur_indices(self) -> List[int]:
        """0-indexed atoms whose mass lies within 0.5 of 32.06 (sulfur).

        Works from masses, so it does not depend on atom naming.
        """
        if self.masses is None:
            raise ValueError(
                f"{self.name}: need an .itp to identify sulfur by mass"
            )
        found = []
        for index, mass in enumerate(self.masses):
            if abs(mass - 32.06) < 0.5:
                found.append(index)
        return found

    def neighbors(self, atom_idx: int) -> List[int]:
        """Atoms bonded to ``atom_idx``, in bond-list order."""
        if self.bonds is None:
            raise ValueError(f"{self.name}: need an .itp bond graph")
        return [
            second if first == atom_idx else first
            for first, second in self.bonds
            if first == atom_idx or second == atom_idx
        ]
56
+
57
+
58
+ def _parse_itp(path: str) -> Tuple[List[float], List[Tuple[int, int]]]:
59
+ """Minimal .itp parse: [atoms] masses + [bonds] graph (first moleculetype)."""
60
+ masses: List[float] = []
61
+ bonds: List[Tuple[int, int]] = []
62
+ section = None
63
+ with open(path) as fh:
64
+ for raw in fh:
65
+ line = raw.split(";", 1)[0].strip() # strip comments
66
+ if not line:
67
+ continue
68
+ if line.startswith("["):
69
+ section = line.strip("[] ").lower()
70
+ continue
71
+ if section == "atoms":
72
+ # nr type resnr resname atomname cgnr charge mass ...
73
+ cols = line.split()
74
+ if len(cols) >= 8:
75
+ masses.append(float(cols[7]))
76
+ elif section == "bonds":
77
+ cols = line.split()
78
+ if len(cols) >= 2:
79
+ # itp atom indices are 1-based -> store 0-based
80
+ bonds.append((int(cols[0]) - 1, int(cols[1]) - 1))
81
+ return masses, bonds