samgen 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- samgen/__init__.py +17 -0
- samgen/_build.py +51 -0
- samgen/cli.py +118 -0
- samgen/core/__init__.py +1 -0
- samgen/core/anchor.py +140 -0
- samgen/core/gro.py +98 -0
- samgen/core/lattice.py +107 -0
- samgen/core/molecule.py +81 -0
- samgen/core/orient.py +89 -0
- samgen/core/periodicity.py +133 -0
- samgen/core/topfile.py +127 -0
- samgen/design/__init__.py +44 -0
- samgen/design/density.py +130 -0
- samgen/design/fraction.py +26 -0
- samgen/design/grid.py +36 -0
- samgen/design/multilig.py +14 -0
- samgen/design/uniform.py +13 -0
- samgen/geometry.py +271 -0
- samgen/gmx.py +56 -0
- samgen/interactive.py +136 -0
- samgen/topology.py +179 -0
- samgen-0.1.2.dist-info/METADATA +238 -0
- samgen-0.1.2.dist-info/RECORD +27 -0
- samgen-0.1.2.dist-info/WHEEL +5 -0
- samgen-0.1.2.dist-info/entry_points.txt +2 -0
- samgen-0.1.2.dist-info/licenses/LICENSE +28 -0
- samgen-0.1.2.dist-info/top_level.txt +1 -0
samgen/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""samgen — build self-assembled monolayer (SAM) surfaces for GROMACS.
|
|
2
|
+
|
|
3
|
+
Three independently runnable stages:
|
|
4
|
+
generate_geometry() -> tiled surface .gro (no force field needed)
|
|
5
|
+
assemble_topology() -> integrated topol.top + reordered .gro
|
|
6
|
+
build() -> geometry then topology in one shot
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .geometry import generate_geometry, build_twosided_strand, generate_twosided
|
|
10
|
+
from .topology import assemble_topology
|
|
11
|
+
from ._build import build
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"generate_geometry", "build_twosided_strand", "generate_twosided",
|
|
15
|
+
"assemble_topology", "build",
|
|
16
|
+
]
|
|
17
|
+
# Keep in sync with the distribution version (this wheel is samgen 0.1.2).
__version__ = "0.1.2"
|
samgen/_build.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Stage 3: `build` = geometry then topology in one shot.
|
|
2
|
+
|
|
3
|
+
Convenience path for one-sided surfaces (and two-sided where the parameterized
|
|
4
|
+
strand already exists). Loads components from the config, runs geometry, then
|
|
5
|
+
assembles the topology against the generated surface.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
from typing import Dict, Optional
|
|
12
|
+
|
|
13
|
+
from .core.molecule import Molecule
|
|
14
|
+
from .geometry import generate_geometry
|
|
15
|
+
from .topology import assemble_topology
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def load_components(config: dict, root: str = ".") -> Dict[str, Molecule]:
    """Build one Molecule per entry of ``config['components']``.

    File paths in each component spec (``gro`` and, when set, ``itp``) are
    joined onto ``root``. As a side effect the orientation-related keys found
    in each spec are copied into ``config['components_meta']`` so the geometry
    stage can read them.

    Returns a dict mapping component key -> Molecule.
    """
    orientation_keys = ("anchor", "canonicalize",
                        "backbone_carbons", "allow_anchor_autodetect")
    molecules: Dict[str, Molecule] = {}
    orientation: Dict[str, dict] = {}

    for label, entry in config["components"].items():
        gro_path = os.path.join(root, entry["gro"])
        itp_rel = entry.get("itp")
        if itp_rel:
            itp_path = os.path.join(root, itp_rel)
        else:
            itp_path = None
        molecules[label] = Molecule.from_files(
            name=entry["resname"], gro=gro_path, itp=itp_path
        )

        # Keep only the orientation options this component actually sets.
        chosen = {}
        for option in orientation_keys:
            if option in entry:
                chosen[option] = entry[option]
        orientation[label] = chosen

    config["components_meta"] = orientation
    return molecules
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def build(config: dict, root: str = ".", out_gro: str = "sam.gro",
          out_top: str = "topol.top", out_reordered: Optional[str] = "sam-reordered.gro",
          input_fn=input, is_tty=None):
    """Run the full pipeline: load components, tile geometry, assemble topology.

    The geometry manifest is written next to the surface as
    ``out_gro + ".manifest.json"``. The [molecules] order comes from
    ``config['output']['order']`` when set, otherwise from the component
    names in config order.

    Returns ``(geom.surface_gro, out_top, counts)`` where ``counts`` is
    whatever ``assemble_topology`` returns.
    """
    components = load_components(config, root)
    # A bare `output:` key parses to None in YAML; treat it like a missing key
    # instead of crashing on `None.get`.
    output_cfg = config.get("output") or {}
    geom = generate_geometry(config, components, out_gro=out_gro,
                             manifest_path=out_gro + ".manifest.json", root=root,
                             input_fn=input_fn, is_tty=is_tty)

    order = output_cfg.get("order") or [m.name for m in components.values()]
    itp_map = {m.name: m.itp_path for m in components.values()}
    counts = assemble_topology(out_gro, itp_map=itp_map, order=order,
                               out_top=out_top, out_gro=out_reordered)
    return geom.surface_gro, out_top, counts
|
samgen/cli.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""Command-line interface: `samgen geometry|topology|build CONFIG`.
|
|
2
|
+
|
|
3
|
+
Mirrors the three independently runnable stages. Anchor handling follows the
|
|
4
|
+
prompt-first / consent-to-guess policy: prompt in interactive mode, never
|
|
5
|
+
silently auto-detect in batch mode without explicit consent.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
import yaml
|
|
16
|
+
except ImportError: # keep import-light; yaml only needed for the CLI
|
|
17
|
+
yaml = None
|
|
18
|
+
|
|
19
|
+
from ._build import build, load_components
|
|
20
|
+
from .geometry import generate_geometry, generate_twosided
|
|
21
|
+
from .topology import assemble_topology
|
|
22
|
+
from .interactive import resolve_anchor_interactive
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _load_config(path: str) -> dict:
    """Read the YAML config at ``path`` and return it as a dict.

    Exits with a readable message when PyYAML is not installed, or when the
    file does not contain a mapping at the top level (an empty file parses to
    None, which would otherwise fail later with an opaque TypeError).
    """
    if yaml is None:
        sys.exit("PyYAML is required for the CLI: pip install pyyaml")
    # Explicit encoding so parsing does not depend on the platform locale.
    with open(path, encoding="utf-8") as fh:
        cfg = yaml.safe_load(fh)
    if not isinstance(cfg, dict):
        sys.exit(f"{path}: config must be a YAML mapping at the top level "
                 f"(got {type(cfg).__name__})")
    return cfg
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def cmd_geometry(args):
    """Handler for ``samgen geometry``: tile the surface and report its size.

    Component paths are resolved against the directory holding the config
    file; the manifest is written to ``args.out + ".manifest.json"``.
    """
    config = _load_config(args.config)
    config_dir = os.path.dirname(os.path.abspath(args.config))
    molecules = load_components(config, config_dir)
    manifest_file = args.out + ".manifest.json"
    result = generate_geometry(
        config,
        molecules,
        out_gro=args.out,
        manifest_path=manifest_file,
        root=config_dir,
    )
    print(f"wrote {result.surface_gro} ({result.manifest['natoms']} atoms)")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def cmd_topology(args):
    """Handler for ``samgen topology``: assemble topol.top for an existing .gro.

    The [molecules] order comes from ``output.order`` in the config when set,
    otherwise from the component names in config order.
    """
    cfg = _load_config(args.config)
    root = os.path.dirname(os.path.abspath(args.config))
    comps = load_components(cfg, root)
    # A bare `output:` key parses to None in YAML; treat it like a missing key.
    output_cfg = cfg.get("output") or {}
    order = output_cfg.get("order") or [m.name for m in comps.values()]
    itp_map = {m.name: m.itp_path for m in comps.values()}
    counts = assemble_topology(args.gro, itp_map=itp_map, order=order,
                               out_top=args.out, out_gro=args.reordered,
                               validate=args.validate)
    print(f"wrote {args.out}: {counts}")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def cmd_build(args):
    """Handler for ``samgen build``: run geometry + topology and print a summary."""
    config = _load_config(args.config)
    # Relative component paths are resolved against the config file's directory.
    config_dir = os.path.dirname(os.path.abspath(args.config))
    surface_gro, top_path, mol_counts = build(
        config, config_dir, out_gro=args.out, out_top=args.top
    )
    print(f"wrote {surface_gro} and {top_path}: {mol_counts}")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def cmd_twosided(args):
    """Handler for ``samgen twosided``: fuse a two-sided strand and tile it.

    Picks the component named by ``--component`` (default: the first one in
    the config), resolves its anchor through the interactive resolver, and
    refuses to continue when the anchor has no methyl cap to strip.
    """
    cfg = _load_config(args.config)
    root = os.path.dirname(os.path.abspath(args.config))
    comps = load_components(cfg, root)
    if not comps:
        # Without this guard next(iter(comps)) raises a bare StopIteration.
        sys.exit("config defines no components; nothing to fuse")
    key = args.component or next(iter(comps))
    if key not in comps:
        sys.exit(f"component {key!r} not in config (have: {list(comps)})")
    mol = comps[key]
    spec = cfg["components"][key]
    specified = spec.get("anchor")
    # load_components treats allow_anchor_autodetect as a per-component key,
    # so honour it here too; the top-level key remains the fallback.
    allow = spec.get("allow_anchor_autodetect",
                     cfg.get("allow_anchor_autodetect", False))
    res = resolve_anchor_interactive(mol, specified, allow)
    if res.cap_carbon_idx is None:
        sys.exit(f"anchor {mol.struct.atoms[res.anchor_idx].atomname!r} has no "
                 "methyl cap to strip; cannot build a shared-S two-sided strand")
    tsr, geom = generate_twosided(mol, res.anchor_idx, res.cap_carbon_idx, cfg,
                                  out_strand=args.out, out_surface=args.surface)
    print(f"wrote two-sided strand {tsr.strand_gro} ({tsr.natoms} atoms) and "
          f"geometry-only surface {args.surface} ({geom.manifest['natoms']} atoms). "
          "Parameterize the strand, then build the surface from its .itp.")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def main(argv=None):
    """Parse ``argv`` and dispatch to the handler of the chosen sub-command.

    Each sub-parser stores its handler under ``func`` via ``set_defaults``;
    the parsed namespace is passed straight to that handler.
    """
    parser = argparse.ArgumentParser(
        prog="samgen", description="Build SAM surfaces for GROMACS"
    )
    commands = parser.add_subparsers(dest="cmd", required=True)

    # samgen geometry CONFIG [-o OUT]
    geometry = commands.add_parser("geometry", help="tile a surface (no force field)")
    geometry.add_argument("config")
    geometry.add_argument("-o", "--out", default="sam.gro")
    geometry.set_defaults(func=cmd_geometry)

    # samgen topology CONFIG --gro GRO [-o OUT] [--reordered GRO] [--validate]
    topology = commands.add_parser(
        "topology", help="assemble topol.top for an existing .gro"
    )
    topology.add_argument("config")
    topology.add_argument("--gro", required=True, help="existing surface .gro")
    topology.add_argument("-o", "--out", default="topol.top")
    topology.add_argument("--reordered", default="sam-reordered.gro")
    topology.add_argument("--validate", action="store_true", help="run gmx grompp gate")
    topology.set_defaults(func=cmd_topology)

    # samgen build CONFIG [-o OUT] [--top TOP]
    build_cmd = commands.add_parser("build", help="geometry + topology in one shot")
    build_cmd.add_argument("config")
    build_cmd.add_argument("-o", "--out", default="sam.gro")
    build_cmd.add_argument("--top", default="topol.top")
    build_cmd.set_defaults(func=cmd_build)

    # samgen twosided CONFIG [--component KEY] [-o STRAND] [--surface SURFACE]
    twosided = commands.add_parser(
        "twosided", help="build a two-sided (shared-S) strand + SAM geometry"
    )
    twosided.add_argument("config")
    twosided.add_argument("--component", help="which component to fuse (default: first)")
    twosided.add_argument("-o", "--out", default="twosided-strand.gro",
                          help="output strand .gro (parameterize this)")
    twosided.add_argument("--surface", default="twosided-sam.gro",
                          help="output full two-sided SAM geometry (geometry-only)")
    twosided.set_defaults(func=cmd_twosided)

    parsed = parser.parse_args(argv)
    parsed.func(parsed)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()
|
samgen/core/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core building blocks: gro I/O, molecules, lattice, orientation, anchors."""
|
samgen/core/anchor.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""Anchor (gold-facing S) and methyl-cap detection.
|
|
2
|
+
|
|
3
|
+
Policy: prompt the user first; auto-detect ONLY with explicit consent, and
|
|
4
|
+
always report what was picked and why. Detection never silently guesses.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import warnings
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import Optional
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
from .molecule import Molecule
|
|
15
|
+
|
|
16
|
+
# Carbon mass (GAFF C2 united-atom CH2 ~14.03, c3 ~12.01). A methyl cap carbon
# in these models is a carbon bonded to the anchor S and to nothing else heavy.
# NOTE(review): _H_MASS looks like a (low, high) hydrogen-mass window, but the
# functions visible in this module use a literal 11.0 threshold and never read
# it — confirm whether it is still needed.
_H_MASS = (1.0, 1.1)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class AnchorResult:
    """Outcome of anchor resolution.

    Fields:
        anchor_idx:     0-indexed position of the anchor S atom.
        cap_carbon_idx: 0-indexed methyl cap carbon bonded to that S, or None
                        when no cap was found.
        reason:         human-readable explanation of the choice, shown in the
                        consent prompt.
    """

    anchor_idx: int
    cap_carbon_idx: Optional[int]
    reason: str
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def resolve_anchor(
    mol: Molecule,
    specified: Optional[str | int],
    allow_autodetect: bool,
) -> AnchorResult:
    """Pick the anchor atom of ``mol``.

    An explicit ``specified`` value (atom name such as "S41", or a 1-based
    index) always wins. With nothing specified, auto-detection runs only when
    ``allow_autodetect`` is truthy; otherwise a ValueError is raised so that
    batch runs fail with a clear message.
    """
    if specified is None:
        if allow_autodetect:
            return autodetect_anchor(mol)
        raise ValueError(
            f"{mol.name}: no anchor specified. Set it in the config, or pass "
            "allow_anchor_autodetect: true to consent to auto-detection."
        )

    anchor = _resolve_specified(mol, specified)
    cap = _find_cap(mol, anchor)
    return AnchorResult(anchor, cap, f"specified ({specified})")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def autodetect_anchor(mol: Molecule) -> AnchorResult:
    """Guess the anchor sulfur. Call only after the user has consented.

    Strategy:
      1. exactly one sulfur carries a methyl cap -> that sulfur;
      2. several capped sulfurs -> ValueError (ambiguous);
      3. no capped sulfur -> the sulfur with the smallest z coordinate.

    Raises ValueError when the molecule has no sulfur at all.
    """
    sulfur_ids = mol.sulfur_indices()
    if not sulfur_ids:
        raise ValueError(f"{mol.name}: no sulfur found; specify the anchor manually")

    # Pair each sulfur with its methyl cap carbon, skipping uncapped ones.
    capped_pairs = []
    for s_idx in sulfur_ids:
        cap_idx = _find_cap(mol, s_idx)
        if cap_idx is not None:
            capped_pairs.append((s_idx, cap_idx))

    if len(capped_pairs) > 1:
        names = ", ".join(mol.struct.atoms[s_idx].atomname for s_idx, _ in capped_pairs)
        raise ValueError(
            f"{mol.name}: ambiguous anchor — multiple capped sulfurs ({names}). "
            "Specify the anchor manually."
        )

    if capped_pairs:
        s_idx, cap_idx = capped_pairs[0]
        s_name = mol.struct.atoms[s_idx].atomname
        return AnchorResult(s_idx, cap_idx, f"S {s_name!r} bonded to methyl cap "
                                            f"{mol.struct.atoms[cap_idx].atomname!r}")

    # Fallback: lowest-z sulfur (assumes a pre-oriented, S-down strand).
    positions = mol.coords
    lowest = min(sulfur_ids, key=lambda i: positions[i][2])
    s_name = mol.struct.atoms[lowest].atomname
    return AnchorResult(lowest, None, f"lowest-z sulfur {s_name!r} (no methyl cap found)")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _resolve_specified(mol: Molecule, specified: str | int) -> int:
|
|
81
|
+
if isinstance(specified, int):
|
|
82
|
+
return specified - 1 # 1-based -> 0-based
|
|
83
|
+
s = str(specified)
|
|
84
|
+
if s.isdigit():
|
|
85
|
+
return int(s) - 1
|
|
86
|
+
for i, atom in enumerate(mol.struct.atoms):
|
|
87
|
+
if atom.atomname == s:
|
|
88
|
+
return i
|
|
89
|
+
raise ValueError(f"{mol.name}: anchor atom {specified!r} not found")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def backbone_head(mol: Molecule, anchor_idx: int, n_carbons: int = 9) -> int:
    """Return the index of the ``n_carbons``-th backbone atom after the anchor.

    Starting from the anchor's first heavy neighbour that is not the methyl
    cap, the walk follows the single heavy, non-backtracking neighbour at each
    step. It stops early, with a warning, when the chain ends or branches
    before ``n_carbons`` atoms were visited; the last atom reached is returned.

    Raises ValueError when the molecule has no bond graph / masses, or when
    the anchor has no heavy neighbour besides the cap.
    """
    if mol.bonds is None or mol.masses is None:
        raise ValueError(f"{mol.name}: backbone detection needs an .itp bond graph")

    cap_idx = _find_cap(mol, anchor_idx)

    def heavy_neighbours(atom, excluded):
        # approximation: O/N bonded in-chain would also pass this carbon test
        # (fine for thiol alkyl SAMs).
        return [nb for nb in mol.neighbors(atom)
                if mol.masses[nb] >= 11.0 and nb != excluded]

    first_step = heavy_neighbours(anchor_idx, cap_idx)
    if not first_step:
        raise ValueError(f"{mol.name}: no alkyl chain carbon bonded to the anchor")

    previous, current = anchor_idx, first_step[0]
    walked = 1
    while walked < n_carbons:
        onward = heavy_neighbours(current, previous)
        if not onward:
            warnings.warn(f"{mol.name}: alkyl chain ends after {walked} carbons "
                          f"(< {n_carbons}); orienting on the shorter segment")
            break
        if len(onward) > 1:
            warnings.warn(f"{mol.name}: alkyl chain branches at carbon {walked}; "
                          "orienting on the segment up to the branch")
            break
        previous, current = current, onward[0]
        walked += 1
    return current
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _find_cap(mol: Molecule, sulfur_idx: int) -> Optional[int]:
|
|
129
|
+
"""A methyl cap carbon: bonded to the S and to 3 H and no other heavy atom."""
|
|
130
|
+
if mol.bonds is None or mol.masses is None:
|
|
131
|
+
return None
|
|
132
|
+
for nb in mol.neighbors(sulfur_idx):
|
|
133
|
+
if mol.masses[nb] < 11.0: # not a carbon
|
|
134
|
+
continue
|
|
135
|
+
heavy = [x for x in mol.neighbors(nb) if mol.masses[x] >= 11.0]
|
|
136
|
+
hyd = [x for x in mol.neighbors(nb) if mol.masses[x] < 11.0]
|
|
137
|
+
# carbon's only heavy neighbour is the sulfur, plus ~3 hydrogens
|
|
138
|
+
if heavy == [sulfur_idx] and len(hyd) >= 2:
|
|
139
|
+
return nb
|
|
140
|
+
return None
|
samgen/core/gro.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Robust GROMACS .gro reader/writer.
|
|
2
|
+
|
|
3
|
+
The .gro format is fixed-width by column. We centralise that layout here so the
|
|
4
|
+
rest of the package never touches raw columns.
|
|
5
|
+
|
|
6
|
+
.gro fixed layout (1-indexed columns), positions in nm:
|
|
7
|
+
1-5 residue number
|
|
8
|
+
6-10 residue name
|
|
9
|
+
11-15 atom name
|
|
10
|
+
16-20 atom number
|
|
11
|
+
21-28 x (%8.3f)
|
|
12
|
+
29-36 y
|
|
13
|
+
37-44 z
|
|
14
|
+
[45-52 53-60 61-68] optional vx vy vz (%8.4f)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from typing import List, Optional, Tuple
|
|
21
|
+
import numpy as np
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class GroAtom:
    """One atom record of a .gro file (positions in nm)."""

    # identity columns
    resid: int
    resname: str
    atomname: str
    atomnum: int
    # position
    x: float
    y: float
    z: float
    # Optional (vx, vy, vz); kept when the file carries velocities so that
    # reading never silently drops data.
    vel: Optional[Tuple[float, float, float]] = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class GroStructure:
    """In-memory .gro file: title line, atom records and box vector."""

    title: str
    atoms: List[GroAtom]
    box: Tuple[float, ...]  # 3 (or 9, for triclinic) box values, nm

    @property
    def coords(self) -> np.ndarray:
        """N x 3 array of positions in nm (shape (0, 3) when there are no atoms)."""
        xyz = np.array([[a.x, a.y, a.z] for a in self.atoms], dtype=float)
        # np.array([]) has shape (0,); reshape keeps the N x 3 contract for an
        # empty structure so callers can always index columns.
        return xyz.reshape(-1, 3)

    def set_coords(self, coords: np.ndarray) -> None:
        """Write rows of ``coords`` back onto the atoms as plain floats.

        Atoms and rows are paired in order; pairing stops at the shorter of
        the two.
        """
        for atom, (x, y, z) in zip(self.atoms, coords):
            atom.x, atom.y, atom.z = float(x), float(y), float(z)

    @property
    def natoms(self) -> int:
        """Number of atom records held."""
        return len(self.atoms)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def read_gro(path: str) -> GroStructure:
    """Parse the .gro file at ``path`` into a GroStructure.

    The identity columns (residue number/name, atom name/number) are read
    from the fixed first 20 characters. The numeric columns that follow are
    8 characters wide in a standard file; files written with more decimals
    use wider columns, so the width is taken from the distance between the
    first two decimal points on each line. Velocities are read when the line
    is long enough to hold six numeric columns.

    Raises ValueError for a file that is too short for its declared atom
    count or that contains a malformed atom record.
    """
    with open(path) as fh:
        lines = fh.read().splitlines()

    if len(lines) < 3:
        raise ValueError(
            f"{path}: not a .gro file (expected title, atom count and box lines)"
        )

    title = lines[0]
    try:
        natoms = int(lines[1].strip())
    except ValueError:
        raise ValueError(
            f"{path}: line 2 should hold the atom count, got {lines[1]!r}"
        ) from None
    if natoms < 0 or len(lines) < natoms + 3:
        # A truncated file would otherwise yield too few atoms and then fail
        # with a bare IndexError on the box line.
        raise ValueError(
            f"{path}: header declares {natoms} atoms but the file has only "
            f"{len(lines)} lines (need {natoms + 3} including title, count and box)"
        )

    atoms: List[GroAtom] = []
    for lineno, line in enumerate(lines[2 : 2 + natoms], start=3):
        try:
            resid = int(line[0:5])
            resname = line[5:10].strip()
            atomname = line[10:15].strip()
            atomnum = int(line[15:20])

            # Trailing blanks must not be mistaken for velocity columns.
            numeric = line[20:].rstrip()
            dots = [i for i, ch in enumerate(numeric) if ch == "."]
            width = dots[1] - dots[0] if len(dots) >= 2 else 8
            x, y, z = (float(numeric[k * width : (k + 1) * width]) for k in range(3))

            vel = None
            if len(numeric) >= 6 * width:  # velocities present
                vel = tuple(
                    float(numeric[k * width : (k + 1) * width]) for k in range(3, 6)
                )
        except ValueError as exc:
            raise ValueError(
                f"{path}: malformed atom record on line {lineno}: {line!r}"
            ) from exc
        atoms.append(GroAtom(resid, resname, atomname, atomnum, x, y, z, vel))

    box = tuple(float(v) for v in lines[2 + natoms].split())
    return GroStructure(title=title, atoms=atoms, box=box)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def write_gro(struct: GroStructure, path: str) -> None:
    """Serialise ``struct`` to ``path`` in fixed-width .gro layout.

    Output is: title, atom count, one record per atom (with velocity columns
    only for atoms that carry them), then the box values on the last line.
    All text is built before the file is opened.
    """
    rows = [struct.title, f"{struct.natoms:5d}"]

    for atom in struct.atoms:
        # GROMACS truncates resid/atomnum to 5 digits (mod 100000); match it so
        # large surfaces stay valid .gro files.
        pieces = [
            f"{atom.resid % 100000:5d}",
            f"{atom.resname:<5.5s}",
            f"{atom.atomname:>5.5s}",
            f"{atom.atomnum % 100000:5d}",
            f"{atom.x:8.3f}",
            f"{atom.y:8.3f}",
            f"{atom.z:8.3f}",
        ]
        if atom.vel is not None:
            pieces.append(f"{atom.vel[0]:8.4f}")
            pieces.append(f"{atom.vel[1]:8.4f}")
            pieces.append(f"{atom.vel[2]:8.4f}")
        rows.append("".join(pieces))

    rows.append(" ".join(f"{value:.5f}" for value in struct.box))
    text = "\n".join(rows) + "\n"

    with open(path, "w") as fh:
        fh.write(text)
|
samgen/core/lattice.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""Hexagonal Au(111) lattice for SAM tiling.
|
|
2
|
+
|
|
3
|
+
Geometry follows Love et al. 2005 (alkanethiolate
|
|
4
|
+
(sqrt(3) x sqrt(3))R30 overlayer on Au(111)), lattice constant a = 0.288 nm:
|
|
5
|
+
|
|
6
|
+
colsep = sqrt(3) * a (spacing along a row)
|
|
7
|
+
rowsep = 1.5 * a (spacing between rows)
|
|
8
|
+
offset = sqrt(3)/2 * a (x-shift applied to alternate rows -> hex packing)
|
|
9
|
+
|
|
10
|
+
Reference: Love, Estroff, Kriebel, Nuzzo & Whitesides, Chem. Rev. 105 (2005)
|
|
11
|
+
1103-1169.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import math
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from typing import Iterator, Tuple
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class Lattice:
    """Hexagonal site lattice used to tile strands.

    Spacings derive from the lattice constant ``a`` unless an explicit
    override is given:

        colsep = sqrt(3) * a      spacing along a row
        rowsep = 1.5 * a          spacing between rows
        offset = sqrt(3)/2 * a    x-shift of every second row
    """

    a: float = 0.288  # Au(111) lattice constant, nm
    # Explicit spacing overrides; None means "compute from `a`".
    # Lattice.rounded() fills these with pre-rounded constants.
    colsep_override: float | None = None
    rowsep_override: float | None = None
    offset_override: float | None = None

    # Small slack so a box that is an exact multiple of the spacing does not
    # gain an extra row/column from the floor-division in dimensions().
    _EPS = 1e-9

    @classmethod
    def rounded(cls) -> "Lattice":
        """Lattice for a = 0.288 nm with spacings fixed to rounded constants.

        colsep/rowsep/offset are 0.499 / 0.432 / 0.2494 rather than the exact
        values computed from ``a``.
        """
        return cls(
            a=0.288,
            colsep_override=0.499,
            rowsep_override=0.432,
            offset_override=0.2494,
        )

    @property
    def colsep(self) -> float:
        """Spacing between neighbouring sites within a row, nm."""
        if self.colsep_override is None:
            return math.sqrt(3.0) * self.a
        return self.colsep_override

    @property
    def rowsep(self) -> float:
        """Spacing between consecutive rows, nm."""
        if self.rowsep_override is None:
            return 1.5 * self.a
        return self.rowsep_override

    @property
    def offset(self) -> float:
        """x-shift applied to odd-numbered rows, nm."""
        if self.offset_override is None:
            return math.sqrt(3.0) / 2.0 * self.a
        return self.offset_override

    def dimensions(self, boxx: float, boxy: float,
                   even_cols: bool = False) -> Tuple[int, int]:
        """Return (ncols, nrows) for a periodic tile bounded by boxx x boxy.

        * ncols is computed once from boxx and is meant for every row, so the
          shifted rows are not one site short (they extend slightly past boxx).
        * nrows is always bumped to an even number so the tile ends on a
          complete unshifted/shifted row pair and stays periodic in y.
        * With ``even_cols`` ncols is bumped to an even number as well, which
          patterned designs need for their 2-site stagger.
        """
        ncols = int((boxx - self._EPS) // self.colsep) + 1
        nrows = int((boxy - self._EPS) // self.rowsep) + 1
        if even_cols:
            ncols += ncols % 2
        nrows += nrows % 2  # complete the A/B row pair
        return ncols, nrows

    def sites_for(self, ncols: int, nrows: int) -> Iterator[Tuple[int, int, float, float]]:
        """Yield (row, col, x, y) for every site of an ncols x nrows tile.

        Odd rows start at x = ``offset``; even rows start at x = 0.
        """
        for row in range(nrows):
            shift = 0.0 if row % 2 == 0 else self.offset
            y = row * self.rowsep
            for col in range(ncols):
                yield row, col, shift + col * self.colsep, y

    def sites(self, boxx: float, boxy: float,
              even_cols: bool = False) -> Iterator[Tuple[int, int, float, float]]:
        """Yield the sites of the periodic tile that dimensions() picks for the box."""
        yield from self.sites_for(*self.dimensions(boxx, boxy, even_cols))

    def site_density(self) -> float:
        """Sites per nm^2, i.e. 1 / (colsep * rowsep)."""
        return 1.0 / (self.colsep * self.rowsep)

    def final_box(self, ncols: int, nrows: int, boxz: float) -> Tuple[float, float, float]:
        """Box matching the sites actually placed.

        x and y are whole multiples of colsep/rowsep (so they can differ from
        the requested box); z is passed through unchanged.
        """
        return (ncols * self.colsep, nrows * self.rowsep, boxz)
|
samgen/core/molecule.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""A SAM component: coordinates plus (optional) force-field knowledge.
|
|
2
|
+
|
|
3
|
+
When an .itp is supplied we parse just enough of it to support anchor detection
|
|
4
|
+
and topology assembly: per-atom mass (to find sulfur by element, not by name)
|
|
5
|
+
and the bond graph (to tell a terminal/anchor S from an in-chain thioether).
|
|
6
|
+
We deliberately do NOT parse the full force field here — topology.py owns that.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from typing import Dict, List, Optional, Tuple
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
from .gro import GroStructure, read_gro
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class Molecule:
    """One SAM component: a structure plus optional .itp-derived data.

    ``masses`` and ``bonds`` are only populated when an .itp was supplied;
    the methods that depend on them raise ValueError otherwise.
    """

    name: str  # residue name as it appears in the .gro / [molecules]
    struct: GroStructure
    itp_path: Optional[str] = None
    masses: Optional[List[float]] = None  # per-atom, in itp atom order
    bonds: Optional[List[Tuple[int, int]]] = None  # 0-indexed atom pairs

    @property
    def coords(self) -> np.ndarray:
        """Coordinates of the underlying structure."""
        return self.struct.coords

    @classmethod
    def from_files(cls, name: str, gro: str, itp: Optional[str] = None) -> "Molecule":
        """Load the structure from ``gro`` and, when given, masses/bonds from ``itp``."""
        structure = read_gro(gro)
        if itp is None:
            atom_masses, bond_pairs = None, None
        else:
            atom_masses, bond_pairs = _parse_itp(itp)
        return cls(
            name=name,
            struct=structure,
            itp_path=itp,
            masses=atom_masses,
            bonds=bond_pairs,
        )

    def sulfur_indices(self) -> List[int]:
        """0-indexed atoms whose mass lies within 0.5 of 32.06 (sulfur).

        Works from masses, so atom naming does not matter.
        """
        if self.masses is None:
            raise ValueError(
                f"{self.name}: need an .itp to identify sulfur by mass"
            )
        hits = []
        for idx, mass in enumerate(self.masses):
            if abs(mass - 32.06) < 0.5:
                hits.append(idx)
        return hits

    def neighbors(self, atom_idx: int) -> List[int]:
        """Atoms bonded to ``atom_idx``, in the order the bonds are listed."""
        if self.bonds is None:
            raise ValueError(f"{self.name}: need an .itp bond graph")
        return [
            second if first == atom_idx else first
            for first, second in self.bonds
            if atom_idx in (first, second)
        ]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _parse_itp(path: str) -> Tuple[List[float], List[Tuple[int, int]]]:
|
|
59
|
+
"""Minimal .itp parse: [atoms] masses + [bonds] graph (first moleculetype)."""
|
|
60
|
+
masses: List[float] = []
|
|
61
|
+
bonds: List[Tuple[int, int]] = []
|
|
62
|
+
section = None
|
|
63
|
+
with open(path) as fh:
|
|
64
|
+
for raw in fh:
|
|
65
|
+
line = raw.split(";", 1)[0].strip() # strip comments
|
|
66
|
+
if not line:
|
|
67
|
+
continue
|
|
68
|
+
if line.startswith("["):
|
|
69
|
+
section = line.strip("[] ").lower()
|
|
70
|
+
continue
|
|
71
|
+
if section == "atoms":
|
|
72
|
+
# nr type resnr resname atomname cgnr charge mass ...
|
|
73
|
+
cols = line.split()
|
|
74
|
+
if len(cols) >= 8:
|
|
75
|
+
masses.append(float(cols[7]))
|
|
76
|
+
elif section == "bonds":
|
|
77
|
+
cols = line.split()
|
|
78
|
+
if len(cols) >= 2:
|
|
79
|
+
# itp atom indices are 1-based -> store 0-based
|
|
80
|
+
bonds.append((int(cols[0]) - 1, int(cols[1]) - 1))
|
|
81
|
+
return masses, bonds
|