molbuilder 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- molbuilder/__init__.py +8 -0
- molbuilder/__main__.py +6 -0
- molbuilder/atomic/__init__.py +4 -0
- molbuilder/atomic/bohr.py +235 -0
- molbuilder/atomic/quantum_atom.py +334 -0
- molbuilder/atomic/quantum_numbers.py +196 -0
- molbuilder/atomic/wavefunctions.py +297 -0
- molbuilder/bonding/__init__.py +4 -0
- molbuilder/bonding/covalent.py +442 -0
- molbuilder/bonding/lewis.py +347 -0
- molbuilder/bonding/vsepr.py +433 -0
- molbuilder/cli/__init__.py +1 -0
- molbuilder/cli/demos.py +516 -0
- molbuilder/cli/menu.py +127 -0
- molbuilder/cli/wizard.py +831 -0
- molbuilder/core/__init__.py +6 -0
- molbuilder/core/bond_data.py +170 -0
- molbuilder/core/constants.py +51 -0
- molbuilder/core/element_properties.py +183 -0
- molbuilder/core/elements.py +181 -0
- molbuilder/core/geometry.py +232 -0
- molbuilder/gui/__init__.py +2 -0
- molbuilder/gui/app.py +286 -0
- molbuilder/gui/canvas3d.py +115 -0
- molbuilder/gui/dialogs.py +117 -0
- molbuilder/gui/event_handler.py +118 -0
- molbuilder/gui/sidebar.py +105 -0
- molbuilder/gui/toolbar.py +71 -0
- molbuilder/io/__init__.py +1 -0
- molbuilder/io/json_io.py +146 -0
- molbuilder/io/mol_sdf.py +169 -0
- molbuilder/io/pdb.py +184 -0
- molbuilder/io/smiles_io.py +47 -0
- molbuilder/io/xyz.py +103 -0
- molbuilder/molecule/__init__.py +2 -0
- molbuilder/molecule/amino_acids.py +919 -0
- molbuilder/molecule/builders.py +257 -0
- molbuilder/molecule/conformations.py +70 -0
- molbuilder/molecule/functional_groups.py +484 -0
- molbuilder/molecule/graph.py +712 -0
- molbuilder/molecule/peptides.py +13 -0
- molbuilder/molecule/stereochemistry.py +6 -0
- molbuilder/process/__init__.py +3 -0
- molbuilder/process/conditions.py +260 -0
- molbuilder/process/costing.py +316 -0
- molbuilder/process/purification.py +285 -0
- molbuilder/process/reactor.py +297 -0
- molbuilder/process/safety.py +476 -0
- molbuilder/process/scale_up.py +427 -0
- molbuilder/process/solvent_systems.py +204 -0
- molbuilder/reactions/__init__.py +3 -0
- molbuilder/reactions/functional_group_detect.py +728 -0
- molbuilder/reactions/knowledge_base.py +1716 -0
- molbuilder/reactions/reaction_types.py +102 -0
- molbuilder/reactions/reagent_data.py +1248 -0
- molbuilder/reactions/retrosynthesis.py +1430 -0
- molbuilder/reactions/synthesis_route.py +377 -0
- molbuilder/reports/__init__.py +158 -0
- molbuilder/reports/cost_report.py +206 -0
- molbuilder/reports/molecule_report.py +279 -0
- molbuilder/reports/safety_report.py +296 -0
- molbuilder/reports/synthesis_report.py +283 -0
- molbuilder/reports/text_formatter.py +170 -0
- molbuilder/smiles/__init__.py +4 -0
- molbuilder/smiles/parser.py +487 -0
- molbuilder/smiles/tokenizer.py +291 -0
- molbuilder/smiles/writer.py +375 -0
- molbuilder/visualization/__init__.py +1 -0
- molbuilder/visualization/bohr_viz.py +166 -0
- molbuilder/visualization/molecule_viz.py +368 -0
- molbuilder/visualization/quantum_viz.py +434 -0
- molbuilder/visualization/theme.py +12 -0
- molbuilder-1.0.0.dist-info/METADATA +360 -0
- molbuilder-1.0.0.dist-info/RECORD +78 -0
- molbuilder-1.0.0.dist-info/WHEEL +5 -0
- molbuilder-1.0.0.dist-info/entry_points.txt +2 -0
- molbuilder-1.0.0.dist-info/licenses/LICENSE +21 -0
- molbuilder-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,919 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Amino Acid Structures and Functional Groups
|
|
3
|
+
|
|
4
|
+
Builds all 20 standard amino acids with correct 3D geometry using the
|
|
5
|
+
Molecule class from molbuilder.molecule.graph. Provides:
|
|
6
|
+
|
|
7
|
+
- Functional group builder functions (hydroxyl, amino, carboxyl, etc.)
|
|
8
|
+
- Planar ring builders (phenyl, imidazole, indole)
|
|
9
|
+
- Amino acid backbone template with L-chirality enforcement
|
|
10
|
+
- Side chain builders for all 20 standard amino acids
|
|
11
|
+
- Peptide bond formation (condensation reaction)
|
|
12
|
+
- Backbone conformation control (phi/psi angles)
|
|
13
|
+
- Secondary structure presets (alpha helix, beta sheet)
|
|
14
|
+
|
|
15
|
+
All coordinates are in Angstroms. No Unicode characters are used
|
|
16
|
+
(Windows cp1252 compatibility).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import math
|
|
22
|
+
from dataclasses import dataclass, field
|
|
23
|
+
from enum import Enum, auto
|
|
24
|
+
|
|
25
|
+
import numpy as np
|
|
26
|
+
|
|
27
|
+
from molbuilder.molecule.graph import Molecule, Hybridization, Bond as MolBond
|
|
28
|
+
from molbuilder.core.bond_data import bond_length, SP3_ANGLE, SP2_ANGLE, SP_ANGLE
|
|
29
|
+
from molbuilder.core.geometry import normalize, available_tetrahedral_dirs, place_atom_zmatrix
|
|
30
|
+
from molbuilder.core.geometry import add_sp3_hydrogens
|
|
31
|
+
from molbuilder.core.geometry import rotation_matrix
|
|
32
|
+
from molbuilder.molecule.functional_groups import (
|
|
33
|
+
add_hydroxyl,
|
|
34
|
+
add_amino,
|
|
35
|
+
add_carboxyl,
|
|
36
|
+
add_carbonyl,
|
|
37
|
+
add_amide,
|
|
38
|
+
add_thiol,
|
|
39
|
+
add_phenyl_ring,
|
|
40
|
+
add_imidazole_ring,
|
|
41
|
+
add_indole_ring,
|
|
42
|
+
add_guanidinium,
|
|
43
|
+
AMIDE_CN,
|
|
44
|
+
SS_BOND,
|
|
45
|
+
CC_AROMATIC,
|
|
46
|
+
CN_AROMATIC,
|
|
47
|
+
CO_CARBOXYL,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# ===================================================================
|
|
52
|
+
# Constants
|
|
53
|
+
# ===================================================================
|
|
54
|
+
|
|
55
|
+
# Ramachandran angle presets (phi, psi) in degrees.
# Each constant is a 2-tuple: first element phi, second element psi.
ALPHA_HELIX = (-57.0, -47.0)
BETA_SHEET = (-135.0, 135.0)
POLYPROLINE_II = (-75.0, 145.0)
EXTENDED = (-180.0, 180.0)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# ===================================================================
|
|
63
|
+
# Enumerations
|
|
64
|
+
# ===================================================================
|
|
65
|
+
|
|
66
|
+
class AminoAcidType(Enum):
    """The 20 standard amino acids.

    Members are named by three-letter code. Values come from ``auto()``
    and therefore follow declaration order, so do not reorder members
    if the numeric values matter to any caller. Each member is a key of
    ``AMINO_ACID_DATA``.
    """
    GLY = auto()  # Glycine (G)
    ALA = auto()  # Alanine (A)
    VAL = auto()  # Valine (V)
    LEU = auto()  # Leucine (L)
    ILE = auto()  # Isoleucine (I)
    PRO = auto()  # Proline (P)
    PHE = auto()  # Phenylalanine (F)
    TRP = auto()  # Tryptophan (W)
    MET = auto()  # Methionine (M)
    SER = auto()  # Serine (S)
    THR = auto()  # Threonine (T)
    CYS = auto()  # Cysteine (C)
    TYR = auto()  # Tyrosine (Y)
    ASN = auto()  # Asparagine (N)
    GLN = auto()  # Glutamine (Q)
    ASP = auto()  # Aspartate (D)
    GLU = auto()  # Glutamate (E)
    LYS = auto()  # Lysine (K)
    ARG = auto()  # Arginine (R)
    HIS = auto()  # Histidine (H)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class SecondaryStructure(Enum):
    """Common secondary structure types.

    Values come from ``auto()`` and follow declaration order.

    NOTE(review): the member names mirror the module-level (phi, psi)
    preset tuples of the same names; the code that maps a member to its
    preset is not visible in this part of the file -- confirm there.
    """
    ALPHA_HELIX = auto()
    BETA_SHEET = auto()
    POLYPROLINE_II = auto()
    EXTENDED = auto()
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ===================================================================
|
|
99
|
+
# Backbone indices dataclass
|
|
100
|
+
# ===================================================================
|
|
101
|
+
|
|
102
|
+
@dataclass
class BackboneIndices:
    """Indices of the key backbone atoms of a single amino acid.

    Every field holds an atom index (or ``None`` when that atom is
    absent). The backbone builder stores an instance on the Molecule
    as ``mol.backbone``.
    """
    N: int                  # amino nitrogen
    H_N: int | None         # H on N (None for proline)
    CA: int                 # alpha carbon
    HA: int                 # H on alpha carbon
    C: int                  # carbonyl carbon
    O: int                  # carbonyl oxygen
    CB: int | None          # beta carbon (None for glycine)
    H_Nterm: int | None     # extra H on free N-terminus
    OXT: int | None         # second O on free C-terminus (OH)
    H_OXT: int | None       # H on OXT

    def __repr__(self):
        # Compact summary: only the heavy backbone atoms are shown.
        shown = ", ".join(
            f"{label}={getattr(self, label)}"
            for label in ("N", "CA", "C", "O", "CB")
        )
        return f"Backbone({shown})"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# ===================================================================
|
|
125
|
+
# Amino acid backbone builder
|
|
126
|
+
# ===================================================================
|
|
127
|
+
|
|
128
|
+
def _build_backbone(name: str, has_cb: bool = True,
                    is_proline: bool = False) -> Molecule:
    """Assemble the shared amino acid skeleton H2N-CA(H)(R)-C(=O)-OH.

    Atoms are added in a fixed order: N, CA, C, O, OXT, H on OXT,
    H on N, optional second H on N, HA, optional CB. N is placed at
    the origin; every other atom is placed relative to already-present
    atoms through ``Molecule.add_atom_bonded``.

    Args:
        name: Name passed to the new Molecule.
        has_cb: When True a beta carbon is attached to CA at a -120
            degree dihedral (intended to give the L configuration);
            when False ``backbone.CB`` is None.
        is_proline: When True the second hydrogen on N is omitted
            (secondary amine) and ``backbone.H_Nterm`` is None.

    Returns:
        The new Molecule with a BackboneIndices instance stored on its
        ``backbone`` attribute.
    """
    mol = Molecule(name)

    # Keyword bundle shared by every non-rotatable sp3-angle hydrogen.
    rigid_h = dict(bond_order=1, bond_angle_deg=SP3_ANGLE, rotatable=False)

    # -- heavy-atom chain N -> CA -> C --
    nitrogen = mol.add_atom("N", [0.0, 0.0, 0.0], Hybridization.SP3)
    alpha_c = mol.add_atom_bonded(
        "C", nitrogen,
        bond_order=1,
        bond_length=bond_length("C", "N", 1),
        hybridization=Hybridization.SP3,
    )
    carbonyl_c = mol.add_atom_bonded(
        "C", alpha_c,
        bond_order=1,
        angle_ref=nitrogen,
        bond_angle_deg=SP3_ANGLE,
        dihedral_deg=180.0,
        hybridization=Hybridization.SP2,
    )

    # -- carboxyl group: C=O at dihedral 0, C-OH at dihedral 180 --
    on_carbonyl = dict(angle_ref=alpha_c, bond_angle_deg=SP2_ANGLE)
    carbonyl_o = mol.add_atom_bonded(
        "O", carbonyl_c,
        bond_order=2, dihedral_deg=0.0, rotatable=False,
        **on_carbonyl,
    )
    hydroxyl_o = mol.add_atom_bonded(
        "O", carbonyl_c,
        bond_order=1, dihedral_deg=180.0,
        hybridization=Hybridization.SP3,
        **on_carbonyl,
    )
    hydroxyl_h = mol.add_atom_bonded(
        "H", hydroxyl_o,
        angle_ref=carbonyl_c, dihedral_deg=0.0,
        **rigid_h,
    )

    # -- hydrogens on N: one always, a second unless proline --
    amine_h = mol.add_atom_bonded(
        "H", nitrogen,
        angle_ref=alpha_c, dihedral_deg=120.0,
        **rigid_h,
    )
    second_amine_h = None
    if not is_proline:
        second_amine_h = mol.add_atom_bonded(
            "H", nitrogen,
            angle_ref=alpha_c, dihedral_deg=-120.0,
            **rigid_h,
        )

    # -- substituents on CA --
    # HA sits at +120 and CB at -120 degrees, both measured against
    # the N / carbonyl-C references; this pairing is what the module
    # relies on for L-chirality (S in CIP for most residues, R for
    # cysteine because of sulfur priority).
    alpha_h = mol.add_atom_bonded(
        "H", alpha_c,
        angle_ref=nitrogen, dihedral_ref=carbonyl_c,
        dihedral_deg=120.0,
        **rigid_h,
    )
    beta_c = None
    if has_cb:
        beta_c = mol.add_atom_bonded(
            "C", alpha_c,
            bond_order=1,
            angle_ref=nitrogen, dihedral_ref=carbonyl_c,
            bond_angle_deg=SP3_ANGLE, dihedral_deg=-120.0,
            hybridization=Hybridization.SP3,
        )

    mol.backbone = BackboneIndices(
        N=nitrogen, H_N=amine_h, CA=alpha_c, HA=alpha_h,
        C=carbonyl_c, O=carbonyl_o, CB=beta_c,
        H_Nterm=second_amine_h, OXT=hydroxyl_o, H_OXT=hydroxyl_h,
    )
    return mol
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
# ===================================================================
|
|
217
|
+
# Side chain builders
|
|
218
|
+
# ===================================================================
|
|
219
|
+
|
|
220
|
+
def _sidechain_gly(mol: Molecule):
    """Glycine: no side chain -- attach a second hydrogen to CA.

    The directions already occupied around CA are collected from its
    current neighbours, one free tetrahedral direction is requested,
    and a hydrogen is placed along it at the C-H single-bond length.
    Nothing is added if no free direction is returned.
    """
    alpha = mol.backbone.CA
    origin = mol.atoms[alpha].position

    # Unit vectors from CA towards each atom already bonded to it.
    occupied = [
        normalize(mol.atoms[nbr].position - origin)
        for nbr in mol.neighbors(alpha)
    ]

    free_dirs = available_tetrahedral_dirs(occupied, 1)
    if not free_dirs:
        return

    second_h = mol.add_atom(
        "H", origin + bond_length("C", "H", 1) * free_dirs[0])
    mol.add_bond(alpha, second_h, order=1, rotatable=False)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _sidechain_ala(mol: Molecule):
    """Alanine: -CH3.

    The beta carbon from the backbone is the whole side chain; it only
    needs its three hydrogens.
    """
    beta = mol.backbone.CB
    add_sp3_hydrogens(mol, beta, 3)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _sidechain_val(mol: Molecule):
    """Valine: -CH(CH3)2.

    Two methyl carbons (CG1 at +60, CG2 at -60 degrees dihedral) are
    attached to CB, each completed with three hydrogens, then CB gets
    its single remaining hydrogen.
    """
    backbone = mol.backbone
    beta = backbone.CB

    # CG1 first (+60), then CG2 (-60); each methyl is finished before
    # the next carbon is added.
    for torsion in (60.0, -60.0):
        methyl = mol.add_atom_bonded(
            "C", beta,
            bond_order=1,
            angle_ref=backbone.CA,
            bond_angle_deg=SP3_ANGLE,
            dihedral_deg=torsion,
            hybridization=Hybridization.SP3,
        )
        add_sp3_hydrogens(mol, methyl, 3)

    # HB on CB
    add_sp3_hydrogens(mol, beta, 1)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _sidechain_leu(mol: Molecule):
    """Leucine: -CH2-CH(CH3)2.

    CG extends CB at a 180 degree dihedral; two methyl carbons (CD1 at
    +60, CD2 at -60) branch from CG. Hydrogens are added to each methyl
    as it is created, then to CG (one) and CB (two).
    """
    backbone = mol.backbone
    beta = backbone.CB
    sp3_carbon = dict(
        bond_order=1,
        bond_angle_deg=SP3_ANGLE,
        hybridization=Hybridization.SP3,
    )

    gamma = mol.add_atom_bonded(
        "C", beta, angle_ref=backbone.CA, dihedral_deg=180.0, **sp3_carbon)

    # CD1 (+60) and CD2 (-60), each immediately given its 3 hydrogens.
    for torsion in (60.0, -60.0):
        methyl = mol.add_atom_bonded(
            "C", gamma, angle_ref=beta, dihedral_deg=torsion, **sp3_carbon)
        add_sp3_hydrogens(mol, methyl, 3)

    add_sp3_hydrogens(mol, gamma, 1)  # HG
    add_sp3_hydrogens(mol, beta, 2)   # HB1, HB2
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _sidechain_ile(mol: Molecule):
    """Isoleucine: -CH(CH3)-CH2-CH3.

    The longer branch (CG1-CD1) is built first with 180 degree
    dihedrals and hydrogenated, then the methyl branch CG2 is attached
    to CB at -60 degrees, and finally CB receives its one hydrogen.
    """
    backbone = mol.backbone
    beta = backbone.CB
    sp3_carbon = dict(
        bond_order=1,
        bond_angle_deg=SP3_ANGLE,
        hybridization=Hybridization.SP3,
    )

    # Longer branch: CB -> CG1 -> CD1
    gamma1 = mol.add_atom_bonded(
        "C", beta, angle_ref=backbone.CA, dihedral_deg=180.0, **sp3_carbon)
    delta1 = mol.add_atom_bonded(
        "C", gamma1, angle_ref=beta, dihedral_deg=180.0, **sp3_carbon)
    add_sp3_hydrogens(mol, delta1, 3)
    add_sp3_hydrogens(mol, gamma1, 2)

    # Methyl branch: CB -> CG2
    gamma2 = mol.add_atom_bonded(
        "C", beta, angle_ref=backbone.CA, dihedral_deg=-60.0, **sp3_carbon)
    add_sp3_hydrogens(mol, gamma2, 3)

    add_sp3_hydrogens(mol, beta, 1)  # HB
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def _sidechain_pro(mol: Molecule):
    """Proline: pyrrolidine ring connecting CB-CG-CD back to N.

    CG is added to CB (180 degree dihedral) and CD to CG (30 degree
    dihedral); ``mol.close_ring`` then links CD to the backbone N.
    Two hydrogens are added to each of CB, CG and CD afterwards.
    """
    backbone = mol.backbone
    beta = backbone.CB
    sp3_carbon = dict(
        bond_order=1,
        bond_angle_deg=SP3_ANGLE,
        hybridization=Hybridization.SP3,
    )

    gamma = mol.add_atom_bonded(
        "C", beta, angle_ref=backbone.CA, dihedral_deg=180.0, **sp3_carbon)
    delta = mol.add_atom_bonded(
        "C", gamma, angle_ref=beta, dihedral_deg=30.0, **sp3_carbon)

    # Close ring: CD-N
    mol.close_ring(delta, backbone.N)

    for ring_carbon in (beta, gamma, delta):
        add_sp3_hydrogens(mol, ring_carbon, 2)
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _sidechain_phe(mol: Molecule):
    """Phenylalanine: -CH2-phenyl.

    CB gets its two hydrogens, then a phenyl ring is attached to it
    with CA as angle reference and a 90 degree dihedral.
    """
    backbone = mol.backbone
    beta = backbone.CB
    add_sp3_hydrogens(mol, beta, 2)
    add_phenyl_ring(mol, beta, angle_ref=backbone.CA, dihedral_deg=90.0)
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def _sidechain_trp(mol: Molecule):
    """Tryptophan: -CH2-indole.

    CB gets its two hydrogens, then an indole ring is attached to it
    with CA as angle reference and a 90 degree dihedral.
    """
    backbone = mol.backbone
    beta = backbone.CB
    add_sp3_hydrogens(mol, beta, 2)
    add_indole_ring(mol, beta, angle_ref=backbone.CA, dihedral_deg=90.0)
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def _sidechain_met(mol: Molecule):
    """Methionine: -CH2-CH2-S-CH3.

    The chain CB-CG-SD-CE is built fully extended (180 degree
    dihedrals). The angle at sulfur is set to 100 degrees rather than
    the sp3 angle. Hydrogens are added last: three on CE, two on CG,
    two on CB.
    """
    backbone = mol.backbone
    beta = backbone.CB
    extended_sp3 = dict(
        bond_order=1,
        dihedral_deg=180.0,
        hybridization=Hybridization.SP3,
    )

    gamma = mol.add_atom_bonded(
        "C", beta, angle_ref=backbone.CA,
        bond_angle_deg=SP3_ANGLE, **extended_sp3)
    sulfur = mol.add_atom_bonded(
        "S", gamma, angle_ref=beta,
        bond_angle_deg=SP3_ANGLE, **extended_sp3)
    epsilon = mol.add_atom_bonded(
        "C", sulfur, angle_ref=gamma,
        bond_angle_deg=100.0,  # C-S-C angle ~100 deg
        **extended_sp3)

    for carbon, n_h in ((epsilon, 3), (gamma, 2), (beta, 2)):
        add_sp3_hydrogens(mol, carbon, n_h)
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def _sidechain_ser(mol: Molecule):
    """Serine: -CH2-OH.

    CB gets its two hydrogens, then a hydroxyl group is attached to it
    with CA as angle reference and a 60 degree dihedral.
    """
    backbone = mol.backbone
    beta = backbone.CB
    add_sp3_hydrogens(mol, beta, 2)
    add_hydroxyl(mol, beta, angle_ref=backbone.CA, dihedral_deg=60.0)
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def _sidechain_thr(mol: Molecule):
    """Threonine: -CH(OH)-CH3.

    CB carries a hydroxyl oxygen OG1 (+60 degree dihedral, with its
    hydrogen at 180 degrees), a methyl carbon CG2 (-60 degrees) and
    one hydrogen, added in that order.
    """
    backbone = mol.backbone
    beta = backbone.CB
    on_beta = dict(
        bond_order=1,
        angle_ref=backbone.CA,
        bond_angle_deg=SP3_ANGLE,
        hybridization=Hybridization.SP3,
    )

    # OG1 (hydroxyl) and its hydrogen
    hydroxyl_o = mol.add_atom_bonded("O", beta, dihedral_deg=60.0, **on_beta)
    mol.add_atom_bonded(
        "H", hydroxyl_o,
        bond_order=1,
        angle_ref=beta,
        bond_angle_deg=SP3_ANGLE,
        dihedral_deg=180.0,
        rotatable=False,
    )

    # CG2 (methyl)
    methyl = mol.add_atom_bonded("C", beta, dihedral_deg=-60.0, **on_beta)
    add_sp3_hydrogens(mol, methyl, 3)

    add_sp3_hydrogens(mol, beta, 1)  # HB
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def _sidechain_cys(mol: Molecule):
    """Cysteine: -CH2-SH.

    CB gets its two hydrogens, then a thiol group is attached to it
    with CA as angle reference and a 60 degree dihedral.
    """
    backbone = mol.backbone
    beta = backbone.CB
    add_sp3_hydrogens(mol, beta, 2)
    add_thiol(mol, beta, angle_ref=backbone.CA, dihedral_deg=60.0)
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def _sidechain_tyr(mol: Molecule):
    """Tyrosine: -CH2-phenyl-OH.

    CB gets its two hydrogens, a phenyl ring is attached as for
    phenylalanine, and a hydroxyl group is added on the para carbon
    (``ring[3]``, opposite the attachment point) using ``ring[2]`` as
    angle reference.

    NOTE: the hydrogen the phenyl builder put on the para carbon is
    NOT removed, because atoms cannot easily be removed from a
    Molecule. The para carbon therefore keeps an extra H next to the
    OH; this is accepted as a minor approximation for this
    educational model.
    """
    backbone = mol.backbone
    beta = backbone.CB
    add_sp3_hydrogens(mol, beta, 2)

    phenyl = add_phenyl_ring(
        mol, beta, angle_ref=backbone.CA, dihedral_deg=90.0)
    ring_atoms = phenyl["ring"]
    para_carbon = ring_atoms[3]
    add_hydroxyl(mol, para_carbon, angle_ref=ring_atoms[2])
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def _sidechain_asn(mol: Molecule):
    """Asparagine: -CH2-CONH2.

    CB gets its two hydrogens, then an amide group is attached to it
    with CA as angle reference and a 180 degree dihedral.
    """
    backbone = mol.backbone
    beta = backbone.CB
    add_sp3_hydrogens(mol, beta, 2)
    add_amide(mol, beta, angle_ref=backbone.CA, dihedral_deg=180.0)
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def _sidechain_gln(mol: Molecule):
    """Glutamine: -CH2-CH2-CONH2.

    CG extends CB at a 180 degree dihedral; CG and then CB receive two
    hydrogens each, and an amide group is attached to CG with CB as
    angle reference.
    """
    backbone = mol.backbone
    beta = backbone.CB

    gamma = mol.add_atom_bonded(
        "C", beta,
        bond_order=1,
        angle_ref=backbone.CA,
        bond_angle_deg=SP3_ANGLE,
        dihedral_deg=180.0,
        hybridization=Hybridization.SP3,
    )

    for methylene in (gamma, beta):
        add_sp3_hydrogens(mol, methylene, 2)
    add_amide(mol, gamma, angle_ref=beta, dihedral_deg=180.0)
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def _sidechain_asp(mol: Molecule):
    """Aspartate: -CH2-COO-.

    CB gets its two hydrogens, then a carboxyl group is attached to it
    with CA as angle reference and a 180 degree dihedral.
    """
    backbone = mol.backbone
    beta = backbone.CB
    add_sp3_hydrogens(mol, beta, 2)
    add_carboxyl(mol, beta, angle_ref=backbone.CA, dihedral_deg=180.0)
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def _sidechain_glu(mol: Molecule):
    """Glutamate: -CH2-CH2-COO-.

    CG extends CB at a 180 degree dihedral; CG and then CB receive two
    hydrogens each, and a carboxyl group is attached to CG with CB as
    angle reference.
    """
    backbone = mol.backbone
    beta = backbone.CB

    gamma = mol.add_atom_bonded(
        "C", beta,
        bond_order=1,
        angle_ref=backbone.CA,
        bond_angle_deg=SP3_ANGLE,
        dihedral_deg=180.0,
        hybridization=Hybridization.SP3,
    )

    for methylene in (gamma, beta):
        add_sp3_hydrogens(mol, methylene, 2)
    add_carboxyl(mol, gamma, angle_ref=beta, dihedral_deg=180.0)
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
def _sidechain_lys(mol: Molecule):
    """Lysine: -(CH2)4-NH2.

    Three carbons (CG, CD, CE) are appended to CB as a fully extended
    chain (180 degree dihedrals). Two hydrogens are then added to each
    methylene from the far end inward (CE, CD, CG, CB), and an amino
    group is attached to CE with CD as angle reference.
    """
    backbone = mol.backbone

    # chain[-1] is the atom being extended; chain[-2] is its angle
    # reference. Starts as [CA, CB] and grows by CG, CD, CE.
    chain = [backbone.CA, backbone.CB]
    for _ in range(3):
        chain.append(mol.add_atom_bonded(
            "C", chain[-1],
            bond_order=1,
            angle_ref=chain[-2],
            bond_angle_deg=SP3_ANGLE,
            dihedral_deg=180.0,
            hybridization=Hybridization.SP3,
        ))

    # CE, CD, CG, CB -- CA (chain[0]) is not part of the side chain.
    for methylene in reversed(chain[1:]):
        add_sp3_hydrogens(mol, methylene, 2)

    add_amino(mol, chain[-1], angle_ref=chain[-2], dihedral_deg=180.0)
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
def _sidechain_arg(mol: Molecule):
    """Arginine: -(CH2)3-NH-C(=NH)(NH2).

    CG and CD extend CB as a fully extended chain, NE is attached to
    CD and given one hydrogen (120 degree dihedral). Two hydrogens are
    then added to each of CD, CG and CB, and the guanidinium group is
    attached to NE with CD as angle reference.
    """
    backbone = mol.backbone
    extended_sp3 = dict(
        bond_order=1,
        bond_angle_deg=SP3_ANGLE,
        dihedral_deg=180.0,
        hybridization=Hybridization.SP3,
    )

    # chain grows from [CA, CB] to [CA, CB, CG, CD].
    chain = [backbone.CA, backbone.CB]
    for _ in range(2):
        chain.append(mol.add_atom_bonded(
            "C", chain[-1], angle_ref=chain[-2], **extended_sp3))
    delta, gamma = chain[-1], chain[-2]

    epsilon_n = mol.add_atom_bonded(
        "N", delta, angle_ref=gamma, **extended_sp3)

    # H on NE
    mol.add_atom_bonded(
        "H", epsilon_n,
        bond_order=1,
        angle_ref=delta,
        bond_angle_deg=SP3_ANGLE,
        dihedral_deg=120.0,
        rotatable=False,
    )

    # CD, CG, CB -- CA (chain[0]) is not part of the side chain.
    for methylene in reversed(chain[1:]):
        add_sp3_hydrogens(mol, methylene, 2)

    add_guanidinium(mol, epsilon_n, angle_ref=delta, dihedral_deg=180.0)
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def _sidechain_his(mol: Molecule):
    """Histidine: -CH2-imidazole.

    CB gets its two hydrogens, then an imidazole ring is attached to
    it with CA as angle reference and a 90 degree dihedral.
    """
    backbone = mol.backbone
    beta = backbone.CB
    add_sp3_hydrogens(mol, beta, 2)
    add_imidazole_ring(mol, beta, angle_ref=backbone.CA, dihedral_deg=90.0)
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
# ===================================================================
|
|
558
|
+
# Amino acid data and factory
|
|
559
|
+
# ===================================================================
|
|
560
|
+
|
|
561
|
+
# Per-residue metadata keyed by AminoAcidType. Each value is a dict with
# the keys "name", "code1", "code3", "has_cb" and "builder" (the
# side-chain function applied after the backbone is built).
# Table rows: (type, name, 1-letter, 3-letter, has beta carbon, builder).
AMINO_ACID_DATA: dict[AminoAcidType, dict] = {
    aa: {
        "name": full_name, "code1": one_letter, "code3": three_letter,
        "has_cb": with_cb, "builder": attach_sidechain,
    }
    for aa, full_name, one_letter, three_letter, with_cb, attach_sidechain in (
        (AminoAcidType.GLY, "Glycine", "G", "Gly", False, _sidechain_gly),
        (AminoAcidType.ALA, "Alanine", "A", "Ala", True, _sidechain_ala),
        (AminoAcidType.VAL, "Valine", "V", "Val", True, _sidechain_val),
        (AminoAcidType.LEU, "Leucine", "L", "Leu", True, _sidechain_leu),
        (AminoAcidType.ILE, "Isoleucine", "I", "Ile", True, _sidechain_ile),
        (AminoAcidType.PRO, "Proline", "P", "Pro", True, _sidechain_pro),
        (AminoAcidType.PHE, "Phenylalanine", "F", "Phe", True, _sidechain_phe),
        (AminoAcidType.TRP, "Tryptophan", "W", "Trp", True, _sidechain_trp),
        (AminoAcidType.MET, "Methionine", "M", "Met", True, _sidechain_met),
        (AminoAcidType.SER, "Serine", "S", "Ser", True, _sidechain_ser),
        (AminoAcidType.THR, "Threonine", "T", "Thr", True, _sidechain_thr),
        (AminoAcidType.CYS, "Cysteine", "C", "Cys", True, _sidechain_cys),
        (AminoAcidType.TYR, "Tyrosine", "Y", "Tyr", True, _sidechain_tyr),
        (AminoAcidType.ASN, "Asparagine", "N", "Asn", True, _sidechain_asn),
        (AminoAcidType.GLN, "Glutamine", "Q", "Gln", True, _sidechain_gln),
        (AminoAcidType.ASP, "Aspartate", "D", "Asp", True, _sidechain_asp),
        (AminoAcidType.GLU, "Glutamate", "E", "Glu", True, _sidechain_glu),
        (AminoAcidType.LYS, "Lysine", "K", "Lys", True, _sidechain_lys),
        (AminoAcidType.ARG, "Arginine", "R", "Arg", True, _sidechain_arg),
        (AminoAcidType.HIS, "Histidine", "H", "His", True, _sidechain_his),
    )
}
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def build_amino_acid(aa_type: AminoAcidType) -> Molecule:
    """Build one amino acid: backbone first, then its side chain.

    Looks up ``aa_type`` in ``AMINO_ACID_DATA``, builds the backbone
    with that entry's name and ``has_cb`` flag (proline additionally
    gets ``is_proline=True``), and runs the entry's side-chain builder
    on the result.

    Args:
        aa_type: Which amino acid to build; must be a key of
            ``AMINO_ACID_DATA`` (a missing key raises ``KeyError``).

    Returns:
        The Molecule, whose ``backbone`` attribute (BackboneIndices)
        records the backbone atom indices for peptide bond formation.
    """
    entry = AMINO_ACID_DATA[aa_type]
    proline = aa_type == AminoAcidType.PRO

    mol = _build_backbone(
        entry["name"],
        has_cb=entry["has_cb"],
        is_proline=proline,
    )

    attach_sidechain = entry["builder"]
    attach_sidechain(mol)
    return mol
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
# ===================================================================
|
|
661
|
+
# Peptide bond formation
|
|
662
|
+
# ===================================================================
|
|
663
|
+
|
|
664
|
+
def _remap_backbone_indices(bb: BackboneIndices,
                            remap: dict[int, int]) -> BackboneIndices:
    """Translate one residue's backbone record into new atom numbering.

    Mandatory atoms (N, CA, HA, C, O) fall back to their old index when
    they are absent from *remap*.  Optional atoms that are unset, or that
    were dropped during condensation (and so have no entry in *remap*),
    become ``None``.
    """
    def required(idx):
        return remap.get(idx, idx)

    def optional(idx):
        return None if idx is None else remap.get(idx)

    return BackboneIndices(
        N=required(bb.N),
        H_N=optional(bb.H_N),
        CA=required(bb.CA),
        HA=required(bb.HA),
        C=required(bb.C),
        O=required(bb.O),
        CB=optional(bb.CB),
        H_Nterm=optional(bb.H_Nterm),
        OXT=optional(bb.OXT),
        H_OXT=optional(bb.H_OXT),
    )


def form_peptide_bond(mol_nterm: Molecule, mol_cterm: Molecule,
                      omega_deg: float = 180.0) -> Molecule:
    """Join two amino acids (or peptide chains) via a peptide (amide) bond.

    mol_nterm provides the N-terminal side: the OXT / H_OXT atoms of its
    C-terminal residue are left out (the condensed water).  mol_cterm
    provides the C-terminal side: the H_Nterm atom of its N-terminal
    residue is left out.

    When an input already carries a ``residues`` list (i.e. it is the
    product of an earlier call), the bond is formed at the chain end that
    is actually free: the *last* residue of mol_nterm and the *first*
    residue of mol_cterm.  Using ``mol_nterm.backbone`` here would always
    pick the chain's first residue and attach every new residue to the
    same carbonyl carbon.

    Parameters
    ----------
    mol_nterm, mol_cterm : Molecule
        Inputs; neither is modified (atom positions are copied).
    omega_deg : float
        Target omega dihedral CA(n) - C(n) - N(c) - CA(c) in degrees.

    Returns
    -------
    Molecule
        A new molecule named ``"<nterm name>-<cterm name>"`` with
        ``.residues`` (one BackboneIndices per residue, N- to C-terminus)
        and ``.backbone`` (the record of the N-terminal residue).
    """
    # Per-residue records of each side; a lone amino acid counts as a
    # one-residue chain described by its .backbone attribute.
    nterm_chain = getattr(mol_nterm, 'residues', None) or [mol_nterm.backbone]
    cterm_chain = getattr(mol_cterm, 'residues', None) or [mol_cterm.backbone]
    bb_n = nterm_chain[-1]   # residue whose C-terminus is condensed
    bb_c = cterm_chain[0]    # residue whose N-terminus loses one H

    # Atoms to skip (condensation products: H2O)
    skip_nterm = {idx for idx in (bb_n.OXT, bb_n.H_OXT) if idx is not None}
    skip_cterm = {bb_c.H_Nterm} if bb_c.H_Nterm is not None else set()

    result = Molecule(f"{mol_nterm.name}-{mol_cterm.name}")

    # Copy atoms from mol_nterm unchanged, recording old -> new indices.
    remap_n: dict[int, int] = {}
    for atom in mol_nterm.atoms:
        if atom.index in skip_nterm:
            continue
        remap_n[atom.index] = result.add_atom(
            atom.symbol, atom.position.copy(), atom.hybridization)

    # Target position for the peptide N: AMIDE_CN away from the carbonyl
    # C, along the direction opposite to the sum of the unit vectors
    # C->CA and C->O (i.e. trigonal-planar about C).
    c_pos = mol_nterm.atoms[bb_n.C].position
    c_to_ca = normalize(mol_nterm.atoms[bb_n.CA].position - c_pos)
    c_to_o = normalize(mol_nterm.atoms[bb_n.O].position - c_pos)
    n_dir = normalize(-(c_to_ca + c_to_o))
    target_n_pos = c_pos + n_dir * AMIDE_CN

    # mol_cterm is only translated (not rotated) so that its N lands on
    # the target position; omega is adjusted afterwards.
    translation = target_n_pos - mol_cterm.atoms[bb_c.N].position

    remap_c: dict[int, int] = {}
    for atom in mol_cterm.atoms:
        if atom.index in skip_cterm:
            continue
        remap_c[atom.index] = result.add_atom(
            atom.symbol, atom.position.copy() + translation,
            atom.hybridization)

    # Copy bonds.  The remap tables hold exactly the atoms that were
    # kept, so a bond touching a skipped atom is dropped automatically.
    for source, remap in ((mol_nterm, remap_n), (mol_cterm, remap_c)):
        for bond in source.bonds:
            if bond.atom_i in remap and bond.atom_j in remap:
                result.add_bond(remap[bond.atom_i], remap[bond.atom_j],
                                order=bond.order, rotatable=bond.rotatable)

    # Add the peptide bond (C-N amide bond)
    result.add_bond(remap_n[bb_n.C], remap_c[bb_c.N],
                    order=1, rotatable=False)

    # Carry every residue of both inputs forward in the new numbering.
    residues_n = [_remap_backbone_indices(bb, remap_n) for bb in nterm_chain]
    residues_c = [_remap_backbone_indices(bb, remap_c) for bb in cterm_chain]
    result.residues = residues_n + residues_c
    # .backbone keeps describing the N-terminal residue of the product.
    result.backbone = result.residues[0]

    # Set omega dihedral (CA_n - C_n - N_c - CA_c)
    junction_n = residues_n[-1]
    junction_c = residues_c[0]
    try:
        result.set_dihedral(
            junction_n.CA, junction_n.C, junction_c.N, junction_c.CA,
            omega_deg)
    except (ValueError, IndexError):
        # Best effort: keep the translated geometry when the dihedral
        # cannot be set (ring bonds or missing atoms).
        pass

    return result
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
def build_peptide(sequence: list[AminoAcidType],
                  phi_psi: list[tuple[float, float]] | None = None,
                  ) -> Molecule:
    """Assemble a polypeptide by chaining individually built residues.

    Parameters
    ----------
    sequence : list of AminoAcidType
        Residues in order from N-terminus to C-terminus.
    phi_psi : optional list of (phi, psi) angle pairs in degrees.
        Entry ``i`` is applied to residue ``i``.  Entries beyond the
        number of residues are ignored, as is the whole argument when
        the sequence has a single residue.

    Returns
    -------
    Molecule
        The lone amino acid for a one-element sequence, otherwise the
        chain produced by repeated ``form_peptide_bond`` calls.

    Raises
    ------
    ValueError
        If *sequence* is empty.
    """
    n_residues = len(sequence)
    if n_residues < 1:
        raise ValueError("Sequence must have at least one amino acid")
    if n_residues == 1:
        return build_amino_acid(sequence[0])

    # Build every residue first, then grow the chain left to right.
    monomers = [build_amino_acid(kind) for kind in sequence]
    chain = monomers[0]
    for monomer in monomers[1:]:
        chain = form_peptide_bond(chain, monomer)

    if phi_psi is None or not hasattr(chain, 'residues'):
        return chain

    for idx, (phi, psi) in enumerate(phi_psi):
        if idx >= len(chain.residues):
            continue
        # Each angle is applied independently; a failure on one does not
        # prevent the other from being attempted.
        for apply_angle, value in ((set_phi, phi), (set_psi, psi)):
            try:
                apply_angle(chain, chain.residues, idx, value)
            except (ValueError, IndexError):
                pass

    return chain
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
# ===================================================================
|
|
856
|
+
# Backbone conformation control
|
|
857
|
+
# ===================================================================
|
|
858
|
+
|
|
859
|
+
def set_phi(mol: Molecule, residues: list[BackboneIndices],
            residue_index: int, angle_deg: float):
    """Rotate the backbone so residue ``residue_index`` has the given phi.

    Phi is the dihedral C(i-1) - N(i) - CA(i) - C(i), so it needs the
    preceding residue.  For ``residue_index`` below 1 the call returns
    without touching the molecule.

    Parameters
    ----------
    mol : Molecule
        Molecule whose ``set_dihedral`` method is invoked.
    residues : list of BackboneIndices
        Per-residue backbone atom indices into *mol*.
    residue_index : int
        Position of the residue within *residues*.
    angle_deg : float
        Angle passed to ``set_dihedral``.
    """
    if residue_index >= 1:
        previous, current = residues[residue_index - 1], residues[residue_index]
        mol.set_dihedral(previous.C, current.N, current.CA, current.C,
                         angle_deg)
|
|
872
|
+
|
|
873
|
+
|
|
874
|
+
def set_psi(mol: Molecule, residues: list[BackboneIndices],
            residue_index: int, angle_deg: float):
    """Set the psi angle for a residue.

    Psi = N(i) - CA(i) - C(i) - N(i+1).
    Only valid for 0 <= residue_index < len(residues) - 1 (the last
    residue has no following N).  For any other index the call returns
    without touching the molecule.

    Parameters
    ----------
    mol : Molecule
        Molecule whose ``set_dihedral`` method is invoked.
    residues : list of BackboneIndices
        Per-residue backbone atom indices into *mol*.
    residue_index : int
        Position of the residue within *residues*.
    angle_deg : float
        Angle passed to ``set_dihedral``.
    """
    # A negative index must be rejected explicitly: Python's negative
    # indexing would otherwise wrap around (e.g. -1 pairs the last
    # residue with the first) and set a dihedral across residues that
    # are not adjacent.  set_phi already ignores negative indices.
    if not 0 <= residue_index < len(residues) - 1:
        return
    bb = residues[residue_index]
    bb_next = residues[residue_index + 1]
    mol.set_dihedral(bb.N, bb.CA, bb.C, bb_next.N, angle_deg)
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
def set_phi_psi(mol: Molecule, residues: list[BackboneIndices],
                residue_index: int, phi_deg: float, psi_deg: float):
    """Apply phi and then psi to one residue.

    Delegates to ``set_phi`` followed by ``set_psi`` with the same
    molecule, residue list and index.  An exception from either call
    propagates to the caller.
    """
    for apply_angle, value in ((set_phi, phi_deg), (set_psi, psi_deg)):
        apply_angle(mol, residues, residue_index, value)
|
|
894
|
+
|
|
895
|
+
|
|
896
|
+
def set_secondary_structure(mol: Molecule,
                            residues: list[BackboneIndices],
                            structure: SecondaryStructure):
    """Give every residue the phi/psi pair of one secondary structure.

    The (phi, psi) pair is looked up from the module constant matching
    *structure* (a ``KeyError`` is raised for an unlisted value) and then
    applied residue by residue.  ``ValueError`` / ``IndexError`` raised
    while setting an individual angle are ignored, so the remaining
    angles are still attempted.
    """
    phi_psi_by_structure = {
        SecondaryStructure.ALPHA_HELIX: ALPHA_HELIX,
        SecondaryStructure.BETA_SHEET: BETA_SHEET,
        SecondaryStructure.POLYPROLINE_II: POLYPROLINE_II,
        SecondaryStructure.EXTENDED: EXTENDED,
    }
    phi, psi = phi_psi_by_structure[structure]

    for idx in range(len(residues)):
        # phi and psi are attempted independently for each residue.
        for apply_angle, value in ((set_phi, phi), (set_psi, psi)):
            try:
                apply_angle(mol, residues, idx, value)
            except (ValueError, IndexError):
                pass
|
|
916
|
+
|
|
917
|
+
|
|
918
|
+
# Alias for the peptides.py re-export
|
|
919
|
+
# Bound to the same function object as set_secondary_structure, so the
# two names can be used interchangeably.
apply_secondary_structure = set_secondary_structure
|