molbuilder 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. molbuilder/__init__.py +8 -0
  2. molbuilder/__main__.py +6 -0
  3. molbuilder/atomic/__init__.py +4 -0
  4. molbuilder/atomic/bohr.py +235 -0
  5. molbuilder/atomic/quantum_atom.py +334 -0
  6. molbuilder/atomic/quantum_numbers.py +196 -0
  7. molbuilder/atomic/wavefunctions.py +297 -0
  8. molbuilder/bonding/__init__.py +4 -0
  9. molbuilder/bonding/covalent.py +442 -0
  10. molbuilder/bonding/lewis.py +347 -0
  11. molbuilder/bonding/vsepr.py +433 -0
  12. molbuilder/cli/__init__.py +1 -0
  13. molbuilder/cli/demos.py +516 -0
  14. molbuilder/cli/menu.py +127 -0
  15. molbuilder/cli/wizard.py +831 -0
  16. molbuilder/core/__init__.py +6 -0
  17. molbuilder/core/bond_data.py +170 -0
  18. molbuilder/core/constants.py +51 -0
  19. molbuilder/core/element_properties.py +183 -0
  20. molbuilder/core/elements.py +181 -0
  21. molbuilder/core/geometry.py +232 -0
  22. molbuilder/gui/__init__.py +2 -0
  23. molbuilder/gui/app.py +286 -0
  24. molbuilder/gui/canvas3d.py +115 -0
  25. molbuilder/gui/dialogs.py +117 -0
  26. molbuilder/gui/event_handler.py +118 -0
  27. molbuilder/gui/sidebar.py +105 -0
  28. molbuilder/gui/toolbar.py +71 -0
  29. molbuilder/io/__init__.py +1 -0
  30. molbuilder/io/json_io.py +146 -0
  31. molbuilder/io/mol_sdf.py +169 -0
  32. molbuilder/io/pdb.py +184 -0
  33. molbuilder/io/smiles_io.py +47 -0
  34. molbuilder/io/xyz.py +103 -0
  35. molbuilder/molecule/__init__.py +2 -0
  36. molbuilder/molecule/amino_acids.py +919 -0
  37. molbuilder/molecule/builders.py +257 -0
  38. molbuilder/molecule/conformations.py +70 -0
  39. molbuilder/molecule/functional_groups.py +484 -0
  40. molbuilder/molecule/graph.py +712 -0
  41. molbuilder/molecule/peptides.py +13 -0
  42. molbuilder/molecule/stereochemistry.py +6 -0
  43. molbuilder/process/__init__.py +3 -0
  44. molbuilder/process/conditions.py +260 -0
  45. molbuilder/process/costing.py +316 -0
  46. molbuilder/process/purification.py +285 -0
  47. molbuilder/process/reactor.py +297 -0
  48. molbuilder/process/safety.py +476 -0
  49. molbuilder/process/scale_up.py +427 -0
  50. molbuilder/process/solvent_systems.py +204 -0
  51. molbuilder/reactions/__init__.py +3 -0
  52. molbuilder/reactions/functional_group_detect.py +728 -0
  53. molbuilder/reactions/knowledge_base.py +1716 -0
  54. molbuilder/reactions/reaction_types.py +102 -0
  55. molbuilder/reactions/reagent_data.py +1248 -0
  56. molbuilder/reactions/retrosynthesis.py +1430 -0
  57. molbuilder/reactions/synthesis_route.py +377 -0
  58. molbuilder/reports/__init__.py +158 -0
  59. molbuilder/reports/cost_report.py +206 -0
  60. molbuilder/reports/molecule_report.py +279 -0
  61. molbuilder/reports/safety_report.py +296 -0
  62. molbuilder/reports/synthesis_report.py +283 -0
  63. molbuilder/reports/text_formatter.py +170 -0
  64. molbuilder/smiles/__init__.py +4 -0
  65. molbuilder/smiles/parser.py +487 -0
  66. molbuilder/smiles/tokenizer.py +291 -0
  67. molbuilder/smiles/writer.py +375 -0
  68. molbuilder/visualization/__init__.py +1 -0
  69. molbuilder/visualization/bohr_viz.py +166 -0
  70. molbuilder/visualization/molecule_viz.py +368 -0
  71. molbuilder/visualization/quantum_viz.py +434 -0
  72. molbuilder/visualization/theme.py +12 -0
  73. molbuilder-1.0.0.dist-info/METADATA +360 -0
  74. molbuilder-1.0.0.dist-info/RECORD +78 -0
  75. molbuilder-1.0.0.dist-info/WHEEL +5 -0
  76. molbuilder-1.0.0.dist-info/entry_points.txt +2 -0
  77. molbuilder-1.0.0.dist-info/licenses/LICENSE +21 -0
  78. molbuilder-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,919 @@
1
+ """
2
+ Amino Acid Structures and Functional Groups
3
+
4
+ Builds all 20 standard amino acids with correct 3D geometry using the
5
+ Molecule class from molbuilder.molecule.graph. Provides:
6
+
7
+ - Functional group builder functions (hydroxyl, amino, carboxyl, etc.)
8
+ - Planar ring builders (phenyl, imidazole, indole)
9
+ - Amino acid backbone template with L-chirality enforcement
10
+ - Side chain builders for all 20 standard amino acids
11
+ - Peptide bond formation (condensation reaction)
12
+ - Backbone conformation control (phi/psi angles)
13
+ - Secondary structure presets (alpha helix, beta sheet)
14
+
15
+ All coordinates are in Angstroms. No Unicode characters are used
16
+ (Windows cp1252 compatibility).
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import math
22
+ from dataclasses import dataclass, field
23
+ from enum import Enum, auto
24
+
25
+ import numpy as np
26
+
27
+ from molbuilder.molecule.graph import Molecule, Hybridization, Bond as MolBond
28
+ from molbuilder.core.bond_data import bond_length, SP3_ANGLE, SP2_ANGLE, SP_ANGLE
29
+ from molbuilder.core.geometry import normalize, available_tetrahedral_dirs, place_atom_zmatrix
30
+ from molbuilder.core.geometry import add_sp3_hydrogens
31
+ from molbuilder.core.geometry import rotation_matrix
32
+ from molbuilder.molecule.functional_groups import (
33
+ add_hydroxyl,
34
+ add_amino,
35
+ add_carboxyl,
36
+ add_carbonyl,
37
+ add_amide,
38
+ add_thiol,
39
+ add_phenyl_ring,
40
+ add_imidazole_ring,
41
+ add_indole_ring,
42
+ add_guanidinium,
43
+ AMIDE_CN,
44
+ SS_BOND,
45
+ CC_AROMATIC,
46
+ CN_AROMATIC,
47
+ CO_CARBOXYL,
48
+ )
49
+
50
+
51
+ # ===================================================================
52
+ # Constants
53
+ # ===================================================================
54
+
55
# Backbone dihedral presets as (phi, psi) pairs, both in degrees.
ALPHA_HELIX: tuple[float, float] = (-57.0, -47.0)
BETA_SHEET: tuple[float, float] = (-135.0, 135.0)
POLYPROLINE_II: tuple[float, float] = (-75.0, 145.0)
EXTENDED: tuple[float, float] = (-180.0, 180.0)
60
+
61
+
62
+ # ===================================================================
63
+ # Enumerations
64
+ # ===================================================================
65
+
66
class AminoAcidType(Enum):
    """Identifiers for the 20 standard amino acids.

    Members carry the values 1..20 in declaration order, which is
    exactly what ``auto()`` would assign.
    """
    GLY = 1
    ALA = 2
    VAL = 3
    LEU = 4
    ILE = 5
    PRO = 6
    PHE = 7
    TRP = 8
    MET = 9
    SER = 10
    THR = 11
    CYS = 12
    TYR = 13
    ASN = 14
    GLN = 15
    ASP = 16
    GLU = 17
    LYS = 18
    ARG = 19
    HIS = 20
88
+
89
+
90
class SecondaryStructure(Enum):
    """Named backbone conformations that have a phi/psi preset.

    Values are 1..4 in declaration order, matching what ``auto()``
    would assign.
    """
    ALPHA_HELIX = 1
    BETA_SHEET = 2
    POLYPROLINE_II = 3
    EXTENDED = 4
96
+
97
+
98
+ # ===================================================================
99
+ # Backbone indices dataclass
100
+ # ===================================================================
101
+
102
@dataclass
class BackboneIndices:
    """Record of where the backbone atoms of one residue live in a Molecule.

    Every field is an atom index into the owning molecule; optional
    fields are None when that atom does not exist. The builder attaches
    an instance to the molecule as ``mol.backbone``.
    """
    N: int                  # amino nitrogen
    H_N: int | None         # H on N; None when the residue has no such hydrogen
    CA: int                 # alpha carbon
    HA: int                 # H on the alpha carbon
    C: int                  # carbonyl carbon
    O: int                  # carbonyl oxygen (C=O)
    CB: int | None          # beta carbon (None for glycine)
    H_Nterm: int | None     # second H on a free N-terminus (None for proline
                            # and for a residue whose N was condensed)
    OXT: int | None         # hydroxyl O of a free C-terminus
    H_OXT: int | None       # H on OXT

    def __repr__(self):
        # Only the heavy-atom indices are shown to keep the repr short.
        shown = ", ".join(
            f"{label}={getattr(self, label)}"
            for label in ("N", "CA", "C", "O", "CB")
        )
        return f"Backbone({shown})"
122
+
123
+
124
+ # ===================================================================
125
+ # Amino acid backbone builder
126
+ # ===================================================================
127
+
128
def _build_backbone(name: str, has_cb: bool = True,
                    is_proline: bool = False) -> Molecule:
    """Create the side-chain-free skeleton H2N-CA(H)(R)-C(=O)-OH.

    Atoms are placed one after another from internal (z-matrix style)
    coordinates, in this fixed order: N, CA, C, O, OXT, H_OXT, H_N,
    H_Nterm, HA, CB. The order determines the atom indices, so it must
    not be changed.

    Parameters
    ----------
    name : label given to the new Molecule.
    has_cb : when True a beta carbon is attached to CA at a -120 deg
        dihedral (the original author's choice for the L-configuration).
    is_proline : when True the second N-terminal hydrogen is omitted,
        leaving a single H on N.

    Returns
    -------
    Molecule carrying a ``backbone`` attribute (BackboneIndices).
    """
    mol = Molecule(name)

    # Keyword bundle shared by all hydrogens hung on sp3 centres.
    fixed_h = dict(bond_order=1, bond_angle_deg=SP3_ANGLE, rotatable=False)

    # Amino nitrogen sits at the origin.
    nitrogen = mol.add_atom("N", [0.0, 0.0, 0.0], Hybridization.SP3)

    # Alpha carbon, one C-N single bond away from N.
    alpha_c = mol.add_atom_bonded(
        "C", nitrogen, bond_order=1,
        bond_length=bond_length("C", "N", 1),
        hybridization=Hybridization.SP3)

    # Carbonyl carbon.
    carbonyl_c = mol.add_atom_bonded(
        "C", alpha_c, bond_order=1, angle_ref=nitrogen,
        bond_angle_deg=SP3_ANGLE,
        dihedral_deg=180.0,
        hybridization=Hybridization.SP2)

    # Carbonyl oxygen (double bond).
    carbonyl_o = mol.add_atom_bonded(
        "O", carbonyl_c, bond_order=2, angle_ref=alpha_c,
        bond_angle_deg=SP2_ANGLE, dihedral_deg=0.0,
        rotatable=False)

    # C-terminal hydroxyl: oxygen, then its hydrogen.
    hydroxyl_o = mol.add_atom_bonded(
        "O", carbonyl_c, bond_order=1, angle_ref=alpha_c,
        bond_angle_deg=SP2_ANGLE, dihedral_deg=180.0,
        hybridization=Hybridization.SP3)
    hydroxyl_h = mol.add_atom_bonded(
        "H", hydroxyl_o, angle_ref=carbonyl_c, dihedral_deg=0.0, **fixed_h)

    # First hydrogen on N is always present ...
    amine_h = mol.add_atom_bonded(
        "H", nitrogen, angle_ref=alpha_c, dihedral_deg=120.0, **fixed_h)

    # ... the second one is skipped for proline.
    second_amine_h = None if is_proline else mol.add_atom_bonded(
        "H", nitrogen, angle_ref=alpha_c, dihedral_deg=-120.0, **fixed_h)

    # HA at +120 deg and CB at -120 deg about N-CA (measured against C);
    # per the original author's note this pair of positions is what
    # encodes L-chirality (S in CIP terms, R for cysteine).
    alpha_h = mol.add_atom_bonded(
        "H", alpha_c, angle_ref=nitrogen, dihedral_ref=carbonyl_c,
        dihedral_deg=120.0, **fixed_h)

    beta_c = mol.add_atom_bonded(
        "C", alpha_c, bond_order=1, angle_ref=nitrogen,
        dihedral_ref=carbonyl_c,
        bond_angle_deg=SP3_ANGLE, dihedral_deg=-120.0,
        hybridization=Hybridization.SP3) if has_cb else None

    mol.backbone = BackboneIndices(
        N=nitrogen, H_N=amine_h, CA=alpha_c, HA=alpha_h,
        C=carbonyl_c, O=carbonyl_o, CB=beta_c,
        H_Nterm=second_amine_h, OXT=hydroxyl_o, H_OXT=hydroxyl_h,
    )
    return mol
214
+
215
+
216
+ # ===================================================================
217
+ # Side chain builders
218
+ # ===================================================================
219
+
220
def _sidechain_gly(mol: Molecule):
    """Glycine has no side chain: give CA a second hydrogen instead.

    The new H goes into whichever tetrahedral direction is still free
    around CA; if none is reported, the molecule is left untouched.
    """
    bb = mol.backbone
    anchor = mol.atoms[bb.CA].position

    # Unit vectors from CA towards each atom already bonded to it.
    occupied = []
    for nbr in mol.neighbors(bb.CA):
        occupied.append(normalize(mol.atoms[nbr].position - anchor))

    free_dirs = available_tetrahedral_dirs(occupied, 1)
    if not free_dirs:
        return

    extra_h = mol.add_atom(
        "H", anchor + bond_length("C", "H", 1) * free_dirs[0])
    mol.add_bond(bb.CA, extra_h, order=1, rotatable=False)
234
+
235
+
236
def _sidechain_ala(mol: Molecule):
    """Alanine side chain: a methyl group (-CH3).

    The beta carbon is already part of the backbone, so only its three
    hydrogens are added.
    """
    beta_carbon = mol.backbone.CB
    add_sp3_hydrogens(mol, beta_carbon, 3)
240
+
241
+
242
def _sidechain_val(mol: Molecule):
    """Valine side chain: isopropyl, -CH(CH3)2."""
    backbone = mol.backbone
    beta = backbone.CB

    # CG1 at +60 deg, then CG2 at -60 deg about CA-CB; each methyl gets
    # its three hydrogens immediately after it is placed.
    for torsion in (60.0, -60.0):
        methyl = mol.add_atom_bonded(
            "C", beta, bond_order=1, angle_ref=backbone.CA,
            bond_angle_deg=SP3_ANGLE, dihedral_deg=torsion,
            hybridization=Hybridization.SP3)
        add_sp3_hydrogens(mol, methyl, 3)

    # Remaining hydrogen (HB) on the beta carbon.
    add_sp3_hydrogens(mol, beta, 1)
263
+
264
+
265
def _sidechain_leu(mol: Molecule):
    """Leucine side chain: isobutyl, -CH2-CH(CH3)2."""
    backbone = mol.backbone
    beta = backbone.CB
    sp3_carbon = dict(bond_order=1, bond_angle_deg=SP3_ANGLE,
                      hybridization=Hybridization.SP3)

    # CG continues the chain anti to CA.
    gamma = mol.add_atom_bonded(
        "C", beta, angle_ref=backbone.CA, dihedral_deg=180.0, **sp3_carbon)

    # CD1 (+60 deg) and CD2 (-60 deg), each capped with three hydrogens
    # right after placement.
    for torsion in (60.0, -60.0):
        delta = mol.add_atom_bonded(
            "C", gamma, angle_ref=beta, dihedral_deg=torsion, **sp3_carbon)
        add_sp3_hydrogens(mol, delta, 3)

    add_sp3_hydrogens(mol, gamma, 1)   # HG
    add_sp3_hydrogens(mol, beta, 2)    # HB1, HB2
289
+
290
+
291
def _sidechain_ile(mol: Molecule):
    """Isoleucine side chain: sec-butyl, -CH(CH3)-CH2-CH3."""
    backbone = mol.backbone
    beta = backbone.CB
    sp3_carbon = dict(bond_order=1, bond_angle_deg=SP3_ANGLE,
                      hybridization=Hybridization.SP3)

    # Ethyl arm: CG1 then CD1, both anti (180 deg).
    gamma1 = mol.add_atom_bonded(
        "C", beta, angle_ref=backbone.CA, dihedral_deg=180.0, **sp3_carbon)
    delta1 = mol.add_atom_bonded(
        "C", gamma1, angle_ref=beta, dihedral_deg=180.0, **sp3_carbon)
    add_sp3_hydrogens(mol, delta1, 3)
    add_sp3_hydrogens(mol, gamma1, 2)

    # Methyl branch CG2 at -60 deg.
    gamma2 = mol.add_atom_bonded(
        "C", beta, angle_ref=backbone.CA, dihedral_deg=-60.0, **sp3_carbon)
    add_sp3_hydrogens(mol, gamma2, 3)

    add_sp3_hydrogens(mol, beta, 1)    # HB
317
+
318
+
319
def _sidechain_pro(mol: Molecule):
    """Proline side chain: CB-CG-CD closed back onto the backbone N."""
    backbone = mol.backbone
    beta = backbone.CB
    sp3_carbon = dict(bond_order=1, bond_angle_deg=SP3_ANGLE,
                      hybridization=Hybridization.SP3)

    gamma = mol.add_atom_bonded(
        "C", beta, angle_ref=backbone.CA, dihedral_deg=180.0, **sp3_carbon)
    delta = mol.add_atom_bonded(
        "C", gamma, angle_ref=beta, dihedral_deg=30.0, **sp3_carbon)

    # Ring closure: bond CD to the amino nitrogen.
    mol.close_ring(delta, backbone.N)

    # Two hydrogens on each ring carbon, CB first.
    for ring_carbon in (beta, gamma, delta):
        add_sp3_hydrogens(mol, ring_carbon, 2)
340
+
341
+
342
def _sidechain_phe(mol: Molecule):
    """Phenylalanine side chain: benzyl, -CH2-phenyl."""
    backbone = mol.backbone
    beta_carbon, alpha_carbon = backbone.CB, backbone.CA
    # Methylene hydrogens first, then the ring at a 90 deg torsion.
    add_sp3_hydrogens(mol, beta_carbon, 2)
    add_phenyl_ring(mol, beta_carbon, angle_ref=alpha_carbon,
                    dihedral_deg=90.0)
348
+
349
+
350
def _sidechain_trp(mol: Molecule):
    """Tryptophan side chain: -CH2-indole."""
    backbone = mol.backbone
    beta_carbon, alpha_carbon = backbone.CB, backbone.CA
    # Methylene hydrogens first, then the indole at a 90 deg torsion.
    add_sp3_hydrogens(mol, beta_carbon, 2)
    add_indole_ring(mol, beta_carbon, angle_ref=alpha_carbon,
                    dihedral_deg=90.0)
356
+
357
+
358
def _sidechain_met(mol: Molecule):
    """Methionine side chain: thioether, -CH2-CH2-S-CH3."""
    backbone = mol.backbone
    beta = backbone.CB
    anti_sp3 = dict(bond_order=1, dihedral_deg=180.0,
                    hybridization=Hybridization.SP3)

    gamma = mol.add_atom_bonded(
        "C", beta, angle_ref=backbone.CA,
        bond_angle_deg=SP3_ANGLE, **anti_sp3)
    sulfur = mol.add_atom_bonded(
        "S", gamma, angle_ref=beta,
        bond_angle_deg=SP3_ANGLE, **anti_sp3)
    # The C-S-C angle is set to 100 deg instead of the tetrahedral value.
    epsilon = mol.add_atom_bonded(
        "C", sulfur, angle_ref=gamma,
        bond_angle_deg=100.0, **anti_sp3)

    # Hydrogens from the chain end back towards the backbone.
    for carbon, n_hydrogens in ((epsilon, 3), (gamma, 2), (beta, 2)):
        add_sp3_hydrogens(mol, carbon, n_hydrogens)
382
+
383
+
384
def _sidechain_ser(mol: Molecule):
    """Serine side chain: hydroxymethyl, -CH2-OH."""
    backbone = mol.backbone
    beta_carbon, alpha_carbon = backbone.CB, backbone.CA
    # Methylene hydrogens first, then the hydroxyl at a 60 deg torsion.
    add_sp3_hydrogens(mol, beta_carbon, 2)
    add_hydroxyl(mol, beta_carbon, angle_ref=alpha_carbon, dihedral_deg=60.0)
390
+
391
+
392
def _sidechain_thr(mol: Molecule):
    """Threonine side chain: -CH(OH)-CH3."""
    backbone = mol.backbone
    beta = backbone.CB
    on_beta = dict(bond_order=1, angle_ref=backbone.CA,
                   bond_angle_deg=SP3_ANGLE,
                   hybridization=Hybridization.SP3)

    # Hydroxyl oxygen OG1 at +60 deg, followed by its hydrogen.
    hydroxyl_o = mol.add_atom_bonded("O", beta, dihedral_deg=60.0, **on_beta)
    mol.add_atom_bonded(
        "H", hydroxyl_o, bond_order=1, angle_ref=beta,
        bond_angle_deg=SP3_ANGLE, dihedral_deg=180.0,
        rotatable=False)

    # Methyl CG2 at -60 deg with its three hydrogens.
    methyl = mol.add_atom_bonded("C", beta, dihedral_deg=-60.0, **on_beta)
    add_sp3_hydrogens(mol, methyl, 3)

    add_sp3_hydrogens(mol, beta, 1)    # HB
415
+
416
+
417
def _sidechain_cys(mol: Molecule):
    """Cysteine side chain: -CH2-SH."""
    backbone = mol.backbone
    beta_carbon, alpha_carbon = backbone.CB, backbone.CA
    # Methylene hydrogens first, then the thiol at a 60 deg torsion.
    add_sp3_hydrogens(mol, beta_carbon, 2)
    add_thiol(mol, beta_carbon, angle_ref=alpha_carbon, dihedral_deg=60.0)
423
+
424
+
425
def _sidechain_tyr(mol: Molecule):
    """Tyrosine side chain: -CH2-phenyl-OH.

    Known approximation (kept from the original author's note): the ring
    builder has already put a hydrogen on the para carbon and atoms
    cannot easily be removed from a Molecule, so the hydroxyl is added
    on top of it and the para carbon ends up with one hydrogen too many.
    """
    backbone = mol.backbone
    beta = backbone.CB
    add_sp3_hydrogens(mol, beta, 2)

    phenyl = add_phenyl_ring(mol, beta, angle_ref=backbone.CA,
                             dihedral_deg=90.0)

    # Ring entry 3 is used as the para carbon (opposite the attachment
    # point); its ring neighbour, entry 2, is the angle reference.
    para_carbon = phenyl["ring"][3]
    add_hydroxyl(mol, para_carbon, angle_ref=phenyl["ring"][2])
440
+
441
+
442
def _sidechain_asn(mol: Molecule):
    """Asparagine side chain: -CH2-CONH2."""
    backbone = mol.backbone
    beta_carbon, alpha_carbon = backbone.CB, backbone.CA
    # Methylene hydrogens first, then the amide, anti to CA.
    add_sp3_hydrogens(mol, beta_carbon, 2)
    add_amide(mol, beta_carbon, angle_ref=alpha_carbon, dihedral_deg=180.0)
448
+
449
+
450
def _sidechain_gln(mol: Molecule):
    """Glutamine side chain: -CH2-CH2-CONH2."""
    backbone = mol.backbone
    beta = backbone.CB

    # CG extends the chain anti to CA.
    gamma = mol.add_atom_bonded(
        "C", beta, bond_order=1, angle_ref=backbone.CA,
        bond_angle_deg=SP3_ANGLE, dihedral_deg=180.0,
        hybridization=Hybridization.SP3)

    # Methylene hydrogens on CG, then CB, before the amide goes on CG.
    for methylene in (gamma, beta):
        add_sp3_hydrogens(mol, methylene, 2)
    add_amide(mol, gamma, angle_ref=beta, dihedral_deg=180.0)
463
+
464
+
465
def _sidechain_asp(mol: Molecule):
    """Aspartate side chain: -CH2- followed by a carboxyl group."""
    backbone = mol.backbone
    beta_carbon, alpha_carbon = backbone.CB, backbone.CA
    # Methylene hydrogens first, then the carboxyl, anti to CA.
    add_sp3_hydrogens(mol, beta_carbon, 2)
    add_carboxyl(mol, beta_carbon, angle_ref=alpha_carbon, dihedral_deg=180.0)
471
+
472
+
473
def _sidechain_glu(mol: Molecule):
    """Glutamate side chain: -CH2-CH2- followed by a carboxyl group."""
    backbone = mol.backbone
    beta = backbone.CB

    # CG extends the chain anti to CA.
    gamma = mol.add_atom_bonded(
        "C", beta, bond_order=1, angle_ref=backbone.CA,
        bond_angle_deg=SP3_ANGLE, dihedral_deg=180.0,
        hybridization=Hybridization.SP3)

    # Methylene hydrogens on CG, then CB, before the carboxyl goes on CG.
    for methylene in (gamma, beta):
        add_sp3_hydrogens(mol, methylene, 2)
    add_carboxyl(mol, gamma, angle_ref=beta, dihedral_deg=180.0)
486
+
487
+
488
def _sidechain_lys(mol: Molecule):
    """Lysine side chain: -(CH2)4-NH2."""
    backbone = mol.backbone

    # Grow CG, CD, CE one after another; every new carbon is anti
    # (180 deg) and uses the atom two steps back as its angle reference.
    chain = [backbone.CA, backbone.CB]
    for _ in range(3):
        chain.append(mol.add_atom_bonded(
            "C", chain[-1], bond_order=1, angle_ref=chain[-2],
            bond_angle_deg=SP3_ANGLE, dihedral_deg=180.0,
            hybridization=Hybridization.SP3))

    # Methylene hydrogens from CE back to CB.
    for methylene in reversed(chain[1:]):
        add_sp3_hydrogens(mol, methylene, 2)

    # Terminal amine on CE, referenced to CD.
    add_amino(mol, chain[-1], angle_ref=chain[-2], dihedral_deg=180.0)
514
+
515
+
516
def _sidechain_arg(mol: Molecule):
    """Arginine side chain: -(CH2)3-NH-C(=NH)(NH2)."""
    backbone = mol.backbone
    beta = backbone.CB
    anti_sp3 = dict(bond_order=1, bond_angle_deg=SP3_ANGLE,
                    dihedral_deg=180.0, hybridization=Hybridization.SP3)

    # Heavy-atom chain CB -> CG -> CD -> NE, every step anti.
    gamma = mol.add_atom_bonded("C", beta, angle_ref=backbone.CA, **anti_sp3)
    delta = mol.add_atom_bonded("C", gamma, angle_ref=beta, **anti_sp3)
    epsilon_n = mol.add_atom_bonded("N", delta, angle_ref=gamma, **anti_sp3)

    # The single hydrogen carried by NE.
    mol.add_atom_bonded(
        "H", epsilon_n, bond_order=1, angle_ref=delta,
        bond_angle_deg=SP3_ANGLE, dihedral_deg=120.0,
        rotatable=False)

    # Methylene hydrogens from CD back to CB.
    for methylene in (delta, gamma, beta):
        add_sp3_hydrogens(mol, methylene, 2)

    # Guanidinium head group on NE.
    add_guanidinium(mol, epsilon_n, angle_ref=delta, dihedral_deg=180.0)
547
+
548
+
549
def _sidechain_his(mol: Molecule):
    """Histidine side chain: -CH2-imidazole."""
    backbone = mol.backbone
    beta_carbon, alpha_carbon = backbone.CB, backbone.CA
    # Methylene hydrogens first, then the imidazole at a 90 deg torsion.
    add_sp3_hydrogens(mol, beta_carbon, 2)
    add_imidazole_ring(mol, beta_carbon, angle_ref=alpha_carbon,
                       dihedral_deg=90.0)
555
+
556
+
557
+ # ===================================================================
558
+ # Amino acid data and factory
559
+ # ===================================================================
560
+
561
# Per-residue metadata, keyed by AminoAcidType. Each value holds the full
# name ("name"), the one- and three-letter codes ("code1", "code3"),
# whether the backbone gets a beta carbon ("has_cb") and the function
# that attaches the side chain ("builder").
AMINO_ACID_DATA: dict[AminoAcidType, dict] = {
    aa_type: {
        "name": full_name, "code1": one_letter, "code3": three_letter,
        "has_cb": carries_cb, "builder": sidechain_builder,
    }
    for (aa_type, full_name, one_letter, three_letter,
         carries_cb, sidechain_builder) in (
        (AminoAcidType.GLY, "Glycine", "G", "Gly", False, _sidechain_gly),
        (AminoAcidType.ALA, "Alanine", "A", "Ala", True, _sidechain_ala),
        (AminoAcidType.VAL, "Valine", "V", "Val", True, _sidechain_val),
        (AminoAcidType.LEU, "Leucine", "L", "Leu", True, _sidechain_leu),
        (AminoAcidType.ILE, "Isoleucine", "I", "Ile", True, _sidechain_ile),
        (AminoAcidType.PRO, "Proline", "P", "Pro", True, _sidechain_pro),
        (AminoAcidType.PHE, "Phenylalanine", "F", "Phe", True, _sidechain_phe),
        (AminoAcidType.TRP, "Tryptophan", "W", "Trp", True, _sidechain_trp),
        (AminoAcidType.MET, "Methionine", "M", "Met", True, _sidechain_met),
        (AminoAcidType.SER, "Serine", "S", "Ser", True, _sidechain_ser),
        (AminoAcidType.THR, "Threonine", "T", "Thr", True, _sidechain_thr),
        (AminoAcidType.CYS, "Cysteine", "C", "Cys", True, _sidechain_cys),
        (AminoAcidType.TYR, "Tyrosine", "Y", "Tyr", True, _sidechain_tyr),
        (AminoAcidType.ASN, "Asparagine", "N", "Asn", True, _sidechain_asn),
        (AminoAcidType.GLN, "Glutamine", "Q", "Gln", True, _sidechain_gln),
        (AminoAcidType.ASP, "Aspartate", "D", "Asp", True, _sidechain_asp),
        (AminoAcidType.GLU, "Glutamate", "E", "Glu", True, _sidechain_glu),
        (AminoAcidType.LYS, "Lysine", "K", "Lys", True, _sidechain_lys),
        (AminoAcidType.ARG, "Arginine", "R", "Arg", True, _sidechain_arg),
        (AminoAcidType.HIS, "Histidine", "H", "His", True, _sidechain_his),
    )
}
643
+
644
+
645
def build_amino_acid(aa_type: AminoAcidType) -> Molecule:
    """Construct one free amino acid in 3D.

    The backbone is built first, then the residue-specific side-chain
    builder from AMINO_ACID_DATA is run on it.

    Parameters
    ----------
    aa_type : which of the standard amino acids to build.

    Returns
    -------
    Molecule whose ``backbone`` attribute (BackboneIndices) records the
    backbone atom indices needed later for peptide bond formation.

    Raises
    ------
    KeyError if aa_type has no entry in AMINO_ACID_DATA.
    """
    entry = AMINO_ACID_DATA[aa_type]
    residue = _build_backbone(
        entry["name"],
        has_cb=entry["has_cb"],
        is_proline=(aa_type == AminoAcidType.PRO),
    )
    attach_sidechain = entry["builder"]
    attach_sidechain(residue)
    return residue
658
+
659
+
660
+ # ===================================================================
661
+ # Peptide bond formation
662
+ # ===================================================================
663
+
664
def _remap_backbone(bb: BackboneIndices,
                    remap: dict[int, int]) -> BackboneIndices:
    """Re-express one residue's backbone indices in a merged molecule.

    ``remap`` maps old atom index -> new atom index and has no entry for
    atoms that were dropped during the merge. Optional fields whose atom
    was dropped (or that were already None) come back as None.
    """
    def optional(old_idx):
        return None if old_idx is None else remap.get(old_idx)

    return BackboneIndices(
        N=remap[bb.N],
        H_N=optional(bb.H_N),
        CA=remap[bb.CA],
        HA=remap[bb.HA],
        C=remap[bb.C],
        O=remap[bb.O],
        CB=optional(bb.CB),
        H_Nterm=optional(bb.H_Nterm),
        OXT=optional(bb.OXT),
        H_OXT=optional(bb.H_OXT),
    )


def form_peptide_bond(mol_nterm: Molecule, mol_cterm: Molecule,
                      omega_deg: float = 180.0) -> Molecule:
    """Join two amino acids / peptides via a peptide (amide) bond.

    The C-terminus of ``mol_nterm`` is condensed with the N-terminus of
    ``mol_cterm``: the OXT/H_OXT pair of the former and one N-H of the
    latter are left out of the result (the H2O that is lost).

    Either argument may be a single amino acid (only ``.backbone`` set)
    or a peptide produced by an earlier call (``.residues`` set). For a
    peptide, the *last* residue of ``mol_nterm`` and the *first* residue
    of ``mol_cterm`` take part in the new bond.

    Parameters
    ----------
    mol_nterm : molecule supplying the N-terminal side of the new bond.
    mol_cterm : molecule supplying the C-terminal side of the new bond.
    omega_deg : target omega dihedral (CA-C-N-CA) in degrees; applied on
        a best-effort basis.

    Returns
    -------
    A new Molecule. ``result.residues`` lists the BackboneIndices of all
    residues in chain order and ``result.backbone`` is the first of
    them. The inputs are not modified.
    """
    # Residue records of both sides; a lone amino acid has just one.
    n_side = list(getattr(mol_nterm, "residues", None)
                  or [mol_nterm.backbone])
    c_side = list(getattr(mol_cterm, "residues", None)
                  or [mol_cterm.backbone])

    # The bond is formed at the chain ends: using mol_nterm.backbone here
    # would pick the FIRST residue of an existing peptide and attach
    # every further residue to residue 1.
    bb_n = n_side[-1]
    bb_c = c_side[0]

    # Atoms lost as water. On the N side a residue with a single N-H
    # (proline: H_Nterm is None) gives up that one, otherwise the amide
    # nitrogen would keep a fourth bond.
    skip_nterm = {idx for idx in (bb_n.OXT, bb_n.H_OXT) if idx is not None}
    leaving_h = bb_c.H_Nterm if bb_c.H_Nterm is not None else bb_c.H_N
    skip_cterm = set() if leaving_h is None else {leaving_h}

    result = Molecule(f"{mol_nterm.name}-{mol_cterm.name}")

    # Copy the N-side atoms unchanged, recording old -> new indices.
    remap_n: dict[int, int] = {}
    for atom in mol_nterm.atoms:
        if atom.index in skip_nterm:
            continue
        remap_n[atom.index] = result.add_atom(
            atom.symbol, atom.position.copy(), atom.hybridization)

    # Target position of the amide N: AMIDE_CN away from the carbonyl C,
    # along the bisector pointing away from both CA and O (trigonal
    # planar arrangement around C).
    c_pos = mol_nterm.atoms[bb_n.C].position
    c_to_ca = normalize(mol_nterm.atoms[bb_n.CA].position - c_pos)
    c_to_o = normalize(mol_nterm.atoms[bb_n.O].position - c_pos)
    n_dir = normalize(-(c_to_ca + c_to_o))
    target_n_pos = c_pos + n_dir * AMIDE_CN

    # NOTE(review): the C side is only translated, never rotated, so the
    # C-N-CA angle is whatever the two input orientations happen to give.
    translation = target_n_pos - mol_cterm.atoms[bb_c.N].position

    remap_c: dict[int, int] = {}
    for atom in mol_cterm.atoms:
        if atom.index in skip_cterm:
            continue
        remap_c[atom.index] = result.add_atom(
            atom.symbol, atom.position.copy() + translation,
            atom.hybridization)

    # Copy every bond whose two atoms both survived (N side first).
    for source, remap in ((mol_nterm, remap_n), (mol_cterm, remap_c)):
        for bond in source.bonds:
            if bond.atom_i in remap and bond.atom_j in remap:
                result.add_bond(remap[bond.atom_i], remap[bond.atom_j],
                                order=bond.order, rotatable=bond.rotatable)

    # The new amide bond itself.
    result.add_bond(remap_n[bb_n.C], remap_c[bb_c.N],
                    order=1, rotatable=False)

    # Carry all residue records over; fields pointing at dropped atoms
    # (OXT/H_OXT of bb_n, the leaving H of bb_c) become None on the way.
    new_n_side = [_remap_backbone(bb, remap_n) for bb in n_side]
    new_c_side = [_remap_backbone(bb, remap_c) for bb in c_side]
    result.backbone = new_n_side[0]
    result.residues = new_n_side + new_c_side

    # Omega = CA(i) - C(i) - N(i+1) - CA(i+1) across the new bond.
    joined_n, joined_c = new_n_side[-1], new_c_side[0]
    try:
        result.set_dihedral(
            joined_n.CA, joined_n.C, joined_c.N, joined_c.CA,
            omega_deg)
    except (ValueError, IndexError):
        pass  # deliberate best effort: ring bonds or missing atoms

    return result
811
+
812
+
813
def build_peptide(sequence: list[AminoAcidType],
                  phi_psi: list[tuple[float, float]] | None = None,
                  ) -> Molecule:
    """Assemble a polypeptide from an amino acid sequence.

    Parameters
    ----------
    sequence : list of AminoAcidType
        Residues from N-terminus to C-terminus.
    phi_psi : optional list of (phi, psi) pairs in degrees, one per
        residue. Entries beyond the number of residues are ignored, and
        an angle that cannot be applied is skipped silently.

    Returns
    -------
    The joined Molecule. A one-residue sequence returns the plain amino
    acid exactly as build_amino_acid produces it (phi_psi is not used).

    Raises
    ------
    ValueError if the sequence is empty.
    """
    if len(sequence) < 1:
        raise ValueError("Sequence must have at least one amino acid")
    if len(sequence) == 1:
        return build_amino_acid(sequence[0])

    # Build every residue first, then chain them left to right.
    pieces = [build_amino_acid(aa) for aa in sequence]
    peptide = pieces[0]
    for next_piece in pieces[1:]:
        peptide = form_peptide_bond(peptide, next_piece)

    if phi_psi is None or not hasattr(peptide, 'residues'):
        return peptide

    # Apply the requested torsions residue by residue; phi and psi are
    # attempted independently so one failure does not block the other.
    for position, (phi, psi) in enumerate(phi_psi):
        if position >= len(peptide.residues):
            continue
        for setter, angle in ((set_phi, phi), (set_psi, psi)):
            try:
                setter(peptide, peptide.residues, position, angle)
            except (ValueError, IndexError):
                pass

    return peptide
853
+
854
+
855
+ # ===================================================================
856
+ # Backbone conformation control
857
+ # ===================================================================
858
+
859
def set_phi(mol: Molecule, residues: list[BackboneIndices],
            residue_index: int, angle_deg: float):
    """Rotate the backbone so residue ``residue_index`` has the given phi.

    Phi is the dihedral C(i-1) - N(i) - CA(i) - C(i). The first residue
    has no preceding C, so any index below 1 is a silent no-op.
    """
    if residue_index >= 1:
        previous, current = residues[residue_index - 1:residue_index + 1] \
            if residue_index < len(residues) else \
            (residues[residue_index - 1], residues[residue_index])
        mol.set_dihedral(previous.C, current.N, current.CA, current.C,
                         angle_deg)
872
+
873
+
874
def set_psi(mol: Molecule, residues: list[BackboneIndices],
            residue_index: int, angle_deg: float):
    """Set the psi angle for a residue.

    Psi = N(i) - CA(i) - C(i) - N(i+1).

    Only residues that have a following residue own a psi angle, so the
    call is a silent no-op unless 0 <= residue_index < len(residues) - 1.
    Negative indices are rejected as well: they used to wrap around and
    pair the last residue with the first one.

    Parameters
    ----------
    mol : molecule whose dihedral is changed (via mol.set_dihedral).
    residues : backbone records of the chain, in sequence order.
    residue_index : position of the residue in ``residues``.
    angle_deg : target psi in degrees.
    """
    if not 0 <= residue_index < len(residues) - 1:
        return
    bb = residues[residue_index]
    bb_next = residues[residue_index + 1]
    mol.set_dihedral(bb.N, bb.CA, bb.C, bb_next.N, angle_deg)
887
+
888
+
889
def set_phi_psi(mol: Molecule, residues: list[BackboneIndices],
                residue_index: int, phi_deg: float, psi_deg: float):
    """Apply phi, then psi, to a single residue.

    Errors raised by either setter propagate to the caller.
    """
    for setter, angle in ((set_phi, phi_deg), (set_psi, psi_deg)):
        setter(mol, residues, residue_index, angle)
894
+
895
+
896
def set_secondary_structure(mol: Molecule,
                            residues: list[BackboneIndices],
                            structure: SecondaryStructure):
    """Give every residue the phi/psi preset of one structure type.

    Each angle is attempted on its own; a ValueError or IndexError from
    the setter is ignored so the remaining angles are still applied.

    Raises
    ------
    KeyError if ``structure`` has no preset.
    """
    preset_for = {
        SecondaryStructure.ALPHA_HELIX: ALPHA_HELIX,
        SecondaryStructure.BETA_SHEET: BETA_SHEET,
        SecondaryStructure.POLYPROLINE_II: POLYPROLINE_II,
        SecondaryStructure.EXTENDED: EXTENDED,
    }
    phi, psi = preset_for[structure]

    for position, _ in enumerate(residues):
        for setter, angle in ((set_phi, phi), (set_psi, psi)):
            try:
                setter(mol, residues, position, angle)
            except (ValueError, IndexError):
                pass


# Second public name for the same function, re-exported by peptides.py.
apply_secondary_structure = set_secondary_structure