molbuilder 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. molbuilder/__init__.py +8 -0
  2. molbuilder/__main__.py +6 -0
  3. molbuilder/atomic/__init__.py +4 -0
  4. molbuilder/atomic/bohr.py +235 -0
  5. molbuilder/atomic/quantum_atom.py +334 -0
  6. molbuilder/atomic/quantum_numbers.py +196 -0
  7. molbuilder/atomic/wavefunctions.py +297 -0
  8. molbuilder/bonding/__init__.py +4 -0
  9. molbuilder/bonding/covalent.py +442 -0
  10. molbuilder/bonding/lewis.py +347 -0
  11. molbuilder/bonding/vsepr.py +433 -0
  12. molbuilder/cli/__init__.py +1 -0
  13. molbuilder/cli/demos.py +516 -0
  14. molbuilder/cli/menu.py +127 -0
  15. molbuilder/cli/wizard.py +831 -0
  16. molbuilder/core/__init__.py +6 -0
  17. molbuilder/core/bond_data.py +170 -0
  18. molbuilder/core/constants.py +51 -0
  19. molbuilder/core/element_properties.py +183 -0
  20. molbuilder/core/elements.py +181 -0
  21. molbuilder/core/geometry.py +232 -0
  22. molbuilder/gui/__init__.py +2 -0
  23. molbuilder/gui/app.py +286 -0
  24. molbuilder/gui/canvas3d.py +115 -0
  25. molbuilder/gui/dialogs.py +117 -0
  26. molbuilder/gui/event_handler.py +118 -0
  27. molbuilder/gui/sidebar.py +105 -0
  28. molbuilder/gui/toolbar.py +71 -0
  29. molbuilder/io/__init__.py +1 -0
  30. molbuilder/io/json_io.py +146 -0
  31. molbuilder/io/mol_sdf.py +169 -0
  32. molbuilder/io/pdb.py +184 -0
  33. molbuilder/io/smiles_io.py +47 -0
  34. molbuilder/io/xyz.py +103 -0
  35. molbuilder/molecule/__init__.py +2 -0
  36. molbuilder/molecule/amino_acids.py +919 -0
  37. molbuilder/molecule/builders.py +257 -0
  38. molbuilder/molecule/conformations.py +70 -0
  39. molbuilder/molecule/functional_groups.py +484 -0
  40. molbuilder/molecule/graph.py +712 -0
  41. molbuilder/molecule/peptides.py +13 -0
  42. molbuilder/molecule/stereochemistry.py +6 -0
  43. molbuilder/process/__init__.py +3 -0
  44. molbuilder/process/conditions.py +260 -0
  45. molbuilder/process/costing.py +316 -0
  46. molbuilder/process/purification.py +285 -0
  47. molbuilder/process/reactor.py +297 -0
  48. molbuilder/process/safety.py +476 -0
  49. molbuilder/process/scale_up.py +427 -0
  50. molbuilder/process/solvent_systems.py +204 -0
  51. molbuilder/reactions/__init__.py +3 -0
  52. molbuilder/reactions/functional_group_detect.py +728 -0
  53. molbuilder/reactions/knowledge_base.py +1716 -0
  54. molbuilder/reactions/reaction_types.py +102 -0
  55. molbuilder/reactions/reagent_data.py +1248 -0
  56. molbuilder/reactions/retrosynthesis.py +1430 -0
  57. molbuilder/reactions/synthesis_route.py +377 -0
  58. molbuilder/reports/__init__.py +158 -0
  59. molbuilder/reports/cost_report.py +206 -0
  60. molbuilder/reports/molecule_report.py +279 -0
  61. molbuilder/reports/safety_report.py +296 -0
  62. molbuilder/reports/synthesis_report.py +283 -0
  63. molbuilder/reports/text_formatter.py +170 -0
  64. molbuilder/smiles/__init__.py +4 -0
  65. molbuilder/smiles/parser.py +487 -0
  66. molbuilder/smiles/tokenizer.py +291 -0
  67. molbuilder/smiles/writer.py +375 -0
  68. molbuilder/visualization/__init__.py +1 -0
  69. molbuilder/visualization/bohr_viz.py +166 -0
  70. molbuilder/visualization/molecule_viz.py +368 -0
  71. molbuilder/visualization/quantum_viz.py +434 -0
  72. molbuilder/visualization/theme.py +12 -0
  73. molbuilder-1.0.0.dist-info/METADATA +360 -0
  74. molbuilder-1.0.0.dist-info/RECORD +78 -0
  75. molbuilder-1.0.0.dist-info/WHEEL +5 -0
  76. molbuilder-1.0.0.dist-info/entry_points.txt +2 -0
  77. molbuilder-1.0.0.dist-info/licenses/LICENSE +21 -0
  78. molbuilder-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,170 @@
1
+ """ASCII text formatting utilities for report generation.
2
+
3
+ All output is cp1252-safe (pure ASCII printable characters only).
4
+ Used by molecule_report, synthesis_report, safety_report, and cost_report.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import textwrap
10
+
11
+
12
+ # =====================================================================
13
+ # Section headers
14
+ # =====================================================================
15
+
16
def section_header(title: str, width: int = 70, char: str = "=") -> str:
    """Build a three-line banner: rule, centred upper-case title, rule.

    Parameters
    ----------
    title : str
        Heading text; it is upper-cased before being centred.
    width : int
        Width the title is centred to and the repeat count for *char*.
    char : str
        String repeated *width* times to draw the top and bottom rules.

    Example::

        ======================================================================
                                   MOLECULE REPORT
        ======================================================================
    """
    rule = char * width
    heading = title.upper().center(width)
    return f"{rule}\n{heading}\n{rule}"
28
+
29
+
30
def subsection_header(title: str, width: int = 70, char: str = "-") -> str:
    """Build a one-line subsection heading padded out with *char*.

    The line starts with three *char*, a space, the title and a space; the
    remainder up to *width* is filled with *char*.  A title that already
    exceeds *width* is returned without any trailing fill.

    Example::

        --- Atom Composition --------------------------------------------------
    """
    lead = f"{char * 3} {title} "
    fill_count = width - len(lead)
    if fill_count < 0:
        fill_count = 0
    return lead + char * fill_count
39
+
40
+
41
+ # =====================================================================
42
+ # Tables
43
+ # =====================================================================
44
+
45
def ascii_table(headers: list[str], rows: list[list[str]],
                alignments: list[str] | None = None,
                min_widths: list[int] | None = None) -> str:
    """Generate a formatted ASCII table with column alignment.

    The output is a header line, a dashed divider, and one line per row.
    Columns are joined with a single space and every cell is padded to the
    width of its column.

    Parameters
    ----------
    headers : list[str]
        Column header labels.  The number of headers fixes the number of
        columns.
    rows : list[list[str]]
        Table data (each row is a list of cells).  Cells are converted with
        ``str()``; rows shorter than *headers* are padded with empty cells
        and surplus cells are ignored.
    alignments : list[str] | None
        Per-column alignment: ``'l'`` left, ``'r'`` right, ``'c'`` center.
        Any other value is treated as left.  Defaults to left-aligned for
        every column; a list shorter than *headers* is padded with ``'l'``.
    min_widths : list[int] | None
        Minimum column widths.  Actual widths expand to fit content.  A
        list shorter than *headers* is padded with ``0``.

    Returns
    -------
    str
        The table as newline-joined lines (no trailing newline).
    """
    n_cols = len(headers)
    header_cells = [str(h) for h in headers]

    # Pad short per-column option lists instead of failing with IndexError.
    aligns = list(alignments) if alignments is not None else []
    aligns += ["l"] * (n_cols - len(aligns))
    floors = list(min_widths) if min_widths is not None else []
    floors += [0] * (n_cols - len(floors))

    # Normalise every row to exactly n_cols string cells.
    body = [
        [str(row[c]) if c < len(row) else "" for c in range(n_cols)]
        for row in rows
    ]

    col_widths = [
        max(len(header_cells[c]), floors[c], *(len(r[c]) for r in body))
        for c in range(n_cols)
    ]

    justify = {"r": str.rjust, "c": str.center}

    def _fmt_row(cells: list[str]) -> str:
        # Unknown alignment codes fall back to left-justification.
        return sep.join(
            justify.get(aligns[c], str.ljust)(cells[c], col_widths[c])
            for c in range(n_cols)
        )

    sep = " "
    lines = [_fmt_row(header_cells), sep.join("-" * w for w in col_widths)]
    lines.extend(_fmt_row(cells) for cells in body)
    return "\n".join(lines)
100
+
101
+
102
+ # =====================================================================
103
+ # Text utilities
104
+ # =====================================================================
105
+
106
def word_wrap(text: str, width: int = 70, indent: int = 0) -> str:
    """Wrap *text* to *width* columns, indenting every line by *indent* spaces.

    The indent counts towards *width*.  Wrapping is delegated to
    :mod:`textwrap`.
    """
    margin = " " * indent
    wrapper = textwrap.TextWrapper(
        width=width,
        initial_indent=margin,
        subsequent_indent=margin,
    )
    return wrapper.fill(text)
114
+
115
+
116
def bullet_list(items: list[str], indent: int = 2, bullet: str = "-",
                width: int = 70) -> str:
    """Format items as a bulleted list with hanging indentation.

    Parameters
    ----------
    items : list[str]
        One entry per bullet.
    indent : int
        Number of spaces before each bullet marker.
    bullet : str
        Marker placed in front of every item.
    width : int
        Maximum line width used when wrapping long items (default 70,
        the value that was previously hard-coded).

    Returns
    -------
    str
        Newline-joined bullets.  Continuation lines of a wrapped item are
        aligned under the item's text rather than under the marker.
    """
    first_prefix = " " * indent + bullet + " "
    hanging_prefix = " " * (indent + len(bullet) + 1)
    wrapper = textwrap.TextWrapper(
        width=width,
        initial_indent=first_prefix,
        subsequent_indent=hanging_prefix,
    )
    return "\n".join(wrapper.fill(item) for item in items)
129
+
130
+
131
def key_value_block(pairs: list[tuple[str, str]], separator: str = ": ",
                    indent: int = 2) -> str:
    """Render ``(key, value)`` pairs one per line with separators aligned.

    Keys are left-justified to the length of the longest key, so every
    *separator* starts in the same column.  Each line is prefixed with
    *indent* spaces.  An empty *pairs* list yields an empty string.
    """
    if not pairs:
        return ""

    key_width = max(len(key) for key, _ in pairs)
    margin = " " * indent
    return "\n".join(
        margin + format(key, f"<{key_width}") + separator + format(value, "")
        for key, value in pairs
    )
142
+
143
+
144
+ # =====================================================================
145
+ # Charts and number formatting
146
+ # =====================================================================
147
+
148
def horizontal_bar(value: float, max_value: float, width: int = 40,
                   char: str = "#") -> str:
    """Render one line of a text bar chart.

    The filled part is ``round(width * value / max_value)`` copies of
    *char*, with the ratio clamped to the range 0..1; the rest of the line
    is padded with spaces.  A non-positive *max_value* produces an
    all-space bar.
    """
    if max_value <= 0:
        return " " * width

    ratio = max(0.0, min(1.0, value / max_value))
    filled = int(round(ratio * width))
    empty = width - filled
    return char * filled + " " * empty
161
+
162
+
163
def format_currency(amount: float) -> str:
    """Format *amount* as US dollars with thousands separators and two
    decimals, e.g. ``$1,234.56``."""
    return f"${amount:,.2f}"
166
+
167
+
168
def format_percent(value: float, decimals: int = 1) -> str:
    """Format *value* with *decimals* fractional digits and a trailing
    percent sign, e.g. ``85.0%``.  The value is not rescaled."""
    return f"{value:.{decimals}f}%"
@@ -0,0 +1,4 @@
1
+ """SMILES parsing and writing."""
2
+ from molbuilder.smiles.parser import parse
3
+ from molbuilder.smiles.writer import to_smiles
4
+ from molbuilder.smiles.tokenizer import tokenize
@@ -0,0 +1,487 @@
1
+ """SMILES parser: tokens -> Molecule with 3D coordinates.
2
+
3
+ Algorithm:
4
+ 1. Parse tokens using a stack for branch tracking and a dict for ring closures
5
+ 2. Build connectivity graph (atoms + bonds)
6
+ 3. Add implicit hydrogens based on standard valence
7
+ 4. Assign 3D coordinates via BFS z-matrix placement
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import math
13
+ from collections import deque
14
+
15
+ import numpy as np
16
+
17
+ from molbuilder.smiles.tokenizer import (
18
+ tokenize, Token, TokenType, DEFAULT_VALENCE, ORGANIC_SUBSET, AROMATIC_ATOMS,
19
+ )
20
+ from molbuilder.molecule.graph import Molecule, Hybridization
21
+ from molbuilder.core.bond_data import bond_length, SP3_ANGLE, SP2_ANGLE, SP_ANGLE
22
+ from molbuilder.core.geometry import (
23
+ normalize, place_atom_zmatrix, available_tetrahedral_dirs,
24
+ )
25
+
26
+
27
+ # ===================================================================
28
+ # Bond order from SMILES bond symbol
29
+ # ===================================================================
30
+
31
+ _BOND_ORDER = {
32
+ "-": 1,
33
+ "=": 2,
34
+ "#": 3,
35
+ ":": 1, # aromatic bond treated as order 1 for connectivity
36
+ "/": 1, # E/Z bond direction indicator (single bond connectivity)
37
+ "\\": 1, # E/Z bond direction indicator (single bond connectivity)
38
+ }
39
+
40
+
41
+ # ===================================================================
42
+ # Internal data structures used during graph construction
43
+ # ===================================================================
44
+
45
+ class _AtomInfo:
46
+ """Lightweight bookkeeping record for an atom during parsing."""
47
+
48
+ __slots__ = ("index", "symbol", "aromatic", "bracket",
49
+ "isotope", "hcount", "charge", "chirality")
50
+
51
+ def __init__(self, index: int, symbol: str, aromatic: bool = False,
52
+ bracket: bool = False, isotope: int | None = None,
53
+ hcount: int | None = None, charge: int = 0,
54
+ chirality: str | None = None):
55
+ self.index = index
56
+ self.symbol = symbol
57
+ self.aromatic = aromatic
58
+ self.bracket = bracket
59
+ self.isotope = isotope
60
+ self.hcount = hcount
61
+ self.charge = charge
62
+ self.chirality = chirality
63
+
64
+
65
+ class _BondInfo:
66
+ """Lightweight bookkeeping record for a bond during parsing."""
67
+
68
+ __slots__ = ("atom_i", "atom_j", "order")
69
+
70
+ def __init__(self, atom_i: int, atom_j: int, order: int = 1):
71
+ self.atom_i = atom_i
72
+ self.atom_j = atom_j
73
+ self.order = order
74
+
75
+
76
+ # ===================================================================
77
+ # Graph construction from tokens
78
+ # ===================================================================
79
+
80
+ def _build_graph(tokens: list[Token]) -> tuple[list[_AtomInfo], list[_BondInfo]]:
81
+ """Walk the token list and build atom / bond lists.
82
+
83
+ Uses a stack for branch handling and a dictionary for ring closures.
84
+
85
+ Returns
86
+ -------
87
+ atoms : list[_AtomInfo]
88
+ bonds : list[_BondInfo]
89
+ """
90
+ atoms: list[_AtomInfo] = []
91
+ bonds: list[_BondInfo] = []
92
+
93
+ stack: list[int] = [] # branch stack of atom indices
94
+ ring_closures: dict[str, tuple[int, int]] = {} # digit -> (atom_idx, bond_order)
95
+ prev: int | None = None # index of the most recent atom
96
+ pending_bond_order: int | None = None # explicit bond symbol waiting
97
+
98
+ for tok in tokens:
99
+ # ---- atom ----
100
+ if tok.type == TokenType.ATOM:
101
+ idx = len(atoms)
102
+ is_bracket = (tok.hcount is not None or tok.charge != 0
103
+ or tok.isotope is not None
104
+ or tok.value not in ORGANIC_SUBSET
105
+ and tok.value not in AROMATIC_ATOMS)
106
+ # Canonical symbol: aromatic lowercase -> titlecase for storage
107
+ symbol = tok.value
108
+ if tok.aromatic and symbol.islower():
109
+ symbol = symbol.capitalize()
110
+
111
+ atoms.append(_AtomInfo(
112
+ index=idx,
113
+ symbol=symbol,
114
+ aromatic=tok.aromatic,
115
+ bracket=is_bracket,
116
+ isotope=tok.isotope,
117
+ hcount=tok.hcount,
118
+ charge=tok.charge,
119
+ chirality=tok.chirality,
120
+ ))
121
+
122
+ # Bond to previous atom
123
+ if prev is not None:
124
+ order = pending_bond_order if pending_bond_order else 1
125
+ # Aromatic bond default: if both atoms are aromatic and no
126
+ # explicit bond, use order 1 (aromatic bonds are kekulized
127
+ # later or left as single for coordinate purposes).
128
+ bonds.append(_BondInfo(prev, idx, order))
129
+ pending_bond_order = None
130
+ prev = idx
131
+ continue
132
+
133
+ # ---- bond symbol ----
134
+ if tok.type == TokenType.BOND:
135
+ pending_bond_order = _BOND_ORDER[tok.value]
136
+ continue
137
+
138
+ # ---- branch open ----
139
+ if tok.type == TokenType.BRANCH_OPEN:
140
+ if prev is not None:
141
+ stack.append(prev)
142
+ continue
143
+
144
+ # ---- branch close ----
145
+ if tok.type == TokenType.BRANCH_CLOSE:
146
+ if stack:
147
+ prev = stack.pop()
148
+ pending_bond_order = None
149
+ continue
150
+
151
+ # ---- ring closure ----
152
+ if tok.type == TokenType.RING_DIGIT:
153
+ digit = tok.value
154
+ if digit in ring_closures:
155
+ # Close the ring
156
+ other_idx, ring_order = ring_closures.pop(digit)
157
+ order = pending_bond_order if pending_bond_order else ring_order
158
+ bonds.append(_BondInfo(prev, other_idx, order))
159
+ pending_bond_order = None
160
+ else:
161
+ # Open a ring
162
+ order = pending_bond_order if pending_bond_order else 1
163
+ ring_closures[digit] = (prev, order)
164
+ pending_bond_order = None
165
+ continue
166
+
167
+ # ---- dot (disconnection) ----
168
+ if tok.type == TokenType.DOT:
169
+ prev = None
170
+ pending_bond_order = None
171
+ continue
172
+
173
+ if ring_closures:
174
+ open_digits = ", ".join(ring_closures.keys())
175
+ raise ValueError(
176
+ f"Unclosed ring closure(s) for digit(s): {open_digits}")
177
+
178
+ return atoms, bonds
179
+
180
+
181
+ # ===================================================================
182
+ # Implicit hydrogen addition
183
+ # ===================================================================
184
+
185
+ def _explicit_valence(atom_idx: int, bonds: list[_BondInfo]) -> int:
186
+ """Sum of bond orders touching *atom_idx*."""
187
+ total = 0
188
+ for b in bonds:
189
+ if b.atom_i == atom_idx or b.atom_j == atom_idx:
190
+ total += b.order
191
+ return total
192
+
193
+
194
+ def _add_implicit_hydrogens(
195
+ atoms: list[_AtomInfo],
196
+ bonds: list[_BondInfo],
197
+ ) -> tuple[list[_AtomInfo], list[_BondInfo]]:
198
+ """Add implicit H atoms to organic-subset atoms.
199
+
200
+ Bracket atoms with an explicit ``hcount`` use that count. Organic-
201
+ subset atoms use ``DEFAULT_VALENCE`` to infer the missing hydrogens.
202
+
203
+ Returns the (possibly extended) atoms and bonds lists.
204
+ """
205
+ heavy_count = len(atoms)
206
+
207
+ for ai in range(heavy_count):
208
+ atom = atoms[ai]
209
+ ev = _explicit_valence(ai, bonds)
210
+
211
+ # Bracket atom with explicit H count
212
+ if atom.bracket and atom.hcount is not None:
213
+ n_h = atom.hcount
214
+ else:
215
+ # Look up default valence for organic subset / aromatic atoms
216
+ lookup_sym = atom.symbol.lower() if atom.aromatic else atom.symbol
217
+ if lookup_sym not in DEFAULT_VALENCE and atom.symbol not in DEFAULT_VALENCE:
218
+ continue # unknown atom -- no implicit H
219
+ valences = DEFAULT_VALENCE.get(
220
+ lookup_sym, DEFAULT_VALENCE.get(atom.symbol, []))
221
+ if not valences:
222
+ continue
223
+
224
+ # Pick the smallest default valence >= explicit valence
225
+ target = None
226
+ for v in sorted(valences):
227
+ if v >= ev:
228
+ target = v
229
+ break
230
+ if target is None:
231
+ target = max(valences)
232
+
233
+ # Aromatic atoms contribute one electron to the pi system,
234
+ # so reduce the target by 1.
235
+ if atom.aromatic:
236
+ target = max(0, target - 1)
237
+
238
+ n_h = max(0, target - ev)
239
+
240
+ # Add H atoms
241
+ for _ in range(n_h):
242
+ h_idx = len(atoms)
243
+ atoms.append(_AtomInfo(
244
+ index=h_idx, symbol="H", aromatic=False, bracket=False))
245
+ bonds.append(_BondInfo(ai, h_idx, 1))
246
+
247
+ return atoms, bonds
248
+
249
+
250
+ # ===================================================================
251
+ # Hybridization determination
252
+ # ===================================================================
253
+
254
def _determine_hybridization(
    atom_idx: int,
    atoms: list[_AtomInfo],
    bonds: list[_BondInfo],
) -> Hybridization:
    """Infer hybridization from bond orders around an atom.

    Rules
    -----
    - Aromatic atoms           -> SP2
    - Any triple bond          -> SP
    - Carbon with two or more
      double bonds (cumulated) -> SP
    - Any other double bond    -> SP2
    - All single bonds         -> SP3

    Parameters
    ----------
    atom_idx : int
        Index of the atom to classify.
    atoms : list[_AtomInfo]
        All atom records; only ``atoms[atom_idx]`` is inspected.
    bonds : list[_BondInfo]
        All bond records; those touching *atom_idx* are examined.
    """
    atom = atoms[atom_idx]
    if atom.aromatic:
        return Hybridization.SP2

    double_bonds = 0
    for b in bonds:
        if b.atom_i == atom_idx or b.atom_j == atom_idx:
            if b.order == 3:
                return Hybridization.SP
            if b.order == 2:
                double_bonds += 1

    # A carbon carrying two double bonds (O=C=O, C=C=C) is linear.  The
    # rule is limited to carbon because e.g. sulfur with two S=O bonds is
    # not linear.
    if double_bonds >= 2 and atom.symbol == "C":
        return Hybridization.SP
    if double_bonds:
        return Hybridization.SP2
    return Hybridization.SP3
282
+
283
+
284
+ # ===================================================================
285
+ # 3D coordinate assignment via BFS
286
+ # ===================================================================
287
+
288
def _angle_for_hyb(hyb: Hybridization) -> float:
    """Map a hybridization to its bond-angle constant.

    SP gives ``SP_ANGLE``, SP2 gives ``SP2_ANGLE``, and anything else
    falls back to ``SP3_ANGLE``.
    """
    if hyb == Hybridization.SP:
        return SP_ANGLE
    return SP2_ANGLE if hyb == Hybridization.SP2 else SP3_ANGLE
295
+
296
+
297
+ def _assign_3d_coordinates(mol: Molecule) -> None:
298
+ """Place atoms in 3D using BFS from atom 0.
299
+
300
+ Algorithm
301
+ ---------
302
+ 1. Place atom 0 at the origin.
303
+ 2. BFS outward; for each newly visited atom, use its parent (and
304
+ grandparent if available) for z-matrix placement.
305
+ 3. Distribute multiple substituents around each centre at regular
306
+ dihedral intervals based on hybridization.
307
+ """
308
+ n = len(mol.atoms)
309
+ if n == 0:
310
+ return
311
+
312
+ # Place first atom at origin
313
+ mol.atoms[0].position = np.array([0.0, 0.0, 0.0])
314
+ placed = {0}
315
+
316
+ if n == 1:
317
+ return
318
+
319
+ # Build adjacency from molecule bonds
320
+ adj: dict[int, list[tuple[int, int]]] = {i: [] for i in range(n)}
321
+ for b in mol.bonds:
322
+ adj[b.atom_i].append((b.atom_j, b.order))
323
+ adj[b.atom_j].append((b.atom_i, b.order))
324
+
325
+ # BFS queue: (atom_index, parent_index, grandparent_index_or_None)
326
+ queue: deque[tuple[int, int | None, int | None]] = deque()
327
+
328
+ # Track children scheduled per parent to assign dihedral offsets
329
+ child_counter: dict[int, int] = {}
330
+
331
+ # Seed the BFS from atom 0: schedule all neighbours
332
+ for nb_idx, nb_order in adj[0]:
333
+ queue.append((nb_idx, 0, None))
334
+
335
+ while queue:
336
+ atom_idx, parent_idx, grandparent_idx = queue.popleft()
337
+
338
+ if atom_idx in placed:
339
+ continue
340
+
341
+ parent_pos = mol.atoms[parent_idx].position
342
+ parent_hyb = mol.atoms[parent_idx].hybridization
343
+ angle = _angle_for_hyb(parent_hyb) if parent_hyb else SP3_ANGLE
344
+
345
+ # Determine bond order for bond length
346
+ b_order = 1
347
+ for nb, bo in adj[parent_idx]:
348
+ if nb == atom_idx:
349
+ b_order = bo
350
+ break
351
+
352
+ bl = bond_length(
353
+ mol.atoms[parent_idx].symbol,
354
+ mol.atoms[atom_idx].symbol,
355
+ b_order,
356
+ )
357
+
358
+ # Child counter for dihedral offset
359
+ child_num = child_counter.get(parent_idx, 0)
360
+ child_counter[parent_idx] = child_num + 1
361
+
362
+ # Dihedral step depends on parent hybridization
363
+ if parent_hyb == Hybridization.SP2:
364
+ dihedral_step = 120.0
365
+ elif parent_hyb == Hybridization.SP:
366
+ dihedral_step = 180.0
367
+ else:
368
+ dihedral_step = 120.0 # tetrahedral uses ~120 deg between projections
369
+
370
+ dihedral = dihedral_step * child_num
371
+
372
+ if grandparent_idx is not None and grandparent_idx in placed:
373
+ # Normal z-matrix placement
374
+ gp_pos = mol.atoms[grandparent_idx].position
375
+ pos = place_atom_zmatrix(
376
+ parent_pos, gp_pos,
377
+ _dihedral_ref_pos(mol, parent_idx, grandparent_idx, placed),
378
+ bl, angle, dihedral,
379
+ )
380
+ elif len(placed) == 1:
381
+ # Second atom: place along +z
382
+ pos = parent_pos + np.array([0.0, 0.0, bl])
383
+ else:
384
+ # No grandparent yet -- use a synthetic reference
385
+ ref_pos = parent_pos + np.array([0.0, 0.0, -1.0])
386
+ synth_k = ref_pos + np.array([0.0, 1.0, 0.0])
387
+ pos = place_atom_zmatrix(
388
+ parent_pos, ref_pos, synth_k,
389
+ bl, angle, dihedral,
390
+ )
391
+
392
+ mol.atoms[atom_idx].position = pos
393
+ placed.add(atom_idx)
394
+
395
+ # Enqueue unvisited neighbours
396
+ for nb_idx, nb_order in adj[atom_idx]:
397
+ if nb_idx not in placed:
398
+ queue.append((nb_idx, atom_idx, parent_idx))
399
+
400
+
401
+ def _dihedral_ref_pos(
402
+ mol: Molecule,
403
+ parent_idx: int,
404
+ grandparent_idx: int,
405
+ placed: set[int],
406
+ ) -> np.ndarray:
407
+ """Find a third reference position for z-matrix dihedral.
408
+
409
+ Looks for a placed neighbour of the grandparent that is not the
410
+ parent. Falls back to a synthetic offset if none is found.
411
+ """
412
+ gp_pos = mol.atoms[grandparent_idx].position
413
+ for nb in mol.neighbors(grandparent_idx):
414
+ if nb != parent_idx and nb in placed:
415
+ return mol.atoms[nb].position
416
+
417
+ # Synthetic fallback: offset perpendicular to parent-grandparent axis
418
+ axis = mol.atoms[parent_idx].position - gp_pos
419
+ perp = np.array([1.0, 0.0, 0.0])
420
+ if abs(np.dot(normalize(axis), perp)) > 0.9:
421
+ perp = np.array([0.0, 1.0, 0.0])
422
+ return gp_pos + np.cross(axis, perp) * 0.5
423
+
424
+
425
+ # ===================================================================
426
+ # Public API
427
+ # ===================================================================
428
+
429
def parse(smiles: str) -> Molecule:
    """Turn a SMILES string into a Molecule with approximate 3D coordinates.

    The string is tokenized, converted to an atom/bond graph, completed
    with implicit hydrogens, copied into a ``Molecule`` (named after the
    input string) and finally given coordinates.

    Parameters
    ----------
    smiles : str
        A SMILES string, e.g. ``"CCO"`` (ethanol), ``"c1ccccc1"``
        (benzene), ``"CC(=O)O"`` (acetic acid).

    Returns
    -------
    Molecule
        Molecule with atoms, bonds, per-atom hybridization and
        approximate 3D coordinates.

    Raises
    ------
    ValueError
        If the SMILES string is invalid.

    Examples
    --------
    >>> mol = parse("C")
    >>> len([a for a in mol.atoms if a.symbol == "C"])
    1
    >>> len([a for a in mol.atoms if a.symbol == "H"])
    4
    """
    atom_records, bond_records = _build_graph(tokenize(smiles))
    atom_records, bond_records = _add_implicit_hydrogens(
        atom_records, bond_records)

    molecule = Molecule(name=smiles)

    # Atoms first, all at a placeholder position; real coordinates are
    # assigned once the bonds are known.
    for record in atom_records:
        molecule.add_atom(
            symbol=record.symbol,
            position=[0.0, 0.0, 0.0],
            hybridization=_determine_hybridization(
                record.index, atom_records, bond_records),
            chirality=record.chirality,
            isotope=record.isotope,
            formal_charge=record.charge,
        )

    for bond in bond_records:
        # Rotatable: a single bond with no hydrogen at either end.
        rotatable = (
            bond.order == 1
            and "H" not in (atom_records[bond.atom_i].symbol,
                            atom_records[bond.atom_j].symbol)
        )
        molecule.add_bond(
            bond.atom_i, bond.atom_j, order=bond.order, rotatable=rotatable)

    _assign_3d_coordinates(molecule)
    return molecule