molbuilder 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. molbuilder/__init__.py +8 -0
  2. molbuilder/__main__.py +6 -0
  3. molbuilder/atomic/__init__.py +4 -0
  4. molbuilder/atomic/bohr.py +235 -0
  5. molbuilder/atomic/quantum_atom.py +334 -0
  6. molbuilder/atomic/quantum_numbers.py +196 -0
  7. molbuilder/atomic/wavefunctions.py +297 -0
  8. molbuilder/bonding/__init__.py +4 -0
  9. molbuilder/bonding/covalent.py +442 -0
  10. molbuilder/bonding/lewis.py +347 -0
  11. molbuilder/bonding/vsepr.py +433 -0
  12. molbuilder/cli/__init__.py +1 -0
  13. molbuilder/cli/demos.py +516 -0
  14. molbuilder/cli/menu.py +127 -0
  15. molbuilder/cli/wizard.py +831 -0
  16. molbuilder/core/__init__.py +6 -0
  17. molbuilder/core/bond_data.py +170 -0
  18. molbuilder/core/constants.py +51 -0
  19. molbuilder/core/element_properties.py +183 -0
  20. molbuilder/core/elements.py +181 -0
  21. molbuilder/core/geometry.py +232 -0
  22. molbuilder/gui/__init__.py +2 -0
  23. molbuilder/gui/app.py +286 -0
  24. molbuilder/gui/canvas3d.py +115 -0
  25. molbuilder/gui/dialogs.py +117 -0
  26. molbuilder/gui/event_handler.py +118 -0
  27. molbuilder/gui/sidebar.py +105 -0
  28. molbuilder/gui/toolbar.py +71 -0
  29. molbuilder/io/__init__.py +1 -0
  30. molbuilder/io/json_io.py +146 -0
  31. molbuilder/io/mol_sdf.py +169 -0
  32. molbuilder/io/pdb.py +184 -0
  33. molbuilder/io/smiles_io.py +47 -0
  34. molbuilder/io/xyz.py +103 -0
  35. molbuilder/molecule/__init__.py +2 -0
  36. molbuilder/molecule/amino_acids.py +919 -0
  37. molbuilder/molecule/builders.py +257 -0
  38. molbuilder/molecule/conformations.py +70 -0
  39. molbuilder/molecule/functional_groups.py +484 -0
  40. molbuilder/molecule/graph.py +712 -0
  41. molbuilder/molecule/peptides.py +13 -0
  42. molbuilder/molecule/stereochemistry.py +6 -0
  43. molbuilder/process/__init__.py +3 -0
  44. molbuilder/process/conditions.py +260 -0
  45. molbuilder/process/costing.py +316 -0
  46. molbuilder/process/purification.py +285 -0
  47. molbuilder/process/reactor.py +297 -0
  48. molbuilder/process/safety.py +476 -0
  49. molbuilder/process/scale_up.py +427 -0
  50. molbuilder/process/solvent_systems.py +204 -0
  51. molbuilder/reactions/__init__.py +3 -0
  52. molbuilder/reactions/functional_group_detect.py +728 -0
  53. molbuilder/reactions/knowledge_base.py +1716 -0
  54. molbuilder/reactions/reaction_types.py +102 -0
  55. molbuilder/reactions/reagent_data.py +1248 -0
  56. molbuilder/reactions/retrosynthesis.py +1430 -0
  57. molbuilder/reactions/synthesis_route.py +377 -0
  58. molbuilder/reports/__init__.py +158 -0
  59. molbuilder/reports/cost_report.py +206 -0
  60. molbuilder/reports/molecule_report.py +279 -0
  61. molbuilder/reports/safety_report.py +296 -0
  62. molbuilder/reports/synthesis_report.py +283 -0
  63. molbuilder/reports/text_formatter.py +170 -0
  64. molbuilder/smiles/__init__.py +4 -0
  65. molbuilder/smiles/parser.py +487 -0
  66. molbuilder/smiles/tokenizer.py +291 -0
  67. molbuilder/smiles/writer.py +375 -0
  68. molbuilder/visualization/__init__.py +1 -0
  69. molbuilder/visualization/bohr_viz.py +166 -0
  70. molbuilder/visualization/molecule_viz.py +368 -0
  71. molbuilder/visualization/quantum_viz.py +434 -0
  72. molbuilder/visualization/theme.py +12 -0
  73. molbuilder-1.0.0.dist-info/METADATA +360 -0
  74. molbuilder-1.0.0.dist-info/RECORD +78 -0
  75. molbuilder-1.0.0.dist-info/WHEEL +5 -0
  76. molbuilder-1.0.0.dist-info/entry_points.txt +2 -0
  77. molbuilder-1.0.0.dist-info/licenses/LICENSE +21 -0
  78. molbuilder-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,291 @@
1
+ """SMILES string tokenizer.
2
+
3
+ Converts a SMILES string into a sequence of Token objects for the parser.
4
+
5
+ Handles:
6
+ - Organic subset atoms: B, C, N, O, P, S, F, Cl, Br, I (uppercase)
7
+ - Bracket atoms: [NH3+], [Fe], [13C], etc.
8
+ - Aromatic atoms: b, c, n, o, p, s (lowercase)
9
+ - Bond types: - (single), = (double), # (triple), : (aromatic); "/" and backslash are also tokenized as bond symbols
10
+ - Branch notation: ( and )
11
+ - Ring closure digits: 0-9, and %nn for two-digit ring numbers
12
+ - Dot disconnection: .
13
+ - Hydrogen counts in brackets: [NH2], [CH3]
14
+ - Charges in brackets: +, -, +2, -1
15
+ - Isotopes in brackets: [13C], [2H]
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from dataclasses import dataclass
21
+ from enum import Enum, auto
22
+
23
+
24
+ # ===================================================================
25
+ # Token types and data class
26
+ # ===================================================================
27
+
28
class TokenType(Enum):
    """Lexical categories emitted by the SMILES tokenizer."""

    # Organic-subset atom or bracket atom.
    ATOM = auto()
    # Bond symbol such as -, =, #, :
    BOND = auto()
    # Opening parenthesis of a branch: (
    BRANCH_OPEN = auto()
    # Closing parenthesis of a branch: )
    BRANCH_CLOSE = auto()
    # Ring closure number.
    RING_DIGIT = auto()
    # Disconnection between fragments: .
    DOT = auto()
35
+
36
+
37
@dataclass
class Token:
    """One lexical unit produced while scanning a SMILES string.

    Only ``type`` and ``value`` are required; the remaining fields carry
    the extra information found inside a bracket atom and keep their
    defaults for every other kind of token.

    Attributes
    ----------
    type : TokenType
        Category of the token.
    value : str
        Atom symbol (ATOM), bond character (BOND) or digit string
        (RING_DIGIT).
    isotope : int | None
        Mass number given in a bracket atom (``13`` for ``[13C]``).
    hcount : int | None
        Hydrogen count written in a bracket atom (``2`` for ``[NH2]``).
        ``None`` means nothing was written, so implicit-hydrogen rules
        apply; ``0`` means an explicit count of zero.
    charge : int
        Formal charge given in a bracket atom (``+1`` for ``[NH4+]``).
    aromatic : bool
        ``True`` when the atom was written in lowercase.
    chirality : str | None
        ``"@"`` (anticlockwise), ``"@@"`` (clockwise), or ``None`` when
        the bracket atom carries no chirality marker.
    """

    # Required fields.
    type: TokenType
    value: str
    # Bracket-atom details; the defaults mean "not specified".
    isotope: int | None = None
    hcount: int | None = None
    charge: int = 0
    aromatic: bool = False
    chirality: str | None = None
69
+
70
+
71
+ # ===================================================================
72
+ # Constants
73
+ # ===================================================================
74
+
75
# Atoms that may be written without square brackets (the organic subset).
ORGANIC_SUBSET = {
    "B", "C", "N", "O", "P", "S",
    "F", "Cl", "Br", "I",
}
# Lowercase (aromatic) atom symbols.
AROMATIC_ATOMS = set("bcnops")

# Two-letter members of the organic subset; these are matched before any
# single-letter symbol.
TWO_LETTER_ORGANIC = {"Cl", "Br"}

# Default valences per symbol, used for implicit hydrogen calculation.
DEFAULT_VALENCE: dict[str, list[int]] = {
    # uppercase symbols
    "B": [3],
    "C": [4],
    "N": [3, 5],
    "O": [2],
    "P": [3, 5],
    "S": [2, 4, 6],
    "F": [1],
    "Cl": [1],
    "Br": [1],
    "I": [1],
    # lowercase (aromatic) symbols
    "b": [3],
    "c": [4],
    "n": [3],
    "o": [2],
    "p": [3],
    "s": [2],
}

# Every character the tokenizer treats as a bond symbol.
BOND_CHARS = set("-=#:/\\")
90
+
91
+
92
+ # ===================================================================
93
+ # Bracket-atom parser helper
94
+ # ===================================================================
95
+
96
+ def _parse_bracket(smiles: str, start: int) -> tuple[Token, int]:
97
+ """Parse a bracket atom ``[...]`` starting at *start* (the ``[``).
98
+
99
+ Returns the Token and the index of the character after the closing
100
+ ``]``.
101
+ """
102
+ pos = start + 1 # skip '['
103
+ end = smiles.index("]", pos)
104
+ inner = smiles[pos:end]
105
+
106
+ isotope: int | None = None
107
+ symbol: str = ""
108
+ hcount: int | None = None
109
+ charge: int = 0
110
+ aromatic: bool = False
111
+ chirality: str | None = None
112
+
113
+ i = 0
114
+ n = len(inner)
115
+
116
+ # --- isotope (leading digits) ---
117
+ iso_start = i
118
+ while i < n and inner[i].isdigit():
119
+ i += 1
120
+ if i > iso_start:
121
+ isotope = int(inner[iso_start:i])
122
+
123
+ # --- element symbol ---
124
+ # Symbol starts with an uppercase letter followed by optional lowercase,
125
+ # OR a single lowercase letter for aromatic atoms.
126
+ if i < n and inner[i].isupper():
127
+ symbol = inner[i]
128
+ i += 1
129
+ while i < n and inner[i].islower():
130
+ symbol += inner[i]
131
+ i += 1
132
+ elif i < n and inner[i].islower():
133
+ # aromatic bracket atom
134
+ symbol = inner[i]
135
+ aromatic = True
136
+ i += 1
137
+ else:
138
+ raise ValueError(
139
+ f"Expected element symbol in bracket atom: [{inner}]")
140
+
141
+ # --- chirality (@, @@) ---
142
+ if i < n and inner[i] == "@":
143
+ i += 1
144
+ if i < n and inner[i] == "@":
145
+ chirality = "@@"
146
+ i += 1
147
+ else:
148
+ chirality = "@"
149
+
150
+ # --- hydrogen count ---
151
+ if i < n and inner[i] == "H":
152
+ i += 1
153
+ h_start = i
154
+ while i < n and inner[i].isdigit():
155
+ i += 1
156
+ if i > h_start:
157
+ hcount = int(inner[h_start:i])
158
+ else:
159
+ hcount = 1 # bare H means 1 hydrogen
160
+
161
+ # --- charge ---
162
+ if i < n and inner[i] in ("+", "-"):
163
+ sign = 1 if inner[i] == "+" else -1
164
+ i += 1
165
+ ch_start = i
166
+ while i < n and inner[i].isdigit():
167
+ i += 1
168
+ if i > ch_start:
169
+ charge = sign * int(inner[ch_start:i])
170
+ else:
171
+ # Count consecutive +/- signs (e.g. ++ means +2)
172
+ extra = 0
173
+ ch = "+" if sign == 1 else "-"
174
+ while i < n and inner[i] == ch:
175
+ extra += 1
176
+ i += 1
177
+ charge = sign * (1 + extra)
178
+
179
+ return Token(
180
+ type=TokenType.ATOM,
181
+ value=symbol,
182
+ isotope=isotope,
183
+ hcount=hcount,
184
+ charge=charge,
185
+ aromatic=aromatic,
186
+ chirality=chirality,
187
+ ), end + 1
188
+
189
+
190
+ # ===================================================================
191
+ # Main tokenizer
192
+ # ===================================================================
193
+
194
def tokenize(smiles: str) -> list[Token]:
    """Split a SMILES string into an ordered list of tokens.

    Parameters
    ----------
    smiles : str
        SMILES text such as ``"CCO"``, ``"c1ccccc1"`` or ``"[NH4+]"``.

    Returns
    -------
    list[Token]
        Tokens in the order they appear in *smiles*.

    Raises
    ------
    ValueError
        On a character that is not part of the supported syntax, on a
        ``%`` not followed by two digits, or on a malformed bracket atom.
    """
    # Branch parentheses mapped to the name of their TokenType member.
    punctuation = {"(": "BRANCH_OPEN", ")": "BRANCH_CLOSE"}

    out: list[Token] = []
    pos = 0
    length = len(smiles)

    while pos < length:
        ch = smiles[pos]

        # Bracket atoms are handled by the helper, which also returns
        # the position to resume from.
        if ch == "[":
            bracket_token, pos = _parse_bracket(smiles, pos)
            out.append(bracket_token)
            continue

        pair = smiles[pos:pos + 2]
        if pos + 1 < length and pair in TWO_LETTER_ORGANIC:
            # Cl / Br must win over the single letters C / B.
            out.append(Token(type=TokenType.ATOM, value=pair))
            pos += 2
        elif ch in ("B", "C", "N", "O", "P", "S", "F", "I"):
            out.append(Token(type=TokenType.ATOM, value=ch))
            pos += 1
        elif ch in ("b", "c", "n", "o", "p", "s"):
            out.append(Token(type=TokenType.ATOM, value=ch, aromatic=True))
            pos += 1
        elif ch in BOND_CHARS:
            out.append(Token(type=TokenType.BOND, value=ch))
            pos += 1
        elif ch in punctuation:
            out.append(Token(type=TokenType[punctuation[ch]], value=ch))
            pos += 1
        elif ch.isdigit():
            # Single-digit ring closure.
            out.append(Token(type=TokenType.RING_DIGIT, value=ch))
            pos += 1
        elif ch == "%":
            # Two-digit ring closure: %nn
            label = smiles[pos + 1:pos + 3]
            if len(label) < 2 or not (label[0].isdigit()
                                      and label[1].isdigit()):
                raise ValueError(
                    f"Expected two digits after '%' at position {pos}")
            out.append(Token(type=TokenType.RING_DIGIT, value=label))
            pos += 3
        elif ch == ".":
            out.append(Token(type=TokenType.DOT, value="."))
            pos += 1
        else:
            raise ValueError(
                f"Unexpected character {ch!r} at position {pos} in SMILES "
                f"string {smiles!r}")

    return out
@@ -0,0 +1,375 @@
1
+ """SMILES writer: Molecule -> canonical SMILES string.
2
+
3
+ Uses Morgan's algorithm for canonical atom numbering, then DFS traversal
4
+ to generate the SMILES string.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from collections import deque
10
+
11
+ from molbuilder.molecule.graph import Molecule
12
+ from molbuilder.core.elements import SYMBOL_TO_Z
13
+ from molbuilder.smiles.tokenizer import ORGANIC_SUBSET, DEFAULT_VALENCE
14
+
15
+
16
+ # ===================================================================
17
+ # Morgan canonical ordering
18
+ # ===================================================================
19
+
20
+ def _morgan_canonical_order(mol: Molecule) -> dict[int, int]:
21
+ """Compute canonical atom ranks using Morgan's extended connectivity.
22
+
23
+ Algorithm
24
+ ---------
25
+ 1. Initialize each atom's invariant to its degree (number of
26
+ neighbours).
27
+ 2. Iteratively replace each atom's invariant with the sum of its
28
+ neighbours' invariants until the number of distinct values
29
+ stabilises.
30
+ 3. Break ties using atomic number (higher Z = higher rank).
31
+ 4. Return a mapping from atom index to rank (0 = lowest priority,
32
+ first in canonical order).
33
+
34
+ Parameters
35
+ ----------
36
+ mol : Molecule
37
+ The molecule whose atoms should be ranked.
38
+
39
+ Returns
40
+ -------
41
+ dict[int, int]
42
+ Mapping from atom index to canonical rank.
43
+ """
44
+ n = len(mol.atoms)
45
+ if n == 0:
46
+ return {}
47
+
48
+ # Initial connectivity value = degree
49
+ ec = [len(mol.neighbors(i)) for i in range(n)]
50
+
51
+ prev_classes = 0
52
+ for _iteration in range(100):
53
+ new_ec = [0] * n
54
+ for i in range(n):
55
+ new_ec[i] = sum(ec[j] for j in mol.neighbors(i))
56
+
57
+ # Count distinct classes
58
+ classes = len(set(new_ec))
59
+ if classes == prev_classes:
60
+ break
61
+ prev_classes = classes
62
+ ec = new_ec
63
+
64
+ # Build (ec_value, atomic_number, original_index) for stable sort
65
+ sort_keys = []
66
+ for i in range(n):
67
+ z = SYMBOL_TO_Z.get(mol.atoms[i].symbol, 0)
68
+ sort_keys.append((ec[i], z, i))
69
+
70
+ sorted_atoms = sorted(sort_keys)
71
+ order: dict[int, int] = {}
72
+ for rank, (_, _, idx) in enumerate(sorted_atoms):
73
+ order[idx] = rank
74
+
75
+ return order
76
+
77
+
78
+ # ===================================================================
79
+ # Connected components
80
+ # ===================================================================
81
+
82
+ def _connected_components(mol: Molecule) -> list[list[int]]:
83
+ """Return lists of atom indices for each connected fragment.
84
+
85
+ Considers only heavy (non-hydrogen) atoms.
86
+ """
87
+ n = len(mol.atoms)
88
+ heavy = {i for i in range(n) if mol.atoms[i].symbol != "H"}
89
+ visited: set[int] = set()
90
+ components: list[list[int]] = []
91
+
92
+ for start in heavy:
93
+ if start in visited:
94
+ continue
95
+ comp: list[int] = []
96
+ stack = [start]
97
+ while stack:
98
+ cur = stack.pop()
99
+ if cur in visited:
100
+ continue
101
+ visited.add(cur)
102
+ if cur not in heavy:
103
+ continue
104
+ comp.append(cur)
105
+ for nb in mol.neighbors(cur):
106
+ if nb not in visited and nb in heavy:
107
+ stack.append(nb)
108
+ if comp:
109
+ components.append(comp)
110
+
111
+ return components
112
+
113
+
114
+ # ===================================================================
115
+ # Bond order symbol
116
+ # ===================================================================
117
+
118
+ def _bond_symbol(mol: Molecule, i: int, j: int) -> str:
119
+ """Return the SMILES bond symbol between atoms *i* and *j*.
120
+
121
+ Single bonds return ``""`` (implicit), double ``"="``, triple ``"#"``.
122
+ """
123
+ bond = mol.get_bond(i, j)
124
+ if bond is None:
125
+ return ""
126
+ if bond.order == 2:
127
+ return "="
128
+ if bond.order == 3:
129
+ return "#"
130
+ return ""
131
+
132
+
133
+ # ===================================================================
134
+ # DFS SMILES generation
135
+ # ===================================================================
136
+
137
+ def _dfs_smiles(mol: Molecule, order: dict[int, int],
138
+ component: list[int]) -> str:
139
+ """Generate a SMILES string for one connected component via DFS.
140
+
141
+ Parameters
142
+ ----------
143
+ mol : Molecule
144
+ The full molecule.
145
+ order : dict[int, int]
146
+ Canonical ranking from ``_morgan_canonical_order``.
147
+ component : list[int]
148
+ Atom indices belonging to this fragment (heavy atoms only).
149
+
150
+ Returns
151
+ -------
152
+ str
153
+ SMILES string for the fragment.
154
+ """
155
+ if not component:
156
+ return ""
157
+
158
+ comp_set = set(component)
159
+
160
+ # Start DFS from the atom with the lowest canonical rank in this component
161
+ start = min(component, key=lambda i: order.get(i, 0))
162
+
163
+ visited: set[int] = set()
164
+ # Track ring closure pairs: when DFS finds a back-edge, both
165
+ # endpoints get a digit.
166
+ ring_bonds: dict[tuple[int, int], int] = {}
167
+ next_ring_digit = 1
168
+
169
+ parts: list[str] = []
170
+
171
+ def _heavy_neighbors(idx: int) -> list[int]:
172
+ """Return non-hydrogen neighbours in this component, sorted by
173
+ canonical rank (highest rank = most preferred child = last in
174
+ the sort so it gets visited first on the main chain)."""
175
+ nbs = []
176
+ for nb in mol.neighbors(idx):
177
+ if mol.atoms[nb].symbol == "H":
178
+ continue
179
+ if nb not in comp_set:
180
+ continue
181
+ nbs.append(nb)
182
+ # Sort: the *last* element becomes the "main chain" child (no
183
+ # parentheses). Lower rank = visited first = branch.
184
+ nbs.sort(key=lambda x: order.get(x, 0))
185
+ return nbs
186
+
187
+ def _atom_str(idx: int) -> str:
188
+ """Return the SMILES atom token for atom *idx*.
189
+
190
+ Outputs bracket notation ``[<isotope><symbol><chirality><hcount><charge>]``
191
+ when the atom has non-default properties (chirality, isotope, charge,
192
+ or is not in the organic subset). Organic subset atoms without
193
+ special properties are written without brackets.
194
+ """
195
+ atom = mol.atoms[idx]
196
+ sym = atom.symbol
197
+ has_chirality = atom.chirality is not None
198
+ has_isotope = atom.isotope is not None
199
+ has_charge = atom.formal_charge != 0
200
+ needs_bracket = has_chirality or has_isotope or has_charge or sym not in ORGANIC_SUBSET
201
+
202
+ if not needs_bracket:
203
+ return sym
204
+
205
+ # Build bracket atom string: [<isotope><symbol><chirality><Hn><charge>]
206
+ parts: list[str] = []
207
+ if has_isotope:
208
+ parts.append(str(atom.isotope))
209
+ parts.append(sym)
210
+ if has_chirality:
211
+ parts.append(atom.chirality)
212
+
213
+ # Compute implicit H count: count explicit H neighbours
214
+ h_count = sum(1 for nb in mol.neighbors(idx)
215
+ if mol.atoms[nb].symbol == "H")
216
+ if h_count == 1:
217
+ parts.append("H")
218
+ elif h_count > 1:
219
+ parts.append(f"H{h_count}")
220
+
221
+ if has_charge:
222
+ ch = atom.formal_charge
223
+ if ch == 1:
224
+ parts.append("+")
225
+ elif ch == -1:
226
+ parts.append("-")
227
+ elif ch > 0:
228
+ parts.append(f"+{ch}")
229
+ else:
230
+ parts.append(str(ch))
231
+
232
+ return f"[{''.join(parts)}]"
233
+
234
+ def _emit_ring_closure(idx: int, nb: int) -> None:
235
+ """Register and emit a ring closure digit between idx and nb."""
236
+ nonlocal next_ring_digit
237
+ edge = (min(idx, nb), max(idx, nb))
238
+ if edge not in ring_bonds:
239
+ ring_bonds[edge] = next_ring_digit
240
+ next_ring_digit += 1
241
+ digit = ring_bonds[edge]
242
+ bsym = _bond_symbol(mol, idx, nb)
243
+ if digit < 10:
244
+ parts.append(f"{bsym}{digit}")
245
+ else:
246
+ parts.append(f"{bsym}%{digit:02d}")
247
+
248
+ def _dfs(idx: int, parent: int | None = None) -> None:
249
+ visited.add(idx)
250
+ parts.append(_atom_str(idx))
251
+
252
+ # Classify neighbours into ring closures and tree children
253
+ nbs = _heavy_neighbors(idx)
254
+ parent_consumed = False
255
+ ring_nbs: list[int] = []
256
+ unvisited_nbs: list[int] = []
257
+
258
+ for nb in nbs:
259
+ if nb == parent and not parent_consumed:
260
+ # Skip the tree edge we arrived on (consume once)
261
+ parent_consumed = True
262
+ continue
263
+ if nb in visited:
264
+ ring_nbs.append(nb)
265
+ else:
266
+ unvisited_nbs.append(nb)
267
+
268
+ # Emit ring closure digits at this atom
269
+ for nb in ring_nbs:
270
+ _emit_ring_closure(idx, nb)
271
+
272
+ if not unvisited_nbs:
273
+ return
274
+
275
+ # Process children. All except the last become branches
276
+ # (wrapped in parentheses); the last is the main chain.
277
+ # After each branch, re-check whether later children have
278
+ # been visited (they may have been reached through a ring
279
+ # inside the branch -- in that case emit the ring closure
280
+ # digit at this atom's position).
281
+ remaining = list(unvisited_nbs)
282
+ while remaining:
283
+ # Promote children visited during a sibling branch to ring
284
+ # closures: their partner already emitted the digit, so we
285
+ # must emit the matching digit here.
286
+ still_unvisited: list[int] = []
287
+ for c in remaining:
288
+ if c in visited:
289
+ edge = (min(idx, c), max(idx, c))
290
+ if edge in ring_bonds:
291
+ _emit_ring_closure(idx, c)
292
+ # else: already handled or no ring bond -- skip
293
+ else:
294
+ still_unvisited.append(c)
295
+ remaining = still_unvisited
296
+
297
+ if not remaining:
298
+ return
299
+
300
+ if len(remaining) == 1:
301
+ # Last remaining child: main chain, no parentheses
302
+ child = remaining[0]
303
+ bsym = _bond_symbol(mol, idx, child)
304
+ parts.append(bsym)
305
+ _dfs(child, parent=idx)
306
+ return
307
+
308
+ # Branch child (not the last one)
309
+ child = remaining.pop(0)
310
+ if child in visited:
311
+ # Reached through another path; ring closure
312
+ _emit_ring_closure(idx, child)
313
+ else:
314
+ bsym = _bond_symbol(mol, idx, child)
315
+ parts.append(f"({bsym}")
316
+ _dfs(child, parent=idx)
317
+ parts.append(")")
318
+
319
+ _dfs(start)
320
+
321
+ # Also emit ring closure digits for atoms that have back-edge
322
+ # partners not yet annotated. (The partner side is handled when
323
+ # the DFS visits that atom -- it adds the digit there.)
324
+ # In the standard algorithm above, both sides are already handled.
325
+
326
+ return "".join(parts)
327
+
328
+
329
+ # ===================================================================
330
+ # Public API
331
+ # ===================================================================
332
+
333
def to_smiles(mol: Molecule) -> str:
    """Write *mol* as a SMILES string.

    Hydrogen atoms are not written as separate atoms.  Each connected
    fragment of heavy atoms is written on its own and the fragments are
    joined with ``.``.  Larger fragments come first, and fragments of
    equal size are ordered by their lowest canonical atom rank.

    Parameters
    ----------
    mol : Molecule
        A molecule built with the ``molbuilder`` framework.

    Returns
    -------
    str
        The SMILES string.  ``""`` is returned when the molecule has no
        atoms or no heavy-atom fragments.

    Examples
    --------
    >>> from molbuilder.smiles.parser import parse
    >>> mol = parse("CCO")
    >>> to_smiles(mol)  # may return "CCO" or "OCC" depending on canonicalization
    '...'
    """
    if not mol.atoms:
        return ""

    ranks = _morgan_canonical_order(mol)
    fragments = _connected_components(mol)
    if not fragments:
        return ""

    def _fragment_key(atom_indices: list[int]) -> tuple[int, int]:
        # Largest fragment first, then lowest canonical rank.
        return (-len(atom_indices),
                min(ranks.get(i, 0) for i in atom_indices))

    written = (
        _dfs_smiles(mol, ranks, fragment)
        for fragment in sorted(fragments, key=_fragment_key)
    )
    return ".".join(piece for piece in written if piece)
@@ -0,0 +1 @@
1
+ """Visualization: Bohr models, orbital clouds, molecule rendering."""