molbuilder 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. molbuilder/__init__.py +8 -0
  2. molbuilder/__main__.py +6 -0
  3. molbuilder/atomic/__init__.py +4 -0
  4. molbuilder/atomic/bohr.py +235 -0
  5. molbuilder/atomic/quantum_atom.py +334 -0
  6. molbuilder/atomic/quantum_numbers.py +196 -0
  7. molbuilder/atomic/wavefunctions.py +297 -0
  8. molbuilder/bonding/__init__.py +4 -0
  9. molbuilder/bonding/covalent.py +442 -0
  10. molbuilder/bonding/lewis.py +347 -0
  11. molbuilder/bonding/vsepr.py +433 -0
  12. molbuilder/cli/__init__.py +1 -0
  13. molbuilder/cli/demos.py +516 -0
  14. molbuilder/cli/menu.py +127 -0
  15. molbuilder/cli/wizard.py +831 -0
  16. molbuilder/core/__init__.py +6 -0
  17. molbuilder/core/bond_data.py +170 -0
  18. molbuilder/core/constants.py +51 -0
  19. molbuilder/core/element_properties.py +183 -0
  20. molbuilder/core/elements.py +181 -0
  21. molbuilder/core/geometry.py +232 -0
  22. molbuilder/gui/__init__.py +2 -0
  23. molbuilder/gui/app.py +286 -0
  24. molbuilder/gui/canvas3d.py +115 -0
  25. molbuilder/gui/dialogs.py +117 -0
  26. molbuilder/gui/event_handler.py +118 -0
  27. molbuilder/gui/sidebar.py +105 -0
  28. molbuilder/gui/toolbar.py +71 -0
  29. molbuilder/io/__init__.py +1 -0
  30. molbuilder/io/json_io.py +146 -0
  31. molbuilder/io/mol_sdf.py +169 -0
  32. molbuilder/io/pdb.py +184 -0
  33. molbuilder/io/smiles_io.py +47 -0
  34. molbuilder/io/xyz.py +103 -0
  35. molbuilder/molecule/__init__.py +2 -0
  36. molbuilder/molecule/amino_acids.py +919 -0
  37. molbuilder/molecule/builders.py +257 -0
  38. molbuilder/molecule/conformations.py +70 -0
  39. molbuilder/molecule/functional_groups.py +484 -0
  40. molbuilder/molecule/graph.py +712 -0
  41. molbuilder/molecule/peptides.py +13 -0
  42. molbuilder/molecule/stereochemistry.py +6 -0
  43. molbuilder/process/__init__.py +3 -0
  44. molbuilder/process/conditions.py +260 -0
  45. molbuilder/process/costing.py +316 -0
  46. molbuilder/process/purification.py +285 -0
  47. molbuilder/process/reactor.py +297 -0
  48. molbuilder/process/safety.py +476 -0
  49. molbuilder/process/scale_up.py +427 -0
  50. molbuilder/process/solvent_systems.py +204 -0
  51. molbuilder/reactions/__init__.py +3 -0
  52. molbuilder/reactions/functional_group_detect.py +728 -0
  53. molbuilder/reactions/knowledge_base.py +1716 -0
  54. molbuilder/reactions/reaction_types.py +102 -0
  55. molbuilder/reactions/reagent_data.py +1248 -0
  56. molbuilder/reactions/retrosynthesis.py +1430 -0
  57. molbuilder/reactions/synthesis_route.py +377 -0
  58. molbuilder/reports/__init__.py +158 -0
  59. molbuilder/reports/cost_report.py +206 -0
  60. molbuilder/reports/molecule_report.py +279 -0
  61. molbuilder/reports/safety_report.py +296 -0
  62. molbuilder/reports/synthesis_report.py +283 -0
  63. molbuilder/reports/text_formatter.py +170 -0
  64. molbuilder/smiles/__init__.py +4 -0
  65. molbuilder/smiles/parser.py +487 -0
  66. molbuilder/smiles/tokenizer.py +291 -0
  67. molbuilder/smiles/writer.py +375 -0
  68. molbuilder/visualization/__init__.py +1 -0
  69. molbuilder/visualization/bohr_viz.py +166 -0
  70. molbuilder/visualization/molecule_viz.py +368 -0
  71. molbuilder/visualization/quantum_viz.py +434 -0
  72. molbuilder/visualization/theme.py +12 -0
  73. molbuilder-1.0.0.dist-info/METADATA +360 -0
  74. molbuilder-1.0.0.dist-info/RECORD +78 -0
  75. molbuilder-1.0.0.dist-info/WHEEL +5 -0
  76. molbuilder-1.0.0.dist-info/entry_points.txt +2 -0
  77. molbuilder-1.0.0.dist-info/licenses/LICENSE +21 -0
  78. molbuilder-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,728 @@
1
+ """Functional group detection via subgraph pattern matching.
2
+
3
+ This module walks the atoms and bonds of a ``Molecule`` object and
4
+ identifies common organic functional groups by inspecting the local
5
+ neighbourhood of each atom. The detection is heuristic (based on
6
+ connectivity and bond order) rather than relying on SMARTS matching
7
+ so that it works with the graph representation already available in
8
+ the ``molbuilder.molecule.graph`` module.
9
+ """
10
+
11
+ from __future__ import annotations
12
+ from dataclasses import dataclass, field
13
+ from molbuilder.molecule.graph import Molecule, Hybridization
14
+
15
+
16
+ # =====================================================================
17
+ # Data class
18
+ # =====================================================================
19
+
20
@dataclass
class FunctionalGroup:
    """Record describing one functional group found in a molecule.

    Attributes
    ----------
    name : str
        Readable label for the group, e.g. ``"alcohol"`` or ``"ketone"``.
    smarts_like : str
        Short SMARTS-flavoured pattern string, intended for display only.
    atoms : list[int]
        Indices of every atom that takes part in the group.
    center : int
        Index of the atom regarded as the group's characteristic atom;
        defaults to ``-1`` when not supplied.
    """

    name: str
    smarts_like: str
    atoms: list[int] = field(default_factory=list)
    center: int = -1

    def __repr__(self) -> str:
        # Compact form: only the name and the central atom are shown.
        return "FunctionalGroup({!r}, center={})".format(self.name, self.center)
42
+
43
+
44
+ # =====================================================================
45
+ # Helper utilities
46
+ # =====================================================================
47
+
48
+
49
# Standard valences used for implicit-hydrogen inference, keyed by element
# symbol.  Each list is in ascending order; ``_h_count`` walks it and uses
# the first (smallest) valence that accommodates the atom's current
# bond-order sum.
# Per the original author this is the same data as
# smiles.tokenizer.DEFAULT_VALENCE (with upper-cased keys), duplicated here
# so that FG detection does not depend on the SMILES subpackage.
# NOTE(review): keep the two tables in sync -- not verifiable from this file.
_STANDARD_VALENCE: dict[str, list[int]] = {
    "B": [3], "C": [4], "N": [3, 5], "O": [2], "P": [3, 5],
    "S": [2, 4, 6], "F": [1], "Cl": [1], "Br": [1], "I": [1],
}
56
+
57
+
58
def _element(mol: Molecule, idx: int) -> str:
    """Return the element symbol stored on atom *idx*.

    The symbol is returned exactly as held in ``mol.atoms[idx].symbol``;
    no case conversion is applied here.
    """
    atom = mol.atoms[idx]
    return atom.symbol
61
+
62
+
63
def _neighbors(mol: Molecule, idx: int) -> list[int]:
    """Return the indices of the atoms bonded to atom *idx*.

    Thin wrapper over ``mol.neighbors`` so every detector goes through a
    single access point.
    """
    bonded = mol.neighbors(idx)
    return bonded
66
+
67
+
68
def _neighbor_elements(mol: Molecule, idx: int) -> list[str]:
    """Return the element symbol of each neighbour of atom *idx*.

    Symbols appear in the same order as the neighbour indices.
    """
    symbols: list[str] = []
    for nbr in _neighbors(mol, idx):
        symbols.append(_element(mol, nbr))
    return symbols
71
+
72
+
73
def _bond_order(mol: Molecule, i: int, j: int) -> float:
    """Return the order of the bond joining atoms *i* and *j*.

    ``0.0`` is returned when ``mol.get_bond`` reports no bond (``None``).
    """
    bond = mol.get_bond(i, j)
    if bond is None:
        return 0.0
    return bond.order
77
+
78
+
79
def _sum_bond_orders(mol: Molecule, idx: int) -> int:
    """Return the total bond order on atom *idx*, rounded to an integer.

    Bond orders are accumulated as floats and rounded once at the end
    (half rounds up).  Truncating each bond separately would turn a
    fractional order such as 1.5 into 1, so an atom with two order-1.5
    bonds would report a sum of 2 instead of 3 and the implicit-hydrogen
    count derived from it would be one too high.

    Parameters
    ----------
    mol : Molecule
        Molecule that owns the atom.
    idx : int
        Index of the atom whose bonds are summed.

    Returns
    -------
    int
        Rounded sum of the orders of all bonds on the atom; ``0`` when
        the atom has no neighbours.
    """
    total = sum(_bond_order(mol, idx, nbr) for nbr in _neighbors(mol, idx))
    # Bond orders are non-negative, so adding 0.5 and truncating rounds
    # half up deterministically (unlike round(), which rounds half to even).
    return int(total + 0.5)
85
+
86
+
87
def _h_count(mol: Molecule, idx: int) -> int:
    """Return the number of hydrogens on atom *idx*.

    If any neighbour of the atom is an explicit ``"H"`` node, the number
    of such neighbours is returned and no inference is attempted.
    Otherwise the count is inferred from ``_STANDARD_VALENCE``: the
    smallest listed valence that is at least the atom's bond-order sum is
    chosen, and the difference is the implicit hydrogen count.  Elements
    missing from the table, and atoms whose bond-order sum exceeds every
    listed valence, yield ``0``.
    """
    attached_h = _neighbor_elements(mol, idx).count("H")
    if attached_h:
        return attached_h

    # No explicit hydrogens: fall back to the valence table.
    options = _STANDARD_VALENCE.get(_element(mol, idx))
    if options is None:
        return 0
    used = _sum_bond_orders(mol, idx)
    # First valence with non-negative remaining capacity wins.
    return next((v - used for v in options if v - used >= 0), 0)
113
+
114
+
115
def _has_h(mol: Molecule, idx: int) -> bool:
    """Tell whether atom *idx* carries one or more hydrogens.

    Both explicit and implicit hydrogens count, as reported by
    ``_h_count``.
    """
    hydrogens = _h_count(mol, idx)
    return hydrogens >= 1
118
+
119
+
120
def _count_element_neighbors(mol: Molecule, idx: int, elem: str) -> int:
    """Return the number of neighbours of *idx* whose symbol equals *elem*.

    Bond order is not considered; every bonded neighbour is counted.
    """
    return _neighbor_elements(mol, idx).count(elem)
123
+
124
+
125
def _double_bonded_to(mol: Molecule, idx: int, elem: str) -> list[int]:
    """List the neighbours of *idx* that are *elem* and joined by a double bond.

    Only bonds whose order is exactly ``2.0`` qualify.  Indices are
    returned in neighbour order.
    """
    return [
        nbr
        for nbr in _neighbors(mol, idx)
        if _element(mol, nbr) == elem and _bond_order(mol, idx, nbr) == 2.0
    ]
132
+
133
+
134
def _single_bonded_to(mol: Molecule, idx: int, elem: str) -> list[int]:
    """List the neighbours of *idx* that are *elem* and joined by a single bond.

    Only bonds whose order is exactly ``1.0`` qualify.  Indices are
    returned in neighbour order.
    """
    return [
        nbr
        for nbr in _neighbors(mol, idx)
        if _element(mol, nbr) == elem and _bond_order(mol, idx, nbr) == 1.0
    ]
141
+
142
+
143
def _triple_bonded_to(mol: Molecule, idx: int, elem: str) -> list[int]:
    """List the neighbours of *idx* that are *elem* and joined by a triple bond.

    Only bonds whose order is exactly ``3.0`` qualify.  Indices are
    returned in neighbour order.
    """
    return [
        nbr
        for nbr in _neighbors(mol, idx)
        if _element(mol, nbr) == elem and _bond_order(mol, idx, nbr) == 3.0
    ]
150
+
151
+
152
+ # =====================================================================
153
+ # Master dispatcher
154
+ # =====================================================================
155
+
156
def detect_functional_groups(mol: Molecule) -> list[FunctionalGroup]:
    """Run every individual detector on *mol* and collect the results.

    The returned list holds one ``FunctionalGroup`` per occurrence, in
    the order the detectors are listed below.  Detectors run
    independently, so a given atom can show up in several groups.
    """
    # Order matters only for the ordering of the returned list.
    detectors = (
        _detect_carboxylic_acids,
        _detect_esters,
        _detect_amides,
        _detect_aldehydes,
        _detect_ketones,
        _detect_alcohols,
        _detect_amines,
        _detect_alkyl_halides,
        _detect_alkenes,
        _detect_alkynes,
        _detect_ethers,
        _detect_thiols,
        _detect_nitriles,
        _detect_nitro,
        _detect_aromatic_rings,
        _detect_epoxides,
        _detect_acid_chlorides,
        _detect_anhydrides,
        _detect_sulfoxides,
        _detect_sulfones,
        _detect_imines,
    )
    return [group for detect in detectors for group in detect(mol)]
186
+
187
+
188
+ # =====================================================================
189
+ # Individual detectors
190
+ # =====================================================================
191
+
192
def _detect_alcohols(mol: Molecule) -> list[FunctionalGroup]:
    """Alcohol: hydroxyl O single-bonded to a non-carbonyl carbon.

    An oxygen qualifies when it carries a hydrogen (an explicit ``"H"``
    neighbour, or an implicit one inferred by ``_h_count``) and is
    single-bonded to a carbon that has no C=O of its own.  Skipping
    carbonyl carbons keeps the -OH of a carboxylic acid from being
    reported as an alcohol as well, in the same way the amine detector
    skips amide nitrogens and the ether detector skips ester oxygens.

    Returns
    -------
    list[FunctionalGroup]
        At most one ``"alcohol"`` entry per oxygen, centred on the
        oxygen.  ``atoms`` is ``[C, O, H]`` when the hydrogen is an
        explicit node and ``[C, O]`` when it is implicit.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "O":
            continue
        nbrs = _neighbors(mol, idx)

        # First explicit hydrogen on the oxygen, if any.
        h_idx = next((n for n in nbrs if _element(mol, n) == "H"), None)
        if h_idx is None and _h_count(mol, idx) < 1:
            continue  # no hydrogen at all: ether/ester oxygen, carbonyl O, ...

        for c_idx in nbrs:
            if _element(mol, c_idx) != "C":
                continue
            if _bond_order(mol, idx, c_idx) != 1.0:
                continue
            if _double_bonded_to(mol, c_idx, "O"):
                # Carbon is a carbonyl carbon: this O-H belongs to a
                # carboxylic acid, not an alcohol.
                continue
            members = [c_idx, idx] if h_idx is None else [c_idx, idx, h_idx]
            found.append(FunctionalGroup(
                name="alcohol", smarts_like="[C]-[OH]",
                atoms=members, center=idx,
            ))
            break  # one report per oxygen
    return found
230
+
231
+
232
def _detect_aldehydes(mol: Molecule) -> list[FunctionalGroup]:
    """Aldehyde: a carbon that has a C=O and at least one hydrogen.

    The hydrogen may be explicit or implicit (see ``_has_h``).  The
    reported atoms are the carbon and the first double-bonded oxygen.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "C":
            continue
        carbonyl_o = _double_bonded_to(mol, idx, "O")
        if carbonyl_o and _has_h(mol, idx):
            found.append(FunctionalGroup(
                name="aldehyde", smarts_like="[CX3H1](=O)",
                atoms=[idx, carbonyl_o[0]], center=idx,
            ))
    return found
248
+
249
+
250
def _detect_ketones(mol: Molecule) -> list[FunctionalGroup]:
    """Ketone: a hydrogen-free C=O carbon with two or more single-bonded carbons.

    A carbonyl carbon that carries a hydrogen is left to the aldehyde
    detector.  The reported atoms are the carbonyl carbon, the first
    double-bonded oxygen and the first two carbon substituents.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "C":
            continue
        oxo = _double_bonded_to(mol, idx, "O")
        if not oxo or _has_h(mol, idx):
            continue
        carbons = _single_bonded_to(mol, idx, "C")
        if len(carbons) < 2:
            continue
        found.append(FunctionalGroup(
            name="ketone", smarts_like="[CX3](=O)([C])[C]",
            atoms=[idx, oxo[0], *carbons[:2]], center=idx,
        ))
    return found
271
+
272
+
273
def _detect_carboxylic_acids(mol: Molecule) -> list[FunctionalGroup]:
    """Carboxylic acid: a carbon bearing both a C=O and a C-OH.

    The hydroxyl is the first single-bonded oxygen that has a hydrogen
    (explicit or implicit).  At most one group is reported per carbon.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "C":
            continue
        carbonyl_o = _double_bonded_to(mol, idx, "O")
        if not carbonyl_o:
            continue
        hydroxyl_o = next(
            (o for o in _single_bonded_to(mol, idx, "O") if _has_h(mol, o)),
            None,
        )
        if hydroxyl_o is None:
            continue
        found.append(FunctionalGroup(
            name="carboxylic_acid",
            smarts_like="[CX3](=O)[OH]",
            atoms=[idx, carbonyl_o[0], hydroxyl_o], center=idx,
        ))
    return found
292
+
293
+
294
def _detect_esters(mol: Molecule) -> list[FunctionalGroup]:
    """Ester: C(=O)-O-C in which the bridging oxygen has no hydrogen.

    For every carbonyl carbon, the first hydrogen-free single-bonded
    oxygen that is also single-bonded to a different carbon yields one
    ``"ester"`` group.  The second carbon is not checked for a C=O of
    its own.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "C":
            continue
        carbonyl_o = _double_bonded_to(mol, idx, "O")
        if not carbonyl_o:
            continue
        for bridge_o in _single_bonded_to(mol, idx, "O"):
            if _has_h(mol, bridge_o):
                continue  # that is an acid -OH, not an ester linkage
            far_carbons = [
                c for c in _single_bonded_to(mol, bridge_o, "C") if c != idx
            ]
            if not far_carbons:
                continue
            found.append(FunctionalGroup(
                name="ester",
                smarts_like="[CX3](=O)[O][C]",
                atoms=[idx, carbonyl_o[0], bridge_o, far_carbons[0]],
                center=idx,
            ))
            break  # one ester per carbonyl carbon
    return found
317
+
318
+
319
def _detect_amides(mol: Molecule) -> list[FunctionalGroup]:
    """Amide: a carbon with a C=O and a single-bonded nitrogen.

    One group is reported per carbonyl carbon, using the first matching
    oxygen and the first matching nitrogen.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "C":
            continue
        carbonyl_o = _double_bonded_to(mol, idx, "O")
        amide_n = _single_bonded_to(mol, idx, "N")
        if not (carbonyl_o and amide_n):
            continue
        found.append(FunctionalGroup(
            name="amide", smarts_like="[CX3](=O)[NX3]",
            atoms=[idx, carbonyl_o[0], amide_n[0]], center=idx,
        ))
    return found
333
+
334
+
335
def _detect_amines(mol: Molecule) -> list[FunctionalGroup]:
    """Classify non-amide nitrogens as primary, secondary or tertiary amines.

    A nitrogen single-bonded to a carbon that has a C=O is treated as an
    amide nitrogen and skipped.  The class is chosen from the pair
    (number of carbon neighbours, hydrogen count): (1, 2) is primary,
    (2, 1) is secondary and (3, 0) is tertiary; any other combination
    gives no match.
    """
    # (carbon neighbours, hydrogens) -> (group name, display pattern)
    classes = {
        (1, 2): ("primary_amine", "[NX3H2][C]"),
        (2, 1): ("secondary_amine", "[NX3H1]([C])[C]"),
        (3, 0): ("tertiary_amine", "[NX3]([C])([C])[C]"),
    }
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "N":
            continue
        # Amide nitrogens are not amines.
        if any(
            _double_bonded_to(mol, c_idx, "O")
            for c_idx in _single_bonded_to(mol, idx, "C")
        ):
            continue

        key = (_count_element_neighbors(mol, idx, "C"), _h_count(mol, idx))
        match = classes.get(key)
        if match is None:
            continue
        label, pattern = match
        found.append(FunctionalGroup(
            name=label, smarts_like=pattern,
            atoms=[idx] + _single_bonded_to(mol, idx, "C"),
            center=idx,
        ))
    return found
372
+
373
+
374
def _detect_alkyl_halides(mol: Molecule) -> list[FunctionalGroup]:
    """Carbon-halogen single bonds (F, Cl, Br, I).

    One group is reported per C-X bond; the group name carries the
    lower-cased halogen symbol, e.g. ``"alkyl_halide_cl"``.  The carbon
    itself is not inspected further.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "C":
            continue
        for nbr in _neighbors(mol, idx):
            symbol = _element(mol, nbr)
            if symbol not in ("F", "Cl", "Br", "I"):
                continue
            if _bond_order(mol, idx, nbr) != 1.0:
                continue
            found.append(FunctionalGroup(
                name=f"alkyl_halide_{symbol.lower()}",
                smarts_like=f"[C][{symbol}]",
                atoms=[idx, nbr], center=idx,
            ))
    return found
390
+
391
+
392
def _detect_alkenes(mol: Molecule) -> list[FunctionalGroup]:
    """Alkene: every carbon-carbon double bond, reported once.

    Each bond is seen from both of its carbons, so pairs are keyed by
    (lower index, higher index) to avoid duplicates.  The lower-indexed
    carbon is the group's centre.
    """
    found: list[FunctionalGroup] = []
    reported: set[tuple[int, int]] = set()
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "C":
            continue
        for partner in _double_bonded_to(mol, idx, "C"):
            low, high = sorted((idx, partner))
            if (low, high) in reported:
                continue
            reported.add((low, high))
            found.append(FunctionalGroup(
                name="alkene", smarts_like="[C]=[C]",
                atoms=[low, high], center=low,
            ))
    return found
408
+
409
+
410
def _detect_alkynes(mol: Molecule) -> list[FunctionalGroup]:
    """Alkyne: every carbon-carbon triple bond, reported once.

    Each bond is seen from both of its carbons, so pairs are keyed by
    (lower index, higher index) to avoid duplicates.  The lower-indexed
    carbon is the group's centre.
    """
    found: list[FunctionalGroup] = []
    reported: set[tuple[int, int]] = set()
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "C":
            continue
        for partner in _triple_bonded_to(mol, idx, "C"):
            low, high = sorted((idx, partner))
            if (low, high) in reported:
                continue
            reported.add((low, high))
            found.append(FunctionalGroup(
                name="alkyne", smarts_like="[C]#[C]",
                atoms=[low, high], center=low,
            ))
    return found
426
+
427
+
428
def _detect_ethers(mol: Molecule) -> list[FunctionalGroup]:
    """Ether: an oxygen single-bonded to exactly two carbons, neither a carbonyl.

    If either carbon has a C=O the linkage is taken to be an ester (or
    similar) and is not reported here.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "O":
            continue
        carbons = _single_bonded_to(mol, idx, "C")
        if len(carbons) != 2:
            continue
        left, right = carbons
        # A carbonyl on either side means ester-type oxygen, not ether.
        if _double_bonded_to(mol, left, "O") or _double_bonded_to(mol, right, "O"):
            continue
        found.append(FunctionalGroup(
            name="ether", smarts_like="[C]-[O]-[C]",
            atoms=[left, idx, right], center=idx,
        ))
    return found
446
+
447
+
448
def _detect_thiols(mol: Molecule) -> list[FunctionalGroup]:
    """Thiol: a sulfur with a carbon neighbour and at least one hydrogen.

    The hydrogen may be explicit or implicit.  The first carbon in the
    sulfur's neighbour list is the one recorded; the S-C bond order is
    not checked.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "S":
            continue
        first_c = next(
            (n for n in _neighbors(mol, idx) if _element(mol, n) == "C"),
            None,
        )
        if first_c is None or not _has_h(mol, idx):
            continue
        found.append(FunctionalGroup(
            name="thiol", smarts_like="[C]-[SH]",
            atoms=[first_c, idx], center=idx,
        ))
    return found
466
+
467
+
468
def _detect_nitriles(mol: Molecule) -> list[FunctionalGroup]:
    """Nitrile: every carbon-nitrogen triple bond, centred on the carbon."""
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "C":
            continue
        found.extend(
            FunctionalGroup(
                name="nitrile", smarts_like="[C]#[N]",
                atoms=[idx, n_idx], center=idx,
            )
            for n_idx in _triple_bonded_to(mol, idx, "N")
        )
    return found
480
+
481
+
482
def _detect_nitro(mol: Molecule) -> list[FunctionalGroup]:
    """Nitro: a nitrogen with two or more oxygens, at least one of them N=O.

    Oxygens are counted over single and double bonds only.  The group
    lists the nitrogen, then its double-bonded oxygens, then its
    single-bonded ones.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "N":
            continue
        oxo = _double_bonded_to(mol, idx, "O")
        oxy = _single_bonded_to(mol, idx, "O")
        if not oxo or len(oxo) + len(oxy) < 2:
            continue
        found.append(FunctionalGroup(
            name="nitro", smarts_like="[N](=O)[O]",
            atoms=[idx, *oxo, *oxy], center=idx,
        ))
    return found
497
+
498
+
499
def _detect_aromatic_rings(mol: Molecule) -> list[FunctionalGroup]:
    """Simplified aromatic ring detection for 6- and 5-membered rings.

    Starting from every C, N, O or S atom, all rings of size six and then
    size five through that atom are enumerated.  Each distinct ring
    (compared through its canonical form) is examined once and reported
    if ``_ring_is_aromatic`` accepts it.  The display pattern is
    ``"c1ccccc1"`` regardless of ring size or heteroatoms; the ring's
    first atom is used as the centre.
    """
    found: list[FunctionalGroup] = []
    examined: set[tuple[int, ...]] = set()

    for start in range(len(mol.atoms)):
        if _element(mol, start) not in ("C", "N", "O", "S"):
            continue
        # Six-membered candidates first, then five-membered ones
        # (furan, thiophene, pyrrole, ...).
        candidates = (
            _find_rings_of_size(mol, start, 6)
            + _find_rings_of_size(mol, start, 5)
        )
        for ring in candidates:
            key = _canonicalise_ring(ring)
            if key in examined:
                continue
            examined.add(key)
            if not _ring_is_aromatic(mol, ring):
                continue
            found.append(FunctionalGroup(
                name="aromatic_ring",
                smarts_like="c1ccccc1",
                atoms=list(ring), center=ring[0],
            ))
    return found
533
+
534
+
535
def _find_rings_of_size(mol: Molecule, start: int, size: int) -> list[tuple[int, ...]]:
    """Return all simple rings of exactly *size* atoms that include *start*.

    Uses an iterative, depth-limited DFS.  To keep the cost manageable
    the search only steps onto C, N, O and S atoms (the element of
    *start* itself is not checked).

    Parameters
    ----------
    mol : Molecule
        Molecule to search.
    start : int
        Index of the atom every returned ring must contain; it is always
        the first element of each returned tuple.
    size : int
        Required number of atoms in the ring.  Values below 3 cannot
        describe a ring and give an empty result.

    Returns
    -------
    list[tuple[int, ...]]
        Atom-index paths in traversal order.  The same ring can appear
        once per traversal direction, so callers should deduplicate
        (e.g. with ``_canonicalise_ring``).
    """
    if size < 3:
        # A two-atom "ring" would just be a bond walked back and forth.
        return []

    allowed = {"C", "N", "O", "S"}
    results: list[tuple[int, ...]] = []
    # Stack entries: (current atom, path from start to current atom).
    stack: list[tuple[int, list[int]]] = [(start, [start])]
    while stack:
        current, path = stack.pop()
        if len(path) == size:
            # Path is full: it is a ring only if it closes back onto start.
            if start in _neighbors(mol, current):
                results.append(tuple(path))
            continue
        for nbr in _neighbors(mol, current):
            # ``start`` is always in ``path``, so this also rejects
            # premature closure into a ring smaller than *size*.
            if nbr in path:
                continue
            if _element(mol, nbr) not in allowed:
                continue
            stack.append((nbr, path + [nbr]))
    return results
562
+
563
+
564
def _canonicalise_ring(ring: tuple[int, ...]) -> tuple[int, ...]:
    """Return one canonical tuple for a ring, whatever its rotation or direction.

    The ring is rotated so that its smallest index comes first, both
    traversal directions from that atom are generated, and the
    lexicographically smaller of the two is returned.
    """
    pivot = ring.index(min(ring))
    clockwise = ring[pivot:] + ring[:pivot]
    # Same starting atom, remaining atoms visited in the opposite order.
    counter = clockwise[:1] + clockwise[:0:-1]
    return clockwise if clockwise <= counter else counter
571
+
572
+
573
def _ring_is_aromatic(mol: Molecule, ring: tuple[int, ...]) -> bool:
    """Heuristic aromaticity test for a ring given as consecutive atom indices.

    The ring passes if any one of these holds:

    - every ring bond has order >= 1.5 (explicit aromatic annotation);
    - every ring bond is 1.0 or 2.0 and each bond differs from the next
      one around the ring (strict single/double alternation);
    - every ring atom is SP2-hybridised.  Per the original author, the
      SMILES parser marks aromatic atoms SP2 while storing their bonds
      as order 1, which is how furan, thiophene, pyrrole and similar
      rings are caught.
    """
    size = len(ring)
    orders = [
        _bond_order(mol, ring[pos], ring[(pos + 1) % size])
        for pos in range(size)
    ]

    # Criterion 1: aromatic-annotated bonds throughout.
    if all(order >= 1.5 for order in orders):
        return True

    # Criterion 2: Kekule-style alternation, compared pairwise with wrap-around.
    if all(order in (1.0, 2.0) for order in orders):
        following = orders[1:] + orders[:1]
        if all(a != b for a, b in zip(orders, following)):
            return True

    # Criterion 3: all atoms SP2.
    return all(
        mol.atoms[member].hybridization == Hybridization.SP2
        for member in ring
    )
605
+
606
+
607
def _detect_epoxides(mol: Molecule) -> list[FunctionalGroup]:
    """Epoxide: a three-membered ring of one oxygen and two carbons.

    For each oxygen, every pair of its single-bonded carbon neighbours is
    tested for a direct bond (of any order) between the two carbons.
    Each distinct ring is reported once, centred on the oxygen.
    """
    found: list[FunctionalGroup] = []
    reported: set[tuple[int, ...]] = set()

    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "O":
            continue
        carbons = _single_bonded_to(mol, idx, "C")
        if len(carbons) < 2:
            continue
        # Walk the unordered pairs of carbon neighbours.
        for pos, c1 in enumerate(carbons):
            for c2 in carbons[pos + 1:]:
                if not _bond_order(mol, c1, c2) > 0:
                    continue
                key = _canonicalise_ring((idx, c1, c2))
                if key in reported:
                    continue
                reported.add(key)
                found.append(FunctionalGroup(
                    name="epoxide",
                    smarts_like="C1OC1",
                    atoms=[c1, idx, c2], center=idx,
                ))
    return found
632
+
633
+
634
def _detect_acid_chlorides(mol: Molecule) -> list[FunctionalGroup]:
    """Acid (acyl) chloride: a carbon with a C=O and a single-bonded Cl.

    One group is reported per carbon, using the first matching oxygen
    and the first matching chlorine.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "C":
            continue
        carbonyl_o = _double_bonded_to(mol, idx, "O")
        chlorines = _single_bonded_to(mol, idx, "Cl")
        if not (carbonyl_o and chlorines):
            continue
        found.append(FunctionalGroup(
            name="acid_chloride", smarts_like="[CX3](=O)[Cl]",
            atoms=[idx, carbonyl_o[0], chlorines[0]], center=idx,
        ))
    return found
648
+
649
+
650
def _detect_anhydrides(mol: Molecule) -> list[FunctionalGroup]:
    """Acid anhydride: C(=O)-O-C(=O).

    A bridging oxygen qualifies when at least two of its single-bonded
    carbon neighbours each carry a C=O.  The group is centred on the
    bridging oxygen and lists the first two such carbons, each followed
    or preceded by its first carbonyl oxygen: ``[C1, O1, O_bridge, C2, O2]``.

    Each oxygen index is visited exactly once by the loop, so no
    duplicate bookkeeping is needed.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "O":
            continue
        # Carbon neighbours that are carbonyl carbons.
        carbonyl_cs = [
            c for c in _single_bonded_to(mol, idx, "C")
            if _double_bonded_to(mol, c, "O")
        ]
        if len(carbonyl_cs) < 2:
            continue
        c1, c2 = carbonyl_cs[0], carbonyl_cs[1]
        o1 = _double_bonded_to(mol, c1, "O")[0]
        o2 = _double_bonded_to(mol, c2, "O")[0]
        found.append(FunctionalGroup(
            name="anhydride", smarts_like="[CX3](=O)[O][CX3](=O)",
            atoms=[c1, o1, idx, c2, o2], center=idx,
        ))
    return found
674
+
675
+
676
def _detect_sulfoxides(mol: Molecule) -> list[FunctionalGroup]:
    """Sulfoxide: a sulfur with exactly one S=O and two or more single-bonded carbons.

    A sulfur with a second S=O is left to the sulfone detector.  The
    first two carbons are recorded.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "S":
            continue
        oxo = _double_bonded_to(mol, idx, "O")
        carbons = _single_bonded_to(mol, idx, "C")
        if len(oxo) != 1 or len(carbons) < 2:
            continue
        found.append(FunctionalGroup(
            name="sulfoxide", smarts_like="[SX3](=O)([C])[C]",
            atoms=[idx, oxo[0], *carbons[:2]], center=idx,
        ))
    return found
690
+
691
+
692
def _detect_sulfones(mol: Molecule) -> list[FunctionalGroup]:
    """Sulfone: a sulfur with two or more S=O and two or more single-bonded carbons.

    The first two oxygens and the first two carbons are recorded.
    """
    found: list[FunctionalGroup] = []
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "S":
            continue
        oxo = _double_bonded_to(mol, idx, "O")
        carbons = _single_bonded_to(mol, idx, "C")
        if len(oxo) < 2 or len(carbons) < 2:
            continue
        found.append(FunctionalGroup(
            name="sulfone", smarts_like="[SX4](=O)(=O)([C])[C]",
            atoms=[idx, *oxo[:2], *carbons[:2]], center=idx,
        ))
    return found
706
+
707
+
708
def _detect_imines(mol: Molecule) -> list[FunctionalGroup]:
    """Imine: every carbon-nitrogen double bond.

    Only bonds of order exactly 2.0 count, so nitrile C#N bonds are not
    matched.  ``atoms`` holds the two indices in ascending order and the
    carbon is the centre.
    """
    found: list[FunctionalGroup] = []
    reported: set[tuple[int, int]] = set()
    for idx, atom in enumerate(mol.atoms):
        if atom.symbol != "C":
            continue
        for nbr in _neighbors(mol, idx):
            is_cn_double = (
                _element(mol, nbr) == "N"
                and _bond_order(mol, idx, nbr) == 2.0
            )
            if not is_cn_double:
                continue
            low, high = sorted((idx, nbr))
            if (low, high) in reported:
                continue
            reported.add((low, high))
            found.append(FunctionalGroup(
                name="imine", smarts_like="[C]=[N]",
                atoms=[low, high], center=idx,
            ))
    return found