molbuilder 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- molbuilder/__init__.py +8 -0
- molbuilder/__main__.py +6 -0
- molbuilder/atomic/__init__.py +4 -0
- molbuilder/atomic/bohr.py +235 -0
- molbuilder/atomic/quantum_atom.py +334 -0
- molbuilder/atomic/quantum_numbers.py +196 -0
- molbuilder/atomic/wavefunctions.py +297 -0
- molbuilder/bonding/__init__.py +4 -0
- molbuilder/bonding/covalent.py +442 -0
- molbuilder/bonding/lewis.py +347 -0
- molbuilder/bonding/vsepr.py +433 -0
- molbuilder/cli/__init__.py +1 -0
- molbuilder/cli/demos.py +516 -0
- molbuilder/cli/menu.py +127 -0
- molbuilder/cli/wizard.py +831 -0
- molbuilder/core/__init__.py +6 -0
- molbuilder/core/bond_data.py +170 -0
- molbuilder/core/constants.py +51 -0
- molbuilder/core/element_properties.py +183 -0
- molbuilder/core/elements.py +181 -0
- molbuilder/core/geometry.py +232 -0
- molbuilder/gui/__init__.py +2 -0
- molbuilder/gui/app.py +286 -0
- molbuilder/gui/canvas3d.py +115 -0
- molbuilder/gui/dialogs.py +117 -0
- molbuilder/gui/event_handler.py +118 -0
- molbuilder/gui/sidebar.py +105 -0
- molbuilder/gui/toolbar.py +71 -0
- molbuilder/io/__init__.py +1 -0
- molbuilder/io/json_io.py +146 -0
- molbuilder/io/mol_sdf.py +169 -0
- molbuilder/io/pdb.py +184 -0
- molbuilder/io/smiles_io.py +47 -0
- molbuilder/io/xyz.py +103 -0
- molbuilder/molecule/__init__.py +2 -0
- molbuilder/molecule/amino_acids.py +919 -0
- molbuilder/molecule/builders.py +257 -0
- molbuilder/molecule/conformations.py +70 -0
- molbuilder/molecule/functional_groups.py +484 -0
- molbuilder/molecule/graph.py +712 -0
- molbuilder/molecule/peptides.py +13 -0
- molbuilder/molecule/stereochemistry.py +6 -0
- molbuilder/process/__init__.py +3 -0
- molbuilder/process/conditions.py +260 -0
- molbuilder/process/costing.py +316 -0
- molbuilder/process/purification.py +285 -0
- molbuilder/process/reactor.py +297 -0
- molbuilder/process/safety.py +476 -0
- molbuilder/process/scale_up.py +427 -0
- molbuilder/process/solvent_systems.py +204 -0
- molbuilder/reactions/__init__.py +3 -0
- molbuilder/reactions/functional_group_detect.py +728 -0
- molbuilder/reactions/knowledge_base.py +1716 -0
- molbuilder/reactions/reaction_types.py +102 -0
- molbuilder/reactions/reagent_data.py +1248 -0
- molbuilder/reactions/retrosynthesis.py +1430 -0
- molbuilder/reactions/synthesis_route.py +377 -0
- molbuilder/reports/__init__.py +158 -0
- molbuilder/reports/cost_report.py +206 -0
- molbuilder/reports/molecule_report.py +279 -0
- molbuilder/reports/safety_report.py +296 -0
- molbuilder/reports/synthesis_report.py +283 -0
- molbuilder/reports/text_formatter.py +170 -0
- molbuilder/smiles/__init__.py +4 -0
- molbuilder/smiles/parser.py +487 -0
- molbuilder/smiles/tokenizer.py +291 -0
- molbuilder/smiles/writer.py +375 -0
- molbuilder/visualization/__init__.py +1 -0
- molbuilder/visualization/bohr_viz.py +166 -0
- molbuilder/visualization/molecule_viz.py +368 -0
- molbuilder/visualization/quantum_viz.py +434 -0
- molbuilder/visualization/theme.py +12 -0
- molbuilder-1.0.0.dist-info/METADATA +360 -0
- molbuilder-1.0.0.dist-info/RECORD +78 -0
- molbuilder-1.0.0.dist-info/WHEEL +5 -0
- molbuilder-1.0.0.dist-info/entry_points.txt +2 -0
- molbuilder-1.0.0.dist-info/licenses/LICENSE +21 -0
- molbuilder-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""ASCII text formatting utilities for report generation.
|
|
2
|
+
|
|
3
|
+
All output is cp1252-safe (pure ASCII printable characters only).
|
|
4
|
+
Used by molecule_report, synthesis_report, safety_report, and cost_report.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import textwrap
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# =====================================================================
|
|
13
|
+
# Section headers
|
|
14
|
+
# =====================================================================
|
|
15
|
+
|
|
16
|
+
def section_header(title: str, width: int = 70, char: str = "=") -> str:
    """Build a three-line banner: rule, centred upper-case title, rule.

    Parameters
    ----------
    title : str
        Heading text; it is upper-cased before being centred.
    width : int
        Number of times *char* is repeated for each rule, and the field
        width the title is centred in.
    char : str
        String repeated to draw the two rules.

    Returns
    -------
    str
        The three lines joined with newlines (no trailing newline).
    """
    rule = char * width
    heading = title.upper().center(width)
    return f"{rule}\n{heading}\n{rule}"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def subsection_header(title: str, width: int = 70, char: str = "-") -> str:
    """Build a one-line subsection header padded out with *char*.

    The line starts with three copies of *char*, a space, the title and
    another space; the remainder up to *width* is filled with *char*.
    When the lead-in is already *width* or longer, nothing is appended
    (the result is never truncated).

    Example::

        --- Atom Composition --------------------------------------------------
    """
    lead = f"{char * 3} {title} "
    fill_count = max(0, width - len(lead))
    return lead + char * fill_count
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# =====================================================================
|
|
42
|
+
# Tables
|
|
43
|
+
# =====================================================================
|
|
44
|
+
|
|
45
|
+
def ascii_table(headers: list[str], rows: list[list[str]],
                alignments: list[str] | None = None,
                min_widths: list[int] | None = None) -> str:
    """Generate a formatted ASCII table with column alignment.

    Parameters
    ----------
    headers : list[str]
        Column header labels.  The number of headers fixes the number of
        columns.
    rows : list[list[str]]
        Table data (each row is a list of cells).  Cells are converted
        with ``str()``.  Rows shorter than *headers* are padded with
        empty cells; cells beyond the last header are ignored.
    alignments : list[str] | None
        Per-column alignment: ``'l'`` left, ``'r'`` right, ``'c'`` center.
        Any other code is treated as left.  Defaults to left-aligned for
        every column; a list shorter than *headers* is padded with
        ``'l'`` instead of raising ``IndexError``.
    min_widths : list[int] | None
        Minimum column widths.  Actual widths expand to fit content.
        A list shorter than *headers* is padded with ``0``.

    Returns
    -------
    str
        Header line, a dashed divider line, then one line per row,
        joined with newlines.
    """
    n_cols = len(headers)

    # Normalise the optional per-column lists to exactly n_cols entries so
    # that a caller who only configures the leading columns does not crash.
    aligns = list(alignments or [])[:n_cols]
    aligns += ["l"] * (n_cols - len(aligns))
    floors = list(min_widths or [])[:n_cols]
    floors += [0] * (n_cols - len(floors))

    labels = [str(h) for h in headers]

    # Each column is as wide as its widest cell, header, or minimum width.
    col_widths: list[int] = [
        max(
            len(labels[c]),
            floors[c],
            max((len(str(row[c])) for row in rows if c < len(row)), default=0),
        )
        for c in range(n_cols)
    ]

    def _fmt_cell(text: str, width: int, align: str) -> str:
        if align == "r":
            return text.rjust(width)
        if align == "c":
            return text.center(width)
        return text.ljust(width)

    sep = " "
    header_line = sep.join(
        _fmt_cell(labels[c], col_widths[c], aligns[c])
        for c in range(n_cols)
    )
    divider = sep.join("-" * w for w in col_widths)

    lines = [header_line, divider]
    for row in rows:
        lines.append(sep.join(
            _fmt_cell(str(row[c]) if c < len(row) else "",
                      col_widths[c], aligns[c])
            for c in range(n_cols)
        ))

    return "\n".join(lines)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# =====================================================================
|
|
103
|
+
# Text utilities
|
|
104
|
+
# =====================================================================
|
|
105
|
+
|
|
106
|
+
def word_wrap(text: str, width: int = 70, indent: int = 0) -> str:
    """Wrap *text* to *width* columns, indenting every line by *indent* spaces.

    The indent counts towards *width*.  Wrapping is delegated to
    :mod:`textwrap`; the wrapped lines are returned joined by newlines.
    """
    margin = " " * indent
    wrapper = textwrap.TextWrapper(
        width=width,
        initial_indent=margin,
        subsequent_indent=margin,
    )
    return wrapper.fill(text)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def bullet_list(items: list[str], indent: int = 2, bullet: str = "-",
                width: int = 70) -> str:
    """Format items as a bulleted list.

    Parameters
    ----------
    items : list[str]
        One entry per bullet.
    indent : int
        Number of spaces before each bullet marker.
    bullet : str
        Marker placed before each item, followed by one space.
    width : int
        Maximum line width, including indent and marker.  Defaults to 70,
        the value that was previously hard-coded.

    Returns
    -------
    str
        The wrapped items joined with newlines.  Continuation lines of a
        wrapped item are indented so they line up under the item text.
    """
    prefix = " " * indent + bullet + " "
    # Continuation lines align with the text that follows the marker.
    subsequent = " " * (indent + len(bullet) + 1)
    return "\n".join(
        textwrap.fill(
            item, width=width,
            initial_indent=prefix,
            subsequent_indent=subsequent,
        )
        for item in items
    )
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def key_value_block(pairs: list[tuple[str, str]], separator: str = ": ",
                    indent: int = 2) -> str:
    """Render key/value pairs one per line with the separators lined up.

    Keys are left-justified to the length of the longest key, so every
    *separator* starts in the same column.  Each line is prefixed with
    *indent* spaces.  An empty *pairs* yields an empty string.
    """
    if not pairs:
        return ""

    key_width = max(len(name) for name, _ in pairs)
    margin = " " * indent
    return "\n".join(
        "{m}{k:<{w}}{s}{v}".format(
            m=margin, k=name, w=key_width, s=separator, v=content)
        for name, content in pairs
    )
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# =====================================================================
|
|
145
|
+
# Charts and number formatting
|
|
146
|
+
# =====================================================================
|
|
147
|
+
|
|
148
|
+
def horizontal_bar(value: float, max_value: float, width: int = 40,
                   char: str = "#") -> str:
    """Render one line of a horizontal bar chart.

    The filled part is ``round(width * value / max_value)`` copies of
    *char*, with the ratio clamped to the range 0..1; the rest of the
    line is padded with spaces up to *width*.  A *max_value* of zero or
    less produces an all-blank bar.
    """
    if max_value <= 0:
        # No meaningful scale: draw an empty bar.
        return char * 0 + " " * width

    fraction = value / max_value
    fraction = min(1.0, fraction)
    fraction = max(0.0, fraction)
    n_filled = int(round(fraction * width))
    return char * n_filled + " " * (width - n_filled)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def format_currency(amount: float) -> str:
    """Format as USD with commas: ``$1,234.56``.

    Negative amounts are written with the sign in front of the currency
    symbol (``-$1,234.56``) rather than between the symbol and the
    digits.  Output for non-negative amounts is unchanged.
    """
    text = "{:,.2f}".format(amount)
    if text.startswith("-"):
        # Move the minus sign ahead of the dollar sign.
        return "-$" + text[1:]
    return "$" + text
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def format_percent(value: float, decimals: int = 1) -> str:
    """Format *value* with a trailing percent sign: ``85.0%``.

    *value* is used as given (it is not multiplied by 100) and is shown
    with *decimals* digits after the decimal point.
    """
    return f"{value:.{decimals}f}%"
|
|
@@ -0,0 +1,487 @@
|
|
|
1
|
+
"""SMILES parser: tokens -> Molecule with 3D coordinates.
|
|
2
|
+
|
|
3
|
+
Algorithm:
|
|
4
|
+
1. Parse tokens using a stack for branch tracking and a dict for ring closures
|
|
5
|
+
2. Build connectivity graph (atoms + bonds)
|
|
6
|
+
3. Add implicit hydrogens based on standard valence
|
|
7
|
+
4. Assign 3D coordinates via BFS z-matrix placement
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import math
|
|
13
|
+
from collections import deque
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
from molbuilder.smiles.tokenizer import (
|
|
18
|
+
tokenize, Token, TokenType, DEFAULT_VALENCE, ORGANIC_SUBSET, AROMATIC_ATOMS,
|
|
19
|
+
)
|
|
20
|
+
from molbuilder.molecule.graph import Molecule, Hybridization
|
|
21
|
+
from molbuilder.core.bond_data import bond_length, SP3_ANGLE, SP2_ANGLE, SP_ANGLE
|
|
22
|
+
from molbuilder.core.geometry import (
|
|
23
|
+
normalize, place_atom_zmatrix, available_tetrahedral_dirs,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# ===================================================================
|
|
28
|
+
# Bond order from SMILES bond symbol
|
|
29
|
+
# ===================================================================
|
|
30
|
+
|
|
31
|
+
_BOND_ORDER = {
|
|
32
|
+
"-": 1,
|
|
33
|
+
"=": 2,
|
|
34
|
+
"#": 3,
|
|
35
|
+
":": 1, # aromatic bond treated as order 1 for connectivity
|
|
36
|
+
"/": 1, # E/Z bond direction indicator (single bond connectivity)
|
|
37
|
+
"\\": 1, # E/Z bond direction indicator (single bond connectivity)
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ===================================================================
|
|
42
|
+
# Internal data structures used during graph construction
|
|
43
|
+
# ===================================================================
|
|
44
|
+
|
|
45
|
+
class _AtomInfo:
|
|
46
|
+
"""Lightweight bookkeeping record for an atom during parsing."""
|
|
47
|
+
|
|
48
|
+
__slots__ = ("index", "symbol", "aromatic", "bracket",
|
|
49
|
+
"isotope", "hcount", "charge", "chirality")
|
|
50
|
+
|
|
51
|
+
def __init__(self, index: int, symbol: str, aromatic: bool = False,
|
|
52
|
+
bracket: bool = False, isotope: int | None = None,
|
|
53
|
+
hcount: int | None = None, charge: int = 0,
|
|
54
|
+
chirality: str | None = None):
|
|
55
|
+
self.index = index
|
|
56
|
+
self.symbol = symbol
|
|
57
|
+
self.aromatic = aromatic
|
|
58
|
+
self.bracket = bracket
|
|
59
|
+
self.isotope = isotope
|
|
60
|
+
self.hcount = hcount
|
|
61
|
+
self.charge = charge
|
|
62
|
+
self.chirality = chirality
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class _BondInfo:
|
|
66
|
+
"""Lightweight bookkeeping record for a bond during parsing."""
|
|
67
|
+
|
|
68
|
+
__slots__ = ("atom_i", "atom_j", "order")
|
|
69
|
+
|
|
70
|
+
def __init__(self, atom_i: int, atom_j: int, order: int = 1):
|
|
71
|
+
self.atom_i = atom_i
|
|
72
|
+
self.atom_j = atom_j
|
|
73
|
+
self.order = order
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# ===================================================================
|
|
77
|
+
# Graph construction from tokens
|
|
78
|
+
# ===================================================================
|
|
79
|
+
|
|
80
|
+
def _build_graph(tokens: list[Token]) -> tuple[list[_AtomInfo], list[_BondInfo]]:
    """Walk the token list and build atom / bond lists.

    Uses a stack for branch handling and a dictionary for ring closures.

    Parameters
    ----------
    tokens : list[Token]
        Tokens to process, in order.

    Returns
    -------
    atoms : list[_AtomInfo]
    bonds : list[_BondInfo]

    Raises
    ------
    ValueError
        If a ring-closure digit appears before any atom of the current
        fragment, if a ring-closure digit is opened and closed on the
        same atom, or if any ring-closure digit is still open at the end
        of the token list.
    """
    atoms: list[_AtomInfo] = []
    bonds: list[_BondInfo] = []

    stack: list[int] = []                           # branch stack of atom indices
    ring_closures: dict[str, tuple[int, int]] = {}  # digit -> (atom_idx, bond_order)
    prev: int | None = None                         # index of the most recent atom
    pending_bond_order: int | None = None           # explicit bond symbol waiting

    for tok in tokens:
        # ---- atom ----
        if tok.type == TokenType.ATOM:
            idx = len(atoms)
            # Parenthesised to make the and/or grouping explicit; the
            # grouping is the same as Python's default precedence.
            is_bracket = (
                tok.hcount is not None
                or tok.charge != 0
                or tok.isotope is not None
                or (tok.value not in ORGANIC_SUBSET
                    and tok.value not in AROMATIC_ATOMS)
            )
            # Canonical symbol: aromatic lowercase -> titlecase for storage
            symbol = tok.value
            if tok.aromatic and symbol.islower():
                symbol = symbol.capitalize()

            atoms.append(_AtomInfo(
                index=idx,
                symbol=symbol,
                aromatic=tok.aromatic,
                bracket=is_bracket,
                isotope=tok.isotope,
                hcount=tok.hcount,
                charge=tok.charge,
                chirality=tok.chirality,
            ))

            # Bond to previous atom.  Without an explicit bond symbol the
            # order is 1, including between two aromatic atoms.
            if prev is not None:
                bonds.append(_BondInfo(prev, idx, pending_bond_order or 1))
            pending_bond_order = None
            prev = idx
            continue

        # ---- bond symbol ----
        if tok.type == TokenType.BOND:
            pending_bond_order = _BOND_ORDER[tok.value]
            continue

        # ---- branch open ----
        if tok.type == TokenType.BRANCH_OPEN:
            if prev is not None:
                stack.append(prev)
            continue

        # ---- branch close ----
        if tok.type == TokenType.BRANCH_CLOSE:
            if stack:
                prev = stack.pop()
            pending_bond_order = None
            continue

        # ---- ring closure ----
        if tok.type == TokenType.RING_DIGIT:
            digit = tok.value
            if prev is None:
                # Nothing to attach the ring bond to; recording None as an
                # atom index would only fail later with an unrelated error.
                raise ValueError(
                    f"Ring closure digit {digit} has no preceding atom")
            if digit in ring_closures:
                # Close the ring
                other_idx, ring_order = ring_closures.pop(digit)
                if other_idx == prev:
                    # e.g. "C11": an atom cannot be bonded to itself.
                    raise ValueError(
                        f"Ring closure digit {digit} opens and closes "
                        f"on the same atom")
                order = pending_bond_order or ring_order
                bonds.append(_BondInfo(prev, other_idx, order))
            else:
                # Open a ring
                ring_closures[digit] = (prev, pending_bond_order or 1)
            pending_bond_order = None
            continue

        # ---- dot (disconnection) ----
        if tok.type == TokenType.DOT:
            prev = None
            pending_bond_order = None
            continue

    if ring_closures:
        open_digits = ", ".join(ring_closures.keys())
        raise ValueError(
            f"Unclosed ring closure(s) for digit(s): {open_digits}")

    return atoms, bonds
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# ===================================================================
|
|
182
|
+
# Implicit hydrogen addition
|
|
183
|
+
# ===================================================================
|
|
184
|
+
|
|
185
|
+
def _explicit_valence(atom_idx: int, bonds: list[_BondInfo]) -> int:
|
|
186
|
+
"""Sum of bond orders touching *atom_idx*."""
|
|
187
|
+
total = 0
|
|
188
|
+
for b in bonds:
|
|
189
|
+
if b.atom_i == atom_idx or b.atom_j == atom_idx:
|
|
190
|
+
total += b.order
|
|
191
|
+
return total
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _add_implicit_hydrogens(
    atoms: list[_AtomInfo],
    bonds: list[_BondInfo],
) -> tuple[list[_AtomInfo], list[_BondInfo]]:
    """Add implicit H atoms to the parsed graph.

    Atoms flagged as bracket atoms receive exactly their recorded
    ``hcount``; a bracket atom with no hydrogen count gets none, because
    a SMILES bracket atom states its hydrogens explicitly.  All other
    atoms use ``DEFAULT_VALENCE`` to infer the missing hydrogens; atoms
    with no entry there are left untouched.

    Both lists are extended in place: each new hydrogen is appended to
    *atoms* together with a single bond to its heavy atom in *bonds*.

    Returns the (possibly extended) atoms and bonds lists.
    """
    heavy_count = len(atoms)  # only the atoms present on entry are filled

    for ai in range(heavy_count):
        atom = atoms[ai]

        if atom.bracket:
            # Bracket atom: trust the stated count, never fill to valence.
            n_h = atom.hcount or 0
        else:
            # Look up default valence for organic subset / aromatic atoms
            lookup_sym = atom.symbol.lower() if atom.aromatic else atom.symbol
            if lookup_sym not in DEFAULT_VALENCE and atom.symbol not in DEFAULT_VALENCE:
                continue  # unknown atom -- no implicit H
            valences = DEFAULT_VALENCE.get(
                lookup_sym, DEFAULT_VALENCE.get(atom.symbol, []))
            if not valences:
                continue

            ev = _explicit_valence(ai, bonds)

            # Pick the smallest default valence >= explicit valence; if the
            # atom already exceeds them all, fall back to the largest.
            target = next((v for v in sorted(valences) if v >= ev), None)
            if target is None:
                target = max(valences)

            # Aromatic atoms contribute one electron to the pi system,
            # so reduce the target by 1.
            if atom.aromatic:
                target = max(0, target - 1)

            n_h = max(0, target - ev)

        # Add H atoms
        for _ in range(n_h):
            h_idx = len(atoms)
            atoms.append(_AtomInfo(
                index=h_idx, symbol="H", aromatic=False, bracket=False))
            bonds.append(_BondInfo(ai, h_idx, 1))

    return atoms, bonds
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# ===================================================================
|
|
251
|
+
# Hybridization determination
|
|
252
|
+
# ===================================================================
|
|
253
|
+
|
|
254
|
+
def _determine_hybridization(
    atom_idx: int,
    atoms: list[_AtomInfo],
    bonds: list[_BondInfo],
) -> Hybridization:
    """Classify an atom's hybridization from the bonds that touch it.

    Checked in this order
    ---------------------
    1. Atom flagged aromatic       -> SP2
    2. At least one order-3 bond   -> SP
    3. At least one order-2 bond   -> SP2
    4. Otherwise                   -> SP3
    """
    if atoms[atom_idx].aromatic:
        return Hybridization.SP2

    # Orders of every bond that has this atom as an endpoint.
    touching = [
        bond.order
        for bond in bonds
        if atom_idx in (bond.atom_i, bond.atom_j)
    ]

    if any(order == 3 for order in touching):
        return Hybridization.SP
    if any(order == 2 for order in touching):
        return Hybridization.SP2
    return Hybridization.SP3
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
# ===================================================================
|
|
285
|
+
# 3D coordinate assignment via BFS
|
|
286
|
+
# ===================================================================
|
|
287
|
+
|
|
288
|
+
def _angle_for_hyb(hyb: Hybridization) -> float:
    """Map a hybridization to its bond-angle constant.

    ``SP`` gives ``SP_ANGLE`` and ``SP2`` gives ``SP2_ANGLE``; every
    other value gives ``SP3_ANGLE``.
    """
    return (
        SP_ANGLE if hyb == Hybridization.SP
        else SP2_ANGLE if hyb == Hybridization.SP2
        else SP3_ANGLE
    )
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _assign_3d_coordinates(mol: Molecule, fragment_spacing: float = 3.0) -> None:
    """Place atoms in 3D using one BFS per connected fragment.

    Algorithm
    ---------
    1. Place the first atom of a fragment at the origin.
    2. BFS outward; for each newly visited atom, use its parent (and
       grandparent if available) for z-matrix placement.
    3. Distribute multiple substituents around each centre at regular
       dihedral intervals based on hybridization.
    4. Repeat for every atom not reached from an earlier fragment, then
       translate that fragment along +x so it starts *fragment_spacing*
       beyond the largest x coordinate placed so far.

    The fragment containing atom 0 is never translated, so a fully
    connected molecule gets the same coordinates as a single BFS from
    atom 0.  Without step 4, atoms of a disconnected fragment would keep
    whatever position they had on entry and could all coincide.

    Parameters
    ----------
    mol : Molecule
        Molecule whose atom positions are overwritten in place.
    fragment_spacing : float
        Gap left along x between consecutive fragments, in the same
        length unit that ``bond_length`` returns (presumably angstroms
        -- TODO confirm).
    """
    n = len(mol.atoms)
    if n == 0:
        return

    # Build adjacency from molecule bonds: atom -> [(neighbour, bond order)]
    adj: dict[int, list[tuple[int, int]]] = {i: [] for i in range(n)}
    for b in mol.bonds:
        adj[b.atom_i].append((b.atom_j, b.order))
        adj[b.atom_j].append((b.atom_i, b.order))

    placed: set[int] = set()
    # Track children scheduled per parent to assign dihedral offsets
    child_counter: dict[int, int] = {}
    occupied_max_x: float | None = None  # None until the first fragment exists

    for seed in range(n):
        if seed in placed:
            continue

        members = _place_fragment(mol, seed, adj, placed, child_counter)
        xs = [float(mol.atoms[i].position[0]) for i in members]

        shift_x = 0.0
        if occupied_max_x is not None:
            # Fragments are built around the origin, so move this one
            # clear of everything placed before it.
            shift_x = occupied_max_x + fragment_spacing - min(xs)
            shift = np.array([shift_x, 0.0, 0.0])
            for i in members:
                mol.atoms[i].position = (
                    np.asarray(mol.atoms[i].position, dtype=float) + shift)

        frag_max_x = max(xs) + shift_x
        if occupied_max_x is None or frag_max_x > occupied_max_x:
            occupied_max_x = frag_max_x


def _place_fragment(
    mol: Molecule,
    seed: int,
    adj: dict[int, list[tuple[int, int]]],
    placed: set[int],
    child_counter: dict[int, int],
) -> list[int]:
    """BFS-place the fragment containing *seed*, with *seed* at the origin.

    Updates *placed* and *child_counter* in place and returns the indices
    of the atoms positioned by this call, in placement order.
    """
    mol.atoms[seed].position = np.array([0.0, 0.0, 0.0])
    placed.add(seed)
    members = [seed]

    # BFS queue: (atom_index, parent_index, grandparent_index_or_None),
    # seeded with every neighbour of the starting atom.
    queue: deque[tuple[int, int | None, int | None]] = deque(
        (nb_idx, seed, None) for nb_idx, _ in adj[seed]
    )

    while queue:
        atom_idx, parent_idx, grandparent_idx = queue.popleft()

        if atom_idx in placed:
            continue

        parent_pos = mol.atoms[parent_idx].position
        parent_hyb = mol.atoms[parent_idx].hybridization
        angle = _angle_for_hyb(parent_hyb) if parent_hyb else SP3_ANGLE

        # Determine bond order for bond length
        b_order = 1
        for nb, bo in adj[parent_idx]:
            if nb == atom_idx:
                b_order = bo
                break

        bl = bond_length(
            mol.atoms[parent_idx].symbol,
            mol.atoms[atom_idx].symbol,
            b_order,
        )

        # Child counter for dihedral offset
        child_num = child_counter.get(parent_idx, 0)
        child_counter[parent_idx] = child_num + 1

        # Dihedral step depends on parent hybridization
        if parent_hyb == Hybridization.SP2:
            dihedral_step = 120.0
        elif parent_hyb == Hybridization.SP:
            dihedral_step = 180.0
        else:
            dihedral_step = 120.0  # tetrahedral uses ~120 deg between projections

        dihedral = dihedral_step * child_num

        if grandparent_idx is not None and grandparent_idx in placed:
            # Normal z-matrix placement
            gp_pos = mol.atoms[grandparent_idx].position
            pos = place_atom_zmatrix(
                parent_pos, gp_pos,
                _dihedral_ref_pos(mol, parent_idx, grandparent_idx, placed),
                bl, angle, dihedral,
            )
        elif len(members) == 1:
            # Second atom of the fragment: place along +z
            pos = parent_pos + np.array([0.0, 0.0, bl])
        else:
            # No grandparent yet -- use a synthetic reference
            ref_pos = parent_pos + np.array([0.0, 0.0, -1.0])
            synth_k = ref_pos + np.array([0.0, 1.0, 0.0])
            pos = place_atom_zmatrix(
                parent_pos, ref_pos, synth_k,
                bl, angle, dihedral,
            )

        mol.atoms[atom_idx].position = pos
        placed.add(atom_idx)
        members.append(atom_idx)

        # Enqueue unvisited neighbours
        for nb_idx, _ in adj[atom_idx]:
            if nb_idx not in placed:
                queue.append((nb_idx, atom_idx, parent_idx))

    return members
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _dihedral_ref_pos(
    mol: Molecule,
    parent_idx: int,
    grandparent_idx: int,
    placed: set[int],
) -> np.ndarray:
    """Pick the third reference point used for a z-matrix dihedral.

    Returns the position of the first already-placed neighbour of the
    grandparent other than the parent.  When there is none, a synthetic
    point is built by offsetting the grandparent perpendicular to the
    grandparent->parent axis.
    """
    anchor = next(
        (nb for nb in mol.neighbors(grandparent_idx)
         if nb in placed and nb != parent_idx),
        None,
    )
    if anchor is not None:
        return mol.atoms[anchor].position

    # Synthetic fallback: offset perpendicular to parent-grandparent axis
    origin = mol.atoms[grandparent_idx].position
    axis = mol.atoms[parent_idx].position - origin
    helper = np.array([1.0, 0.0, 0.0])
    nearly_parallel = abs(np.dot(normalize(axis), helper)) > 0.9
    if nearly_parallel:
        # x is too close to the axis for a stable cross product; use y.
        helper = np.array([0.0, 1.0, 0.0])
    return origin + np.cross(axis, helper) * 0.5
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
# ===================================================================
|
|
426
|
+
# Public API
|
|
427
|
+
# ===================================================================
|
|
428
|
+
|
|
429
|
+
def parse(smiles: str) -> Molecule:
    """Build a Molecule with approximate 3D coordinates from SMILES text.

    The string is tokenized, turned into an atom/bond graph, completed
    with implicit hydrogens, copied into a new ``Molecule`` named after
    the input string, and finally given 3D coordinates.

    Parameters
    ----------
    smiles : str
        A SMILES string, e.g. ``"CCO"`` (ethanol), ``"c1ccccc1"``
        (benzene), ``"CC(=O)O"`` (acetic acid).

    Returns
    -------
    Molecule
        A fully constructed molecule with atoms, bonds, hybridization,
        and approximate 3D coordinates.

    Raises
    ------
    ValueError
        If the SMILES string is invalid.

    Examples
    --------
    >>> mol = parse("C")
    >>> len([a for a in mol.atoms if a.symbol == "C"])
    1
    >>> len([a for a in mol.atoms if a.symbol == "H"])
    4
    """
    atom_records, bond_records = _add_implicit_hydrogens(
        *_build_graph(tokenize(smiles)))

    molecule = Molecule(name=smiles)

    # Atoms first, all at a placeholder origin; real positions come later.
    for record in atom_records:
        molecule.add_atom(
            symbol=record.symbol,
            position=[0.0, 0.0, 0.0],
            hybridization=_determine_hybridization(
                record.index, atom_records, bond_records),
            chirality=record.chirality,
            isotope=record.isotope,
            formal_charge=record.charge,
        )

    # Bonds: a bond is flagged rotatable when it is single and neither
    # end is a hydrogen.
    for link in bond_records:
        is_rotatable = link.order == 1 and "H" not in (
            atom_records[link.atom_i].symbol,
            atom_records[link.atom_j].symbol,
        )
        molecule.add_bond(
            link.atom_i, link.atom_j, order=link.order, rotatable=is_rotatable)

    _assign_3d_coordinates(molecule)
    return molecule
|