molbuilder 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- molbuilder/__init__.py +8 -0
- molbuilder/__main__.py +6 -0
- molbuilder/atomic/__init__.py +4 -0
- molbuilder/atomic/bohr.py +235 -0
- molbuilder/atomic/quantum_atom.py +334 -0
- molbuilder/atomic/quantum_numbers.py +196 -0
- molbuilder/atomic/wavefunctions.py +297 -0
- molbuilder/bonding/__init__.py +4 -0
- molbuilder/bonding/covalent.py +442 -0
- molbuilder/bonding/lewis.py +347 -0
- molbuilder/bonding/vsepr.py +433 -0
- molbuilder/cli/__init__.py +1 -0
- molbuilder/cli/demos.py +516 -0
- molbuilder/cli/menu.py +127 -0
- molbuilder/cli/wizard.py +831 -0
- molbuilder/core/__init__.py +6 -0
- molbuilder/core/bond_data.py +170 -0
- molbuilder/core/constants.py +51 -0
- molbuilder/core/element_properties.py +183 -0
- molbuilder/core/elements.py +181 -0
- molbuilder/core/geometry.py +232 -0
- molbuilder/gui/__init__.py +2 -0
- molbuilder/gui/app.py +286 -0
- molbuilder/gui/canvas3d.py +115 -0
- molbuilder/gui/dialogs.py +117 -0
- molbuilder/gui/event_handler.py +118 -0
- molbuilder/gui/sidebar.py +105 -0
- molbuilder/gui/toolbar.py +71 -0
- molbuilder/io/__init__.py +1 -0
- molbuilder/io/json_io.py +146 -0
- molbuilder/io/mol_sdf.py +169 -0
- molbuilder/io/pdb.py +184 -0
- molbuilder/io/smiles_io.py +47 -0
- molbuilder/io/xyz.py +103 -0
- molbuilder/molecule/__init__.py +2 -0
- molbuilder/molecule/amino_acids.py +919 -0
- molbuilder/molecule/builders.py +257 -0
- molbuilder/molecule/conformations.py +70 -0
- molbuilder/molecule/functional_groups.py +484 -0
- molbuilder/molecule/graph.py +712 -0
- molbuilder/molecule/peptides.py +13 -0
- molbuilder/molecule/stereochemistry.py +6 -0
- molbuilder/process/__init__.py +3 -0
- molbuilder/process/conditions.py +260 -0
- molbuilder/process/costing.py +316 -0
- molbuilder/process/purification.py +285 -0
- molbuilder/process/reactor.py +297 -0
- molbuilder/process/safety.py +476 -0
- molbuilder/process/scale_up.py +427 -0
- molbuilder/process/solvent_systems.py +204 -0
- molbuilder/reactions/__init__.py +3 -0
- molbuilder/reactions/functional_group_detect.py +728 -0
- molbuilder/reactions/knowledge_base.py +1716 -0
- molbuilder/reactions/reaction_types.py +102 -0
- molbuilder/reactions/reagent_data.py +1248 -0
- molbuilder/reactions/retrosynthesis.py +1430 -0
- molbuilder/reactions/synthesis_route.py +377 -0
- molbuilder/reports/__init__.py +158 -0
- molbuilder/reports/cost_report.py +206 -0
- molbuilder/reports/molecule_report.py +279 -0
- molbuilder/reports/safety_report.py +296 -0
- molbuilder/reports/synthesis_report.py +283 -0
- molbuilder/reports/text_formatter.py +170 -0
- molbuilder/smiles/__init__.py +4 -0
- molbuilder/smiles/parser.py +487 -0
- molbuilder/smiles/tokenizer.py +291 -0
- molbuilder/smiles/writer.py +375 -0
- molbuilder/visualization/__init__.py +1 -0
- molbuilder/visualization/bohr_viz.py +166 -0
- molbuilder/visualization/molecule_viz.py +368 -0
- molbuilder/visualization/quantum_viz.py +434 -0
- molbuilder/visualization/theme.py +12 -0
- molbuilder-1.0.0.dist-info/METADATA +360 -0
- molbuilder-1.0.0.dist-info/RECORD +78 -0
- molbuilder-1.0.0.dist-info/WHEEL +5 -0
- molbuilder-1.0.0.dist-info/entry_points.txt +2 -0
- molbuilder-1.0.0.dist-info/licenses/LICENSE +21 -0
- molbuilder-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lewis Structure Determination
|
|
3
|
+
|
|
4
|
+
Parses a molecular formula, identifies the central atom, distributes
|
|
5
|
+
bonding pairs and lone pairs, and checks octet satisfaction.
|
|
6
|
+
|
|
7
|
+
Handles:
|
|
8
|
+
- Single, double, and triple bonds
|
|
9
|
+
- Expanded octets (period 3+ central atoms)
|
|
10
|
+
- Incomplete octets (H, Be, B)
|
|
11
|
+
- Polyatomic ions via charge parameter
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from molbuilder.core.elements import SYMBOL_TO_Z
|
|
17
|
+
from molbuilder.core.element_properties import (
|
|
18
|
+
electronegativity,
|
|
19
|
+
target_electrons,
|
|
20
|
+
can_expand_octet,
|
|
21
|
+
PAULING_ELECTRONEGATIVITY,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ===================================================================
|
|
26
|
+
# Formula parsing
|
|
27
|
+
# ===================================================================
|
|
28
|
+
|
|
29
|
+
def parse_formula(formula: str) -> list[str]:
    """Parse a flat molecular formula string into a list of element symbols.

    An element symbol is one uppercase letter optionally followed by one
    lowercase letter, and may carry a decimal repeat count (a missing
    count means 1).  Whitespace between tokens is ignored.  Any other
    character -- parentheses, charge signs, stray lowercase letters,
    leading digits -- is rejected rather than silently skipped, because
    skipping it yields a wrong atom list (e.g. 'Ca(OH)2' would otherwise
    come back as Ca, O, H, H).

    Parameters
    ----------
    formula : str
        Molecular formula such as 'H2O' or 'SF6'.

    Returns
    -------
    list[str]
        One entry per atom, in formula order.  Empty when the formula
        is empty or contains only whitespace.

    Raises
    ------
    ValueError
        If the formula contains characters that are not part of an
        ``Element[count]`` token, or an element symbol that is not a key
        of ``SYMBOL_TO_Z``.

    Examples
    --------
    >>> parse_formula('H2O')
    ['H', 'H', 'O']
    >>> parse_formula('CH4')
    ['C', 'H', 'H', 'H', 'H']
    >>> parse_formula('SF6')
    ['S', 'F', 'F', 'F', 'F', 'F', 'F']
    """
    # Whatever is left after removing every Element[count] token must be
    # whitespace; otherwise part of the input would be ignored.
    leftover = re.sub(r'[A-Z][a-z]?\d*', '', formula)
    if leftover.strip():
        raise ValueError(
            f"Cannot parse formula {formula!r}: "
            f"unsupported characters {leftover.strip()!r}"
        )

    atoms = []
    for symbol, count_str in re.findall(r'([A-Z][a-z]?)(\d*)', formula):
        if symbol not in SYMBOL_TO_Z:
            raise ValueError(f"Unknown element symbol: {symbol}")
        count = int(count_str) if count_str else 1
        atoms.extend([symbol] * count)
    return atoms
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ===================================================================
|
|
54
|
+
# Valence electron counting
|
|
55
|
+
# ===================================================================
|
|
56
|
+
|
|
57
|
+
# Main-group valence electron lookup by group number.
|
|
58
|
+
# VSEPR is primarily for main-group elements.
|
|
59
|
+
# Keys are periodic-table group numbers, values are the valence-electron
# counts returned by get_valence_electrons().  Group 18 maps to 8; helium
# is handled as a special case (2) inside get_valence_electrons().
_GROUP_VALENCE = {
    1: 1, 2: 2,                                  # groups 1-2
    13: 3, 14: 4, 15: 5, 16: 6, 17: 7, 18: 8,    # groups 13-18
}

# Map atomic number -> main-group number for common elements.
# Atomic numbers that are absent here (e.g. 21-30, 39-48, 57-80) are not
# resolved through this table; get_valence_electrons() falls back to
# QuantumAtom for them.
_Z_TO_GROUP = {
    1: 1, 2: 18,                                                    # H, He
    3: 1, 4: 2, 5: 13, 6: 14, 7: 15, 8: 16, 9: 17, 10: 18,          # Li .. Ne
    11: 1, 12: 2, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18,   # Na .. Ar
    19: 1, 20: 2,                                                   # K, Ca
    31: 13, 32: 14, 33: 15, 34: 16, 35: 17, 36: 18,                 # Ga .. Kr
    37: 1, 38: 2,                                                   # Rb, Sr
    49: 13, 50: 14, 51: 15, 52: 16, 53: 17, 54: 18,                 # In .. Xe
    55: 1, 56: 2,                                                   # Cs, Ba
    81: 13, 82: 14, 83: 15, 84: 16, 85: 17, 86: 18,                 # Tl .. Rn
}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def get_valence_electrons(symbol: str) -> int:
    """Return the valence-electron count used for Lewis/VSEPR bookkeeping.

    Elements present in ``_Z_TO_GROUP`` are resolved through their group
    number via ``_GROUP_VALENCE``; helium is pinned to 2 instead of the
    group-18 value.  Every other known element is delegated to
    ``molbuilder.atomic.quantum_atom.QuantumAtom``.

    Parameters
    ----------
    symbol : str
        Element symbol, looked up in ``SYMBOL_TO_Z``.

    Returns
    -------
    int
        Number of valence electrons.

    Raises
    ------
    ValueError
        If ``symbol`` is not a key of ``SYMBOL_TO_Z``.
    """
    atomic_number = SYMBOL_TO_Z.get(symbol)
    if atomic_number is None:
        raise ValueError(f"Unknown element: {symbol}")

    group = _Z_TO_GROUP.get(atomic_number)
    if group is None:
        # Not covered by the main-group table: ask the quantum model.
        # The import is kept function-local, as in the original code.
        from molbuilder.atomic.quantum_atom import QuantumAtom
        return QuantumAtom(atomic_number).valence_electrons

    if symbol == "He":
        # Helium sits in group 18 but only has two electrons.
        return 2
    return _GROUP_VALENCE.get(group, 0)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# ===================================================================
|
|
103
|
+
# Central atom identification
|
|
104
|
+
# ===================================================================
|
|
105
|
+
|
|
106
|
+
def identify_central_atom(atoms: list[str]) -> int:
    """Pick the index of the central atom following VSEPR conventions.

    Selection rules:
      * Hydrogen is never chosen when any other element is present.
      * With one or two atoms, the first non-hydrogen atom wins
        (index 0 if there is none); electronegativity is not consulted.
      * Otherwise the non-hydrogen atom with the lowest electronegativity
        wins; ties are broken by how rarely the element occurs in the
        formula, then by position in the list.

    Parameters
    ----------
    atoms : list[str]
        Element symbols, one per atom.

    Returns
    -------
    int
        Index into ``atoms``.  0 is returned when the list contains no
        non-hydrogen atom (including the empty list).
    """
    heavy = [idx for idx, sym in enumerate(atoms) if sym != "H"]

    # Tiny molecules and single-candidate molecules need no ranking.
    if len(atoms) <= 2 or len(heavy) <= 1:
        return heavy[0] if heavy else 0

    from collections import Counter
    occurrences = Counter(atoms)

    def rank(idx):
        sym = atoms[idx]
        return (electronegativity(sym), occurrences[sym])

    # min() keeps the first of several equally ranked candidates, which
    # matches taking element 0 of a stable ascending sort.
    return min(heavy, key=rank)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# ===================================================================
|
|
142
|
+
# Data classes
|
|
143
|
+
# ===================================================================
|
|
144
|
+
|
|
145
|
+
@dataclass
class Bond:
    """Connection between two atoms, referenced by their list indices."""
    atom_a: int
    atom_b: int
    order: int = 1  # bond multiplicity: 1 single, 2 double, 3 triple

    def __repr__(self):
        # Render as Bond(<a><glyph><b>); unknown orders show up as '?'.
        glyphs = {1: "-", 2: "=", 3: "#"}
        try:
            glyph = glyphs[self.order]
        except KeyError:
            glyph = "?"
        return "Bond({}{}{})".format(self.atom_a, glyph, self.atom_b)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@dataclass
class LonePair:
    """One non-bonding electron pair, tagged with the atom that owns it."""
    atom_index: int

    def __repr__(self):
        return "LP({})".format(self.atom_index)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# ===================================================================
|
|
167
|
+
# Lewis Structure
|
|
168
|
+
# ===================================================================
|
|
169
|
+
|
|
170
|
+
class LewisStructure:
    """Build and hold a single-centre Lewis structure for a formula.

    Construction parses the formula, chooses a central atom, treats every
    other atom as a terminal bonded to it, and runs the electron
    distribution straight away.

    Parameters
    ----------
    formula : str
        Molecular formula (e.g., 'H2O', 'CH4', 'CO2').
    charge : int
        Net charge (0 for neutral, +1 for cation, -1 for anion, etc.).

    Raises
    ------
    ValueError
        If the formula yields no atoms.
    """

    def __init__(self, formula: str, charge: int = 0):
        self.formula = formula
        self.charge = charge
        self.atoms = parse_formula(formula)

        if not self.atoms:
            raise ValueError(f"Formula '{formula}' produced no atoms")

        self.central_index = identify_central_atom(self.atoms)
        self.central_symbol = self.atoms[self.central_index]
        # Every atom except the central one is a terminal.
        self.terminal_indices = [
            idx for idx, _ in enumerate(self.atoms) if idx != self.central_index
        ]

        self.bonds: list[Bond] = []
        self.lone_pairs: list[LonePair] = []
        self.total_valence_electrons = 0

        self._solve()

    # ------ solver ------

    def _solve(self):
        """Distribute the valence electrons into bonds and lone pairs."""
        center = self.central_index

        # Step 1: electron budget (a positive charge removes electrons).
        self.total_valence_electrons = (
            sum(map(get_valence_electrons, self.atoms)) - self.charge
        )
        budget = self.total_valence_electrons

        # Step 2: one single bond from the central atom to each terminal,
        # two electrons apiece.  An electron-deficient species cannot go
        # below an empty budget.
        self.bonds.extend(
            Bond(center, terminal, order=1) for terminal in self.terminal_indices
        )
        budget = max(budget - 2 * len(self.terminal_indices), 0)

        # Step 3: terminals are served first.  Each already holds the two
        # electrons of its bond, so it wants (target - 2) / 2 lone pairs.
        for terminal in self.terminal_indices:
            wanted_pairs = (target_electrons(self.atoms[terminal]) - 2) // 2
            for _ in range(wanted_pairs):
                if budget < 2:
                    break
                self.lone_pairs.append(LonePair(terminal))
                budget -= 2

        # Step 4: whatever full pairs remain go onto the central atom.
        central_pairs, budget = divmod(budget, 2)
        self.lone_pairs.extend(LonePair(center) for _ in range(central_pairs))

        # Step 5: while the central atom is short of its target, turn a
        # terminal lone pair into an extra bond.  This is done even for
        # atoms that could expand their octet (e.g. SO2 needs S=O).
        goal = target_electrons(self.central_symbol)
        have = self._electrons_around(center)

        while have < goal:
            # Lowest-order bonds are tried first so that multiple bonds
            # are spread across terminals.
            for bond in sorted(self.bonds, key=lambda b: b.order):
                if bond.order >= 3:
                    continue
                partner = bond.atom_a if bond.atom_a != center else bond.atom_b
                slot = self._find_lone_pair_on(partner)
                if slot is None:
                    continue
                del self.lone_pairs[slot]
                bond.order += 1
                have += 2
                break
            else:
                break  # no bond could be promoted

    def _electrons_around(self, atom_index: int) -> int:
        """Count electrons around a given atom (bonding + lone pairs)."""
        bonding = sum(
            2 * bond.order
            for bond in self.bonds
            if bond.atom_a == atom_index or bond.atom_b == atom_index
        )
        pair_count = len(
            [lp for lp in self.lone_pairs if lp.atom_index == atom_index]
        )
        return bonding + 2 * pair_count

    def _find_lone_pair_on(self, atom_index: int):
        """Return the position of the first lone pair on atom_index, or None."""
        return next(
            (
                pos
                for pos, lp in enumerate(self.lone_pairs)
                if lp.atom_index == atom_index
            ),
            None,
        )

    # ------ query methods ------

    def bonding_pairs_on_central(self) -> int:
        """Number of bonds (counted once each, whatever their order) that
        touch the central atom."""
        center = self.central_index
        return len(
            [b for b in self.bonds if b.atom_a == center or b.atom_b == center]
        )

    def lone_pairs_on_central(self) -> int:
        """Number of lone pairs on the central atom."""
        return len(
            [lp for lp in self.lone_pairs if lp.atom_index == self.central_index]
        )

    def steric_number(self) -> int:
        """Steric number = bonding groups + lone pairs on central."""
        bonded = self.bonding_pairs_on_central()
        lone = self.lone_pairs_on_central()
        return bonded + lone

    def bond_order_to(self, terminal_index: int) -> int:
        """Bond order between the central atom and a terminal atom.

        Returns 0 when no such bond exists.
        """
        center = self.central_index
        for bond in self.bonds:
            forward = bond.atom_a == center and bond.atom_b == terminal_index
            backward = bond.atom_b == center and bond.atom_a == terminal_index
            if forward or backward:
                return bond.order
        return 0

    # ------ display ------

    def __repr__(self):
        bonded = self.bonding_pairs_on_central()
        lone = self.lone_pairs_on_central()
        return "LewisStructure({}, central={}, X={}, E={})".format(
            self.formula, self.central_symbol, bonded, lone
        )

    def summary(self) -> str:
        """Return an ASCII summary of the Lewis structure."""
        rule = "=" * 55
        out = [
            rule,
            f" Lewis Structure: {self.formula}",
            f" Charge: {self.charge:+d}",
            f" Total valence electrons: {self.total_valence_electrons}",
            f" Central atom: {self.central_symbol} (index {self.central_index})",
            rule,
        ]

        # Bonds: (glyph, label) per bond order, '?' for anything else.
        notation = {1: ("-", "single"), 2: ("=", "double"), 3: ("#", "triple")}
        out.append(" Bonds:")
        for bond in self.bonds:
            glyph, label = notation.get(bond.order, ("?", "?"))
            first = self.atoms[bond.atom_a]
            second = self.atoms[bond.atom_b]
            out.append(f" {first}{glyph}{second} ({label})")

        # Lone pairs per atom; atoms without any are omitted.
        out.append(" Lone pairs:")
        for idx, sym in enumerate(self.atoms):
            owned = len([lp for lp in self.lone_pairs if lp.atom_index == idx])
            if owned > 0:
                out.append(f" {sym} (index {idx}): {owned} lone pair(s)")

        out.append(f" Steric number: {self.steric_number()}")
        out.append(rule)
        return "\n".join(out)
|