molbuilder 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- molbuilder/__init__.py +8 -0
- molbuilder/__main__.py +6 -0
- molbuilder/atomic/__init__.py +4 -0
- molbuilder/atomic/bohr.py +235 -0
- molbuilder/atomic/quantum_atom.py +334 -0
- molbuilder/atomic/quantum_numbers.py +196 -0
- molbuilder/atomic/wavefunctions.py +297 -0
- molbuilder/bonding/__init__.py +4 -0
- molbuilder/bonding/covalent.py +442 -0
- molbuilder/bonding/lewis.py +347 -0
- molbuilder/bonding/vsepr.py +433 -0
- molbuilder/cli/__init__.py +1 -0
- molbuilder/cli/demos.py +516 -0
- molbuilder/cli/menu.py +127 -0
- molbuilder/cli/wizard.py +831 -0
- molbuilder/core/__init__.py +6 -0
- molbuilder/core/bond_data.py +170 -0
- molbuilder/core/constants.py +51 -0
- molbuilder/core/element_properties.py +183 -0
- molbuilder/core/elements.py +181 -0
- molbuilder/core/geometry.py +232 -0
- molbuilder/gui/__init__.py +2 -0
- molbuilder/gui/app.py +286 -0
- molbuilder/gui/canvas3d.py +115 -0
- molbuilder/gui/dialogs.py +117 -0
- molbuilder/gui/event_handler.py +118 -0
- molbuilder/gui/sidebar.py +105 -0
- molbuilder/gui/toolbar.py +71 -0
- molbuilder/io/__init__.py +1 -0
- molbuilder/io/json_io.py +146 -0
- molbuilder/io/mol_sdf.py +169 -0
- molbuilder/io/pdb.py +184 -0
- molbuilder/io/smiles_io.py +47 -0
- molbuilder/io/xyz.py +103 -0
- molbuilder/molecule/__init__.py +2 -0
- molbuilder/molecule/amino_acids.py +919 -0
- molbuilder/molecule/builders.py +257 -0
- molbuilder/molecule/conformations.py +70 -0
- molbuilder/molecule/functional_groups.py +484 -0
- molbuilder/molecule/graph.py +712 -0
- molbuilder/molecule/peptides.py +13 -0
- molbuilder/molecule/stereochemistry.py +6 -0
- molbuilder/process/__init__.py +3 -0
- molbuilder/process/conditions.py +260 -0
- molbuilder/process/costing.py +316 -0
- molbuilder/process/purification.py +285 -0
- molbuilder/process/reactor.py +297 -0
- molbuilder/process/safety.py +476 -0
- molbuilder/process/scale_up.py +427 -0
- molbuilder/process/solvent_systems.py +204 -0
- molbuilder/reactions/__init__.py +3 -0
- molbuilder/reactions/functional_group_detect.py +728 -0
- molbuilder/reactions/knowledge_base.py +1716 -0
- molbuilder/reactions/reaction_types.py +102 -0
- molbuilder/reactions/reagent_data.py +1248 -0
- molbuilder/reactions/retrosynthesis.py +1430 -0
- molbuilder/reactions/synthesis_route.py +377 -0
- molbuilder/reports/__init__.py +158 -0
- molbuilder/reports/cost_report.py +206 -0
- molbuilder/reports/molecule_report.py +279 -0
- molbuilder/reports/safety_report.py +296 -0
- molbuilder/reports/synthesis_report.py +283 -0
- molbuilder/reports/text_formatter.py +170 -0
- molbuilder/smiles/__init__.py +4 -0
- molbuilder/smiles/parser.py +487 -0
- molbuilder/smiles/tokenizer.py +291 -0
- molbuilder/smiles/writer.py +375 -0
- molbuilder/visualization/__init__.py +1 -0
- molbuilder/visualization/bohr_viz.py +166 -0
- molbuilder/visualization/molecule_viz.py +368 -0
- molbuilder/visualization/quantum_viz.py +434 -0
- molbuilder/visualization/theme.py +12 -0
- molbuilder-1.0.0.dist-info/METADATA +360 -0
- molbuilder-1.0.0.dist-info/RECORD +78 -0
- molbuilder-1.0.0.dist-info/WHEEL +5 -0
- molbuilder-1.0.0.dist-info/entry_points.txt +2 -0
- molbuilder-1.0.0.dist-info/licenses/LICENSE +21 -0
- molbuilder-1.0.0.dist-info/top_level.txt +1 -0
molbuilder/io/mol_sdf.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""V2000 MOL/SDF file format reader/writer.
|
|
2
|
+
|
|
3
|
+
MOL file layout (V2000)::
|
|
4
|
+
|
|
5
|
+
<molecule name>
|
|
6
|
+
molbuilder 3D
|
|
7
|
+
|
|
8
|
+
<atom_count> <bond_count> 0 0 0 0 0 0 0 0999 V2000
|
|
9
|
+
<x10.4><y10.4><z10.4> <sym3> 0 0 0 0 0 0 0 0 0 0 0 0
|
|
10
|
+
...
|
|
11
|
+
<i3><j3><type3> 0 0 0 0
|
|
12
|
+
...
|
|
13
|
+
M END
|
|
14
|
+
|
|
15
|
+
SDF files contain one or more MOL blocks separated by ``$$$$``.
|
|
16
|
+
|
|
17
|
+
Note: MOL files use **1-based** atom indices.
|
|
18
|
+
Bond type: 1 = single, 2 = double, 3 = triple.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import numpy as np
|
|
24
|
+
|
|
25
|
+
from molbuilder.molecule.graph import Molecule, Hybridization
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ── MOL string serialisation ─────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
def to_mol_string(mol: Molecule) -> str:
    """Serialise a Molecule to a V2000 MOL block string.

    Args:
        mol: Molecule whose ``name``, ``atoms`` (``symbol``, ``position``)
            and ``bonds`` (``atom_i``, ``atom_j``, ``order``) are written.

    Returns:
        The MOL block, terminated by ``M  END`` and a trailing newline.

    Raises:
        ValueError: If the molecule has more than 999 atoms or bonds, which
            cannot be represented in the 3-character V2000 count fields.
    """
    n_atoms = len(mol.atoms)
    n_bonds = len(mol.bonds)
    # The counts line and the bond block use 3-character integer fields;
    # larger values would shift every following column and corrupt the file.
    if n_atoms > 999 or n_bonds > 999:
        raise ValueError(
            f"V2000 MOL format supports at most 999 atoms and 999 bonds "
            f"(got {n_atoms} atoms, {n_bonds} bonds)"
        )

    # Header block (exactly 3 lines).  Line breaks inside the name are
    # folded to spaces so the title cannot spill into the other header lines.
    title = " ".join((mol.name or "").splitlines())
    lines: list[str] = [
        title,
        # Program line: name in columns 1-10, blank date/time field,
        # dimensional code in columns 21-22.
        "molbuilder          3D",
        "",
    ]

    # Counts line
    lines.append(
        f"{n_atoms:3d}{n_bonds:3d}"
        "  0  0  0  0  0  0  0  0999 V2000"
    )

    # Atom block: three 10.4 coordinates, a blank, then the 3-wide symbol
    for atom in mol.atoms:
        x, y, z = atom.position
        lines.append(
            f"{x:10.4f}{y:10.4f}{z:10.4f} {atom.symbol:<3s}"
            " 0  0  0  0  0  0  0  0  0  0  0  0"
        )

    # Bond block: MOL files use 1-based atom indices
    for bond in mol.bonds:
        lines.append(
            f"{bond.atom_i + 1:3d}{bond.atom_j + 1:3d}{bond.order:3d}"
            "  0  0  0  0"
        )

    lines.append("M  END")
    return "\n".join(lines) + "\n"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def from_mol_string(content: str) -> Molecule:
    """Parse a Molecule from a V2000 MOL block string.

    Args:
        content: Text of one MOL block (3 header lines, counts line, atom
            block, bond block).  Anything after the bond block is ignored.

    Returns:
        A Molecule named after the first header line, with one atom per
        atom-block line and one bond per bond-block line.  Bonds of order 1
        are added as rotatable.

    Raises:
        ValueError: If the block is shorter than its header or its declared
            counts, a numeric field cannot be parsed, a count is negative,
            or a bond refers to an atom outside the atom block.
    """
    lines = content.splitlines()

    # The header (3 lines) and the counts line must exist before indexing.
    if len(lines) < 4:
        raise ValueError(
            "MOL block too short: expected 3 header lines and a counts line"
        )

    # Header: lines[1] = program/timestamp, lines[2] = comment (both ignored)
    name = lines[0].strip()

    # Counts line: two 3-character integer fields
    counts_line = lines[3]
    n_atoms = int(counts_line[0:3])
    n_bonds = int(counts_line[3:6])
    if n_atoms < 0 or n_bonds < 0:
        raise ValueError("MOL counts line declares a negative atom or bond count")

    bond_start = 4 + n_atoms
    if len(lines) < bond_start + n_bonds:
        raise ValueError(f"MOL file truncated: expected {4 + n_atoms + n_bonds} lines")

    mol = Molecule(name=name)

    # Atom block: starts at line 4
    for line in lines[4:bond_start]:
        x = float(line[0:10])
        y = float(line[10:20])
        z = float(line[20:30])
        symbol = line[31:34].strip()
        mol.add_atom(symbol, [x, y, z])

    # Bond block: starts after atom block
    for line in lines[bond_start:bond_start + n_bonds]:
        ai = int(line[0:3]) - 1  # convert to 0-based
        aj = int(line[3:6]) - 1
        # An index of 0 in the file would become -1 here and silently wrap
        # around to the last atom, so reject anything outside the atom block.
        if not (0 <= ai < n_atoms and 0 <= aj < n_atoms):
            raise ValueError(
                f"MOL bond refers to atom outside 1..{n_atoms}: {line.rstrip()!r}"
            )
        order = int(line[6:9])
        mol.add_bond(ai, aj, order=order, rotatable=(order == 1))

    return mol
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# ── MOL file I/O ─────────────────────────────────────────────────────
|
|
107
|
+
|
|
108
|
+
def write_mol(mol: Molecule, filepath: str) -> None:
    """Save *mol* to *filepath* as a V2000 MOL file.

    The file is opened in text write mode, so existing content is replaced.
    """
    with open(filepath, "w") as out_file:
        mol_block = to_mol_string(mol)
        out_file.write(mol_block)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def read_mol(filepath: str) -> Molecule:
    """Load the V2000 MOL file at *filepath* and return it as a Molecule.

    The whole file is read as text and handed to ``from_mol_string``.
    """
    with open(filepath, "r") as in_file:
        return from_mol_string(in_file.read())
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# ── SDF multi-molecule I/O ───────────────────────────────────────────
|
|
122
|
+
|
|
123
|
+
def write_sdf(molecules: list[Molecule], filepath: str) -> None:
    """Save several Molecules to one SDF file.

    Every molecule is written as a MOL block immediately followed by a
    ``$$$$`` record-separator line.
    """
    with open(filepath, "w") as out_file:
        out_file.writelines(
            to_mol_string(record) + "$$$$\n" for record in molecules
        )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def read_sdf(filepath: str) -> list[Molecule]:
    """Read all Molecules from an SDF file.

    SDF files contain one or more MOL blocks, each terminated by a line
    holding only ``$$$$``.  Data items between ``M  END`` and ``$$$$`` are
    ignored.  Records without a V2000 counts line are skipped; records that
    fail to parse are skipped with a warning.

    Args:
        filepath: Path of the SDF file to read.

    Returns:
        The successfully parsed molecules, in file order.
    """
    import warnings  # local import: only needed for malformed records

    with open(filepath, "r") as f:
        content = f.read()

    molecules: list[Molecule] = []
    record: list[str] = []

    # Split on separator *lines* rather than on the substring, and append a
    # sentinel so a final record without its "$$$$" line is still processed.
    for line in content.splitlines() + ["$$$$"]:
        if line.strip() != "$$$$":
            record.append(line)
            continue
        lines, record = record, []

        # Locate the counts line; blank or non-V2000 records are skipped.
        counts_idx = next(
            (i for i, text in enumerate(lines)
             if text.rstrip().endswith("V2000")),
            None,
        )
        if counts_idx is None:
            continue

        # The header is the 3 lines directly above the counts line.  Anchoring
        # on the counts line keeps an empty title line in place (stripping the
        # record would drop it and shift the header) and tolerates stray blank
        # lines between records.
        header = lines[max(counts_idx - 3, 0):counts_idx]
        header = [""] * (3 - len(header)) + header

        # Trim anything after "M  END" (SDF data items)
        end_idx = next(
            (i for i in range(counts_idx + 1, len(lines))
             if lines[i].startswith("M  END")),
            len(lines) - 1,
        )
        mol_block = "\n".join(header + lines[counts_idx:end_idx + 1])

        try:
            molecules.append(from_mol_string(mol_block))
        except (ValueError, IndexError) as exc:
            # Skip malformed blocks rather than crashing, but say so.
            warnings.warn(f"Skipping malformed SDF record: {exc}")

    return molecules
|
molbuilder/io/pdb.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""PDB file format reader/writer.
|
|
2
|
+
|
|
3
|
+
Supports ATOM/HETATM records for coordinates and CONECT records for
|
|
4
|
+
bond connectivity. Intended for small-molecule use (not full protein
|
|
5
|
+
PDB support).
|
|
6
|
+
|
|
7
|
+
Record formats used::
|
|
8
|
+
|
|
9
|
+
ATOM 1 C1 MOL A 1 0.000 0.000 0.000 1.00 0.00 C
|
|
10
|
+
CONECT 1 2 3 4 5
|
|
11
|
+
END
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from collections import Counter, defaultdict
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
|
|
20
|
+
from molbuilder.molecule.graph import Molecule
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ── Helpers ───────────────────────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
def _atom_names(mol: Molecule) -> list[str]:
    """Build one PDB atom name per atom, in atom order.

    Each name is the element symbol followed by a running number counted
    separately for every element (C1, C2, H1, H2, ...).
    """
    seen: dict[str, int] = {}
    labels: list[str] = []
    for atom in mol.atoms:
        element = atom.symbol
        ordinal = seen.get(element, 0) + 1
        seen[element] = ordinal
        labels.append(f"{element}{ordinal}")
    return labels
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _residue_name(mol: Molecule) -> str:
    """Derive the 3-character PDB residue name from the molecule name.

    The stripped name is cut to its first three characters, upper-cased and
    padded on the right with blanks.  A missing or blank name gives "MOL".
    """
    cleaned = (mol.name or "").strip()
    if not cleaned:
        return "MOL"
    return f"{cleaned[:3].upper():<3}"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# ── String serialisation ─────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
def to_pdb_string(mol: Molecule) -> str:
    """Render a Molecule as PDB text.

    One HETATM record is written per atom, followed by CONECT records
    listing each bonded atom's neighbours, and a closing END line.  Serial
    numbers are the 0-based atom indices plus one.
    """
    names = _atom_names(mol)
    residue = _residue_name(mol)
    chain_id = "A"
    residue_number = 1

    records: list[str] = []

    # HETATM records, fixed-width columns:
    #    1- 6  record type        23-26  residue sequence number
    #    7-11  serial             31-54  x, y, z (8.3f each)
    #   13-16  atom name          55-60  occupancy (6.2f)
    #   18-20  residue name       61-66  temperature factor (6.2f)
    #   22     chain ID           77-78  element symbol
    for atom in mol.atoms:
        x, y, z = atom.position
        identity = (
            f"HETATM{atom.index + 1:5d} {names[atom.index]:<4s} "
            f"{residue:3s} {chain_id:1s}{residue_number:4d}    "
        )
        coordinates = f"{x:8.3f}{y:8.3f}{z:8.3f}"
        trailer = f"{1.0:6.2f}{0.0:6.2f}          {atom.symbol.rjust(2):>2s}"
        records.append(identity + coordinates + trailer)

    # CONECT records: collect, per 1-based serial, the serials bonded to it.
    bonded: dict[int, list[int]] = {}
    for bond in mol.bonds:
        first, second = bond.atom_i + 1, bond.atom_j + 1
        bonded.setdefault(first, []).append(second)
        bonded.setdefault(second, []).append(first)

    for serial, partners in sorted(bonded.items()):
        partners = sorted(partners)
        # At most 4 bonded atoms per CONECT line; longer lists are continued
        # on further lines for the same serial.
        for offset in range(0, len(partners), 4):
            fields = "".join(f"{p:5d}" for p in partners[offset:offset + 4])
            records.append(f"CONECT{serial:5d}{fields}")

    records.append("END")
    return "\n".join(records) + "\n"
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def from_pdb_string(content: str) -> Molecule:
    """Parse a Molecule from PDB-format string.

    Reads HETATM / ATOM records for coordinates and CONECT records for
    bond connectivity.  All other record types are ignored.  Every bond is
    added with order 1 and marked rotatable, because CONECT records carry
    no bond order here.

    Args:
        content: PDB text.

    Returns:
        A Molecule whose name is the residue name of the first atom record.

    Raises:
        ValueError: If a serial number or coordinate field cannot be parsed.
    """
    mol = Molecule()

    serial_to_index: dict[int, int] = {}
    conect_records: list[tuple[int, list[int]]] = []

    for line in content.splitlines():
        record = line[:6].strip()

        if record in ("ATOM", "HETATM"):
            serial = int(line[6:11])
            # Element symbol: columns 77-78 (preferred).  Slicing is safe on
            # short lines, so a symbol in column 77 is still found when
            # trailing blanks were trimmed from the line.
            element = line[76:78].strip()
            if not element:
                # Fallback: strip digits from atom name (columns 13-16)
                raw_name = line[12:16].strip()
                element = "".join(c for c in raw_name if c.isalpha())
            # PDB files commonly write symbols in upper case ("CL"); convert
            # to conventional capitalisation ("Cl").  Symbols that are
            # already capitalised this way are unchanged.
            element = element.capitalize()

            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])

            idx = mol.add_atom(element, [x, y, z])
            serial_to_index[serial] = idx

            # Extract molecule name from residue name on first atom
            if idx == 0:
                mol.name = line[17:20].strip()

        elif record == "CONECT":
            serial = int(line[6:11])
            neighbours: list[int] = []
            # Neighbour serials are 5-character fields starting at column 12;
            # only complete fields are read.
            for col in range(11, len(line) - 4, 5):
                token = line[col:col + 5].strip()
                if token:
                    try:
                        neighbours.append(int(token))
                    except ValueError:
                        pass  # tolerate non-numeric filler in a field
            conect_records.append((serial, neighbours))

    # Build bonds from CONECT records (avoid duplicates: each bond is listed
    # once from either end)
    added_bonds: set[tuple[int, int]] = set()
    for serial, neighbours in conect_records:
        if serial not in serial_to_index:
            continue
        i = serial_to_index[serial]
        for nb_serial in neighbours:
            if nb_serial not in serial_to_index:
                continue
            j = serial_to_index[nb_serial]
            if i == j:
                continue  # an atom listed as its own neighbour is not a bond
            bond_key = (min(i, j), max(i, j))
            if bond_key not in added_bonds:
                added_bonds.add(bond_key)
                mol.add_bond(i, j, order=1, rotatable=True)

    return mol
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# ── File I/O ──────────────────────────────────────────────────────────
|
|
173
|
+
|
|
174
|
+
def write_pdb(mol: Molecule, filepath: str) -> None:
    """Save *mol* to *filepath* in PDB format.

    The file is opened in text write mode, so existing content is replaced.
    """
    with open(filepath, "w") as out_file:
        pdb_text = to_pdb_string(mol)
        out_file.write(pdb_text)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def read_pdb(filepath: str) -> Molecule:
    """Load the PDB file at *filepath* and return it as a Molecule.

    The whole file is read as text and handed to ``from_pdb_string``.
    """
    with open(filepath, "r") as in_file:
        return from_pdb_string(in_file.read())
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""SMILES file I/O wrapper.
|
|
2
|
+
|
|
3
|
+
Reads and writes SMILES files where each line contains a SMILES string
|
|
4
|
+
optionally followed by a molecule name::
|
|
5
|
+
|
|
6
|
+
CCO ethanol
|
|
7
|
+
c1ccccc1 benzene
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import warnings
|
|
11
|
+
|
|
12
|
+
from molbuilder.smiles.parser import parse
|
|
13
|
+
from molbuilder.smiles.writer import to_smiles
|
|
14
|
+
from molbuilder.molecule.graph import Molecule
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def write_smiles(mol: Molecule, filepath: str) -> None:
    """Write a molecule to a one-line SMILES file.

    The line holds the SMILES string followed, when the molecule has a
    name, by a blank and that name.

    Args:
        mol: Molecule to convert with ``to_smiles``.
        filepath: Destination path; existing content is replaced.
    """
    smi = to_smiles(mol)
    # Leave the name off when there is none, so the file never contains a
    # literal "None" or a dangling blank.  Line breaks inside the name are
    # folded to spaces to keep the record on a single line.
    name = " ".join((mol.name or "").split())
    record = f"{smi} {name}" if name else smi
    with open(filepath, "w") as f:
        f.write(f"{record}\n")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def read_smiles(filepath: str) -> list[Molecule]:
    """Load every molecule listed in a SMILES file.

    The file holds one entry per line: a SMILES string, optionally followed
    by whitespace and a name.  Empty lines and lines beginning with ``#``
    are ignored.  When no name is given the SMILES string itself becomes the
    name.  Entries the parser rejects are reported with a warning and
    skipped.
    """
    parsed = []
    with open(filepath, "r") as handle:
        for raw in handle:
            entry = raw.strip()
            if entry == "" or entry[0] == "#":
                continue
            # First whitespace-delimited token is the SMILES; the remainder
            # of the line, if any, is the name.
            smi, *rest = entry.split(None, 1)
            label = rest[0] if rest else smi
            try:
                mol = parse(smi)
            except (ValueError, IndexError) as e:
                warnings.warn(f"Skipping invalid SMILES '{smi}': {e}")
                continue
            mol.name = label
            parsed.append(mol)
    return parsed
|
molbuilder/io/xyz.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""XYZ file format reader/writer.
|
|
2
|
+
|
|
3
|
+
The XYZ format is one of the simplest molecular geometry formats::
|
|
4
|
+
|
|
5
|
+
<atom_count>
|
|
6
|
+
<comment line>
|
|
7
|
+
<symbol> <x> <y> <z>
|
|
8
|
+
<symbol> <x> <y> <z>
|
|
9
|
+
...
|
|
10
|
+
|
|
11
|
+
XYZ files do not store bond connectivity, so bonds are inferred from
|
|
12
|
+
interatomic distances and covalent radii when reading.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import math
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
|
|
21
|
+
from molbuilder.molecule.graph import Molecule
|
|
22
|
+
from molbuilder.core.element_properties import covalent_radius_pm
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ── Bond inference ────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
_BOND_TOLERANCE = 1.3 # multiplier on sum-of-covalent-radii
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _infer_bonds(mol: Molecule) -> None:
    """Add single bonds between atoms that are close enough to be bonded.

    Two atoms are bonded when their distance is below ``_BOND_TOLERANCE``
    times the sum of their covalent radii (converted from pm to Angstroms).
    Bonds are added to *mol* in place with order 1 and marked rotatable.
    """
    atoms = mol.atoms
    n = len(atoms)
    if n < 2:
        return  # no pairs to test

    # Look up each atom's radius once instead of once per pair.
    radii = [covalent_radius_pm(atom.symbol) / 100.0 for atom in atoms]

    for i in range(n):
        for j in range(i + 1, n):
            max_dist = _BOND_TOLERANCE * (radii[i] + radii[j])
            dist = float(np.linalg.norm(
                atoms[i].position - atoms[j].position))
            if dist < max_dist:
                mol.add_bond(i, j, order=1, rotatable=True)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ── String serialisation ─────────────────────────────────────────────
|
|
47
|
+
|
|
48
|
+
def to_xyz_string(mol: Molecule) -> str:
    """Return the molecule as an XYZ-format string.

    The first line is the atom count, the second the molecule name (used as
    the comment line), followed by one ``symbol x y z`` line per atom.
    Bonds are not written.

    Args:
        mol: Molecule providing ``name`` and ``atoms`` (``symbol``,
            ``position``).

    Returns:
        The XYZ text, ending with a newline.
    """
    # The comment must occupy exactly one line; fold any line breaks in the
    # name to spaces so the atom records stay on lines 3 and onward.
    comment = " ".join((mol.name or "").splitlines())
    lines: list[str] = [str(len(mol.atoms)), comment]
    for atom in mol.atoms:
        x, y, z = atom.position
        lines.append(f"{atom.symbol:<4s} {x:15.8f} {y:15.8f} {z:15.8f}")
    return "\n".join(lines) + "\n"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def from_xyz_string(content: str) -> Molecule:
    """Parse a Molecule from an XYZ-format string.

    Only the first ``atom_count`` atom lines are read.  Bonds are inferred
    from interatomic distances using covalent radii.

    Args:
        content: XYZ text: atom count, comment line, then one
            ``symbol x y z`` line per atom.

    Returns:
        A Molecule named after the comment line.

    Raises:
        ValueError: If the content is empty, the atom count is not a
            non-negative integer, a coordinate cannot be parsed, or fewer
            atom lines are present than declared.
    """
    lines = content.strip().splitlines()
    if not lines:
        raise ValueError("XYZ content must have at least two lines "
                         "(atom count and comment).")

    atom_count = int(lines[0].strip())
    if atom_count < 0:
        raise ValueError(f"XYZ atom count must not be negative: {atom_count}")

    # Stripping the content removes a trailing blank comment line, which is
    # what an unnamed molecule without atoms serialises to; treat a missing
    # comment as empty instead of rejecting the input.
    comment = lines[1].strip() if len(lines) > 1 else ""

    if atom_count and len(lines) < atom_count + 2:
        raise ValueError(f"XYZ file declares {atom_count} atoms but has only "
                         f"{max(len(lines) - 2, 0)} atom lines")

    mol = Molecule(name=comment)

    for atom_line in lines[2:2 + atom_count]:
        parts = atom_line.split()
        symbol = parts[0]
        x, y, z = float(parts[1]), float(parts[2]), float(parts[3])
        mol.add_atom(symbol, [x, y, z])

    _infer_bonds(mol)
    return mol
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# ── File I/O ──────────────────────────────────────────────────────────
|
|
89
|
+
|
|
90
|
+
def write_xyz(mol: Molecule, filepath: str) -> None:
    """Save *mol* to *filepath* in XYZ format.

    The file is opened in text write mode, so existing content is replaced.
    """
    with open(filepath, "w") as out_file:
        xyz_text = to_xyz_string(mol)
        out_file.write(xyz_text)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def read_xyz(filepath: str) -> Molecule:
    """Load the XYZ file at *filepath* and return it as a Molecule.

    The whole file is read as text and handed to ``from_xyz_string``, which
    infers bonds from interatomic distances using covalent radii.
    """
    with open(filepath, "r") as in_file:
        return from_xyz_string(in_file.read())
|