molbuilder 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. molbuilder/__init__.py +8 -0
  2. molbuilder/__main__.py +6 -0
  3. molbuilder/atomic/__init__.py +4 -0
  4. molbuilder/atomic/bohr.py +235 -0
  5. molbuilder/atomic/quantum_atom.py +334 -0
  6. molbuilder/atomic/quantum_numbers.py +196 -0
  7. molbuilder/atomic/wavefunctions.py +297 -0
  8. molbuilder/bonding/__init__.py +4 -0
  9. molbuilder/bonding/covalent.py +442 -0
  10. molbuilder/bonding/lewis.py +347 -0
  11. molbuilder/bonding/vsepr.py +433 -0
  12. molbuilder/cli/__init__.py +1 -0
  13. molbuilder/cli/demos.py +516 -0
  14. molbuilder/cli/menu.py +127 -0
  15. molbuilder/cli/wizard.py +831 -0
  16. molbuilder/core/__init__.py +6 -0
  17. molbuilder/core/bond_data.py +170 -0
  18. molbuilder/core/constants.py +51 -0
  19. molbuilder/core/element_properties.py +183 -0
  20. molbuilder/core/elements.py +181 -0
  21. molbuilder/core/geometry.py +232 -0
  22. molbuilder/gui/__init__.py +2 -0
  23. molbuilder/gui/app.py +286 -0
  24. molbuilder/gui/canvas3d.py +115 -0
  25. molbuilder/gui/dialogs.py +117 -0
  26. molbuilder/gui/event_handler.py +118 -0
  27. molbuilder/gui/sidebar.py +105 -0
  28. molbuilder/gui/toolbar.py +71 -0
  29. molbuilder/io/__init__.py +1 -0
  30. molbuilder/io/json_io.py +146 -0
  31. molbuilder/io/mol_sdf.py +169 -0
  32. molbuilder/io/pdb.py +184 -0
  33. molbuilder/io/smiles_io.py +47 -0
  34. molbuilder/io/xyz.py +103 -0
  35. molbuilder/molecule/__init__.py +2 -0
  36. molbuilder/molecule/amino_acids.py +919 -0
  37. molbuilder/molecule/builders.py +257 -0
  38. molbuilder/molecule/conformations.py +70 -0
  39. molbuilder/molecule/functional_groups.py +484 -0
  40. molbuilder/molecule/graph.py +712 -0
  41. molbuilder/molecule/peptides.py +13 -0
  42. molbuilder/molecule/stereochemistry.py +6 -0
  43. molbuilder/process/__init__.py +3 -0
  44. molbuilder/process/conditions.py +260 -0
  45. molbuilder/process/costing.py +316 -0
  46. molbuilder/process/purification.py +285 -0
  47. molbuilder/process/reactor.py +297 -0
  48. molbuilder/process/safety.py +476 -0
  49. molbuilder/process/scale_up.py +427 -0
  50. molbuilder/process/solvent_systems.py +204 -0
  51. molbuilder/reactions/__init__.py +3 -0
  52. molbuilder/reactions/functional_group_detect.py +728 -0
  53. molbuilder/reactions/knowledge_base.py +1716 -0
  54. molbuilder/reactions/reaction_types.py +102 -0
  55. molbuilder/reactions/reagent_data.py +1248 -0
  56. molbuilder/reactions/retrosynthesis.py +1430 -0
  57. molbuilder/reactions/synthesis_route.py +377 -0
  58. molbuilder/reports/__init__.py +158 -0
  59. molbuilder/reports/cost_report.py +206 -0
  60. molbuilder/reports/molecule_report.py +279 -0
  61. molbuilder/reports/safety_report.py +296 -0
  62. molbuilder/reports/synthesis_report.py +283 -0
  63. molbuilder/reports/text_formatter.py +170 -0
  64. molbuilder/smiles/__init__.py +4 -0
  65. molbuilder/smiles/parser.py +487 -0
  66. molbuilder/smiles/tokenizer.py +291 -0
  67. molbuilder/smiles/writer.py +375 -0
  68. molbuilder/visualization/__init__.py +1 -0
  69. molbuilder/visualization/bohr_viz.py +166 -0
  70. molbuilder/visualization/molecule_viz.py +368 -0
  71. molbuilder/visualization/quantum_viz.py +434 -0
  72. molbuilder/visualization/theme.py +12 -0
  73. molbuilder-1.0.0.dist-info/METADATA +360 -0
  74. molbuilder-1.0.0.dist-info/RECORD +78 -0
  75. molbuilder-1.0.0.dist-info/WHEEL +5 -0
  76. molbuilder-1.0.0.dist-info/entry_points.txt +2 -0
  77. molbuilder-1.0.0.dist-info/licenses/LICENSE +21 -0
  78. molbuilder-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,169 @@
1
+ """V2000 MOL/SDF file format reader/writer.
2
+
3
+ MOL file layout (V2000)::
4
+
5
+ <molecule name>
6
+ molbuilder 3D
7
+
8
+ <atom_count> <bond_count> 0 0 0 0 0 0 0 0999 V2000
9
+ <x10.4><y10.4><z10.4> <sym3> 0 0 0 0 0 0 0 0 0 0 0 0
10
+ ...
11
+ <i3><j3><type3> 0 0 0 0
12
+ ...
13
+ M END
14
+
15
+ SDF files contain one or more MOL blocks separated by ``$$$$``.
16
+
17
+ Note: MOL files use **1-based** atom indices.
18
+ Bond type: 1 = single, 2 = double, 3 = triple.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import numpy as np
24
+
25
+ from molbuilder.molecule.graph import Molecule, Hybridization
26
+
27
+
28
+ # ── MOL string serialisation ─────────────────────────────────────────
29
+
30
def to_mol_string(mol: Molecule) -> str:
    """Serialise a Molecule to a V2000 MOL block string.

    The block consists of three header lines (molecule name, program
    line, empty comment), the counts line, one line per atom, one line
    per bond and the ``M  END`` terminator.  All numeric fields use the
    fixed column widths of the V2000 connection table so that
    column-based readers can parse the output.

    Args:
        mol: Molecule whose ``name``, ``atoms`` (``symbol``, ``position``)
            and ``bonds`` (``atom_i``, ``atom_j``, ``order``) are written.

    Returns:
        The MOL block, terminated by a newline.

    Raises:
        ValueError: If the molecule has more than 999 atoms or bonds,
            which cannot be represented in the 3-character count fields.
    """
    n_atoms = len(mol.atoms)
    n_bonds = len(mol.bonds)
    if n_atoms > 999 or n_bonds > 999:
        raise ValueError(
            "V2000 MOL blocks hold at most 999 atoms and 999 bonds "
            f"(got {n_atoms} atoms, {n_bonds} bonds)"
        )

    # Header block (3 lines).  The name must stay on a single line,
    # otherwise every following line would be shifted for the reader.
    title = " ".join(mol.name.splitlines()) if mol.name else ""
    lines: list[str] = [title, " molbuilder 3D", ""]

    # Counts line: atoms, bonds, eight unused 3-wide fields, 999, version.
    lines.append(f"{n_atoms:3d}{n_bonds:3d}" + "  0" * 8 + "999 V2000")

    # Atom block: x/y/z (10.4f each), blank, symbol (3), mass difference
    # (2-wide) followed by eleven 3-wide fields, all zero.
    atom_tail = " 0" + "  0" * 11
    for atom in mol.atoms:
        x, y, z = atom.position
        lines.append(
            f"{x:10.4f}{y:10.4f}{z:10.4f} {atom.symbol:<3s}{atom_tail}"
        )

    # Bond block: MOL files use 1-based atom numbers.
    bond_tail = "  0" * 4
    for bond in mol.bonds:
        first = bond.atom_i + 1
        second = bond.atom_j + 1
        lines.append(f"{first:3d}{second:3d}{bond.order:3d}{bond_tail}")

    lines.append("M  END")
    return "\n".join(lines) + "\n"
64
+
65
+
66
def from_mol_string(content: str) -> Molecule:
    """Parse a Molecule from a V2000 MOL block string.

    Line 1 is the molecule name, lines 2-3 (program line and comment)
    are ignored, line 4 is the counts line, followed by the atom and
    bond blocks.  Anything after the bond block is not read.

    Args:
        content: Text of a single MOL block.

    Returns:
        A Molecule with one atom per atom line and one bond per bond
        line; single bonds are flagged as rotatable.

    Raises:
        ValueError: If the block is shorter than its counts line
            announces, the counts line is not numeric, an atom line has
            no symbol or non-numeric coordinates, or a bond references an
            atom number outside ``1..atom_count``.
    """
    lines = content.splitlines()
    if len(lines) < 4:
        raise ValueError(
            "MOL block too short: expected 3 header lines and a counts line"
        )

    name = lines[0].strip()

    counts_line = lines[3]
    try:
        n_atoms = int(counts_line[0:3])
        n_bonds = int(counts_line[3:6])
    except ValueError as err:
        raise ValueError(f"Invalid MOL counts line: {counts_line!r}") from err
    if n_atoms < 0 or n_bonds < 0:
        raise ValueError(f"Invalid MOL counts line: {counts_line!r}")

    if len(lines) < 4 + n_atoms + n_bonds:
        raise ValueError(f"MOL file truncated: expected {4 + n_atoms + n_bonds} lines")

    # Parse everything before touching the Molecule so a malformed block
    # is rejected without building a half-populated object.
    atom_records: list[tuple[str, list[float]]] = []
    for line in lines[4:4 + n_atoms]:
        coords = [float(line[0:10]), float(line[10:20]), float(line[20:30])]
        symbol = line[31:34].strip()
        if not symbol:
            raise ValueError(f"MOL atom line has no element symbol: {line!r}")
        atom_records.append((symbol, coords))

    bond_start = 4 + n_atoms
    bond_records: list[tuple[int, int, int]] = []
    for line in lines[bond_start:bond_start + n_bonds]:
        ai = int(line[0:3]) - 1  # convert to 0-based
        aj = int(line[3:6]) - 1
        order = int(line[6:9])
        # Atom number 0 would become index -1 and silently address the
        # last atom, so the range is checked explicitly.
        if not (0 <= ai < n_atoms and 0 <= aj < n_atoms):
            raise ValueError(f"MOL bond references unknown atom: {line!r}")
        bond_records.append((ai, aj, order))

    mol = Molecule(name=name)
    for symbol, coords in atom_records:
        mol.add_atom(symbol, coords)
    for ai, aj, order in bond_records:
        mol.add_bond(ai, aj, order=order, rotatable=(order == 1))
    return mol
104
+
105
+
106
+ # ── MOL file I/O ─────────────────────────────────────────────────────
107
+
108
def write_mol(mol: Molecule, filepath: str) -> None:
    """Save *mol* at *filepath* as a single V2000 MOL block.

    The file is opened in text mode for writing, so an existing file is
    replaced.
    """
    with open(filepath, "w") as handle:
        mol_block = to_mol_string(mol)
        handle.write(mol_block)
112
+
113
+
114
def read_mol(filepath: str) -> Molecule:
    """Load the V2000 MOL block stored at *filepath* as a Molecule.

    The whole file is read as text and handed to ``from_mol_string``.
    """
    with open(filepath, "r") as handle:
        text = handle.read()
    molecule = from_mol_string(text)
    return molecule
119
+
120
+
121
+ # ── SDF multi-molecule I/O ───────────────────────────────────────────
122
+
123
def write_sdf(molecules: list[Molecule], filepath: str) -> None:
    """Save several Molecules to one SDF file.

    Every molecule is written as a MOL block immediately followed by a
    ``$$$$`` line, which acts as the record separator.
    """
    with open(filepath, "w") as handle:
        for entry in molecules:
            record = to_mol_string(entry)
            handle.write(record)
            handle.write("$$$$\n")
132
+
133
+
134
def read_sdf(filepath: str) -> list[Molecule]:
    """Read all Molecules from an SDF file.

    SDF files contain one or more MOL blocks, each terminated by a line
    starting with ``$$$$``.  Data items between the ``M  END`` line and
    the ``$$$$`` delimiter are silently ignored, as are records without
    a V2000 counts line and records that fail to parse.

    Records are split line by line rather than by stripping whitespace,
    so a molecule with an empty name (blank first header line) keeps its
    three header lines and is parsed correctly.

    Args:
        filepath: Path of the SDF file to read.

    Returns:
        The successfully parsed molecules, in file order.
    """
    with open(filepath, "r") as f:
        content = f.read()

    # Group lines into records; a "$$$$" line closes the current record.
    records: list[list[str]] = [[]]
    for line in content.splitlines():
        if line.startswith("$$$$"):
            records.append([])
        else:
            records[-1].append(line)

    molecules: list[Molecule] = []
    for record in records:
        # The counts line is the anchor of the MOL block.
        counts_idx = next(
            (i for i, line in enumerate(record)
             if line.rstrip().endswith("V2000")),
            None,
        )
        if counts_idx is None:
            continue

        # Exactly three header lines precede the counts line.  Extra
        # leading blank lines are dropped and missing ones are padded so
        # the counts line always ends up on line 4.
        header = record[max(0, counts_idx - 3):counts_idx]
        header = [""] * (3 - len(header)) + header

        # Keep the block up to and including the "M  END" terminator
        # (any spacing between the two tokens is accepted).
        body: list[str] = []
        for line in record[counts_idx:]:
            body.append(line)
            if line.split() == ["M", "END"]:
                break

        try:
            molecules.append(from_mol_string("\n".join(header + body)))
        except (ValueError, IndexError):
            # Skip malformed blocks rather than crashing
            continue

    return molecules
molbuilder/io/pdb.py ADDED
@@ -0,0 +1,184 @@
1
+ """PDB file format reader/writer.
2
+
3
+ Supports ATOM/HETATM records for coordinates and CONECT records for
4
+ bond connectivity. Intended for small-molecule use (not full protein
5
+ PDB support).
6
+
7
+ Record formats used::
8
+
9
+ HETATM    1 C1   MOL A   1       0.000   0.000   0.000  1.00  0.00           C
10
+ CONECT 1 2 3 4 5
11
+ END
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections import Counter, defaultdict
17
+
18
+ import numpy as np
19
+
20
+ from molbuilder.molecule.graph import Molecule
21
+
22
+
23
+ # ── Helpers ───────────────────────────────────────────────────────────
24
+
25
def _atom_names(mol: Molecule) -> list[str]:
    """Build one PDB atom name per atom, in atom order.

    A name is the element symbol followed by a running number counted
    separately for each element (C1, C2, H1, H2, ...).
    """
    tally: dict[str, int] = {}
    labels: list[str] = []
    for atom in mol.atoms:
        element = atom.symbol
        ordinal = tally.get(element, 0) + 1
        tally[element] = ordinal
        labels.append(element + str(ordinal))
    return labels
34
+
35
+
36
def _residue_name(mol: Molecule) -> str:
    """Derive the residue name used in PDB records from the molecule name.

    The first three characters of the stripped name are upper-cased and
    padded with spaces to width 3; a missing or blank name gives "MOL".
    """
    stripped = (mol.name or "").strip()
    if not stripped:
        return "MOL"
    prefix = stripped[:3].upper()
    return f"{prefix:<3}"
43
+
44
+
45
+ # ── String serialisation ─────────────────────────────────────────────
46
+
47
def to_pdb_string(mol: Molecule) -> str:
    """Serialise a Molecule to PDB-format string with ATOM and CONECT
    records.

    Every atom is written as a ``HETATM`` record in chain ``A``,
    residue 1, with occupancy 1.00 and temperature factor 0.00.  Bonds
    are written as ``CONECT`` records (at most four neighbours per line)
    and the output ends with ``END``.  Bond orders are not stored.

    Fixed-width columns of the coordinate record (1-based)::

         1- 6  Record type
         7-11  Serial
        13-16  Atom name
        17     Alternate location (blank)
        18-20  Residue name
        22     Chain ID
        23-26  Residue sequence number
        27-30  Insertion code and padding (blank)
        31-38  x (8.3f)
        39-46  y (8.3f)
        47-54  z (8.3f)
        55-60  Occupancy (6.2f)
        61-66  Temp factor (6.2f)
        67-76  Padding (blank)
        77-78  Element symbol, right-justified

    Args:
        mol: Molecule to serialise; each atom's ``index`` is used as its
            0-based position, so serial numbers are ``index + 1``.

    Returns:
        The PDB text, terminated by a newline.
    """
    lines: list[str] = []
    atom_names = _atom_names(mol)
    res_name = _residue_name(mol)
    chain = "A"
    res_seq = 1

    # Coordinate records.
    for atom in mol.atoms:
        serial = atom.index + 1  # 1-based
        aname = atom_names[atom.index]
        x, y, z = atom.position
        element = atom.symbol.rjust(2)

        # The blank runs are significant: four characters after the
        # residue number put x at column 31, and ten characters after the
        # temperature factor put the element at columns 77-78.
        line = (
            f"HETATM{serial:5d} {aname:<4s} {res_name:3s} {chain:1s}"
            f"{res_seq:4d}    "
            f"{x:8.3f}{y:8.3f}{z:8.3f}"
            f"{1.0:6.2f}{0.0:6.2f}"
            f"          {element:>2s}"
        )
        lines.append(line)

    # CONECT records: for each atom, the serials (1-based) of its
    # bonded partners; every bond is listed from both ends.
    adj: dict[int, list[int]] = defaultdict(list)
    for bond in mol.bonds:
        adj[bond.atom_i + 1].append(bond.atom_j + 1)
        adj[bond.atom_j + 1].append(bond.atom_i + 1)

    for serial in sorted(adj):
        neighbours = sorted(adj[serial])
        # PDB CONECT records can hold up to 4 bonded atoms per line
        for chunk_start in range(0, len(neighbours), 4):
            chunk = neighbours[chunk_start:chunk_start + 4]
            parts = "".join(f"{n:5d}" for n in chunk)
            lines.append(f"CONECT{serial:5d}{parts}")

    lines.append("END")
    return "\n".join(lines) + "\n"
103
+
104
+
105
def from_pdb_string(content: str) -> Molecule:
    """Parse a Molecule from PDB-format string.

    Reads HETATM / ATOM records for coordinates and CONECT records for
    bond connectivity.  All other record types are ignored.

    The element is taken from columns 77-78 when present (normalised to
    capitalised form, e.g. ``CL`` -> ``Cl``); otherwise the letters of
    the atom name (columns 13-16) are used unchanged.  The molecule name
    is the residue name of the first atom.  Every bond is created with
    order 1 and flagged rotatable, because CONECT records carry no bond
    order; a bond listed from both ends is added once.

    Args:
        content: Text of a PDB file.

    Returns:
        The parsed Molecule.

    Raises:
        ValueError: If a serial number or coordinate field is not
            numeric.
    """
    mol = Molecule()

    serial_to_index: dict[int, int] = {}
    conect_records: list[tuple[int, list[int]]] = []

    for line in content.splitlines():
        record = line[:6].strip()

        if record in ("ATOM", "HETATM"):
            serial = int(line[6:11])
            # Element symbol: columns 77-78 (preferred).  Slicing never
            # raises, so a line whose trailing blanks were trimmed (and
            # which therefore ends at column 77) is still read correctly.
            element = line[76:78].strip().capitalize()
            if not element:
                # Fallback: strip digits from atom name (cols 13-16)
                raw_name = line[12:16].strip()
                element = "".join(c for c in raw_name if c.isalpha())

            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])

            idx = mol.add_atom(element, [x, y, z])
            serial_to_index[serial] = idx

            # Extract molecule name from residue name on first atom
            if idx == 0:
                mol.name = line[17:20].strip()

        elif record == "CONECT":
            serial = int(line[6:11])
            neighbours: list[int] = []
            # Neighbour serials sit in consecutive 5-character fields;
            # the last field may be shorter when the line was trimmed.
            for col in range(11, len(line), 5):
                token = line[col:col + 5].strip()
                if not token:
                    continue
                try:
                    neighbours.append(int(token))
                except ValueError:
                    # Non-numeric fields are ignored on purpose so one
                    # bad field does not discard the whole record.
                    pass
            conect_records.append((serial, neighbours))

    # Build bonds from CONECT records (avoid duplicates)
    added_bonds: set[tuple[int, int]] = set()
    for serial, neighbours in conect_records:
        if serial not in serial_to_index:
            continue
        i = serial_to_index[serial]
        for nb_serial in neighbours:
            if nb_serial not in serial_to_index:
                continue
            j = serial_to_index[nb_serial]
            bond_key = (min(i, j), max(i, j))
            if bond_key not in added_bonds:
                added_bonds.add(bond_key)
                mol.add_bond(i, j, order=1, rotatable=True)

    return mol
170
+
171
+
172
+ # ── File I/O ──────────────────────────────────────────────────────────
173
+
174
def write_pdb(mol: Molecule, filepath: str) -> None:
    """Save *mol* at *filepath* in PDB format.

    The file is opened in text mode for writing, so an existing file is
    replaced.
    """
    with open(filepath, "w") as handle:
        pdb_text = to_pdb_string(mol)
        handle.write(pdb_text)
178
+
179
+
180
def read_pdb(filepath: str) -> Molecule:
    """Load the PDB file at *filepath* as a Molecule.

    The whole file is read as text and handed to ``from_pdb_string``.
    """
    with open(filepath, "r") as handle:
        text = handle.read()
    molecule = from_pdb_string(text)
    return molecule
@@ -0,0 +1,47 @@
1
+ """SMILES file I/O wrapper.
2
+
3
+ Reads and writes SMILES files where each line contains a SMILES string
4
+ optionally followed by a molecule name::
5
+
6
+ CCO ethanol
7
+ c1ccccc1 benzene
8
+ """
9
+
10
+ import warnings
11
+
12
+ from molbuilder.smiles.parser import parse
13
+ from molbuilder.smiles.writer import to_smiles
14
+ from molbuilder.molecule.graph import Molecule
15
+
16
+
17
def write_smiles(mol: Molecule, filepath: str) -> None:
    """Write a SMILES string to a file.

    The file holds a single line: the SMILES string, followed by a
    space and the molecule name when the molecule has one.  A missing or
    blank name is omitted instead of being written as ``None`` or as
    trailing whitespace, and line breaks inside the name are replaced by
    spaces so the record stays on one line.

    Args:
        mol: Molecule to convert with ``to_smiles``.
        filepath: Destination path; an existing file is replaced.
    """
    smi = to_smiles(mol)
    name = " ".join(mol.name.splitlines()).strip() if mol.name else ""
    record = f"{smi} {name}" if name else smi
    with open(filepath, "w") as f:
        f.write(record + "\n")
22
+
23
+
24
def read_smiles(filepath: str) -> list[Molecule]:
    """Load every molecule listed in a SMILES file.

    Each non-empty line that does not begin with ``#`` is split at the
    first run of whitespace into a SMILES string and an optional name.
    When no name is given the SMILES string itself becomes the name.
    Lines whose SMILES cannot be parsed trigger a warning and are
    skipped.
    """
    parsed = []
    with open(filepath, "r") as handle:
        for raw in handle:
            entry = raw.strip()
            if entry == "" or entry.startswith("#"):
                continue
            smi, *rest = entry.split(None, 1)
            label = rest[0] if rest else smi
            try:
                mol = parse(smi)
            except (ValueError, IndexError) as e:
                warnings.warn(f"Skipping invalid SMILES '{smi}': {e}")
                continue
            mol.name = label
            parsed.append(mol)
    return parsed
molbuilder/io/xyz.py ADDED
@@ -0,0 +1,103 @@
1
+ """XYZ file format reader/writer.
2
+
3
+ The XYZ format is one of the simplest molecular geometry formats::
4
+
5
+ <atom_count>
6
+ <comment line>
7
+ <symbol> <x> <y> <z>
8
+ <symbol> <x> <y> <z>
9
+ ...
10
+
11
+ XYZ files do not store bond connectivity, so bonds are inferred from
12
+ interatomic distances and covalent radii when reading.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import math
18
+
19
+ import numpy as np
20
+
21
+ from molbuilder.molecule.graph import Molecule
22
+ from molbuilder.core.element_properties import covalent_radius_pm
23
+
24
+
25
+ # ── Bond inference ────────────────────────────────────────────────────
26
+
27
+ _BOND_TOLERANCE = 1.3 # multiplier on sum-of-covalent-radii
28
+
29
+
30
def _infer_bonds(mol: Molecule) -> None:
    """Add bonds between atoms whose distance is within tolerance of
    the sum of their covalent radii (converted from pm to Angstroms).

    Every unordered pair of atoms is tested once.  A pair is bonded
    (order 1, rotatable) when its distance is strictly smaller than
    ``_BOND_TOLERANCE`` times the sum of the two covalent radii.  The
    molecule is modified in place.
    """
    n = len(mol.atoms)
    if n < 2:
        # No pairs to test; nothing is looked up or added.
        return

    # The radius depends only on the atom, so look it up once per atom
    # instead of once per pair.
    radii = [covalent_radius_pm(atom.symbol) / 100.0 for atom in mol.atoms]

    for i in range(n):
        for j in range(i + 1, n):
            max_dist = _BOND_TOLERANCE * (radii[i] + radii[j])
            dist = float(np.linalg.norm(
                mol.atoms[i].position - mol.atoms[j].position))
            if dist < max_dist:
                mol.add_bond(i, j, order=1, rotatable=True)
44
+
45
+
46
+ # ── String serialisation ─────────────────────────────────────────────
47
+
48
def to_xyz_string(mol: Molecule) -> str:
    """Render the molecule in XYZ format.

    The first line is the atom count, the second the molecule name (empty
    if there is none), followed by one ``symbol x y z`` line per atom
    with eight decimals per coordinate.  The result ends with a newline.
    """
    rows: list[str] = [f"{len(mol.atoms)}", mol.name or ""]
    for atom in mol.atoms:
        px, py, pz = atom.position
        rows.append(
            "{:<4s} {:15.8f} {:15.8f} {:15.8f}".format(atom.symbol, px, py, pz)
        )
    return "".join(row + "\n" for row in rows)
57
+
58
+
59
def from_xyz_string(content: str) -> Molecule:
    """Parse a Molecule from an XYZ-format string.

    The first line is the atom count, the second a comment that becomes
    the molecule name, followed by one ``symbol x y z`` line per atom.
    Lines beyond the declared atom count are ignored.  Bonds are
    inferred from interatomic distances using covalent radii.

    Args:
        content: Text of an XYZ file.  Leading blank lines are skipped.

    Returns:
        The parsed Molecule with inferred bonds.

    Raises:
        ValueError: If the count or comment line is missing, the count
            is not a non-negative integer, fewer atom lines than declared
            are present, or an atom line lacks a symbol and three numeric
            coordinates.
    """
    # Only leading whitespace is removed: stripping the end as well would
    # swallow an empty comment line of a zero-atom file.
    lines = content.lstrip().splitlines()
    if len(lines) < 2:
        raise ValueError("XYZ content must have at least two lines "
                         "(atom count and comment).")

    atom_count = int(lines[0].strip())
    if atom_count < 0:
        raise ValueError(f"XYZ atom count must not be negative: {atom_count}")
    comment = lines[1].strip()

    if len(lines) < atom_count + 2:
        raise ValueError(f"XYZ file declares {atom_count} atoms but has only "
                         f"{len(lines) - 2} atom lines")

    # Parse all atom lines first so malformed input is rejected before a
    # Molecule is built.
    records: list[tuple[str, list[float]]] = []
    for offset, line in enumerate(lines[2:2 + atom_count]):
        parts = line.split()
        if len(parts) < 4:
            raise ValueError(
                f"XYZ atom line {offset + 3} must contain a symbol and "
                f"three coordinates: {line!r}"
            )
        coords = [float(parts[1]), float(parts[2]), float(parts[3])]
        records.append((parts[0], coords))

    mol = Molecule(name=comment)
    for symbol, coords in records:
        mol.add_atom(symbol, coords)

    _infer_bonds(mol)
    return mol
86
+
87
+
88
+ # ── File I/O ──────────────────────────────────────────────────────────
89
+
90
def write_xyz(mol: Molecule, filepath: str) -> None:
    """Save *mol* at *filepath* in XYZ format.

    The file is opened in text mode for writing, so an existing file is
    replaced.  Bond information is not part of the output.
    """
    with open(filepath, "w") as handle:
        xyz_text = to_xyz_string(mol)
        handle.write(xyz_text)
94
+
95
+
96
def read_xyz(filepath: str) -> Molecule:
    """Load the XYZ file at *filepath* as a Molecule.

    The whole file is read as text and handed to ``from_xyz_string``,
    which also infers the bonds from interatomic distances and covalent
    radii.
    """
    with open(filepath, "r") as handle:
        text = handle.read()
    molecule = from_xyz_string(text)
    return molecule
@@ -0,0 +1,2 @@
1
+ """Molecule graph, conformations, and builders."""
2
+ from molbuilder.molecule.graph import Molecule, Atom, Bond, Hybridization