rdkit-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdkit_cli/__init__.py +4 -0
- rdkit_cli/__main__.py +6 -0
- rdkit_cli/cli.py +162 -0
- rdkit_cli/commands/__init__.py +1 -0
- rdkit_cli/commands/conformers.py +220 -0
- rdkit_cli/commands/convert.py +162 -0
- rdkit_cli/commands/depict.py +311 -0
- rdkit_cli/commands/descriptors.py +251 -0
- rdkit_cli/commands/diversity.py +232 -0
- rdkit_cli/commands/enumerate.py +229 -0
- rdkit_cli/commands/filter.py +384 -0
- rdkit_cli/commands/fingerprints.py +179 -0
- rdkit_cli/commands/fragment.py +284 -0
- rdkit_cli/commands/mcs.py +162 -0
- rdkit_cli/commands/reactions.py +191 -0
- rdkit_cli/commands/scaffold.py +243 -0
- rdkit_cli/commands/similarity.py +359 -0
- rdkit_cli/commands/standardize.py +138 -0
- rdkit_cli/core/__init__.py +1 -0
- rdkit_cli/core/conformers.py +197 -0
- rdkit_cli/core/depict.py +241 -0
- rdkit_cli/core/descriptors.py +248 -0
- rdkit_cli/core/diversity.py +174 -0
- rdkit_cli/core/enumerate.py +190 -0
- rdkit_cli/core/filters.py +443 -0
- rdkit_cli/core/fingerprints.py +265 -0
- rdkit_cli/core/fragment.py +237 -0
- rdkit_cli/core/mcs.py +128 -0
- rdkit_cli/core/reactions.py +159 -0
- rdkit_cli/core/scaffold.py +174 -0
- rdkit_cli/core/similarity.py +206 -0
- rdkit_cli/core/standardizer.py +141 -0
- rdkit_cli/io/__init__.py +7 -0
- rdkit_cli/io/formats.py +109 -0
- rdkit_cli/io/readers.py +352 -0
- rdkit_cli/io/writers.py +275 -0
- rdkit_cli/parallel/__init__.py +5 -0
- rdkit_cli/parallel/batch.py +181 -0
- rdkit_cli/parallel/executor.py +180 -0
- rdkit_cli/progress/__init__.py +5 -0
- rdkit_cli/progress/ninja.py +195 -0
- rdkit_cli/utils/__init__.py +1 -0
- rdkit_cli-0.1.0.dist-info/METADATA +380 -0
- rdkit_cli-0.1.0.dist-info/RECORD +47 -0
- rdkit_cli-0.1.0.dist-info/WHEEL +4 -0
- rdkit_cli-0.1.0.dist-info/entry_points.txt +2 -0
- rdkit_cli-0.1.0.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Standardize command implementation."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from rdkit_cli.cli import RdkitHelpFormatter, add_common_io_options, add_common_processing_options
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def register_parser(subparsers):
    """Attach the ``standardize`` subcommand to *subparsers*.

    The new parser first receives whatever ``add_common_io_options`` and
    ``add_common_processing_options`` register, then one boolean switch per
    standardization step, and finally dispatches to ``run_standardize``
    through its ``func`` default.

    Args:
        subparsers: The object returned by ``ArgumentParser.add_subparsers()``.
    """
    # (flag, help text) pairs, listed in the order they are registered.
    switches = (
        ("--no-canonicalize", "Don't canonicalize output SMILES"),
        ("--remove-stereo", "Remove stereochemistry information"),
        ("--disconnect-metals", "Disconnect metal atoms from molecules"),
        ("--normalize", "Apply normalization transforms"),
        ("--reionize", "Standardize ionization states"),
        ("--uncharge", "Neutralize charges"),
        ("--fragment-parent", "Keep only the largest fragment"),
        ("--tautomer-parent", "Canonicalize tautomer form"),
        (
            "--cleanup",
            "Apply standard cleanup (normalize + uncharge + fragment-parent)",
        ),
        ("--include-original", "Include original SMILES in output"),
    )

    parser = subparsers.add_parser(
        "standardize",
        help="Standardize and canonicalize molecules",
        description="Apply standardization transforms to molecular structures.",
        formatter_class=RdkitHelpFormatter,
    )

    add_common_io_options(parser)
    add_common_processing_options(parser)

    # Every standardization option is a plain on/off switch.
    for flag, help_text in switches:
        parser.add_argument(flag, action="store_true", help=help_text)

    parser.set_defaults(func=run_standardize)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def run_standardize(args) -> int:
    """Run the standardize command.

    Args:
        args: Parsed command-line namespace. Reads ``input``, ``output``,
            ``smiles_column``, ``name_column``, ``no_header``, ``ncpu``,
            ``quiet`` and the standardization switches registered by
            ``register_parser``.

    Returns:
        0 when no molecule failed; 1 when the input file does not exist or
        at least one molecule failed.
    """
    # Validate the input path before anything else so that a bad path is
    # reported immediately, without paying for the lazy imports below or
    # building a standardizer that would never be used.
    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        return 1

    # Lazy imports
    from rdkit_cli.core.standardizer import MoleculeStandardizer
    from rdkit_cli.io import create_reader, create_writer
    from rdkit_cli.parallel.batch import process_molecules

    # Handle --cleanup shortcut: it switches on the three steps it bundles.
    normalize = args.normalize or args.cleanup
    uncharge = args.uncharge or args.cleanup
    fragment_parent = args.fragment_parent or args.cleanup

    # Create standardizer
    standardizer = MoleculeStandardizer(
        canonicalize=not args.no_canonicalize,
        remove_stereo=args.remove_stereo,
        disconnect_metals=args.disconnect_metals,
        normalize=normalize,
        reionize=args.reionize,
        uncharge=uncharge,
        fragment_parent=fragment_parent,
        tautomer_parent=args.tautomer_parent,
        include_original=args.include_original,
    )

    # Create reader
    reader = create_reader(
        input_path,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )

    # Create writer; its columns are dictated by the standardizer settings.
    output_path = Path(args.output)
    writer = create_writer(
        output_path,
        columns=standardizer.get_column_names(),
    )

    # Process
    with reader, writer:
        result = process_molecules(
            reader=reader,
            writer=writer,
            processor=standardizer.standardize,
            n_workers=args.ncpu,
            quiet=args.quiet,
        )

    # The summary goes to stderr so it never mixes with data on stdout.
    if not args.quiet:
        print(
            f"Processed {result.successful}/{result.total_processed} molecules "
            f"({result.failed} failed) in {result.elapsed_time:.1f}s",
            file=sys.stderr,
        )

    return 0 if result.failed == 0 else 1
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core processing logic for rdkit-cli."""
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""Conformer generation engine."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional, Any
|
|
4
|
+
|
|
5
|
+
from rdkit import Chem
|
|
6
|
+
from rdkit.Chem import AllChem, rdDistGeom
|
|
7
|
+
|
|
8
|
+
from rdkit_cli.io.readers import MoleculeRecord
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ConformerGenerator:
    """Generate 3D conformers for molecules."""

    def __init__(
        self,
        num_conformers: int = 10,
        method: str = "etkdgv3",
        optimize: bool = True,
        force_field: str = "mmff",
        max_iterations: int = 200,
        random_seed: int = 42,
    ):
        """
        Initialize conformer generator.

        Args:
            num_conformers: Number of conformers to generate
            method: Embedding method (etkdgv3, etkdgv2, etdg), case-insensitive
            optimize: Whether to optimize conformers
            force_field: Force field for optimization (mmff, uff),
                case-insensitive. Any other value skips optimization.
            max_iterations: Maximum optimization iterations
            random_seed: Random seed for reproducibility

        Raises:
            ValueError: If ``method`` is not one of the supported names.
        """
        self.num_conformers = num_conformers
        self.method = method.lower()
        self.optimize = optimize
        self.force_field = force_field.lower()
        self.max_iterations = max_iterations
        self.random_seed = random_seed

        # Set up embedding parameters
        if self.method == "etkdgv3":
            self.params = rdDistGeom.ETKDGv3()
        elif self.method == "etkdgv2":
            self.params = rdDistGeom.ETKDGv2()
        elif self.method == "etdg":
            self.params = rdDistGeom.ETDG()
        else:
            raise ValueError(f"Unknown method: {method}")

        self.params.randomSeed = random_seed
        self.params.numThreads = 0  # Use all available threads

    @staticmethod
    def _lowest_energy(results) -> tuple[int, Optional[float]]:
        """
        Pick the lowest-energy entry from force-field optimization results.

        Args:
            results: Sequence of ``(status, energy)`` pairs, one per
                conformer, in conformer order. A status of -1 is treated as
                "force field could not be set up" and its energy is ignored,
                since the accompanying value is a placeholder rather than a
                computed energy.

        Returns:
            ``(index, energy)`` of the lowest-energy usable entry (the first
            one on ties), or ``(0, None)`` when there is no usable entry.
        """
        usable = [
            (energy, index)
            for index, (status, energy) in enumerate(results)
            if status != -1
        ]
        if not usable:
            return 0, None
        energy, index = min(usable)
        return index, energy

    def generate(self, record: "MoleculeRecord") -> Optional[dict[str, Any]]:
        """
        Generate conformers for a molecule.

        Args:
            record: MoleculeRecord to process

        Returns:
            Dictionary with keys ``smiles``, ``mol`` (the hydrogen-added
            molecule carrying the conformers), ``num_conformers`` and
            ``best_conformer`` (conformer ID), plus ``energy`` (rounded to
            two decimals) when optimization produced a usable energy and
            ``name`` when the record has one. None if the record has no
            molecule, no conformer could be embedded, or any step raised.
        """
        if record.mol is None:
            return None

        try:
            # Add hydrogens
            mol = Chem.AddHs(record.mol)

            # Embed conformers
            conf_ids = list(
                AllChem.EmbedMultipleConfs(
                    mol,
                    numConfs=self.num_conformers,
                    params=self.params,
                )
            )

            if not conf_ids:
                return None

            # Optimize if requested
            results = []
            if self.optimize:
                if self.force_field == "mmff":
                    results = AllChem.MMFFOptimizeMoleculeConfs(
                        mol,
                        maxIters=self.max_iterations,
                        numThreads=0,
                    )
                elif self.force_field == "uff":
                    results = AllChem.UFFOptimizeMoleculeConfs(
                        mol,
                        maxIters=self.max_iterations,
                        numThreads=0,
                    )

            # Get lowest energy conformer; without usable energies this
            # falls back to the first embedded conformer and no energy.
            best_index, best_energy = self._lowest_energy(results)

            result: dict[str, Any] = {
                "smiles": record.smiles,
                "mol": mol,
                "num_conformers": len(conf_ids),
                # Report the conformer's ID rather than its list position.
                "best_conformer": conf_ids[best_index],
            }

            if best_energy is not None:
                result["energy"] = round(best_energy, 2)

            if record.name:
                result["name"] = record.name

            return result

        except Exception:
            # Best-effort batch processing: a failing molecule is reported
            # as None instead of aborting the whole run.
            return None
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class ConformerOptimizer:
    """Force-field optimization of a molecule's 3D structure."""

    def __init__(
        self,
        force_field: str = "mmff",
        max_iterations: int = 200,
    ):
        """
        Store the optimization settings.

        Args:
            force_field: Force field name; "mmff" selects MMFF, any other
                value selects UFF. Stored lower-cased.
            max_iterations: Iteration cap handed to the optimizer
        """
        self.max_iterations = max_iterations
        self.force_field = force_field.lower()

    def optimize(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
        """
        Optimize the structure held by a record.

        The record's molecule is copied first. A molecule without any
        conformer gets hydrogens added and a single ETKDGv3 embedding
        before optimization.

        Args:
            record: MoleculeRecord to optimize

        Returns:
            Dictionary with ``smiles`` (of the hydrogen-stripped result) and
            ``mol``, plus ``energy`` (rounded to two decimals) when a force
            field could be built and ``name`` when the record has one.
            None if the record has no molecule, embedding produced no
            conformer, or any step raised.
        """
        source = record.mol
        if source is None:
            return None

        try:
            work = Chem.Mol(source)

            if not work.GetNumConformers():
                # No coordinates yet: build a starting 3D structure.
                work = Chem.AddHs(work)
                AllChem.EmbedMolecule(work, rdDistGeom.ETKDGv3())
                if not work.GetNumConformers():
                    return None

            # Run the optimizer, then rebuild the force field to read the
            # energy of the final geometry.
            if self.force_field == "mmff":
                AllChem.MMFFOptimizeMolecule(work, maxIters=self.max_iterations)
                mmff_props = AllChem.MMFFGetMoleculeProperties(work)
                field = (
                    AllChem.MMFFGetMoleculeForceField(work, mmff_props)
                    if mmff_props
                    else None
                )
            else:
                AllChem.UFFOptimizeMolecule(work, maxIters=self.max_iterations)
                field = AllChem.UFFGetMoleculeForceField(work)

            final_energy = field.CalcEnergy() if field else None

            summary: dict[str, Any] = {
                "smiles": Chem.MolToSmiles(Chem.RemoveHs(work)),
                "mol": work,
            }
            if final_energy is not None:
                summary["energy"] = round(final_energy, 2)
            if record.name:
                summary["name"] = record.name
            return summary

        except Exception:
            return None
|
rdkit_cli/core/depict.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"""Molecular depiction/visualization engine."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional, Any
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from rdkit import Chem
|
|
7
|
+
from rdkit.Chem import AllChem, Draw, rdDepictor
|
|
8
|
+
from rdkit.Chem.Draw import rdMolDraw2D
|
|
9
|
+
|
|
10
|
+
from rdkit_cli.io.readers import MoleculeRecord
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class MoleculeDepiction:
    """Generate 2D depictions of molecules."""

    def __init__(
        self,
        width: int = 300,
        height: int = 300,
        image_format: str = "svg",
        add_atom_indices: bool = False,
        add_stereo_annotation: bool = False,
        highlight_atoms: Optional[list[int]] = None,
        highlight_bonds: Optional[list[int]] = None,
        use_kekulize: bool = True,
        wedge_bonds: bool = True,
        add_chiral_hs: bool = True,
    ):
        """
        Initialize molecule depiction.

        Args:
            width: Image width in pixels
            height: Image height in pixels
            image_format: Output format; 'svg' (case-insensitive) selects the
                SVG drawer, any other value selects the Cairo (PNG) drawer
            add_atom_indices: Add atom index labels
            add_stereo_annotation: Add stereo annotations
            highlight_atoms: Atom indices to highlight
            highlight_bonds: Bond indices to highlight
            use_kekulize: Use Kekule form for drawing
            wedge_bonds: Draw wedged bonds
            add_chiral_hs: Add chiral Hs
        """
        self.width = width
        self.height = height
        self.image_format = image_format.lower()
        self.add_atom_indices = add_atom_indices
        self.add_stereo_annotation = add_stereo_annotation
        self.highlight_atoms = highlight_atoms or []
        self.highlight_bonds = highlight_bonds or []
        self.use_kekulize = use_kekulize
        self.wedge_bonds = wedge_bonds
        self.add_chiral_hs = add_chiral_hs

    def _prepare(self, mol: "Chem.Mol") -> "Chem.Mol":
        """Return a drawing-ready copy of *mol* honouring the prep options."""

        def run(kekulize: bool):
            # forceCoords=True: always lay the molecule out in 2D, even when
            # the input already carries (possibly 3D) coordinates.
            return rdMolDraw2D.PrepareMolForDrawing(
                mol,
                kekulize=kekulize,
                addChiralHs=self.add_chiral_hs,
                wedgeBonds=self.wedge_bonds,
                forceCoords=True,
            )

        try:
            return run(self.use_kekulize)
        except ValueError:
            # Presumably a kekulization failure (RDKit's own Draw helpers
            # retry the same way); draw the aromatic form instead.
            return run(False)

    def depict(self, mol: "Chem.Mol") -> Optional[str]:
        """
        Generate depiction of a molecule.

        The molecule is prepared with RDKit's ``PrepareMolForDrawing`` using
        the ``use_kekulize``, ``add_chiral_hs`` and ``wedge_bonds`` settings,
        so no 3D embedding is involved. The input molecule is not modified.

        Args:
            mol: RDKit molecule

        Returns:
            The drawer's output: SVG text for the SVG drawer, PNG bytes for
            the Cairo drawer. None if ``mol`` is None or drawing raised.
        """
        if mol is None:
            return None

        try:
            prepared = self._prepare(mol)

            # Create drawer
            if self.image_format == "svg":
                drawer = rdMolDraw2D.MolDraw2DSVG(self.width, self.height)
            else:
                drawer = rdMolDraw2D.MolDraw2DCairo(self.width, self.height)

            # Configure options
            opts = drawer.drawOptions()
            opts.addAtomIndices = self.add_atom_indices
            opts.addStereoAnnotation = self.add_stereo_annotation
            # The molecule is already prepared; stop the drawer from
            # re-preparing it with its own defaults.
            opts.prepareMolsBeforeDrawing = False

            # Draw
            if self.highlight_atoms or self.highlight_bonds:
                drawer.DrawMolecule(
                    prepared,
                    highlightAtoms=self.highlight_atoms,
                    highlightBonds=self.highlight_bonds,
                )
            else:
                drawer.DrawMolecule(prepared)

            drawer.FinishDrawing()

            return drawer.GetDrawingText()

        except Exception:
            # Best-effort: a molecule that cannot be drawn yields None.
            return None

    def depict_record(self, record: "MoleculeRecord") -> Optional[dict[str, Any]]:
        """
        Generate depiction of a molecule record.

        Args:
            record: MoleculeRecord to process

        Returns:
            Dictionary with ``smiles`` and ``image`` (plus ``name`` when the
            record has one), or None if the record has no molecule or the
            depiction failed.
        """
        if record.mol is None:
            return None

        image_data = self.depict(record.mol)
        if image_data is None:
            return None

        result: dict[str, Any] = {
            "smiles": record.smiles,
            "image": image_data,
        }

        if record.name:
            result["name"] = record.name

        return result
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class GridDepiction:
    """Generate grid of molecule depictions."""

    def __init__(
        self,
        mols_per_row: int = 4,
        mol_width: int = 200,
        mol_height: int = 200,
        legends: Optional[list[str]] = None,
        use_svg: bool = True,
    ):
        """
        Initialize grid depiction.

        Args:
            mols_per_row: Molecules per row
            mol_width: Width per molecule
            mol_height: Height per molecule
            legends: List of labels for molecules
            use_svg: Output SVG instead of PNG
        """
        self.mols_per_row = mols_per_row
        self.mol_width = mol_width
        self.mol_height = mol_height
        self.legends = legends
        self.use_svg = use_svg

    def _legends_for(self, count: int) -> list[str]:
        """
        Return exactly ``count`` legends.

        Configured legends are used in order; surplus ones are dropped and
        missing ones are filled with empty strings so that every grid cell
        has a legend entry.
        """
        legends = list(self.legends or [])[:count]
        legends.extend([""] * (count - len(legends)))
        return legends

    def depict(self, mols: "list[Chem.Mol]") -> Optional[str]:
        """
        Generate grid depiction.

        Args:
            mols: List of molecules; None entries are passed through to the
                grid as None

        Returns:
            SVG output when ``use_svg`` is set, otherwise PNG bytes. None if
            ``mols`` is empty or drawing raised.
        """
        if not mols:
            return None

        try:
            # Prepare molecules: work on copies so callers' molecules keep
            # their own coordinates.
            prepared_mols = []
            for mol in mols:
                if mol is None:
                    prepared_mols.append(None)
                    continue
                mol = Chem.Mol(mol)
                rdDepictor.Compute2DCoords(mol)
                prepared_mols.append(mol)

            grid = Draw.MolsToGridImage(
                prepared_mols,
                molsPerRow=self.mols_per_row,
                subImgSize=(self.mol_width, self.mol_height),
                legends=self._legends_for(len(prepared_mols)),
                useSVG=self.use_svg,
            )

            if self.use_svg:
                return grid

            # Convert to bytes
            import io

            buf = io.BytesIO()
            grid.save(buf, format="PNG")
            return buf.getvalue()

        except Exception:
            # Best-effort: a grid that cannot be drawn yields None.
            return None
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def depict_smiles(
    smiles: str,
    width: int = 300,
    height: int = 300,
    image_format: str = "svg",
) -> Optional[str]:
    """
    Render a SMILES string with a default-configured MoleculeDepiction.

    Args:
        smiles: SMILES string to parse
        width: Image width
        height: Image height
        image_format: Output format, forwarded to MoleculeDepiction

    Returns:
        Whatever ``MoleculeDepiction.depict`` returns for the parsed
        molecule, or None when the SMILES cannot be parsed.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is not None:
        renderer = MoleculeDepiction(
            width=width, height=height, image_format=image_format
        )
        return renderer.depict(parsed)
    return None
|