rdkit-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. rdkit_cli/__init__.py +4 -0
  2. rdkit_cli/__main__.py +6 -0
  3. rdkit_cli/cli.py +162 -0
  4. rdkit_cli/commands/__init__.py +1 -0
  5. rdkit_cli/commands/conformers.py +220 -0
  6. rdkit_cli/commands/convert.py +162 -0
  7. rdkit_cli/commands/depict.py +311 -0
  8. rdkit_cli/commands/descriptors.py +251 -0
  9. rdkit_cli/commands/diversity.py +232 -0
  10. rdkit_cli/commands/enumerate.py +229 -0
  11. rdkit_cli/commands/filter.py +384 -0
  12. rdkit_cli/commands/fingerprints.py +179 -0
  13. rdkit_cli/commands/fragment.py +284 -0
  14. rdkit_cli/commands/mcs.py +162 -0
  15. rdkit_cli/commands/reactions.py +191 -0
  16. rdkit_cli/commands/scaffold.py +243 -0
  17. rdkit_cli/commands/similarity.py +359 -0
  18. rdkit_cli/commands/standardize.py +138 -0
  19. rdkit_cli/core/__init__.py +1 -0
  20. rdkit_cli/core/conformers.py +197 -0
  21. rdkit_cli/core/depict.py +241 -0
  22. rdkit_cli/core/descriptors.py +248 -0
  23. rdkit_cli/core/diversity.py +174 -0
  24. rdkit_cli/core/enumerate.py +190 -0
  25. rdkit_cli/core/filters.py +443 -0
  26. rdkit_cli/core/fingerprints.py +265 -0
  27. rdkit_cli/core/fragment.py +237 -0
  28. rdkit_cli/core/mcs.py +128 -0
  29. rdkit_cli/core/reactions.py +159 -0
  30. rdkit_cli/core/scaffold.py +174 -0
  31. rdkit_cli/core/similarity.py +206 -0
  32. rdkit_cli/core/standardizer.py +141 -0
  33. rdkit_cli/io/__init__.py +7 -0
  34. rdkit_cli/io/formats.py +109 -0
  35. rdkit_cli/io/readers.py +352 -0
  36. rdkit_cli/io/writers.py +275 -0
  37. rdkit_cli/parallel/__init__.py +5 -0
  38. rdkit_cli/parallel/batch.py +181 -0
  39. rdkit_cli/parallel/executor.py +180 -0
  40. rdkit_cli/progress/__init__.py +5 -0
  41. rdkit_cli/progress/ninja.py +195 -0
  42. rdkit_cli/utils/__init__.py +1 -0
  43. rdkit_cli-0.1.0.dist-info/METADATA +380 -0
  44. rdkit_cli-0.1.0.dist-info/RECORD +47 -0
  45. rdkit_cli-0.1.0.dist-info/WHEEL +4 -0
  46. rdkit_cli-0.1.0.dist-info/entry_points.txt +2 -0
  47. rdkit_cli-0.1.0.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,138 @@
1
+ """Standardize command implementation."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ from rdkit_cli.cli import RdkitHelpFormatter, add_common_io_options, add_common_processing_options
7
+
8
+
9
def register_parser(subparsers):
    """Attach the ``standardize`` subcommand to an argparse-style subparsers object.

    The subcommand receives the shared I/O and processing options followed by
    the standardization switches, and dispatches to ``run_standardize``.

    Args:
        subparsers: Object exposing ``add_parser`` on which the new
            subcommand parser is created.
    """
    std_parser = subparsers.add_parser(
        "standardize",
        help="Standardize and canonicalize molecules",
        description="Apply standardization transforms to molecular structures.",
        formatter_class=RdkitHelpFormatter,
    )

    # Options shared with the other subcommands are registered first.
    add_common_io_options(std_parser)
    add_common_processing_options(std_parser)

    # Every standardization option is a plain on/off switch, so the flags are
    # listed as (flag, help text) pairs and registered in this exact order.
    switches = (
        ("--no-canonicalize", "Don't canonicalize output SMILES"),
        ("--remove-stereo", "Remove stereochemistry information"),
        ("--disconnect-metals", "Disconnect metal atoms from molecules"),
        ("--normalize", "Apply normalization transforms"),
        ("--reionize", "Standardize ionization states"),
        ("--uncharge", "Neutralize charges"),
        ("--fragment-parent", "Keep only the largest fragment"),
        ("--tautomer-parent", "Canonicalize tautomer form"),
        (
            "--cleanup",
            "Apply standard cleanup (normalize + uncharge + fragment-parent)",
        ),
        ("--include-original", "Include original SMILES in output"),
    )
    for flag, help_text in switches:
        std_parser.add_argument(flag, action="store_true", help=help_text)

    std_parser.set_defaults(func=run_standardize)
74
+
75
+
76
def run_standardize(args) -> int:
    """Execute the ``standardize`` command for already-parsed CLI arguments.

    Args:
        args: Parsed argument namespace carrying the options registered for
            the ``standardize`` subcommand plus the common I/O and
            processing options.

    Returns:
        ``1`` when the input file does not exist or at least one molecule
        failed, ``0`` otherwise.
    """
    # Imported here rather than at module level so they are only loaded when
    # the command actually runs.
    from rdkit_cli.core.standardizer import MoleculeStandardizer
    from rdkit_cli.io import create_reader, create_writer
    from rdkit_cli.parallel.batch import process_molecules

    # --cleanup is shorthand for normalize + uncharge + fragment-parent.
    cleanup = args.cleanup
    standardizer = MoleculeStandardizer(
        canonicalize=not args.no_canonicalize,
        remove_stereo=args.remove_stereo,
        disconnect_metals=args.disconnect_metals,
        normalize=args.normalize or cleanup,
        reionize=args.reionize,
        uncharge=args.uncharge or cleanup,
        fragment_parent=args.fragment_parent or cleanup,
        tautomer_parent=args.tautomer_parent,
        include_original=args.include_original,
    )

    # Bail out early on a missing input file.
    source = Path(args.input)
    if not source.exists():
        print(f"Error: Input file not found: {source}", file=sys.stderr)
        return 1

    reader = create_reader(
        source,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )

    # The output columns are dictated by the configured standardizer.
    writer = create_writer(
        Path(args.output),
        columns=standardizer.get_column_names(),
    )

    with reader, writer:
        result = process_molecules(
            reader=reader,
            writer=writer,
            processor=standardizer.standardize,
            n_workers=args.ncpu,
            quiet=args.quiet,
        )

    if not args.quiet:
        summary = (
            f"Processed {result.successful}/{result.total_processed} molecules "
            f"({result.failed} failed) in {result.elapsed_time:.1f}s"
        )
        print(summary, file=sys.stderr)

    # Any failed molecule turns the exit status non-zero.
    return 1 if result.failed != 0 else 0
@@ -0,0 +1 @@
1
+ """Core processing logic for rdkit-cli."""
@@ -0,0 +1,197 @@
1
+ """Conformer generation engine."""
2
+
3
+ from typing import Optional, Any
4
+
5
+ from rdkit import Chem
6
+ from rdkit.Chem import AllChem, rdDistGeom
7
+
8
+ from rdkit_cli.io.readers import MoleculeRecord
9
+
10
+
11
class ConformerGenerator:
    """Generate 3D conformers for molecules.

    The embedding parameters are built once in ``__init__`` and reused for
    every molecule passed to :meth:`generate`.
    """

    def __init__(
        self,
        num_conformers: int = 10,
        method: str = "etkdgv3",
        optimize: bool = True,
        force_field: str = "mmff",
        max_iterations: int = 200,
        random_seed: int = 42,
    ):
        """
        Initialize conformer generator.

        Args:
            num_conformers: Number of conformers to generate
            method: Embedding method (etkdgv3, etkdgv2, etdg); case-insensitive
            optimize: Whether to optimize conformers
            force_field: Force field for optimization (mmff, uff);
                case-insensitive. Any other value means no optimization is run.
            max_iterations: Maximum optimization iterations
            random_seed: Random seed for reproducibility

        Raises:
            ValueError: If ``method`` is not a supported embedding method.
        """
        self.num_conformers = num_conformers
        self.method = method.lower()
        self.optimize = optimize
        self.force_field = force_field.lower()
        self.max_iterations = max_iterations
        self.random_seed = random_seed

        # Set up embedding parameters
        if self.method == "etkdgv3":
            self.params = rdDistGeom.ETKDGv3()
        elif self.method == "etkdgv2":
            self.params = rdDistGeom.ETKDGv2()
        elif self.method == "etdg":
            self.params = rdDistGeom.ETDG()
        else:
            raise ValueError(f"Unknown method: {method}")

        self.params.randomSeed = random_seed
        self.params.numThreads = 0  # Use all available threads

    @staticmethod
    def _select_best(
        conf_ids: list[int],
        results: list[tuple[int, float]],
    ) -> tuple[int, Optional[float]]:
        """Pick the lowest-energy conformer from per-conformer optimizer results.

        Args:
            conf_ids: Conformer IDs, in the same order as ``results``.
            results: ``(status, energy)`` pairs as returned by RDKit's
                ``*OptimizeMoleculeConfs`` helpers. A status of ``-1`` marks a
                conformer whose force field could not be set up; its energy
                is a placeholder and must not be compared.

        Returns:
            ``(conformer_id, energy)`` of the lowest-energy usable conformer.
            When no usable energy exists, the first conformer ID (``0`` if
            there are none) paired with ``None``.
        """
        ids = list(conf_ids)
        usable = [
            (conf_id, energy)
            for conf_id, (status, energy) in zip(ids, results)
            if status != -1
        ]
        if not usable:
            return (ids[0] if ids else 0), None
        # min() keeps the first entry on ties.
        return min(usable, key=lambda pair: pair[1])

    def generate(self, record: "MoleculeRecord") -> Optional[dict[str, Any]]:
        """
        Generate conformers for a molecule.

        Args:
            record: MoleculeRecord to process

        Returns:
            Dictionary with keys ``smiles``, ``mol`` (hydrogen-added molecule
            holding the conformers), ``num_conformers`` and ``best_conformer``
            (a conformer ID), plus ``energy`` when an optimizer produced a
            usable value and ``name`` when the record has one. ``None`` if the
            record has no molecule, embedding produced no conformers, or any
            step raised.
        """
        if record.mol is None:
            return None

        try:
            # Add hydrogens
            mol = Chem.AddHs(record.mol)

            # Embed conformers
            conf_ids = list(
                AllChem.EmbedMultipleConfs(
                    mol,
                    numConfs=self.num_conformers,
                    params=self.params,
                )
            )

            if not conf_ids:
                return None

            # Optimize if requested; an unrecognised force field leaves the
            # embedded geometries untouched.
            results = []
            if self.optimize:
                if self.force_field == "mmff":
                    results = AllChem.MMFFOptimizeMoleculeConfs(
                        mol,
                        maxIters=self.max_iterations,
                        numThreads=0,
                    )
                elif self.force_field == "uff":
                    results = AllChem.UFFOptimizeMoleculeConfs(
                        mol,
                        maxIters=self.max_iterations,
                        numThreads=0,
                    )

            # Map the best energy back to a real conformer ID and ignore
            # placeholder results from a failed force-field setup.
            best_conf, best_energy = self._select_best(conf_ids, results)

            result: dict[str, Any] = {
                "smiles": record.smiles,
                "mol": mol,
                "num_conformers": len(conf_ids),
                "best_conformer": best_conf,
            }

            if best_energy is not None:
                result["energy"] = round(best_energy, 2)

            if record.name:
                result["name"] = record.name

            return result

        except Exception:
            # Best-effort batch processing: a molecule that cannot be
            # processed is reported as a failure rather than aborting the run.
            return None
124
+
125
+
126
class ConformerOptimizer:
    """Minimise the geometry of a molecule with a force field.

    Molecules without any conformer get hydrogens added and a single
    conformer embedded before minimisation.
    """

    def __init__(
        self,
        force_field: str = "mmff",
        max_iterations: int = 200,
    ):
        """
        Store the optimisation settings.

        Args:
            force_field: ``mmff`` selects MMFF (case-insensitive); any other
                value selects UFF
            max_iterations: Iteration cap handed to the optimiser
        """
        self.max_iterations = max_iterations
        self.force_field = force_field.lower()

    def _minimize(self, mol):
        """Minimise ``mol`` in place and return its energy, or None if unavailable."""
        if self.force_field == "mmff":
            AllChem.MMFFOptimizeMolecule(mol, maxIters=self.max_iterations)
            mmff_props = AllChem.MMFFGetMoleculeProperties(mol)
            if not mmff_props:
                # No MMFF properties, so no energy can be reported.
                return None
            field = AllChem.MMFFGetMoleculeForceField(mol, mmff_props)
        else:
            AllChem.UFFOptimizeMolecule(mol, maxIters=self.max_iterations)
            field = AllChem.UFFGetMoleculeForceField(mol)

        if field:
            return field.CalcEnergy()
        return None

    def optimize(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
        """
        Optimise the 3D structure held by a record.

        Args:
            record: MoleculeRecord whose molecule is optimised (a copy is
                modified, not the record's own molecule)

        Returns:
            Dictionary with ``smiles`` (generated from the hydrogen-stripped
            result) and ``mol``, plus ``energy`` when one could be computed
            and ``name`` when the record has one. ``None`` if the record has
            no molecule, no conformer could be produced, or any step raised.
        """
        if record.mol is None:
            return None

        try:
            working = Chem.Mol(record.mol)

            if working.GetNumConformers() == 0:
                # No coordinates yet: build a starting geometry first.
                working = Chem.AddHs(working)
                AllChem.EmbedMolecule(working, rdDistGeom.ETKDGv3())
                if working.GetNumConformers() == 0:
                    return None

            energy = self._minimize(working)

            output: dict[str, Any] = {
                "smiles": Chem.MolToSmiles(Chem.RemoveHs(working)),
                "mol": working,
            }
            if energy is not None:
                output["energy"] = round(energy, 2)
            if record.name:
                output["name"] = record.name
            return output

        except Exception:
            return None
@@ -0,0 +1,241 @@
1
+ """Molecular depiction/visualization engine."""
2
+
3
+ from typing import Optional, Any
4
+ from pathlib import Path
5
+
6
+ from rdkit import Chem
7
+ from rdkit.Chem import AllChem, Draw, rdDepictor
8
+ from rdkit.Chem.Draw import rdMolDraw2D
9
+
10
+ from rdkit_cli.io.readers import MoleculeRecord
11
+
12
+
13
class MoleculeDepiction:
    """Generate 2D depictions of molecules."""

    def __init__(
        self,
        width: int = 300,
        height: int = 300,
        image_format: str = "svg",
        add_atom_indices: bool = False,
        add_stereo_annotation: bool = False,
        highlight_atoms: Optional[list[int]] = None,
        highlight_bonds: Optional[list[int]] = None,
        use_kekulize: bool = True,
        wedge_bonds: bool = True,
        add_chiral_hs: bool = True,
    ):
        """
        Initialize molecule depiction.

        Args:
            width: Image width in pixels
            height: Image height in pixels
            image_format: Output format; ``svg`` (case-insensitive) selects
                the SVG drawer, any other value selects the Cairo drawer
            add_atom_indices: Add atom index labels
            add_stereo_annotation: Add stereo annotations
            highlight_atoms: Atom indices to highlight
            highlight_bonds: Bond indices to highlight
            use_kekulize: Use Kekule form for drawing
            wedge_bonds: Draw wedged bonds
            add_chiral_hs: Add chiral Hs
        """
        self.width = width
        self.height = height
        self.image_format = image_format.lower()
        self.add_atom_indices = add_atom_indices
        self.add_stereo_annotation = add_stereo_annotation
        self.highlight_atoms = highlight_atoms or []
        self.highlight_bonds = highlight_bonds or []
        self.use_kekulize = use_kekulize
        self.wedge_bonds = wedge_bonds
        self.add_chiral_hs = add_chiral_hs

    def depict(self, mol: "Chem.Mol") -> Optional[str]:
        """
        Generate depiction of a molecule.

        Args:
            mol: RDKit molecule; it is copied, the caller's object is not
                modified

        Returns:
            The drawer's output: SVG text for the SVG drawer, PNG data
            (bytes) for the Cairo drawer. ``None`` if ``mol`` is None or
            drawing raised.
        """
        if mol is None:
            return None

        try:
            mol = Chem.Mol(mol)  # Copy

            # Generate 2D coords (always recomputed, replacing any existing ones)
            rdDepictor.Compute2DCoords(mol)

            # Apply the kekulize / chiral-H / wedging options explicitly.
            # They are the arguments of PrepareMolForDrawing; the drawer's
            # own implicit preparation is switched off below so that these
            # choices are what actually gets rendered.
            mol = rdMolDraw2D.PrepareMolForDrawing(
                mol,
                kekulize=self.use_kekulize,
                addChiralHs=self.add_chiral_hs,
                wedgeBonds=self.wedge_bonds,
            )

            # Create drawer
            if self.image_format == "svg":
                drawer = rdMolDraw2D.MolDraw2DSVG(self.width, self.height)
            else:
                drawer = rdMolDraw2D.MolDraw2DCairo(self.width, self.height)

            # Configure options
            opts = drawer.drawOptions()
            opts.addAtomIndices = self.add_atom_indices
            opts.addStereoAnnotation = self.add_stereo_annotation
            opts.prepareMolsBeforeDrawing = False  # already prepared above

            # Draw
            if self.highlight_atoms or self.highlight_bonds:
                drawer.DrawMolecule(
                    mol,
                    highlightAtoms=self.highlight_atoms,
                    highlightBonds=self.highlight_bonds,
                )
            else:
                drawer.DrawMolecule(mol)

            drawer.FinishDrawing()

            return drawer.GetDrawingText()

        except Exception:
            # Best-effort: a molecule that cannot be drawn yields no image.
            return None

    def depict_record(self, record: "MoleculeRecord") -> Optional[dict[str, Any]]:
        """
        Generate depiction of a molecule record.

        Args:
            record: MoleculeRecord to process

        Returns:
            Dictionary with ``smiles`` and ``image`` (plus ``name`` when the
            record has one), or ``None`` if the record has no molecule or it
            could not be drawn
        """
        if record.mol is None:
            return None

        image_data = self.depict(record.mol)
        if image_data is None:
            return None

        result: dict[str, Any] = {
            "smiles": record.smiles,
            "image": image_data,
        }

        if record.name:
            result["name"] = record.name

        return result
133
+
134
+
135
class GridDepiction:
    """Generate grid of molecule depictions."""

    def __init__(
        self,
        mols_per_row: int = 4,
        mol_width: int = 200,
        mol_height: int = 200,
        legends: Optional[list[str]] = None,
        use_svg: bool = True,
    ):
        """
        Initialize grid depiction.

        Args:
            mols_per_row: Molecules per row
            mol_width: Width per molecule
            mol_height: Height per molecule
            legends: List of labels for molecules; may be shorter or longer
                than the molecule list passed to ``depict``
            use_svg: Output SVG instead of PNG
        """
        self.mols_per_row = mols_per_row
        self.mol_width = mol_width
        self.mol_height = mol_height
        self.legends = legends
        self.use_svg = use_svg

    def _legends_for(self, count: int) -> list[str]:
        """Return exactly ``count`` legends: the configured ones, truncated or padded with ""."""
        supplied = list(self.legends or [])[:count]
        return supplied + [""] * (count - len(supplied))

    def depict(self, mols: "list[Chem.Mol]") -> Optional[str]:
        """
        Generate grid depiction.

        Args:
            mols: List of molecules; ``None`` entries are passed through to
                the grid unchanged. Molecules are copied before 2D
                coordinates are computed.

        Returns:
            SVG output when ``use_svg`` is set, otherwise PNG data (bytes).
            ``None`` if ``mols`` is empty or drawing raised.
        """
        if not mols:
            return None

        try:
            # Prepare molecules
            prepared_mols = []
            for mol in mols:
                if mol is not None:
                    mol = Chem.Mol(mol)
                    rdDepictor.Compute2DCoords(mol)
                prepared_mols.append(mol)

            # One legend per grid cell: a short legend list would otherwise
            # make the grid call fail and lose the whole image.
            image = Draw.MolsToGridImage(
                prepared_mols,
                molsPerRow=self.mols_per_row,
                subImgSize=(self.mol_width, self.mol_height),
                legends=self._legends_for(len(prepared_mols)),
                useSVG=self.use_svg,
            )

            if self.use_svg:
                return image

            # Convert to bytes
            import io

            buf = io.BytesIO()
            image.save(buf, format="PNG")
            return buf.getvalue()

        except Exception:
            # Best-effort: any drawing failure yields no image.
            return None
211
+
212
+
213
def depict_smiles(
    smiles: str,
    width: int = 300,
    height: int = 300,
    image_format: str = "svg",
) -> Optional[str]:
    """
    Parse a SMILES string and draw it with default depiction settings.

    Args:
        smiles: SMILES string to parse
        width: Image width
        height: Image height
        image_format: Output format, forwarded to ``MoleculeDepiction``

    Returns:
        Whatever ``MoleculeDepiction.depict`` returns for the parsed
        molecule, or None when the SMILES cannot be parsed
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is not None:
        renderer = MoleculeDepiction(
            width=width,
            height=height,
            image_format=image_format,
        )
        return renderer.depict(parsed)

    # Unparseable SMILES: nothing to draw.
    return None