rdkit-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. rdkit_cli/__init__.py +4 -0
  2. rdkit_cli/__main__.py +6 -0
  3. rdkit_cli/cli.py +162 -0
  4. rdkit_cli/commands/__init__.py +1 -0
  5. rdkit_cli/commands/conformers.py +220 -0
  6. rdkit_cli/commands/convert.py +162 -0
  7. rdkit_cli/commands/depict.py +311 -0
  8. rdkit_cli/commands/descriptors.py +251 -0
  9. rdkit_cli/commands/diversity.py +232 -0
  10. rdkit_cli/commands/enumerate.py +229 -0
  11. rdkit_cli/commands/filter.py +384 -0
  12. rdkit_cli/commands/fingerprints.py +179 -0
  13. rdkit_cli/commands/fragment.py +284 -0
  14. rdkit_cli/commands/mcs.py +162 -0
  15. rdkit_cli/commands/reactions.py +191 -0
  16. rdkit_cli/commands/scaffold.py +243 -0
  17. rdkit_cli/commands/similarity.py +359 -0
  18. rdkit_cli/commands/standardize.py +138 -0
  19. rdkit_cli/core/__init__.py +1 -0
  20. rdkit_cli/core/conformers.py +197 -0
  21. rdkit_cli/core/depict.py +241 -0
  22. rdkit_cli/core/descriptors.py +248 -0
  23. rdkit_cli/core/diversity.py +174 -0
  24. rdkit_cli/core/enumerate.py +190 -0
  25. rdkit_cli/core/filters.py +443 -0
  26. rdkit_cli/core/fingerprints.py +265 -0
  27. rdkit_cli/core/fragment.py +237 -0
  28. rdkit_cli/core/mcs.py +128 -0
  29. rdkit_cli/core/reactions.py +159 -0
  30. rdkit_cli/core/scaffold.py +174 -0
  31. rdkit_cli/core/similarity.py +206 -0
  32. rdkit_cli/core/standardizer.py +141 -0
  33. rdkit_cli/io/__init__.py +7 -0
  34. rdkit_cli/io/formats.py +109 -0
  35. rdkit_cli/io/readers.py +352 -0
  36. rdkit_cli/io/writers.py +275 -0
  37. rdkit_cli/parallel/__init__.py +5 -0
  38. rdkit_cli/parallel/batch.py +181 -0
  39. rdkit_cli/parallel/executor.py +180 -0
  40. rdkit_cli/progress/__init__.py +5 -0
  41. rdkit_cli/progress/ninja.py +195 -0
  42. rdkit_cli/utils/__init__.py +1 -0
  43. rdkit_cli-0.1.0.dist-info/METADATA +380 -0
  44. rdkit_cli-0.1.0.dist-info/RECORD +47 -0
  45. rdkit_cli-0.1.0.dist-info/WHEEL +4 -0
  46. rdkit_cli-0.1.0.dist-info/entry_points.txt +2 -0
  47. rdkit_cli-0.1.0.dist-info/licenses/LICENSE +190 -0
rdkit_cli/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """rdkit-cli: A comprehensive CLI tool for RDKit cheminformatics operations."""
2
+
3
# Package version string; cli.py formats it into the `--version` output.
__version__ = "0.1.0"
# Package author name.
__author__ = "Vitruves"
rdkit_cli/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Entry point for python -m rdkit_cli."""
2
+
3
+ from rdkit_cli.cli import main
4
+
5
# Executed for `python -m rdkit_cli`: the value returned by main() is handed
# to SystemExit and so becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
rdkit_cli/cli.py ADDED
@@ -0,0 +1,162 @@
1
+ """Main CLI entry point for rdkit-cli."""
2
+
3
+ import argparse
4
+ import sys
5
+ from typing import Optional
6
+
7
+ from rich_argparse import RichHelpFormatter
8
+
9
+ from rdkit_cli import __version__
10
+
11
+
12
class RdkitHelpFormatter(RichHelpFormatter):
    """Help formatter that recolours four rich-argparse style slots.

    Every style inherited from ``RichHelpFormatter`` is kept as is; only the
    keys listed in the update below are replaced.
    """

    # Start from a copy so the overrides never mutate the parent's mapping.
    styles = {**RichHelpFormatter.styles}
    styles.update(
        {
            "argparse.args": "cyan",
            "argparse.groups": "bold yellow",
            "argparse.metavar": "green",
            "argparse.prog": "bold magenta",
        }
    )
22
+
23
+
24
def add_common_io_options(parser: argparse.ArgumentParser):
    """Attach the mandatory ``-i/--input`` and ``-o/--output`` options.

    Both options are required and display ``FILE`` as their metavar; they
    differ only in their flags and help text.
    """
    io_options = (
        (("-i", "--input"), "Input file (CSV, TSV, SMI, SDF, or Parquet)"),
        (("-o", "--output"), "Output file"),
    )
    for flags, help_text in io_options:
        parser.add_argument(
            *flags,
            required=True,
            metavar="FILE",
            help=help_text,
        )
38
+
39
+
40
def add_common_processing_options(parser: argparse.ArgumentParser):
    """Attach the processing options shared by the molecule commands.

    Adds ``-n/--ncpu`` (int, default -1), ``--smiles-column`` (default
    ``"smiles"``), ``--name-column`` (default ``None``) and the boolean
    switches ``--no-header`` and ``-q/--quiet``.
    """
    add = parser.add_argument

    # Worker count.
    add(
        "-n", "--ncpu",
        type=int,
        default=-1,
        metavar="N",
        help="Number of CPU cores (-1 for all, default: -1)",
    )

    # Column selection for tabular inputs.
    add("--smiles-column", default="smiles", metavar="COL",
        help="Name of SMILES column (default: smiles)")
    add("--name-column", default=None, metavar="COL",
        help="Name of molecule name column")

    # Boolean switches; both default to False.
    add("--no-header", action="store_true", help="Input file has no header row")
    add("-q", "--quiet", action="store_true", help="Suppress progress output")
71
+
72
+
73
def create_parser() -> argparse.ArgumentParser:
    """Build the top-level ``rdkit-cli`` parser with every command registered."""
    root = argparse.ArgumentParser(
        prog="rdkit-cli",
        description="A comprehensive CLI tool for RDKit cheminformatics operations.",
        epilog="Use 'rdkit-cli <command> --help' for command-specific help.",
        formatter_class=RdkitHelpFormatter,
    )

    # -V/--version prints the package version and exits.
    root.add_argument("-V", "--version", action="version", version=f"rdkit-cli {__version__}")

    # The chosen command name is stored under `command` on the namespace.
    command_parsers = root.add_subparsers(
        title="Commands",
        dest="command",
        metavar="<command>",
    )
    _register_commands(command_parsers)

    return root
100
+
101
+
102
def _register_commands(subparsers):
    """Import each command module and let it add its own subparser.

    The command modules import helpers from ``rdkit_cli.cli``, so they are
    imported here, inside the function, rather than at module top level.
    """
    from rdkit_cli.commands import (
        descriptors,
        fingerprints,
        filter as filter_cmd,
        convert,
        standardize,
        similarity,
        conformers,
        reactions,
        scaffold,
        enumerate as enumerate_cmd,
        fragment,
        diversity,
        mcs,
        depict,
    )

    # Registration order determines the order commands are added to the parser.
    command_modules = (
        descriptors,
        fingerprints,
        filter_cmd,
        convert,
        standardize,
        similarity,
        conformers,
        reactions,
        scaffold,
        enumerate_cmd,
        fragment,
        diversity,
        mcs,
        depict,
    )
    for command_module in command_modules:
        # Each module exposes register_parser(subparsers).
        command_module.register_parser(subparsers)
136
+
137
+
138
def main(args: Optional[list[str]] = None) -> int:
    """Parse the command line and dispatch to the selected command.

    Args:
        args: Argument list to parse. ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        The process exit status: the command's own return value, 1 when no
        command was given or the command raised an exception, 130 when
        interrupted with Ctrl-C, and 0 when the consumer of our output
        closed the pipe.
    """
    parser = create_parser()
    parsed_args = parser.parse_args(args)

    if parsed_args.command is None:
        parser.print_help()
        return 1

    # Each command has a run(args) function via set_defaults(func=...)
    try:
        return parsed_args.func(parsed_args)
    except KeyboardInterrupt:
        sys.stderr.write("\nInterrupted.\n")
        return 130
    except BrokenPipeError:
        # Handle broken pipe gracefully (e.g., piping to head).
        # The interpreter flushes sys.stdout once more at shutdown; with the
        # pipe already closed that flush would report a second
        # BrokenPipeError and override the exit status, so point stdout at
        # the null device before returning.
        import os

        try:
            devnull = os.open(os.devnull, os.O_WRONLY)
            try:
                os.dup2(devnull, sys.stdout.fileno())
            finally:
                os.close(devnull)
        except (AttributeError, OSError, ValueError):
            # Best effort only: stdout is missing or is not backed by a real
            # file descriptor, so there is nothing to redirect.
            pass
        return 0
    except Exception as e:
        sys.stderr.write(f"Error: {e}\n")
        return 1
159
+
160
+
161
# Allows running this module directly as a script; the integer returned by
# main() is passed to sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
rdkit_cli/commands/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """CLI command implementations."""
rdkit_cli/commands/conformers.py ADDED
@@ -0,0 +1,220 @@
1
+ """Conformers command implementation."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ from rdkit_cli.cli import RdkitHelpFormatter, add_common_io_options, add_common_processing_options
7
+
8
+
9
def register_parser(subparsers):
    """Register the ``conformers`` command and its subcommands.

    Two subcommands are added: ``generate`` (dispatches to ``run_generate``)
    and ``optimize`` (dispatches to ``run_optimize``). Invoking ``conformers``
    without a subcommand prints the command help and yields exit status 1.

    Args:
        subparsers: Subparsers action of the parent parser; ``add_parser`` is
            called on it.
    """
    parser = subparsers.add_parser(
        "conformers",
        help="Generate and optimize 3D conformers",
        description="Generate and optimize 3D molecular conformers.",
        formatter_class=RdkitHelpFormatter,
    )

    conf_subparsers = parser.add_subparsers(
        title="Subcommands",
        dest="subcommand",
        metavar="<subcommand>",
    )

    # conformers generate
    gen_parser = conf_subparsers.add_parser(
        "generate",
        help="Generate 3D conformers",
        formatter_class=RdkitHelpFormatter,
    )
    add_common_io_options(gen_parser)
    add_common_processing_options(gen_parser)
    gen_parser.add_argument(
        "--num",
        type=int,
        default=10,
        metavar="N",
        help="Number of conformers to generate (default: 10)",
    )
    gen_parser.add_argument(
        "-m", "--method",
        choices=["etkdgv3", "etkdgv2", "etdg"],
        default="etkdgv3",
        help="Embedding method (default: etkdgv3)",
    )
    gen_parser.add_argument(
        "--no-optimize",
        action="store_true",
        help="Skip force field optimization",
    )
    gen_parser.add_argument(
        "-f", "--force-field",
        choices=["mmff", "uff"],
        default="mmff",
        help="Force field for optimization (default: mmff)",
    )
    gen_parser.add_argument(
        "--seed",
        type=int,
        default=42,
        help="Random seed (default: 42)",
    )
    gen_parser.add_argument(
        "--prune-rms",
        type=float,
        default=0.5,
        metavar="THRESH",
        help="RMSD threshold for pruning similar conformers (default: 0.5)",
    )
    gen_parser.add_argument(
        "--energy-window",
        type=float,
        default=None,
        metavar="KCAL",
        help="Keep only conformers within N kcal/mol of lowest energy",
    )
    # --add-hydrogens / --no-hydrogens form an on/off pair over one
    # destination, so `args.add_hydrogens` is the single source of truth
    # (True unless --no-hydrogens is given).
    gen_parser.add_argument(
        "--add-hydrogens",
        action="store_true",
        default=True,
        help="Add hydrogens before embedding (default: True)",
    )
    gen_parser.add_argument(
        "--no-hydrogens",
        action="store_false",
        dest="add_hydrogens",
        help="Don't add hydrogens",
    )
    gen_parser.add_argument(
        "--use-basic-knowledge",
        action="store_true",
        help="Use basic knowledge about conformer preferences",
    )
    gen_parser.add_argument(
        "--max-attempts",
        type=int,
        default=0,
        metavar="N",
        help="Maximum embedding attempts per conformer (0 = auto)",
    )
    gen_parser.set_defaults(func=run_generate)

    # conformers optimize
    opt_parser = conf_subparsers.add_parser(
        "optimize",
        help="Optimize existing 3D structures",
        formatter_class=RdkitHelpFormatter,
    )
    add_common_io_options(opt_parser)
    add_common_processing_options(opt_parser)
    opt_parser.add_argument(
        "-f", "--force-field",
        choices=["mmff", "uff"],
        default="mmff",
        help="Force field for optimization (default: mmff)",
    )
    opt_parser.add_argument(
        "--max-iter",
        type=int,
        default=200,
        help="Maximum optimization iterations (default: 200)",
    )
    opt_parser.set_defaults(func=run_optimize)

    # Fallback when no subcommand is given: print_help() returns None, so the
    # `or 1` makes the lambda evaluate to exit status 1.
    parser.set_defaults(func=lambda args: parser.print_help() or 1)
125
+
126
+
127
def run_generate(args) -> int:
    """Generate conformers for every input molecule and write them as SDF.

    Returns 0 when no molecule failed; 1 when the input file does not exist
    or at least one molecule failed.
    """
    # Deferred imports
    from rdkit_cli.core.conformers import ConformerGenerator
    from rdkit_cli.io import create_reader, create_writer, FileFormat
    from rdkit_cli.parallel.batch import process_molecules

    # NOTE(review): the `generate` subparser also defines --prune-rms,
    # --energy-window, the hydrogen flags, --use-basic-knowledge and
    # --max-attempts, but none of them is read here. Confirm whether
    # ConformerGenerator should receive them.
    generator_options = {
        "num_conformers": args.num,
        "method": args.method,
        "optimize": not args.no_optimize,
        "force_field": args.force_field,
        "random_seed": args.seed,
    }
    generator = ConformerGenerator(**generator_options)

    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        return 1

    reader = create_reader(
        input_path,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )
    # The output is always written as SDF, whatever the output file's extension.
    writer = create_writer(Path(args.output), format_override=FileFormat.SDF)

    with reader, writer:
        result = process_molecules(
            reader=reader,
            writer=writer,
            processor=generator.generate,
            n_workers=args.ncpu,
            quiet=args.quiet,
        )

    if not args.quiet:
        summary = (
            f"Generated conformers for {result.successful}/{result.total_processed} molecules "
            f"({result.failed} failed) in {result.elapsed_time:.1f}s"
        )
        print(summary, file=sys.stderr)

    # Any failed molecule turns the exit status non-zero.
    return 1 if result.failed != 0 else 0
175
+
176
+
177
def run_optimize(args) -> int:
    """Optimize every input molecule with a force field and write SDF.

    Returns 0 when no molecule failed; 1 when the input file does not exist
    or at least one molecule failed.
    """
    # Deferred imports
    from rdkit_cli.core.conformers import ConformerOptimizer
    from rdkit_cli.io import create_reader, create_writer, FileFormat
    from rdkit_cli.parallel.batch import process_molecules

    optimizer = ConformerOptimizer(force_field=args.force_field, max_iterations=args.max_iter)

    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        return 1

    reader_options = {
        "smiles_column": args.smiles_column,
        "name_column": args.name_column,
        "has_header": not args.no_header,
    }
    reader = create_reader(input_path, **reader_options)
    # The output is always written as SDF, whatever the output file's extension.
    writer = create_writer(Path(args.output), format_override=FileFormat.SDF)

    with reader, writer:
        result = process_molecules(
            reader=reader,
            writer=writer,
            processor=optimizer.optimize,
            n_workers=args.ncpu,
            quiet=args.quiet,
        )

    if not args.quiet:
        summary = (
            f"Optimized {result.successful}/{result.total_processed} molecules "
            f"({result.failed} failed) in {result.elapsed_time:.1f}s"
        )
        print(summary, file=sys.stderr)

    # Any failed molecule turns the exit status non-zero.
    return 1 if result.failed != 0 else 0
rdkit_cli/commands/convert.py ADDED
@@ -0,0 +1,162 @@
1
+ """Convert command implementation."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ from rdkit_cli.cli import RdkitHelpFormatter, add_common_io_options, add_common_processing_options
7
+
8
# Format names offered as choices for --in-format / --out-format.
# Defined here as a literal list to avoid loading the io module at startup.
FILE_FORMATS = ["csv", "tsv", "smi", "sdf", "parquet"]
10
+
11
+
12
def register_parser(subparsers):
    """Register the ``convert`` command on ``subparsers``.

    The command takes the shared I/O and processing options plus format
    overrides, a canonicalization switch pair and two InChI column flags;
    it dispatches to ``run_convert``.
    """
    convert_parser = subparsers.add_parser(
        "convert",
        help="Convert between molecular file formats",
        description="Convert molecules between different file formats and representations.",
        formatter_class=RdkitHelpFormatter,
    )
    add_common_io_options(convert_parser)
    add_common_processing_options(convert_parser)

    add = convert_parser.add_argument

    # Explicit format overrides.
    add("--in-format", choices=FILE_FORMATS,
        help="Input format (auto-detected from extension if not specified)")
    add("--out-format", choices=FILE_FORMATS,
        help="Output format (auto-detected from extension if not specified)")

    # On/off pair sharing the `canonical` destination; True unless
    # --no-canonical is given.
    add("--canonical", action="store_true", default=True,
        help="Canonicalize SMILES output (default: True)")
    add("--no-canonical", action="store_false", dest="canonical",
        help="Don't canonicalize SMILES output")

    # Optional identifier columns.
    add("--add-inchi", action="store_true", help="Add InChI column to output")
    add("--add-inchikey", action="store_true", help="Add InChIKey column to output")

    convert_parser.set_defaults(func=run_convert)
58
+
59
+
60
def run_convert(args) -> int:
    """Run the convert command.

    Reads molecules from ``args.input``, turns each one into an output row
    (SMILES, optional name, optional InChI / InChIKey, remaining metadata)
    and writes the rows to ``args.output``.

    Args:
        args: Parsed namespace produced by the ``convert`` subparser.

    Returns:
        0 when no molecule failed; 1 when the input file does not exist or
        at least one molecule failed.
    """
    # Lazy imports
    from typing import Optional, Any
    from rdkit import Chem
    from rdkit.Chem.inchi import MolToInchi, MolToInchiKey
    from rdkit_cli.io import create_reader, create_writer, FileFormat, detect_format
    from rdkit_cli.io.readers import MoleculeRecord
    from rdkit_cli.parallel.batch import process_molecules

    # NOTE(review): this class is local to the function; if process_molecules
    # ships `processor` to worker processes it would need to be importable
    # from module level - confirm against the parallel package.
    class FormatConverter:
        """Convert molecules between formats."""

        def __init__(
            self,
            canonical: bool = True,
            add_inchi: bool = False,
            add_inchikey: bool = False,
        ):
            self.canonical = canonical
            self.add_inchi = add_inchi
            self.add_inchikey = add_inchikey

        def convert(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
            """Build the output row for one record, or None if it has no molecule."""
            if record.mol is None:
                return None

            # SMILES is canonical only when self.canonical is set.
            result: dict[str, Any] = {
                "smiles": Chem.MolToSmiles(record.mol, canonical=self.canonical),
            }

            if record.name:
                result["name"] = record.name

            # Add InChI if requested; a failure deliberately yields an empty
            # cell instead of failing the whole molecule.
            if self.add_inchi:
                try:
                    result["inchi"] = MolToInchi(record.mol)
                except Exception:
                    result["inchi"] = ""

            # Add InChIKey if requested (same best-effort policy).
            if self.add_inchikey:
                try:
                    result["inchikey"] = MolToInchiKey(record.mol)
                except Exception:
                    result["inchikey"] = ""

            # Copy other metadata without overwriting the columns set above
            # ("smiles" is always present, so it is never replaced).
            for key, value in record.metadata.items():
                result.setdefault(key, value)

            return result

    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        return 1

    # Detect formats
    # NOTE(review): create_reader's parameters are not visible from this
    # module, so --in-format is still only validated here and not forwarded -
    # TODO confirm whether the reader accepts a format override.
    in_format = FileFormat(args.in_format) if args.in_format else detect_format(input_path)
    output_path = Path(args.output)
    out_format = FileFormat(args.out_format) if args.out_format else detect_format(output_path)

    # Create converter
    converter = FormatConverter(
        canonical=args.canonical,
        add_inchi=args.add_inchi,
        add_inchikey=args.add_inchikey,
    )

    # Create reader
    reader = create_reader(
        input_path,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )

    # Create writer. An explicit --out-format must win over the extension;
    # without it the writer is created exactly as before.
    if args.out_format:
        writer = create_writer(output_path, format_override=out_format)
    else:
        writer = create_writer(output_path)

    # Process
    with reader, writer:
        result = process_molecules(
            reader=reader,
            writer=writer,
            processor=converter.convert,
            n_workers=args.ncpu,
            quiet=args.quiet,
        )

    if not args.quiet:
        print(
            f"Converted {result.successful}/{result.total_processed} molecules "
            f"({result.failed} failed) in {result.elapsed_time:.1f}s",
            file=sys.stderr,
        )

    return 0 if result.failed == 0 else 1