rdkit-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdkit_cli/__init__.py +4 -0
- rdkit_cli/__main__.py +6 -0
- rdkit_cli/cli.py +162 -0
- rdkit_cli/commands/__init__.py +1 -0
- rdkit_cli/commands/conformers.py +220 -0
- rdkit_cli/commands/convert.py +162 -0
- rdkit_cli/commands/depict.py +311 -0
- rdkit_cli/commands/descriptors.py +251 -0
- rdkit_cli/commands/diversity.py +232 -0
- rdkit_cli/commands/enumerate.py +229 -0
- rdkit_cli/commands/filter.py +384 -0
- rdkit_cli/commands/fingerprints.py +179 -0
- rdkit_cli/commands/fragment.py +284 -0
- rdkit_cli/commands/mcs.py +162 -0
- rdkit_cli/commands/reactions.py +191 -0
- rdkit_cli/commands/scaffold.py +243 -0
- rdkit_cli/commands/similarity.py +359 -0
- rdkit_cli/commands/standardize.py +138 -0
- rdkit_cli/core/__init__.py +1 -0
- rdkit_cli/core/conformers.py +197 -0
- rdkit_cli/core/depict.py +241 -0
- rdkit_cli/core/descriptors.py +248 -0
- rdkit_cli/core/diversity.py +174 -0
- rdkit_cli/core/enumerate.py +190 -0
- rdkit_cli/core/filters.py +443 -0
- rdkit_cli/core/fingerprints.py +265 -0
- rdkit_cli/core/fragment.py +237 -0
- rdkit_cli/core/mcs.py +128 -0
- rdkit_cli/core/reactions.py +159 -0
- rdkit_cli/core/scaffold.py +174 -0
- rdkit_cli/core/similarity.py +206 -0
- rdkit_cli/core/standardizer.py +141 -0
- rdkit_cli/io/__init__.py +7 -0
- rdkit_cli/io/formats.py +109 -0
- rdkit_cli/io/readers.py +352 -0
- rdkit_cli/io/writers.py +275 -0
- rdkit_cli/parallel/__init__.py +5 -0
- rdkit_cli/parallel/batch.py +181 -0
- rdkit_cli/parallel/executor.py +180 -0
- rdkit_cli/progress/__init__.py +5 -0
- rdkit_cli/progress/ninja.py +195 -0
- rdkit_cli/utils/__init__.py +1 -0
- rdkit_cli-0.1.0.dist-info/METADATA +380 -0
- rdkit_cli-0.1.0.dist-info/RECORD +47 -0
- rdkit_cli-0.1.0.dist-info/WHEEL +4 -0
- rdkit_cli-0.1.0.dist-info/entry_points.txt +2 -0
- rdkit_cli-0.1.0.dist-info/licenses/LICENSE +190 -0
rdkit_cli/__init__.py
ADDED
rdkit_cli/__main__.py
ADDED
rdkit_cli/cli.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Main CLI entry point for rdkit-cli."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from rich_argparse import RichHelpFormatter
|
|
8
|
+
|
|
9
|
+
from rdkit_cli import __version__
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RdkitHelpFormatter(RichHelpFormatter):
    """Help formatter for rdkit-cli with a customised colour palette.

    Only the ``styles`` mapping is overridden; everything else is inherited
    from ``RichHelpFormatter`` unchanged.
    """

    # Copy the base palette first so entries not listed below keep their
    # inherited values, then recolour the four we care about.
    styles = dict(RichHelpFormatter.styles)
    styles.update(
        {
            "argparse.args": "cyan",
            "argparse.groups": "bold yellow",
            "argparse.metavar": "green",
            "argparse.prog": "bold magenta",
        }
    )
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def add_common_io_options(parser: argparse.ArgumentParser):
    """Attach the required ``-i/--input`` and ``-o/--output`` options to *parser*.

    Both options are mandatory and share the ``FILE`` metavar; they differ
    only in their flag names and help text.
    """
    io_flags = (
        (("-i", "--input"), "Input file (CSV, TSV, SMI, SDF, or Parquet)"),
        (("-o", "--output"), "Output file"),
    )
    for flag_names, help_text in io_flags:
        parser.add_argument(
            *flag_names,
            required=True,
            metavar="FILE",
            help=help_text,
        )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def add_common_processing_options(parser: argparse.ArgumentParser):
    """Attach the processing options shared by the commands to *parser*.

    Adds, in this order: ``-n/--ncpu``, ``--smiles-column``, ``--name-column``,
    ``--no-header`` and ``-q/--quiet``. All of them are optional.
    """
    option_table = [
        (
            ("-n", "--ncpu"),
            {
                "type": int,
                "default": -1,
                "metavar": "N",
                "help": "Number of CPU cores (-1 for all, default: -1)",
            },
        ),
        (
            ("--smiles-column",),
            {
                "default": "smiles",
                "metavar": "COL",
                "help": "Name of SMILES column (default: smiles)",
            },
        ),
        (
            ("--name-column",),
            {
                "default": None,
                "metavar": "COL",
                "help": "Name of molecule name column",
            },
        ),
        (
            ("--no-header",),
            {"action": "store_true", "help": "Input file has no header row"},
        ),
        (
            ("-q", "--quiet"),
            {"action": "store_true", "help": "Suppress progress output"},
        ),
    ]
    # Registration order is kept because it is the order shown in --help.
    for flag_names, settings in option_table:
        parser.add_argument(*flag_names, **settings)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def create_parser() -> argparse.ArgumentParser:
    """Build the top-level ``rdkit-cli`` argument parser.

    Returns:
        The root parser, carrying the ``-V/--version`` flag and a subparsers
        action (destination ``command``) that has been handed to
        ``_register_commands`` to be populated.
    """
    root = argparse.ArgumentParser(
        formatter_class=RdkitHelpFormatter,
        prog="rdkit-cli",
        description="A comprehensive CLI tool for RDKit cheminformatics operations.",
        epilog="Use 'rdkit-cli <command> --help' for command-specific help.",
    )

    root.add_argument(
        "-V", "--version",
        action="version",
        version=f"rdkit-cli {__version__}",
    )

    # The chosen command name is stored under "command".
    command_parsers = root.add_subparsers(
        title="Commands",
        dest="command",
        metavar="<command>",
    )
    _register_commands(command_parsers)

    return root
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _register_commands(subparsers):
    """Import every command module and let it register its own subparser.

    Args:
        subparsers: The subparsers action each module's
            ``register_parser(subparsers)`` function is called with.
    """
    # Imported inside the function rather than at module import time.
    # ``filter`` and ``enumerate`` are aliased so the builtins of the same
    # name stay usable within this function.
    from rdkit_cli.commands import (
        descriptors,
        fingerprints,
        filter as filter_cmd,
        convert,
        standardize,
        similarity,
        conformers,
        reactions,
        scaffold,
        enumerate as enumerate_cmd,
        fragment,
        diversity,
        mcs,
        depict,
    )

    # Registration order is preserved from the original listing.
    command_modules = (
        descriptors,
        fingerprints,
        filter_cmd,
        convert,
        standardize,
        similarity,
        conformers,
        reactions,
        scaffold,
        enumerate_cmd,
        fragment,
        diversity,
        mcs,
        depict,
    )
    for command_module in command_modules:
        command_module.register_parser(subparsers)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def main(args: Optional[list[str]] = None) -> int:
    """Parse the command line and dispatch to the selected command.

    Args:
        args: Argument strings to parse. ``None`` is passed straight to
            ``parse_args``, which then reads ``sys.argv[1:]``.

    Returns:
        The process exit status: whatever the command's ``func`` returns,
        1 when no command was given or the command raised an exception,
        130 when interrupted from the keyboard, and 0 when the output pipe
        was closed by the reader.
    """
    parser = create_parser()
    parsed_args = parser.parse_args(args)

    if parsed_args.command is None:
        parser.print_help()
        return 1

    # Each command has a run(args) function via set_defaults(func=...)
    try:
        return parsed_args.func(parsed_args)
    except KeyboardInterrupt:
        sys.stderr.write("\nInterrupted.\n")
        return 130
    except BrokenPipeError:
        # The downstream reader went away (e.g. piping to head). Point stdout
        # at devnull so the interpreter's flush-at-exit does not hit the
        # closed pipe again and turn this clean exit into an error.
        import os

        try:
            devnull = os.open(os.devnull, os.O_WRONLY)
            os.dup2(devnull, sys.stdout.fileno())
        except (AttributeError, OSError, ValueError):
            # stdout has no usable file descriptor (missing, replaced or
            # closed); there is nothing left to redirect.
            pass
        return 0
    except Exception as e:
        # Top-level boundary: report the failure on stderr and signal it
        # through the exit status instead of showing a traceback.
        sys.stderr.write(f"Error: {e}\n")
        return 1
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
if __name__ == "__main__":
|
|
162
|
+
sys.exit(main())
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI command implementations."""
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""Conformers command implementation."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from rdkit_cli.cli import RdkitHelpFormatter, add_common_io_options, add_common_processing_options
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def register_parser(subparsers):
    """Register ``conformers`` and its ``generate`` / ``optimize`` subcommands.

    Args:
        subparsers: The subparsers action of the parent parser; the
            ``conformers`` parser is added to it.
    """
    conformers_parser = subparsers.add_parser(
        "conformers",
        help="Generate and optimize 3D conformers",
        description="Generate and optimize 3D molecular conformers.",
        formatter_class=RdkitHelpFormatter,
    )
    actions = conformers_parser.add_subparsers(
        title="Subcommands",
        dest="subcommand",
        metavar="<subcommand>",
    )

    def _add_options(target, option_table):
        # Register (flags, keyword-arguments) pairs in table order.
        for flag_names, settings in option_table:
            target.add_argument(*flag_names, **settings)

    def _force_field_option():
        # Same option on both subcommands; built fresh for each parser.
        return (
            ("-f", "--force-field"),
            {
                "choices": ["mmff", "uff"],
                "default": "mmff",
                "help": "Force field for optimization (default: mmff)",
            },
        )

    # --- conformers generate ---
    generate = actions.add_parser(
        "generate",
        help="Generate 3D conformers",
        formatter_class=RdkitHelpFormatter,
    )
    add_common_io_options(generate)
    add_common_processing_options(generate)
    # NOTE(review): --add-hydrogens is store_true with default=True, so passing
    # it never changes the parsed value, and --no-hydrogens is stored under its
    # own destination (no_hydrogens) rather than switching add_hydrogens off.
    _add_options(
        generate,
        [
            (
                ("--num",),
                {
                    "type": int,
                    "default": 10,
                    "metavar": "N",
                    "help": "Number of conformers to generate (default: 10)",
                },
            ),
            (
                ("-m", "--method"),
                {
                    "choices": ["etkdgv3", "etkdgv2", "etdg"],
                    "default": "etkdgv3",
                    "help": "Embedding method (default: etkdgv3)",
                },
            ),
            (
                ("--no-optimize",),
                {"action": "store_true", "help": "Skip force field optimization"},
            ),
            _force_field_option(),
            (
                ("--seed",),
                {"type": int, "default": 42, "help": "Random seed (default: 42)"},
            ),
            (
                ("--prune-rms",),
                {
                    "type": float,
                    "default": 0.5,
                    "metavar": "THRESH",
                    "help": "RMSD threshold for pruning similar conformers (default: 0.5)",
                },
            ),
            (
                ("--energy-window",),
                {
                    "type": float,
                    "default": None,
                    "metavar": "KCAL",
                    "help": "Keep only conformers within N kcal/mol of lowest energy",
                },
            ),
            (
                ("--add-hydrogens",),
                {
                    "action": "store_true",
                    "default": True,
                    "help": "Add hydrogens before embedding (default: True)",
                },
            ),
            (
                ("--no-hydrogens",),
                {"action": "store_true", "help": "Don't add hydrogens"},
            ),
            (
                ("--use-basic-knowledge",),
                {
                    "action": "store_true",
                    "help": "Use basic knowledge about conformer preferences",
                },
            ),
            (
                ("--max-attempts",),
                {
                    "type": int,
                    "default": 0,
                    "metavar": "N",
                    "help": "Maximum embedding attempts per conformer (0 = auto)",
                },
            ),
        ],
    )
    generate.set_defaults(func=run_generate)

    # --- conformers optimize ---
    optimize = actions.add_parser(
        "optimize",
        help="Optimize existing 3D structures",
        formatter_class=RdkitHelpFormatter,
    )
    add_common_io_options(optimize)
    add_common_processing_options(optimize)
    _add_options(
        optimize,
        [
            _force_field_option(),
            (
                ("--max-iter",),
                {
                    "type": int,
                    "default": 200,
                    "help": "Maximum optimization iterations (default: 200)",
                },
            ),
        ],
    )
    optimize.set_defaults(func=run_optimize)

    def _show_help(args):
        # Default handler for the bare "conformers" parser: print its help
        # and return 1.
        conformers_parser.print_help()
        return 1

    conformers_parser.set_defaults(func=_show_help)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def run_generate(args) -> int:
    """Generate 3D conformers for every molecule in the input file.

    Args:
        args: Parsed namespace of the ``conformers generate`` subcommand.

    Returns:
        1 if the input file does not exist or at least one molecule failed,
        otherwise 0.
    """
    # Imported here, not at module level, so they load only when the command runs.
    from rdkit_cli.core.conformers import ConformerGenerator
    from rdkit_cli.io import create_reader, create_writer, FileFormat
    from rdkit_cli.parallel.batch import process_molecules

    # NOTE(review): prune_rms, energy_window, add_hydrogens, no_hydrogens,
    # use_basic_knowledge and max_attempts are accepted by the subcommand's
    # parser but are not forwarded below - confirm whether ConformerGenerator
    # is meant to receive them.
    generator = ConformerGenerator(
        num_conformers=args.num,
        method=args.method,
        optimize=not args.no_optimize,
        force_field=args.force_field,
        random_seed=args.seed,
    )

    source = Path(args.input)
    if not source.exists():
        print(f"Error: Input file not found: {source}", file=sys.stderr)
        return 1

    reader = create_reader(
        source,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )

    # The writer format is pinned to SDF regardless of the output file name.
    destination = Path(args.output)
    writer = create_writer(destination, format_override=FileFormat.SDF)

    with reader:
        with writer:
            result = process_molecules(
                reader=reader,
                writer=writer,
                processor=generator.generate,
                n_workers=args.ncpu,
                quiet=args.quiet,
            )

    if not args.quiet:
        summary = (
            f"Generated conformers for {result.successful}/{result.total_processed} molecules "
            f"({result.failed} failed) in {result.elapsed_time:.1f}s"
        )
        print(summary, file=sys.stderr)

    return 1 if result.failed != 0 else 0
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def run_optimize(args) -> int:
    """Optimize the structures in the input file with the chosen force field.

    Args:
        args: Parsed namespace of the ``conformers optimize`` subcommand.

    Returns:
        1 if the input file does not exist or at least one molecule failed,
        otherwise 0.
    """
    # Imported here, not at module level, so they load only when the command runs.
    from rdkit_cli.core.conformers import ConformerOptimizer
    from rdkit_cli.io import create_reader, create_writer, FileFormat
    from rdkit_cli.parallel.batch import process_molecules

    optimizer = ConformerOptimizer(
        force_field=args.force_field,
        max_iterations=args.max_iter,
    )

    source = Path(args.input)
    if not source.exists():
        print(f"Error: Input file not found: {source}", file=sys.stderr)
        return 1

    reader = create_reader(
        source,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )

    # The writer format is pinned to SDF regardless of the output file name.
    destination = Path(args.output)
    writer = create_writer(destination, format_override=FileFormat.SDF)

    with reader:
        with writer:
            result = process_molecules(
                reader=reader,
                writer=writer,
                processor=optimizer.optimize,
                n_workers=args.ncpu,
                quiet=args.quiet,
            )

    if not args.quiet:
        summary = (
            f"Optimized {result.successful}/{result.total_processed} molecules "
            f"({result.failed} failed) in {result.elapsed_time:.1f}s"
        )
        print(summary, file=sys.stderr)

    return 1 if result.failed != 0 else 0
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Convert command implementation."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from rdkit_cli.cli import RdkitHelpFormatter, add_common_io_options, add_common_processing_options
|
|
7
|
+
|
|
8
|
+
# Define formats here to avoid loading io module at startup
|
|
9
|
+
FILE_FORMATS = ["csv", "tsv", "smi", "sdf", "parquet"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def register_parser(subparsers):
    """Register the ``convert`` command on the parent's subparsers action.

    Args:
        subparsers: The subparsers action the ``convert`` parser is added to.
    """
    convert_parser = subparsers.add_parser(
        "convert",
        help="Convert between molecular file formats",
        description="Convert molecules between different file formats and representations.",
        formatter_class=RdkitHelpFormatter,
    )

    add_common_io_options(convert_parser)
    add_common_processing_options(convert_parser)

    # Explicit format overrides; both default to None when not given.
    for flag, direction in (("--in-format", "Input"), ("--out-format", "Output")):
        convert_parser.add_argument(
            flag,
            choices=FILE_FORMATS,
            help=f"{direction} format (auto-detected from extension if not specified)",
        )

    # On/off pair writing to the same destination ("canonical").
    convert_parser.add_argument(
        "--canonical",
        action="store_true",
        default=True,
        help="Canonicalize SMILES output (default: True)",
    )
    convert_parser.add_argument(
        "--no-canonical",
        action="store_false",
        dest="canonical",
        help="Don't canonicalize SMILES output",
    )

    # Optional extra identifier columns.
    for flag, label in (("--add-inchi", "InChI"), ("--add-inchikey", "InChIKey")):
        convert_parser.add_argument(
            flag,
            action="store_true",
            help=f"Add {label} column to output",
        )

    convert_parser.set_defaults(func=run_convert)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def run_convert(args) -> int:
    """Convert the molecules of the input file into the output file.

    Args:
        args: Parsed namespace of the ``convert`` command.

    Returns:
        1 if the input file does not exist or at least one molecule failed,
        otherwise 0.
    """
    # Imported here, not at module level, so they load only when the command runs.
    from typing import Optional, Any
    from rdkit import Chem
    from rdkit.Chem.inchi import MolToInchi, MolToInchiKey
    from rdkit_cli.io import create_reader, create_writer, FileFormat, detect_format
    from rdkit_cli.io.readers import MoleculeRecord
    from rdkit_cli.parallel.batch import process_molecules

    # NOTE(review): this class is local to the function; if process_molecules
    # hands the processor to worker processes, a locally defined class may not
    # be picklable - confirm against rdkit_cli.parallel.batch.
    class FormatConverter:
        """Turn a molecule record into a flat dict of output columns."""

        def __init__(
            self,
            canonical: bool = True,
            add_inchi: bool = False,
            add_inchikey: bool = False,
        ):
            self.canonical = canonical
            self.add_inchi = add_inchi
            self.add_inchikey = add_inchikey

        def convert(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
            """Build the output row for *record*, or ``None`` if it has no molecule."""
            if record.mol is None:
                return None

            result: dict[str, Any] = {}

            # SMILES is always written; canonicalisation follows the flag.
            result["smiles"] = Chem.MolToSmiles(record.mol, canonical=self.canonical)

            if record.name:
                result["name"] = record.name

            # InChI / InChIKey are best-effort: a failure yields an empty
            # cell instead of failing the whole molecule.
            if self.add_inchi:
                try:
                    result["inchi"] = MolToInchi(record.mol)
                except Exception:
                    result["inchi"] = ""

            if self.add_inchikey:
                try:
                    result["inchikey"] = MolToInchiKey(record.mol)
                except Exception:
                    result["inchikey"] = ""

            # Carry over the remaining metadata without overwriting the
            # columns produced above or re-adding the input SMILES.
            for key, value in record.metadata.items():
                if key not in result and key != "smiles":
                    result[key] = value

            return result

    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        return 1

    # Resolve formats: an explicit flag wins, otherwise the file name decides.
    # NOTE(review): in_format is resolved but not forwarded - create_reader's
    # format keyword is not visible from here, so --in-format currently has no
    # effect beyond this lookup; confirm and wire it through.
    in_format = FileFormat(args.in_format) if args.in_format else detect_format(input_path)
    output_path = Path(args.output)
    out_format = FileFormat(args.out_format) if args.out_format else detect_format(output_path)

    converter = FormatConverter(
        canonical=args.canonical,
        add_inchi=args.add_inchi,
        add_inchikey=args.add_inchikey,
    )

    reader = create_reader(
        input_path,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )

    # Honour --out-format: previously the resolved format was dropped and the
    # writer always chose by itself. Without the flag the call is unchanged.
    if args.out_format:
        writer = create_writer(output_path, format_override=out_format)
    else:
        writer = create_writer(output_path)

    with reader, writer:
        result = process_molecules(
            reader=reader,
            writer=writer,
            processor=converter.convert,
            n_workers=args.ncpu,
            quiet=args.quiet,
        )

    if not args.quiet:
        print(
            f"Converted {result.successful}/{result.total_processed} molecules "
            f"({result.failed} failed) in {result.elapsed_time:.1f}s",
            file=sys.stderr,
        )

    return 0 if result.failed == 0 else 1
|