rdkit-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdkit_cli/__init__.py +4 -0
- rdkit_cli/__main__.py +6 -0
- rdkit_cli/cli.py +162 -0
- rdkit_cli/commands/__init__.py +1 -0
- rdkit_cli/commands/conformers.py +220 -0
- rdkit_cli/commands/convert.py +162 -0
- rdkit_cli/commands/depict.py +311 -0
- rdkit_cli/commands/descriptors.py +251 -0
- rdkit_cli/commands/diversity.py +232 -0
- rdkit_cli/commands/enumerate.py +229 -0
- rdkit_cli/commands/filter.py +384 -0
- rdkit_cli/commands/fingerprints.py +179 -0
- rdkit_cli/commands/fragment.py +284 -0
- rdkit_cli/commands/mcs.py +162 -0
- rdkit_cli/commands/reactions.py +191 -0
- rdkit_cli/commands/scaffold.py +243 -0
- rdkit_cli/commands/similarity.py +359 -0
- rdkit_cli/commands/standardize.py +138 -0
- rdkit_cli/core/__init__.py +1 -0
- rdkit_cli/core/conformers.py +197 -0
- rdkit_cli/core/depict.py +241 -0
- rdkit_cli/core/descriptors.py +248 -0
- rdkit_cli/core/diversity.py +174 -0
- rdkit_cli/core/enumerate.py +190 -0
- rdkit_cli/core/filters.py +443 -0
- rdkit_cli/core/fingerprints.py +265 -0
- rdkit_cli/core/fragment.py +237 -0
- rdkit_cli/core/mcs.py +128 -0
- rdkit_cli/core/reactions.py +159 -0
- rdkit_cli/core/scaffold.py +174 -0
- rdkit_cli/core/similarity.py +206 -0
- rdkit_cli/core/standardizer.py +141 -0
- rdkit_cli/io/__init__.py +7 -0
- rdkit_cli/io/formats.py +109 -0
- rdkit_cli/io/readers.py +352 -0
- rdkit_cli/io/writers.py +275 -0
- rdkit_cli/parallel/__init__.py +5 -0
- rdkit_cli/parallel/batch.py +181 -0
- rdkit_cli/parallel/executor.py +180 -0
- rdkit_cli/progress/__init__.py +5 -0
- rdkit_cli/progress/ninja.py +195 -0
- rdkit_cli/utils/__init__.py +1 -0
- rdkit_cli-0.1.0.dist-info/METADATA +380 -0
- rdkit_cli-0.1.0.dist-info/RECORD +47 -0
- rdkit_cli-0.1.0.dist-info/WHEEL +4 -0
- rdkit_cli-0.1.0.dist-info/entry_points.txt +2 -0
- rdkit_cli-0.1.0.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
"""Depict command implementation."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from rdkit_cli.cli import RdkitHelpFormatter, add_common_processing_options
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def register_parser(subparsers):
    """Attach the ``depict`` command, with its subcommands, to *subparsers*.

    Three subcommands are created: ``single``, ``batch`` and ``grid``. Each
    one stores its handler (``run_single``, ``run_batch``, ``run_grid``)
    under the ``func`` default. The ``depict`` parser itself gets a ``func``
    default that prints its help text and returns 1.
    """
    depict_cmd = subparsers.add_parser(
        "depict",
        formatter_class=RdkitHelpFormatter,
        description="Generate 2D images of molecules (SVG or PNG).",
        help="Generate molecular depictions",
    )
    children = depict_cmd.add_subparsers(
        metavar="<subcommand>", dest="subcommand", title="Subcommands"
    )

    # ---- "single": one SMILES string -> one image file ----
    single = children.add_parser(
        "single", formatter_class=RdkitHelpFormatter, help="Depict a single SMILES"
    )
    single.add_argument(
        "-s", "--smiles", metavar="SMILES", required=True, help="SMILES string to depict"
    )
    single.add_argument(
        "-o", "--output", metavar="FILE", required=True, help="Output file (SVG or PNG)"
    )
    single.add_argument("-W", "--width", default=400, type=int, help="Image width (default: 400)")
    single.add_argument("-H", "--height", default=400, type=int, help="Image height (default: 400)")
    single.add_argument("--atom-indices", action="store_true", help="Show atom indices")
    single.add_argument("--stereo-annotations", action="store_true", help="Show stereo annotations")
    single.add_argument(
        "-f", "--format", choices=["svg", "png"], help="Output format (default: from file extension)"
    )
    single.set_defaults(func=run_single)

    # ---- "batch": one image file per molecule of an input file ----
    batch = children.add_parser(
        "batch",
        formatter_class=RdkitHelpFormatter,
        help="Depict molecules from file to individual images",
    )
    batch.add_argument(
        "-i", "--input", metavar="FILE", required=True, help="Input file with molecules"
    )
    batch.add_argument(
        "-o", "--output-dir", metavar="DIR", required=True, help="Output directory for images"
    )
    add_common_processing_options(batch)
    batch.add_argument(
        "-f", "--format", default="svg", choices=["svg", "png"], help="Output format (default: svg)"
    )
    batch.add_argument("-W", "--width", default=300, type=int, help="Image width (default: 300)")
    batch.add_argument("-H", "--height", default=300, type=int, help="Image height (default: 300)")
    batch.set_defaults(func=run_batch)

    # ---- "grid": all molecules of an input file in one image ----
    grid = children.add_parser(
        "grid", formatter_class=RdkitHelpFormatter, help="Depict molecules as a grid image"
    )
    grid.add_argument(
        "-i", "--input", metavar="FILE", required=True, help="Input file with molecules"
    )
    grid.add_argument(
        "-o", "--output", metavar="FILE", required=True, help="Output file (SVG or PNG)"
    )
    add_common_processing_options(grid)
    grid.add_argument(
        "--mols-per-row", default=4, type=int, help="Molecules per row (default: 4)"
    )
    grid.add_argument(
        "-W", "--mol-width", default=200, type=int, help="Width per molecule (default: 200)"
    )
    grid.add_argument(
        "-H", "--mol-height", default=200, type=int, help="Height per molecule (default: 200)"
    )
    grid.add_argument(
        "--max-mols", default=100, type=int, help="Maximum molecules to include (default: 100)"
    )
    grid.set_defaults(func=run_grid)

    def _help_then_fail(args):
        # Used when "depict" is given without a subcommand.
        return depict_cmd.print_help() or 1

    depict_cmd.set_defaults(func=_help_then_fail)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def run_single(args) -> int:
    """Render one SMILES string to an SVG or PNG file.

    Reads ``smiles``, ``output``, ``width``, ``height`` and ``format`` from
    *args*. The image format is ``args.format`` when given, otherwise the
    lower-cased extension of the output path.

    Returns:
        0 on success; 1 when the format is unsupported, the depiction
        fails (``depict_smiles`` returns ``None``), or the output file
        cannot be written.
    """
    output_path = Path(args.output)

    # An explicit --format wins; otherwise infer from the file extension.
    image_format = args.format or output_path.suffix.lower().lstrip(".")

    if image_format not in ("svg", "png"):
        print(f"Error: Unsupported format '{image_format}'. Use .svg or .png", file=sys.stderr)
        return 1

    # Lazy import, done only after the cheap argument validation so that an
    # invalid invocation fails without loading the depiction backend.
    from rdkit_cli.core.depict import depict_smiles

    # NOTE(review): args.atom_indices and args.stereo_annotations are parsed
    # by the CLI but are not forwarded here; confirm whether depict_smiles
    # accepts matching options.
    image_data = depict_smiles(
        args.smiles,
        width=args.width,
        height=args.height,
        image_format=image_format,
    )

    if image_data is None:
        print(f"Error: Failed to depict SMILES: {args.smiles}", file=sys.stderr)
        return 1

    try:
        if image_format == "svg":
            # SVG is text: pin the encoding instead of relying on the
            # platform default, which is not UTF-8 everywhere.
            output_path.write_text(image_data, encoding="utf-8")
        else:
            output_path.write_bytes(image_data)
    except OSError as exc:
        print(f"Error: Could not write {output_path}: {exc}", file=sys.stderr)
        return 1

    print(f"Wrote depiction to {output_path}", file=sys.stderr)
    return 0
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _unique_stem(raw_name, index, taken):
    """Return a filesystem-safe, not-yet-used file stem for one molecule.

    Keeps only alphanumerics, ``-`` and ``_`` from *raw_name* and falls back
    to ``mol_<index>`` when nothing survives. If the stem (compared
    case-insensitively) is already in *taken*, a numeric suffix is appended
    until it is unique. The chosen stem is recorded in *taken*.
    """
    stem = "".join(c for c in raw_name if c.isalnum() or c in "-_") or f"mol_{index}"
    candidate = stem
    bump = index
    while candidate.casefold() in taken:
        candidate = f"{stem}_{bump}"
        bump += 1
    taken.add(candidate.casefold())
    return candidate


def run_batch(args) -> int:
    """Write one image file per molecule of the input file.

    Reads ``input``, ``output_dir``, ``format``, ``width``, ``height``,
    ``smiles_column``, ``name_column``, ``no_header`` and ``quiet`` from
    *args*. Files are named after the sanitized record name, or
    ``mol_<index>`` when the record has no usable name. Names that would
    collide get a numeric suffix instead of overwriting an earlier image.

    Returns:
        1 when the input file does not exist, otherwise 0. Molecules that
        fail to parse or depict are counted and reported, not fatal.
    """
    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        return 1

    # Lazy imports, done after the cheap input check.
    from rdkit_cli.core.depict import MoleculeDepiction
    from rdkit_cli.io import create_reader

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    reader = create_reader(
        input_path,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )

    depictor = MoleculeDepiction(
        width=args.width,
        height=args.height,
        image_format=args.format,
    )

    is_svg = args.format == "svg"
    count = 0
    failed = 0
    taken = set()  # case-folded stems already written in this run

    # The reader is used as a context manager so the input is closed even
    # if writing an image raises.
    with reader:
        for i, record in enumerate(reader):
            if record.mol is None:
                failed += 1
                continue

            image_data = depictor.depict(record.mol)
            if image_data is None:
                failed += 1
                continue

            stem = _unique_stem(record.name or f"mol_{i}", i, taken)
            output_path = output_dir / f"{stem}.{args.format}"

            if is_svg:
                # SVG is text: pin the encoding rather than use the
                # platform default.
                output_path.write_text(image_data, encoding="utf-8")
            else:
                output_path.write_bytes(image_data)

            count += 1

    if not args.quiet:
        print(f"Generated {count} images ({failed} failed) in {output_dir}", file=sys.stderr)

    return 0
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def run_grid(args) -> int:
    """Render up to ``args.max_mols`` molecules of the input into one grid image.

    Reads ``input``, ``output``, ``mols_per_row``, ``mol_width``,
    ``mol_height``, ``max_mols``, ``smiles_column``, ``name_column``,
    ``no_header`` and ``quiet`` from *args*. The image format is taken from
    the extension of the output path.

    Returns:
        0 on success; 1 when the input file is missing, the output
        extension is not ``.svg``/``.png``, or the grid depiction returns
        ``None``.
    """
    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        return 1

    output_path = Path(args.output)
    image_format = output_path.suffix.lower().lstrip(".")

    if image_format not in ("svg", "png"):
        print(f"Error: Unsupported format '{image_format}'. Use .svg or .png", file=sys.stderr)
        return 1

    # Lazy imports, done after the cheap argument validation.
    from itertools import islice

    from rdkit_cli.core.depict import GridDepiction
    from rdkit_cli.io import create_reader

    reader = create_reader(
        input_path,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )

    if not args.quiet:
        print("Reading molecules...", file=sys.stderr)

    # Stop reading once the limit is reached instead of loading the whole
    # input and slicing afterwards. A negative limit is clamped to 0, since
    # slice semantics would otherwise drop molecules from the end.
    limit = max(args.max_mols, 0)
    with reader:
        records = list(islice(reader, limit))

    # NOTE(review): records whose mol is None are passed through unchanged,
    # as before; confirm GridDepiction tolerates None entries.
    mols = [r.mol for r in records]
    legends = [r.name or "" for r in records]

    if not args.quiet:
        print(f"Generating grid for {len(mols)} molecules...", file=sys.stderr)

    grid_depictor = GridDepiction(
        mols_per_row=args.mols_per_row,
        mol_width=args.mol_width,
        mol_height=args.mol_height,
        legends=legends,
        use_svg=(image_format == "svg"),
    )

    image_data = grid_depictor.depict(mols)

    if image_data is None:
        print("Error: Failed to generate grid image", file=sys.stderr)
        return 1

    if image_format == "svg":
        # SVG is text: pin the encoding rather than use the platform default.
        output_path.write_text(image_data, encoding="utf-8")
    else:
        output_path.write_bytes(image_data)

    if not args.quiet:
        print(f"Wrote grid image to {output_path}", file=sys.stderr)

    return 0
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""Descriptors command implementation."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from rdkit_cli.cli import RdkitHelpFormatter, add_common_io_options, add_common_processing_options
|
|
7
|
+
|
|
8
|
+
# Lazy imports - these are only imported when command runs
|
|
9
|
+
# from rdkit_cli.core.descriptors import ...
|
|
10
|
+
# from rdkit_cli.io import ...
|
|
11
|
+
# from rdkit_cli.parallel.batch import ...
|
|
12
|
+
|
|
13
|
+
# The category names live in this module so that building the help text
# does not require importing core.descriptors.
DESCRIPTOR_CATEGORIES = ["constitutional", "topological", "electronic", "geometric", "molecular"]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def register_parser(subparsers):
    """Attach the ``descriptors`` command, with its subcommands, to *subparsers*.

    Two subcommands are created: ``list`` (handler ``run_list``) and
    ``compute`` (handler ``run_compute``); each handler is stored under the
    ``func`` default. The descriptor-selection options of ``compute`` are
    mutually exclusive. The ``descriptors`` parser itself gets a ``func``
    default that prints its help text and returns 1.
    """
    desc_cmd = subparsers.add_parser(
        "descriptors",
        formatter_class=RdkitHelpFormatter,
        description="Calculate RDKit molecular descriptors.",
        help="Compute molecular descriptors",
    )
    children = desc_cmd.add_subparsers(
        metavar="<subcommand>", dest="subcommand", title="Subcommands"
    )

    # ---- "list" ----
    lister = children.add_parser(
        "list", formatter_class=RdkitHelpFormatter, help="List available descriptors"
    )
    lister.add_argument(
        "--all",
        dest="show_all",
        action="store_true",
        help="Show all descriptors with descriptions",
    )
    lister.add_argument("--category", choices=DESCRIPTOR_CATEGORIES, help="Filter by category")
    lister.set_defaults(func=run_list)

    # ---- "compute" ----
    compute = children.add_parser(
        "compute", formatter_class=RdkitHelpFormatter, help="Compute descriptors for molecules"
    )
    add_common_io_options(compute)
    add_common_processing_options(compute)

    # At most one way of choosing the descriptor set may be given.
    selection = compute.add_mutually_exclusive_group()
    selection.add_argument(
        "-d", "--descriptors",
        metavar="DESC",
        help="Comma-separated list of descriptors to compute",
    )
    selection.add_argument(
        "--all",
        dest="compute_all",
        action="store_true",
        help="Compute all available descriptors",
    )
    selection.add_argument(
        "--common", action="store_true", help="Compute common descriptors (default)"
    )
    selection.add_argument(
        "--lipinski", action="store_true", help="Compute Lipinski rule-of-5 descriptors"
    )
    selection.add_argument(
        "--druglike", action="store_true", help="Compute drug-likeness descriptors"
    )
    selection.add_argument(
        "--category",
        dest="compute_category",
        choices=DESCRIPTOR_CATEGORIES,
        help="Compute all descriptors in category",
    )

    # Options that apply whichever descriptor set was chosen.
    compute.add_argument(
        "--exclude", metavar="DESC", help="Comma-separated list of descriptors to exclude"
    )
    compute.add_argument(
        "--precision",
        metavar="N",
        default=4,
        type=int,
        help="Decimal precision for float values (default: 4)",
    )
    compute.add_argument(
        "--error-value",
        metavar="VAL",
        default="NaN",
        help="Value to use for failed calculations (default: NaN)",
    )
    compute.add_argument(
        "--3d",
        dest="compute_3d",
        action="store_true",
        help="Include 3D descriptors (requires 3D coordinates)",
    )
    compute.add_argument("--no-smiles", action="store_true", help="Don't include SMILES in output")
    compute.add_argument("--no-name", action="store_true", help="Don't include name in output")

    compute.set_defaults(func=run_compute)

    def _help_then_fail(args):
        # Used when "descriptors" is given without a subcommand.
        return desc_cmd.print_help() or 1

    desc_cmd.set_defaults(func=_help_then_fail)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def run_list(args) -> int:
    """Print the available descriptors to stdout, one per line.

    With ``args.show_all`` set, each name is left-padded to a common width
    and followed by its category in brackets; otherwise only the name is
    printed. A total count goes to stderr. Always returns 0.
    """
    # Imported here so that the module can be loaded without it.
    from rdkit_cli.core.descriptors import list_descriptors

    wanted_category = getattr(args, "category", None)
    verbose = getattr(args, "show_all", False)
    found = list_descriptors(category=wanted_category, verbose=verbose)

    if not args.show_all:
        for entry in found:
            print(entry.name)
    else:
        # Pad names to the longest one so the category column lines up.
        pad = 0
        if found:
            pad = max(len(entry.name) for entry in found)
        for entry in found:
            print(f"{entry.name:<{pad}}  [{entry.category}]")

    print(f"\nTotal: {len(found)} descriptors", file=sys.stderr)
    return 0
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def run_compute(args) -> int:
    """Compute descriptors for every molecule of the input and write them out.

    The descriptor set is chosen from *args* in this order: ``descriptors``
    (comma-separated names), ``compute_all``, ``lipinski``, ``druglike``,
    ``compute_category``, and otherwise the common set. Names listed in
    ``exclude`` are then removed, including when ``compute_all`` is set.

    Returns:
        0 when every molecule was processed successfully; 1 when the input
        file is missing, no descriptor remains after exclusion, the
        calculator rejects its configuration (``ValueError``), or at least
        one molecule failed.
    """
    # Cheap check first, so a wrong path fails before the heavy imports.
    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        return 1

    # Lazy imports
    from rdkit_cli.core.descriptors import (
        DescriptorCalculator,
        list_descriptors,
        COMMON_DESCRIPTORS,
        LIPINSKI_DESCRIPTORS,
        DRUGLIKE_DESCRIPTORS,
    )
    from rdkit_cli.io import create_reader, create_writer
    from rdkit_cli.parallel.batch import process_molecules

    # Determine which descriptors to compute; None means "all".
    if args.descriptors:
        # Drop empty entries produced by stray or trailing commas.
        descriptor_names = [d.strip() for d in args.descriptors.split(",") if d.strip()]
    elif args.compute_all:
        descriptor_names = None
    elif args.lipinski:
        descriptor_names = LIPINSKI_DESCRIPTORS
    elif args.druglike:
        descriptor_names = DRUGLIKE_DESCRIPTORS
    elif args.compute_category:
        descriptor_names = [d.name for d in list_descriptors(category=args.compute_category)]
    else:
        # Default to common descriptors
        descriptor_names = COMMON_DESCRIPTORS

    # Handle exclusions
    if args.exclude:
        exclude_set = {d.strip() for d in args.exclude.split(",") if d.strip()}
        if descriptor_names is None:
            # "--all --exclude X" used to ignore the exclusion because there
            # was no explicit list to filter. Expand to concrete names first.
            # NOTE(review): assumes list_descriptors(category=None) yields
            # the same set the calculator uses for descriptors=None.
            descriptor_names = [d.name for d in list_descriptors(category=None)]
        descriptor_names = [d for d in descriptor_names if d not in exclude_set]
        if not descriptor_names:
            print("Error: No descriptors left to compute after exclusions", file=sys.stderr)
            return 1

    # NOTE(review): args.compute_3d (--3d) is parsed by the CLI but is not
    # used here; confirm how 3D descriptors are meant to be enabled.

    # Create calculator
    try:
        calculator = DescriptorCalculator(
            descriptors=descriptor_names,
            include_smiles=not args.no_smiles,
            include_name=not args.no_name,
            precision=args.precision,
            error_value=args.error_value,
        )
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    # Create reader
    reader = create_reader(
        input_path,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )

    # Create writer
    output_path = Path(args.output)
    writer = create_writer(
        output_path,
        columns=calculator.get_column_names(),
    )

    # Process
    with reader, writer:
        result = process_molecules(
            reader=reader,
            writer=writer,
            processor=calculator.compute,
            n_workers=args.ncpu,
            quiet=args.quiet,
        )

    if not args.quiet:
        print(
            f"Processed {result.successful}/{result.total_processed} molecules "
            f"({result.failed} failed) in {result.elapsed_time:.1f}s",
            file=sys.stderr,
        )

    return 0 if result.failed == 0 else 1
|