rdkit-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdkit_cli/__init__.py +4 -0
- rdkit_cli/__main__.py +6 -0
- rdkit_cli/cli.py +162 -0
- rdkit_cli/commands/__init__.py +1 -0
- rdkit_cli/commands/conformers.py +220 -0
- rdkit_cli/commands/convert.py +162 -0
- rdkit_cli/commands/depict.py +311 -0
- rdkit_cli/commands/descriptors.py +251 -0
- rdkit_cli/commands/diversity.py +232 -0
- rdkit_cli/commands/enumerate.py +229 -0
- rdkit_cli/commands/filter.py +384 -0
- rdkit_cli/commands/fingerprints.py +179 -0
- rdkit_cli/commands/fragment.py +284 -0
- rdkit_cli/commands/mcs.py +162 -0
- rdkit_cli/commands/reactions.py +191 -0
- rdkit_cli/commands/scaffold.py +243 -0
- rdkit_cli/commands/similarity.py +359 -0
- rdkit_cli/commands/standardize.py +138 -0
- rdkit_cli/core/__init__.py +1 -0
- rdkit_cli/core/conformers.py +197 -0
- rdkit_cli/core/depict.py +241 -0
- rdkit_cli/core/descriptors.py +248 -0
- rdkit_cli/core/diversity.py +174 -0
- rdkit_cli/core/enumerate.py +190 -0
- rdkit_cli/core/filters.py +443 -0
- rdkit_cli/core/fingerprints.py +265 -0
- rdkit_cli/core/fragment.py +237 -0
- rdkit_cli/core/mcs.py +128 -0
- rdkit_cli/core/reactions.py +159 -0
- rdkit_cli/core/scaffold.py +174 -0
- rdkit_cli/core/similarity.py +206 -0
- rdkit_cli/core/standardizer.py +141 -0
- rdkit_cli/io/__init__.py +7 -0
- rdkit_cli/io/formats.py +109 -0
- rdkit_cli/io/readers.py +352 -0
- rdkit_cli/io/writers.py +275 -0
- rdkit_cli/parallel/__init__.py +5 -0
- rdkit_cli/parallel/batch.py +181 -0
- rdkit_cli/parallel/executor.py +180 -0
- rdkit_cli/progress/__init__.py +5 -0
- rdkit_cli/progress/ninja.py +195 -0
- rdkit_cli/utils/__init__.py +1 -0
- rdkit_cli-0.1.0.dist-info/METADATA +380 -0
- rdkit_cli-0.1.0.dist-info/RECORD +47 -0
- rdkit_cli-0.1.0.dist-info/WHEEL +4 -0
- rdkit_cli-0.1.0.dist-info/entry_points.txt +2 -0
- rdkit_cli-0.1.0.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
"""Filter command implementation."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from rdkit_cli.cli import RdkitHelpFormatter, add_common_io_options, add_common_processing_options
|
|
7
|
+
|
|
8
|
+
# Define here to avoid loading core at startup
|
|
9
|
+
# Names accepted by ``filter druglike --rule``; used as the argparse ``choices``.
# NOTE(review): presumably mirrors the rule sets known to
# rdkit_cli.core.filters.DruglikeFilter -- keep the two in sync; confirm against core.
DRUGLIKE_RULES = ["lipinski", "veber", "ghose", "egan", "muegge"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def register_parser(subparsers):
    """Attach the ``filter`` command and all of its subcommands to *subparsers*.

    Six subcommands are registered, in this order: ``substructure``,
    ``property``, ``druglike``, ``pains``, ``elements`` and ``complexity``.
    Each one receives the shared I/O and processing options and stores its
    handler under ``func``. When ``filter`` is invoked without a subcommand,
    ``func`` prints the command help and returns 1.

    Args:
        subparsers: The object returned by ``add_subparsers()`` on the
            top-level parser.
    """
    filter_cmd = subparsers.add_parser(
        "filter",
        help="Filter molecules by various criteria",
        description="Filter molecules by substructure, properties, or drug-likeness.",
        formatter_class=RdkitHelpFormatter,
    )
    children = filter_cmd.add_subparsers(
        title="Subcommands",
        dest="subcommand",
        metavar="<subcommand>",
    )

    def new_subcommand(name, summary, handler):
        """Create a subcommand parser carrying the common options and its handler."""
        child = children.add_parser(
            name,
            help=summary,
            formatter_class=RdkitHelpFormatter,
        )
        add_common_io_options(child)
        add_common_processing_options(child)
        child.set_defaults(func=handler)
        return child

    def add_switches(target, switches):
        """Register a run of plain ``store_true`` flags, in the given order."""
        for flag, text in switches:
            target.add_argument(flag, action="store_true", help=text)

    # --- filter substructure -------------------------------------------------
    substructure = new_subcommand(
        "substructure", "Filter by substructure (SMARTS)", run_substructure
    )
    substructure.add_argument(
        "-s", "--smarts",
        required=True,
        metavar="PATTERN",
        help="SMARTS pattern to match",
    )
    add_switches(substructure, [
        ("--exclude", "Exclude molecules matching the pattern (default: include)"),
    ])
    substructure.add_argument(
        "--min-matches",
        type=int,
        default=1,
        metavar="N",
        help="Minimum number of matches required (default: 1)",
    )
    substructure.add_argument(
        "--max-matches",
        type=int,
        default=None,
        metavar="N",
        help="Maximum number of matches allowed",
    )
    add_switches(substructure, [
        ("--count-unique", "Count only unique (non-overlapping) matches"),
        ("--add-match-count", "Add column with number of matches"),
        ("--use-chirality", "Consider chirality in matching"),
    ])

    # --- filter property -----------------------------------------------------
    by_property = new_subcommand("property", "Filter by property values", run_property)
    by_property.add_argument(
        "-r", "--rule",
        action="append",
        metavar="RULE",
        help="Property rule in format 'PROP<OP>VALUE' (e.g., 'MolWt<500', 'LogP>-2'). Can be repeated.",
    )

    # --- filter druglike -----------------------------------------------------
    druglike = new_subcommand("druglike", "Filter by drug-likeness rules", run_druglike)
    druglike.add_argument(
        "-r", "--rule",
        choices=DRUGLIKE_RULES,
        default="lipinski",
        help="Drug-likeness rule set (default: lipinski)",
    )
    druglike.add_argument(
        "-v", "--max-violations",
        type=int,
        default=0,
        metavar="N",
        help="Maximum allowed violations (default: 0)",
    )
    add_switches(druglike, [
        ("--add-violations", "Add column with violation count"),
        ("--add-details", "Add columns with individual rule values"),
    ])

    # --- filter pains --------------------------------------------------------
    pains = new_subcommand("pains", "Filter out PAINS compounds", run_pains)
    add_switches(pains, [
        ("--keep-pains", "Keep PAINS compounds (inverse filter)"),
        ("--add-pains-type", "Add column with PAINS alert type"),
    ])

    # --- filter elements -----------------------------------------------------
    elements = new_subcommand("elements", "Filter by allowed elements", run_elements)
    elements.add_argument(
        "--allowed",
        metavar="ELEMS",
        default="C,H,N,O,S,F,Cl,Br,I,P",
        help="Comma-separated allowed elements (default: C,H,N,O,S,F,Cl,Br,I,P)",
    )
    elements.add_argument(
        "--required",
        metavar="ELEMS",
        help="Comma-separated required elements (must contain all)",
    )
    elements.add_argument(
        "--forbidden",
        metavar="ELEMS",
        help="Comma-separated forbidden elements",
    )

    # --- filter complexity ---------------------------------------------------
    complexity = new_subcommand(
        "complexity", "Filter by molecular complexity", run_complexity
    )
    integer_limits = (
        ("--min-atoms", 1, "Minimum heavy atom count (default: 1)"),
        ("--max-atoms", 100, "Maximum heavy atom count (default: 100)"),
        ("--min-rings", 0, "Minimum ring count (default: 0)"),
        ("--max-rings", 10, "Maximum ring count (default: 10)"),
        ("--min-rotatable", 0, "Minimum rotatable bonds (default: 0)"),
        ("--max-rotatable", 20, "Maximum rotatable bonds (default: 20)"),
    )
    for flag, fallback, text in integer_limits:
        complexity.add_argument(
            flag,
            type=int,
            default=fallback,
            metavar="N",
            help=text,
        )

    def show_help(args):
        """Fallback handler for a bare ``filter``: print help, report failure."""
        filter_cmd.print_help()
        return 1

    filter_cmd.set_defaults(func=show_help)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def run_substructure(args) -> int:
    """Run the substructure filter.

    Builds a ``SubstructureFilter`` from ``args.smarts`` and ``args.exclude``
    and hands its ``filter`` method to ``_run_filter``.

    Args:
        args: Parsed command-line namespace for ``filter substructure``.

    Returns:
        1 if ``SubstructureFilter`` raises ``ValueError`` (the message is
        printed to stderr); otherwise the value returned by ``_run_filter``.
    """
    # Lazy import. The reader/writer/batch helpers are imported by
    # _run_filter itself, so they are not needed here.
    from rdkit_cli.core.filters import SubstructureFilter

    # NOTE(review): the parser also defines --min-matches, --max-matches,
    # --count-unique, --add-match-count and --use-chirality, but only
    # ``smarts`` and ``exclude`` are forwarded here. Confirm against
    # SubstructureFilter's signature whether the others should be wired in.
    try:
        filter_obj = SubstructureFilter(
            smarts=args.smarts,
            exclude=args.exclude,
        )
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    return _run_filter(args, filter_obj.filter)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _split_property_rule(rule):
    """Split one ``PROP<OP>VALUE`` rule into ``(prop, is_lower_bound, value)``.

    Returns ``None`` when *rule* contains none of ``<=``, ``>=``, ``<``, ``>``.
    Raises ``ValueError`` when the operator occurs more than once, the
    property name is empty, or the value is not a valid float.
    """
    # Two-character operators are tested first so "<=" is not mistaken for "<".
    for operator, is_lower_bound in (("<=", False), (">=", True), ("<", False), (">", True)):
        if operator not in rule:
            continue
        # Unpacking raises ValueError if the operator appears more than once.
        prop, raw_value = rule.split(operator)
        prop = prop.strip()
        if not prop:
            raise ValueError("missing property name")
        return prop, is_lower_bound, float(raw_value.strip())
    return None


def run_property(args) -> int:
    """Run the property filter.

    Each entry of ``args.rule`` is parsed as ``PROP<OP>VALUE`` and collected
    into a mapping ``{prop: (lower, upper)}`` where an unset side is ``None``.
    Rules naming the same property are merged, so ``MolWt>200`` together with
    ``MolWt<500`` yields ``(200.0, 500.0)`` instead of the later rule
    discarding the earlier one. A repeated bound on the same side keeps the
    last value given. ``<``/``<=`` both set the upper bound and ``>``/``>=``
    both set the lower bound.

    Args:
        args: Parsed command-line namespace for ``filter property``.

    Returns:
        1 if no rule was given or a rule cannot be parsed (a message is
        printed to stderr); otherwise the value returned by ``_run_filter``.
    """
    if not args.rule:
        print("Error: At least one --rule is required", file=sys.stderr)
        return 1

    # Parse rules
    rules = {}
    for rule in args.rule:
        try:
            parsed = _split_property_rule(rule)
        except ValueError as e:
            print(f"Error parsing rule '{rule}': {e}", file=sys.stderr)
            return 1
        if parsed is None:
            print(f"Error: Invalid rule format: {rule}", file=sys.stderr)
            return 1

        prop, is_lower_bound, value = parsed
        # Start from any bound already recorded for this property so that a
        # lower and an upper rule combine into one range.
        lower, upper = rules.get(prop, (None, None))
        if is_lower_bound:
            lower = value
        else:
            upper = value
        rules[prop] = (lower, upper)

    # Lazy import, deferred until the arguments are known to be valid.
    from rdkit_cli.core.filters import PropertyFilter

    filter_obj = PropertyFilter(rules=rules)
    return _run_filter(args, filter_obj.filter)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def run_druglike(args) -> int:
    """Apply a drug-likeness rule set to the input molecules.

    Args:
        args: Parsed command-line namespace for ``filter druglike``; reads
            ``rule`` and ``max_violations`` here.

    Returns:
        1 when ``DruglikeFilter`` rejects its arguments with ``ValueError``
        (the message goes to stderr); otherwise the result of ``_run_filter``.
    """
    # Imported on demand so the core module is not loaded at CLI startup.
    from rdkit_cli.core.filters import DruglikeFilter

    settings = {
        "rule_name": args.rule,
        "max_violations": args.max_violations,
    }
    try:
        druglike = DruglikeFilter(**settings)
    except ValueError as problem:
        print(f"Error: {problem}", file=sys.stderr)
        return 1
    else:
        return _run_filter(args, druglike.filter)
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def run_pains(args) -> int:
    """Apply the PAINS filter to the input molecules.

    The filter is built with ``exclude=True`` unless ``args.keep_pains`` is
    set; a namespace without that attribute is treated as ``keep_pains=False``.

    Args:
        args: Parsed command-line namespace for ``filter pains``.

    Returns:
        The result of ``_run_filter``.
    """
    # Imported on demand so the core module is not loaded at CLI startup.
    from rdkit_cli.core.filters import PAINSFilter

    keep_hits = getattr(args, "keep_pains", False)
    pains = PAINSFilter(exclude=not keep_hits)
    return _run_filter(args, pains.filter)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _split_elements(raw):
    """Turn a comma-separated element string into a list of symbols.

    Surrounding whitespace is stripped and empty tokens (from a trailing or
    doubled comma such as ``"C,N,"``) are dropped. Returns ``None`` when *raw*
    is empty/``None`` or contains no symbols at all.
    """
    if not raw:
        return None
    symbols = [token.strip() for token in raw.split(",")]
    # str.split keeps empty strings between/after delimiters; they are not
    # element symbols, so discard them.
    return [symbol for symbol in symbols if symbol] or None


def run_elements(args) -> int:
    """Run the element filter.

    Parses ``args.allowed``, ``args.required`` and ``args.forbidden`` as
    comma-separated element lists and passes them to ``ElementFilter``; a
    list that is unset or empty is passed as ``None``.

    Args:
        args: Parsed command-line namespace for ``filter elements``.

    Returns:
        The result of ``_run_filter``.
    """
    # Lazy import
    from rdkit_cli.core.filters import ElementFilter

    filter_obj = ElementFilter(
        allowed_elements=_split_elements(args.allowed),
        required_elements=_split_elements(args.required),
        forbidden_elements=_split_elements(args.forbidden),
    )
    return _run_filter(args, filter_obj.filter)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def run_complexity(args) -> int:
    """Apply the complexity filter to the input molecules.

    The six ``min_*``/``max_*`` limits (atoms, rings, rotatable) are read from
    *args* and forwarded to ``ComplexityFilter`` under the same names.

    Args:
        args: Parsed command-line namespace for ``filter complexity``.

    Returns:
        The result of ``_run_filter``.
    """
    # Imported on demand so the core module is not loaded at CLI startup.
    from rdkit_cli.core.filters import ComplexityFilter

    limit_names = (
        "min_atoms",
        "max_atoms",
        "min_rings",
        "max_rings",
        "min_rotatable",
        "max_rotatable",
    )
    limits = {name: getattr(args, name) for name in limit_names}
    complexity = ComplexityFilter(**limits)
    return _run_filter(args, complexity.filter)
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _run_filter(args, filter_func) -> int:
    """Read molecules, pass them through *filter_func*, and write the output.

    Args:
        args: Parsed command-line namespace; reads ``input``, ``output``,
            ``smiles_column``, ``name_column``, ``no_header``, ``ncpu`` and
            ``quiet``.
        filter_func: Callable handed to ``process_molecules`` as the
            per-molecule processor.

    Returns:
        1 if the input path does not exist (reported on stderr); 0 once
        processing has completed.
    """
    # Imported on demand so the I/O and batch modules are not loaded at startup.
    from rdkit_cli.io import create_reader, create_writer
    from rdkit_cli.parallel.batch import process_molecules

    source = Path(args.input)
    if not source.exists():
        print(f"Error: Input file not found: {source}", file=sys.stderr)
        return 1

    reader = create_reader(
        source,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )
    writer = create_writer(Path(args.output))

    with reader, writer:
        outcome = process_molecules(
            reader=reader,
            writer=writer,
            processor=filter_func,
            n_workers=args.ncpu,
            quiet=args.quiet,
        )

    if args.quiet:
        return 0

    # Summary: whatever was neither successful nor failed counts as filtered.
    kept = outcome.successful
    seen = outcome.total_processed
    dropped = seen - kept - outcome.failed
    summary = (
        f"Passed: {kept}/{seen} molecules "
        f"(filtered: {dropped}, failed: {outcome.failed}) in {outcome.elapsed_time:.1f}s"
    )
    print(summary, file=sys.stderr)
    return 0
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""Fingerprints command implementation."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from rdkit_cli.cli import RdkitHelpFormatter, add_common_io_options, add_common_processing_options
|
|
7
|
+
|
|
8
|
+
# Fingerprint types defined here to avoid importing core at startup
|
|
9
|
+
# Names accepted by ``fingerprints compute --type``; used as the argparse ``choices``.
# NOTE(review): each value is later passed to FingerprintType(...) in run_compute,
# so this list presumably must match that type's values -- confirm against core.
FINGERPRINT_TYPES = ["morgan", "maccs", "rdkit", "atompair", "torsion", "pattern"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def register_parser(subparsers):
    """Attach the ``fingerprints`` command and its subcommands to *subparsers*.

    Two subcommands are registered: ``list`` (no options, handled by
    ``run_list``) and ``compute`` (shared I/O and processing options plus the
    fingerprint options, handled by ``run_compute``). When ``fingerprints`` is
    invoked without a subcommand, ``func`` prints the command help and
    returns 1.

    Args:
        subparsers: The object returned by ``add_subparsers()`` on the
            top-level parser.
    """
    fp_cmd = subparsers.add_parser(
        "fingerprints",
        help="Compute molecular fingerprints",
        description="Generate various molecular fingerprint types.",
        formatter_class=RdkitHelpFormatter,
    )
    children = fp_cmd.add_subparsers(
        title="Subcommands",
        dest="subcommand",
        metavar="<subcommand>",
    )

    # --- fingerprints list ---------------------------------------------------
    children.add_parser(
        "list",
        help="List available fingerprint types",
        formatter_class=RdkitHelpFormatter,
    ).set_defaults(func=run_list)

    # --- fingerprints compute ------------------------------------------------
    compute = children.add_parser(
        "compute",
        help="Compute fingerprints for molecules",
        formatter_class=RdkitHelpFormatter,
    )
    add_common_io_options(compute)
    add_common_processing_options(compute)

    compute.add_argument(
        "-t", "--type",
        choices=FINGERPRINT_TYPES,
        default="morgan",
        help="Fingerprint type (default: morgan)",
    )
    for short, long_name, fallback, text in (
        ("-r", "--radius", 2, "Radius for Morgan fingerprints (default: 2, equivalent to ECFP4)"),
        ("-b", "--bits", 2048, "Number of bits (default: 2048)"),
    ):
        compute.add_argument(
            short, long_name,
            type=int,
            default=fallback,
            metavar="N",
            help=text,
        )
    compute.add_argument(
        "--counts",
        action="store_true",
        help="Output count fingerprints instead of binary (Morgan only)",
    )
    compute.add_argument(
        "-f", "--format",
        choices=["hex", "bitstring", "bits", "numpy"],
        default="hex",
        dest="output_format",
        help="Output format (default: hex)",
    )
    for flag, text in (
        ("--use-chirality", "Include chirality in fingerprint (Morgan only)"),
        ("--use-features", "Use pharmacophoric features instead of atom invariants (Morgan only)"),
    ):
        compute.add_argument(flag, action="store_true", help=text)

    # NOTE(review): store_true combined with default=True means passing
    # --use-bond-types never changes the value, and --no-bond-types is stored
    # separately as ``no_bond_types`` rather than clearing ``use_bond_types``.
    # Kept as-is; confirm the intended semantics with whoever consumes them.
    compute.add_argument(
        "--use-bond-types",
        action="store_true",
        default=True,
        help="Include bond types in fingerprint (Morgan, default: True)",
    )
    compute.add_argument(
        "--no-bond-types",
        action="store_true",
        help="Exclude bond types from fingerprint (Morgan)",
    )

    compute.set_defaults(func=run_compute)

    def show_help(args):
        """Fallback handler for a bare ``fingerprints``: print help, report failure."""
        fp_cmd.print_help()
        return 1

    fp_cmd.set_defaults(func=show_help)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def run_list(args) -> int:
    """Print every available fingerprint type to stdout.

    For each entry returned by ``list_fingerprints()`` this prints its name
    and description, then its default bit count (with a note when the radius
    is configurable), followed by a blank line.

    Args:
        args: Parsed command-line namespace (not read here).

    Returns:
        Always 0.
    """
    # Imported on demand so the core module is not loaded at CLI startup.
    from rdkit_cli.core.fingerprints import list_fingerprints

    available = list_fingerprints()

    print("Available fingerprint types:\n")
    for entry in available:
        if entry.has_radius:
            radius_note = " (radius configurable)"
        else:
            radius_note = ""
        print(f" {entry.name:<12} - {entry.description}")
        print(f" Default bits: {entry.default_bits}{radius_note}")
        print()

    return 0
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def run_compute(args) -> int:
    """Compute fingerprints for every molecule in the input and write them out.

    Args:
        args: Parsed command-line namespace for ``fingerprints compute``;
            reads ``type``, ``bits``, ``radius``, ``counts``,
            ``output_format``, ``input``, ``output``, ``smiles_column``,
            ``name_column``, ``no_header``, ``ncpu`` and ``quiet``.

    Returns:
        1 if the input path does not exist (reported on stderr) or if any
        molecule failed during processing; 0 otherwise.
    """
    # Imported on demand so core, I/O and batch modules are not loaded at startup.
    from rdkit_cli.core.fingerprints import FingerprintCalculator, FingerprintType
    from rdkit_cli.io import create_reader, create_writer
    from rdkit_cli.parallel.batch import process_molecules

    # The calculator is built before any file is touched.
    calculator = FingerprintCalculator(
        fp_type=FingerprintType(args.type),
        n_bits=args.bits,
        radius=args.radius,
        use_counts=args.counts,
        output_format=args.output_format,
        include_smiles=True,
        include_name=True,
    )

    source = Path(args.input)
    if not source.exists():
        print(f"Error: Input file not found: {source}", file=sys.stderr)
        return 1

    reader = create_reader(
        source,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )

    # Output columns come from the calculator.
    destination = Path(args.output)
    writer = create_writer(destination, columns=calculator.get_column_names())

    with reader, writer:
        outcome = process_molecules(
            reader=reader,
            writer=writer,
            processor=calculator.compute,
            n_workers=args.ncpu,
            quiet=args.quiet,
        )

    if not args.quiet:
        summary = (
            f"Processed {outcome.successful}/{outcome.total_processed} molecules "
            f"({outcome.failed} failed) in {outcome.elapsed_time:.1f}s"
        )
        print(summary, file=sys.stderr)

    # Any failed molecule turns the exit status into an error.
    if outcome.failed == 0:
        return 0
    return 1
|