rdkit-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. rdkit_cli/__init__.py +4 -0
  2. rdkit_cli/__main__.py +6 -0
  3. rdkit_cli/cli.py +162 -0
  4. rdkit_cli/commands/__init__.py +1 -0
  5. rdkit_cli/commands/conformers.py +220 -0
  6. rdkit_cli/commands/convert.py +162 -0
  7. rdkit_cli/commands/depict.py +311 -0
  8. rdkit_cli/commands/descriptors.py +251 -0
  9. rdkit_cli/commands/diversity.py +232 -0
  10. rdkit_cli/commands/enumerate.py +229 -0
  11. rdkit_cli/commands/filter.py +384 -0
  12. rdkit_cli/commands/fingerprints.py +179 -0
  13. rdkit_cli/commands/fragment.py +284 -0
  14. rdkit_cli/commands/mcs.py +162 -0
  15. rdkit_cli/commands/reactions.py +191 -0
  16. rdkit_cli/commands/scaffold.py +243 -0
  17. rdkit_cli/commands/similarity.py +359 -0
  18. rdkit_cli/commands/standardize.py +138 -0
  19. rdkit_cli/core/__init__.py +1 -0
  20. rdkit_cli/core/conformers.py +197 -0
  21. rdkit_cli/core/depict.py +241 -0
  22. rdkit_cli/core/descriptors.py +248 -0
  23. rdkit_cli/core/diversity.py +174 -0
  24. rdkit_cli/core/enumerate.py +190 -0
  25. rdkit_cli/core/filters.py +443 -0
  26. rdkit_cli/core/fingerprints.py +265 -0
  27. rdkit_cli/core/fragment.py +237 -0
  28. rdkit_cli/core/mcs.py +128 -0
  29. rdkit_cli/core/reactions.py +159 -0
  30. rdkit_cli/core/scaffold.py +174 -0
  31. rdkit_cli/core/similarity.py +206 -0
  32. rdkit_cli/core/standardizer.py +141 -0
  33. rdkit_cli/io/__init__.py +7 -0
  34. rdkit_cli/io/formats.py +109 -0
  35. rdkit_cli/io/readers.py +352 -0
  36. rdkit_cli/io/writers.py +275 -0
  37. rdkit_cli/parallel/__init__.py +5 -0
  38. rdkit_cli/parallel/batch.py +181 -0
  39. rdkit_cli/parallel/executor.py +180 -0
  40. rdkit_cli/progress/__init__.py +5 -0
  41. rdkit_cli/progress/ninja.py +195 -0
  42. rdkit_cli/utils/__init__.py +1 -0
  43. rdkit_cli-0.1.0.dist-info/METADATA +380 -0
  44. rdkit_cli-0.1.0.dist-info/RECORD +47 -0
  45. rdkit_cli-0.1.0.dist-info/WHEEL +4 -0
  46. rdkit_cli-0.1.0.dist-info/entry_points.txt +2 -0
  47. rdkit_cli-0.1.0.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,384 @@
1
+ """Filter command implementation."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ from rdkit_cli.cli import RdkitHelpFormatter, add_common_io_options, add_common_processing_options
7
+
8
+ # Rule-set names accepted by `filter druglike --rule`; defined in this module to avoid loading core at startup
9
+ DRUGLIKE_RULES = ["lipinski", "veber", "ghose", "egan", "muegge"]
10
+
11
+
12
def register_parser(subparsers):
    """Attach the ``filter`` command and its subcommands to *subparsers*.

    Every subcommand parser gets the shared I/O and processing options and
    records its handler under ``func``. When ``filter`` is invoked without a
    subcommand, the fallback handler prints the help text and returns 1.
    """
    parser = subparsers.add_parser(
        "filter",
        help="Filter molecules by various criteria",
        description="Filter molecules by substructure, properties, or drug-likeness.",
        formatter_class=RdkitHelpFormatter,
    )
    filter_subparsers = parser.add_subparsers(
        title="Subcommands", dest="subcommand", metavar="<subcommand>"
    )

    def new_subcommand(name, summary):
        """Create one subcommand parser carrying the common option groups."""
        child = filter_subparsers.add_parser(
            name, help=summary, formatter_class=RdkitHelpFormatter
        )
        add_common_io_options(child)
        add_common_processing_options(child)
        return child

    def add_flags(target, flags):
        """Register simple on/off switches, in the given order."""
        for flag, text in flags:
            target.add_argument(flag, action="store_true", help=text)

    # --- filter substructure -------------------------------------------
    sub_parser = new_subcommand("substructure", "Filter by substructure (SMARTS)")
    sub_parser.add_argument(
        "-s", "--smarts", required=True, metavar="PATTERN", help="SMARTS pattern to match"
    )
    add_flags(
        sub_parser,
        [("--exclude", "Exclude molecules matching the pattern (default: include)")],
    )
    sub_parser.add_argument(
        "--min-matches",
        type=int,
        default=1,
        metavar="N",
        help="Minimum number of matches required (default: 1)",
    )
    sub_parser.add_argument(
        "--max-matches",
        type=int,
        default=None,
        metavar="N",
        help="Maximum number of matches allowed",
    )
    add_flags(
        sub_parser,
        [
            ("--count-unique", "Count only unique (non-overlapping) matches"),
            ("--add-match-count", "Add column with number of matches"),
            ("--use-chirality", "Consider chirality in matching"),
        ],
    )
    sub_parser.set_defaults(func=run_substructure)

    # --- filter property -----------------------------------------------
    prop_parser = new_subcommand("property", "Filter by property values")
    prop_parser.add_argument(
        "-r", "--rule",
        action="append",
        metavar="RULE",
        help="Property rule in format 'PROP<OP>VALUE' (e.g., 'MolWt<500', 'LogP>-2'). Can be repeated.",
    )
    prop_parser.set_defaults(func=run_property)

    # --- filter druglike -----------------------------------------------
    drug_parser = new_subcommand("druglike", "Filter by drug-likeness rules")
    drug_parser.add_argument(
        "-r", "--rule",
        choices=DRUGLIKE_RULES,
        default="lipinski",
        help="Drug-likeness rule set (default: lipinski)",
    )
    drug_parser.add_argument(
        "-v", "--max-violations",
        type=int,
        default=0,
        metavar="N",
        help="Maximum allowed violations (default: 0)",
    )
    add_flags(
        drug_parser,
        [
            ("--add-violations", "Add column with violation count"),
            ("--add-details", "Add columns with individual rule values"),
        ],
    )
    drug_parser.set_defaults(func=run_druglike)

    # --- filter pains --------------------------------------------------
    pains_parser = new_subcommand("pains", "Filter out PAINS compounds")
    add_flags(
        pains_parser,
        [
            ("--keep-pains", "Keep PAINS compounds (inverse filter)"),
            ("--add-pains-type", "Add column with PAINS alert type"),
        ],
    )
    pains_parser.set_defaults(func=run_pains)

    # --- filter elements -----------------------------------------------
    elem_parser = new_subcommand("elements", "Filter by allowed elements")
    elem_parser.add_argument(
        "--allowed",
        metavar="ELEMS",
        default="C,H,N,O,S,F,Cl,Br,I,P",
        help="Comma-separated allowed elements (default: C,H,N,O,S,F,Cl,Br,I,P)",
    )
    for flag, text in (
        ("--required", "Comma-separated required elements (must contain all)"),
        ("--forbidden", "Comma-separated forbidden elements"),
    ):
        elem_parser.add_argument(flag, metavar="ELEMS", help=text)
    elem_parser.set_defaults(func=run_elements)

    # --- filter complexity ---------------------------------------------
    comp_parser = new_subcommand("complexity", "Filter by molecular complexity")
    complexity_bounds = (
        ("--min-atoms", 1, "Minimum heavy atom count (default: 1)"),
        ("--max-atoms", 100, "Maximum heavy atom count (default: 100)"),
        ("--min-rings", 0, "Minimum ring count (default: 0)"),
        ("--max-rings", 10, "Maximum ring count (default: 10)"),
        ("--min-rotatable", 0, "Minimum rotatable bonds (default: 0)"),
        ("--max-rotatable", 20, "Maximum rotatable bonds (default: 20)"),
    )
    for flag, default, text in complexity_bounds:
        comp_parser.add_argument(flag, type=int, default=default, metavar="N", help=text)
    comp_parser.set_defaults(func=run_complexity)

    def show_help(args):
        """Fallback for a bare ``filter`` invocation: print help, return 1."""
        parser.print_help()
        return 1

    parser.set_defaults(func=show_help)
226
+
227
+
228
def run_substructure(args) -> int:
    """Run the ``filter substructure`` subcommand.

    Builds a ``SubstructureFilter`` from the ``--smarts`` pattern and the
    ``--exclude`` flag and passes its ``filter`` method to ``_run_filter``.

    Args:
        args: Parsed argparse namespace of the ``substructure`` subcommand.

    Returns:
        1 when ``SubstructureFilter`` raises ``ValueError`` (the message is
        printed to stderr); otherwise whatever ``_run_filter`` returns.
    """
    # Imported lazily so the core package is only loaded when the command runs.
    from rdkit_cli.core.filters import SubstructureFilter

    # NOTE(review): register_parser also defines --min-matches, --max-matches,
    # --count-unique, --add-match-count and --use-chirality, but none of them
    # is read here. Confirm which of these SubstructureFilter accepts and
    # forward them, or drop the options.
    try:
        filter_obj = SubstructureFilter(
            smarts=args.smarts,
            exclude=args.exclude,
        )
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    return _run_filter(args, filter_obj.filter)
245
+
246
+
247
def _parse_property_rule(rule):
    """Parse one ``PROP<OP>VALUE`` rule string.

    Strict and non-strict operators map to the same bound (``<`` and ``<=``
    both produce an upper bound), because the rule table only stores
    ``(lower, upper)`` pairs.

    Returns:
        ``(prop, lower, upper)`` with exactly one bound set to a float and the
        other ``None``, or ``None`` when *rule* contains no comparison
        operator.

    Raises:
        ValueError: If the property name is empty, or the value is not a
            number or is NaN.
    """
    import math

    # Two-character operators are tried first so "<=" is not mistaken for "<".
    for op in ("<=", ">=", "<", ">"):
        prop, found, raw_value = rule.partition(op)
        if not found:
            continue
        prop = prop.strip()
        if not prop:
            raise ValueError("missing property name")
        value = float(raw_value.strip())
        if math.isnan(value):
            raise ValueError("value must not be NaN")
        if op.startswith("<"):
            return prop, None, value
        return prop, value, None
    return None


def _tighten(current, new, pick):
    """Combine two optional bounds, using *pick* (``max``/``min``) when both exist."""
    if current is None:
        return new
    if new is None:
        return current
    return pick(current, new)


def run_property(args) -> int:
    """Run the ``filter property`` subcommand.

    Parses every ``--rule`` into a ``{property: (lower, upper)}`` table,
    builds a ``PropertyFilter`` from it and delegates to ``_run_filter``.
    Several rules on the same property are merged rather than overwritten, so
    ``-r 'MolWt>200' -r 'MolWt<500'`` yields ``(200.0, 500.0)``; when the same
    side is given twice the tighter bound is kept.

    Args:
        args: Parsed argparse namespace; ``args.rule`` is a list of rule
            strings or ``None``.

    Returns:
        1 if no rule was given or a rule cannot be parsed (an error is printed
        to stderr); otherwise whatever ``_run_filter`` returns.
    """
    if not args.rule:
        print("Error: At least one --rule is required", file=sys.stderr)
        return 1

    rules = {}
    for rule in args.rule:
        try:
            parsed = _parse_property_rule(rule)
        except ValueError as e:
            print(f"Error parsing rule '{rule}': {e}", file=sys.stderr)
            return 1
        if parsed is None:
            print(f"Error: Invalid rule format: {rule}", file=sys.stderr)
            return 1

        prop, lower, upper = parsed
        known_lower, known_upper = rules.get(prop, (None, None))
        rules[prop] = (
            _tighten(known_lower, lower, max),
            _tighten(known_upper, upper, min),
        )

    # Imported only after validation so usage errors do not load the core package.
    from rdkit_cli.core.filters import PropertyFilter

    filter_obj = PropertyFilter(rules=rules)
    return _run_filter(args, filter_obj.filter)
281
+
282
+
283
def run_druglike(args) -> int:
    """Run the ``filter druglike`` subcommand.

    Constructs a ``DruglikeFilter`` from ``args.rule`` and
    ``args.max_violations`` and passes its ``filter`` method to
    ``_run_filter``. A ``ValueError`` from the constructor is printed to
    stderr and turned into return value 1.
    """
    # Imported lazily so the core package is only loaded when the command runs.
    from rdkit_cli.core.filters import DruglikeFilter

    # NOTE(review): --add-violations and --add-details are registered by
    # register_parser but are not read here.
    try:
        druglike = DruglikeFilter(
            rule_name=args.rule, max_violations=args.max_violations
        )
    except ValueError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 1
    else:
        return _run_filter(args, druglike.filter)
298
+
299
+
300
def run_pains(args) -> int:
    """Run the ``filter pains`` subcommand.

    Builds a ``PAINSFilter`` whose ``exclude`` setting is the negation of the
    ``--keep-pains`` flag (treated as off when the attribute is missing) and
    delegates to ``_run_filter``.
    """
    # Imported lazily so the core package is only loaded when the command runs.
    from rdkit_cli.core.filters import PAINSFilter

    # NOTE(review): --add-pains-type is registered by register_parser but is
    # not read here.
    keep_flagged = getattr(args, "keep_pains", False)
    pains = PAINSFilter(exclude=not keep_flagged)
    return _run_filter(args, pains.filter)
309
+
310
+
311
def _split_element_list(raw):
    """Turn a comma-separated option value into a list of element symbols.

    Surrounding whitespace is stripped and blank entries (from stray commas
    such as ``"C,N,"`` or ``"C,,N"``) are dropped, so no empty-string symbol
    is ever produced.

    Returns:
        The list of symbols, or ``None`` when *raw* is empty/``None`` or holds
        no non-blank entry.
    """
    if not raw:
        return None
    symbols = [part.strip() for part in raw.split(",")]
    return [symbol for symbol in symbols if symbol] or None


def run_elements(args) -> int:
    """Run the ``filter elements`` subcommand.

    Converts the ``--allowed``, ``--required`` and ``--forbidden`` option
    strings into symbol lists (``None`` when an option is absent or blank),
    builds an ``ElementFilter`` from them and delegates to ``_run_filter``.

    Args:
        args: Parsed argparse namespace of the ``elements`` subcommand.

    Returns:
        Whatever ``_run_filter`` returns.
    """
    # Imported lazily so the core package is only loaded when the command runs.
    from rdkit_cli.core.filters import ElementFilter

    filter_obj = ElementFilter(
        allowed_elements=_split_element_list(args.allowed),
        required_elements=_split_element_list(args.required),
        forbidden_elements=_split_element_list(args.forbidden),
    )
    return _run_filter(args, filter_obj.filter)
326
+
327
+
328
def run_complexity(args) -> int:
    """Run the ``filter complexity`` subcommand.

    Copies the six min/max bounds (atoms, rings, rotatable bonds) from *args*
    into a ``ComplexityFilter`` and delegates to ``_run_filter``.
    """
    # Imported lazily so the core package is only loaded when the command runs.
    from rdkit_cli.core.filters import ComplexityFilter

    bound_names = (
        "min_atoms",
        "max_atoms",
        "min_rings",
        "max_rings",
        "min_rotatable",
        "max_rotatable",
    )
    # Keyword names of ComplexityFilter match the argparse attribute names.
    limits = {name: getattr(args, name) for name in bound_names}
    complexity = ComplexityFilter(**limits)
    return _run_filter(args, complexity.filter)
342
+
343
+
344
def _run_filter(args, filter_func) -> int:
    """Shared driver for all ``filter`` subcommands.

    Checks that ``args.input`` exists, opens a reader and a writer, runs
    *filter_func* over the molecules via ``process_molecules`` and, unless
    ``args.quiet`` is set, prints a pass/filtered/failed summary to stderr.

    Returns:
        1 when the input file does not exist; 0 once processing has finished,
        regardless of ``result.failed``.
    """
    # Imported lazily so I/O and parallel machinery load only when needed.
    from rdkit_cli.io import create_reader, create_writer
    from rdkit_cli.parallel.batch import process_molecules

    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        return 1

    reader = create_reader(
        input_path,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )
    writer = create_writer(Path(args.output))

    with reader, writer:
        result = process_molecules(
            reader=reader,
            writer=writer,
            processor=filter_func,
            n_workers=args.ncpu,
            quiet=args.quiet,
        )

    if args.quiet:
        return 0

    passed = result.successful
    total = result.total_processed
    # Whatever neither passed nor failed is counted as rejected by the filter.
    filtered = total - passed - result.failed
    summary = (
        f"Passed: {passed}/{total} molecules "
        f"(filtered: {filtered}, failed: {result.failed}) in {result.elapsed_time:.1f}s"
    )
    print(summary, file=sys.stderr)
    return 0
@@ -0,0 +1,179 @@
1
+ """Fingerprints command implementation."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ from rdkit_cli.cli import RdkitHelpFormatter, add_common_io_options, add_common_processing_options
7
+
8
+ # Fingerprint type names accepted by `fingerprints compute --type`; defined here to avoid importing core at startup
9
+ FINGERPRINT_TYPES = ["morgan", "maccs", "rdkit", "atompair", "torsion", "pattern"]
10
+
11
+
12
def register_parser(subparsers):
    """Attach the ``fingerprints`` command and its subcommands to *subparsers*.

    Two subcommands are registered: ``list`` (no options) and ``compute``
    (shared I/O and processing options plus the fingerprint settings). When
    ``fingerprints`` is invoked without a subcommand, the fallback handler
    prints the help text and returns 1.
    """
    parser = subparsers.add_parser(
        "fingerprints",
        help="Compute molecular fingerprints",
        description="Generate various molecular fingerprint types.",
        formatter_class=RdkitHelpFormatter,
    )
    fp_subparsers = parser.add_subparsers(
        title="Subcommands", dest="subcommand", metavar="<subcommand>"
    )

    # --- fingerprints list ---------------------------------------------
    list_parser = fp_subparsers.add_parser(
        "list",
        help="List available fingerprint types",
        formatter_class=RdkitHelpFormatter,
    )
    list_parser.set_defaults(func=run_list)

    # --- fingerprints compute ------------------------------------------
    compute_parser = fp_subparsers.add_parser(
        "compute",
        help="Compute fingerprints for molecules",
        formatter_class=RdkitHelpFormatter,
    )
    add_common_io_options(compute_parser)
    add_common_processing_options(compute_parser)

    add = compute_parser.add_argument
    add(
        "-t", "--type",
        choices=FINGERPRINT_TYPES,
        default="morgan",
        help="Fingerprint type (default: morgan)",
    )
    add(
        "-r", "--radius",
        type=int,
        default=2,
        metavar="N",
        help="Radius for Morgan fingerprints (default: 2, equivalent to ECFP4)",
    )
    add(
        "-b", "--bits",
        type=int,
        default=2048,
        metavar="N",
        help="Number of bits (default: 2048)",
    )
    add(
        "--counts",
        action="store_true",
        help="Output count fingerprints instead of binary (Morgan only)",
    )
    add(
        "-f", "--format",
        choices=["hex", "bitstring", "bits", "numpy"],
        default="hex",
        dest="output_format",
        help="Output format (default: hex)",
    )
    for flag, text in (
        ("--use-chirality", "Include chirality in fingerprint (Morgan only)"),
        (
            "--use-features",
            "Use pharmacophoric features instead of atom invariants (Morgan only)",
        ),
    ):
        add(flag, action="store_true", help=text)
    # NOTE(review): store_true combined with default=True means this flag can
    # never change the value; only --no-bond-types carries information.
    add(
        "--use-bond-types",
        action="store_true",
        default=True,
        help="Include bond types in fingerprint (Morgan, default: True)",
    )
    add(
        "--no-bond-types",
        action="store_true",
        help="Exclude bond types from fingerprint (Morgan)",
    )

    compute_parser.set_defaults(func=run_compute)

    def show_help(args):
        """Fallback for a bare ``fingerprints`` invocation: print help, return 1."""
        parser.print_help()
        return 1

    parser.set_defaults(func=show_help)
102
+
103
+
104
def run_list(args) -> int:
    """Run ``fingerprints list``: print every available fingerprint type.

    For each entry returned by ``list_fingerprints`` the name, description
    and default bit count are written to stdout, followed by a blank line.
    Always returns 0.
    """
    # Imported lazily so the core package is only loaded when the command runs.
    from rdkit_cli.core.fingerprints import list_fingerprints

    available = list_fingerprints()

    print("Available fingerprint types:\n")
    for entry in available:
        if entry.has_radius:
            radius_info = " (radius configurable)"
        else:
            radius_info = ""
        headline = f" {entry.name:<12} - {entry.description}"
        details = f" Default bits: {entry.default_bits}{radius_info}"
        # Trailing empty field yields the blank separator line.
        print(headline, details, "", sep="\n")

    return 0
119
+
120
+
121
def run_compute(args) -> int:
    """Run ``fingerprints compute``.

    Builds a ``FingerprintCalculator`` from the parsed options, checks that
    the input file exists, then streams molecules from a reader through
    ``calculator.compute`` into a writer whose columns come from
    ``calculator.get_column_names()``. Unless ``args.quiet`` is set, a summary
    line is printed to stderr.

    Returns:
        1 when the input file is missing or any molecule failed, else 0.
    """
    # Imported lazily so core, I/O and parallel code load only when needed.
    from rdkit_cli.core.fingerprints import FingerprintCalculator, FingerprintType
    from rdkit_cli.io import create_reader, create_writer
    from rdkit_cli.parallel.batch import process_molecules

    # NOTE(review): --use-chirality, --use-features, --use-bond-types and
    # --no-bond-types are registered by register_parser but are not read here.
    calculator = FingerprintCalculator(
        fp_type=FingerprintType(args.type),
        n_bits=args.bits,
        radius=args.radius,
        use_counts=args.counts,
        output_format=args.output_format,
        include_smiles=True,
        include_name=True,
    )

    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        return 1

    reader = create_reader(
        input_path,
        smiles_column=args.smiles_column,
        name_column=args.name_column,
        has_header=not args.no_header,
    )
    writer = create_writer(
        Path(args.output),
        columns=calculator.get_column_names(),
    )

    with reader, writer:
        result = process_molecules(
            reader=reader,
            writer=writer,
            processor=calculator.compute,
            n_workers=args.ncpu,
            quiet=args.quiet,
        )

    if not args.quiet:
        summary = (
            f"Processed {result.successful}/{result.total_processed} molecules "
            f"({result.failed} failed) in {result.elapsed_time:.1f}s"
        )
        print(summary, file=sys.stderr)

    if result.failed == 0:
        return 0
    return 1