rdkit-cli 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/CHANGELOG.md +37 -0
- rdkit_cli-0.3.2/PKG-INFO +177 -0
- rdkit_cli-0.3.2/README.md +143 -0
- rdkit_cli-0.3.0/README.md → rdkit_cli-0.3.2/docs/commands.md +283 -355
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/pyproject.toml +1 -1
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/__init__.py +1 -1
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/cli.py +8 -2
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/conformers.py +174 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/depict.py +111 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/descriptors.py +27 -3
- rdkit_cli-0.3.2/src/rdkit_cli/commands/energy.py +151 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/filter.py +47 -8
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/fingerprints.py +4 -1
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/fragment.py +103 -1
- rdkit_cli-0.3.2/src/rdkit_cli/commands/pharmacophore.py +151 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/props.py +163 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/reactions.py +158 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/scaffold.py +103 -1
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/similarity.py +143 -4
- rdkit_cli-0.3.2/src/rdkit_cli/commands/stereo.py +236 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/conformers.py +146 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/descriptors.py +80 -4
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/diversity.py +3 -1
- rdkit_cli-0.3.2/src/rdkit_cli/core/energy.py +118 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/filters.py +33 -4
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/fingerprints.py +49 -13
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/fragment.py +39 -2
- rdkit_cli-0.3.2/src/rdkit_cli/core/pharmacophore.py +115 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/reactions.py +78 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/sascorer.py +3 -1
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/scaffold.py +54 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/similarity.py +137 -14
- rdkit_cli-0.3.2/src/rdkit_cli/core/stereo.py +106 -0
- rdkit_cli-0.3.2/tests/integration/test_new_features.py +490 -0
- rdkit_cli-0.3.2/tests/unit/test_extended_features.py +457 -0
- rdkit_cli-0.3.2/tests/unit/test_shape_constrained_network.py +391 -0
- rdkit_cli-0.3.2/tests/unit/test_stereo_energy_pharmacophore.py +401 -0
- rdkit_cli-0.3.0/PKG-INFO +0 -643
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/.github/workflows/publish.yml +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/LICENSE +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/__main__.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/__init__.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/align.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/convert.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/deduplicate.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/diversity.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/enumerate.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/info.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/mcs.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/merge.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/mmp.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/protonate.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/rgroup.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/rings.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/rmsd.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/sample.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/sascorer.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/split.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/standardize.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/stats.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/commands/validate.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/__init__.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/align.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/deduplicate.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/depict.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/enumerate.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/info.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/mcs.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/merge.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/mmp.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/protonate.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/rgroup.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/rings.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/rmsd.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/sample.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/split.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/standardizer.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/stats.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/core/validate.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/io/__init__.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/io/formats.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/io/readers.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/io/writers.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/parallel/__init__.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/parallel/batch.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/parallel/executor.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/progress/__init__.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/progress/ninja.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/utils/__init__.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/src/rdkit_cli/utils/logging.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/__init__.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/conftest.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/fixtures/sample.csv +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/fixtures/sample.smi +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/integration/__init__.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/integration/test_cli.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/integration/test_interop.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/integration/test_new_commands.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/__init__.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_align.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_deduplicate.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_depict.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_descriptors.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_diversity.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_enumerate.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_filters.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_fingerprints.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_fragment.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_info.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_io.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_mcs.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_merge.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_mmp.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_protonate.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_reactions.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_rgroup.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_rings.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_rmsd.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_sample.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_sascorer.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_scaffold.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_similarity.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_split.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_standardizer.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_stats.py +0 -0
- {rdkit_cli-0.3.0 → rdkit_cli-0.3.2}/tests/unit/test_validate.py +0 -0
|
@@ -5,6 +5,43 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.3.2] - 2026-04-03
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- **stereo**: New command for stereochemistry analysis — CIP label assignment (R/S, E/Z), stereocenter perception, enhanced stereo group inspection, and stereo cleanup/canonicalization
|
|
13
|
+
- **energy**: New command for force field energy calculations — single-point MMFF/UFF energy and structure minimization with convergence reporting
|
|
14
|
+
- **pharmacophore**: New command for pharmacophore feature perception (Donor, Acceptor, Aromatic, etc.) and 2D pharmacophore fingerprint similarity search
|
|
15
|
+
- **fingerprints**: Added Avalon, MHFP (MinHash), and 2D pharmacophore (Gobbi) fingerprint types
|
|
16
|
+
- **descriptors**: Added 42 Molecular Quantum Numbers (MQN) and ten 3D shape descriptors (PMI, NPR, Asphericity, Eccentricity, SpherocityIndex, PBF); new `--mqn`, `--3d`, and `--generate-conformers` flags
|
|
17
|
+
- **similarity**: Added 8 metrics (AllBit, Asymmetric, BraunBlanquet, Kulczynski, McConnaughey, OnBit, RogotGoldberg, Tversky with alpha/beta); new `shape` subcommand for 3D shape similarity (Tanimoto, Protrude, Tversky)
|
|
18
|
+
- **filter**: Expanded structural alert catalogs — `--catalog` option supports PAINS, PAINS_A/B/C, Brenk, NIH, ZINC, and all combined; added `alerts` subcommand as alias
|
|
19
|
+
- **conformers**: Added `constrained` subcommand for template-constrained 3D embedding and `torsion` subcommand for dihedral angle scanning with energy profiles
|
|
20
|
+
- **reactions**: Added `map` subcommand for atom-atom mapping inspection (text/JSON) and `fingerprint` subcommand for reaction difference/structural fingerprints
|
|
21
|
+
- **scaffold**: Added `network` subcommand for scaffold network construction (CSV/JSON output) using rdScaffoldNetwork
|
|
22
|
+
- **props**: Added `charges` subcommand for Gasteiger partial charges and `crippen` subcommand for per-atom LogP/MR contributions
|
|
23
|
+
- **fragment**: Added `brics-build` subcommand for recombining BRICS fragments into new molecules
|
|
24
|
+
- **depict**: Added `highlight` subcommand for SMARTS-based atom/bond highlighting with custom RGB colors
|
|
25
|
+
|
|
26
|
+
### Changed
|
|
27
|
+
|
|
28
|
+
- Total command count increased from 29 to 32 (stereo, energy, pharmacophore)
|
|
29
|
+
- Total fingerprint types increased from 6 to 9
|
|
30
|
+
- Total descriptor count increased from ~133 to ~185
|
|
31
|
+
- Similarity metrics increased from 5 to 13
|
|
32
|
+
|
|
33
|
+
## [0.3.1] - 2026-03-14
|
|
34
|
+
|
|
35
|
+
### Changed
|
|
36
|
+
|
|
37
|
+
- **Migrated to MorganGenerator API**: replaced deprecated `GetMorganFingerprintAsBitVect` / `GetHashedMorganFingerprint` with `rdFingerprintGenerator.GetMorganGenerator` across fingerprints, similarity, diversity, and sascorer modules. Also migrated AtomPair and TopologicalTorsion fingerprints.
|
|
38
|
+
- **Smart parallelism defaults**: global default changed from all cores (`-1`) to single-threaded (`1`), avoiding IPC overhead on fast commands. Heavy workloads (`descriptors --all`, `descriptors --category`) auto-scale to all cores. Users can always override with `-n -1`.
|
|
39
|
+
- **Optimized README**: replaced verbose per-command docs with compact help-style reference and benchmark table. Full command docs moved to `docs/commands.md`.
|
|
40
|
+
|
|
41
|
+
### Removed
|
|
42
|
+
|
|
43
|
+
- Dead `_fgs` reference in fragment module (unused `GetMorganFingerprint` assignment)
|
|
44
|
+
|
|
8
45
|
## [0.3.0] - 2026-01-10
|
|
9
46
|
|
|
10
47
|
### Added
|
rdkit_cli-0.3.2/PKG-INFO
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rdkit-cli
|
|
3
|
+
Version: 0.3.2
|
|
4
|
+
Summary: A comprehensive CLI tool for RDKit cheminformatics operations
|
|
5
|
+
Project-URL: Homepage, https://github.com/vitruves/rdkit-cli
|
|
6
|
+
Project-URL: Repository, https://github.com/vitruves/rdkit-cli
|
|
7
|
+
Project-URL: Issues, https://github.com/vitruves/rdkit-cli/issues
|
|
8
|
+
Author: Johan HG Natter
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: cheminformatics,chemistry,cli,fingerprints,molecular-descriptors,rdkit
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: numpy>=1.24.0
|
|
24
|
+
Requires-Dist: pandas>=2.0.0
|
|
25
|
+
Requires-Dist: pyarrow>=14.0.0
|
|
26
|
+
Requires-Dist: rdkit>=2024.3.1
|
|
27
|
+
Requires-Dist: rich-argparse>=1.4.0
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
# rdkit-cli
|
|
36
|
+
|
|
37
|
+
[PyPI version](https://pypi.org/project/rdkit-cli/)
|
|
38
|
+
[Python versions](https://pypi.org/project/rdkit-cli/)
|
|
39
|
+
[Downloads](https://pepy.tech/projects/rdkit-cli)
|
|
40
|
+
[License](https://github.com/vitruves/rdkit-cli/blob/main/LICENSE)
|
|
41
|
+
|
|
42
|
+
A high-performance CLI for cheminformatics workflows, powered by native RDKit (C++ under the hood).
|
|
43
|
+
|
|
44
|
+
**32 commands** | **5 I/O formats** (CSV, TSV, SMI, SDF, Parquet) | **multi-core parallel processing** | **~80ms startup**
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install rdkit-cli
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Quick Start
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
# Quick molecule info — no files needed
|
|
56
|
+
rdkit-cli info "c1ccccc1"
|
|
57
|
+
|
|
58
|
+
# Compute descriptors
|
|
59
|
+
rdkit-cli descriptors compute -i molecules.csv -o desc.csv -d MolWt,MolLogP,TPSA
|
|
60
|
+
|
|
61
|
+
# Filter by drug-likeness
|
|
62
|
+
rdkit-cli filter druglike -i molecules.csv -o filtered.csv --rule lipinski
|
|
63
|
+
|
|
64
|
+
# Similarity search
|
|
65
|
+
rdkit-cli similarity search -i library.csv -o hits.csv --query "c1ccccc1" --threshold 0.7
|
|
66
|
+
|
|
67
|
+
# Standardize structures
|
|
68
|
+
rdkit-cli standardize -i molecules.csv -o std.csv --cleanup --neutralize
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Commands
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
Usage: rdkit-cli [-h] [-V] <command> ...
|
|
75
|
+
|
|
76
|
+
Commands:
|
|
77
|
+
align Align 3D molecules to a reference
|
|
78
|
+
conformers Generate and optimize 3D conformers
|
|
79
|
+
convert Convert between molecular file formats
|
|
80
|
+
deduplicate Remove duplicate molecules
|
|
81
|
+
depict Generate molecular depictions (SVG/PNG)
|
|
82
|
+
descriptors Compute molecular descriptors
|
|
83
|
+
diversity Analyze and select diverse molecules
|
|
84
|
+
energy Force field energy calculations
|
|
85
|
+
enumerate Enumerate stereoisomers and tautomers
|
|
86
|
+
filter Filter by substructure, properties, drug-likeness, PAINS
|
|
87
|
+
fingerprints Compute fingerprints (Morgan, MACCS, RDKit, AtomPair, Torsion)
|
|
88
|
+
fragment BRICS/RECAP fragmentation and functional groups
|
|
89
|
+
info Quick molecule information from SMILES
|
|
90
|
+
mcs Find Maximum Common Substructure
|
|
91
|
+
merge Merge multiple molecule files
|
|
92
|
+
mmp Matched Molecular Pairs analysis
|
|
93
|
+
pharmacophore Pharmacophore feature analysis
|
|
94
|
+
props Property column operations (add, rename, drop, keep)
|
|
95
|
+
protonate Enumerate protonation states
|
|
96
|
+
reactions Apply SMIRKS transformations and enumerate products
|
|
97
|
+
rgroup R-group decomposition around a core
|
|
98
|
+
rings Ring system analysis and extraction
|
|
99
|
+
rmsd Calculate RMSD between 3D structures
|
|
100
|
+
sample Randomly sample molecules (reservoir sampling supported)
|
|
101
|
+
sascorer Synthetic accessibility, QED, and NP-likeness scores
|
|
102
|
+
scaffold Extract Murcko scaffolds
|
|
103
|
+
similarity Search, matrix, and clustering
|
|
104
|
+
split Split files into smaller chunks
|
|
105
|
+
standardize Standardize and canonicalize molecules
|
|
106
|
+
stats Calculate dataset statistics
|
|
107
|
+
stereo Analyze and manipulate stereochemistry
|
|
108
|
+
validate Validate molecular structures
|
|
109
|
+
|
|
110
|
+
Use 'rdkit-cli <command> --help' for command-specific options.
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Global Options
|
|
114
|
+
|
|
115
|
+
| Option | Description |
|
|
116
|
+
|--------|-------------|
|
|
117
|
+
| `-i, --input FILE` | Input file |
|
|
118
|
+
| `-o, --output FILE` | Output file |
|
|
119
|
+
| `-n, --ncpu N` | Number of CPUs (-1 = all, default: 1; auto-scales for heavy commands) |
|
|
120
|
+
| `--smiles-column COL` | SMILES column name (default: "smiles") |
|
|
121
|
+
| `--name-column COL` | Name column (optional) |
|
|
122
|
+
| `--no-header` | Input has no header row |
|
|
123
|
+
| `-q, --quiet` | Suppress progress output |
|
|
124
|
+
|
|
125
|
+
## Example Pipeline
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
# Validate → deduplicate → standardize → filter → describe → pick diverse subset
|
|
129
|
+
rdkit-cli validate -i raw.csv -o valid.csv --valid-only
|
|
130
|
+
rdkit-cli deduplicate -i valid.csv -o unique.csv -b inchikey
|
|
131
|
+
rdkit-cli standardize -i unique.csv -o std.csv --cleanup --neutralize
|
|
132
|
+
rdkit-cli filter druglike -i std.csv -o druglike.csv --rule lipinski
|
|
133
|
+
rdkit-cli descriptors compute -i druglike.csv -o desc.csv -d MolWt,MolLogP,TPSA,HBD,HBA
|
|
134
|
+
rdkit-cli diversity pick -i druglike.csv -o diverse.csv -k 500
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Formats
|
|
138
|
+
|
|
139
|
+
| Format | Extension |
|
|
140
|
+
|--------|-----------|
|
|
141
|
+
| CSV | `.csv` |
|
|
142
|
+
| TSV | `.tsv` |
|
|
143
|
+
| SMILES | `.smi` |
|
|
144
|
+
| SDF | `.sdf` |
|
|
145
|
+
| Parquet | `.parquet` |
|
|
146
|
+
|
|
147
|
+
Formats are auto-detected from file extensions. Override with `--in-format` / `--out-format`.
|
|
148
|
+
|
|
149
|
+
## Performance
|
|
150
|
+
|
|
151
|
+
- **Native RDKit**: C++ computation with Python bindings — no performance penalty
|
|
152
|
+
- **Smart parallelism**: defaults to single-threaded for fast commands (avoids IPC overhead), auto-scales to all cores for heavy workloads (`descriptors --all`). Override with `-n -1`
|
|
153
|
+
- **Lazy imports**: ~80ms startup time regardless of installed packages
|
|
154
|
+
- **Streaming**: Memory-efficient reservoir sampling for large datasets
|
|
155
|
+
|
|
156
|
+
**Benchmarks** — 27K molecules, Apple M-series (8 cores):
|
|
157
|
+
|
|
158
|
+
| Command | Time | Throughput |
|
|
159
|
+
|---------|------|------------|
|
|
160
|
+
| `fingerprints compute --type morgan` | 3.1s | ~8,700 mol/s |
|
|
161
|
+
| `descriptors compute -d MolWt,MolLogP,TPSA` | 6.4s | ~4,200 mol/s |
|
|
162
|
+
| `filter druglike --rule lipinski` | 6.9s | ~3,900 mol/s |
|
|
163
|
+
| `standardize --cleanup --uncharge` | 7.0s | ~3,900 mol/s |
|
|
164
|
+
| `descriptors compute --all` (auto-parallel) | 55s | ~490 mol/s |
|
|
165
|
+
|
|
166
|
+
## Development
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
git clone https://github.com/vitruves/rdkit-cli
|
|
170
|
+
cd rdkit-cli
|
|
171
|
+
uv sync --dev
|
|
172
|
+
uv run pytest
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## License
|
|
176
|
+
|
|
177
|
+
Apache 2.0
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# rdkit-cli
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://pypi.org/project/rdkit-cli/)
|
|
4
|
+
[Python versions](https://pypi.org/project/rdkit-cli/)
|
|
5
|
+
[Downloads](https://pepy.tech/projects/rdkit-cli)
|
|
6
|
+
[License](https://github.com/vitruves/rdkit-cli/blob/main/LICENSE)
|
|
7
|
+
|
|
8
|
+
A high-performance CLI for cheminformatics workflows, powered by native RDKit (C++ under the hood).
|
|
9
|
+
|
|
10
|
+
**32 commands** | **5 I/O formats** (CSV, TSV, SMI, SDF, Parquet) | **multi-core parallel processing** | **~80ms startup**
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install rdkit-cli
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Quick Start
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
# Quick molecule info — no files needed
|
|
22
|
+
rdkit-cli info "c1ccccc1"
|
|
23
|
+
|
|
24
|
+
# Compute descriptors
|
|
25
|
+
rdkit-cli descriptors compute -i molecules.csv -o desc.csv -d MolWt,MolLogP,TPSA
|
|
26
|
+
|
|
27
|
+
# Filter by drug-likeness
|
|
28
|
+
rdkit-cli filter druglike -i molecules.csv -o filtered.csv --rule lipinski
|
|
29
|
+
|
|
30
|
+
# Similarity search
|
|
31
|
+
rdkit-cli similarity search -i library.csv -o hits.csv --query "c1ccccc1" --threshold 0.7
|
|
32
|
+
|
|
33
|
+
# Standardize structures
|
|
34
|
+
rdkit-cli standardize -i molecules.csv -o std.csv --cleanup --neutralize
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Commands
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
Usage: rdkit-cli [-h] [-V] <command> ...
|
|
41
|
+
|
|
42
|
+
Commands:
|
|
43
|
+
align Align 3D molecules to a reference
|
|
44
|
+
conformers Generate and optimize 3D conformers
|
|
45
|
+
convert Convert between molecular file formats
|
|
46
|
+
deduplicate Remove duplicate molecules
|
|
47
|
+
depict Generate molecular depictions (SVG/PNG)
|
|
48
|
+
descriptors Compute molecular descriptors
|
|
49
|
+
diversity Analyze and select diverse molecules
|
|
50
|
+
energy Force field energy calculations
|
|
51
|
+
enumerate Enumerate stereoisomers and tautomers
|
|
52
|
+
filter Filter by substructure, properties, drug-likeness, PAINS
|
|
53
|
+
fingerprints Compute fingerprints (Morgan, MACCS, RDKit, AtomPair, Torsion)
|
|
54
|
+
fragment BRICS/RECAP fragmentation and functional groups
|
|
55
|
+
info Quick molecule information from SMILES
|
|
56
|
+
mcs Find Maximum Common Substructure
|
|
57
|
+
merge Merge multiple molecule files
|
|
58
|
+
mmp Matched Molecular Pairs analysis
|
|
59
|
+
pharmacophore Pharmacophore feature analysis
|
|
60
|
+
props Property column operations (add, rename, drop, keep)
|
|
61
|
+
protonate Enumerate protonation states
|
|
62
|
+
reactions Apply SMIRKS transformations and enumerate products
|
|
63
|
+
rgroup R-group decomposition around a core
|
|
64
|
+
rings Ring system analysis and extraction
|
|
65
|
+
rmsd Calculate RMSD between 3D structures
|
|
66
|
+
sample Randomly sample molecules (reservoir sampling supported)
|
|
67
|
+
sascorer Synthetic accessibility, QED, and NP-likeness scores
|
|
68
|
+
scaffold Extract Murcko scaffolds
|
|
69
|
+
similarity Search, matrix, and clustering
|
|
70
|
+
split Split files into smaller chunks
|
|
71
|
+
standardize Standardize and canonicalize molecules
|
|
72
|
+
stats Calculate dataset statistics
|
|
73
|
+
stereo Analyze and manipulate stereochemistry
|
|
74
|
+
validate Validate molecular structures
|
|
75
|
+
|
|
76
|
+
Use 'rdkit-cli <command> --help' for command-specific options.
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Global Options
|
|
80
|
+
|
|
81
|
+
| Option | Description |
|
|
82
|
+
|--------|-------------|
|
|
83
|
+
| `-i, --input FILE` | Input file |
|
|
84
|
+
| `-o, --output FILE` | Output file |
|
|
85
|
+
| `-n, --ncpu N` | Number of CPUs (-1 = all, default: 1; auto-scales for heavy commands) |
|
|
86
|
+
| `--smiles-column COL` | SMILES column name (default: "smiles") |
|
|
87
|
+
| `--name-column COL` | Name column (optional) |
|
|
88
|
+
| `--no-header` | Input has no header row |
|
|
89
|
+
| `-q, --quiet` | Suppress progress output |
|
|
90
|
+
|
|
91
|
+
## Example Pipeline
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
# Validate → deduplicate → standardize → filter → describe → pick diverse subset
|
|
95
|
+
rdkit-cli validate -i raw.csv -o valid.csv --valid-only
|
|
96
|
+
rdkit-cli deduplicate -i valid.csv -o unique.csv -b inchikey
|
|
97
|
+
rdkit-cli standardize -i unique.csv -o std.csv --cleanup --neutralize
|
|
98
|
+
rdkit-cli filter druglike -i std.csv -o druglike.csv --rule lipinski
|
|
99
|
+
rdkit-cli descriptors compute -i druglike.csv -o desc.csv -d MolWt,MolLogP,TPSA,HBD,HBA
|
|
100
|
+
rdkit-cli diversity pick -i druglike.csv -o diverse.csv -k 500
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Formats
|
|
104
|
+
|
|
105
|
+
| Format | Extension |
|
|
106
|
+
|--------|-----------|
|
|
107
|
+
| CSV | `.csv` |
|
|
108
|
+
| TSV | `.tsv` |
|
|
109
|
+
| SMILES | `.smi` |
|
|
110
|
+
| SDF | `.sdf` |
|
|
111
|
+
| Parquet | `.parquet` |
|
|
112
|
+
|
|
113
|
+
Formats are auto-detected from file extensions. Override with `--in-format` / `--out-format`.
|
|
114
|
+
|
|
115
|
+
## Performance
|
|
116
|
+
|
|
117
|
+
- **Native RDKit**: C++ computation with Python bindings — no performance penalty
|
|
118
|
+
- **Smart parallelism**: defaults to single-threaded for fast commands (avoids IPC overhead), auto-scales to all cores for heavy workloads (`descriptors --all`). Override with `-n -1`
|
|
119
|
+
- **Lazy imports**: ~80ms startup time regardless of installed packages
|
|
120
|
+
- **Streaming**: Memory-efficient reservoir sampling for large datasets
|
|
121
|
+
|
|
122
|
+
**Benchmarks** — 27K molecules, Apple M-series (8 cores):
|
|
123
|
+
|
|
124
|
+
| Command | Time | Throughput |
|
|
125
|
+
|---------|------|------------|
|
|
126
|
+
| `fingerprints compute --type morgan` | 3.1s | ~8,700 mol/s |
|
|
127
|
+
| `descriptors compute -d MolWt,MolLogP,TPSA` | 6.4s | ~4,200 mol/s |
|
|
128
|
+
| `filter druglike --rule lipinski` | 6.9s | ~3,900 mol/s |
|
|
129
|
+
| `standardize --cleanup --uncharge` | 7.0s | ~3,900 mol/s |
|
|
130
|
+
| `descriptors compute --all` (auto-parallel) | 55s | ~490 mol/s |
|
|
131
|
+
|
|
132
|
+
## Development
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
git clone https://github.com/vitruves/rdkit-cli
|
|
136
|
+
cd rdkit-cli
|
|
137
|
+
uv sync --dev
|
|
138
|
+
uv run pytest
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## License
|
|
142
|
+
|
|
143
|
+
Apache 2.0
|