prodock 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prodock-0.0.3/.gitignore +5 -0
- prodock-0.0.3/CHANGELOG.md +0 -0
- prodock-0.0.3/LICENSE +21 -0
- prodock-0.0.3/PKG-INFO +143 -0
- prodock-0.0.3/README.md +106 -0
- prodock-0.0.3/prodock/__init_.py +0 -0
- prodock-0.0.3/prodock/binary/README.md +14 -0
- prodock-0.0.3/prodock/binary/qvina +0 -0
- prodock-0.0.3/prodock/binary/qvina-w +0 -0
- prodock-0.0.3/prodock/binary/smina +0 -0
- prodock-0.0.3/prodock/chem/__init__.py +0 -0
- prodock-0.0.3/prodock/chem/conformer.py +368 -0
- prodock-0.0.3/prodock/chem/embed.py +597 -0
- prodock-0.0.3/prodock/chem/optimize.py +271 -0
- prodock-0.0.3/prodock/develop/__init__.py +0 -0
- prodock-0.0.3/prodock/develop/prepare_ligand4.py +680 -0
- prodock-0.0.3/prodock/develop/prepare_receptor4.py +284 -0
- prodock-0.0.3/prodock/engine/__init__.py +0 -0
- prodock-0.0.3/prodock/engine/binary.py +875 -0
- prodock-0.0.3/prodock/engine/multiple.py +798 -0
- prodock-0.0.3/prodock/engine/vina.py +611 -0
- prodock-0.0.3/prodock/io/__init__.py +0 -0
- prodock-0.0.3/prodock/io/convert.py +988 -0
- prodock-0.0.3/prodock/io/logging.py +523 -0
- prodock-0.0.3/prodock/io/parser.py +188 -0
- prodock-0.0.3/prodock/io/pdb_query.py +798 -0
- prodock-0.0.3/prodock/io/rdkit_io.py +698 -0
- prodock-0.0.3/prodock/process/__init__.py +0 -0
- prodock-0.0.3/prodock/process/gridbox.py +598 -0
- prodock-0.0.3/prodock/process/ligand.py +632 -0
- prodock-0.0.3/prodock/process/pdbqt_sanitizer.py +546 -0
- prodock-0.0.3/prodock/process/receptor.py +664 -0
- prodock-0.0.3/prodock/vis/__init__.py +0 -0
- prodock-0.0.3/prodock/vis/dock_gui.py +1469 -0
- prodock-0.0.3/prodock/vis/provis.py +475 -0
- prodock-0.0.3/prodock/vis/provis_gui.py +1612 -0
- prodock-0.0.3/pyproject.toml +87 -0
|
File without changes
|
prodock-0.0.3/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Medicine-Artificial-Intelligence
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
prodock-0.0.3/PKG-INFO
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: prodock
|
|
3
|
+
Version: 0.0.3
|
|
4
|
+
Summary: ProDock — automation utilities for Molecular Docking workflows.
|
|
5
|
+
Project-URL: homepage, https://github.com/Medicine-Artificial-Intelligence/ProDock
|
|
6
|
+
Project-URL: source, https://github.com/Medicine-Artificial-Intelligence/ProDock
|
|
7
|
+
Project-URL: issues, https://github.com/Medicine-Artificial-Intelligence/ProDock/issues
|
|
8
|
+
Project-URL: documentation, https://prodock.readthedocs.io/en/latest/
|
|
9
|
+
Author-email: Hoang-Son Lai Le <lelaihoangson@gmail.com>, Thanh-An Pham <thanh-an.pham@etu.unistra.fr>, Tieu Long Phan <tieu@bioinf.uni-leipzig.de>
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: bioinformatics,docking,molecular-docking,rdkit,vina
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Requires-Dist: ipywidgets>=8.1.7
|
|
22
|
+
Requires-Dist: matplotlib
|
|
23
|
+
Requires-Dist: meeko>=0.6.1
|
|
24
|
+
Requires-Dist: openbabel-wheel>=3.1.1.21
|
|
25
|
+
Requires-Dist: pandas
|
|
26
|
+
Requires-Dist: prolif>=2.0.3
|
|
27
|
+
Requires-Dist: py3dmol>=2.5.2
|
|
28
|
+
Requires-Dist: pymol-open-source-whl>=3.1.0.4
|
|
29
|
+
Requires-Dist: rdkit>=2025.3.5
|
|
30
|
+
Requires-Dist: tqdm
|
|
31
|
+
Requires-Dist: vina>=1.2.7
|
|
32
|
+
Provides-Extra: docs
|
|
33
|
+
Requires-Dist: sphinx-rtd-theme; extra == 'docs'
|
|
34
|
+
Requires-Dist: sphinx>=6.0; extra == 'docs'
|
|
35
|
+
Requires-Dist: sphinxcontrib-bibtex; extra == 'docs'
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
# ProDock
|
|
39
|
+
ProDock
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
## Step-by-Step Installation Guide
|
|
43
|
+
|
|
44
|
+
1. **Python Installation:**
|
|
45
|
+
Ensure that Python 3.11 or later is installed on your system. You can download it from [python.org](https://www.python.org/downloads/).
|
|
46
|
+
|
|
47
|
+
2. **Creating a Virtual Environment (Optional but Recommended):**
|
|
48
|
+
It's recommended to use a virtual environment to avoid conflicts with other projects or system-wide packages. Use the following commands to create and activate a virtual environment:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
python -m venv prodock-env
|
|
52
|
+
source prodock-env/bin/activate
|
|
53
|
+
```
|
|
54
|
+
Or Conda
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
conda create --name prodock-env python=3.11
|
|
58
|
+
conda activate prodock-env
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
3. **Cloning and Installing ProDock:**
|
|
62
|
+
Clone the ProDock repository from GitHub and install it:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
git clone https://github.com/Medicine-Artificial-Intelligence/ProDock.git
|
|
66
|
+
cd ProDock
|
|
67
|
+
pip install -r requirements.txt
|
|
68
|
+
pip install black flake8 pytest # black for formatting, flake8 for checking format, pytest for testing
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Setting Up Your Development Environment
|
|
72
|
+
|
|
73
|
+
Before you start, ensure your local development environment is set up correctly. Pull the latest version of the `main` branch to start with the most recent stable code.
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
git checkout main
|
|
77
|
+
git pull
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Working on New Features
|
|
81
|
+
|
|
82
|
+
1. **Create a New Branch**:
|
|
83
|
+
For every new feature or bug fix, create a new branch from the `main` branch. Name your branch meaningfully, related to the feature or fix you are working on.
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
git checkout -b feature/your-feature-name
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
2. **Develop and Commit Changes**:
|
|
90
|
+
Make your changes locally, commit them to your branch. Keep your commits small and focused; each should represent a logical unit of work.
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
git commit -m "Describe the change"
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
3. **Run Quality Checks**:
|
|
97
|
+
Before finalizing your feature, run the following commands to ensure your code meets our formatting standards and passes all tests:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
./lint.sh # Check code format
|
|
101
|
+
pytest Test # Run tests
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Fix any issues or errors highlighted by these checks.
|
|
105
|
+
|
|
106
|
+
## Integrating Changes
|
|
107
|
+
|
|
108
|
+
1. **Rebase onto Staging**:
|
|
109
|
+
Once your feature is complete and tests pass, rebase your changes onto the `staging` branch to prepare for integration.
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
git fetch origin
|
|
113
|
+
git rebase origin/staging
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Carefully resolve any conflicts that arise during the rebase.
|
|
117
|
+
|
|
118
|
+
2. **Push to Your Feature Branch**:
|
|
119
|
+
After successfully rebasing, push your branch to the remote repository.
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
git push origin feature/your-feature-name
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
3. **Create a Pull Request**:
|
|
126
|
+
Open a pull request from your feature branch to the `staging` branch. Ensure the pull request description clearly describes the changes and any additional context necessary for review.
|
|
127
|
+
|
|
128
|
+
## Important Notes
|
|
129
|
+
|
|
130
|
+
- **Direct Commits Prohibited**: Do not push changes directly to the `main` or `staging` branches. All changes must come through pull requests reviewed by at least one other team member.
|
|
131
|
+
- **Merge Restrictions**: The `main` branch can only be updated from the `staging` branch, not directly from feature branches.
|
|
132
|
+
|
|
133
|
+
## Publication
|
|
134
|
+
|
|
135
|
+
[**ProDock**]()
|
|
136
|
+
|
|
137
|
+
## License
|
|
138
|
+
|
|
139
|
+
This project is licensed under the MIT License - see the [License](LICENSE) file for details.
|
|
140
|
+
|
|
141
|
+
## Acknowledgments
|
|
142
|
+
|
|
143
|
+
This work has received support from the Korea International Cooperation Agency (KOICA) under the project entitled “Education and Research Capacity Building Project at University of Medicine and Pharmacy at Ho Chi Minh City”, conducted from 2024 to 2025 (Project No. 2021-00020-3).
|
prodock-0.0.3/README.md
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# ProDock
|
|
2
|
+
ProDock
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
## Step-by-Step Installation Guide
|
|
6
|
+
|
|
7
|
+
1. **Python Installation:**
|
|
8
|
+
Ensure that Python 3.11 or later is installed on your system. You can download it from [python.org](https://www.python.org/downloads/).
|
|
9
|
+
|
|
10
|
+
2. **Creating a Virtual Environment (Optional but Recommended):**
|
|
11
|
+
It's recommended to use a virtual environment to avoid conflicts with other projects or system-wide packages. Use the following commands to create and activate a virtual environment:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
python -m venv prodock-env
|
|
15
|
+
source prodock-env/bin/activate
|
|
16
|
+
```
|
|
17
|
+
Or Conda
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
conda create --name prodock-env python=3.11
|
|
21
|
+
conda activate prodock-env
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
3. **Cloning and Installing ProDock:**
|
|
25
|
+
Clone the ProDock repository from GitHub and install it:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
git clone https://github.com/Medicine-Artificial-Intelligence/ProDock.git
|
|
29
|
+
cd ProDock
|
|
30
|
+
pip install -r requirements.txt
|
|
31
|
+
pip install black flake8 pytest # black for formatting, flake8 for checking format, pytest for testing
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Setting Up Your Development Environment
|
|
35
|
+
|
|
36
|
+
Before you start, ensure your local development environment is set up correctly. Pull the latest version of the `main` branch to start with the most recent stable code.
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
git checkout main
|
|
40
|
+
git pull
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Working on New Features
|
|
44
|
+
|
|
45
|
+
1. **Create a New Branch**:
|
|
46
|
+
For every new feature or bug fix, create a new branch from the `main` branch. Name your branch meaningfully, related to the feature or fix you are working on.
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
git checkout -b feature/your-feature-name
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
2. **Develop and Commit Changes**:
|
|
53
|
+
Make your changes locally, commit them to your branch. Keep your commits small and focused; each should represent a logical unit of work.
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
git commit -m "Describe the change"
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
3. **Run Quality Checks**:
|
|
60
|
+
Before finalizing your feature, run the following commands to ensure your code meets our formatting standards and passes all tests:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
./lint.sh # Check code format
|
|
64
|
+
pytest Test # Run tests
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Fix any issues or errors highlighted by these checks.
|
|
68
|
+
|
|
69
|
+
## Integrating Changes
|
|
70
|
+
|
|
71
|
+
1. **Rebase onto Staging**:
|
|
72
|
+
Once your feature is complete and tests pass, rebase your changes onto the `staging` branch to prepare for integration.
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
git fetch origin
|
|
76
|
+
git rebase origin/staging
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Carefully resolve any conflicts that arise during the rebase.
|
|
80
|
+
|
|
81
|
+
2. **Push to Your Feature Branch**:
|
|
82
|
+
After successfully rebasing, push your branch to the remote repository.
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
git push origin feature/your-feature-name
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
3. **Create a Pull Request**:
|
|
89
|
+
Open a pull request from your feature branch to the `staging` branch. Ensure the pull request description clearly describes the changes and any additional context necessary for review.
|
|
90
|
+
|
|
91
|
+
## Important Notes
|
|
92
|
+
|
|
93
|
+
- **Direct Commits Prohibited**: Do not push changes directly to the `main` or `staging` branches. All changes must come through pull requests reviewed by at least one other team member.
|
|
94
|
+
- **Merge Restrictions**: The `main` branch can only be updated from the `staging` branch, not directly from feature branches.
|
|
95
|
+
|
|
96
|
+
## Publication
|
|
97
|
+
|
|
98
|
+
[**ProDock**]()
|
|
99
|
+
|
|
100
|
+
## License
|
|
101
|
+
|
|
102
|
+
This project is licensed under the MIT License - see the [License](LICENSE) file for details.
|
|
103
|
+
|
|
104
|
+
## Acknowledgments
|
|
105
|
+
|
|
106
|
+
This work has received support from the Korea International Cooperation Agency (KOICA) under the project entitled “Education and Research Capacity Building Project at University of Medicine and Pharmacy at Ho Chi Minh City”, conducted from 2024 to 2025 (Project No. 2021-00020-3).
|
|
File without changes
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
This folder contains third-party binaries used by **ProDock**. It is intentionally small and meant to make distribution and developer setup straightforward while remaining license-compliant.
|
|
2
|
+
|
|
3
|
+
## Included (examples)
|
|
4
|
+
- `qvina02` — QuickVina2 binary (Apache-2.0). Upstream: **https://github.com/QVina/qvina/tree/master**. See `LICENSE`.
|
|
5
|
+
- `smina` — smina binary (GPL-2.0). Upstream: **https://github.com/mwojcikowski/smina/tree/master**. See `LICENSE.APACHE` and `LICENSE.GNU`.
|
|
6
|
+
|
|
7
|
+
> Replace the placeholder upstream URLs and versions with the concrete links and version strings you obtained when grabbing the binary.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Important licensing notes
|
|
12
|
+
- `smina` is licensed under **GNU GPL v2**. Distribution of the binary requires that you either:
|
|
13
|
+
- include the corresponding **complete source** in this repository (recommended), or
|
|
14
|
+
- include a clear **written offer** describing where the source can be obtained and an offer to provide it on request (see `LICENSE.APACHE` and `LICENSE.GNU`).
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
File without changes
|
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
# prodock/chem/conformer.py
|
|
2
|
+
"""
|
|
3
|
+
Conformer manager: orchestrates embedding + optimization, exposes algorithm choices.
|
|
4
|
+
|
|
5
|
+
This file provides ConformerManager (alias Conformer) which:
|
|
6
|
+
- loads SMILES
|
|
7
|
+
- uses prodock.chem.embed.Embedder for embedding (single-process inside worker)
|
|
8
|
+
- uses prodock.chem.optimize.Optimizer for optimization (single-process inside worker)
|
|
9
|
+
- runs parallel jobs via joblib (loky) only in this high-level manager
|
|
10
|
+
- writes per-ligand SDFs and adds CONF_ENERGY_<id> tags when requested
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
from typing import List, Dict, Optional, Tuple
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
import logging
|
|
17
|
+
import os
|
|
18
|
+
|
|
19
|
+
# RDKit imports and log suppression
|
|
20
|
+
try:
|
|
21
|
+
from rdkit import Chem
|
|
22
|
+
from rdkit import RDLogger
|
|
23
|
+
|
|
24
|
+
RDLogger.DisableLog("rdApp.*")
|
|
25
|
+
except Exception:
|
|
26
|
+
raise ImportError("RDKit is required for prodock.chem.conformer")
|
|
27
|
+
|
|
28
|
+
# prodock logging utilities — unified import + robust fallback
|
|
29
|
+
try:
|
|
30
|
+
from prodock.io.logging import get_logger, StructuredAdapter
|
|
31
|
+
except Exception:
|
|
32
|
+
|
|
33
|
+
def get_logger(name: str):
|
|
34
|
+
return logging.getLogger(name)
|
|
35
|
+
|
|
36
|
+
class StructuredAdapter(logging.LoggerAdapter):
|
|
37
|
+
def __init__(self, logger, extra):
|
|
38
|
+
super().__init__(logger, extra)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# local modules
|
|
42
|
+
from prodock.chem.embed import Embedder
|
|
43
|
+
from prodock.chem.optimize import Optimizer
|
|
44
|
+
|
|
45
|
+
logger = StructuredAdapter(
|
|
46
|
+
get_logger("prodock.chem.conformer"), {"component": "conformer"}
|
|
47
|
+
)
|
|
48
|
+
logger._base_logger = getattr(logger, "_base_logger", getattr(logger, "logger", None))
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# joblib for parallelism
|
|
52
|
+
try:
|
|
53
|
+
from joblib import Parallel, delayed
|
|
54
|
+
|
|
55
|
+
_JOBLIB_AVAILABLE = True
|
|
56
|
+
except Exception:
|
|
57
|
+
_JOBLIB_AVAILABLE = False
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _embed_worker(
    smiles: str,
    seed: int,
    n_confs: int,
    add_hs: bool,
    embed_algorithm: Optional[str],
) -> Tuple[Optional[str], int]:
    """
    Embed one SMILES string using an Embedder created inside this call.

    :param smiles: SMILES string to embed
    :param seed: seed passed both to the Embedder constructor and as
        ``random_seed`` to its ``embed_all``
    :param n_confs: number of conformers requested
    :param add_hs: forwarded to ``Embedder.embed_all``
    :param embed_algorithm: forwarded to ``Embedder.embed_all``
    :return: ``(molblock, conf_count)``; ``(None, 0)`` when the Embedder
        produced no molblocks
    """
    # setdefault leaves any value the caller already exported untouched.
    # NOTE(review): presumably meant to stop each parallel worker from
    # spawning its own thread pool - confirm.
    for thread_var in ("OMP_NUM_THREADS", "MKL_NUM_THREADS"):
        os.environ.setdefault(thread_var, "1")

    embedder = Embedder(seed=seed)
    embedder.load_smiles_iterable([smiles])
    embedder.embed_all(
        n_confs=n_confs,
        add_hs=add_hs,
        embed_algorithm=embed_algorithm,
        random_seed=seed,
    )

    blocks = embedder.molblocks
    if not blocks:
        return None, 0

    # Only one SMILES was loaded, so the first entry is the one of interest.
    counts = embedder.conf_counts
    n_found = counts[0] if counts else 0
    return blocks[0], n_found
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _optimize_worker(
    molblock: str,
    method: str,
    max_iters: int,
) -> Tuple[Optional[str], Dict[int, float]]:
    """
    Optimize one MolBlock using an Optimizer created inside this call.

    :param molblock: MolBlock text to optimize
    :param method: forwarded to ``Optimizer.optimize_all``
    :param max_iters: forwarded to the Optimizer constructor
    :return: ``(optimized_molblock, energy_map)``; ``(None, {})`` when the
        Optimizer produced no optimized molblocks
    """
    # setdefault leaves any value the caller already exported untouched.
    # NOTE(review): presumably meant to stop each parallel worker from
    # spawning its own thread pool - confirm.
    for thread_var in ("OMP_NUM_THREADS", "MKL_NUM_THREADS"):
        os.environ.setdefault(thread_var, "1")

    optimizer = Optimizer(max_iters=max_iters)
    optimizer.load_molblocks([molblock])
    optimizer.optimize_all(method=method)

    done = optimizer.optimized_molblocks
    if not done:
        return None, {}

    # Only one MolBlock was loaded, so the first entry is the one of interest.
    energy_maps = optimizer.energies
    energy_map = energy_maps[0] if energy_maps else {}
    return done[0], energy_map
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class ConformerManager:
    """
    High-level manager composing Embedder + Optimizer.

    Methods are chainable (return self). Use properties to access results.
    The result lists (``molblocks``, ``conf_counts``, ``energies``) are kept
    index-aligned with each other; molecules that fail a stage are dropped
    from all of them.

    :param seed: RNG seed for embedding
    :param backend: joblib backend to use when parallelizing (default 'loky')
    """

    def __init__(self, seed: int = 42, backend: str = "loky") -> None:
        self._seed = int(seed)
        self._backend = backend
        self._smiles: List[str] = []
        self._molblocks: List[str] = []
        self._conf_counts: List[int] = []
        self._energies: List[Dict[int, float]] = []

    def __repr__(self) -> str:
        return f"<ConformerManager smiles={len(self._smiles)} mols={len(self._molblocks)} seed={self._seed}>"

    # ---------- properties ----------
    # Every property returns a copy so callers cannot mutate internal state.
    @property
    def smiles(self) -> List[str]:
        """Copy of the loaded SMILES strings."""
        return list(self._smiles)

    @property
    def molblocks(self) -> List[str]:
        """Copy of the current MolBlocks (embedded, optimized or pruned)."""
        return list(self._molblocks)

    @property
    def conf_counts(self) -> List[int]:
        """Copy of the per-molecule conformer counts."""
        return list(self._conf_counts)

    @property
    def energies(self) -> List[Dict[int, float]]:
        """Copy of the per-molecule ``{conformer_id: energy}`` maps."""
        return [dict(e) for e in self._energies]

    # ---------- loading ----------
    def load_smiles_file(self, path: str) -> "ConformerManager":
        """
        Load SMILES from a text file, one record per line.

        Only the first whitespace-separated token of each non-blank line is
        kept; anything after it on the line is ignored.

        :param path: path to the SMILES file
        :return: self
        :raises FileNotFoundError: if ``path`` does not exist
        """
        p = Path(path)
        if not p.exists():
            raise FileNotFoundError(path)
        with p.open("r", encoding="utf-8") as fh:
            self._smiles = [ln.split()[0] for ln in fh if ln.strip()]
        logger.info("ConformerManager: loaded %d SMILES", len(self._smiles))
        return self

    def load_smiles(self, smiles: List[str]) -> "ConformerManager":
        """
        Load SMILES from an in-memory list.

        Only the first whitespace-separated token of each entry is kept.
        Empty and whitespace-only entries are skipped.

        :param smiles: SMILES strings (optionally followed by a name)
        :return: self
        """
        # Testing s.strip() (not just s) keeps a whitespace-only entry from
        # reaching split()[0], which would raise IndexError.
        self._smiles = [s.split()[0] for s in smiles if s and s.strip()]
        return self

    # ---------- job dispatch ----------
    def _run_jobs(self, worker, arg_tuples: List[Tuple], n_jobs: int) -> list:
        """Run ``worker(*args)`` for each tuple, via joblib unless n_jobs == 1 or joblib is missing."""
        if n_jobs == 1 or not _JOBLIB_AVAILABLE:
            return [worker(*args) for args in arg_tuples]
        return Parallel(n_jobs=n_jobs, backend=self._backend)(
            delayed(worker)(*args) for args in arg_tuples
        )

    # ---------- embedding ----------
    def embed_all(
        self,
        n_confs: int = 1,
        n_jobs: int = 1,
        add_hs: bool = True,
        embed_algorithm: Optional[str] = "ETKDGv3",
    ) -> "ConformerManager":
        """
        Embed loaded SMILES.

        Molecules whose embedding yields no MolBlock are dropped. Energy data
        from any earlier optimization is discarded because it no longer
        corresponds to the new MolBlocks.

        :param n_confs: conformers per molecule
        :param n_jobs: parallel jobs (-1 for all CPUs), 1 for sequential
        :param add_hs: add explicit Hs before embedding (default True)
        :param embed_algorithm: 'ETKDGv3' | 'ETKDGv2' | 'ETKDG' | None
        :return: self
        :raises RuntimeError: if no SMILES have been loaded
        """
        if not self._smiles:
            raise RuntimeError(
                "No SMILES loaded; call load_smiles_file() or load_smiles()"
            )

        arg_tuples = [
            (smi, self._seed, int(n_confs), bool(add_hs), embed_algorithm)
            for smi in self._smiles
        ]
        results = self._run_jobs(_embed_worker, arg_tuples, n_jobs)

        molblocks: List[str] = []
        conf_counts: List[int] = []
        for mb, c in results:
            if mb is None:
                continue
            molblocks.append(mb)
            conf_counts.append(c)

        self._molblocks = molblocks
        self._conf_counts = conf_counts
        # Stale energy maps would otherwise be zipped against the new blocks
        # by prune_top_k() / write_sdf().
        self._energies = []
        logger.info(
            "ConformerManager: embedded %d / %d molecules",
            len(self._molblocks),
            len(self._smiles),
        )
        return self

    # ---------- optimization ----------
    def optimize_all(
        self, method: str = "MMFF94", n_jobs: int = 1, max_iters: int = 200
    ) -> "ConformerManager":
        """
        Optimize all embedded molblocks.

        Molecules whose optimization yields no MolBlock are dropped, together
        with their conformer-count entry.

        :param method: 'UFF' | 'MMFF' | 'MMFF94' | 'MMFF94S'
        :param n_jobs: parallel jobs; 1 for sequential
        :param max_iters: max iterations for optimizer
        :return: self
        :raises RuntimeError: if there are no embedded molecules
        """
        if not self._molblocks:
            raise RuntimeError(
                "No embedded molecules available; call embed_all() first"
            )

        arg_tuples = [(mb, method, int(max_iters)) for mb in self._molblocks]
        results = self._run_jobs(_optimize_worker, arg_tuples, n_jobs)

        # Only re-align conf_counts when it was aligned to begin with.
        counts_aligned = len(self._conf_counts) == len(self._molblocks)

        optimized_blocks: List[str] = []
        energies_list: List[Dict[int, float]] = []
        kept_counts: List[int] = []
        for idx, (mb, en) in enumerate(results):
            if mb is None:
                continue
            optimized_blocks.append(mb)
            energies_list.append(en)
            if counts_aligned:
                kept_counts.append(self._conf_counts[idx])

        self._molblocks = optimized_blocks
        self._energies = energies_list
        if counts_aligned:
            self._conf_counts = kept_counts
        logger.info("ConformerManager: optimized %d molecules", len(self._molblocks))
        return self

    # ---------- pruning ----------
    def prune_top_k(self, k: int = 1) -> "ConformerManager":
        """
        Keep only top-k lowest-energy conformers per molecule (based on last optimization).

        At least one conformer is always kept (``k`` below 1 is treated as 1).
        Molecules without energy data are kept unchanged. A molecule whose
        MolBlock cannot be parsed is dropped. If none of the selected
        conformers can be copied, the molecule is kept unpruned instead of
        being replaced by a conformer-less MolBlock.

        :param k: number of lowest-energy conformers to keep per molecule
        :return: self
        :raises RuntimeError: if there are no molecules to prune
        """
        if not self._molblocks:
            raise RuntimeError("No molecules to prune")
        if not self._energies:
            logger.warning("ConformerManager: no energy data available; skipping prune")
            return self

        keep_n = max(1, int(k))

        new_blocks: List[str] = []
        new_energies: List[Dict[int, float]] = []
        new_counts: List[int] = []
        for block, e_map in zip(self._molblocks, self._energies):
            mol = Chem.MolFromMolBlock(block, sanitize=False, removeHs=False)
            if mol is None:
                continue
            if not e_map:
                new_blocks.append(block)
                new_energies.append({})
                new_counts.append(mol.GetNumConformers())
                continue

            # sort conf ids by energy (ascending)
            ranked = sorted(e_map.items(), key=lambda kv: kv[1])
            keep_ids = [cid for cid, _ in ranked[:keep_n]]

            base = Chem.Mol(mol)
            base.RemoveAllConformers()

            # Record energies under the ids actually assigned in `base`, and
            # only for conformers that were really copied.
            new_map: Dict[int, float] = {}
            for cid in keep_ids:
                try:
                    conf = mol.GetConformer(cid)
                    new_id = base.AddConformer(conf, assignId=True)
                except Exception:
                    # Best-effort: a missing/invalid conformer id must not
                    # abort pruning of the remaining molecules.
                    logger.warning("ConformerManager: failed to copy conformer %s", cid)
                    continue
                new_map[int(new_id)] = e_map[cid]

            if not new_map:
                # Nothing could be copied; writing `base` would emit a block
                # without real coordinates, so keep the molecule as it was.
                logger.warning(
                    "ConformerManager: no conformers could be copied; keeping molecule unpruned"
                )
                new_blocks.append(block)
                new_energies.append(dict(e_map))
                new_counts.append(mol.GetNumConformers())
                continue

            new_blocks.append(Chem.MolToMolBlock(base))
            new_energies.append(new_map)
            new_counts.append(len(new_map))

        self._molblocks = new_blocks
        self._energies = new_energies
        self._conf_counts = new_counts
        logger.info(
            "ConformerManager: pruned to top-%d confs for %d molecules",
            keep_n,
            len(self._molblocks),
        )
        return self

    # ---------- write ----------
    def write_sdf(
        self,
        out_folder: str,
        per_mol_folder: bool = True,
        write_energy_tags: bool = True,
    ) -> "ConformerManager":
        """
        Write SDF outputs. Each molblock becomes an SDF. Optionally add CONF_ENERGY_<id> properties.

        Files are named after the molblock index (``ligand_<i>``). MolBlocks
        that cannot be parsed are skipped with a warning.

        :param out_folder: destination folder path
        :param per_mol_folder: if True, create ligand_i/ligand_i.sdf
        :param write_energy_tags: write CONF_ENERGY_<id> properties when energies available
        :return: self
        """
        out = Path(out_folder)
        out.mkdir(parents=True, exist_ok=True)
        n_written = 0
        for i, block in enumerate(self._molblocks):
            mol = Chem.MolFromMolBlock(block, sanitize=False, removeHs=False)
            if mol is None:
                logger.warning(
                    "ConformerManager.write_sdf: could not parse molblock for index %d",
                    i,
                )
                continue

            if write_energy_tags and i < len(self._energies):
                e_map = self._energies[i]
                for cid, energy in e_map.items():
                    try:
                        mol.SetProp(f"CONF_ENERGY_{cid}", str(energy))
                    except Exception:
                        logger.debug(
                            "Failed to set energy property for mol %d cid %s", i, cid
                        )

            if per_mol_folder:
                folder = out / f"ligand_{i}"
                folder.mkdir(parents=True, exist_ok=True)
                path = folder / f"{folder.name}.sdf"
            else:
                path = out / f"ligand_{i}.sdf"

            writer = Chem.SDWriter(str(path))
            try:
                writer.write(mol)
            finally:
                # Always release the file handle, even if write() raises.
                writer.close()
            n_written += 1
            logger.debug("ConformerManager: wrote SDF for ligand %d -> %s", i, path)

        # Report files actually written, not the number of candidates.
        logger.info("ConformerManager: wrote %d SDF files to %s", n_written, out)
        return self
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
# Alias requested by your tests / usage
|
|
368
|
+
Conformer = ConformerManager
|