@nahisaho/satori 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +111 -45
- package/package.json +1 -1
- package/src/.github/skills/scientific-admet-pharmacokinetics/SKILL.md +1 -0
- package/src/.github/skills/scientific-cancer-genomics/SKILL.md +287 -0
- package/src/.github/skills/scientific-clinical-decision-support/SKILL.md +2 -0
- package/src/.github/skills/scientific-clinical-reporting/SKILL.md +324 -0
- package/src/.github/skills/scientific-computational-materials/SKILL.md +4 -4
- package/src/.github/skills/scientific-deep-learning/SKILL.md +1 -0
- package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +1 -0
- package/src/.github/skills/scientific-grant-writing/SKILL.md +2 -0
- package/src/.github/skills/scientific-lab-data-management/SKILL.md +2 -2
- package/src/.github/skills/scientific-literature-search/SKILL.md +443 -0
- package/src/.github/skills/scientific-meta-analysis/SKILL.md +10 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +288 -0
- package/src/.github/skills/scientific-molecular-docking/SKILL.md +303 -0
- package/src/.github/skills/scientific-pathway-enrichment/SKILL.md +449 -0
- package/src/.github/skills/scientific-pharmacovigilance/SKILL.md +3 -0
- package/src/.github/skills/scientific-population-genetics/SKILL.md +2 -0
- package/src/.github/skills/scientific-precision-oncology/SKILL.md +1 -0
- package/src/.github/skills/scientific-protein-domain-family/SKILL.md +369 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +352 -0
- package/src/.github/skills/scientific-scientific-schematics/SKILL.md +2 -2
- package/src/.github/skills/scientific-single-cell-genomics/SKILL.md +2 -0
- package/src/.github/skills/scientific-survival-clinical/SKILL.md +11 -0
- package/src/.github/skills/scientific-systematic-review/SKILL.md +361 -0
- package/src/.github/skills/scientific-variant-effect-prediction/SKILL.md +325 -0
- package/src/.github/skills/scientific-variant-interpretation/SKILL.md +1 -0
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-metabolomics-databases
|
|
3
|
+
description: |
|
|
4
|
+
メタボロミクスデータベース統合スキル。HMDB (Human Metabolome Database、
|
|
5
|
+
220,000+ 代謝物)、MetaCyc (代謝パスウェイ)、Metabolomics Workbench
|
|
6
|
+
(NIH メタボロミクスリポジトリ) の 3 大メタボロミクス DB を統合した
|
|
7
|
+
代謝物同定、パスウェイマッピング、バイオマーカー発見、
|
|
8
|
+
RefMet 標準化命名パイプライン。13 の ToolUniverse SMCP ツールと連携。
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Scientific Metabolomics Databases
|
|
12
|
+
|
|
13
|
+
HMDB / MetaCyc / Metabolomics Workbench の 3 大メタボロミクスデータベースを統合した
|
|
14
|
+
代謝物同定・パスウェイマッピング・バイオマーカー発見パイプラインを提供する。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- 質量 (m/z) から代謝物を同定するとき
|
|
19
|
+
- HMDB で代謝物の生物学的コンテキストを調べるとき
|
|
20
|
+
- MetaCyc で代謝パスウェイの詳細を確認するとき
|
|
21
|
+
- Metabolomics Workbench の公開データセットを検索するとき
|
|
22
|
+
- 複数 DB を横断した代謝物アノテーションが必要なとき
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
## 1. HMDB 代謝物検索
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import requests
|
|
32
|
+
import pandas as pd
|
|
33
|
+
import xml.etree.ElementTree as ET
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _parse_hmdb_metabolite(xml_payload):
    """Extract the summary fields of a single HMDB metabolite XML record.

    Parameters:
        xml_payload: bytes | str — XML document returned by HMDB

    Returns:
        dict with the summary fields, or None when the payload is not
        well-formed XML (e.g. an HTML error page).
    """
    try:
        root = ET.fromstring(xml_payload)
    except ET.ParseError:
        return None

    ns = {"hmdb": "http://www.hmdb.ca"}

    def text(path):
        # findtext returns "" both for a missing element and for empty text.
        return root.findtext(path, "", ns) or ""

    # NOTE(review): "monisotopic" (sic) is the tag name the original code
    # looked up; it is believed to match HMDB's own schema spelling, so it
    # is deliberately left uncorrected — confirm before changing.
    try:
        mono_mass = float(text("hmdb:monisotopic_molecular_weight") or 0)
    except ValueError:
        # Non-numeric mass text should not abort the whole lookup.
        mono_mass = 0.0

    return {
        "hmdb_id": text("hmdb:accession"),
        "name": text("hmdb:name"),
        "chemical_formula": text("hmdb:chemical_formula"),
        "monoisotopic_mass": mono_mass,
        "description": text("hmdb:description")[:300],
        "status": text("hmdb:status"),
        "biological_role": text("hmdb:ontology/hmdb:root/hmdb:term"),
    }


def hmdb_search(query, search_type="name", max_results=50):
    """
    Search HMDB (Human Metabolome Database).

    Only the "hmdb_id" lookup performs an HTTP request here; every other
    search type is a placeholder that returns an empty DataFrame, because the
    actual search is meant to run through the ToolUniverse SMCP tools.

    Parameters:
        query: str — search query (metabolite name, HMDB ID, formula)
        search_type: "name", "hmdb_id", "formula", "mass"
        max_results: int — currently unused; kept for interface compatibility

    Returns:
        pandas.DataFrame — one row for a resolved HMDB ID, otherwise empty.
    """
    # Via ToolUniverse: HMDB_search, HMDB_get_metabolite, HMDB_get_diseases

    if search_type == "hmdb_id":
        from urllib.parse import quote

        # Percent-encode the ID so stray characters cannot alter the URL path.
        url = f"https://hmdb.ca/metabolites/{quote(str(query), safe='')}.xml"
        resp = requests.get(url, timeout=30)
        if resp.status_code == 200:
            # Parse the raw bytes so the XML declaration's encoding is honoured.
            record = _parse_hmdb_metabolite(resp.content)
            if record is not None:
                return pd.DataFrame([record])
        print(f"HMDB: no metabolite record retrieved for '{query}' "
              f"(HTTP {resp.status_code})")
        return pd.DataFrame()

    # Name / formula / mass search: executed via ToolUniverse SMCP in practice.
    print(f"HMDB search: '{query}' (type={search_type})")
    return pd.DataFrame([])
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## 2. MetaCyc 代謝パスウェイ検索
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
def metacyc_pathway_search(query, organism="Homo sapiens"):
    """
    Describe a MetaCyc metabolic-pathway search.

    No request is sent from here: the function echoes the search parameters
    together with the ToolUniverse tools that perform the real lookup.

    Parameters:
        query: str — pathway, metabolite or enzyme name
        organism: str — organism filter

    Returns:
        dict with the keys "query", "organism", "database" and "tools".
    """
    # ToolUniverse tools that back this search (name — description).
    tool_descriptions = [
        "MetaCyc_search_pathways — パスウェイ検索",
        "MetaCyc_get_pathway — パスウェイ詳細 (反応・酵素・代謝物)",
        "MetaCyc_get_compound — 化合物詳細 (構造・特性)",
        "MetaCyc_get_reaction — 反応詳細 (基質・生成物・酵素)",
    ]

    print(f"MetaCyc: querying '{query}' for {organism}")
    return dict(
        query=query,
        organism=organism,
        database="MetaCyc",
        tools=tool_descriptions,
    )
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## 3. Metabolomics Workbench データ取得
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
def _mwb_payload_to_frame(payload):
    """Turn a decoded Metabolomics Workbench JSON payload into a DataFrame."""
    if isinstance(payload, list):
        return pd.DataFrame(payload)
    if isinstance(payload, dict):
        if not payload:
            # An empty object means "no hits", not one empty record.
            return pd.DataFrame()
        if all(isinstance(value, dict) for value in payload.values()):
            # Multi-record answers arrive as {"1": {...}, "2": {...}};
            # each inner dict is one result row.
            return pd.DataFrame(list(payload.values()))
        return pd.DataFrame([payload])
    return pd.DataFrame()


def metabolomics_workbench_search(query, search_type="compound_name",
                                  exact_mass=None, mz_tolerance=0.01):
    """
    Query the NIH Metabolomics Workbench REST API.

    Parameters:
        query: str — search query
        search_type: "compound_name", "refmet", "study", "exact_mass", "mz"
        exact_mass: float — exact mass for search_type="exact_mass";
            when omitted, `query` is used as the mass value
        mz_tolerance: float — m/z tolerance (Da)

    Returns:
        pandas.DataFrame — one row per record; empty on a non-200 response
        or a body that is not valid JSON.

    Raises:
        ValueError: if `search_type` is not one of the supported values.
    """
    from urllib.parse import quote

    base_url = "https://www.metabolomicsworkbench.org/rest"

    # Via ToolUniverse:
    #   MetabolomicsWorkbench_search_compound_by_name
    #   MetabolomicsWorkbench_get_refmet_info
    #   MetabolomicsWorkbench_get_study
    #   MetabolomicsWorkbench_search_by_exact_mass
    #   MetabolomicsWorkbench_search_by_mz
    #   MetabolomicsWorkbench_get_compound_by_pubchem_cid

    def seg(value):
        # Percent-encode one path segment (names may contain spaces or "/").
        return quote(str(value), safe="")

    # Avoid building a ".../exact_mass/None/..." URL when exact_mass is omitted.
    mass = query if exact_mass is None else exact_mass

    # NOTE(review): the endpoint layouts below are kept from the original
    # snippet; verify them against the current REST specification.
    paths = {
        "compound_name": f"compound/name/{seg(query)}/all/",
        "refmet": f"refmet/name/{seg(query)}/all/",
        "study": f"study/study_id/{seg(query)}/summary/",
        "exact_mass": f"compound/exact_mass/{seg(mass)}/tolerance/{seg(mz_tolerance)}/",
        "mz": f"compound/mz_value/{seg(query)}/tolerance/{seg(mz_tolerance)}/",
    }
    try:
        url = f"{base_url}/{paths[search_type]}"
    except KeyError:
        raise ValueError(f"Unknown search_type: {search_type}") from None

    resp = requests.get(url, timeout=30)
    df = pd.DataFrame()
    if resp.status_code == 200:
        try:
            df = _mwb_payload_to_frame(resp.json())
        except ValueError:
            # JSON decoding errors are ValueError subclasses; a non-JSON
            # body is treated as "no results".
            df = pd.DataFrame()

    print(f"Metabolomics Workbench ({search_type}): {len(df)} results")
    return df
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## 4. m/z ベース代謝物同定 (マルチ DB)
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
def identify_metabolites_by_mass(mz_values, adducts=None,
                                 tolerance_ppm=10,
                                 databases=None):
    """
    Build the neutral-mass query table for multi-database metabolite
    identification from observed m/z values.

    For every (m/z, adduct) pair the adduct mass shift is subtracted to get
    the candidate neutral mass, and the ppm tolerance is converted to Da.
    No database is contacted here; the rows describe the queries to run.

    Parameters:
        mz_values: list[float] — observed m/z values
        adducts: list — adduct ions, either names of built-in adducts
            (e.g., ["[M+H]+", "[M+Na]+", "[M-H]-", "[M+NH4]+"]) or dicts
            with at least "name" and "mass_diff"; defaults to all four
            built-in adducts
        tolerance_ppm: float — mass tolerance (ppm)
        databases: list — target databases ("hmdb", "metacyc", "mwb");
            defaults to ["hmdb", "mwb"]

    Returns:
        pandas.DataFrame with the columns query_mz, adduct, neutral_mass,
        tolerance_da and databases_queried.

    Raises:
        ValueError: if an adduct name is not one of the built-in adducts.
    """
    builtin_adducts = {
        "[M+H]+": {"name": "[M+H]+", "mass_diff": 1.007276, "mode": "positive"},
        "[M+Na]+": {"name": "[M+Na]+", "mass_diff": 22.989218, "mode": "positive"},
        "[M-H]-": {"name": "[M-H]-", "mass_diff": -1.007276, "mode": "negative"},
        # NH3 (17.026549) + proton (1.007276); the earlier 18.034164 was off.
        "[M+NH4]+": {"name": "[M+NH4]+", "mass_diff": 18.033826, "mode": "positive"},
    }

    if adducts is None:
        resolved = list(builtin_adducts.values())
    else:
        resolved = []
        for adduct in adducts:
            if isinstance(adduct, str):
                # The documented calling convention passes adduct names.
                try:
                    resolved.append(builtin_adducts[adduct])
                except KeyError:
                    raise ValueError(
                        f"Unknown adduct: {adduct!r}; known adducts are "
                        f"{sorted(builtin_adducts)}") from None
            else:
                resolved.append(adduct)

    databases = ["hmdb", "mwb"] if databases is None else list(databases)

    all_results = []
    for mz in mz_values:
        for adduct in resolved:
            neutral_mass = mz - adduct["mass_diff"]
            tolerance_da = neutral_mass * tolerance_ppm / 1e6
            all_results.append({
                "query_mz": mz,
                "adduct": adduct["name"],
                "neutral_mass": round(neutral_mass, 6),
                "tolerance_da": round(tolerance_da, 6),
                "databases_queried": databases,
            })

    df = pd.DataFrame(all_results)
    print(f"Mass-based ID: {len(mz_values)} m/z values × "
          f"{len(resolved)} adducts = {len(df)} queries "
          f"across {databases}")
    return df
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
## 5. RefMet 標準化命名
|
|
214
|
+
|
|
215
|
+
```python
|
|
216
|
+
def refmet_standardize(metabolite_names):
    """
    Prepare a RefMet (Reference Metabolomics) standardization table.

    Each input name becomes one row whose RefMet fields are left as None;
    they are meant to be filled in via the ToolUniverse tool
    MetabolomicsWorkbench_get_refmet_info.

    Parameters:
        metabolite_names: list — non-standardized metabolite names; a single
            string is treated as one name rather than iterated per character

    Returns:
        pandas.DataFrame with the columns input_name, refmet_name,
        super_class, main_class, sub_class, formula and exact_mass
        (the columns are present even for empty input).
    """
    if isinstance(metabolite_names, str):
        metabolite_names = [metabolite_names]

    refmet_fields = ("refmet_name", "super_class", "main_class",
                     "sub_class", "formula", "exact_mass")

    # Via ToolUniverse: MetabolomicsWorkbench_get_refmet_info
    rows = [
        {"input_name": name, **dict.fromkeys(refmet_fields)}
        for name in metabolite_names
    ]

    df = pd.DataFrame(rows, columns=["input_name", *refmet_fields])
    print(f"RefMet standardization: {len(df)} metabolites queried")
    return df
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
## References
|
|
244
|
+
|
|
245
|
+
### Output Files
|
|
246
|
+
|
|
247
|
+
| ファイル | 形式 |
|
|
248
|
+
|---|---|
|
|
249
|
+
| `results/hmdb_metabolites.csv` | CSV |
|
|
250
|
+
| `results/metacyc_pathways.json` | JSON |
|
|
251
|
+
| `results/mwb_compounds.csv` | CSV |
|
|
252
|
+
| `results/mass_id_results.csv` | CSV |
|
|
253
|
+
| `results/refmet_standardized.csv` | CSV |
|
|
254
|
+
| `figures/metabolite_class_distribution.png` | PNG |
|
|
255
|
+
|
|
256
|
+
### 利用可能ツール
|
|
257
|
+
|
|
258
|
+
> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
|
|
259
|
+
|
|
260
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
261
|
+
|---|---|---|
|
|
262
|
+
| HMDB | `HMDB_get_metabolite` | 代謝物詳細取得 |
|
|
263
|
+
| HMDB | `HMDB_search` | 代謝物検索 |
|
|
264
|
+
| HMDB | `HMDB_get_diseases` | 疾患関連代謝物 |
|
|
265
|
+
| MetaCyc | `MetaCyc_search_pathways` | 代謝パスウェイ検索 |
|
|
266
|
+
| MetaCyc | `MetaCyc_get_pathway` | パスウェイ詳細 |
|
|
267
|
+
| MetaCyc | `MetaCyc_get_compound` | 化合物詳細 |
|
|
268
|
+
| MetaCyc | `MetaCyc_get_reaction` | 反応詳細 |
|
|
269
|
+
| MWB | `MetabolomicsWorkbench_search_compound_by_name` | 化合物名検索 |
|
|
270
|
+
| MWB | `MetabolomicsWorkbench_get_refmet_info` | RefMet 標準化情報 |
|
|
271
|
+
| MWB | `MetabolomicsWorkbench_get_study` | 研究データ取得 |
|
|
272
|
+
| MWB | `MetabolomicsWorkbench_search_by_exact_mass` | 厳密質量検索 |
|
|
273
|
+
| MWB | `MetabolomicsWorkbench_search_by_mz` | m/z 値検索 |
|
|
274
|
+
| MWB | `MetabolomicsWorkbench_get_compound_by_pubchem_cid` | PubChem CID 検索 |
|
|
275
|
+
|
|
276
|
+
### 参照スキル
|
|
277
|
+
|
|
278
|
+
| スキル | 関連 |
|
|
279
|
+
|---|---|
|
|
280
|
+
| `scientific-metabolomics` | PLS-DA/VIP 統計解析 |
|
|
281
|
+
| `scientific-pathway-enrichment` | 代謝物 → パスウェイ富化 |
|
|
282
|
+
| `scientific-cheminformatics` | 分子記述子・構造解析 |
|
|
283
|
+
| `scientific-systems-biology` | FBA 代謝フラックス |
|
|
284
|
+
| `scientific-multi-omics` | メタボロミクス ↔ オミクス統合 |
|
|
285
|
+
|
|
286
|
+
### 依存パッケージ
|
|
287
|
+
|
|
288
|
+
`requests`, `pandas`, `numpy`, `xml.etree.ElementTree` (stdlib)
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-molecular-docking
|
|
3
|
+
description: |
|
|
4
|
+
構造ベース分子ドッキングスキル。DiffDock (拡散生成モデル)、
|
|
5
|
+
AutoDock Vina (スコアリング関数)、GNINA (CNN ベーススコアリング) を統合した
|
|
6
|
+
タンパク質-リガンド結合ポーズ予測、バーチャルスクリーニング、
|
|
7
|
+
結合自由エネルギー推定、ドッキングスコア統合パイプライン。
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Scientific Molecular Docking
|
|
11
|
+
|
|
12
|
+
DiffDock / AutoDock Vina / GNINA の 3 大ドッキングエンジンによる
|
|
13
|
+
構造ベース仮想スクリーニング・結合ポーズ予測パイプラインを提供する。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- タンパク質-リガンド結合モードを予測するとき
|
|
18
|
+
- 化合物ライブラリのバーチャルスクリーニングが必要なとき
|
|
19
|
+
- 結合自由エネルギーを推定してリガンドをランキングするとき
|
|
20
|
+
- DiffDock で AI ベースの結合ポーズ生成を行うとき
|
|
21
|
+
- 複数のドッキング手法のコンセンサス評価が必要なとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. リガンド・受容体の準備
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import os
|
|
31
|
+
import subprocess
|
|
32
|
+
import pandas as pd
|
|
33
|
+
import numpy as np
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def prepare_receptor(pdb_file, output_dir="structures/prepared",
                     remove_water=True, add_hydrogens=True):
    """
    Prepare a receptor (protein) for docking by converting PDB to PDBQT.

    Open Babel (pybel) is used when importable; otherwise the MGLTools
    script `prepare_receptor4.py` is run as a subprocess.

    Parameters:
        pdb_file: str — input PDB file
        output_dir: str — directory for the prepared file (created if missing)
        remove_water: bool — remove water molecules
        add_hydrogens: bool — add hydrogen atoms

    Returns:
        str — path of the written `<name>_receptor.pdbqt` file.

    Raises:
        ValueError: if Open Babel reads no structure from `pdb_file`.
        subprocess.CalledProcessError: if the MGLTools script fails.
    """
    os.makedirs(output_dir, exist_ok=True)
    base_name = os.path.splitext(os.path.basename(pdb_file))[0]

    # PDB -> PDBQT conversion (for AutoDock Vina)
    pdbqt_file = f"{output_dir}/{base_name}_receptor.pdbqt"

    # Guard only the import, so errors raised during the conversion itself
    # are not mistaken for "Open Babel is missing".
    try:
        from openbabel import pybel
    except ImportError:
        pybel = None

    if pybel is not None:
        mol = next(pybel.readfile("pdb", pdb_file), None)
        if mol is None:
            raise ValueError(f"No structure could be read from {pdb_file}")
        if remove_water:
            # NOTE(review): kept from the original snippet — confirm that the
            # installed Open Babel build exposes OBMol.DeleteWater().
            mol.OBMol.DeleteWater()
        if add_hydrogens:
            mol.addh()
        # "r" writes a rigid molecule (no ROOT/BRANCH torsion tree), which is
        # the form a docking receptor needs.
        mol.write("pdbqt", pdbqt_file, overwrite=True, opt={"r": None})
        print(f"Receptor prepared: {pdbqt_file}")
    else:
        # Open Babel unavailable: fall back to MGLTools prepare_receptor4
        cmd = ["prepare_receptor4.py", "-r", pdb_file, "-o", pdbqt_file]
        if add_hydrogens:
            cmd.extend(["-A", "hydrogens"])
        # Without -U the script's default cleanup also strips waters, so the
        # cleanup list is always explicit.
        # NOTE(review): "nphs_lps_nonstdres" is the documented default minus
        # "waters" — confirm against the installed MGLTools version.
        cmd.extend(["-U", "waters" if remove_water else "nphs_lps_nonstdres"])
        subprocess.run(cmd, check=True)
        print(f"Receptor prepared (MGLTools): {pdbqt_file}")

    return pdbqt_file
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _ligand_file_stem(raw_title, fallback, taken):
    """Return a unique, path-safe file stem for a ligand and record it in `taken`."""
    stem = (raw_title or "").strip()
    # A title containing a path separator would write outside output_dir.
    for sep in ("/", "\\"):
        stem = stem.replace(sep, "_")
    stem = stem or fallback
    if stem in taken:
        # Duplicate titles would otherwise overwrite each other's files.
        stem = f"{stem}_{fallback}"
    taken.add(stem)
    return stem


def prepare_ligands(sdf_file, output_dir="structures/ligands"):
    """
    Prepare ligand files from a multi-molecule SDF.

    With Open Babel each ligand is protonated, given 3D coordinates and
    written as PDBQT. Without it, RDKit is used and each ligand is written
    as an MDL Molfile (`.mol`), since RDKit has no Mol2/PDBQT writer.

    Parameters:
        sdf_file: str — input SDF file
        output_dir: str — directory for the prepared files (created if missing)

    Returns:
        pandas.DataFrame with one row per prepared ligand and the columns
        "name", "file" and "atoms".
    """
    os.makedirs(output_dir, exist_ok=True)
    prepared = []
    taken = set()

    try:
        from openbabel import pybel
    except ImportError:
        pybel = None

    if pybel is not None:
        for i, mol in enumerate(pybel.readfile("sdf", sdf_file)):
            mol.addh()
            mol.make3D()
            name = _ligand_file_stem(mol.title, f"ligand_{i}", taken)
            out = f"{output_dir}/{name}.pdbqt"
            mol.write("pdbqt", out, overwrite=True)
            prepared.append({"name": name, "file": out, "atoms": len(mol.atoms)})
    else:
        print("openbabel not available, using RDKit fallback")
        from rdkit import Chem
        from rdkit.Chem import AllChem

        for i, mol in enumerate(Chem.SDMolSupplier(sdf_file)):
            if mol is None:
                # Unparseable SDF record
                continue
            mol = Chem.AddHs(mol)
            # EmbedMolecule returns a negative conformer id on failure.
            if AllChem.EmbedMolecule(mol, randomSeed=42) < 0:
                print(f"  skipping ligand {i}: 3D embedding failed")
                continue
            title = mol.GetProp("_Name") if mol.HasProp("_Name") else ""
            name = _ligand_file_stem(title, f"lig_{i}", taken)
            # MolToMolFile writes MDL Molfile content, so use a ".mol" suffix.
            out = f"{output_dir}/{name}.mol"
            Chem.MolToMolFile(mol, out)
            prepared.append({"name": name, "file": out, "atoms": mol.GetNumAtoms()})

    print(f"Prepared {len(prepared)} ligands from {sdf_file}")
    return pd.DataFrame(prepared, columns=["name", "file", "atoms"])
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## 2. AutoDock Vina ドッキング
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
def _parse_vina_result_remarks(pdbqt_path):
    """Read the (affinity, rmsd_lb, rmsd_ub) remarks from a Vina pose file."""
    rows = []
    if not os.path.exists(pdbqt_path):
        return rows
    with open(pdbqt_path, encoding="utf-8", errors="replace") as handle:
        for line in handle:
            if not line.startswith("REMARK VINA RESULT:"):
                continue
            fields = line.split(":", 1)[1].split()
            try:
                affinity, rmsd_lb, rmsd_ub = (float(x) for x in fields[:3])
            except ValueError:
                # Fewer than three fields, or non-numeric text
                continue
            rows.append({"affinity_kcal": affinity,
                         "rmsd_lb": rmsd_lb,
                         "rmsd_ub": rmsd_ub})
    return rows


def autodock_vina_dock(receptor_pdbqt, ligand_pdbqt,
                       center, box_size,
                       exhaustiveness=32, n_poses=9):
    """
    Dock one ligand with AutoDock Vina.

    The `vina` Python bindings are used when importable; otherwise the
    `vina` command-line program is run. In both cases the poses are written
    next to the ligand as `<ligand>_docked.pdbqt`.

    Parameters:
        receptor_pdbqt: str — receptor PDBQT
        ligand_pdbqt: str — ligand PDBQT
        center: tuple — (x, y, z) box centre coordinates
        box_size: tuple — (sx, sy, sz) box size (Å)
        exhaustiveness: int — search exhaustiveness (8-64)
        n_poses: int — number of output poses

    Returns:
        (pandas.DataFrame, str) — per-pose table with the columns pose,
        affinity_kcal, rmsd_lb, rmsd_ub, and the path of the pose file.

    Raises:
        ValueError: if `center` or `box_size` does not have three components.
        subprocess.CalledProcessError: if the `vina` CLI fails.
    """
    if len(center) != 3 or len(box_size) != 3:
        raise ValueError("center and box_size must each have three components")

    # splitext (not str.replace) so that a ".pdbqt" inside a directory name is
    # untouched and a ligand without that suffix is never overwritten.
    stem, ext = os.path.splitext(ligand_pdbqt)
    output = f"{stem}_docked{ext or '.pdbqt'}"
    columns = ["pose", "affinity_kcal", "rmsd_lb", "rmsd_ub"]

    try:
        from vina import Vina
    except ImportError:
        Vina = None

    if Vina is not None:
        v = Vina(sf_name="vina")
        v.set_receptor(receptor_pdbqt)
        v.set_ligand_from_file(ligand_pdbqt)
        v.compute_vina_maps(center=list(center), box_size=list(box_size))
        v.dock(exhaustiveness=exhaustiveness, n_poses=n_poses)

        energies = v.energies()
        v.write_poses(output, n_poses=n_poses, overwrite=True)

        # energies() columns are energy terms (total, inter, intra, ...), not
        # RMSD values, so the RMSD bounds are taken from the pose file.
        # NOTE(review): assumes write_poses emits "REMARK VINA RESULT" lines;
        # when it does not, the RMSD columns stay None.
        remarks = _parse_vina_result_remarks(output)
        results = []
        for i, e in enumerate(energies):
            remark = remarks[i] if i < len(remarks) else {}
            results.append({
                "pose": i + 1,
                "affinity_kcal": float(e[0]),
                "rmsd_lb": remark.get("rmsd_lb"),
                "rmsd_ub": remark.get("rmsd_ub"),
            })
    else:
        # CLI fallback
        cmd = [
            "vina",
            "--receptor", receptor_pdbqt,
            "--ligand", ligand_pdbqt,
            "--center_x", str(center[0]),
            "--center_y", str(center[1]),
            "--center_z", str(center[2]),
            "--size_x", str(box_size[0]),
            "--size_y", str(box_size[1]),
            "--size_z", str(box_size[2]),
            "--exhaustiveness", str(exhaustiveness),
            "--num_modes", str(n_poses),
            "--out", output,
        ]
        subprocess.run(cmd, check=True)
        # Recover the scores from the written poses instead of returning an
        # empty table.
        results = [
            {"pose": i + 1, **remark}
            for i, remark in enumerate(_parse_vina_result_remarks(output))
        ]

    df = pd.DataFrame(results, columns=columns)
    if df.empty:
        print("Vina docking: no poses returned")
    else:
        print(f"Vina docking: best affinity = {df['affinity_kcal'].min():.1f} kcal/mol")
    return df, output
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## 3. DiffDock AI ドッキング
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
def diffdock_predict(protein_file, ligand_file, n_poses=10,
                     output_dir="results/diffdock"):
    """
    Dock with DiffDock (diffusion generative model).

    Runs `python -m diffdock.inference` as a subprocess and collects the
    ranked pose files found under `output_dir`.

    Parameters:
        protein_file: str — protein PDB file
        ligand_file: str — ligand SDF/MOL2 file
        n_poses: int — number of poses to generate
        output_dir: str — directory for DiffDock output (created if missing)

    Returns:
        pandas.DataFrame with the columns pose, file, confidence — one row
        per pose file that exists on disk; empty when DiffDock is not
        installed.

    Raises:
        subprocess.CalledProcessError: if DiffDock is installed but the
            inference run fails (its stderr is printed first).
    """
    import glob
    import re

    os.makedirs(output_dir, exist_ok=True)
    columns = ["pose", "file", "confidence"]

    # DiffDock-L (large model) inference
    cmd = [
        "python", "-m", "diffdock.inference",
        "--protein_path", protein_file,
        "--ligand", ligand_file,
        "--out_dir", output_dir,
        "--samples_per_complex", str(n_poses),
        "--model_dir", "DiffDock-L",
        "--confidence_model_dir", "DiffDock-L",
    ]

    not_installed = ("DiffDock not installed. Install from: "
                     "https://github.com/gcorso/DiffDock")

    print(f"Running DiffDock ({n_poses} poses)...")
    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except FileNotFoundError:
        print(not_installed)
        return pd.DataFrame()
    except subprocess.CalledProcessError as err:
        stderr = (err.stderr or b"").decode("utf-8", errors="replace").strip()
        if "No module named" in stderr:
            # The interpreter exists but the package does not: same
            # situation as a missing executable.
            print(not_installed)
            return pd.DataFrame()
        # Surface the captured stderr, which would otherwise be lost.
        print(f"DiffDock failed (exit code {err.returncode}):\n{stderr[-2000:]}")
        raise

    # NOTE(review): DiffDock is believed to write
    # "<out_dir>/<complex>/rankN_confidence<score>.sdf"; these are collected
    # in addition to the flat "rankN.sdf" / "rankN_confidence.txt" layout.
    scored_pattern = re.compile(r"rank(\d+)_confidence(-?\d+(?:\.\d+)?)\.sdf$")
    scored = {}
    search = os.path.join(glob.escape(output_dir), "**", "rank*_confidence*.sdf")
    for path in sorted(glob.glob(search, recursive=True)):
        match = scored_pattern.search(os.path.basename(path))
        if match:
            scored.setdefault(int(match.group(1)), (path, float(match.group(2))))

    # Parse results
    results = []
    for rank in range(1, n_poses + 1):
        pose_file = f"{output_dir}/rank{rank}.sdf"
        conf_file = f"{output_dir}/rank{rank}_confidence.txt"
        confidence = None
        if os.path.exists(conf_file):
            with open(conf_file) as f:
                try:
                    confidence = float(f.read().strip())
                except ValueError:
                    confidence = None
        elif rank in scored:
            scored_path, confidence = scored[rank]
            if not os.path.exists(pose_file):
                pose_file = scored_path
        if not os.path.exists(pose_file):
            # Do not report poses that were never written.
            continue
        results.append({
            "pose": rank,
            "file": pose_file,
            "confidence": confidence,
        })

    df = pd.DataFrame(results, columns=columns)
    if not df.empty and df["confidence"].notna().any():
        print(f"DiffDock: {len(df)} poses, "
              f"best confidence = {df['confidence'].max()}")
    return df
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
## 4. バーチャルスクリーニング
|
|
230
|
+
|
|
231
|
+
```python
|
|
232
|
+
def virtual_screening(receptor_pdbqt, ligand_library,
                      center, box_size,
                      method="vina", top_n=20):
    """
    Virtual screening of a compound library.

    Each ligand is docked with `autodock_vina_dock` (exhaustiveness 16,
    3 poses) and ligands are ranked by their best (lowest) affinity.

    Parameters:
        receptor_pdbqt: str — receptor PDBQT
        ligand_library: list[str] — list of ligand PDBQT files
        center/box_size: docking box parameters
        method: "vina" (the only engine implemented here)
        top_n: int — number of top candidates to keep

    Returns:
        pandas.DataFrame with the columns ligand, best_affinity, n_poses,
        sorted by ascending best_affinity; empty when no ligand produced
        a pose.

    Raises:
        NotImplementedError: if method == "diffdock".
        ValueError: for any other unsupported method.
    """
    if method == "diffdock":
        raise NotImplementedError(
            "virtual_screening: method 'diffdock' is not implemented; "
            "use method='vina'")
    if method != "vina":
        raise ValueError(f"Unknown docking method: {method}")

    columns = ["ligand", "best_affinity", "n_poses"]
    total = len(ligand_library)
    all_results = []

    for i, ligand in enumerate(ligand_library, start=1):
        lig_name = os.path.splitext(os.path.basename(ligand))[0]
        print(f"  [{i}/{total}] Docking {lig_name}...", end=" ")

        df, _ = autodock_vina_dock(
            receptor_pdbqt, ligand, center, box_size,
            exhaustiveness=16, n_poses=3
        )
        if df.empty:
            # Terminate the progress line started above.
            print("no poses")
            continue

        # Use the minimum rather than assuming the first row is the best pose.
        best_affinity = df["affinity_kcal"].min()
        all_results.append({
            "ligand": lig_name,
            "best_affinity": best_affinity,
            "n_poses": len(df),
        })
        print(f"{best_affinity:.1f} kcal/mol")

    # Explicit columns keep sort_values working when nothing was docked.
    results_df = pd.DataFrame(all_results, columns=columns)
    results_df = results_df.sort_values("best_affinity").head(top_n)

    print(f"\nVirtual screening: {total} compounds → "
          f"top {len(results_df)} candidates")
    return results_df
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
## References
|
|
274
|
+
|
|
275
|
+
### Output Files
|
|
276
|
+
|
|
277
|
+
| ファイル | 形式 |
|
|
278
|
+
|---|---|
|
|
279
|
+
| `structures/prepared/*_receptor.pdbqt` | PDBQT |
|
|
280
|
+
| `structures/ligands/*.pdbqt` | PDBQT |
|
|
281
|
+
| `results/docking_results.csv` | CSV |
|
|
282
|
+
| `results/diffdock/rank*.sdf` | SDF |
|
|
283
|
+
| `results/virtual_screening.csv` | CSV |
|
|
284
|
+
| `figures/docking_scores.png` | PNG |
|
|
285
|
+
|
|
286
|
+
### 利用可能ツール
|
|
287
|
+
|
|
288
|
+
> このスキルは主に K-Dense-AI/claude-scientific-skills の diffdock スキルを参照しています。ToolUniverse SMCP には専用ドッキングツールは含まれませんが、タンパク質構造は PDB/AlphaFold ツール経由で取得可能です。
|
|
289
|
+
|
|
290
|
+
### 参照スキル
|
|
291
|
+
|
|
292
|
+
| スキル | 関連 |
|
|
293
|
+
|---|---|
|
|
294
|
+
| `scientific-protein-structure-analysis` | 受容体構造取得・結合部位検出 |
|
|
295
|
+
| `scientific-drug-target-profiling` | 標的選定 → ドッキング |
|
|
296
|
+
| `scientific-cheminformatics` | リガンド記述子・フィルタリング |
|
|
297
|
+
| `scientific-admet-pharmacokinetics` | ドッキング → ADMET |
|
|
298
|
+
| `scientific-drug-repurposing` | リポジショニング候補ドッキング |
|
|
299
|
+
| `scientific-protein-interaction-network` | PPI → ドッキング界面 |
|
|
300
|
+
|
|
301
|
+
### 依存パッケージ
|
|
302
|
+
|
|
303
|
+
`vina` (AutoDock Vina; Python バインディング不在時は `vina` CLI を使用), `openbabel` (optional; 不在時は MGLTools / `rdkit` にフォールバック), `rdkit`, `numpy`, `pandas`
|