@nahisaho/satori 0.17.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -38
- package/package.json +1 -1
- package/src/.github/skills/scientific-alphafold-structures/SKILL.md +256 -0
- package/src/.github/skills/scientific-arrayexpress-expression/SKILL.md +264 -0
- package/src/.github/skills/scientific-civic-evidence/SKILL.md +292 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +4 -0
- package/src/.github/skills/scientific-crossref-metadata/SKILL.md +313 -0
- package/src/.github/skills/scientific-depmap-dependencies/SKILL.md +239 -0
- package/src/.github/skills/scientific-disease-research/SKILL.md +4 -0
- package/src/.github/skills/scientific-drugbank-resources/SKILL.md +269 -0
- package/src/.github/skills/scientific-gnomad-variants/SKILL.md +356 -0
- package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +271 -0
- package/src/.github/skills/scientific-gwas-catalog/SKILL.md +267 -0
- package/src/.github/skills/scientific-icgc-cancer-data/SKILL.md +351 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +4 -0
- package/src/.github/skills/scientific-opentargets-genetics/SKILL.md +299 -0
- package/src/.github/skills/scientific-pharmgkb-pgx/SKILL.md +306 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +4 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-rcsb-pdb-search/SKILL.md +280 -0
- package/src/.github/skills/scientific-reactome-pathways/SKILL.md +242 -0
- package/src/.github/skills/scientific-semantic-scholar/SKILL.md +298 -0
- package/src/.github/skills/scientific-uniprot-proteome/SKILL.md +273 -0
- package/src/.github/skills/scientific-variant-interpretation/SKILL.md +4 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-pharmgkb-pgx
|
|
3
|
+
description: |
|
|
4
|
+
PharmGKB 薬理ゲノミクススキル。PharmGKB REST API による
|
|
5
|
+
臨床アノテーション・薬物遺伝子関連・投与量ガイドライン・
|
|
6
|
+
スターアレル解析。ToolUniverse 連携: pharmgkb。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: pharmgkb
|
|
9
|
+
name: PharmGKB
|
|
10
|
+
description: 臨床アノテーション・薬物遺伝子関連・PGx ガイドライン
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Scientific PharmGKB PGx
|
|
14
|
+
|
|
15
|
+
PharmGKB (Pharmacogenomics Knowledgebase) REST API を活用した
|
|
16
|
+
薬理ゲノミクス臨床アノテーション・薬物遺伝子相互作用・投与量
|
|
17
|
+
ガイドライン検索パイプラインを提供する。
|
|
18
|
+
|
|
19
|
+
## When to Use
|
|
20
|
+
|
|
21
|
+
- 薬物と遺伝子変異の関連を調べるとき
|
|
22
|
+
- 臨床アノテーション (エビデンスレベル付き) を検索するとき
|
|
23
|
+
- 投与量調整ガイドライン (CPIC/DPWG) を取得するとき
|
|
24
|
+
- スターアレルと表現型の対応を確認するとき
|
|
25
|
+
- 特定薬物の薬理ゲノミクス情報を包括的に取得するとき
|
|
26
|
+
- 精密医療の薬物選択を支援するとき
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
## 1. 薬物・遺伝子検索
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
import requests
|
|
36
|
+
import pandas as pd
|
|
37
|
+
|
|
38
|
+
# Root URL of the PharmGKB REST data endpoints; the request helpers in this
# document append a resource path (e.g. "/chemical", "/gene") to it.
PGKB_BASE = "https://api.pharmgkb.org/v1/data"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def pharmgkb_search_drugs(query, limit=50):
    """
    Search PharmGKB chemicals (drugs) by name.

    Parameters:
        query: str — drug name (e.g. "warfarin", "clopidogrel")
        limit: int — maximum number of rows to return
            (None keeps every record in the response)

    Returns:
        pandas.DataFrame — one row per chemical with the columns
        pharmgkb_id, name, generic_names, trade_names, type and
        cross_references (a count).

    Raises:
        requests.HTTPError — when the API replies with an error status.
    """
    url = f"{PGKB_BASE}/chemical"
    params = {"name": query, "view": "max"}

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()

    # A JSON null for "data" is treated like an empty result list.
    items = resp.json().get("data") or []
    if limit is not None:
        # The request carries no paging argument, so the cap is applied
        # client-side; previously `limit` was accepted but ignored.
        items = items[:max(limit, 0)]

    results = [
        {
            "pharmgkb_id": item.get("id", ""),
            "name": item.get("name", ""),
            # `or []` guards against list fields sent as JSON null.
            "generic_names": "; ".join(item.get("genericNames") or []),
            # Only the first five trade names are kept per row.
            "trade_names": "; ".join((item.get("tradeNames") or [])[:5]),
            "type": item.get("type", ""),
            "cross_references": len(item.get("crossReferences") or []),
        }
        for item in items
    ]

    df = pd.DataFrame(results)
    print(f"PharmGKB drugs: {len(df)} results "
          f"(query='{query}')")
    return df
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def pharmgkb_search_genes(query, limit=50):
    """
    Search PharmGKB genes by symbol.

    Parameters:
        query: str — gene symbol (e.g. "CYP2D6")
        limit: int — maximum number of rows to return
            (None keeps every record in the response)

    Returns:
        pandas.DataFrame — one row per gene with the columns pharmgkb_id,
        symbol, name, chromosome, cpic_gene and has_prescribing_info.

    Raises:
        requests.HTTPError — when the API replies with an error status.
    """
    url = f"{PGKB_BASE}/gene"
    params = {"symbol": query, "view": "max"}

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()

    # A JSON null for "data" is treated like an empty result list.
    items = resp.json().get("data") or []
    if limit is not None:
        # The request carries no paging argument, so the cap is applied
        # client-side; previously `limit` was accepted but ignored.
        items = items[:max(limit, 0)]

    results = [
        {
            "pharmgkb_id": item.get("id", ""),
            "symbol": item.get("symbol", ""),
            "name": item.get("name", ""),
            "chromosome": item.get("chromosomeFormatted", ""),
            "cpic_gene": item.get("cpicGene", False),
            "has_prescribing_info": item.get("hasPrescribingInfo", False),
        }
        for item in items
    ]

    df = pd.DataFrame(results)
    print(f"PharmGKB genes: {len(df)} results "
          f"(query='{query}')")
    return df
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## 2. 臨床アノテーション取得
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
def pharmgkb_clinical_annotations(gene_or_drug,
                                  search_type="gene"):
    """
    Look up PharmGKB clinical annotations for a gene or a drug.

    The name is first resolved to a PharmGKB id through the gene or
    chemical endpoint; that id is then used to filter the
    clinicalAnnotation endpoint.

    Parameters:
        gene_or_drug: str — gene symbol or drug name
        search_type: str — "gene" resolves a gene symbol; any other
            value is handled as a drug name

    Returns:
        pandas.DataFrame — one row per annotation (annotation_id, level,
        score, genes, drugs, phenotype_category, sentences), sorted by
        level. Empty when the name cannot be resolved to an id.

    Raises:
        requests.HTTPError — when either request gets an error status.
    """
    url = f"{PGKB_BASE}/clinicalAnnotation"
    params = {"view": "max"}

    # Pick the lookup endpoint and the matching annotation filter.
    if search_type == "gene":
        lookup_url = f"{PGKB_BASE}/gene"
        lookup_params = {"symbol": gene_or_drug}
        filter_key = "relatedGenes.id"
    else:
        lookup_url = f"{PGKB_BASE}/chemical"
        lookup_params = {"name": gene_or_drug}
        filter_key = "relatedChemicals.id"

    lookup = requests.get(lookup_url, params=lookup_params, timeout=30)
    lookup.raise_for_status()
    matches = lookup.json().get("data") or []
    entity_id = matches[0].get("id", "") if matches else ""

    results = []
    if entity_id:
        params[filter_key] = entity_id
        resp = requests.get(url, params=params, timeout=30)
        resp.raise_for_status()

        for item in resp.json().get("data") or []:
            # `or ""` / `or []` keep JSON nulls from breaking the joins.
            gene_symbols = [g.get("symbol") or ""
                            for g in item.get("relatedGenes") or []]
            drug_names = [c.get("name") or ""
                          for c in item.get("relatedChemicals") or []]
            results.append({
                "annotation_id": item.get("id", ""),
                "level": item.get("level", ""),
                "score": item.get("score", ""),
                "genes": "; ".join(gene_symbols),
                "drugs": "; ".join(drug_names),
                "phenotype_category": item.get(
                    "phenotypeCategory", ""),
                # Truncated to the first 300 characters of the HTML text.
                "sentences": (item.get("textHtml") or "")[:300],
            })
    # else: without an id the annotation query would carry no filter and
    # return rows unrelated to the request, so it is skipped entirely.

    df = pd.DataFrame(results)
    if not df.empty:
        df = df.sort_values("level")

    print(f"PharmGKB annotations: {len(df)} "
          f"({search_type}={gene_or_drug})")
    return df
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## 3. 投与量ガイドライン取得
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
def pharmgkb_dosing_guidelines(drug_name=None, gene=None):
    """
    Search PharmGKB dosing guidelines (CPIC/DPWG).

    Parameters:
        drug_name: str — drug name; adds a relatedChemicals.name filter
        gene: str — gene symbol; adds a relatedGenes.symbol filter

    With neither argument the request is sent with only view=max.

    Returns:
        pandas.DataFrame — one row per guideline with the columns
        guideline_id, name, source, genes, drugs and recommendation.

    Raises:
        requests.HTTPError — when the API replies with an error status.
    """
    url = f"{PGKB_BASE}/guideline"
    params = {"view": "max"}

    if drug_name:
        params["relatedChemicals.name"] = drug_name
    if gene:
        params["relatedGenes.symbol"] = gene

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()

    results = []
    for item in resp.json().get("data") or []:
        # `or []` / `or ""` keep JSON nulls from raising in the
        # comprehension or in str.join below.
        gene_symbols = [g.get("symbol") or ""
                        for g in item.get("relatedGenes") or []]
        drug_names = [c.get("name") or ""
                      for c in item.get("relatedChemicals") or []]
        results.append({
            "guideline_id": item.get("id", ""),
            "name": item.get("name", ""),
            "source": item.get("source", ""),
            "genes": "; ".join(gene_symbols),
            "drugs": "; ".join(drug_names),
            # Truncated to the first 500 characters of the HTML text.
            "recommendation": (item.get("textHtml") or "")[:500],
        })

    df = pd.DataFrame(results)
    print(f"PharmGKB guidelines: {len(df)} "
          f"(drug={drug_name}, gene={gene})")
    return df
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## 4. PharmGKB 統合パイプライン
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
def pharmgkb_pipeline(drug_name, genes=None,
                      output_dir="results"):
    """
    Run the PharmGKB lookups for one drug and write the results as CSV.

    Parameters:
        drug_name: str — drug name
        genes: list[str] — related gene symbols (optional)
        output_dir: str — output directory (created if missing)

    Files written under output_dir:
        drugs.csv, annotations.csv, guidelines.csv, and
        gene_annotations.csv when at least one gene lookup succeeded.

    Returns:
        dict with the DataFrames "drugs", "annotations", "guidelines"
        and "gene_annotations" (empty when no gene lookup succeeded).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Drug search
    drugs = pharmgkb_search_drugs(drug_name)
    drugs.to_csv(output_dir / "drugs.csv", index=False)

    # 2) Clinical annotations for the drug
    annotations = pharmgkb_clinical_annotations(
        drug_name, search_type="drug")
    annotations.to_csv(output_dir / "annotations.csv",
                       index=False)

    # 3) Dosing guidelines
    guidelines = pharmgkb_dosing_guidelines(
        drug_name=drug_name)
    guidelines.to_csv(output_dir / "guidelines.csv",
                      index=False)

    # 4) Per-gene annotations (best effort: one failing gene must not
    #    abort the run, but the failure is reported, not swallowed)
    gene_df = pd.DataFrame()
    gene_results = []
    for g in genes or []:
        try:
            g_ann = pharmgkb_clinical_annotations(
                g, search_type="gene")
        except Exception as exc:
            print(f" Warning: {g} — {exc}")
            continue
        g_ann["query_gene"] = g
        gene_results.append(g_ann)

    if gene_results:
        gene_df = pd.concat(gene_results, ignore_index=True)
        gene_df.to_csv(
            output_dir / "gene_annotations.csv",
            index=False)

    print(f"PharmGKB pipeline: {output_dir}")
    return {
        "drugs": drugs,
        "annotations": annotations,
        "guidelines": guidelines,
        # Added key; the three keys above are unchanged.
        "gene_annotations": gene_df,
    }
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
---
|
|
279
|
+
|
|
280
|
+
## ToolUniverse 連携
|
|
281
|
+
|
|
282
|
+
| TU Key | ツール名 | 連携内容 |
|
|
283
|
+
|--------|---------|---------|
|
|
284
|
+
| `pharmgkb` | PharmGKB | 臨床アノテーション・薬物遺伝子・PGx ガイドライン |
|
|
285
|
+
|
|
286
|
+
## パイプライン統合
|
|
287
|
+
|
|
288
|
+
```
|
|
289
|
+
pharmacogenomics → pharmgkb-pgx → clinical-decision-support
|
|
290
|
+
(PGx 解析全般) (PharmGKB API) (臨床意思決定)
|
|
291
|
+
│ │ ↓
|
|
292
|
+
drug-discovery ──────┘ precision-oncology
|
|
293
|
+
(薬物開発) │ (精密腫瘍学)
|
|
294
|
+
↓
|
|
295
|
+
variant-interpretation
|
|
296
|
+
(変異臨床解釈)
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
## パイプライン出力
|
|
300
|
+
|
|
301
|
+
| ファイル | 説明 | 次スキル |
|
|
302
|
+
|---------|------|---------|
|
|
303
|
+
| `results/drugs.csv` | 薬物情報 | → drug-discovery |
|
|
304
|
+
| `results/annotations.csv` | 臨床アノテーション | → variant-interpretation |
|
|
305
|
+
| `results/guidelines.csv` | 投与量ガイドライン | → clinical-decision-support |
|
|
306
|
+
| `results/gene_annotations.csv` | 遺伝子別アノテーション | → pharmacogenomics |
|
|
@@ -6,6 +6,10 @@ description: |
|
|
|
6
6
|
ネットワーク構築・解析パイプライン。GO/KEGG 富化、相互作用パートナー発見、
|
|
7
7
|
組織特異的ネットワーク (HumanBase)、化合物-標的ネットワーク対応。
|
|
8
8
|
14 の ToolUniverse SMCP ツールと連携。
|
|
9
|
+
tu_tools:
|
|
10
|
+
- key: intact
|
|
11
|
+
name: IntAct
|
|
12
|
+
description: 分子相互作用データベース (EBI)
|
|
9
13
|
---
|
|
10
14
|
|
|
11
15
|
# Scientific Protein Interaction Network
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-rcsb-pdb-search
|
|
3
|
+
description: |
|
|
4
|
+
RCSB PDB 構造検索スキル。RCSB PDB Search API および
|
|
5
|
+
Data API によるタンパク質立体構造検索・メタデータ取得・
|
|
6
|
+
リガンド情報・解像度フィルタリング。ToolUniverse 連携:
|
|
7
|
+
rcsb_pdb, rcsb_search。
|
|
8
|
+
tu_tools:
|
|
9
|
+
- key: rcsb_pdb
|
|
10
|
+
name: RCSB PDB Data
|
|
11
|
+
description: PDB エントリデータ取得・構造メタデータ
|
|
12
|
+
- key: rcsb_search
|
|
13
|
+
name: RCSB PDB Search
|
|
14
|
+
description: PDB 構造検索・テキスト/配列/構造類似検索
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
# Scientific RCSB PDB Search
|
|
18
|
+
|
|
19
|
+
RCSB PDB Search API および Data API を活用したタンパク質立体構造
|
|
20
|
+
検索・メタデータ取得・リガンド情報パイプラインを提供する。
|
|
21
|
+
|
|
22
|
+
## When to Use
|
|
23
|
+
|
|
24
|
+
- PDB のタンパク質立体構造をテキスト検索するとき
|
|
25
|
+
- 解像度・実験手法でフィルタリングするとき
|
|
26
|
+
- リガンド結合構造を検索するとき
|
|
27
|
+
- 構造のメタデータ (著者・引用・解像度) を取得するとき
|
|
28
|
+
- 配列類似性で構造を検索するとき
|
|
29
|
+
- PDB エントリからリガンド・結合サイト情報を取得するとき
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Quick Start
|
|
34
|
+
|
|
35
|
+
## 1. テキスト検索・構造メタデータ
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
import requests
|
|
39
|
+
import pandas as pd
|
|
40
|
+
|
|
41
|
+
# Endpoint that rcsb_text_search POSTs its JSON query document to.
RCSB_SEARCH = "https://search.rcsb.org/rcsbsearch/v2/query"
|
|
42
|
+
# Root URL of the RCSB Data API "core" resources; rcsb_get_entry appends
# "/entry/<pdb_id>" to it.
RCSB_DATA = "https://data.rcsb.org/rest/v1/core"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def rcsb_text_search(query, method=None,
                     resolution_max=None, limit=50):
    """
    Full-text search of RCSB PDB entries.

    Parameters:
        query: str — search text (e.g. "BRCA1", "kinase")
        method: str — experimental method filter
            (e.g. "X-RAY DIFFRACTION", "ELECTRON MICROSCOPY")
        resolution_max: float — upper bound on resolution (Å);
            None disables the filter
        limit: int — maximum number of ids requested

    Returns:
        list[str] — PDB ids ordered by descending relevance score;
        empty when nothing matches.

    Raises:
        requests.HTTPError — when the API replies with an error status.
    """
    nodes = [
        {
            "type": "terminal",
            "service": "full_text",
            "parameters": {"value": query},
        }
    ]

    if method:
        nodes.append({
            "type": "terminal",
            "service": "text",
            "parameters": {
                "attribute": "exptl.method",
                "operator": "exact_match",
                "value": method,
            },
        })

    # `is not None` so that a cutoff of 0 is not mistaken for "no filter".
    if resolution_max is not None:
        nodes.append({
            "type": "terminal",
            "service": "text",
            "parameters": {
                "attribute": "rcsb_entry_info."
                             "resolution_combined",
                "operator": "less_or_equal",
                "value": resolution_max,
            },
        })

    search_query = {
        "query": {
            "type": "group",
            "logical_operator": "and",
            "nodes": nodes,
        },
        "return_type": "entry",
        "request_options": {
            "paginate": {"start": 0, "rows": limit},
            "sort": [{"sort_by": "score", "direction": "desc"}],
        },
    }

    resp = requests.post(RCSB_SEARCH, json=search_query,
                         timeout=30)
    resp.raise_for_status()

    # The search service answers "no hits" with 204 No Content and an
    # empty body, which resp.json() cannot parse.
    data = {} if resp.status_code == 204 else resp.json()

    pdb_ids = [r["identifier"]
               for r in data.get("result_set") or []]
    total = data.get("total_count", 0)
    print(f"RCSB PDB search: {len(pdb_ids)}/{total} "
          f"(query='{query}')")
    return pdb_ids
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def rcsb_get_entry(pdb_id):
    """
    Fetch summary metadata for one RCSB PDB entry.

    Parameters:
        pdb_id: str — PDB ID (e.g. "1BRS", "7S4O")

    Returns:
        dict with the keys pdb_id, title, method, resolution,
        deposition_date, polymer_count, nonpolymer_count,
        citation_title and citation_doi. `resolution` is None when the
        entry reports no combined resolution.

    Raises:
        requests.HTTPError — when the API replies with an error status.
    """
    url = f"{RCSB_DATA}/entry/{pdb_id}"
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # `or {}` also covers sections that are present but JSON null.
    info = data.get("rcsb_entry_info") or {}
    citation = data.get("rcsb_primary_citation") or {}
    struct = data.get("struct") or {}
    accession = data.get("rcsb_accession_info") or {}

    # A missing, null or empty resolution list all yield None instead of
    # raising TypeError/IndexError on the [0] lookup.
    resolutions = info.get("resolution_combined") or [None]

    # NOTE(review): the Data API is understood to publish the deposit
    # date under rcsb_accession_info.deposit_date — confirm. The original
    # rcsb_entry_info lookup is kept first for compatibility.
    deposition_date = (info.get("deposition_date")
                       or accession.get("deposit_date", ""))

    result = {
        "pdb_id": pdb_id,
        "title": struct.get("title", ""),
        "method": info.get("experimental_method", ""),
        "resolution": resolutions[0],
        "deposition_date": deposition_date,
        "polymer_count": info.get(
            "deposited_polymer_entity_count", 0),
        "nonpolymer_count": info.get(
            "deposited_nonpolymer_entity_count", 0),
        "citation_title": citation.get("title", ""),
        "citation_doi": citation.get(
            "pdbx_database_id_doi", ""),
    }
    return result
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## 2. 構造バッチ取得・比較
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
def rcsb_batch_metadata(pdb_ids):
    """
    Collect entry metadata for several PDB ids into one table.

    Parameters:
        pdb_ids: list[str] — PDB ids to fetch

    Returns:
        pandas.DataFrame — one row per id that could be fetched, sorted
        by the "resolution" column. Ids whose fetch raises are reported
        with a warning line and left out.
    """
    rows = []
    for entry_id in pdb_ids:
        try:
            rows.append(rcsb_get_entry(entry_id))
        except Exception as err:
            # Best effort: report the failing id and keep going.
            print(f" Warning: {entry_id} — {err}")

    table = pd.DataFrame(rows)
    if len(table.index) > 0:
        table = table.sort_values("resolution")
    print(f"RCSB batch: {len(table)} entries")
    return table
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## 3. リガンド・結合サイト情報
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
def rcsb_get_ligands(pdb_id):
    """
    List the non-polymer entities (ligands) of one PDB entry.

    Parameters:
        pdb_id: str — PDB ID

    Returns:
        pandas.DataFrame — one row per ligand that could be fetched, with
        the columns pdb_id, entity_id, comp_id, name, formula and
        formula_weight.

    Raises:
        requests.HTTPError — when the entry request gets an error status.
        Failures on individual ligand requests are reported and skipped.
    """
    entry_resp = requests.get(f"{RCSB_DATA}/entry/{pdb_id}", timeout=30)
    entry_resp.raise_for_status()
    entry = entry_resp.json()

    # Entity ids are numbered across polymer and non-polymer entities, so
    # the ligand ids are generally not 1..n. Use the id list published by
    # the entry and only fall back to the 1..n guess when it is absent.
    identifiers = entry.get("rcsb_entry_container_identifiers") or {}
    entity_ids = identifiers.get("non_polymer_entity_ids")
    if entity_ids is None:
        info = entry.get("rcsb_entry_info") or {}
        count = info.get("deposited_nonpolymer_entity_count") or 0
        entity_ids = range(1, count + 1)

    ligands = []
    for eid in entity_ids:
        lig_url = f"{RCSB_DATA}/nonpolymer_entity/{pdb_id}/{eid}"
        try:
            r = requests.get(lig_url, timeout=15)
            if r.status_code != 200:
                print(f" Warning: {pdb_id}/{eid} — HTTP {r.status_code}")
                continue
            ld = r.json()
        except (requests.RequestException, ValueError) as exc:
            # Best effort per ligand: report and continue with the rest.
            print(f" Warning: {pdb_id}/{eid} — {exc}")
            continue

        nonpoly = ld.get("pdbx_entity_nonpoly") or {}
        summary = ld.get("rcsb_nonpolymer_entity") or {}
        weight = summary.get("formula_weight", "")
        ligands.append({
            "pdb_id": pdb_id,
            # Kept as an integer column when the id is numeric.
            "entity_id": int(eid) if str(eid).isdigit() else eid,
            "comp_id": nonpoly.get("comp_id", ""),
            "name": summary.get("pdbx_description", ""),
            # "formula" has always carried the formula weight; the key is
            # kept for existing consumers of the CSV.
            "formula": weight,
            "formula_weight": weight,
        })

    df = pd.DataFrame(ligands)
    print(f"RCSB ligands: {pdb_id} → {len(df)} ligands")
    return df
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
## 4. RCSB PDB 統合パイプライン
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
def rcsb_pipeline(query, resolution_max=3.0,
                  output_dir="results", max_entries=20):
    """
    Search RCSB PDB, collect entry metadata and the ligands of the
    first-ranked structure, and write the results as CSV.

    Parameters:
        query: str — search text
        resolution_max: float — upper bound on resolution
        output_dir: str — output directory (created if missing)
        max_entries: int — how many of the search hits get their
            metadata fetched (default 20, the former fixed cap)

    Files written under output_dir:
        pdb_entries.csv, and ligands.csv when at least one entry was
        fetched.

    Returns:
        dict with the DataFrames "entries" and "ligands" ("ligands" is
        empty when no entry was fetched).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Text search
    pdb_ids = rcsb_text_search(
        query, resolution_max=resolution_max)

    # 2) Batch metadata for the leading hits
    metadata = rcsb_batch_metadata(pdb_ids[:max_entries])
    metadata.to_csv(output_dir / "pdb_entries.csv",
                    index=False)

    # 3) Ligands of the first row of the metadata table
    ligands = pd.DataFrame()
    if not metadata.empty:
        top = metadata.iloc[0]["pdb_id"]
        ligands = rcsb_get_ligands(top)
        ligands.to_csv(output_dir / "ligands.csv",
                       index=False)

    print(f"RCSB pipeline: {output_dir}")
    # "ligands" is an added key; "entries" is unchanged.
    return {"entries": metadata, "ligands": ligands}
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
---
|
|
254
|
+
|
|
255
|
+
## ToolUniverse 連携
|
|
256
|
+
|
|
257
|
+
| TU Key | ツール名 | 連携内容 |
|
|
258
|
+
|--------|---------|---------|
|
|
259
|
+
| `rcsb_pdb` | RCSB PDB Data | エントリデータ・構造メタデータ |
|
|
260
|
+
| `rcsb_search` | RCSB PDB Search | テキスト/配列/構造類似検索 |
|
|
261
|
+
|
|
262
|
+
## パイプライン統合
|
|
263
|
+
|
|
264
|
+
```
|
|
265
|
+
protein-structure-analysis → rcsb-pdb-search → molecular-docking
|
|
266
|
+
(PDB/AlphaFold 構造) (RCSB Search API) (Vina/DiffDock)
|
|
267
|
+
│ │ ↓
|
|
268
|
+
uniprot-proteome ──────────────┘ drug-target-profiling
|
|
269
|
+
(UniProt配列) │ (標的プロファイリング)
|
|
270
|
+
↓
|
|
271
|
+
alphafold-structures
|
|
272
|
+
(AlphaFold DB)
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
## パイプライン出力
|
|
276
|
+
|
|
277
|
+
| ファイル | 説明 | 次スキル |
|
|
278
|
+
|---------|------|---------|
|
|
279
|
+
| `results/pdb_entries.csv` | 構造メタデータ | → protein-structure-analysis |
|
|
280
|
+
| `results/ligands.csv` | リガンド情報 | → molecular-docking |
|