@nahisaho/satori 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -30
- package/package.json +1 -1
- package/src/.github/skills/scientific-alphafold-structures/SKILL.md +256 -0
- package/src/.github/skills/scientific-arrayexpress-expression/SKILL.md +264 -0
- package/src/.github/skills/scientific-crossref-metadata/SKILL.md +313 -0
- package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +271 -0
- package/src/.github/skills/scientific-gwas-catalog/SKILL.md +267 -0
- package/src/.github/skills/scientific-icgc-cancer-data/SKILL.md +351 -0
- package/src/.github/skills/scientific-pharmgkb-pgx/SKILL.md +306 -0
- package/src/.github/skills/scientific-semantic-scholar/SKILL.md +298 -0
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-icgc-cancer-data
|
|
3
|
+
description: |
|
|
4
|
+
ICGC がんゲノムデータスキル。ICGC ARGO DCC API および
|
|
5
|
+
レガシー API による国際がんゲノムデータ検索・ドナー/
|
|
6
|
+
検体/変異解析。直接 API (ToolUniverse 非連携)。
|
|
7
|
+
tu_tools: []
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Scientific ICGC Cancer Data
|
|
11
|
+
|
|
12
|
+
ICGC (International Cancer Genome Consortium) ARGO DCC API を
|
|
13
|
+
活用した国際がんゲノムデータ検索・変異統計・がん種横断解析
|
|
14
|
+
パイプラインを提供する。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- 国際がんゲノムプロジェクトのデータを検索するとき
|
|
19
|
+
- がん種ごとの体細胞変異プロファイルを調べるとき
|
|
20
|
+
- ドナー・検体・変異の統計情報を取得するとき
|
|
21
|
+
- がんゲノムの変異シグネチャを分析するとき
|
|
22
|
+
- PCAWG (Pan-Cancer Analysis of Whole Genomes) データを活用するとき
|
|
23
|
+
- がん遺伝子変異の国際比較データが必要なとき
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. ICGC プロジェクト・ドナー検索
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import requests
|
|
33
|
+
import pandas as pd
|
|
34
|
+
|
|
35
|
+
# Base URL of the ICGC DCC REST API (v1); the icgc_* helpers below append
# endpoint paths such as "/projects", "/donors" and "/mutations" to it.
ICGC_BASE = "https://dcc.icgc.org/api/v1"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def icgc_search_projects(query=None, limit=50):
    """
    Search ICGC cancer genome projects.

    Sends a GET request to ``{ICGC_BASE}/projects`` and flattens every hit
    into one row of a DataFrame.

    Parameters:
        query: str | None — value matched against ``project.primarySite``
            with an ``is`` filter (e.g. "lung"). No filter is sent when
            the value is falsy.
        limit: int — maximum number of results requested (``size``).

    Returns:
        pandas.DataFrame with the columns project_id, project_name,
        primary_site, tumour_type, tumour_subtype, primary_country,
        total_donors and ssm_count, sorted by total_donors (descending).
        The frame is empty when the response contains no hits.

    Raises:
        requests.HTTPError: when the API answers with an error status.
    """
    import json

    url = f"{ICGC_BASE}/projects"
    params = {"size": limit, "from": 1}
    if query:
        # Serialize with json.dumps instead of string interpolation so that
        # quotes or backslashes in `query` cannot corrupt the filter JSON.
        params["filters"] = json.dumps(
            {"project": {"primarySite": {"is": [query]}}})

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = []
    for hit in data.get("hits", []):
        results.append({
            "project_id": hit.get("id", ""),
            "project_name": hit.get("name", ""),
            "primary_site": hit.get("primarySite", ""),
            "tumour_type": hit.get("tumourType", ""),
            "tumour_subtype": hit.get("tumourSubtype", ""),
            # `or []` also covers an explicit null in the payload, which
            # would otherwise make str.join raise TypeError.
            "primary_country": "; ".join(
                hit.get("primaryCountries") or []),
            "total_donors": hit.get("totalDonorCount", 0),
            "ssm_count": hit.get("ssmCount", 0),
        })

    df = pd.DataFrame(results)
    if not df.empty:
        df = df.sort_values("total_donors", ascending=False)

    total = data.get("pagination", {}).get("total", 0)
    print(f"ICGC projects: {len(df)}/{total} "
          f"(query='{query}')")
    return df
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def icgc_search_donors(project_id, limit=100):
    """
    Search the donors that belong to one ICGC project.

    Sends a GET request to ``{ICGC_BASE}/donors`` filtered on
    ``donor.projectId`` and flattens every hit into one DataFrame row.

    Parameters:
        project_id: str — project ID (e.g. "BRCA-US").
        limit: int — maximum number of results requested (``size``).

    Returns:
        pandas.DataFrame with the columns donor_id, project_id,
        primary_site, gender, vital_status, age_at_diagnosis,
        disease_status and ssm_count. Empty when there are no hits.

    Raises:
        requests.HTTPError: when the API answers with an error status.
    """
    import json

    url = f"{ICGC_BASE}/donors"
    params = {
        "size": limit,
        # json.dumps guarantees well-formed filter JSON even when
        # `project_id` contains quotes or backslashes.
        "filters": json.dumps(
            {"donor": {"projectId": {"is": [project_id]}}}),
    }

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = []
    for hit in data.get("hits", []):
        results.append({
            "donor_id": hit.get("id", ""),
            # The requested project ID is echoed rather than read from
            # the hit.
            "project_id": project_id,
            "primary_site": hit.get("primarySite", ""),
            "gender": hit.get("gender", ""),
            "vital_status": hit.get("vitalStatus", ""),
            "age_at_diagnosis": hit.get("ageAtDiagnosis"),
            "disease_status": hit.get(
                "diseaseStatusLastFollowup", ""),
            "ssm_count": hit.get("ssmCount", 0),
        })

    df = pd.DataFrame(results)
    total = data.get("pagination", {}).get("total", 0)
    print(f"ICGC donors: {len(df)}/{total} "
          f"(project={project_id})")
    return df
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## 2. 体細胞変異 (SSM) 検索
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
def icgc_search_mutations(gene_symbol=None,
                          project_id=None,
                          consequence_type=None,
                          limit=100):
    """
    Search ICGC simple somatic mutations (SSM).

    Every argument that is truthy contributes one ``is`` clause to the
    filter sent to ``{ICGC_BASE}/mutations``; with no arguments an empty
    filter (``{}``) is sent.

    Parameters:
        gene_symbol: str — gene symbol (e.g. "TP53").
        project_id: str — project ID.
        consequence_type: str — consequence type
            (e.g. "missense_variant").
        limit: int — maximum number of results requested (``size``).

    Returns:
        pandas.DataFrame with one row per mutation hit, sorted by
        affected_donors (descending). The gene symbol, consequence type
        and amino-acid change are taken from the first entry of the hit's
        ``consequences`` list.
    """
    import json

    # (filter entity, field, value), listed in the order in which the
    # clauses are serialized.
    criteria = (
        ("gene", "symbol", gene_symbol),
        ("donor", "projectId", project_id),
        ("mutation", "consequenceType", consequence_type),
    )
    filters = {
        entity: {field: {"is": [value]}}
        for entity, field, value in criteria
        if value
    }

    resp = requests.get(
        f"{ICGC_BASE}/mutations",
        # json.dumps({}) yields "{}", so the empty filter needs no
        # special case.
        params={"size": limit, "filters": json.dumps(filters)},
        timeout=30,
    )
    resp.raise_for_status()
    payload = resp.json()

    rows = []
    for hit in payload.get("hits", []):
        # Use the first listed consequence as the representative one.
        consequence_list = hit.get("consequences", [])
        primary = consequence_list[0] if consequence_list else {}
        rows.append({
            "mutation_id": hit.get("id", ""),
            "chromosome": hit.get("chromosome", ""),
            "start": hit.get("start"),
            "end": hit.get("end"),
            "mutation": hit.get("mutation", ""),
            "type": hit.get("type", ""),
            "gene_symbol": primary.get("geneSymbol", ""),
            "consequence_type": primary.get("type", ""),
            "aa_mutation": primary.get("aaMutation", ""),
            "affected_donors": hit.get("affectedDonorCountTotal", 0),
            "affected_projects": hit.get("affectedProjectCount", 0),
            "functional_impact": hit.get("functionalImpact", ""),
        })

    df = pd.DataFrame(rows)
    if not df.empty:
        df = df.sort_values("affected_donors", ascending=False)

    total = payload.get("pagination", {}).get("total", 0)
    print(f"ICGC mutations: {len(df)}/{total} "
          f"(gene={gene_symbol}, project={project_id})")
    return df
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## 3. がん種統計・変異サマリー
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
def icgc_cancer_stats(project_id=None):
    """
    Summarize ICGC statistics for one project or for all projects.

    Parameters:
        project_id: str | None — project ID. When falsy, a summary across
            the projects returned by ``icgc_search_projects(limit=200)``
            is produced instead.

    Returns:
        dict. With ``project_id``: project_id, project_name, primary_site,
        total_donors, total_specimens, ssm_count and repository.
        Without: total_projects, total_donors, total_ssm and top_sites
        (the ten primary sites with the most donors, as a
        ``{site: donors}`` mapping).

    Raises:
        requests.HTTPError: when the API answers with an error status.
    """
    if project_id:
        url = f"{ICGC_BASE}/projects/{project_id}"
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        data = resp.json()

        stats = {
            "project_id": project_id,
            "project_name": data.get("name", ""),
            "primary_site": data.get("primarySite", ""),
            "total_donors": data.get("totalDonorCount", 0),
            "total_specimens": data.get(
                "totalSpecimenCount", 0),
            "ssm_count": data.get("ssmCount", 0),
            # `or []` also covers an explicit null in the payload.
            "repository": "; ".join(
                data.get("repository") or []),
        }
        print(f"ICGC stats: {project_id} — "
              f"{stats['total_donors']} donors, "
              f"{stats['ssm_count']} mutations")
        return stats

    # Overview across all projects.
    projects = icgc_search_projects(limit=200)
    if projects.empty:
        # An empty result frame has no columns, so indexing
        # "total_donors" would raise KeyError; report zeros instead.
        summary = {
            "total_projects": 0,
            "total_donors": 0,
            "total_ssm": 0,
            "top_sites": {},
        }
    else:
        donors_by_site = projects.groupby(
            "primary_site")["total_donors"].sum()
        summary = {
            "total_projects": len(projects),
            "total_donors": projects["total_donors"].sum(),
            "total_ssm": projects["ssm_count"].sum(),
            "top_sites": donors_by_site.sort_values(
                ascending=False).head(10).to_dict(),
        }
    print(f"ICGC summary: {summary['total_projects']} "
          f"projects, {summary['total_donors']} donors")
    return summary
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def icgc_gene_mutation_frequency(gene_symbol, top_n=20):
    """
    Aggregate mutation counts for a gene from ICGC mutation hits.

    Fetches up to 500 mutations via ``icgc_search_mutations`` and groups
    them by the ``gene_symbol`` column of that result.

    NOTE(review): the mutation rows carry no per-project or per-cancer-type
    column, so the aggregation is per gene symbol, not per cancer type.
    Confirm whether a per-project breakdown was intended.

    Parameters:
        gene_symbol: str — gene symbol to query.
        top_n: int | None — maximum number of aggregated rows to return,
            ranked by total affected donors. ``None`` returns all rows.

    Returns:
        pandas.DataFrame with the columns gene_symbol, total_mutations,
        total_affected_donors and mutation_types (up to five distinct
        consequence types joined by "; "), ordered by
        total_affected_donors (descending). An empty DataFrame is returned
        when no mutation is found.
    """
    mutations = icgc_search_mutations(
        gene_symbol=gene_symbol, limit=500)

    if mutations.empty:
        return pd.DataFrame()

    def _join_types(values):
        # Drop non-string entries (e.g. null from the API) so that
        # str.join cannot raise TypeError.
        distinct = [v for v in values.unique() if isinstance(v, str)]
        return "; ".join(distinct[:5])

    freq = mutations.groupby("gene_symbol").agg(
        total_mutations=("mutation_id", "count"),
        total_affected_donors=("affected_donors", "sum"),
        mutation_types=("consequence_type", _join_types),
    ).reset_index()

    # Honor `top_n`, which was previously accepted but never applied.
    freq = freq.sort_values(
        "total_affected_donors", ascending=False)
    if top_n is not None:
        freq = freq.head(top_n)
    freq = freq.reset_index(drop=True)

    print(f"ICGC gene frequency: {gene_symbol} — "
          f"{len(freq)} entries")
    return freq
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## 4. ICGC 統合パイプライン
|
|
275
|
+
|
|
276
|
+
```python
|
|
277
|
+
def icgc_pipeline(gene_symbols, cancer_site=None,
                  output_dir="results"):
    """
    Run the integrated ICGC pipeline and write its results as CSV files.

    Steps:
        1. Search projects (optionally filtered by ``cancer_site``) and
           write ``projects.csv``.
        2. Search up to 200 mutations per gene and write the combined
           table to ``mutations.csv`` (only when at least one gene query
           succeeded).
        3. Fetch statistics for the first project of the search result
           and write ``cancer_stats.csv`` (only when a project was found).

    Parameters:
        gene_symbols: list[str] — genes to query.
        cancer_site: str | None — primary-site filter for the project
            search.
        output_dir: str — output directory; created when missing.

    Returns:
        dict with the keys:
            "projects": DataFrame of projects,
            "mutations": combined mutation DataFrame (empty when no gene
                query succeeded),
            "stats": statistics dict of the top project, or None when no
                project was found.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Project search
    projects = icgc_search_projects(query=cancer_site)
    projects.to_csv(output_dir / "projects.csv", index=False)

    # 2) Mutation search per gene
    all_mutations = []
    for gene in gene_symbols:
        try:
            muts = icgc_search_mutations(
                gene_symbol=gene, limit=200)
            muts["query_gene"] = gene
            all_mutations.append(muts)
        except Exception as e:
            # Best effort: one failing gene must not abort the whole run.
            print(f"  Warning: {gene} — {e}")
            continue

    combined = pd.DataFrame()
    if all_mutations:
        combined = pd.concat(all_mutations,
                             ignore_index=True)
        combined.to_csv(output_dir / "mutations.csv",
                        index=False)

    # 3) Statistics for the top-ranked project
    stats = None
    if not projects.empty:
        top_project = projects.iloc[0]["project_id"]
        stats = icgc_cancer_stats(project_id=top_project)
        pd.DataFrame([stats]).to_csv(
            output_dir / "cancer_stats.csv", index=False)

    print(f"ICGC pipeline: {output_dir}")
    # "projects" is kept as before; the other keys expose results that
    # were previously only written to disk.
    return {
        "projects": projects,
        "mutations": combined,
        "stats": stats,
    }
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
---
|
|
325
|
+
|
|
326
|
+
## ToolUniverse 連携
|
|
327
|
+
|
|
328
|
+
| TU Key | ツール名 | 連携内容 |
|
|
329
|
+
|--------|---------|---------|
|
|
330
|
+
| (direct) | ICGC DCC API | 直接 REST API — TU 非連携 |
|
|
331
|
+
|
|
332
|
+
## パイプライン統合
|
|
333
|
+
|
|
334
|
+
```
|
|
335
|
+
cancer-genomics → icgc-cancer-data → precision-oncology
|
|
336
|
+
(がんゲノム全般) (ICGC DCC API) (精密腫瘍学)
|
|
337
|
+
│ │ ↓
|
|
338
|
+
tcga-data ────────────┘ clinical-decision-support
|
|
339
|
+
(TCGA データ) │ (臨床意思決定)
|
|
340
|
+
↓
|
|
341
|
+
variant-interpretation
|
|
342
|
+
(変異臨床解釈)
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
## パイプライン出力
|
|
346
|
+
|
|
347
|
+
| ファイル | 説明 | 次スキル |
|
|
348
|
+
|---------|------|---------|
|
|
349
|
+
| `results/projects.csv` | プロジェクト一覧 | → cancer-genomics |
|
|
350
|
+
| `results/mutations.csv` | 体細胞変異 | → variant-interpretation |
|
|
351
|
+
| `results/cancer_stats.csv` | がん種統計 | → precision-oncology |
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-pharmgkb-pgx
|
|
3
|
+
description: |
|
|
4
|
+
PharmGKB 薬理ゲノミクススキル。PharmGKB REST API による
|
|
5
|
+
臨床アノテーション・薬物遺伝子関連・投与量ガイドライン・
|
|
6
|
+
スターアレル解析。ToolUniverse 連携: pharmgkb。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: pharmgkb
|
|
9
|
+
name: PharmGKB
|
|
10
|
+
description: 臨床アノテーション・薬物遺伝子関連・PGx ガイドライン
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Scientific PharmGKB PGx
|
|
14
|
+
|
|
15
|
+
PharmGKB (Pharmacogenomics Knowledgebase) REST API を活用した
|
|
16
|
+
薬理ゲノミクス臨床アノテーション・薬物遺伝子相互作用・投与量
|
|
17
|
+
ガイドライン検索パイプラインを提供する。
|
|
18
|
+
|
|
19
|
+
## When to Use
|
|
20
|
+
|
|
21
|
+
- 薬物と遺伝子変異の関連を調べるとき
|
|
22
|
+
- 臨床アノテーション (エビデンスレベル付き) を検索するとき
|
|
23
|
+
- 投与量調整ガイドライン (CPIC/DPWG) を取得するとき
|
|
24
|
+
- スターアレルと表現型の対応を確認するとき
|
|
25
|
+
- 特定薬物の薬理ゲノミクス情報を包括的に取得するとき
|
|
26
|
+
- 精密医療の薬物選択を支援するとき
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
## 1. 薬物・遺伝子検索
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
import requests
|
|
36
|
+
import pandas as pd
|
|
37
|
+
|
|
38
|
+
# Base URL of the PharmGKB REST data API (v1); the pharmgkb_* helpers below
# append resource paths such as "/chemical", "/gene" and "/guideline" to it.
PGKB_BASE = "https://api.pharmgkb.org/v1/data"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def pharmgkb_search_drugs(query, limit=50):
    """
    Search PharmGKB chemicals (drugs) by name.

    Sends a GET request to ``{PGKB_BASE}/chemical`` with ``view=max`` and
    flattens the entries of the response's ``data`` list into rows.

    Parameters:
        query: str — drug name (e.g. "warfarin", "clopidogrel").
        limit: int | None — maximum number of rows to return; ``None``
            returns every entry of the response.

    Returns:
        pandas.DataFrame with the columns pharmgkb_id, name,
        generic_names, trade_names (first five), type and
        cross_references (count).

    Raises:
        requests.HTTPError: when the API answers with an error status.
    """
    url = f"{PGKB_BASE}/chemical"
    params = {"name": query, "view": "max"}

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = []
    # `limit` was documented but never applied; truncate client-side.
    for item in data.get("data", [])[:limit]:
        results.append({
            "pharmgkb_id": item.get("id", ""),
            "name": item.get("name", ""),
            # `or []` also covers explicit nulls, which would otherwise
            # break str.join / len.
            "generic_names": "; ".join(
                item.get("genericNames") or []),
            "trade_names": "; ".join(
                (item.get("tradeNames") or [])[:5]),
            "type": item.get("type", ""),
            "cross_references": len(
                item.get("crossReferences") or []),
        })

    df = pd.DataFrame(results)
    print(f"PharmGKB drugs: {len(df)} results "
          f"(query='{query}')")
    return df
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def pharmgkb_search_genes(query, limit=50):
    """
    Search PharmGKB genes by symbol.

    Sends a GET request to ``{PGKB_BASE}/gene`` with ``view=max`` and
    flattens the entries of the response's ``data`` list into rows.

    Parameters:
        query: str — gene symbol (e.g. "CYP2D6").
        limit: int | None — maximum number of rows to return; ``None``
            returns every entry of the response.

    Returns:
        pandas.DataFrame with the columns pharmgkb_id, symbol, name,
        chromosome, cpic_gene and has_prescribing_info.

    Raises:
        requests.HTTPError: when the API answers with an error status.
    """
    url = f"{PGKB_BASE}/gene"
    params = {"symbol": query, "view": "max"}

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = []
    # `limit` was documented but never applied; truncate client-side.
    for item in data.get("data", [])[:limit]:
        results.append({
            "pharmgkb_id": item.get("id", ""),
            "symbol": item.get("symbol", ""),
            "name": item.get("name", ""),
            "chromosome": item.get("chromosomeFormatted", ""),
            "cpic_gene": item.get("cpicGene", False),
            "has_prescribing_info": item.get(
                "hasPrescribingInfo", False),
        })

    df = pd.DataFrame(results)
    print(f"PharmGKB genes: {len(df)} results "
          f"(query='{query}')")
    return df
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## 2. 臨床アノテーション取得
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
def pharmgkb_clinical_annotations(gene_or_drug,
                                  search_type="gene"):
    """
    Search PharmGKB clinical annotations for a gene or a drug.

    The name is first resolved to a PharmGKB ID through the ``/gene``
    (by symbol) or ``/chemical`` (by name) endpoint; the first match is
    then used to filter ``{PGKB_BASE}/clinicalAnnotation``.

    Parameters:
        gene_or_drug: str — gene symbol or drug name.
        search_type: str — "gene" resolves a gene; any other value is
            treated as "drug".

    Returns:
        pandas.DataFrame with the columns annotation_id, level, score,
        genes, drugs, phenotype_category and sentences (first 300
        characters of ``textHtml``), sorted by level. An empty DataFrame
        is returned when the gene/drug cannot be resolved to an ID.

    Raises:
        requests.HTTPError: when either request answers with an error
            status.
    """
    if search_type == "gene":
        # Resolve by gene symbol
        lookup_url = f"{PGKB_BASE}/gene"
        lookup_params = {"symbol": gene_or_drug}
        filter_key = "relatedGenes.id"
    else:
        # Resolve by drug name
        lookup_url = f"{PGKB_BASE}/chemical"
        lookup_params = {"name": gene_or_drug}
        filter_key = "relatedChemicals.id"

    lookup = requests.get(lookup_url, params=lookup_params, timeout=30)
    lookup.raise_for_status()
    matches = lookup.json().get("data", [])
    entity_id = matches[0].get("id", "") if matches else ""

    if not entity_id:
        # Without an ID the annotation request would be unfiltered and
        # return annotations unrelated to the query, so stop here.
        print(f"PharmGKB annotations: 0 "
              f"({search_type}={gene_or_drug})")
        return pd.DataFrame()

    url = f"{PGKB_BASE}/clinicalAnnotation"
    params = {"view": "max", filter_key: entity_id}

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = []
    for item in data.get("data", []):
        gene_symbols = [g.get("symbol", "")
                        for g in item.get("relatedGenes", [])]
        drug_names = [c.get("name", "")
                      for c in item.get("relatedChemicals", [])]
        results.append({
            "annotation_id": item.get("id", ""),
            "level": item.get("level", ""),
            "score": item.get("score", ""),
            "genes": "; ".join(gene_symbols),
            "drugs": "; ".join(drug_names),
            "phenotype_category": item.get(
                "phenotypeCategory", ""),
            # textHtml may be null; fall back to "" before slicing.
            "sentences": (item.get("textHtml") or "")[:300],
        })

    df = pd.DataFrame(results)
    if not df.empty:
        df = df.sort_values("level")

    print(f"PharmGKB annotations: {len(df)} "
          f"({search_type}={gene_or_drug})")
    return df
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## 3. 投与量ガイドライン取得
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
def pharmgkb_dosing_guidelines(drug_name=None, gene=None):
    """
    Search PharmGKB dosing guidelines (CPIC/DPWG).

    Queries ``{PGKB_BASE}/guideline`` with ``view=max``; each truthy
    argument adds one filter parameter to the request.

    Parameters:
        drug_name: str — drug name (sent as ``relatedChemicals.name``).
        gene: str — gene symbol (sent as ``relatedGenes.symbol``).

    Returns:
        pandas.DataFrame with the columns guideline_id, name, source,
        genes, drugs and recommendation (first 500 characters of
        ``textHtml``).
    """
    query = {"view": "max"}
    # Optional filters, added in a fixed order so the request stays stable.
    optional_filters = (
        ("relatedChemicals.name", drug_name),
        ("relatedGenes.symbol", gene),
    )
    for key, value in optional_filters:
        if value:
            query[key] = value

    response = requests.get(
        f"{PGKB_BASE}/guideline", params=query, timeout=30)
    response.raise_for_status()
    payload = response.json()

    rows = [
        {
            "guideline_id": entry.get("id", ""),
            "name": entry.get("name", ""),
            "source": entry.get("source", ""),
            "genes": "; ".join(
                g.get("symbol", "")
                for g in entry.get("relatedGenes", [])),
            "drugs": "; ".join(
                c.get("name", "")
                for c in entry.get("relatedChemicals", [])),
            # textHtml may be null; fall back to "" before slicing.
            "recommendation": (entry.get("textHtml") or "")[:500],
        }
        for entry in payload.get("data", [])
    ]

    df = pd.DataFrame(rows)
    print(f"PharmGKB guidelines: {len(df)} "
          f"(drug={drug_name}, gene={gene})")
    return df
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## 4. PharmGKB 統合パイプライン
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
def pharmgkb_pipeline(drug_name, genes=None,
                      output_dir="results"):
    """
    Run the integrated PharmGKB pipeline and write its results as CSV.

    Steps:
        1. Drug search -> ``drugs.csv``.
        2. Clinical annotations for the drug -> ``annotations.csv``.
        3. Dosing guidelines for the drug -> ``guidelines.csv``.
        4. Clinical annotations per gene (when ``genes`` is given)
           -> ``gene_annotations.csv`` (only when at least one gene query
           succeeded).

    Parameters:
        drug_name: str — drug name.
        genes: list[str] | None — related genes to annotate.
        output_dir: str — output directory; created when missing.

    Returns:
        dict with the keys "drugs", "annotations", "guidelines" and
        "gene_annotations" (each a DataFrame; "gene_annotations" is empty
        when no gene was given or no gene query succeeded).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Drug search
    drugs = pharmgkb_search_drugs(drug_name)
    drugs.to_csv(output_dir / "drugs.csv", index=False)

    # 2) Clinical annotations for the drug
    annotations = pharmgkb_clinical_annotations(
        drug_name, search_type="drug")
    annotations.to_csv(output_dir / "annotations.csv",
                       index=False)

    # 3) Dosing guidelines
    guidelines = pharmgkb_dosing_guidelines(
        drug_name=drug_name)
    guidelines.to_csv(output_dir / "guidelines.csv",
                      index=False)

    # 4) Related-gene analysis
    gene_df = pd.DataFrame()
    if genes:
        gene_results = []
        for g in genes:
            try:
                g_ann = pharmgkb_clinical_annotations(
                    g, search_type="gene")
                g_ann["query_gene"] = g
                gene_results.append(g_ann)
            except Exception as e:
                # Best effort: keep going, but report the failure instead
                # of swallowing it silently.
                print(f"  Warning: {g} — {e}")
                continue
        if gene_results:
            gene_df = pd.concat(gene_results,
                                ignore_index=True)
            gene_df.to_csv(
                output_dir / "gene_annotations.csv",
                index=False)

    print(f"PharmGKB pipeline: {output_dir}")
    return {
        "drugs": drugs,
        "annotations": annotations,
        "guidelines": guidelines,
        # Newly exposed; previously only written to disk.
        "gene_annotations": gene_df,
    }
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
---
|
|
279
|
+
|
|
280
|
+
## ToolUniverse 連携
|
|
281
|
+
|
|
282
|
+
| TU Key | ツール名 | 連携内容 |
|
|
283
|
+
|--------|---------|---------|
|
|
284
|
+
| `pharmgkb` | PharmGKB | 臨床アノテーション・薬物遺伝子・PGx ガイドライン |
|
|
285
|
+
|
|
286
|
+
## パイプライン統合
|
|
287
|
+
|
|
288
|
+
```
|
|
289
|
+
pharmacogenomics → pharmgkb-pgx → clinical-decision-support
|
|
290
|
+
(PGx 解析全般) (PharmGKB API) (臨床意思決定)
|
|
291
|
+
│ │ ↓
|
|
292
|
+
drug-discovery ──────┘ precision-oncology
|
|
293
|
+
(薬物開発) │ (精密腫瘍学)
|
|
294
|
+
↓
|
|
295
|
+
variant-interpretation
|
|
296
|
+
(変異臨床解釈)
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
## パイプライン出力
|
|
300
|
+
|
|
301
|
+
| ファイル | 説明 | 次スキル |
|
|
302
|
+
|---------|------|---------|
|
|
303
|
+
| `results/drugs.csv` | 薬物情報 | → drug-discovery |
|
|
304
|
+
| `results/annotations.csv` | 臨床アノテーション | → variant-interpretation |
|
|
305
|
+
| `results/guidelines.csv` | 投与量ガイドライン | → clinical-decision-support |
|
|
306
|
+
| `results/gene_annotations.csv` | 遺伝子別アノテーション | → pharmacogenomics |
|