@nahisaho/satori 0.17.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -38
- package/package.json +1 -1
- package/src/.github/skills/scientific-alphafold-structures/SKILL.md +256 -0
- package/src/.github/skills/scientific-arrayexpress-expression/SKILL.md +264 -0
- package/src/.github/skills/scientific-civic-evidence/SKILL.md +292 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +4 -0
- package/src/.github/skills/scientific-crossref-metadata/SKILL.md +313 -0
- package/src/.github/skills/scientific-depmap-dependencies/SKILL.md +239 -0
- package/src/.github/skills/scientific-disease-research/SKILL.md +4 -0
- package/src/.github/skills/scientific-drugbank-resources/SKILL.md +269 -0
- package/src/.github/skills/scientific-gnomad-variants/SKILL.md +356 -0
- package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +271 -0
- package/src/.github/skills/scientific-gwas-catalog/SKILL.md +267 -0
- package/src/.github/skills/scientific-icgc-cancer-data/SKILL.md +351 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +4 -0
- package/src/.github/skills/scientific-opentargets-genetics/SKILL.md +299 -0
- package/src/.github/skills/scientific-pharmgkb-pgx/SKILL.md +306 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +4 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-rcsb-pdb-search/SKILL.md +280 -0
- package/src/.github/skills/scientific-reactome-pathways/SKILL.md +242 -0
- package/src/.github/skills/scientific-semantic-scholar/SKILL.md +298 -0
- package/src/.github/skills/scientific-uniprot-proteome/SKILL.md +273 -0
- package/src/.github/skills/scientific-variant-interpretation/SKILL.md +4 -0
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-icgc-cancer-data
|
|
3
|
+
description: |
|
|
4
|
+
ICGC がんゲノムデータスキル。ICGC ARGO DCC API および
|
|
5
|
+
レガシー API による国際がんゲノムデータ検索・ドナー/
|
|
6
|
+
検体/変異解析。直接 API (ToolUniverse 非連携)。
|
|
7
|
+
tu_tools: []
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Scientific ICGC Cancer Data
|
|
11
|
+
|
|
12
|
+
ICGC (International Cancer Genome Consortium) ARGO DCC API を
|
|
13
|
+
活用した国際がんゲノムデータ検索・変異統計・がん種横断解析
|
|
14
|
+
パイプラインを提供する。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- 国際がんゲノムプロジェクトのデータを検索するとき
|
|
19
|
+
- がん種ごとの体細胞変異プロファイルを調べるとき
|
|
20
|
+
- ドナー・検体・変異の統計情報を取得するとき
|
|
21
|
+
- がんゲノムの変異シグネチャを分析するとき
|
|
22
|
+
- PCAWG (Pan-Cancer Analysis of Whole Genomes) データを活用するとき
|
|
23
|
+
- がん遺伝子変異の国際比較データが必要なとき
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. ICGC プロジェクト・ドナー検索
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import requests
|
|
33
|
+
import pandas as pd
|
|
34
|
+
|
|
35
|
+
# Base URL that every ICGC helper in this skill appends its endpoint path to.
ICGC_BASE = "https://dcc.icgc.org/api/v1"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def icgc_search_projects(query=None, limit=50):
    """
    Search ICGC cancer genome projects.

    Sends a GET request to ``{ICGC_BASE}/projects`` and flattens the
    ``hits`` array of the JSON response into a DataFrame sorted by
    ``total_donors`` in descending order.

    Parameters:
        query: str — value used in an ``"is"`` filter on the project's
            ``primarySite`` field (e.g. "Lung"). A falsy value sends
            no filter at all.
        limit: int — maximum number of results (sent as ``size``).

    Returns:
        pandas.DataFrame with the columns project_id, project_name,
        primary_site, tumour_type, tumour_subtype, primary_country,
        total_donors and ssm_count. Empty (no columns) when the
        response contains no hits.

    Raises:
        requests.HTTPError: raised by ``raise_for_status()`` when the
            server answers with an error status.
    """
    import json

    url = f"{ICGC_BASE}/projects"
    # NOTE(review): "from": 1 presumably addresses the first record
    # (1-based offset) — confirm against the API documentation.
    params = {"size": limit, "from": 1}
    if query:
        # Serialise with json.dumps instead of string interpolation so
        # that quotes or backslashes in `query` are escaped and the
        # filter always remains valid JSON.
        params["filters"] = json.dumps(
            {"project": {"primarySite": {"is": [query]}}},
            separators=(",", ":"),
        )

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = [
        {
            "project_id": hit.get("id", ""),
            "project_name": hit.get("name", ""),
            "primary_site": hit.get("primarySite", ""),
            "tumour_type": hit.get("tumourType", ""),
            "tumour_subtype": hit.get("tumourSubtype", ""),
            # `or []` tolerates an explicit null for the country list.
            "primary_country": "; ".join(
                hit.get("primaryCountries") or []),
            "total_donors": hit.get("totalDonorCount", 0),
            "ssm_count": hit.get("ssmCount", 0),
        }
        for hit in data.get("hits", [])
    ]

    df = pd.DataFrame(results)
    if not df.empty:
        # An empty frame has no columns, so sorting must be skipped.
        df = df.sort_values("total_donors", ascending=False)

    total = data.get("pagination", {}).get("total", 0)
    print(f"ICGC projects: {len(df)}/{total} "
          f"(query='{query}')")
    return df
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def icgc_search_donors(project_id, limit=100):
    """
    Search the donors that belong to one ICGC project.

    Sends a GET request to ``{ICGC_BASE}/donors`` with an ``"is"``
    filter on ``donor.projectId`` and flattens the ``hits`` array of
    the JSON response into a DataFrame.

    Parameters:
        project_id: str — project ID (e.g. "BRCA-US").
        limit: int — maximum number of results (sent as ``size``).

    Returns:
        pandas.DataFrame with the columns donor_id, project_id,
        primary_site, gender, vital_status, age_at_diagnosis,
        disease_status and ssm_count. The project_id column echoes
        the argument rather than a value from the response.

    Raises:
        requests.HTTPError: raised by ``raise_for_status()`` when the
            server answers with an error status.
    """
    import json

    url = f"{ICGC_BASE}/donors"
    params = {
        "size": limit,
        # json.dumps escapes quotes/backslashes in `project_id`, which
        # hand-built string interpolation would not.
        "filters": json.dumps(
            {"donor": {"projectId": {"is": [project_id]}}},
            separators=(",", ":"),
        ),
    }

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = [
        {
            "donor_id": hit.get("id", ""),
            "project_id": project_id,
            "primary_site": hit.get("primarySite", ""),
            "gender": hit.get("gender", ""),
            "vital_status": hit.get("vitalStatus", ""),
            "age_at_diagnosis": hit.get("ageAtDiagnosis"),
            "disease_status": hit.get(
                "diseaseStatusLastFollowup", ""),
            "ssm_count": hit.get("ssmCount", 0),
        }
        for hit in data.get("hits", [])
    ]

    df = pd.DataFrame(results)
    total = data.get("pagination", {}).get("total", 0)
    print(f"ICGC donors: {len(df)}/{total} "
          f"(project={project_id})")
    return df
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## 2. 体細胞変異 (SSM) 検索
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
def icgc_search_mutations(gene_symbol=None,
                          project_id=None,
                          consequence_type=None,
                          limit=100):
    """
    Search ICGC simple somatic mutations (SSM).

    Sends a GET request to ``{ICGC_BASE}/mutations`` with a JSON filter
    built from whichever arguments are given, then flattens the
    ``hits`` array into a DataFrame sorted by ``affected_donors`` in
    descending order.

    Parameters:
        gene_symbol: str — gene symbol (e.g. "TP53").
        project_id: str — project ID.
        consequence_type: str — consequence type
            (e.g. "missense_variant").
        limit: int — maximum number of results (sent as ``size``).

    Returns:
        pandas.DataFrame with the columns mutation_id, chromosome,
        start, end, mutation, type, gene_symbol, consequence_type,
        aa_mutation, affected_donors, affected_projects and
        functional_impact. Empty (no columns) when there are no hits.

    Raises:
        requests.HTTPError: raised by ``raise_for_status()`` when the
            server answers with an error status.
    """
    import json

    url = f"{ICGC_BASE}/mutations"

    filters = {}
    if gene_symbol:
        filters["gene"] = {"symbol": {"is": [gene_symbol]}}
    if project_id:
        filters["donor"] = {"projectId": {"is": [project_id]}}
    if consequence_type:
        filters["mutation"] = {
            "consequenceType": {"is": [consequence_type]}
        }

    # json.dumps({}) already yields "{}", so no special case is needed
    # for the unfiltered request.
    params = {
        "size": limit,
        "filters": json.dumps(filters),
    }

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    def _matches_query(cons):
        """Return True if a consequence agrees with the requested filters."""
        if gene_symbol and cons.get("geneSymbol") != gene_symbol:
            return False
        if consequence_type and cons.get("type") != consequence_type:
            return False
        return True

    results = []
    for hit in data.get("hits", []):
        # NOTE(review): the API may only return "consequences" when it
        # is explicitly requested — confirm; without it the three
        # consequence-derived columns below stay empty.
        consequences = hit.get("consequences") or []
        # Report the consequence that belongs to the queried gene/type.
        # Blindly taking the first entry can label the row with a
        # different gene than the one that was searched for. Fall back
        # to the first entry when nothing matches.
        top_cons = next(
            (c for c in consequences if _matches_query(c)),
            consequences[0] if consequences else {},
        )

        results.append({
            "mutation_id": hit.get("id", ""),
            "chromosome": hit.get("chromosome", ""),
            "start": hit.get("start"),
            "end": hit.get("end"),
            "mutation": hit.get("mutation", ""),
            "type": hit.get("type", ""),
            "gene_symbol": top_cons.get("geneSymbol", ""),
            "consequence_type": top_cons.get("type", ""),
            "aa_mutation": top_cons.get("aaMutation", ""),
            "affected_donors": hit.get(
                "affectedDonorCountTotal", 0),
            "affected_projects": hit.get(
                "affectedProjectCount", 0),
            "functional_impact": hit.get(
                "functionalImpact", ""),
        })

    df = pd.DataFrame(results)
    if not df.empty:
        # An empty frame has no columns, so sorting must be skipped.
        df = df.sort_values("affected_donors",
                            ascending=False)

    total = data.get("pagination", {}).get("total", 0)
    print(f"ICGC mutations: {len(df)}/{total} "
          f"(gene={gene_symbol}, project={project_id})")
    return df
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## 3. がん種統計・変異サマリー
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
def icgc_cancer_stats(project_id=None):
    """
    Summarise ICGC statistics for one project or for all projects.

    Parameters:
        project_id: str — project ID. When falsy, an overall summary
            across projects is produced instead.

    Returns:
        dict. With a project ID: project_id, project_name,
        primary_site, total_donors, total_specimens, ssm_count and
        repository, read from ``{ICGC_BASE}/projects/{project_id}``.
        Without one: total_projects, total_donors, total_ssm and
        top_sites (donor totals of the ten largest primary sites),
        derived from ``icgc_search_projects(limit=200)``.

    Raises:
        requests.HTTPError: raised by ``raise_for_status()`` when the
            server answers with an error status.
    """
    if project_id:
        url = f"{ICGC_BASE}/projects/{project_id}"
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        data = resp.json()

        stats = {
            "project_id": project_id,
            "project_name": data.get("name", ""),
            "primary_site": data.get("primarySite", ""),
            "total_donors": data.get("totalDonorCount", 0),
            "total_specimens": data.get(
                "totalSpecimenCount", 0),
            "ssm_count": data.get("ssmCount", 0),
            # `or []` tolerates an explicit null for the repository list.
            "repository": "; ".join(
                data.get("repository") or []),
        }
        print(f"ICGC stats: {project_id} — "
              f"{stats['total_donors']} donors, "
              f"{stats['ssm_count']} mutations")
        return stats

    # Overview across all projects.
    projects = icgc_search_projects(limit=200)
    if projects.empty:
        # An empty result has no columns; indexing "total_donors"
        # would raise KeyError, so report zeros instead.
        summary = {
            "total_projects": 0,
            "total_donors": 0,
            "total_ssm": 0,
            "top_sites": {},
        }
    else:
        donors_by_site = projects.groupby(
            "primary_site")["total_donors"].sum()
        summary = {
            "total_projects": len(projects),
            "total_donors": projects["total_donors"].sum(),
            "total_ssm": projects["ssm_count"].sum(),
            "top_sites": donors_by_site.sort_values(
                ascending=False).head(10).to_dict(),
        }
    print(f"ICGC summary: {summary['total_projects']} "
          f"projects, {summary['total_donors']} donors")
    return summary
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def icgc_gene_mutation_frequency(gene_symbol, top_n=20):
    """
    Aggregate the ICGC mutations found for a gene.

    Fetches up to 500 mutations through ``icgc_search_mutations`` and
    groups them by the ``gene_symbol`` column of that table. The
    mutation table carries no project or cancer-type column, so the
    aggregation is per reported gene symbol, not per cancer type.

    Parameters:
        gene_symbol: str — gene symbol to search for.
        top_n: int — maximum number of rows to return, ranked by
            ``total_affected_donors``.

    Returns:
        pandas.DataFrame with the columns gene_symbol,
        total_mutations, total_affected_donors and mutation_types
        (up to five distinct consequence types joined by "; ").
        An empty DataFrame when no mutations are found.
    """
    mutations = icgc_search_mutations(
        gene_symbol=gene_symbol, limit=500)

    if mutations.empty:
        return pd.DataFrame()

    def _join_types(types):
        """Join up to five distinct consequence types, skipping nulls."""
        return "; ".join(types.dropna().astype(str).unique()[:5])

    freq = mutations.groupby("gene_symbol").agg(
        total_mutations=("mutation_id", "count"),
        total_affected_donors=("affected_donors", "sum"),
        mutation_types=("consequence_type", _join_types),
    ).reset_index()

    # Honour `top_n`, which was previously accepted but ignored.
    freq = freq.sort_values(
        "total_affected_donors", ascending=False).head(top_n)

    print(f"ICGC gene frequency: {gene_symbol} — "
          f"{len(freq)} entries")
    return freq
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## 4. ICGC 統合パイプライン
|
|
275
|
+
|
|
276
|
+
```python
|
|
277
|
+
def icgc_pipeline(gene_symbols, cancer_site=None,
                  output_dir="results"):
    """
    Run the combined ICGC pipeline and write its results as CSV files.

    Steps:
        1. Search projects (optionally filtered by ``cancer_site``)
           and write ``projects.csv``.
        2. Search up to 200 mutations per gene and write the
           concatenated table to ``mutations.csv``.
        3. Fetch statistics for the first project of step 1 and write
           ``cancer_stats.csv``.

    Parameters:
        gene_symbols: list[str] — genes to search mutations for.
        cancer_site: str — primary-site filter for the project search.
        output_dir: str — output directory (created if missing).

    Returns:
        dict with the keys "projects" (DataFrame), "mutations"
        (DataFrame, empty when no gene search succeeded) and
        "cancer_stats" (dict, or None when no project was found).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Project search
    projects = icgc_search_projects(query=cancer_site)
    projects.to_csv(output_dir / "projects.csv", index=False)

    # 2) Mutation search per gene
    all_mutations = []
    for gene in gene_symbols:
        try:
            muts = icgc_search_mutations(
                gene_symbol=gene, limit=200)
        except Exception as e:
            # Deliberate best effort: one failing gene must not abort
            # the remaining searches, so report it and move on.
            print(f" Warning: {gene} — {e}")
            continue
        muts["query_gene"] = gene
        all_mutations.append(muts)

    combined = pd.DataFrame()
    if all_mutations:
        combined = pd.concat(all_mutations,
                             ignore_index=True)
        combined.to_csv(output_dir / "mutations.csv",
                        index=False)

    # 3) Statistics for the first listed project (the project search
    #    returns its rows sorted by donor count, largest first).
    stats = None
    if not projects.empty:
        top_project = projects.iloc[0]["project_id"]
        stats = icgc_cancer_stats(project_id=top_project)
        pd.DataFrame([stats]).to_csv(
            output_dir / "cancer_stats.csv", index=False)

    print(f"ICGC pipeline: {output_dir}")
    # "projects" is kept as before; the other keys expose results that
    # were previously computed and written but not returned.
    return {
        "projects": projects,
        "mutations": combined,
        "cancer_stats": stats,
    }
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
---
|
|
325
|
+
|
|
326
|
+
## ToolUniverse 連携
|
|
327
|
+
|
|
328
|
+
| TU Key | ツール名 | 連携内容 |
|
|
329
|
+
|--------|---------|---------|
|
|
330
|
+
| (direct) | ICGC DCC API | 直接 REST API — TU 非連携 |
|
|
331
|
+
|
|
332
|
+
## パイプライン統合
|
|
333
|
+
|
|
334
|
+
```
|
|
335
|
+
cancer-genomics → icgc-cancer-data → precision-oncology
|
|
336
|
+
(がんゲノム全般) (ICGC DCC API) (精密腫瘍学)
|
|
337
|
+
│ │ ↓
|
|
338
|
+
tcga-data ────────────┘ clinical-decision-support
|
|
339
|
+
(TCGA データ) │ (臨床意思決定)
|
|
340
|
+
↓
|
|
341
|
+
variant-interpretation
|
|
342
|
+
(変異臨床解釈)
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
## パイプライン出力
|
|
346
|
+
|
|
347
|
+
| ファイル | 説明 | 次スキル |
|
|
348
|
+
|---------|------|---------|
|
|
349
|
+
| `results/projects.csv` | プロジェクト一覧 | → cancer-genomics |
|
|
350
|
+
| `results/mutations.csv` | 体細胞変異 | → variant-interpretation |
|
|
351
|
+
| `results/cancer_stats.csv` | がん種統計 | → precision-oncology |
|
|
@@ -6,6 +6,10 @@ description: |
|
|
|
6
6
|
(NIH メタボロミクスリポジトリ) の 3 大メタボロミクス DB を統合した
|
|
7
7
|
代謝物同定、パスウェイマッピング、バイオマーカー発見、
|
|
8
8
|
RefMet 標準化命名パイプライン。13 の ToolUniverse SMCP ツールと連携。
|
|
9
|
+
tu_tools:
|
|
10
|
+
- key: metacyc
|
|
11
|
+
name: MetaCyc
|
|
12
|
+
description: 代謝パスウェイ・反応・化合物データベース
|
|
9
13
|
---
|
|
10
14
|
|
|
11
15
|
# Scientific Metabolomics Databases
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-opentargets-genetics
|
|
3
|
+
description: |
|
|
4
|
+
Open Targets Platform 遺伝学スキル。Open Targets Platform
|
|
5
|
+
GraphQL API を用いた標的-疾患アソシエーション・薬剤
|
|
6
|
+
エビデンス・L2G 遺伝的関連・ファーマコゲノミクス検索。
|
|
7
|
+
ToolUniverse 連携: opentarget。
|
|
8
|
+
tu_tools:
|
|
9
|
+
- key: opentarget
|
|
10
|
+
name: Open Targets
|
|
11
|
+
description: 標的-疾患アソシエーション GraphQL API
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# Scientific Open Targets Genetics
|
|
15
|
+
|
|
16
|
+
Open Targets Platform GraphQL API を活用した標的-疾患
|
|
17
|
+
アソシエーションスコア取得・薬剤エビデンス検索・L2G
|
|
18
|
+
遺伝的関連パイプラインを提供する。
|
|
19
|
+
|
|
20
|
+
## When to Use
|
|
21
|
+
|
|
22
|
+
- 遺伝子 (標的) と疾患のアソシエーションスコアを検索するとき
|
|
23
|
+
- 薬剤エビデンスデータを取得するとき
|
|
24
|
+
- GWAS バリアントから遺伝子を L2G スコアでマッピングするとき
|
|
25
|
+
- 標的の安全性プロファイルを確認するとき
|
|
26
|
+
- ファーマコゲノミクスデータを検索するとき
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
## 1. 標的-疾患アソシエーション
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
import requests
|
|
36
|
+
import pandas as pd
|
|
37
|
+
|
|
38
|
+
# Open Targets Platform GraphQL endpoint that the request helpers in this skill POST their queries to.
OT_API = ("https://api.platform.opentargets.org"
|
|
39
|
+
"/api/v4/graphql")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def ot_target_disease_assoc(target_id, limit=25):
    """
    Fetch target-disease associations from Open Targets.

    POSTs a GraphQL query to ``OT_API`` and returns one row per
    associated disease. Each datatype score of an association becomes
    its own column, named after the datatype id.

    Parameters:
        target_id: str — Ensembl gene ID
            (e.g. "ENSG00000012048" = BRCA1).
        limit: int — maximum number of results (page size).

    Returns:
        pandas.DataFrame with the columns target_id, target_symbol,
        disease_id, disease_name, overall_score plus one column per
        datatype score. Empty when the API returns no target for
        ``target_id``.

    Raises:
        requests.HTTPError: raised by ``raise_for_status()`` when the
            server answers with an error status.
    """
    query = """
    query targetDisease($id: String!, $size: Int!) {
      target(ensemblId: $id) {
        id
        approvedSymbol
        associatedDiseases(page: {size: $size, index: 0}) {
          count
          rows {
            disease { id name }
            score
            datatypeScores {
              componentId: id
              score
            }
          }
        }
      }
    }
    """
    variables = {"id": target_id, "size": limit}
    resp = requests.post(OT_API,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()

    # "data" or "target" may be null (e.g. unknown Ensembl ID);
    # indexing into None would raise TypeError.
    data = (resp.json().get("data") or {}).get("target")
    if not data:
        print(f"OT associations: {target_id} "
              f"→ 0/0 diseases")
        return pd.DataFrame()

    associated = data.get("associatedDiseases") or {}

    rows = []
    for r in associated.get("rows") or []:
        row = {
            "target_id": target_id,
            "target_symbol": data["approvedSymbol"],
            "disease_id": r["disease"]["id"],
            "disease_name": r["disease"]["name"],
            "overall_score": r["score"],
        }
        # Spread the per-datatype scores into individual columns.
        for dt in r.get("datatypeScores") or []:
            row[dt["componentId"]] = dt["score"]
        rows.append(row)

    df = pd.DataFrame(rows)
    total = associated.get("count", 0)
    print(f"OT associations: {data['approvedSymbol']} "
          f"→ {len(df)}/{total} diseases")
    return df
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## 2. 薬剤エビデンス
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
def ot_drug_evidence(target_id, disease_id, limit=50):
    """
    Fetch drug evidence for a target-disease pair from Open Targets.

    POSTs a GraphQL query to ``OT_API`` that asks for the disease's
    evidence rows restricted to the given target and to the "chembl"
    data source.

    Parameters:
        target_id: str — Ensembl gene ID.
        disease_id: str — EFO disease ID
            (e.g. "EFO_0000305" = breast carcinoma).
        limit: int — maximum number of results (``size``).

    Returns:
        pandas.DataFrame with the columns disease, drug_id, drug_name,
        drug_type, max_phase, clinical_phase, clinical_status, moa
        (action types joined by "; ") and score. Empty when the API
        returns no disease or no evidence rows.

    Raises:
        requests.HTTPError: raised by ``raise_for_status()`` when the
            server answers with an error status.
    """
    query = """
    query drugEvidence($ensemblId: String!,
                       $efoId: String!,
                       $size: Int!) {
      disease(efoId: $efoId) {
        id
        name
        evidences(
          ensemblIds: [$ensemblId]
          datasourceIds: ["chembl"]
          size: $size
        ) {
          count
          rows {
            id
            score
            drug {
              id name drugType
              maximumClinicalTrialPhase
              mechanismsOfAction {
                rows { actionType }
              }
            }
            clinicalPhase
            clinicalStatus
            urls { niceName url }
          }
        }
      }
    }
    """
    variables = {"ensemblId": target_id,
                 "efoId": disease_id,
                 "size": limit}
    resp = requests.post(OT_API,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()

    # Any level of the response may be an explicit null (unknown
    # disease, evidence row without a drug, ...). `dict.get(key, {})`
    # does not protect against that, hence the `or {}` fallbacks.
    data = (resp.json().get("data") or {}).get("disease") or {}
    evidence_rows = (data.get("evidences") or {}).get("rows") or []

    results = []
    for ev in evidence_rows:
        drug = ev.get("drug") or {}
        moas = drug.get("mechanismsOfAction") or {}
        moa_list = [m["actionType"]
                    for m in moas.get("rows") or []]
        results.append({
            "disease": data["name"],
            "drug_id": drug.get("id", ""),
            "drug_name": drug.get("name", ""),
            "drug_type": drug.get("drugType", ""),
            "max_phase": drug.get(
                "maximumClinicalTrialPhase", 0),
            "clinical_phase": ev.get("clinicalPhase", ""),
            "clinical_status": ev.get(
                "clinicalStatus", ""),
            "moa": "; ".join(moa_list),
            "score": ev.get("score", 0),
        })

    df = pd.DataFrame(results)
    print(f"OT drug evidence: {len(df)} entries")
    return df
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## 3. L2G 遺伝的関連 (Locus-to-Gene)
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
def ot_l2g_variants(study_id, limit=50):
    """
    Fetch locus-to-gene (L2G) variant-gene mappings for a GWAS study.

    POSTs a GraphQL query for ``studyLocus2GeneTable`` to the Open
    Targets Genetics endpoint and returns the rows sorted by
    ``l2g_score`` in descending order.

    Parameters:
        study_id: str — GWAS study ID (e.g. "GCST004988").
        limit: int — maximum number of results (``pageSize``).

    Returns:
        pandas.DataFrame with the columns gene_id, gene_symbol,
        variant_id, rsid, l2g_score, distance_score,
        interaction_score, qtl_score, pathogenicity and has_coloc.
        Empty (no columns) when the API returns no rows.

    Raises:
        requests.HTTPError: raised by ``raise_for_status()`` when the
            server answers with an error status.
    """
    # OT Genetics API
    # NOTE(review): this is a separate host from the Platform API used
    # by the other helpers — confirm the endpoint is still served and
    # that the query arguments match its current schema.
    OT_GENETICS = ("https://api.genetics.opentargets.org"
                   "/graphql")
    query = """
    query l2g($studyId: String!, $size: Int!) {
      studyLocus2GeneTable(studyId: $studyId,
                           pageSize: $size) {
        rows {
          gene { id symbol }
          variant { id rsId }
          yProbaModel
          yProbaDistance
          yProbaInteraction
          yProbaMolecularQTL
          yProbaPathogenicity
          hasColoc
          distanceToLocus
        }
      }
    }
    """
    variables = {"studyId": study_id, "size": limit}
    resp = requests.post(OT_GENETICS,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()

    # "data" or the table itself may be null (e.g. unknown study ID);
    # indexing into None would raise TypeError.
    table = ((resp.json().get("data") or {})
             .get("studyLocus2GeneTable") or {})

    rows = []
    for r in table.get("rows") or []:
        gene = r.get("gene") or {}
        variant = r.get("variant") or {}
        rows.append({
            "gene_id": gene.get("id"),
            "gene_symbol": gene.get("symbol"),
            "variant_id": variant.get("id"),
            "rsid": variant.get("rsId"),
            "l2g_score": r.get("yProbaModel"),
            "distance_score": r.get("yProbaDistance"),
            "interaction_score": r.get("yProbaInteraction"),
            "qtl_score": r.get("yProbaMolecularQTL"),
            "pathogenicity": r.get("yProbaPathogenicity"),
            "has_coloc": r.get("hasColoc"),
        })

    df = pd.DataFrame(rows)
    if not df.empty:
        # An empty frame has no columns, so sorting must be skipped.
        df = df.sort_values("l2g_score", ascending=False)
    print(f"OT L2G: {study_id} → {len(df)} gene mappings")
    return df
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## 4. Open Targets 統合パイプライン
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
def ot_pipeline(gene_symbol, ensembl_id,
                output_dir="results"):
    """
    Run the combined Open Targets pipeline and write CSV results.

    Steps:
        1. Fetch target-disease associations for ``ensembl_id`` and
           write ``ot_associations.csv``.
        2. Fetch drug evidence for the first listed disease and write
           ``ot_drugs.csv`` (skipped when there are no associations).

    Parameters:
        gene_symbol: str — gene symbol (e.g. "BRCA1"); used only in
            the progress message.
        ensembl_id: str — Ensembl gene ID.
        output_dir: str — output directory (created if missing).

    Returns:
        dict with the keys "associations" (DataFrame) and "drugs"
        (DataFrame, empty when step 2 was skipped).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Target-disease associations
    assoc = ot_target_disease_assoc(ensembl_id)
    assoc.to_csv(output_dir / "ot_associations.csv",
                 index=False)

    # 2) Drug evidence for the first listed disease
    drugs = pd.DataFrame()
    if not assoc.empty:
        top_disease = assoc.iloc[0]["disease_id"]
        drugs = ot_drug_evidence(ensembl_id, top_disease)
        drugs.to_csv(output_dir / "ot_drugs.csv",
                     index=False)

    print(f"OT pipeline: {gene_symbol} → {output_dir}")
    # "associations" is kept as before; "drugs" exposes the table that
    # was previously written to disk but not returned.
    return {"associations": assoc, "drugs": drugs}
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
---
|
|
274
|
+
|
|
275
|
+
## ToolUniverse 連携
|
|
276
|
+
|
|
277
|
+
| TU Key | ツール名 | 連携内容 |
|
|
278
|
+
|--------|---------|---------|
|
|
279
|
+
| `opentarget` | Open Targets | 標的-疾患アソシエーション GraphQL (~55 tools) |
|
|
280
|
+
|
|
281
|
+
## パイプライン統合
|
|
282
|
+
|
|
283
|
+
```
|
|
284
|
+
disease-research → opentargets-genetics → drug-target-profiling
|
|
285
|
+
(疾患遺伝子) (OT Platform API) (標的プロファイリング)
|
|
286
|
+
│ │ ↓
|
|
287
|
+
variant-interpretation ────┘ pharmacogenomics
|
|
288
|
+
(ClinVar/VEP) │ (薬理ゲノミクス)
|
|
289
|
+
↓
|
|
290
|
+
gnomad-variants
|
|
291
|
+
(集団頻度)
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
## パイプライン出力
|
|
295
|
+
|
|
296
|
+
| ファイル | 説明 | 次スキル |
|
|
297
|
+
|---------|------|---------|
|
|
298
|
+
| `results/ot_associations.csv` | 標的-疾患スコア | → disease-research |
|
|
299
|
+
| `results/ot_drugs.csv` | 薬剤エビデンス | → drug-target-profiling |
|