@nahisaho/satori 0.18.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -23
- package/package.json +1 -1
- package/src/.github/skills/scientific-civic-evidence/SKILL.md +292 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +4 -0
- package/src/.github/skills/scientific-depmap-dependencies/SKILL.md +239 -0
- package/src/.github/skills/scientific-disease-research/SKILL.md +4 -0
- package/src/.github/skills/scientific-drugbank-resources/SKILL.md +269 -0
- package/src/.github/skills/scientific-gnomad-variants/SKILL.md +356 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +4 -0
- package/src/.github/skills/scientific-opentargets-genetics/SKILL.md +299 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +4 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-rcsb-pdb-search/SKILL.md +280 -0
- package/src/.github/skills/scientific-reactome-pathways/SKILL.md +242 -0
- package/src/.github/skills/scientific-uniprot-proteome/SKILL.md +273 -0
- package/src/.github/skills/scientific-variant-interpretation/SKILL.md +4 -0
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-gnomad-variants
|
|
3
|
+
description: |
|
|
4
|
+
gnomAD バリアントスキル。gnomAD (Genome Aggregation Database)
|
|
5
|
+
GraphQL API を用いた集団アレル頻度・遺伝子制約スコア
|
|
6
|
+
(pLI/LOEUF)・リージョンクエリ・トランスクリプトレベル
|
|
7
|
+
データ取得。ToolUniverse 連携: gnomad。
|
|
8
|
+
tu_tools:
|
|
9
|
+
- key: gnomad
|
|
10
|
+
name: gnomAD
|
|
11
|
+
description: ゲノム集約データベース GraphQL API
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# Scientific gnomAD Variants
|
|
15
|
+
|
|
16
|
+
gnomAD (Genome Aggregation Database) GraphQL API を活用した
|
|
17
|
+
集団アレル頻度取得・遺伝子制約スコア (pLI/LOEUF/Z-scores)・
|
|
18
|
+
リージョンクエリ・トランスクリプトレベルデータパイプラインを
|
|
19
|
+
提供する。
|
|
20
|
+
|
|
21
|
+
## When to Use
|
|
22
|
+
|
|
23
|
+
- バリアントの集団アレル頻度 (AF) を確認するとき
|
|
24
|
+
- 遺伝子の LoF 不耐性 (pLI/LOEUF) を評価するとき
|
|
25
|
+
- ゲノムリージョン内のバリアントを列挙するとき
|
|
26
|
+
- 集団別 (gnomAD v4 exome/genome) 頻度を比較するとき
|
|
27
|
+
- ClinVar/VEP アノテーションと頻度を統合するとき
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
## 1. バリアント集団頻度
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import requests
|
|
37
|
+
import pandas as pd
|
|
38
|
+
|
|
39
|
+
# gnomAD GraphQL endpoint that the query helpers in this file POST to.
GNOMAD_API = "https://gnomad.broadinstitute.org/api"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def gnomad_variant(variant_id, dataset="gnomad_r4"):
    """
    Fetch population allele frequencies for one variant from gnomAD.

    Parameters:
        variant_id: str — variant ID in chrom-pos-ref-alt form
            (e.g. "1-55516888-G-A")
        dataset: str — gnomAD dataset ID
            (e.g. "gnomad_r4", "gnomad_r3")

    Returns:
        dict — flat record with the variant coordinates, joined rsIDs,
        the overall exome/genome AF/AC/AN/homozygote counts, and one
        "<exome|genome>_<population id>_af" entry per population.
        An empty dict is returned when the API yields no variant.

    Raises:
        requests.HTTPError — when the API answers with an error status.
    """
    query = """
    query gnomadVariant($variantId: String!,
                        $dataset: DatasetId!) {
      variant(variantId: $variantId,
              dataset: $dataset) {
        variant_id
        chrom
        pos
        ref
        alt
        exome {
          ac
          an
          af
          ac_hom
          populations {
            id
            ac
            an
            af
          }
        }
        genome {
          ac
          an
          af
          ac_hom
          populations {
            id
            ac
            an
            af
          }
        }
        rsids
        transcript_consequences {
          gene_symbol
          transcript_id
          consequence
          hgvsc
          hgvsp
          lof
          lof_filter
          polyphen_prediction
          sift_prediction
        }
      }
    }
    """
    variables = {"variantId": variant_id,
                 "dataset": dataset}
    resp = requests.post(GNOMAD_API,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()

    # A GraphQL error response carries "data": null, so the key can be
    # present with a None value; `or {}` handles both absent and null.
    data = (resp.json().get("data") or {}).get("variant")

    if not data:
        print(f"gnomAD: {variant_id} not found")
        return {}

    # Either callset may be null when the variant was not observed in it.
    callsets = (("exome", data.get("exome") or {}),
                ("genome", data.get("genome") or {}))

    result = {
        "variant_id": data["variant_id"],
        "chrom": data["chrom"],
        "pos": data["pos"],
        "ref": data["ref"],
        "alt": data["alt"],
        "rsids": "; ".join(data.get("rsids") or []),
    }

    # Overall counts. `or 0` also maps an explicit null to 0 so the
    # values stay numeric (the summary line below formats exome_af).
    for label, counts in callsets:
        result[f"{label}_af"] = counts.get("af") or 0
        result[f"{label}_ac"] = counts.get("ac") or 0
        result[f"{label}_an"] = counts.get("an") or 0
        result[f"{label}_hom"] = counts.get("ac_hom") or 0

    # Per-population frequencies, for both callsets requested in the query.
    for label, counts in callsets:
        for pop in counts.get("populations") or []:
            result[f"{label}_{pop['id']}_af"] = pop.get("af") or 0

    print(f"gnomAD variant: {variant_id} "
          f"(exome AF={result['exome_af']:.6f})")
    return result
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## 2. 遺伝子制約スコア (pLI/LOEUF)
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
def gnomad_gene_constraint(gene_symbol,
                           dataset="gnomad_r4"):
    """
    Fetch gene-level constraint scores (pLI / LOEUF / Z-scores) from gnomAD.

    Parameters:
        gene_symbol: str — gene symbol (e.g. "BRCA1")
        dataset: str — kept for signature compatibility. The query below
            has no field that takes a dataset argument, so the value is
            not sent to the API.

    Returns:
        dict — gene_id, symbol and the constraint metrics (None for any
        metric the API does not return). LOEUF is the upper bound of the
        observed/expected LoF ratio (oe_lof_upper).
        An empty dict is returned when the API yields no gene.

    Raises:
        requests.HTTPError — when the API answers with an error status.
    """
    # Only variables that the operation actually uses may be declared:
    # GraphQL validation rejects an operation with an unused variable,
    # so `$dataset` must not appear in the signature here.
    query = """
    query geneConstraint($gene: String!) {
      gene(gene_symbol: $gene,
           reference_genome: GRCh38) {
        gene_id
        symbol
        gnomad_constraint {
          exp_lof
          exp_mis
          exp_syn
          obs_lof
          obs_mis
          obs_syn
          oe_lof
          oe_lof_lower
          oe_lof_upper
          oe_mis
          oe_syn
          lof_z
          mis_z
          syn_z
          pLI
        }
      }
    }
    """
    variables = {"gene": gene_symbol}
    resp = requests.post(GNOMAD_API,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()

    # "data" is null on a GraphQL error response; treat it like a miss.
    gene = (resp.json().get("data") or {}).get("gene")

    if not gene:
        print(f"gnomAD gene: {gene_symbol} not found")
        return {}

    # The constraint object itself can be null for a gene.
    c = gene.get("gnomad_constraint") or {}
    result = {
        "gene_id": gene["gene_id"],
        "symbol": gene["symbol"],
        "pLI": c.get("pLI"),
        "LOEUF": c.get("oe_lof_upper"),
        "oe_lof": c.get("oe_lof"),
        "oe_mis": c.get("oe_mis"),
        "oe_syn": c.get("oe_syn"),
        "lof_z": c.get("lof_z"),
        "mis_z": c.get("mis_z"),
        "syn_z": c.get("syn_z"),
        "exp_lof": c.get("exp_lof"),
        "obs_lof": c.get("obs_lof"),
    }

    # The summary line needs numbers; missing scores are shown as 0 here
    # while the returned dict keeps them as None.
    pli = result.get("pLI") or 0
    loeuf = result.get("LOEUF") or 0
    print(f"gnomAD constraint: {gene_symbol} "
          f"(pLI={pli:.3f}, LOEUF={loeuf:.3f})")
    return result
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## 3. リージョンクエリ
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
def gnomad_region(chrom, start, stop,
                  dataset="gnomad_r4", limit=500):
    """
    List the gnomAD variants that fall inside a genomic region.

    Parameters:
        chrom: str — chromosome (e.g. "1")
        start: int — start position (GRCh38)
        stop: int — end position
        dataset: str — gnomAD dataset ID
        limit: int — maximum number of rows kept (applied client-side
            to the list the API returns)

    Returns:
        pandas.DataFrame — one row per variant with the columns
        variant_id, pos, ref, alt, rsids, exome_af, genome_af.
        The columns are present even when no variant is returned.

    Raises:
        requests.HTTPError — when the API answers with an error status.
    """
    query = """
    query regionVariants($chrom: String!,
                         $start: Int!,
                         $stop: Int!,
                         $dataset: DatasetId!) {
      region(chrom: $chrom, start: $start,
             stop: $stop,
             reference_genome: GRCh38) {
        variants(dataset: $dataset) {
          variant_id
          pos
          ref
          alt
          exome { af ac an }
          genome { af ac an }
          rsids
        }
      }
    }
    """
    variables = {"chrom": chrom, "start": start,
                 "stop": stop, "dataset": dataset}
    resp = requests.post(GNOMAD_API,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()

    # Both "data" and "region" may be explicit nulls in the response,
    # which a `.get(key, {})` default would not catch.
    region = (resp.json().get("data") or {}).get("region") or {}
    variants = (region.get("variants") or [])[:limit]

    rows = []
    for v in variants:
        # A callset is null when the variant was not observed in it.
        exome = v.get("exome") or {}
        genome = v.get("genome") or {}
        rows.append({
            "variant_id": v["variant_id"],
            "pos": v["pos"],
            "ref": v["ref"],
            "alt": v["alt"],
            "rsids": "; ".join(v.get("rsids") or []),
            # `or 0` keeps the columns numeric when af is null.
            "exome_af": exome.get("af") or 0,
            "genome_af": genome.get("af") or 0,
        })

    # Fixed columns keep the frame (and any CSV written from it)
    # well-formed when the region has no variants.
    columns = ["variant_id", "pos", "ref", "alt",
               "rsids", "exome_af", "genome_af"]
    df = pd.DataFrame(rows, columns=columns)
    print(f"gnomAD region: {chrom}:{start}-{stop} "
          f"→ {len(df)} variants")
    return df
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## 4. gnomAD 統合パイプライン
|
|
282
|
+
|
|
283
|
+
```python
|
|
284
|
+
def gnomad_pipeline(gene_symbol, chrom, start, stop,
                    output_dir="results", af_threshold=0.01):
    """
    Run the gnomAD workflow: gene constraint, region variants, rare subset.

    Parameters:
        gene_symbol: str — gene symbol
        chrom: str — chromosome
        start: int — start position
        stop: int — end position
        output_dir: str — directory the CSV files are written to
            (created if missing)
        af_threshold: float — a variant is kept as rare when its allele
            frequency is below this value in both exome and genome data

    Returns:
        dict — {"constraint": dict, "variants": DataFrame,
        "rare": DataFrame}.

    Files written:
        gnomad_constraint.csv, gnomad_region.csv and, when the region
        has variants, gnomad_rare.csv.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Gene constraint scores
    constraint = gnomad_gene_constraint(gene_symbol)
    pd.DataFrame([constraint]).to_csv(
        output_dir / "gnomad_constraint.csv",
        index=False)

    # 2) Variants in the region
    variants = gnomad_region(chrom, start, stop)
    variants.to_csv(
        output_dir / "gnomad_region.csv",
        index=False)

    # 3) Rare variants (AF < af_threshold)
    rare = variants
    if not variants.empty:
        # A frequency missing from one callset is reported as 0 (or may
        # be null); coerce so the comparison is always numeric.
        exome_af = pd.to_numeric(variants["exome_af"],
                                 errors="coerce").fillna(0)
        genome_af = pd.to_numeric(variants["genome_af"],
                                  errors="coerce").fillna(0)
        # Both frequencies must be below the threshold. With `|`, a
        # variant absent from one callset (AF 0) would pass no matter
        # how common it is in the other one.
        rare = variants[
            (exome_af < af_threshold) &
            (genome_af < af_threshold)
        ]
        rare.to_csv(
            output_dir / "gnomad_rare.csv",
            index=False)
        print(f"  Rare variants: {len(rare)}")

    print(f"gnomAD pipeline: {gene_symbol} "
          f"→ {output_dir}")
    return {"constraint": constraint,
            "variants": variants,
            "rare": rare}
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
---
|
|
330
|
+
|
|
331
|
+
## ToolUniverse 連携
|
|
332
|
+
|
|
333
|
+
| TU Key | ツール名 | 連携内容 |
|
|
334
|
+
|--------|---------|---------|
|
|
335
|
+
| `gnomad` | gnomAD | ゲノム集約データベース GraphQL (~7 tools) |
|
|
336
|
+
|
|
337
|
+
## パイプライン統合
|
|
338
|
+
|
|
339
|
+
```
|
|
340
|
+
variant-interpretation → gnomad-variants → variant-effect-prediction
|
|
341
|
+
(ClinVar バリアント) (gnomAD API) (VEP/CADD/REVEL)
|
|
342
|
+
│ │ ↓
|
|
343
|
+
civic-evidence ──────────────┘ rare-disease-genetics
|
|
344
|
+
(CIViC 臨床) │ (希少疾患遺伝学)
|
|
345
|
+
↓
|
|
346
|
+
opentargets-genetics
|
|
347
|
+
(OT 遺伝的関連)
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
## パイプライン出力
|
|
351
|
+
|
|
352
|
+
| ファイル | 説明 | 次スキル |
|
|
353
|
+
|---------|------|---------|
|
|
354
|
+
| `results/gnomad_constraint.csv` | 遺伝子制約 | → rare-disease-genetics |
|
|
355
|
+
| `results/gnomad_region.csv` | リージョンバリアント | → variant-interpretation |
|
|
356
|
+
| `results/gnomad_rare.csv` | レアバリアント | → variant-effect-prediction |
|
|
@@ -6,6 +6,10 @@ description: |
|
|
|
6
6
|
(NIH メタボロミクスリポジトリ) の 3 大メタボロミクス DB を統合した
|
|
7
7
|
代謝物同定、パスウェイマッピング、バイオマーカー発見、
|
|
8
8
|
RefMet 標準化命名パイプライン。13 の ToolUniverse SMCP ツールと連携。
|
|
9
|
+
tu_tools:
|
|
10
|
+
- key: metacyc
|
|
11
|
+
name: MetaCyc
|
|
12
|
+
description: 代謝パスウェイ・反応・化合物データベース
|
|
9
13
|
---
|
|
10
14
|
|
|
11
15
|
# Scientific Metabolomics Databases
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-opentargets-genetics
|
|
3
|
+
description: |
|
|
4
|
+
Open Targets Platform 遺伝学スキル。Open Targets Platform
|
|
5
|
+
GraphQL API を用いた標的-疾患アソシエーション・薬剤
|
|
6
|
+
エビデンス・L2G 遺伝的関連・ファーマコゲノミクス検索。
|
|
7
|
+
ToolUniverse 連携: opentarget。
|
|
8
|
+
tu_tools:
|
|
9
|
+
- key: opentarget
|
|
10
|
+
name: Open Targets
|
|
11
|
+
description: 標的-疾患アソシエーション GraphQL API
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# Scientific Open Targets Genetics
|
|
15
|
+
|
|
16
|
+
Open Targets Platform GraphQL API を活用した標的-疾患
|
|
17
|
+
アソシエーションスコア取得・薬剤エビデンス検索・L2G
|
|
18
|
+
遺伝的関連パイプラインを提供する。
|
|
19
|
+
|
|
20
|
+
## When to Use
|
|
21
|
+
|
|
22
|
+
- 遺伝子 (標的) と疾患のアソシエーションスコアを検索するとき
|
|
23
|
+
- 薬剤エビデンスデータを取得するとき
|
|
24
|
+
- GWAS バリアントから遺伝子を L2G スコアでマッピングするとき
|
|
25
|
+
- 標的の安全性プロファイルを確認するとき
|
|
26
|
+
- ファーマコゲノミクスデータを検索するとき
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
## 1. 標的-疾患アソシエーション
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
import requests
|
|
36
|
+
import pandas as pd
|
|
37
|
+
|
|
38
|
+
# Open Targets Platform GraphQL endpoint (API v4).
OT_API = "https://api.platform.opentargets.org/api/v4/graphql"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def ot_target_disease_assoc(target_id, limit=25):
    """
    Fetch the diseases associated with a target from Open Targets.

    Parameters:
        target_id: str — Ensembl gene ID
            (e.g. "ENSG00000012048" = BRCA1)
        limit: int — maximum number of associations requested

    Returns:
        pandas.DataFrame — one row per disease with target_id,
        target_symbol, disease_id, disease_name, overall_score and one
        column per datatype score id returned for that row.
        An empty DataFrame is returned when the target is not found.

    Raises:
        requests.HTTPError — when the API answers with an error status.
    """
    query = """
    query targetDisease($id: String!, $size: Int!) {
      target(ensemblId: $id) {
        id
        approvedSymbol
        associatedDiseases(page: {size: $size, index: 0}) {
          count
          rows {
            disease { id name }
            score
            datatypeScores {
              componentId: id
              score
            }
          }
        }
      }
    }
    """
    variables = {"id": target_id, "size": limit}
    resp = requests.post(OT_API,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()

    # "data" (on a GraphQL error) and "target" (unknown ID) can both be
    # null; subscripting them directly would raise instead of reporting.
    data = (resp.json().get("data") or {}).get("target")
    if not data:
        print(f"OT associations: {target_id} not found")
        return pd.DataFrame()

    associated = data.get("associatedDiseases") or {}

    rows = []
    for r in associated.get("rows") or []:
        row = {
            "target_id": target_id,
            "target_symbol": data["approvedSymbol"],
            "disease_id": r["disease"]["id"],
            "disease_name": r["disease"]["name"],
            "overall_score": r["score"],
        }
        # Spread the per-datatype scores into their own columns, keyed
        # by the id that the query aliases to `componentId`.
        for dt in r.get("datatypeScores") or []:
            row[dt["componentId"]] = dt["score"]
        rows.append(row)

    df = pd.DataFrame(rows)
    total = associated.get("count", 0)
    print(f"OT associations: {data['approvedSymbol']} "
          f"→ {len(df)}/{total} diseases")
    return df
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## 2. 薬剤エビデンス
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
def ot_drug_evidence(target_id, disease_id, limit=50):
    """
    Fetch drug evidence for a target-disease pair from Open Targets.

    Only evidence from the "chembl" datasource is requested.

    Parameters:
        target_id: str — Ensembl gene ID
        disease_id: str — EFO disease ID
            (e.g. "EFO_0000305" = breast carcinoma)
        limit: int — maximum number of evidence rows requested

    Returns:
        pandas.DataFrame — one row per evidence entry with disease,
        drug_id, drug_name, drug_type, max_phase, clinical_phase,
        clinical_status, moa (action types joined by "; ") and score.
        An empty DataFrame is returned when the disease is not found.

    Raises:
        requests.HTTPError — when the API answers with an error status.
    """
    query = """
    query drugEvidence($ensemblId: String!,
                       $efoId: String!,
                       $size: Int!) {
      disease(efoId: $efoId) {
        id
        name
        evidences(
          ensemblIds: [$ensemblId]
          datasourceIds: ["chembl"]
          size: $size
        ) {
          count
          rows {
            id
            score
            drug {
              id name drugType
              maximumClinicalTrialPhase
              mechanismsOfAction {
                rows { actionType }
              }
            }
            clinicalPhase
            clinicalStatus
            urls { niceName url }
          }
        }
      }
    }
    """
    variables = {"ensemblId": target_id,
                 "efoId": disease_id,
                 "size": limit}
    resp = requests.post(OT_API,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()

    # "data" and "disease" can be explicit nulls in the response.
    data = (resp.json().get("data") or {}).get("disease")
    if not data:
        print(f"OT drug evidence: {disease_id} not found")
        return pd.DataFrame()

    evidences = data.get("evidences") or {}

    results = []
    for ev in evidences.get("rows") or []:
        # `.get(key, {})` only covers a missing key; a null drug or
        # mechanismsOfAction object needs the `or {}` fallback.
        drug = ev.get("drug") or {}
        moas = drug.get("mechanismsOfAction") or {}
        moa_list = [m["actionType"]
                    for m in moas.get("rows") or []
                    if m.get("actionType")]
        results.append({
            "disease": data["name"],
            "drug_id": drug.get("id", ""),
            "drug_name": drug.get("name", ""),
            "drug_type": drug.get("drugType", ""),
            "max_phase": drug.get(
                "maximumClinicalTrialPhase", 0),
            "clinical_phase": ev.get("clinicalPhase", ""),
            "clinical_status": ev.get(
                "clinicalStatus", ""),
            "moa": "; ".join(moa_list),
            "score": ev.get("score", 0),
        })

    df = pd.DataFrame(results)
    print(f"OT drug evidence: {len(df)} entries")
    return df
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## 3. L2G 遺伝的関連 (Locus-to-Gene)
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
def ot_l2g_variants(study_id, limit=50):
    """
    Fetch Locus-to-Gene (L2G) variant-gene mappings for a GWAS study.

    Parameters:
        study_id: str — GWAS study ID (e.g. "GCST004988")
        limit: int — maximum number of rows requested

    Returns:
        pandas.DataFrame — one row per gene mapping with gene_id,
        gene_symbol, variant_id, rsid, l2g_score, the distance /
        interaction / QTL / pathogenicity component scores, has_coloc
        and distance_to_locus, sorted by l2g_score (highest first).
        Empty when the API returns no rows.

    Raises:
        requests.HTTPError — when the API answers with an error status.
    """
    # Open Targets Genetics API (separate from the Platform endpoint).
    # NOTE(review): the standalone Genetics service has been announced
    # as being merged into the Open Targets Platform — confirm that this
    # endpoint and the `studyLocus2GeneTable` arguments are still served.
    OT_GENETICS = ("https://api.genetics.opentargets.org"
                   "/graphql")
    query = """
    query l2g($studyId: String!, $size: Int!) {
      studyLocus2GeneTable(studyId: $studyId,
                           pageSize: $size) {
        rows {
          gene { id symbol }
          variant { id rsId }
          yProbaModel
          yProbaDistance
          yProbaInteraction
          yProbaMolecularQTL
          yProbaPathogenicity
          hasColoc
          distanceToLocus
        }
      }
    }
    """
    variables = {"studyId": study_id, "size": limit}
    resp = requests.post(OT_GENETICS,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()

    # "data" and the table object can be explicit nulls in the response.
    table = ((resp.json().get("data") or {})
             .get("studyLocus2GeneTable") or {})

    rows = []
    for r in table.get("rows") or []:
        gene = r.get("gene") or {}
        variant = r.get("variant") or {}
        rows.append({
            "gene_id": gene.get("id"),
            "gene_symbol": gene.get("symbol"),
            "variant_id": variant.get("id"),
            "rsid": variant.get("rsId"),
            "l2g_score": r.get("yProbaModel"),
            "distance_score": r.get("yProbaDistance"),
            "interaction_score": r.get("yProbaInteraction"),
            "qtl_score": r.get("yProbaMolecularQTL"),
            "pathogenicity": r.get("yProbaPathogenicity"),
            "has_coloc": r.get("hasColoc"),
            # Requested by the query; expose it instead of dropping it.
            "distance_to_locus": r.get("distanceToLocus"),
        })

    df = pd.DataFrame(rows)
    if not df.empty:
        df = df.sort_values("l2g_score", ascending=False)
    print(f"OT L2G: {study_id} → {len(df)} gene mappings")
    return df
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## 4. Open Targets 統合パイプライン
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
def ot_pipeline(gene_symbol, ensembl_id,
                output_dir="results"):
    """
    Run the Open Targets workflow: associations, then top-disease drugs.

    Parameters:
        gene_symbol: str — gene symbol (e.g. "BRCA1"); used only in the
            progress message
        ensembl_id: str — Ensembl gene ID used for the API queries
        output_dir: str — directory the CSV files are written to
            (created if missing)

    Returns:
        dict — {"associations": DataFrame, "drugs": DataFrame}. "drugs"
        is an empty DataFrame when no association was found.

    Files written:
        ot_associations.csv and, when at least one association exists,
        ot_drugs.csv.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Target-disease associations
    assoc = ot_target_disease_assoc(ensembl_id)
    assoc.to_csv(output_dir / "ot_associations.csv",
                 index=False)

    # 2) Drug evidence for the first listed disease
    drugs = pd.DataFrame()
    if not assoc.empty:
        # Takes the first row as the "top" disease.
        # NOTE(review): presumably the API returns rows ordered by
        # score — confirm, or sort by overall_score before picking.
        top_disease = assoc.iloc[0]["disease_id"]
        drugs = ot_drug_evidence(ensembl_id, top_disease)
        drugs.to_csv(output_dir / "ot_drugs.csv",
                     index=False)

    print(f"OT pipeline: {gene_symbol} → {output_dir}")
    # Return the drug table too, so callers need not re-read the CSV.
    return {"associations": assoc, "drugs": drugs}
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
---
|
|
274
|
+
|
|
275
|
+
## ToolUniverse 連携
|
|
276
|
+
|
|
277
|
+
| TU Key | ツール名 | 連携内容 |
|
|
278
|
+
|--------|---------|---------|
|
|
279
|
+
| `opentarget` | Open Targets | 標的-疾患アソシエーション GraphQL (~55 tools) |
|
|
280
|
+
|
|
281
|
+
## パイプライン統合
|
|
282
|
+
|
|
283
|
+
```
|
|
284
|
+
disease-research → opentargets-genetics → drug-target-profiling
|
|
285
|
+
(疾患遺伝子) (OT Platform API) (標的プロファイリング)
|
|
286
|
+
│ │ ↓
|
|
287
|
+
variant-interpretation ────┘ pharmacogenomics
|
|
288
|
+
(ClinVar/VEP) │ (薬理ゲノミクス)
|
|
289
|
+
↓
|
|
290
|
+
gnomad-variants
|
|
291
|
+
(集団頻度)
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
## パイプライン出力
|
|
295
|
+
|
|
296
|
+
| ファイル | 説明 | 次スキル |
|
|
297
|
+
|---------|------|---------|
|
|
298
|
+
| `results/ot_associations.csv` | 標的-疾患スコア | → disease-research |
|
|
299
|
+
| `results/ot_drugs.csv` | 薬剤エビデンス | → drug-target-profiling |
|
|
@@ -6,6 +6,10 @@ description: |
|
|
|
6
6
|
ネットワーク構築・解析パイプライン。GO/KEGG 富化、相互作用パートナー発見、
|
|
7
7
|
組織特異的ネットワーク (HumanBase)、化合物-標的ネットワーク対応。
|
|
8
8
|
14 の ToolUniverse SMCP ツールと連携。
|
|
9
|
+
tu_tools:
|
|
10
|
+
- key: intact
|
|
11
|
+
name: IntAct
|
|
12
|
+
description: 分子相互作用データベース (EBI)
|
|
9
13
|
---
|
|
10
14
|
|
|
11
15
|
# Scientific Protein Interaction Network
|