@nahisaho/satori 0.17.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -38
- package/package.json +1 -1
- package/src/.github/skills/scientific-alphafold-structures/SKILL.md +256 -0
- package/src/.github/skills/scientific-arrayexpress-expression/SKILL.md +264 -0
- package/src/.github/skills/scientific-civic-evidence/SKILL.md +292 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +4 -0
- package/src/.github/skills/scientific-crossref-metadata/SKILL.md +313 -0
- package/src/.github/skills/scientific-depmap-dependencies/SKILL.md +239 -0
- package/src/.github/skills/scientific-disease-research/SKILL.md +4 -0
- package/src/.github/skills/scientific-drugbank-resources/SKILL.md +269 -0
- package/src/.github/skills/scientific-gnomad-variants/SKILL.md +356 -0
- package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +271 -0
- package/src/.github/skills/scientific-gwas-catalog/SKILL.md +267 -0
- package/src/.github/skills/scientific-icgc-cancer-data/SKILL.md +351 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +4 -0
- package/src/.github/skills/scientific-opentargets-genetics/SKILL.md +299 -0
- package/src/.github/skills/scientific-pharmgkb-pgx/SKILL.md +306 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +4 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-rcsb-pdb-search/SKILL.md +280 -0
- package/src/.github/skills/scientific-reactome-pathways/SKILL.md +242 -0
- package/src/.github/skills/scientific-semantic-scholar/SKILL.md +298 -0
- package/src/.github/skills/scientific-uniprot-proteome/SKILL.md +273 -0
- package/src/.github/skills/scientific-variant-interpretation/SKILL.md +4 -0
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-drugbank-resources
|
|
3
|
+
description: |
|
|
4
|
+
DrugBank リソーススキル。DrugBank API を用いた薬剤記述・
|
|
5
|
+
薬理情報・標的タンパク質・薬物相互作用検索。
|
|
6
|
+
ToolUniverse 連携: drugbank。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: drugbank
|
|
9
|
+
name: DrugBank
|
|
10
|
+
description: 薬剤データベース API
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Scientific DrugBank Resources
|
|
14
|
+
|
|
15
|
+
DrugBank API を活用した薬剤記述・薬理情報 (MOA)・標的タンパク質
|
|
16
|
+
検索・薬物相互作用 (DDI) パイプラインを提供する。
|
|
17
|
+
|
|
18
|
+
## When to Use
|
|
19
|
+
|
|
20
|
+
- 薬剤の基本情報 (名前・分類・構造) を検索するとき
|
|
21
|
+
- 薬理メカニズム (MOA) を調べるとき
|
|
22
|
+
- 標的タンパク質から薬剤を逆引き検索するとき
|
|
23
|
+
- 薬物相互作用 (DDI) を確認するとき
|
|
24
|
+
- 薬剤の ADMET プロパティを取得するとき
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Quick Start
|
|
29
|
+
|
|
30
|
+
## 1. 薬剤検索・基本情報
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import requests
|
|
34
|
+
import pandas as pd
|
|
35
|
+
|
|
36
|
+
# Base URL that the DrugBank request helpers in this skill build their endpoint paths on.
DRUGBANK_API = "https://api.drugbank.com/v1"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def drugbank_search(query, limit=25, api_key=None):
    """
    Search DrugBank drugs by free text.

    Sends ``GET {DRUGBANK_API}/drugs`` with ``q`` and ``per_page`` query
    parameters and flattens every returned record into one table row.

    Parameters:
        query: str — search text (e.g. "imatinib")
        limit: int — maximum number of results, sent as ``per_page``
        api_key: str — DrugBank API key; when given it is sent as a
            ``Bearer`` token in the ``Authorization`` header

    Returns:
        pandas.DataFrame with the columns drugbank_id, name, cas_number,
        drug_type, state and description (cut to 200 characters). The
        frame is empty when the response holds no records.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}

    resp = requests.get(
        f"{DRUGBANK_API}/drugs",
        params={"q": query, "per_page": limit},
        headers=headers,
        timeout=30,
    )
    resp.raise_for_status()
    # NOTE(review): assumes the endpoint answers with a JSON array of drug
    # objects — confirm against the DrugBank API reference. A null body is
    # treated as "no results" instead of failing in the loop below.
    records = resp.json() or []

    rows = [
        {
            "drugbank_id": d.get("drugbank_id", ""),
            "name": d.get("name", ""),
            "cas_number": d.get("cas_number", ""),
            "drug_type": d.get("type", ""),
            "state": d.get("state", ""),
            # A JSON null arrives as None, and None[:200] raises TypeError,
            # so fall back to "" before truncating.
            "description": (d.get("description") or "")[:200],
        }
        for d in records
    ]

    df = pd.DataFrame(rows)
    print(f"DrugBank search: '{query}' → {len(df)} drugs")
    return df
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def drugbank_drug_detail(drugbank_id, api_key=None):
    """
    Fetch the detail record of a single DrugBank drug.

    Parameters:
        drugbank_id: str — DrugBank ID (e.g. "DB01254")
        api_key: str — DrugBank API key; sent as a ``Bearer`` token when
            provided

    Returns:
        dict with the keys drugbank_id, name, description, indication,
        pharmacodynamics, mechanism_of_action, absorption, half_life,
        protein_binding and molecular_weight. A field missing from the
        response is reported as "".

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    # (output key, response key) — only molecular_weight is renamed.
    field_map = (
        ("drugbank_id", "drugbank_id"),
        ("name", "name"),
        ("description", "description"),
        ("indication", "indication"),
        ("pharmacodynamics", "pharmacodynamics"),
        ("mechanism_of_action", "mechanism_of_action"),
        ("absorption", "absorption"),
        ("half_life", "half_life"),
        ("protein_binding", "protein_binding"),
        ("molecular_weight", "average_molecular_weight"),
    )

    auth = {}
    if api_key:
        auth["Authorization"] = f"Bearer {api_key}"

    response = requests.get(
        f"{DRUGBANK_API}/drugs/{drugbank_id}", headers=auth, timeout=30
    )
    response.raise_for_status()
    payload = response.json()

    return {out_key: payload.get(src_key, "") for out_key, src_key in field_map}
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## 2. 標的タンパク質検索
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
def drugbank_targets(drugbank_id, api_key=None):
    """
    Fetch the target proteins recorded for a DrugBank drug.

    Parameters:
        drugbank_id: str — DrugBank ID
        api_key: str — DrugBank API key; sent as a ``Bearer`` token when
            provided

    Returns:
        pandas.DataFrame with the columns drugbank_id, target_name,
        organism, known_action, gene_name and uniprot_id — one row per
        target. The frame is empty when the response holds no targets.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}

    resp = requests.get(
        f"{DRUGBANK_API}/drugs/{drugbank_id}/targets",
        headers=headers,
        timeout=30,
    )
    resp.raise_for_status()
    # NOTE(review): assumes a JSON array of target objects, each with an
    # optional "polypeptide" object — confirm against the DrugBank API.
    targets = resp.json() or []

    rows = []
    for t in targets:
        polypeptide = t.get("polypeptide") or {}
        # Guard the nested object the same way as "polypeptide": a JSON
        # null would otherwise surface as None and break the chained .get().
        external_ids = polypeptide.get("external_identifiers") or {}
        rows.append({
            "drugbank_id": drugbank_id,
            "target_name": t.get("name", ""),
            "organism": t.get("organism", ""),
            "known_action": t.get("known_action", ""),
            "gene_name": polypeptide.get("gene_name", ""),
            "uniprot_id": external_ids.get("UniProtKB", ""),
        })

    df = pd.DataFrame(rows)
    print(f"DrugBank targets: {drugbank_id} "
          f"→ {len(df)} targets")
    return df
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## 3. 薬物相互作用 (DDI)
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
def drugbank_interactions(drugbank_id, api_key=None):
    """
    Fetch the drug–drug interactions (DDI) recorded for a DrugBank drug.

    Parameters:
        drugbank_id: str — DrugBank ID
        api_key: str — DrugBank API key; sent as a ``Bearer`` token when
            provided

    Returns:
        pandas.DataFrame with the columns drug_a (the queried ID),
        drug_b_id, drug_b_name and description (cut to 300 characters) —
        one row per interaction. The frame is empty when none are returned.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}

    resp = requests.get(
        f"{DRUGBANK_API}/drugs/{drugbank_id}/drug_interactions",
        headers=headers,
        timeout=30,
    )
    resp.raise_for_status()
    # NOTE(review): assumes a JSON array of interaction objects — confirm
    # against the DrugBank API reference.
    interactions = resp.json() or []

    rows = [
        {
            "drug_a": drugbank_id,
            "drug_b_id": inter.get("drugbank_id", ""),
            "drug_b_name": inter.get("name", ""),
            # A JSON null arrives as None, and None[:300] raises TypeError,
            # so fall back to "" before truncating.
            "description": (inter.get("description") or "")[:300],
        }
        for inter in interactions
    ]

    df = pd.DataFrame(rows)
    print(f"DrugBank DDI: {drugbank_id} "
          f"→ {len(df)} interactions")
    return df
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## 4. DrugBank 統合パイプライン
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
def drugbank_pipeline(drug_name, api_key=None,
                      output_dir="results"):
    """
    Run the DrugBank helpers end to end for one drug name.

    Searches for the drug, takes the first hit, then fetches its detail
    record, targets and drug–drug interactions. Every step is written to
    a CSV file inside ``output_dir`` (created if missing):
    drugbank_search.csv, drugbank_detail.csv, drugbank_targets.csv and
    drugbank_ddi.csv.

    Parameters:
        drug_name: str — drug name (e.g. "imatinib")
        api_key: str — DrugBank API key, forwarded to every helper
        output_dir: str — output directory

    Returns:
        dict. Always contains "search" (DataFrame of search hits). When at
        least one hit exists it also contains "detail" (dict), "targets"
        (DataFrame) and "ddi" (DataFrame).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Search
    results = drugbank_search(drug_name, api_key=api_key)
    results.to_csv(output_dir / "drugbank_search.csv", index=False)

    if results.empty:
        print(f"DrugBank: '{drug_name}' not found")
        return {"search": results}

    # Only the first search hit is profiled further.
    db_id = results.iloc[0]["drugbank_id"]

    # 2) Detail record
    detail = drugbank_drug_detail(db_id, api_key=api_key)
    pd.DataFrame([detail]).to_csv(
        output_dir / "drugbank_detail.csv", index=False)

    # 3) Targets
    targets = drugbank_targets(db_id, api_key=api_key)
    targets.to_csv(output_dir / "drugbank_targets.csv", index=False)

    # 4) Drug–drug interactions
    ddi = drugbank_interactions(db_id, api_key=api_key)
    ddi.to_csv(output_dir / "drugbank_ddi.csv", index=False)

    print(f"DrugBank pipeline: {drug_name} → {output_dir}")
    # "search" is returned on this path too, so both exits share the key
    # and callers can read result["search"] without checking which one ran.
    return {"search": results, "detail": detail,
            "targets": targets, "ddi": ddi}
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## ToolUniverse 連携
|
|
247
|
+
|
|
248
|
+
| TU Key | ツール名 | 連携内容 |
|
|
249
|
+
|--------|---------|---------|
|
|
250
|
+
| `drugbank` | DrugBank | 薬剤データベース API |
|
|
251
|
+
|
|
252
|
+
## パイプライン統合
|
|
253
|
+
|
|
254
|
+
```
|
|
255
|
+
drug-target-profiling → drugbank-resources → admet-pharmacokinetics
|
|
256
|
+
(標的プロファイリング) (DrugBank API) (ADMET 予測)
|
|
257
|
+
│ │ ↓
|
|
258
|
+
opentargets-genetics ──────────┘ compound-screening
|
|
259
|
+
(OT 薬剤エビデンス) (ZINC 化合物検索)
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
## パイプライン出力
|
|
263
|
+
|
|
264
|
+
| ファイル | 説明 | 次スキル |
|
|
265
|
+
|---------|------|---------|
|
|
266
|
+
| `results/drugbank_search.csv` | 薬剤検索結果 | → drug-target-profiling |
|
|
267
|
+
| `results/drugbank_detail.csv` | 薬剤詳細 | → admet-pharmacokinetics |
|
|
268
|
+
| `results/drugbank_targets.csv` | 標的タンパク質 | → protein-interaction-network |
|
|
269
|
+
| `results/drugbank_ddi.csv` | 薬物相互作用 | → pharmacogenomics |
|
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-gnomad-variants
|
|
3
|
+
description: |
|
|
4
|
+
gnomAD バリアントスキル。gnomAD (Genome Aggregation Database)
|
|
5
|
+
GraphQL API を用いた集団アレル頻度・遺伝子制約スコア
|
|
6
|
+
(pLI/LOEUF)・リージョンクエリ・トランスクリプトレベル
|
|
7
|
+
データ取得。ToolUniverse 連携: gnomad。
|
|
8
|
+
tu_tools:
|
|
9
|
+
- key: gnomad
|
|
10
|
+
name: gnomAD
|
|
11
|
+
description: ゲノム集約データベース GraphQL API
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# Scientific gnomAD Variants
|
|
15
|
+
|
|
16
|
+
gnomAD (Genome Aggregation Database) GraphQL API を活用した
|
|
17
|
+
集団アレル頻度取得・遺伝子制約スコア (pLI/LOEUF/Z-scores)・
|
|
18
|
+
リージョンクエリ・トランスクリプトレベルデータパイプラインを
|
|
19
|
+
提供する。
|
|
20
|
+
|
|
21
|
+
## When to Use
|
|
22
|
+
|
|
23
|
+
- バリアントの集団アレル頻度 (AF) を確認するとき
|
|
24
|
+
- 遺伝子の LoF 不耐性 (pLI/LOEUF) を評価するとき
|
|
25
|
+
- ゲノムリージョン内のバリアントを列挙するとき
|
|
26
|
+
- 集団別 (gnomAD v4 exome/genome) 頻度を比較するとき
|
|
27
|
+
- ClinVar/VEP アノテーションと頻度を統合するとき
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
## 1. バリアント集団頻度
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import requests
|
|
37
|
+
import pandas as pd
|
|
38
|
+
|
|
39
|
+
# Endpoint that the gnomAD helpers in this skill POST their GraphQL queries to.
GNOMAD_API = "https://gnomad.broadinstitute.org/api"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def gnomad_variant(variant_id, dataset="gnomad_r4"):
    """
    Fetch population allele frequencies for one gnomAD variant.

    Parameters:
        variant_id: str — variant ID in chrom-pos-ref-alt form
            (e.g. "1-55516888-G-A")
        dataset: str — dataset ID (e.g. "gnomad_r4", "gnomad_r3")

    Returns:
        dict with variant_id, chrom, pos, ref, alt, rsids (joined with
        "; "), exome_af/ac/an/hom, genome_af/ac/an/hom, plus one
        ``exome_<population id>_af`` entry per exome population.
        An empty dict when the response carries no variant.
        The query also requests genome populations and transcript
        consequences, but they are not copied into the result.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    query = """
    query gnomadVariant($variantId: String!,
                        $dataset: DatasetId!) {
      variant(variantId: $variantId,
              dataset: $dataset) {
        variant_id
        chrom
        pos
        ref
        alt
        exome {
          ac
          an
          af
          ac_hom
          populations {
            id
            ac
            an
            af
          }
        }
        genome {
          ac
          an
          af
          ac_hom
          populations {
            id
            ac
            an
            af
          }
        }
        rsids
        transcript_consequences {
          gene_symbol
          transcript_id
          consequence
          hgvsc
          hgvsp
          lof
          lof_filter
          polyphen_prediction
          sift_prediction
        }
      }
    }
    """
    variables = {"variantId": variant_id,
                 "dataset": dataset}
    resp = requests.post(GNOMAD_API,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()
    # A GraphQL error response may carry "data": null; .get("data", {})
    # would then return None, so normalise with `or {}` before descending.
    data = (resp.json().get("data") or {}).get("variant")

    if not data:
        print(f"gnomAD: {variant_id} not found")
        return {}

    # A variant seen in only one sequencing type has null for the other.
    exome = data.get("exome") or {}
    genome = data.get("genome") or {}

    result = {
        "variant_id": data["variant_id"],
        "chrom": data["chrom"],
        "pos": data["pos"],
        "ref": data["ref"],
        "alt": data["alt"],
        # rsids can be null; join() needs an iterable.
        "rsids": "; ".join(data.get("rsids") or []),
        "exome_af": exome.get("af", 0),
        "exome_ac": exome.get("ac", 0),
        "exome_an": exome.get("an", 0),
        "exome_hom": exome.get("ac_hom", 0),
        "genome_af": genome.get("af", 0),
        "genome_ac": genome.get("ac", 0),
        "genome_an": genome.get("an", 0),
        "genome_hom": genome.get("ac_hom", 0),
    }

    # Per-population frequencies (exome only)
    for pop in exome.get("populations") or []:
        result[f"exome_{pop['id']}_af"] = pop.get("af", 0)

    # The stored value is left untouched; only the log line substitutes 0
    # for a null AF, because None cannot be formatted with ":.6f".
    exome_af_display = result["exome_af"] or 0
    print(f"gnomAD variant: {variant_id} "
          f"(exome AF={exome_af_display:.6f})")
    return result
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## 2. 遺伝子制約スコア (pLI/LOEUF)
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
def gnomad_gene_constraint(gene_symbol,
                           dataset="gnomad_r4"):
    """
    Fetch gnomAD gene constraint scores for a gene symbol.

    Parameters:
        gene_symbol: str — gene symbol (e.g. "BRCA1")
        dataset: str — dataset ID. Currently not sent: the gene query
            below has no dataset argument. The parameter is kept so
            existing callers that pass it keep working.

    Returns:
        dict with gene_id, symbol, pLI, LOEUF (taken from
        ``oe_lof_upper``), oe_lof, oe_mis, oe_syn, lof_z, mis_z, syn_z,
        exp_lof and obs_lof. Scores are None when the gene has no
        constraint record. An empty dict when the gene is not found.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    # The operation must declare only variables it uses: GraphQL
    # validation ("All Variables Used") rejects an operation with an
    # unused variable, so $dataset is not declared here.
    query = """
    query geneConstraint($gene: String!) {
      gene(gene_symbol: $gene,
           reference_genome: GRCh38) {
        gene_id
        symbol
        gnomad_constraint {
          exp_lof
          exp_mis
          exp_syn
          obs_lof
          obs_mis
          obs_syn
          oe_lof
          oe_lof_lower
          oe_lof_upper
          oe_mis
          oe_syn
          lof_z
          mis_z
          syn_z
          pLI
        }
      }
    }
    """
    variables = {"gene": gene_symbol}
    resp = requests.post(GNOMAD_API,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()
    # A GraphQL error response may carry "data": null.
    gene = (resp.json().get("data") or {}).get("gene")

    if not gene:
        print(f"gnomAD gene: {gene_symbol} not found")
        return {}

    c = gene.get("gnomad_constraint") or {}
    result = {
        "gene_id": gene["gene_id"],
        "symbol": gene["symbol"],
        "pLI": c.get("pLI"),
        "LOEUF": c.get("oe_lof_upper"),
        "oe_lof": c.get("oe_lof"),
        "oe_mis": c.get("oe_mis"),
        "oe_syn": c.get("oe_syn"),
        "lof_z": c.get("lof_z"),
        "mis_z": c.get("mis_z"),
        "syn_z": c.get("syn_z"),
        "exp_lof": c.get("exp_lof"),
        "obs_lof": c.get("obs_lof"),
    }
    # None cannot be formatted with ":.3f"; show 0 in the log line only.
    pli = result.get("pLI") or 0
    loeuf = result.get("LOEUF") or 0
    print(f"gnomAD constraint: {gene_symbol} "
          f"(pLI={pli:.3f}, LOEUF={loeuf:.3f})")
    return result
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## 3. リージョンクエリ
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
def gnomad_region(chrom, start, stop,
                  dataset="gnomad_r4", limit=500):
    """
    List gnomAD variants inside a genomic region.

    Parameters:
        chrom: str — chromosome (e.g. "1")
        start: int — start position (GRCh38)
        stop: int — end position
        dataset: str — dataset ID
        limit: int — maximum number of rows kept; applied after the
            response is received (the query itself is not limited)

    Returns:
        pandas.DataFrame with the columns variant_id, pos, ref, alt,
        rsids (joined with "; "), exome_af and genome_af. The frame is
        empty when the response carries no variants.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    query = """
    query regionVariants($chrom: String!,
                         $start: Int!,
                         $stop: Int!,
                         $dataset: DatasetId!) {
      region(chrom: $chrom, start: $start,
             stop: $stop,
             reference_genome: GRCh38) {
        variants(dataset: $dataset) {
          variant_id
          pos
          ref
          alt
          exome { af ac an }
          genome { af ac an }
          rsids
        }
      }
    }
    """
    variables = {"chrom": chrom, "start": start,
                 "stop": stop, "dataset": dataset}
    resp = requests.post(GNOMAD_API,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()
    # "data", "region" and "variants" may each be JSON null (e.g. on a
    # GraphQL error); dict.get's default only covers a *missing* key, so
    # normalise every level with `or`.
    region = (resp.json().get("data") or {}).get("region") or {}
    found = region.get("variants") or []

    rows = []
    for v in found[:limit]:
        # A variant seen in only one sequencing type has null for the other.
        exome = v.get("exome") or {}
        genome = v.get("genome") or {}
        rows.append({
            "variant_id": v["variant_id"],
            "pos": v["pos"],
            "ref": v["ref"],
            "alt": v["alt"],
            "rsids": "; ".join(v.get("rsids") or []),
            "exome_af": exome.get("af", 0),
            "genome_af": genome.get("af", 0),
        })

    df = pd.DataFrame(rows)
    print(f"gnomAD region: {chrom}:{start}-{stop} "
          f"→ {len(df)} variants")
    return df
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## 4. gnomAD 統合パイプライン
|
|
282
|
+
|
|
283
|
+
```python
|
|
284
|
+
def gnomad_pipeline(gene_symbol, chrom, start, stop,
                    output_dir="results"):
    """
    Run the gnomAD helpers end to end for one gene and region.

    Writes gnomad_constraint.csv and gnomad_region.csv into
    ``output_dir`` (created if missing). When the region holds variants,
    it also writes gnomad_rare.csv with those whose allele frequency is
    below 0.01 in both exome and genome data.

    Parameters:
        gene_symbol: str — gene symbol
        chrom: str — chromosome
        start: int — start position
        stop: int — end position
        output_dir: str — output directory

    Returns:
        dict with "constraint" (dict of gene constraint scores) and
        "variants" (DataFrame of all region variants, not only rare ones).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Gene constraint scores
    constraint = gnomad_gene_constraint(gene_symbol)
    pd.DataFrame([constraint]).to_csv(
        output_dir / "gnomad_constraint.csv",
        index=False)

    # 2) Region variants
    variants = gnomad_region(chrom, start, stop)
    variants.to_csv(
        output_dir / "gnomad_region.csv",
        index=False)

    # 3) Rare variants (AF < 0.01)
    if not variants.empty:
        # A frequency missing for one sequencing type is stored as 0 (or
        # NaN). With OR, such a variant counted as rare even when it was
        # common in the other type. Require BOTH frequencies to be below
        # the threshold, treating a missing value as 0.
        exome_af = pd.to_numeric(
            variants["exome_af"], errors="coerce").fillna(0)
        genome_af = pd.to_numeric(
            variants["genome_af"], errors="coerce").fillna(0)
        rare = variants[(exome_af < 0.01) & (genome_af < 0.01)]
        rare.to_csv(
            output_dir / "gnomad_rare.csv",
            index=False)
        print(f"  Rare variants: {len(rare)}")

    print(f"gnomAD pipeline: {gene_symbol} "
          f"→ {output_dir}")
    return {"constraint": constraint,
            "variants": variants}
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
---
|
|
330
|
+
|
|
331
|
+
## ToolUniverse 連携
|
|
332
|
+
|
|
333
|
+
| TU Key | ツール名 | 連携内容 |
|
|
334
|
+
|--------|---------|---------|
|
|
335
|
+
| `gnomad` | gnomAD | ゲノム集約データベース GraphQL (~7 tools) |
|
|
336
|
+
|
|
337
|
+
## パイプライン統合
|
|
338
|
+
|
|
339
|
+
```
|
|
340
|
+
variant-interpretation → gnomad-variants → variant-effect-prediction
|
|
341
|
+
(ClinVar バリアント) (gnomAD API) (VEP/CADD/REVEL)
|
|
342
|
+
│ │ ↓
|
|
343
|
+
civic-evidence ──────────────┘ rare-disease-genetics
|
|
344
|
+
(CIViC 臨床) │ (希少疾患遺伝学)
|
|
345
|
+
↓
|
|
346
|
+
opentargets-genetics
|
|
347
|
+
(OT 遺伝的関連)
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
## パイプライン出力
|
|
351
|
+
|
|
352
|
+
| ファイル | 説明 | 次スキル |
|
|
353
|
+
|---------|------|---------|
|
|
354
|
+
| `results/gnomad_constraint.csv` | 遺伝子制約 | → rare-disease-genetics |
|
|
355
|
+
| `results/gnomad_region.csv` | リージョンバリアント | → variant-interpretation |
|
|
356
|
+
| `results/gnomad_rare.csv` | レアバリアント | → variant-effect-prediction |
|