@nahisaho/satori 0.17.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -38
- package/package.json +1 -1
- package/src/.github/skills/scientific-alphafold-structures/SKILL.md +256 -0
- package/src/.github/skills/scientific-arrayexpress-expression/SKILL.md +264 -0
- package/src/.github/skills/scientific-civic-evidence/SKILL.md +292 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +4 -0
- package/src/.github/skills/scientific-crossref-metadata/SKILL.md +313 -0
- package/src/.github/skills/scientific-depmap-dependencies/SKILL.md +239 -0
- package/src/.github/skills/scientific-disease-research/SKILL.md +4 -0
- package/src/.github/skills/scientific-drugbank-resources/SKILL.md +269 -0
- package/src/.github/skills/scientific-gnomad-variants/SKILL.md +356 -0
- package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +271 -0
- package/src/.github/skills/scientific-gwas-catalog/SKILL.md +267 -0
- package/src/.github/skills/scientific-icgc-cancer-data/SKILL.md +351 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +4 -0
- package/src/.github/skills/scientific-opentargets-genetics/SKILL.md +299 -0
- package/src/.github/skills/scientific-pharmgkb-pgx/SKILL.md +306 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +4 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-rcsb-pdb-search/SKILL.md +280 -0
- package/src/.github/skills/scientific-reactome-pathways/SKILL.md +242 -0
- package/src/.github/skills/scientific-semantic-scholar/SKILL.md +298 -0
- package/src/.github/skills/scientific-uniprot-proteome/SKILL.md +273 -0
- package/src/.github/skills/scientific-variant-interpretation/SKILL.md +4 -0
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-gtex-tissue-expression
|
|
3
|
+
description: |
|
|
4
|
+
GTEx 組織発現スキル。GTEx Portal REST API v2 による
|
|
5
|
+
組織特異的遺伝子発現パターン解析・eQTL ルックアップ・
|
|
6
|
+
多組織比較。直接 API (ToolUniverse 非連携)。
|
|
7
|
+
tu_tools: []
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Scientific GTEx Tissue Expression
|
|
11
|
+
|
|
12
|
+
GTEx (Genotype-Tissue Expression) Portal REST API v2 を活用した
|
|
13
|
+
組織特異的遺伝子発現解析・eQTL 検索・多組織比較パイプライン
|
|
14
|
+
を提供する。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- 遺伝子の組織特異的発現パターンを調べるとき
|
|
19
|
+
- 特定組織における eQTL (発現量的形質遺伝子座) を検索するとき
|
|
20
|
+
- 複数組織間で遺伝子発現レベルを比較するとき
|
|
21
|
+
- TPM (Transcripts Per Million) 発現データを取得するとき
|
|
22
|
+
- バリアントが遺伝子発現に与える影響を評価するとき
|
|
23
|
+
- 組織間の遺伝子共発現パターンを分析するとき
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. 組織特異的遺伝子発現取得
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import requests
|
|
33
|
+
import pandas as pd
|
|
34
|
+
|
|
35
|
+
# Root URL of the GTEx Portal REST API v2; the request helpers in this file
# append endpoint paths (e.g. "/expression/medianGeneExpression") to it.
GTEX_BASE = "https://gtexportal.org/api/v2"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def gtex_gene_expression(gene_id, tissue=None):
    """
    GTEx — fetch per-tissue gene expression (median TPM).

    Queries the ``expression/medianGeneExpression`` endpoint for the
    ``gtex_v8`` dataset and flattens the ``data`` array of the JSON reply
    into a table ordered by median value, highest first.

    Parameters:
        gene_id: str — gene symbol or Ensembl ID, forwarded unchanged as
                       the ``gencodeId`` query parameter
                       (e.g. "BRCA1", "ENSG00000012048")
                       NOTE(review): confirm the endpoint accepts bare
                       symbols / unversioned IDs for ``gencodeId``.
        tissue: str — tissue ID (None for all tissues)
                      (e.g. "Breast_Mammary_Tissue")

    Returns:
        pandas.DataFrame — one row per returned record with the columns
        gene_symbol, gencode_id, tissue, tissue_name, median_tpm and
        sample_count; an empty frame when the reply holds no records.

    Raises:
        requests.HTTPError — when the server answers with an error status.
    """
    query = {"gencodeId": gene_id, "datasetId": "gtex_v8"}
    if tissue:
        query["tissueSiteDetailId"] = tissue

    response = requests.get(
        f"{GTEX_BASE}/expression/medianGeneExpression",
        params=query,
        timeout=30,
    )
    response.raise_for_status()
    records = response.json().get("data", [])

    # Missing fields fall back to "" (text) or 0 (numbers).
    table = pd.DataFrame([
        {
            "gene_symbol": rec.get("geneSymbol", ""),
            "gencode_id": rec.get("gencodeId", ""),
            "tissue": rec.get("tissueSiteDetailId", ""),
            "tissue_name": rec.get("tissueSiteDetail", ""),
            "median_tpm": rec.get("median", 0),
            "sample_count": rec.get("numSamples", 0),
        }
        for rec in records
    ])
    if not table.empty:
        table = table.sort_values("median_tpm", ascending=False)

    print(f"GTEx expression: {gene_id} → "
          f"{len(table)} tissues")
    return table
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def gtex_top_tissues(gene_id, top_n=10):
    """
    GTEx — tissues with the highest expression of a gene.

    Fetches the gene's expression table via ``gtex_gene_expression``,
    keeps its first ``top_n`` rows, and prints one summary line per
    retained tissue.

    Parameters:
        gene_id: str — gene symbol or Ensembl ID
        top_n: int — number of top tissues to keep

    Returns:
        pandas.DataFrame — the first ``top_n`` rows of the expression
        table, or the empty table unchanged when nothing was returned.
    """
    expression = gtex_gene_expression(gene_id)
    if expression.empty:
        top = expression
    else:
        top = expression.head(top_n)

    print(f"GTEx top {top_n} tissues for {gene_id}:")
    for _, rec in top.iterrows():
        print(f" {rec['tissue_name']}: "
              f"{rec['median_tpm']:.2f} TPM "
              f"(n={rec['sample_count']})")
    return top
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## 2. eQTL 検索
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
def gtex_eqtl_lookup(gene_id, tissue, variant_id=None):
    """
    GTEx — eQTL lookup.

    Queries the ``association/singleTissueEqtl`` endpoint for the
    ``gtex_v8`` dataset and returns the associations ordered by
    ascending p-value.

    Parameters:
        gene_id: str — gene symbol or Ensembl ID (sent as ``gencodeId``)
        tissue: str — tissue ID
                      (e.g. "Liver", "Whole_Blood")
        variant_id: str — variant ID (optional)
                          (e.g. "rs12345")

    Returns:
        pandas.DataFrame — one row per association with the columns
        gene_symbol, variant_id, tissue, pvalue, nes, maf, ref and alt;
        an empty frame when the reply holds no records.

    Raises:
        requests.HTTPError — when the server answers with an error status.
    """
    query = {
        "gencodeId": gene_id,
        "tissueSiteDetailId": tissue,
        "datasetId": "gtex_v8",
    }
    if variant_id:
        query["variantId"] = variant_id

    response = requests.get(
        f"{GTEX_BASE}/association/singleTissueEqtl",
        params=query,
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    rows = [
        {
            "gene_symbol": hit.get("geneSymbol", ""),
            "variant_id": hit.get("variantId", ""),
            # The requested tissue is recorded, not a field of the reply.
            "tissue": tissue,
            "pvalue": hit.get("pValue"),
            "nes": hit.get("nes"),  # normalized effect size
            "maf": hit.get("maf"),
            "ref": hit.get("ref", ""),
            "alt": hit.get("alt", ""),
        }
        for hit in payload.get("data", [])
    ]

    table = pd.DataFrame(rows)
    if not table.empty:
        table = table.sort_values("pvalue")

    print(f"GTEx eQTL: {gene_id} in {tissue} → "
          f"{len(table)} associations")
    return table
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## 3. 多組織比較
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
def gtex_multi_gene_comparison(gene_ids, tissues=None):
    """
    GTEx — compare expression across several genes and tissues.

    Fetches each gene's expression table via ``gtex_gene_expression``,
    optionally restricts it to the requested tissues, and pivots the
    combined rows into a matrix. A gene whose fetch or filtering raises
    is reported with a warning line and left out.

    Parameters:
        gene_ids: list[str] — genes to compare
        tissues: list[str] — tissue IDs to keep (None for all tissues)

    Returns:
        pandas.DataFrame — matrix with tissue names as rows, gene symbols
        as columns and median TPM as values; an empty frame when no gene
        produced any rows.
    """
    frames = []
    for gid in gene_ids:
        try:
            expression = gtex_gene_expression(gid)
            if tissues:
                keep = expression["tissue"].isin(tissues)
                expression = expression[keep]
            frames.append(expression)
        except Exception as err:
            print(f" Warning: {gid} — {err}")

    if not frames:
        return pd.DataFrame()

    combined = pd.concat(frames, ignore_index=True)
    if combined.empty:
        return combined

    # Pivot table: rows = tissue, columns = gene, values = TPM
    matrix = combined.pivot_table(
        index="tissue_name",
        columns="gene_symbol",
        values="median_tpm",
        aggfunc="first",
    )
    print(f"GTEx comparison: {len(gene_ids)} genes × "
          f"{len(matrix)} tissues")
    return matrix
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## 4. GTEx 統合パイプライン
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
def gtex_pipeline(gene_ids, tissues=None,
                  output_dir="results"):
    """
    GTEx integrated pipeline.

    Steps:
      1. Fetch per-tissue expression for every gene and write
         ``expression_<gene>.csv``.
      2. Build the tissue × gene matrix and write
         ``expression_matrix.csv`` (only when non-empty).
      3. For every gene, look up eQTLs in that gene's own three
         highest-expressing tissues and write ``eqtl_results.csv``
         (only when at least one lookup succeeded).

    A failure for a single gene or tissue is reported with a warning line
    and the pipeline moves on to the next item.

    Parameters:
        gene_ids: list[str] — genes to process
        tissues: list[str] — tissue IDs for the matrix (None for all)
        output_dir: str — output directory (created if missing)

    Returns:
        dict — ``{"expression": [DataFrame, ...], "matrix": DataFrame}``
        where ``expression`` holds the tables of the genes that were
        fetched successfully, in input order.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Tissue expression for every gene. Each table stays paired with
    #    its gene ID so step 3 can use the matching table even when an
    #    earlier gene failed.
    fetched = []
    for gid in gene_ids:
        try:
            df = gtex_gene_expression(gid)
            df.to_csv(output_dir / f"expression_{gid}.csv",
                      index=False)
        except Exception as err:
            print(f" Warning: expression for {gid} skipped — {err}")
            continue
        fetched.append((gid, df))
    all_expr = [df for _, df in fetched]

    # 2) Multi-tissue comparison matrix
    pivot = gtex_multi_gene_comparison(gene_ids, tissues)
    if isinstance(pivot, pd.DataFrame) and not pivot.empty:
        pivot.to_csv(output_dir / "expression_matrix.csv")

    # 3) eQTL lookup in each gene's own top tissues. The expression
    #    table is already sorted by median TPM (descending), so the first
    #    three rows are the top tissues.
    eqtl_results = []
    for gid, df in fetched:
        if df.empty:
            continue
        for tissue_id in df["tissue"].head(3):
            try:
                eqtl_results.append(gtex_eqtl_lookup(gid, tissue_id))
            except Exception as err:
                print(f" Warning: eQTL for {gid} in {tissue_id} "
                      f"skipped — {err}")
    if eqtl_results:
        eqtl_combined = pd.concat(eqtl_results,
                                  ignore_index=True)
        eqtl_combined.to_csv(output_dir / "eqtl_results.csv",
                             index=False)

    print(f"GTEx pipeline: {output_dir}")
    return {"expression": all_expr, "matrix": pivot}
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## ToolUniverse 連携
|
|
247
|
+
|
|
248
|
+
| TU Key | ツール名 | 連携内容 |
|
|
249
|
+
|--------|---------|---------|
|
|
250
|
+
| (direct) | GTEx Portal API v2 | 直接 REST API — TU 非連携 |
|
|
251
|
+
|
|
252
|
+
## パイプライン統合
|
|
253
|
+
|
|
254
|
+
```
|
|
255
|
+
gene-expression-transcriptomics → gtex-tissue-expression → variant-interpretation
|
|
256
|
+
(DESeq2/edgeR 差分発現) (組織別 TPM + eQTL) (臨床変異評価)
|
|
257
|
+
│ │ ↓
|
|
258
|
+
arrayexpress-expression ──────────┘ gwas-catalog
|
|
259
|
+
(ArrayExpress データ) │ (GWAS 関連解析)
|
|
260
|
+
↓
|
|
261
|
+
disease-research
|
|
262
|
+
(疾患関連遺伝子)
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
## パイプライン出力
|
|
266
|
+
|
|
267
|
+
| ファイル | 説明 | 次スキル |
|
|
268
|
+
|---------|------|---------|
|
|
269
|
+
| `results/expression_*.csv` | 遺伝子別組織発現 | → disease-research |
|
|
270
|
+
| `results/expression_matrix.csv` | 多遺伝子比較 | → pathway-enrichment |
|
|
271
|
+
| `results/eqtl_results.csv` | eQTL 関連 | → variant-interpretation |
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-gwas-catalog
|
|
3
|
+
description: |
|
|
4
|
+
GWAS カタログスキル。NHGRI-EBI GWAS Catalog REST API によるゲノム
|
|
5
|
+
ワイド関連研究メタデータ・関連シグナル・形質・遺伝子座検索。
|
|
6
|
+
ToolUniverse 連携: gwas。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: gwas
|
|
9
|
+
name: GWAS Catalog
|
|
10
|
+
description: GWAS 関連シグナル・形質・遺伝子座検索
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Scientific GWAS Catalog
|
|
14
|
+
|
|
15
|
+
NHGRI-EBI GWAS Catalog REST API を活用した GWAS メタデータ
|
|
16
|
+
解析・遺伝子座レベル解釈パイプラインを提供する。
|
|
17
|
+
|
|
18
|
+
## When to Use
|
|
19
|
+
|
|
20
|
+
- GWAS Catalog から疾患/形質の関連バリアントを検索するとき
|
|
21
|
+
- 遺伝的関連シグナルのエフェクトサイズ・P値を取得するとき
|
|
22
|
+
- 特定遺伝子座の LD ブロック情報を解析するとき
|
|
23
|
+
- 多形質 PheWAS-like 解析を実施するとき
|
|
24
|
+
- GWAS サマリ統計量を下流解析に準備するとき
|
|
25
|
+
- 公開 GWAS データから PRS ウェイトを抽出するとき
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
## 1. GWAS 関連シグナル検索
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import requests
|
|
35
|
+
import pandas as pd
|
|
36
|
+
import numpy as np
|
|
37
|
+
|
|
38
|
+
# Root URL of the NHGRI-EBI GWAS Catalog REST API; the request helpers in
# this file append resource paths (e.g. "/associations") to it.
GWAS_BASE = "https://www.ebi.ac.uk/gwas/rest/api"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def gwas_search_associations(trait=None, gene=None, variant=None,
                             p_upper=5e-8, limit=100):
    """
    GWAS Catalog — search association signals.

    Exactly one endpoint is queried, chosen in this order of precedence:
    ``trait`` → ``gene`` → ``variant`` → all associations. Associations
    whose p-value exceeds ``p_upper`` are dropped; associations without a
    p-value are kept.

    Parameters:
        trait: str — trait/disease EFO ID or name (e.g. "EFO_0001645");
                     inserted into the ``efoTraits/<trait>`` URL path
        gene: str — gene name (e.g. "BRCA1")
        variant: str — rsID (e.g. "rs1234567")
        p_upper: float — upper bound for the p-value
        limit: int — maximum number of results (sent as ``size``)

    Returns:
        pandas.DataFrame — one row per association, sorted by ascending
        ``p_value``. ``p_value_mlog`` is −log10(p), computed from the
        reported mantissa/exponent when present (so it stays finite even
        when ``p_value`` underflows to 0.0) and ``None`` when it cannot be
        computed.

    Raises:
        requests.HTTPError — when the server answers with an error status.
    """
    import math

    if trait:
        url = f"{GWAS_BASE}/efoTraits/{trait}/associations"
    elif gene:
        url = f"{GWAS_BASE}/associations/search/findByGene"
    elif variant:
        url = f"{GWAS_BASE}/singleNucleotidePolymorphisms/{variant}/associations"
    else:
        url = f"{GWAS_BASE}/associations"

    params = {"size": limit}
    if gene:
        params["geneName"] = gene

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    associations = data.get("_embedded", {}).get("associations", [])
    results = []
    for assoc in associations:
        # Compare against None / "" explicitly: a p-value of 0.0 is a real
        # (underflowed, extremely significant) value and must not be
        # treated as missing.
        raw_p = assoc.get("pvalue", 1.0)
        p_value = None if raw_p in (None, "") else float(raw_p)
        if p_value is not None and p_value > p_upper:
            continue

        # -log10(p) = -(log10(mantissa) + exponent); fall back to the
        # float p-value when the split representation is absent.
        mantissa = assoc.get("pvalueMantissa")
        exponent = assoc.get("pvalueExponent")
        if mantissa and exponent is not None and float(mantissa) > 0:
            p_mlog = -(math.log10(float(mantissa)) + float(exponent))
        elif p_value is not None and p_value > 0:
            p_mlog = -math.log10(p_value)
        else:
            p_mlog = None

        genes = [
            gene_info.get("geneName", "")
            for locus in assoc.get("loci", [{}])
            for gene_info in locus.get("authorReportedGenes", [])
        ]
        snps = [snp_info.get("rsId", "")
                for snp_info in assoc.get("snps", [])]

        efo_traits = assoc.get("efoTraits")
        results.append({
            "association_id": assoc.get("associationId", ""),
            "p_value": p_value,
            "p_value_mlog": p_mlog,
            "or_beta": assoc.get("orPerCopyNum", None),
            "beta_num": assoc.get("betaNum", None),
            "beta_direction": assoc.get("betaDirection", ""),
            "ci": assoc.get("range", ""),
            "risk_allele_freq": assoc.get("riskFrequency", ""),
            "snps": "; ".join(snps),
            "genes": "; ".join(genes),
            # Only the first listed trait is recorded.
            "trait": efo_traits[0].get("trait", "") if efo_traits else "",
            "study_accession": assoc.get("study", {}).get(
                "accessionId", ""),
        })

    df = pd.DataFrame(results)
    print(f"GWAS associations: {len(df)} results "
          f"(trait={trait}, gene={gene}, p<{p_upper})")
    return df.sort_values("p_value") if not df.empty else df
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## 2. GWAS 研究メタデータ検索
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
def gwas_search_studies(query=None, efo_trait=None, limit=50):
    """
    GWAS Catalog — search study metadata.

    With ``efo_trait`` the studies attached to that EFO trait are
    requested; otherwise the ``findByDiseaseTrait`` search endpoint is
    used. ``query`` is sent as the ``diseaseTrait`` parameter whenever it
    is given.

    Parameters:
        query: str — free-text search
        efo_trait: str — EFO trait ID
        limit: int — maximum number of results (sent as ``size``)

    Returns:
        pandas.DataFrame — one row per study with the columns accession,
        title, pubmed_id, author, journal, date, initial_sample_size,
        replication_sample_size and ancestry.

    Raises:
        requests.HTTPError — when the server answers with an error status.
    """
    endpoint = (
        f"{GWAS_BASE}/efoTraits/{efo_trait}/studies"
        if efo_trait
        else f"{GWAS_BASE}/studies/search/findByDiseaseTrait"
    )

    query_args = {"size": limit}
    if query:
        query_args["diseaseTrait"] = query

    response = requests.get(endpoint, params=query_args, timeout=30)
    response.raise_for_status()
    payload = response.json()

    rows = []
    for study in payload.get("_embedded", {}).get("studies", []):
        # Publication details live in a nested object; read it once.
        pub = study.get("publicationInfo", {})
        rows.append({
            "accession": study.get("accessionId", ""),
            "title": study.get("title", ""),
            "pubmed_id": pub.get("pubmedId", ""),
            "author": pub.get("author", {}).get("fullname", ""),
            "journal": pub.get("publication", ""),
            "date": pub.get("publicationDate", ""),
            "initial_sample_size": study.get("initialSampleSize", ""),
            "replication_sample_size": study.get(
                "replicationSampleSize", ""),
            "ancestry": study.get("ancestries", []),
        })

    table = pd.DataFrame(rows)
    print(f"GWAS studies: {len(table)} results")
    return table
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## 3. GWAS 形質検索・PheWAS
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
def gwas_phewas(variant_rsid, p_threshold=5e-8):
    """
    GWAS Catalog — variant PheWAS (cross-trait lookup).

    Requests up to 500 associations of the variant and emits one row per
    (association, trait) pair whose p-value does not exceed
    ``p_threshold``. Associations without a p-value are kept.

    Parameters:
        variant_rsid: str — rsID (e.g. "rs7903146")
        p_threshold: float — p-value threshold

    Returns:
        pandas.DataFrame — columns variant, trait, efo_uri, p_value,
        or_beta and study, sorted by ascending ``p_value``; an empty
        frame when nothing passes the filter.

    Raises:
        requests.HTTPError — when the server answers with an error status.
    """
    url = (f"{GWAS_BASE}/singleNucleotidePolymorphisms/"
           f"{variant_rsid}/associations")
    resp = requests.get(url, params={"size": 500}, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    associations = data.get("_embedded", {}).get("associations", [])
    results = []
    for assoc in associations:
        # Compare against None / "" explicitly: a p-value of 0.0 is a real
        # (underflowed, extremely significant) value and must not be
        # recorded as missing.
        raw_p = assoc.get("pvalue", 1.0)
        p_val = None if raw_p in (None, "") else float(raw_p)
        if p_val is not None and p_val > p_threshold:
            continue
        for trait in assoc.get("efoTraits", []):
            results.append({
                "variant": variant_rsid,
                "trait": trait.get("trait", ""),
                "efo_uri": trait.get("shortForm", ""),
                "p_value": p_val,
                "or_beta": assoc.get("orPerCopyNum", None),
                "study": assoc.get("study", {}).get(
                    "accessionId", ""),
            })

    df = pd.DataFrame(results)
    if not df.empty:
        df = df.sort_values("p_value")
    print(f"PheWAS {variant_rsid}: {len(df)} trait associations")
    return df
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## 4. GWAS 統合パイプライン
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
def gwas_catalog_pipeline(trait_query, output_dir="results"):
    """
    GWAS Catalog integrated pipeline.

    Steps:
      1. Search studies for the trait → ``gwas_studies.csv``
      2. Search association signals for the same trait
         → ``gwas_associations.csv``
      3. Run a PheWAS for up to five of the top rsIDs
         → ``phewas.csv`` (only when at least one PheWAS ran)

    Parameters:
        trait_query: str — trait/disease name
                           NOTE(review): step 2 passes this value as
                           ``trait`` to ``gwas_search_associations``,
                           which inserts it into the ``efoTraits/<trait>``
                           URL path — confirm whether a free-text name is
                           accepted there or an EFO ID is required.
        output_dir: str — output directory (created if missing)

    Returns:
        dict — ``{"studies": DataFrame, "associations": DataFrame}``.
        ``associations`` is an empty frame when the association request
        failed.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Study search
    studies = gwas_search_studies(query=trait_query)
    studies.to_csv(output_dir / "gwas_studies.csv", index=False)

    # 2) Association signals for the requested trait. A failed request is
    #    reported and leaves an empty table so the study results written
    #    above are still returned.
    try:
        assocs = gwas_search_associations(trait=trait_query)
    except requests.exceptions.RequestException as err:
        print(f" Warning: associations for {trait_query} skipped — {err}")
        assocs = pd.DataFrame()
    assocs.to_csv(output_dir / "gwas_associations.csv", index=False)

    # 3) PheWAS for the top variants. The association table is sorted by
    #    p-value, so the first distinct rsIDs are the strongest signals;
    #    non-rsID entries are removed before the five are picked.
    if not assocs.empty:
        candidates = assocs["snps"].str.split("; ").explode().dropna().unique()
        top_snps = [rsid for rsid in candidates
                    if rsid.startswith("rs")][:5]
        phewas_all = [gwas_phewas(rsid) for rsid in top_snps]
        if phewas_all:
            phewas_df = pd.concat(phewas_all, ignore_index=True)
            phewas_df.to_csv(output_dir / "phewas.csv", index=False)

    print(f"GWAS pipeline: {output_dir}")
    return {"studies": studies, "associations": assocs}
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## ToolUniverse 連携
|
|
243
|
+
|
|
244
|
+
| TU Key | ツール名 | 連携内容 |
|
|
245
|
+
|--------|---------|---------|
|
|
246
|
+
| `gwas` | GWAS Catalog | 関連シグナル・形質・研究メタデータ検索 |
|
|
247
|
+
|
|
248
|
+
## パイプライン統合
|
|
249
|
+
|
|
250
|
+
```
|
|
251
|
+
disease-research → gwas-catalog → variant-interpretation
|
|
252
|
+
(DisGeNET/OMIM) (GWAS Catalog) (ACMG/AMP)
|
|
253
|
+
│ │ ↓
|
|
254
|
+
population-genetics ──┘ variant-effect-prediction
|
|
255
|
+
(Fst/PCA) │ (CADD/SpliceAI)
|
|
256
|
+
↓
|
|
257
|
+
precision-oncology
|
|
258
|
+
(臨床的意義判定)
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
## パイプライン出力
|
|
262
|
+
|
|
263
|
+
| ファイル | 説明 | 次スキル |
|
|
264
|
+
|---------|------|---------|
|
|
265
|
+
| `results/gwas_studies.csv` | GWAS 研究メタデータ | → literature-search |
|
|
266
|
+
| `results/gwas_associations.csv` | 関連シグナル | → variant-interpretation |
|
|
267
|
+
| `results/phewas.csv` | PheWAS 結果 | → disease-research |
|