@nahisaho/satori 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -30
- package/package.json +1 -1
- package/src/.github/skills/scientific-advanced-imaging/SKILL.md +382 -0
- package/src/.github/skills/scientific-chembl-assay-mining/SKILL.md +509 -0
- package/src/.github/skills/scientific-data-submission/SKILL.md +357 -0
- package/src/.github/skills/scientific-deep-chemistry/SKILL.md +350 -0
- package/src/.github/skills/scientific-ensembl-genomics/SKILL.md +378 -0
- package/src/.github/skills/scientific-expression-comparison/SKILL.md +303 -0
- package/src/.github/skills/scientific-gpu-singlecell/SKILL.md +296 -0
- package/src/.github/skills/scientific-marine-ecology/SKILL.md +429 -0
- package/src/.github/skills/scientific-md-simulation/SKILL.md +315 -0
- package/src/.github/skills/scientific-model-organism-db/SKILL.md +329 -0
- package/src/.github/skills/scientific-nci60-screening/SKILL.md +307 -0
- package/src/.github/skills/scientific-perturbation-analysis/SKILL.md +297 -0
- package/src/.github/skills/scientific-plant-biology/SKILL.md +321 -0
- package/src/.github/skills/scientific-rrna-taxonomy/SKILL.md +379 -0
- package/src/.github/skills/scientific-scatac-signac/SKILL.md +300 -0
- package/src/.github/skills/scientific-scvi-integration/SKILL.md +344 -0
- package/src/.github/skills/scientific-string-network-api/SKILL.md +376 -0
- package/src/.github/skills/scientific-toxicology-env/SKILL.md +309 -0
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-ensembl-genomics
|
|
3
|
+
description: |
|
|
4
|
+
Ensembl REST API ゲノミクススキル。遺伝子ルックアップ・配列取得・
|
|
5
|
+
VEP (Variant Effect Predictor) バリアントアノテーション・
|
|
6
|
+
クロスリファレンス・制御要素・系統樹・相同性検索・分類学統合パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Ensembl Genomics
|
|
10
|
+
|
|
11
|
+
Ensembl REST API (rest.ensembl.org) を活用したゲノミクスデータアクセス
|
|
12
|
+
パイプラインを提供する。遺伝子情報取得、VEP バリアント効果予測、
|
|
13
|
+
相同性検索、系統解析を統合。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- Ensembl Gene ID から遺伝子情報・座標を取得するとき
|
|
18
|
+
- VEP でバリアントの機能的影響を予測するとき (SIFT/PolyPhen/CADD)
|
|
19
|
+
- 遺伝子のオルソログ・パラログを検索するとき
|
|
20
|
+
- Ensembl ↔ UniProt / RefSeq / HGNC 間の ID 変換をするとき
|
|
21
|
+
- ゲノム領域の制御要素 (promoter/enhancer) を検索するとき
|
|
22
|
+
- 種間比較ゲノミクスデータを取得するとき
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
## 1. 遺伝子ルックアップ
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import requests
|
|
32
|
+
import pandas as pd
|
|
33
|
+
|
|
34
|
+
ENSEMBL_REST = "https://rest.ensembl.org"
|
|
35
|
+
HEADERS = {"Content-Type": "application/json", "Accept": "application/json"}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def lookup_gene(gene_id, expand=True):
    """
    Fetch summary information for an Ensembl gene from ``/lookup/id``.

    Parameters:
        gene_id: str — Ensembl Gene ID (e.g. "ENSG00000141510")
        expand: bool — whether to request transcript information as well

    Returns:
        dict — id, display_name, biotype, species, assembly_name,
        seq_region_name, start, end, strand and description; when
        ``expand`` is truthy and the response carries a "Transcript"
        entry, also n_transcripts and canonical_transcript.

    Raises:
        requests.HTTPError — propagated from ``raise_for_status()``.

    ToolUniverse:
        ensembl_lookup_gene(gene_id=gene_id, species="homo_sapiens")
    """
    response = requests.get(
        f"{ENSEMBL_REST}/lookup/id/{gene_id}",
        params={"expand": int(bool(expand))},
        headers=HEADERS,
    )
    response.raise_for_status()
    record = response.json()

    # Copy the scalar fields verbatim; missing ones become None.
    wanted = (
        "id",
        "display_name",
        "biotype",
        "species",
        "assembly_name",
        "seq_region_name",
        "start",
        "end",
        "strand",
        "description",
    )
    info = {field: record.get(field) for field in wanted}

    if expand and "Transcript" in record:
        info.update(
            n_transcripts=len(record["Transcript"]),
            canonical_transcript=record.get("canonical_transcript"),
        )

    print(f"Gene: {info['display_name']} ({info['id']}), "
          f"chr{info['seq_region_name']}:{info['start']}-{info['end']}")
    return info
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## 2. 配列取得
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
def get_sequence(seq_id, seq_type="genomic", species="homo_sapiens"):
    """
    Fetch a sequence (DNA/cDNA/CDS/protein) from Ensembl ``/sequence/id``.

    Parameters:
        seq_id: str — Ensembl ID (Gene/Transcript/Translation)
        seq_type: str — "genomic", "cdna", "cds", "protein"
        species: str — species name; NOTE(review): accepted but not sent
            with the request in this implementation.

    Returns:
        dict — id, seq_type, molecule, length (character count of the
        returned sequence) and sequence.

    Raises:
        requests.HTTPError — propagated from ``raise_for_status()``.

    ToolUniverse:
        ensembl_get_sequence(id=seq_id, type=seq_type, species=species)
    """
    response = requests.get(
        f"{ENSEMBL_REST}/sequence/id/{seq_id}",
        params={"type": seq_type},
        headers=HEADERS,
    )
    response.raise_for_status()
    payload = response.json()

    result = dict(
        id=payload.get("id"),
        seq_type=seq_type,
        molecule=payload.get("molecule"),
        length=len(payload.get("seq", "")),
        sequence=payload.get("seq"),
    )

    print(f"Sequence: {result['id']} ({seq_type}), {result['length']} bp/aa")
    return result
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## 3. VEP (Variant Effect Predictor)
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
def vep_region(species, chromosome, position, allele,
               sift=True, polyphen=True, cadd=False):
    """
    Predict variant effects with the Ensembl VEP region endpoint.

    Parameters:
        species: str — e.g. "homo_sapiens"
        chromosome: str — chromosome name
        position: int — genomic coordinate (used as both start and end)
        allele: str — alternate allele (e.g. "T")
        sift: bool — request SIFT predictions
        polyphen: bool — request PolyPhen predictions
        cadd: bool — request CADD scores

    Returns:
        DataFrame — one row per transcript consequence. The "sift" and
        "polyphen" columns are present only when the response carries
        the corresponding prediction; "cadd_phred" / "cadd_raw" are
        copied whenever the response includes them.

    Raises:
        requests.HTTPError — propagated from ``raise_for_status()``.

    ToolUniverse:
        ensembl_vep_region(
            species=species, region=f"{chromosome}:{position}:{position}",
            allele=allele, SIFT="b", PolyPhen="b"
        )
    """
    region = f"{chromosome}:{position}:{position}"
    url = f"{ENSEMBL_REST}/vep/{species}/region/{region}/{allele}"
    params = {}
    if sift:
        params["SIFT"] = "b"
    if polyphen:
        params["PolyPhen"] = "b"
    if cadd:
        params["CADD"] = 1

    resp = requests.get(url, params=params, headers=HEADERS)
    resp.raise_for_status()
    results = resp.json()

    consequences = []
    for r in results:
        for tc in r.get("transcript_consequences", []):
            cons = {
                "gene_symbol": tc.get("gene_symbol"),
                "gene_id": tc.get("gene_id"),
                "transcript_id": tc.get("transcript_id"),
                "consequence_terms": tc.get("consequence_terms", []),
                "impact": tc.get("impact"),
                "biotype": tc.get("biotype"),
                "amino_acids": tc.get("amino_acids"),
                "codons": tc.get("codons"),
            }
            if "sift_prediction" in tc:
                cons["sift"] = f"{tc['sift_prediction']}({tc.get('sift_score')})"
            if "polyphen_prediction" in tc:
                cons["polyphen"] = f"{tc['polyphen_prediction']}({tc.get('polyphen_score')})"
            # Without this, cadd=True asked the server for CADD scores
            # and then silently dropped them from the result table.
            for cadd_key in ("cadd_phred", "cadd_raw"):
                if cadd_key in tc:
                    cons[cadd_key] = tc[cadd_key]
            consequences.append(cons)

    df = pd.DataFrame(consequences)
    print(f"VEP {chromosome}:{position} {allele}: "
          f"{len(df)} transcript consequences")
    return df
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## 4. クロスリファレンス (ID 変換)
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
def get_xrefs(ensembl_id, external_db=None):
    """
    Fetch external database cross-references for an Ensembl ID.

    Parameters:
        ensembl_id: str — Ensembl ID
        external_db: str — optional database name filter
            (e.g. "UniProt", "RefSeq", "HGNC")

    Returns:
        DataFrame — columns primary_id, display_id, dbname and
        description (truncated to 100 characters).

    Raises:
        requests.HTTPError — propagated from ``raise_for_status()``.

    ToolUniverse:
        ensembl_get_xrefs(id=ensembl_id, external_db=external_db)
    """
    url = f"{ENSEMBL_REST}/xrefs/id/{ensembl_id}"
    params = {}
    if external_db:
        params["external_db"] = external_db

    resp = requests.get(url, params=params, headers=HEADERS)
    resp.raise_for_status()
    xrefs = resp.json()

    rows = [
        {
            "primary_id": x.get("primary_id"),
            "display_id": x.get("display_id"),
            "dbname": x.get("dbname"),
            # A JSON null arrives as None; the .get() default only covers
            # a missing key, so coalesce before slicing.
            "description": (x.get("description") or "")[:100],
        }
        for x in xrefs
    ]

    df = pd.DataFrame(rows)
    print(f"Cross-references for {ensembl_id}: {len(df)} entries")
    return df
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
## 5. 相同性検索 (オルソログ/パラログ)
|
|
210
|
+
|
|
211
|
+
```python
|
|
212
|
+
def get_homology(species, gene_symbol, target_species=None,
                 homology_type="orthologues"):
    """
    Search gene homologies (orthologues/paralogues) by gene symbol.

    Parameters:
        species: str — source species
        gene_symbol: str — gene symbol
        target_species: str — target species (None for all species)
        homology_type: str — "orthologues", "paralogues", "all"

    Returns:
        DataFrame — columns type, target_species, target_gene_id,
        target_symbol, perc_id, perc_pos and dn_ds. Note that
        "target_symbol" is filled from the target's ``protein_id``
        field. The frame is empty when the response holds no homology
        data.

    Raises:
        requests.HTTPError — propagated from ``raise_for_status()``.

    ToolUniverse:
        ensembl_get_homology(
            species=species, symbol=gene_symbol,
            target_species=target_species, type=homology_type
        )
    """
    url = f"{ENSEMBL_REST}/homology/symbol/{species}/{gene_symbol}"
    params = {"type": homology_type}
    if target_species:
        params["target_species"] = target_species

    resp = requests.get(url, params=params, headers=HEADERS)
    resp.raise_for_status()

    # "data" may be missing or an empty list; indexing [0] on an empty
    # list raised IndexError, so fall back to a single empty entry.
    data = resp.json().get("data") or [{}]
    homologies = data[0].get("homologies", [])

    rows = []
    for h in homologies:
        target = h.get("target", {})
        rows.append({
            "type": h.get("type"),
            "target_species": target.get("species"),
            "target_gene_id": target.get("id"),
            "target_symbol": target.get("protein_id"),
            "perc_id": target.get("perc_id"),
            "perc_pos": target.get("perc_pos"),
            "dn_ds": h.get("dn_ds"),
        })

    df = pd.DataFrame(rows)
    print(f"Homologs of {gene_symbol} ({species}): {len(df)} found")
    return df
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
## 6. 制御要素検索
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
def get_regulatory_features(species, region):
    """
    List regulatory features overlapping a genomic region.

    Queries ``/overlap/region`` with ``feature=regulatory``.

    Parameters:
        species: str — species (e.g. "homo_sapiens")
        region: str — genomic region (e.g. "7:140000000-140100000")

    Returns:
        DataFrame — columns id, feature_type, start, end, strand and
        description, one row per returned feature.

    Raises:
        requests.HTTPError — propagated from ``raise_for_status()``.

    ToolUniverse:
        ensembl_get_regulatory_features(region=region, species=species)
        ensembl_get_overlap_features(region=region)
    """
    response = requests.get(
        f"{ENSEMBL_REST}/overlap/region/{species}/{region}",
        params={"feature": "regulatory"},
        headers=HEADERS,
    )
    response.raise_for_status()

    records = [
        {
            "id": feature.get("id"),
            "feature_type": feature.get("feature_type"),
            "start": feature.get("start"),
            "end": feature.get("end"),
            "strand": feature.get("strand"),
            "description": feature.get("description", ""),
        }
        for feature in response.json()
    ]

    table = pd.DataFrame(records)
    print(f"Regulatory features in {region}: {len(table)}")
    return table
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
## 7. 遺伝子系統樹
|
|
296
|
+
|
|
297
|
+
```python
|
|
298
|
+
def get_gene_tree(gene_id, prune_species=None):
    """
    Fetch the gene tree that contains the given gene.

    Parameters:
        gene_id: str — Ensembl Gene ID
        prune_species: list — species to restrict the tree to; a single
            species name given as a plain string is also accepted

    Returns:
        dict — the full JSON response. A short summary (tree id and
        leaf count) is printed but not returned.

    Raises:
        requests.HTTPError — propagated from ``raise_for_status()``.

    ToolUniverse:
        ensembl_get_genetree(id=gene_id, prune_species=species_list)
    """
    url = f"{ENSEMBL_REST}/genetree/member/id/{gene_id}"
    params = {"sequence": "none", "aligned": 0}
    if prune_species:
        # Send one prune_species parameter per species. Joining with ";"
        # produced a single value, and split a plain string into characters.
        if isinstance(prune_species, str):
            params["prune_species"] = prune_species
        else:
            params["prune_species"] = list(prune_species)

    resp = requests.get(url, params=params, headers=HEADERS)
    resp.raise_for_status()
    tree = resp.json()

    root = tree.get("tree", {})
    result = {
        "tree_id": root.get("id"),
        "type": root.get("type"),
        "n_members": _count_leaves(root),
    }

    print(f"Gene tree {result['tree_id']}: {result['n_members']} members")
    return tree
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _count_leaves(node):
|
|
329
|
+
"""系統樹リーフ数をカウント。"""
|
|
330
|
+
if "children" not in node:
|
|
331
|
+
return 1
|
|
332
|
+
return sum(_count_leaves(c) for c in node["children"])
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
---
|
|
336
|
+
|
|
337
|
+
## パイプライン統合
|
|
338
|
+
|
|
339
|
+
```
|
|
340
|
+
bioinformatics ───→ ensembl-genomics ───→ variant-interpretation
|
|
341
|
+
(Ensembl Gene ID) (VEP アノテーション) (ACMG/AMP 分類)
|
|
342
|
+
│ │ ↓
|
|
343
|
+
genome-sequence-tools ──┘ │ variant-effect-prediction
|
|
344
|
+
(BLAST/dbSNP) │ (AlphaMissense/CADD)
|
|
345
|
+
↓
|
|
346
|
+
regulatory-genomics → epigenomics-chromatin
|
|
347
|
+
(RegulomeDB/ReMap) (ChIP-seq/ATAC-seq)
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
## パイプライン出力
|
|
351
|
+
|
|
352
|
+
| ファイル | 説明 | 次スキル |
|
|
353
|
+
|---------|------|---------|
|
|
354
|
+
| `results/ensembl_gene_info.json` | 遺伝子情報 | → bioinformatics |
|
|
355
|
+
| `results/vep_consequences.csv` | VEP バリアント効果 | → variant-interpretation |
|
|
356
|
+
| `results/homology_table.csv` | オルソログ/パラログ | → phylogenetics |
|
|
357
|
+
| `results/regulatory_features.csv` | 制御要素 | → regulatory-genomics |
|
|
358
|
+
|
|
359
|
+
## 利用可能ツール (ToolUniverse SMCP)
|
|
360
|
+
|
|
361
|
+
| ツール名 | 用途 |
|
|
362
|
+
|---------|------|
|
|
363
|
+
| `ensembl_lookup_gene` | 遺伝子ルックアップ |
|
|
364
|
+
| `ensembl_get_sequence` | 配列取得 |
|
|
365
|
+
| `ensembl_get_variants` | バリアント取得 |
|
|
366
|
+
| `ensembl_get_variation` | バリエーション詳細 |
|
|
367
|
+
| `ensembl_get_variation_phenotypes` | バリアント表現型 |
|
|
368
|
+
| `ensembl_vep_region` | VEP 効果予測 |
|
|
369
|
+
| `ensembl_get_xrefs` | クロスリファレンス |
|
|
370
|
+
| `ensembl_get_xrefs_by_name` | 名前ベース xref |
|
|
371
|
+
| `ensembl_get_regulatory_features` | 制御要素 |
|
|
372
|
+
| `ensembl_get_genetree` | 遺伝子系統樹 |
|
|
373
|
+
| `ensembl_get_homology` | 相同性検索 |
|
|
374
|
+
| `ensembl_get_alignment` | 配列アラインメント |
|
|
375
|
+
| `ensembl_get_taxonomy` | 分類学情報 |
|
|
376
|
+
| `ensembl_get_species` | 生物種一覧 |
|
|
377
|
+
| `ensembl_get_ontology_term` | GO オントロジー |
|
|
378
|
+
| `ensembl_get_overlap_features` | 領域オーバーラップ |
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-expression-comparison
|
|
3
|
+
description: |
|
|
4
|
+
Expression Atlas / GTEx / HPA 統合発現比較スキル。EBI Expression Atlas
|
|
5
|
+
ベースライン/差次的発現検索、実験アクセション取得、組織間・条件間
|
|
6
|
+
発現比較、マルチソース統合発現プロファイリングパイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Expression Comparison
|
|
10
|
+
|
|
11
|
+
EBI Expression Atlas API を中核として GTEx/HPA データと統合した
|
|
12
|
+
遺伝子発現比較パイプラインを提供する。ベースライン発現・差次的発現・
|
|
13
|
+
実験メタデータを横断的に検索・比較。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- EBI Expression Atlas で遺伝子のベースライン発現パターンを調べるとき
|
|
18
|
+
- 疾患 vs 正常の差次的発現データを検索するとき
|
|
19
|
+
- 複数の組織/細胞型にわたる発現比較を行うとき
|
|
20
|
+
- Expression Atlas の実験メタデータを取得するとき
|
|
21
|
+
- GTEx/HPA の発現データと統合して比較分析するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. ベースライン発現検索
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import requests
|
|
31
|
+
import pandas as pd
|
|
32
|
+
|
|
33
|
+
ATLAS_API = "https://www.ebi.ac.uk/gxa/json"
|
|
34
|
+
ATLAS_REST = "https://www.ebi.ac.uk/gxa"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_baseline_expression(gene, species="homo sapiens"):
    """
    Fetch a baseline expression profile from Expression Atlas.

    Parameters:
        gene: str — gene symbol or Ensembl ID
        species: str — species

    Returns:
        DataFrame — long-format rows with columns gene, factor_value,
        expression_level and unit. The unit column is always the
        literal "TPM" set by this function.

    Raises:
        requests.HTTPError — propagated from ``raise_for_status()``.

    ToolUniverse:
        ExpressionAtlas_get_baseline(gene=gene, species=species)
    """
    response = requests.get(
        f"{ATLAS_REST}/json/baseline_expression",
        params={"gene": gene, "species": species},
    )
    response.raise_for_status()
    payload = response.json()

    # Flatten profiles -> expressions into one row per expression value.
    records = [
        {
            "gene": profile.get("name", gene),
            "factor_value": expression.get("factorValue", ""),
            "expression_level": expression.get("value"),
            "unit": "TPM",
        }
        for profile in payload.get("profiles", {}).get("rows", [])
        for expression in profile.get("expressions", [])
    ]

    table = pd.DataFrame(records)
    print(f"Baseline expression for {gene}: {len(table)} tissue/cell profiles")
    return table
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## 2. 差次的発現検索
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
def search_differential_expression(gene, condition=None, species="homo sapiens"):
    """
    Search Expression Atlas for differential-expression results.

    Parameters:
        gene: str — gene symbol or Ensembl ID
        condition: str — optional condition (e.g. "cancer", "inflammation")
        species: str — species

    Returns:
        DataFrame — columns experiment_accession, experiment_description
        (truncated to 200 characters), experiment_type, species,
        contrast, log2_fold_change (taken from the response's
        "foldChange" field) and p_value.

    Raises:
        requests.HTTPError — propagated from ``raise_for_status()``.

    ToolUniverse:
        ExpressionAtlas_search_differential(
            gene=gene, condition=condition, species=species
        )
    """
    url = f"{ATLAS_REST}/json/search"
    params = {"geneQuery": gene, "species": species}
    if condition:
        params["conditionQuery"] = condition

    resp = requests.get(url, params=params)
    resp.raise_for_status()
    data = resp.json()

    rows = [
        {
            "experiment_accession": r.get("experimentAccession"),
            # Coalesce None (JSON null) before slicing; the .get()
            # default only applies when the key is missing.
            "experiment_description": (r.get("experimentDescription") or "")[:200],
            "experiment_type": r.get("experimentType"),
            "species": r.get("species"),
            "contrast": r.get("contrastId", ""),
            "log2_fold_change": r.get("foldChange"),
            "p_value": r.get("pValue"),
        }
        for r in data.get("results", [])
    ]

    df = pd.DataFrame(rows)
    print(f"Differential expression for {gene}: {len(df)} contrasts found")
    return df
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## 3. 実験メタデータ取得
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
def get_experiment_details(accession):
    """
    Fetch details of a single Expression Atlas experiment.

    Parameters:
        accession: str — experiment ID (e.g. "E-MTAB-5214")

    Returns:
        dict — accession, description, type, species, pubmed_ids,
        n_assays, n_contrasts and last_updated, read from the
        response's "experiment" object. Scalar fields are None when
        absent.

    Raises:
        requests.HTTPError — propagated from ``raise_for_status()``.

    ToolUniverse:
        ExpressionAtlas_get_experiment(accession=accession)
    """
    url = f"{ATLAS_REST}/json/experiments/{accession}"
    resp = requests.get(url)
    resp.raise_for_status()
    data = resp.json()

    experiment = data.get("experiment", {})
    info = {
        "accession": experiment.get("accession"),
        "description": experiment.get("description"),
        "type": experiment.get("type"),
        "species": experiment.get("species", []),
        "pubmed_ids": experiment.get("pubmedIds", []),
        "n_assays": experiment.get("numberOfAssays"),
        "n_contrasts": experiment.get("numberOfContrasts"),
        "last_updated": experiment.get("lastUpdate"),
    }

    # The description may be None (missing experiment or field); slicing
    # None raised TypeError in the summary line.
    summary = (info["description"] or "")[:100]
    print(f"Experiment: {info['accession']} — {summary}")
    return info
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## 4. 実験検索
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
def search_experiments(gene=None, condition=None, species="homo sapiens"):
    """
    Search Expression Atlas experiments.

    Parameters:
        gene: str — optional gene query
        condition: str — optional condition query
        species: str — species

    Returns:
        DataFrame — columns accession, description (truncated to 200
        characters), type, species and n_assays, built from the
        response's "matchingExperiments" list.

    Raises:
        requests.HTTPError — propagated from ``raise_for_status()``.

    ToolUniverse:
        ExpressionAtlas_search_experiments(
            gene=gene, condition=condition, species=species
        )
    """
    url = f"{ATLAS_REST}/json/search"
    params = {"species": species}
    if gene:
        params["geneQuery"] = gene
    if condition:
        params["conditionQuery"] = condition

    resp = requests.get(url, params=params)
    resp.raise_for_status()
    data = resp.json()

    rows = [
        {
            "accession": e.get("experimentAccession"),
            # Coalesce None (JSON null) before slicing; the .get()
            # default only applies when the key is missing.
            "description": (e.get("experimentDescription") or "")[:200],
            "type": e.get("experimentType"),
            "species": e.get("species"),
            "n_assays": e.get("numberOfAssays"),
        }
        for e in data.get("matchingExperiments", [])
    ]

    df = pd.DataFrame(rows)
    print(f"Experiments found: {len(df)}")
    return df
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## 5. 組織横断発現比較
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
def cross_tissue_comparison(genes, species="homo sapiens"):
    """
    Build a genes × tissues expression matrix for several genes.

    Calls ``get_baseline_expression`` once per gene, stacks the
    non-empty results and pivots them, averaging duplicate
    gene/factor pairs.

    Parameters:
        genes: list — genes to query
        species: str — species

    Returns:
        DataFrame — rows indexed by "gene", columns by "factor_value",
        values are mean "expression_level"; an empty DataFrame when no
        gene produced any data.
    """
    frames = []
    for symbol in genes:
        profile = get_baseline_expression(symbol, species)
        if profile.empty:
            continue
        # Remember which query produced these rows.
        profile["gene_query"] = symbol
        frames.append(profile)

    if len(frames) == 0:
        print("No expression data found")
        return pd.DataFrame()

    long_table = pd.concat(frames, ignore_index=True)

    # Pivot to wide format (genes × tissues).
    matrix = pd.pivot_table(
        long_table,
        index="gene",
        columns="factor_value",
        values="expression_level",
        aggfunc="mean",
    )

    n_genes, n_factors = matrix.shape
    print(f"Expression matrix: {n_genes} genes × "
          f"{n_factors} tissues/conditions")
    return matrix
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## 6. 発現ヒートマップ
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
import matplotlib.pyplot as plt
|
|
237
|
+
import seaborn as sns
|
|
238
|
+
import numpy as np
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def plot_expression_heatmap(matrix, title="Gene Expression Comparison",
                            figsize=(14, 8), save_path=None):
    """
    Draw a heatmap of an expression matrix on a log2(x + 1) scale.

    Parameters:
        matrix: DataFrame — output of cross_tissue_comparison
        title: str — figure title
        figsize: tuple — figure size
        save_path: str — optional path to save the figure to

    Returns:
        None. Missing values are filled with 0 before the log
        transform. When ``matrix`` is None or empty, a message is
        printed and nothing is drawn.
    """
    # cross_tissue_comparison returns an empty DataFrame when no data
    # was found, so bail out before any plotting call.
    if matrix is None or matrix.empty:
        print("No expression data to plot")
        return

    log_matrix = np.log2(matrix.fillna(0) + 1)

    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(
        log_matrix,
        cmap="viridis",
        xticklabels=True,
        yticklabels=True,
        ax=ax,
    )
    ax.set_title(title)
    ax.set_xlabel("Tissue / Condition")
    ax.set_ylabel("Gene")
    plt.tight_layout()

    if save_path:
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
        print(f"Saved: {save_path}")
    plt.show()
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
---
|
|
274
|
+
|
|
275
|
+
## パイプライン統合
|
|
276
|
+
|
|
277
|
+
```
|
|
278
|
+
gene-expression-transcriptomics → expression-comparison → multi-omics
|
|
279
|
+
(GEO/GTEx/DESeq2) (Atlas 発現比較) (統合解析)
|
|
280
|
+
│ │ ↓
|
|
281
|
+
human-protein-atlas ────────────┘ │ pathway-enrichment
|
|
282
|
+
(HPA 組織/がん発現) ↓ (KEGG/GO)
|
|
283
|
+
ontology-enrichment
|
|
284
|
+
(EFO 形質マッピング)
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
## パイプライン出力
|
|
288
|
+
|
|
289
|
+
| ファイル | 説明 | 次スキル |
|
|
290
|
+
|---------|------|---------|
|
|
291
|
+
| `results/baseline_expression.csv` | ベースライン発現 | → gene-expression |
|
|
292
|
+
| `results/differential_expression.csv` | 差次的発現 | → pathway-enrichment |
|
|
293
|
+
| `results/expression_matrix.csv` | 発現マトリクス | → multi-omics |
|
|
294
|
+
| `figures/expression_heatmap.png` | ヒートマップ | → publication-figures |
|
|
295
|
+
|
|
296
|
+
## 利用可能ツール (ToolUniverse SMCP)
|
|
297
|
+
|
|
298
|
+
| ツール名 | 用途 |
|
|
299
|
+
|---------|------|
|
|
300
|
+
| `ExpressionAtlas_get_baseline` | ベースライン発現 |
|
|
301
|
+
| `ExpressionAtlas_search_differential` | 差次的発現検索 |
|
|
302
|
+
| `ExpressionAtlas_search_experiments` | 実験検索 |
|
|
303
|
+
| `ExpressionAtlas_get_experiment` | 実験詳細 |
|