@nahisaho/satori 0.16.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -41
- package/package.json +1 -1
- package/src/.github/skills/scientific-alphafold-structures/SKILL.md +256 -0
- package/src/.github/skills/scientific-arrayexpress-expression/SKILL.md +264 -0
- package/src/.github/skills/scientific-crossref-metadata/SKILL.md +313 -0
- package/src/.github/skills/scientific-encode-screen/SKILL.md +315 -0
- package/src/.github/skills/scientific-environmental-geodata/SKILL.md +255 -0
- package/src/.github/skills/scientific-geo-expression/SKILL.md +274 -0
- package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +271 -0
- package/src/.github/skills/scientific-gwas-catalog/SKILL.md +267 -0
- package/src/.github/skills/scientific-human-cell-atlas/SKILL.md +294 -0
- package/src/.github/skills/scientific-icgc-cancer-data/SKILL.md +351 -0
- package/src/.github/skills/scientific-metabolic-atlas/SKILL.md +263 -0
- package/src/.github/skills/scientific-paleobiology/SKILL.md +265 -0
- package/src/.github/skills/scientific-parasite-genomics/SKILL.md +280 -0
- package/src/.github/skills/scientific-pharmgkb-pgx/SKILL.md +306 -0
- package/src/.github/skills/scientific-semantic-scholar/SKILL.md +298 -0
- package/src/.github/skills/scientific-squidpy-advanced/SKILL.md +251 -0
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-icgc-cancer-data
|
|
3
|
+
description: |
|
|
4
|
+
ICGC がんゲノムデータスキル。ICGC ARGO DCC API および
|
|
5
|
+
レガシー API による国際がんゲノムデータ検索・ドナー/
|
|
6
|
+
検体/変異解析。直接 API (ToolUniverse 非連携)。
|
|
7
|
+
tu_tools: []
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Scientific ICGC Cancer Data
|
|
11
|
+
|
|
12
|
+
ICGC (International Cancer Genome Consortium) ARGO DCC API を
|
|
13
|
+
活用した国際がんゲノムデータ検索・変異統計・がん種横断解析
|
|
14
|
+
パイプラインを提供する。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- 国際がんゲノムプロジェクトのデータを検索するとき
|
|
19
|
+
- がん種ごとの体細胞変異プロファイルを調べるとき
|
|
20
|
+
- ドナー・検体・変異の統計情報を取得するとき
|
|
21
|
+
- がんゲノムの変異シグネチャを分析するとき
|
|
22
|
+
- PCAWG (Pan-Cancer Analysis of Whole Genomes) データを活用するとき
|
|
23
|
+
- がん遺伝子変異の国際比較データが必要なとき
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. ICGC プロジェクト・ドナー検索
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import requests
|
|
33
|
+
import pandas as pd
|
|
34
|
+
|
|
35
|
+
# Base URL that every ICGC request in this skill is built from.
ICGC_BASE = "https://dcc.icgc.org/api/v1"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def icgc_search_projects(query=None, limit=50):
    """
    Search ICGC cancer genome projects.

    Issues a GET request to ``{ICGC_BASE}/projects`` and flattens every
    entry of the response's ``hits`` list into one DataFrame row.

    Parameters:
        query: str — value matched against ``project.primarySite`` with an
            "is" filter; when falsy, no filter is sent
        limit: int — maximum number of results (sent as ``size``)

    Returns:
        pandas.DataFrame — one row per project with the columns
        project_id, project_name, primary_site, tumour_type,
        tumour_subtype, primary_country, total_donors and ssm_count,
        sorted by total_donors in descending order (empty when the
        response contains no hits).

    Raises:
        requests.HTTPError — raised by ``raise_for_status`` on an HTTP
        error response.
    """
    import json  # function-scope import, as icgc_search_mutations does

    url = f"{ICGC_BASE}/projects"
    params = {"size": limit, "from": 1}
    if query:
        # Serialize with json.dumps instead of string interpolation so a
        # quote or backslash in the query cannot produce malformed JSON.
        # Compact separators keep the output identical to the previous
        # hand-built string for plain inputs.
        params["filters"] = json.dumps(
            {"project": {"primarySite": {"is": [query]}}},
            separators=(",", ":"),
        )

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = []
    for hit in data.get("hits", []):
        results.append({
            "project_id": hit.get("id", ""),
            "project_name": hit.get("name", ""),
            "primary_site": hit.get("primarySite", ""),
            "tumour_type": hit.get("tumourType", ""),
            "tumour_subtype": hit.get("tumourSubtype", ""),
            # "or []" tolerates an explicit null in the payload.
            "primary_country": "; ".join(
                hit.get("primaryCountries") or []),
            "total_donors": hit.get("totalDonorCount", 0),
            "ssm_count": hit.get("ssmCount", 0),
        })

    df = pd.DataFrame(results)
    if not df.empty:
        # An empty frame has no columns, so sorting it would raise.
        df = df.sort_values("total_donors", ascending=False)

    total = data.get("pagination", {}).get("total", 0)
    print(f"ICGC projects: {len(df)}/{total} "
          f"(query='{query}')")
    return df
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def icgc_search_donors(project_id, limit=100):
    """
    Search the donors of one ICGC project.

    Issues a GET request to ``{ICGC_BASE}/donors`` filtered on
    ``donor.projectId`` and flattens every entry of the response's
    ``hits`` list into one DataFrame row.

    Parameters:
        project_id: str — project ID (e.g. "BRCA-US")
        limit: int — maximum number of results (sent as ``size``)

    Returns:
        pandas.DataFrame — one row per donor with the columns donor_id,
        project_id, primary_site, gender, vital_status, age_at_diagnosis,
        disease_status and ssm_count (empty when there are no hits).

    Raises:
        requests.HTTPError — raised by ``raise_for_status`` on an HTTP
        error response.
    """
    import json  # function-scope import, as icgc_search_mutations does

    url = f"{ICGC_BASE}/donors"
    params = {
        "size": limit,
        # json.dumps escapes quotes/backslashes in project_id; the compact
        # separators reproduce the previous hand-built string for plain IDs.
        "filters": json.dumps(
            {"donor": {"projectId": {"is": [project_id]}}},
            separators=(",", ":"),
        ),
    }

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = []
    for hit in data.get("hits", []):
        results.append({
            "donor_id": hit.get("id", ""),
            # The requested ID is echoed rather than read from the hit.
            "project_id": project_id,
            "primary_site": hit.get("primarySite", ""),
            "gender": hit.get("gender", ""),
            "vital_status": hit.get("vitalStatus", ""),
            "age_at_diagnosis": hit.get("ageAtDiagnosis"),
            "disease_status": hit.get(
                "diseaseStatusLastFollowup", ""),
            "ssm_count": hit.get("ssmCount", 0),
        })

    df = pd.DataFrame(results)
    total = data.get("pagination", {}).get("total", 0)
    print(f"ICGC donors: {len(df)}/{total} "
          f"(project={project_id})")
    return df
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## 2. 体細胞変異 (SSM) 検索
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
def icgc_search_mutations(gene_symbol=None,
                          project_id=None,
                          consequence_type=None,
                          limit=100):
    """
    Search ICGC simple somatic mutations (SSM).

    Queries ``{ICGC_BASE}/mutations`` with an "is" filter for each
    argument that is set and returns one row per hit.

    Parameters:
        gene_symbol: str — gene symbol (e.g. "TP53")
        project_id: str — project ID
        consequence_type: str — consequence type
            (e.g. "missense_variant")
        limit: int — maximum number of results (sent as ``size``)

    Returns:
        pandas.DataFrame sorted by affected_donors in descending order;
        the gene_symbol, consequence_type and aa_mutation columns are
        taken from the first entry of each hit's ``consequences`` list.
    """
    import json

    # (entity, field, value) triples; only truthy values become filters.
    filter_specs = (
        ("gene", "symbol", gene_symbol),
        ("donor", "projectId", project_id),
        ("mutation", "consequenceType", consequence_type),
    )
    active_filters = {
        entity: {field: {"is": [value]}}
        for entity, field, value in filter_specs
        if value
    }

    response = requests.get(
        f"{ICGC_BASE}/mutations",
        # json.dumps of an empty dict is "{}", the no-filter value.
        params={"size": limit, "filters": json.dumps(active_filters)},
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    def _to_row(hit):
        # The first listed consequence is used as the representative one.
        consequence_list = hit.get("consequences", [])
        lead = consequence_list[0] if consequence_list else {}
        return {
            "mutation_id": hit.get("id", ""),
            "chromosome": hit.get("chromosome", ""),
            "start": hit.get("start"),
            "end": hit.get("end"),
            "mutation": hit.get("mutation", ""),
            "type": hit.get("type", ""),
            "gene_symbol": lead.get("geneSymbol", ""),
            "consequence_type": lead.get("type", ""),
            "aa_mutation": lead.get("aaMutation", ""),
            "affected_donors": hit.get("affectedDonorCountTotal", 0),
            "affected_projects": hit.get("affectedProjectCount", 0),
            "functional_impact": hit.get("functionalImpact", ""),
        }

    df = pd.DataFrame([_to_row(hit) for hit in payload.get("hits", [])])
    if not df.empty:
        df = df.sort_values("affected_donors", ascending=False)

    total = payload.get("pagination", {}).get("total", 0)
    print(f"ICGC mutations: {len(df)}/{total} "
          f"(gene={gene_symbol}, project={project_id})")
    return df
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## 3. がん種統計・変異サマリー
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
def icgc_cancer_stats(project_id=None):
    """
    Summarize ICGC statistics for one project or across projects.

    Parameters:
        project_id: str — project ID; when None (or otherwise falsy) a
            summary over the projects returned by
            ``icgc_search_projects(limit=200)`` is produced instead

    Returns:
        dict — for a single project: project_id, project_name,
        primary_site, total_donors, total_specimens, ssm_count and
        repository; for the overall summary: total_projects,
        total_donors, total_ssm and top_sites (donor totals of the ten
        primary sites with the most donors).

    Raises:
        requests.HTTPError — raised by ``raise_for_status`` on an HTTP
        error response.
    """
    if project_id:
        url = f"{ICGC_BASE}/projects/{project_id}"
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        data = resp.json()

        stats = {
            "project_id": project_id,
            "project_name": data.get("name", ""),
            "primary_site": data.get("primarySite", ""),
            "total_donors": data.get("totalDonorCount", 0),
            "total_specimens": data.get(
                "totalSpecimenCount", 0),
            "ssm_count": data.get("ssmCount", 0),
            # "or []" tolerates an explicit null in the payload.
            "repository": "; ".join(
                data.get("repository") or []),
        }
        print(f"ICGC stats: {project_id} — "
              f"{stats['total_donors']} donors, "
              f"{stats['ssm_count']} mutations")
        return stats

    # Overview across all projects.
    projects = icgc_search_projects(limit=200)
    if projects.empty:
        # An empty result has no columns, so indexing "total_donors"
        # below would raise KeyError; report zeros instead.
        summary = {
            "total_projects": 0,
            "total_donors": 0,
            "total_ssm": 0,
            "top_sites": {},
        }
    else:
        donors_by_site = projects.groupby(
            "primary_site")["total_donors"].sum()
        summary = {
            "total_projects": len(projects),
            "total_donors": projects["total_donors"].sum(),
            "total_ssm": projects["ssm_count"].sum(),
            "top_sites": donors_by_site.sort_values(
                ascending=False).head(10).to_dict(),
        }
    print(f"ICGC summary: {summary['total_projects']} "
          f"projects, {summary['total_donors']} donors")
    return summary
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def icgc_gene_mutation_frequency(gene_symbol, top_n=20):
    """
    Aggregate ICGC mutation counts for a gene.

    Fetches up to 500 mutations via ``icgc_search_mutations`` and
    aggregates them by the ``gene_symbol`` column of that result.
    NOTE(review): the rows returned by ``icgc_search_mutations`` carry no
    project or cancer-type column, so the aggregation is per gene symbol,
    not per project — confirm whether a per-project breakdown is wanted.

    Parameters:
        gene_symbol: str — gene symbol to search for
        top_n: int — maximum number of aggregated rows to return, ranked
            by total affected donors

    Returns:
        pandas.DataFrame with the columns gene_symbol, total_mutations,
        total_affected_donors and mutation_types (up to five distinct
        consequence types joined with "; "); an empty DataFrame when the
        search returns nothing.
    """
    mutations = icgc_search_mutations(
        gene_symbol=gene_symbol, limit=500)

    if mutations.empty:
        return pd.DataFrame()

    def _join_types(values):
        # Drop nulls and stringify so a missing consequence type in the
        # payload cannot make str.join raise TypeError.
        return "; ".join(map(str, values.dropna().unique()[:5]))

    freq = mutations.groupby("gene_symbol").agg(
        total_mutations=("mutation_id", "count"),
        total_affected_donors=("affected_donors", "sum"),
        mutation_types=("consequence_type", _join_types),
    ).reset_index()

    # Honor top_n (previously accepted but ignored): rank by affected
    # donors and keep the leading rows. mergesort keeps ties in a
    # deterministic order.
    freq = freq.sort_values(
        "total_affected_donors", ascending=False, kind="mergesort",
    ).head(top_n).reset_index(drop=True)

    print(f"ICGC gene frequency: {gene_symbol} — "
          f"{len(freq)} entries")
    return freq
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## 4. ICGC 統合パイプライン
|
|
275
|
+
|
|
276
|
+
```python
|
|
277
|
+
def icgc_pipeline(gene_symbols, cancer_site=None,
                  output_dir="results"):
    """
    Run the combined ICGC pipeline and write its CSV outputs.

    Steps: (1) search projects for ``cancer_site`` and write
    ``projects.csv``; (2) search mutations for every gene and write the
    concatenated result to ``mutations.csv``; (3) fetch statistics for the
    first project of the (donor-sorted) project table and write
    ``cancer_stats.csv``.

    Parameters:
        gene_symbols: list[str] — genes to search mutations for
        cancer_site: str — primary-site filter passed to the project search
        output_dir: str — output directory (created if missing)

    Returns:
        dict with the keys "projects" (DataFrame), "mutations" (DataFrame,
        empty when no gene search succeeded) and "cancer_stats" (dict, or
        None when no project was found).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Project search
    projects = icgc_search_projects(query=cancer_site)
    projects.to_csv(output_dir / "projects.csv", index=False)

    # 2) Per-gene mutation search
    all_mutations = []
    for gene in gene_symbols:
        try:
            muts = icgc_search_mutations(
                gene_symbol=gene, limit=200)
            muts["query_gene"] = gene
            all_mutations.append(muts)
        except Exception as e:
            # Best effort: one failing gene must not abort the whole run.
            print(f" Warning: {gene} — {e}")
            continue

    combined = pd.DataFrame()
    if all_mutations:
        combined = pd.concat(all_mutations,
                             ignore_index=True)
        combined.to_csv(output_dir / "mutations.csv",
                        index=False)

    # 3) Statistics for the top project
    stats = None
    if not projects.empty:
        top_project = projects.iloc[0]["project_id"]
        stats = icgc_cancer_stats(project_id=top_project)
        pd.DataFrame([stats]).to_csv(
            output_dir / "cancer_stats.csv", index=False)

    print(f"ICGC pipeline: {output_dir}")
    # Return everything that was computed, not just the project table, so
    # callers do not have to re-read the CSV files.
    return {
        "projects": projects,
        "mutations": combined,
        "cancer_stats": stats,
    }
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
---
|
|
325
|
+
|
|
326
|
+
## ToolUniverse 連携
|
|
327
|
+
|
|
328
|
+
| TU Key | ツール名 | 連携内容 |
|
|
329
|
+
|--------|---------|---------|
|
|
330
|
+
| (direct) | ICGC DCC API | 直接 REST API — TU 非連携 |
|
|
331
|
+
|
|
332
|
+
## パイプライン統合
|
|
333
|
+
|
|
334
|
+
```
|
|
335
|
+
cancer-genomics → icgc-cancer-data → precision-oncology
|
|
336
|
+
(がんゲノム全般) (ICGC DCC API) (精密腫瘍学)
|
|
337
|
+
│ │ ↓
|
|
338
|
+
tcga-data ────────────┘ clinical-decision-support
|
|
339
|
+
(TCGA データ) │ (臨床意思決定)
|
|
340
|
+
↓
|
|
341
|
+
variant-interpretation
|
|
342
|
+
(変異臨床解釈)
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
## パイプライン出力
|
|
346
|
+
|
|
347
|
+
| ファイル | 説明 | 次スキル |
|
|
348
|
+
|---------|------|---------|
|
|
349
|
+
| `results/projects.csv` | プロジェクト一覧 | → cancer-genomics |
|
|
350
|
+
| `results/mutations.csv` | 体細胞変異 | → variant-interpretation |
|
|
351
|
+
| `results/cancer_stats.csv` | がん種統計 | → precision-oncology |
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-metabolic-atlas
|
|
3
|
+
description: |
|
|
4
|
+
代謝アトラススキル。Metabolic Atlas / Human-GEM REST API による
|
|
5
|
+
代謝反応・代謝産物・コンパートメント検索、フラックス解析統合、
|
|
6
|
+
代謝ネットワーク可視化。K-Dense 連携: metabolic-atlas。
|
|
7
|
+
tu_tools: []
|
|
8
|
+
kdense_ref: metabolic-atlas
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Scientific Metabolic Atlas
|
|
12
|
+
|
|
13
|
+
Metabolic Atlas REST API を活用したゲノムスケール代謝モデル
|
|
14
|
+
(GEM) 解析パイプラインを提供する。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- ヒト代謝反応・代謝産物を検索するとき
|
|
19
|
+
- Human-GEM のコンパートメント情報を取得するとき
|
|
20
|
+
- 代謝経路のネットワーク構造を解析するとき
|
|
21
|
+
- フラックスバランス解析 (FBA) の入力を準備するとき
|
|
22
|
+
- 代謝産物コネクティビティを可視化するとき
|
|
23
|
+
- 組織特異的代謝モデルを構築するとき
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. 代謝反応検索
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import requests
|
|
33
|
+
import pandas as pd
|
|
34
|
+
import numpy as np
|
|
35
|
+
|
|
36
|
+
# Base URL that every Metabolic Atlas request in this skill is built from.
MA_BASE = "https://metabolicatlas.org/api/v2"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def metabolic_atlas_search_reactions(query, model="Human-GEM",
                                     compartment=None, limit=50):
    """
    Search metabolic reactions on Metabolic Atlas.

    Sends a GET request to ``{MA_BASE}/search`` with ``type=reaction`` and
    converts each returned record into one DataFrame row.

    Parameters:
        query: str — search query (e.g. "glycolysis", "citrate")
        model: str — GEM model name
        compartment: str — optional compartment (e.g. "cytosol",
            "mitochondria"); applied client-side as a case-insensitive
            substring match on each record's compartment
        limit: int — maximum number of results

    Returns:
        pandas.DataFrame with the columns reaction_id, name, equation,
        subsystem, compartment, gene_rule, lower_bound and upper_bound.

    Raises:
        requests.HTTPError — raised by ``raise_for_status`` on an HTTP
        error response.
    """
    url = f"{MA_BASE}/search"
    params = {
        "query": query,
        "model": model,
        "type": "reaction",
        "limit": limit,
    }
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # The payload is accepted either as a bare list or as a dict holding
    # the list under "results".
    entries = data.get("results", data) if isinstance(data, dict) else data
    wanted = compartment.lower() if compartment else None

    results = []
    for rxn in entries or []:
        if not isinstance(rxn, dict):
            # Non-record entries (e.g. the keys of a dict payload without
            # "results") used to become all-blank rows; skip them.
            continue
        row = {
            "reaction_id": rxn.get("id", ""),
            "name": rxn.get("name", ""),
            "equation": rxn.get("equation", ""),
            "subsystem": rxn.get("subsystem", ""),
            "compartment": rxn.get("compartment", ""),
            "gene_rule": rxn.get("geneRule", ""),
            "lower_bound": rxn.get("lowerBound", None),
            "upper_bound": rxn.get("upperBound", None),
        }
        if wanted and wanted not in str(row["compartment"]).lower():
            continue
        results.append(row)

    df = pd.DataFrame(results[:limit])
    print(f"Metabolic Atlas reactions: {len(df)} results "
          f"(query={query})")
    return df
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## 2. 代謝産物検索
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
def metabolic_atlas_search_metabolites(query, model="Human-GEM",
                                       limit=50):
    """
    Search metabolites on Metabolic Atlas.

    Sends a GET request to ``{MA_BASE}/search`` with ``type=metabolite``
    and converts each returned record into one DataFrame row.

    Parameters:
        query: str — search query (e.g. "glucose", "ATP")
        model: str — GEM model name
        limit: int — maximum number of results

    Returns:
        pandas.DataFrame with the columns metabolite_id, name, formula,
        charge, compartment, chebi_id and kegg_id.

    Raises:
        requests.HTTPError — raised by ``raise_for_status`` on an HTTP
        error response.
    """
    url = f"{MA_BASE}/search"
    params = {
        "query": query,
        "model": model,
        "type": "metabolite",
        "limit": limit,
    }
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # The payload is accepted either as a bare list or as a dict holding
    # the list under "results".
    entries = data.get("results", data) if isinstance(data, dict) else data

    results = []
    for met in entries or []:
        if not isinstance(met, dict):
            # Non-record entries (e.g. the keys of a dict payload without
            # "results") used to become all-blank rows; skip them.
            continue
        results.append({
            "metabolite_id": met.get("id", ""),
            "name": met.get("name", ""),
            "formula": met.get("formula", ""),
            "charge": met.get("charge", None),
            "compartment": met.get("compartment", ""),
            "chebi_id": met.get("chebiId", ""),
            "kegg_id": met.get("keggId", ""),
        })

    df = pd.DataFrame(results[:limit])
    print(f"Metabolic Atlas metabolites: {len(df)} results "
          f"(query={query})")
    return df
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## 3. 代謝ネットワーク解析
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
import networkx as nx
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def metabolic_atlas_network(subsystem, model="Human-GEM"):
    """
    Build a directed metabolite/reaction network for a subsystem.

    Reactions are fetched with ``metabolic_atlas_search_reactions`` (up to
    200) and each equation string is split into substrates and products.
    Every reaction and every metabolite becomes a node (``type`` attribute
    "reaction" or "metabolite"); edges run substrate → reaction → product.

    Parameters:
        subsystem: str — subsystem name (e.g. "Glycolysis")
        model: str — GEM model name

    Returns:
        networkx.DiGraph — the network. Reactions whose equation contains
        no recognized arrow are left out.
    """
    reactions = metabolic_atlas_search_reactions(
        subsystem, model=model, limit=200)

    G = nx.DiGraph()

    for _, rxn in reactions.iterrows():
        rxn_id = rxn["reaction_id"]
        sides = _split_equation(str(rxn.get("equation", "")))
        if sides is None:
            continue
        substrates, products = sides

        G.add_node(rxn_id, type="reaction",
                   name=rxn.get("name", ""))

        for s in substrates:
            G.add_node(s, type="metabolite")
            G.add_edge(s, rxn_id)

        for p in products:
            G.add_node(p, type="metabolite")
            G.add_edge(rxn_id, p)

    # Network statistics
    n_reactions = sum(1 for _, d in G.nodes(data=True)
                      if d.get("type") == "reaction")
    n_metabolites = sum(1 for _, d in G.nodes(data=True)
                        if d.get("type") == "metabolite")

    print(f"Metabolic network: {n_reactions} reactions, "
          f"{n_metabolites} metabolites, {G.number_of_edges()} edges")
    return G


def _split_equation(equation):
    """Split "A + B => C + D" into (substrates, products), or return None."""
    # "<=>" must be tried before "=>": otherwise a reversible equation is
    # cut at "=>" and the stray "<" sticks to the last substrate name.
    for arrow in ("<=>", "=>", "="):
        if arrow in equation:
            left, right = equation.split(arrow, 1)
            break
    else:
        return None

    # Simple parser: terms are whatever is separated by "+"; stoichiometric
    # coefficients, if present, stay part of the term.
    substrates = [term.strip() for term in left.split("+") if term.strip()]
    products = [term.strip() for term in right.split("+") if term.strip()]
    return substrates, products
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## 4. 代謝アトラス統合パイプライン
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
def metabolic_atlas_pipeline(query, model="Human-GEM",
                             output_dir="results"):
    """
    Run the combined Metabolic Atlas pipeline and write its outputs.

    Writes ``reactions.csv``, ``metabolites.csv``,
    ``metabolic_network.graphml`` and ``hub_metabolites.csv`` into
    ``output_dir`` (created if missing).

    Parameters:
        query: str — metabolic pathway / subsystem name
        model: str — GEM model name
        output_dir: str — output directory

    Returns:
        dict with the keys "reactions", "metabolites", "network" and
        "hubs" (the 20 metabolite nodes with the highest degree).
    """
    from pathlib import Path

    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # Step 1: reactions
    reaction_table = metabolic_atlas_search_reactions(query, model=model)
    reaction_table.to_csv(out_path / "reactions.csv", index=False)

    # Step 2: metabolites
    metabolite_table = metabolic_atlas_search_metabolites(query, model=model)
    metabolite_table.to_csv(out_path / "metabolites.csv", index=False)

    # Step 3: network
    graph = metabolic_atlas_network(query, model=model)
    nx.write_graphml(graph, str(out_path / "metabolic_network.graphml"))

    # Step 4: hub metabolites, ranked by node degree (ties keep node order)
    degree_by_metabolite = {
        node: graph.degree(node)
        for node, attrs in graph.nodes(data=True)
        if attrs.get("type") == "metabolite"
    }
    ranked = sorted(degree_by_metabolite.items(),
                    key=lambda pair: pair[1], reverse=True)
    hub_table = pd.DataFrame([
        {"metabolite": name, "degree": degree}
        for name, degree in ranked[:20]
    ])
    hub_table.to_csv(out_path / "hub_metabolites.csv", index=False)

    print(f"Metabolic Atlas pipeline: {out_path}")
    return {
        "reactions": reaction_table,
        "metabolites": metabolite_table,
        "network": graph,
        "hubs": hub_table,
    }
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
---
|
|
236
|
+
|
|
237
|
+
## K-Dense 連携
|
|
238
|
+
|
|
239
|
+
| K-Dense Key | 参照内容 |
|
|
240
|
+
|-------------|---------|
|
|
241
|
+
| `metabolic-atlas` | 代謝モデル構造・反応データベース |
|
|
242
|
+
|
|
243
|
+
## パイプライン統合
|
|
244
|
+
|
|
245
|
+
```
|
|
246
|
+
metabolic-modeling → metabolic-atlas → systems-biology
|
|
247
|
+
(COBRA/FBA) (Human-GEM) (統合モデリング)
|
|
248
|
+
│ │ ↓
|
|
249
|
+
pathway-enrichment ─────┘ gene-expression
|
|
250
|
+
(KEGG/Reactome) │ (発現データ)
|
|
251
|
+
↓
|
|
252
|
+
multi-omics
|
|
253
|
+
(メタボロミクス統合)
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
## パイプライン出力
|
|
257
|
+
|
|
258
|
+
| ファイル | 説明 | 次スキル |
|
|
259
|
+
|---------|------|---------|
|
|
260
|
+
| `results/reactions.csv` | 代謝反応一覧 | → metabolic-modeling |
|
|
261
|
+
| `results/metabolites.csv` | 代謝産物一覧 | → pathway-enrichment |
|
|
262
|
+
| `results/metabolic_network.graphml` | 代謝ネットワーク | → systems-biology |
|
|
263
|
+
| `results/hub_metabolites.csv` | ハブ代謝産物 | → multi-omics |
|