@nahisaho/satori 0.16.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,351 @@
1
+ ---
2
+ name: scientific-icgc-cancer-data
3
+ description: |
4
+ ICGC がんゲノムデータスキル。ICGC ARGO DCC API および
5
+ レガシー API による国際がんゲノムデータ検索・ドナー/
6
+ 検体/変異解析。直接 API (ToolUniverse 非連携)。
7
+ tu_tools: []
8
+ ---
9
+
10
+ # Scientific ICGC Cancer Data
11
+
12
+ ICGC (International Cancer Genome Consortium) ARGO DCC API を
13
+ 活用した国際がんゲノムデータ検索・変異統計・がん種横断解析
14
+ パイプラインを提供する。
15
+
16
+ ## When to Use
17
+
18
+ - 国際がんゲノムプロジェクトのデータを検索するとき
19
+ - がん種ごとの体細胞変異プロファイルを調べるとき
20
+ - ドナー・検体・変異の統計情報を取得するとき
21
+ - がんゲノムの変異シグネチャを分析するとき
22
+ - PCAWG (Pan-Cancer Analysis of Whole Genomes) データを活用するとき
23
+ - がん遺伝子変異の国際比較データが必要なとき
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. ICGC プロジェクト・ドナー検索
30
+
31
+ ```python
32
+ import requests
33
+ import pandas as pd
34
+
35
+ ICGC_BASE = "https://dcc.icgc.org/api/v1"
36
+
37
+
38
def icgc_search_projects(query=None, limit=50):
    """
    ICGC — search cancer genome projects.

    Parameters:
        query: str — primary-site value to filter on (e.g. "lung", "BRCA");
            None or "" requests projects without any filter
        limit: int — maximum number of results requested from the API

    Returns:
        pandas.DataFrame — one row per project hit, sorted by
        ``total_donors`` in descending order (empty when there are no hits).

    Raises:
        Whatever ``resp.raise_for_status()`` raises on an HTTP error status.
    """
    import json

    url = f"{ICGC_BASE}/projects"
    params = {"size": limit, "from": 1}
    if query:
        # Serialise the filter with json.dumps so quotes or backslashes in
        # the query are escaped instead of corrupting a hand-built string.
        params["filters"] = json.dumps(
            {"project": {"primarySite": {"is": [query]}}},
            separators=(",", ":"),
            ensure_ascii=False,
        )

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = []
    for hit in data.get("hits", []):
        # "or []" also covers an explicit null for primaryCountries.
        countries = hit.get("primaryCountries") or []
        results.append({
            "project_id": hit.get("id", ""),
            "project_name": hit.get("name", ""),
            "primary_site": hit.get("primarySite", ""),
            "tumour_type": hit.get("tumourType", ""),
            "tumour_subtype": hit.get("tumourSubtype", ""),
            "primary_country": "; ".join(countries),
            "total_donors": hit.get("totalDonorCount", 0),
            "ssm_count": hit.get("ssmCount", 0),
        })

    df = pd.DataFrame(results)
    if not df.empty:
        df = df.sort_values("total_donors", ascending=False)

    total = data.get("pagination", {}).get("total", 0)
    print(f"ICGC projects: {len(df)}/{total} "
          f"(query='{query}')")
    return df
80
+
81
+
82
def icgc_search_donors(project_id, limit=100):
    """
    ICGC — search donors that belong to one project.

    Parameters:
        project_id: str — project ID (e.g. "BRCA-US")
        limit: int — maximum number of results requested from the API

    Returns:
        pandas.DataFrame — one row per donor hit (empty when there are
        no hits).

    Raises:
        Whatever ``resp.raise_for_status()`` raises on an HTTP error status.
    """
    import json

    url = f"{ICGC_BASE}/donors"
    params = {
        "size": limit,
        # Built with json.dumps so special characters in project_id are
        # escaped rather than breaking the filter document.
        "filters": json.dumps(
            {"donor": {"projectId": {"is": [project_id]}}},
            separators=(",", ":"),
            ensure_ascii=False,
        ),
    }

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = [
        {
            "donor_id": hit.get("id", ""),
            "project_id": project_id,
            "primary_site": hit.get("primarySite", ""),
            "gender": hit.get("gender", ""),
            "vital_status": hit.get("vitalStatus", ""),
            "age_at_diagnosis": hit.get("ageAtDiagnosis"),
            "disease_status": hit.get("diseaseStatusLastFollowup", ""),
            "ssm_count": hit.get("ssmCount", 0),
        }
        for hit in data.get("hits", [])
    ]

    df = pd.DataFrame(results)
    total = data.get("pagination", {}).get("total", 0)
    print(f"ICGC donors: {len(df)}/{total} "
          f"(project={project_id})")
    return df
122
+ ```
123
+
124
+ ## 2. 体細胞変異 (SSM) 検索
125
+
126
+ ```python
127
def _pick_consequence(consequences, gene_symbol=None, consequence_type=None):
    """Return the consequence dict that best matches the requested filters.

    Preference order: gene and type both match, then gene only, then the
    first entry. Returns {} when there is no usable entry.
    """
    candidates = [c for c in (consequences or []) if isinstance(c, dict)]
    if not candidates:
        return {}

    def _same_gene(cons):
        return (str(cons.get("geneSymbol", "")).upper()
                == str(gene_symbol).upper())

    if gene_symbol:
        gene_hits = [c for c in candidates if _same_gene(c)]
        if gene_hits:
            if consequence_type:
                for cons in gene_hits:
                    if cons.get("type") == consequence_type:
                        return cons
            return gene_hits[0]
    elif consequence_type:
        for cons in candidates:
            if cons.get("type") == consequence_type:
                return cons
    return candidates[0]


def icgc_search_mutations(gene_symbol=None,
                          project_id=None,
                          consequence_type=None,
                          limit=100):
    """
    ICGC — search simple somatic mutations (SSM).

    Parameters:
        gene_symbol: str — gene symbol (e.g. "TP53")
        project_id: str — project ID
        consequence_type: str — consequence type
            (e.g. "missense_variant")
        limit: int — maximum number of results requested from the API

    Returns:
        pandas.DataFrame — one row per mutation hit, sorted by
        ``affected_donors`` in descending order (empty when there are no
        hits). The gene/consequence columns describe the consequence that
        matches the requested gene/type when one exists, otherwise the
        first consequence listed for the mutation.

    Raises:
        Whatever ``resp.raise_for_status()`` raises on an HTTP error status.
    """
    import json

    url = f"{ICGC_BASE}/mutations"
    filters = {}

    if gene_symbol:
        filters["gene"] = {"symbol": {"is": [gene_symbol]}}
    if project_id:
        filters["donor"] = {"projectId": {"is": [project_id]}}
    if consequence_type:
        filters["mutation"] = {
            "consequenceType": {"is": [consequence_type]}
        }

    params = {
        "size": limit,
        # json.dumps({}) already yields "{}", so no special case is needed.
        "filters": json.dumps(filters),
    }

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = []
    for hit in data.get("hits", []):
        # A mutation can list consequences for several genes; report the
        # one that corresponds to the query instead of whichever is first.
        top_cons = _pick_consequence(
            hit.get("consequences"), gene_symbol, consequence_type)

        results.append({
            "mutation_id": hit.get("id", ""),
            "chromosome": hit.get("chromosome", ""),
            "start": hit.get("start"),
            "end": hit.get("end"),
            "mutation": hit.get("mutation", ""),
            "type": hit.get("type", ""),
            "gene_symbol": top_cons.get("geneSymbol", ""),
            "consequence_type": top_cons.get("type", ""),
            "aa_mutation": top_cons.get("aaMutation", ""),
            "affected_donors": hit.get("affectedDonorCountTotal", 0),
            "affected_projects": hit.get("affectedProjectCount", 0),
            "functional_impact": hit.get("functionalImpact", ""),
        })

    df = pd.DataFrame(results)
    if not df.empty:
        df = df.sort_values("affected_donors", ascending=False)

    total = data.get("pagination", {}).get("total", 0)
    print(f"ICGC mutations: {len(df)}/{total} "
          f"(gene={gene_symbol}, project={project_id})")
    return df
196
+ ```
197
+
198
+ ## 3. がん種統計・変異サマリー
199
+
200
+ ```python
201
def icgc_cancer_stats(project_id=None):
    """
    ICGC — cancer-type statistics summary.

    Parameters:
        project_id: str — project ID (None for a summary over all
            projects returned by ``icgc_search_projects(limit=200)``)

    Returns:
        dict — for a single project: id, name, primary site, donor /
        specimen / SSM counts and repositories. For the global summary:
        ``total_projects``, ``total_donors``, ``total_ssm`` and
        ``top_sites`` (the 10 primary sites with the most donors).

    Raises:
        Whatever ``resp.raise_for_status()`` raises on an HTTP error status.
    """
    if project_id:
        url = f"{ICGC_BASE}/projects/{project_id}"
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        data = resp.json()

        stats = {
            "project_id": project_id,
            "project_name": data.get("name", ""),
            "primary_site": data.get("primarySite", ""),
            "total_donors": data.get("totalDonorCount", 0),
            "total_specimens": data.get("totalSpecimenCount", 0),
            "ssm_count": data.get("ssmCount", 0),
            # "or []" also covers an explicit null for repository.
            "repository": "; ".join(data.get("repository") or []),
        }
        print(f"ICGC stats: {project_id} — "
              f"{stats['total_donors']} donors, "
              f"{stats['ssm_count']} mutations")
        return stats

    # Overview across all projects.
    projects = icgc_search_projects(limit=200)
    if projects.empty:
        # An empty result frame has no columns, so indexing it by name
        # would raise KeyError; report an all-zero summary instead.
        summary = {
            "total_projects": 0,
            "total_donors": 0,
            "total_ssm": 0,
            "top_sites": {},
        }
    else:
        donors_by_site = (
            projects.groupby("primary_site")["total_donors"]
            .sum()
            .sort_values(ascending=False)
            .head(10)
        )
        summary = {
            "total_projects": len(projects),
            "total_donors": projects["total_donors"].sum(),
            "total_ssm": projects["ssm_count"].sum(),
            "top_sites": donors_by_site.to_dict(),
        }
    print(f"ICGC summary: {summary['total_projects']} "
          f"projects, {summary['total_donors']} donors")
    return summary
245
+
246
+
247
def icgc_gene_mutation_frequency(gene_symbol, top_n=20):
    """
    ICGC — mutation frequency summary for a gene.

    Fetches up to 500 mutations for ``gene_symbol`` and aggregates them
    by the ``gene_symbol`` column of the result.

    Parameters:
        gene_symbol: str — gene symbol
        top_n: int — maximum number of rows to return, ranked by
            ``total_affected_donors``; None disables the cut-off

    Returns:
        pandas.DataFrame — columns ``gene_symbol``, ``total_mutations``,
        ``total_affected_donors`` and ``mutation_types`` (up to five
        distinct consequence types joined by "; "). Empty when no
        mutation was found.
    """
    mutations = icgc_search_mutations(
        gene_symbol=gene_symbol, limit=500)

    if mutations.empty:
        return pd.DataFrame()

    # Aggregate per gene symbol reported in the hits.
    freq = mutations.groupby("gene_symbol").agg(
        total_mutations=("mutation_id", "count"),
        total_affected_donors=("affected_donors", "sum"),
        mutation_types=("consequence_type",
                        lambda x: "; ".join(map(str, x.unique()[:5]))),
    ).reset_index()

    # Honour top_n, which the previous implementation silently ignored.
    freq = freq.sort_values("total_affected_donors", ascending=False)
    if top_n is not None:
        freq = freq.head(top_n)
    freq = freq.reset_index(drop=True)

    print(f"ICGC gene frequency: {gene_symbol} — "
          f"{len(freq)} entries")
    return freq
272
+ ```
273
+
274
+ ## 4. ICGC 統合パイプライン
275
+
276
+ ```python
277
def icgc_pipeline(gene_symbols, cancer_site=None,
                  output_dir="results"):
    """
    ICGC integrated pipeline.

    Runs the project search, per-gene mutation searches and the statistics
    lookup for the top project, writing each result as CSV under
    ``output_dir``.

    Parameters:
        gene_symbols: list[str] — genes to search mutations for
        cancer_site: str — primary-site filter for the project search
        output_dir: str — output directory (created if missing)

    Returns:
        dict — ``projects`` (DataFrame), ``mutations`` (combined DataFrame,
        empty when no gene search succeeded) and ``stats`` (dict for the
        project with the most donors, or None when no project was found).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Project search
    projects = icgc_search_projects(query=cancer_site)
    projects.to_csv(output_dir / "projects.csv", index=False)

    # 2) Mutation search per gene
    all_mutations = []
    for gene in gene_symbols:
        try:
            muts = icgc_search_mutations(
                gene_symbol=gene, limit=200)
        except Exception as e:
            # Best effort: a failing gene must not abort the whole run.
            print(f" Warning: {gene} — {e}")
            continue
        muts["query_gene"] = gene
        all_mutations.append(muts)

    combined = pd.DataFrame()
    if all_mutations:
        combined = pd.concat(all_mutations, ignore_index=True)
        combined.to_csv(output_dir / "mutations.csv", index=False)

    # 3) Statistics for the project with the most donors
    stats = None
    if not projects.empty:
        top_project = projects.iloc[0]["project_id"]
        stats = icgc_cancer_stats(project_id=top_project)
        pd.DataFrame([stats]).to_csv(
            output_dir / "cancer_stats.csv", index=False)

    print(f"ICGC pipeline: {output_dir}")
    # Return everything that was computed, not only the project table.
    return {
        "projects": projects,
        "mutations": combined,
        "stats": stats,
    }
322
+ ```
323
+
324
+ ---
325
+
326
+ ## ToolUniverse 連携
327
+
328
+ | TU Key | ツール名 | 連携内容 |
329
+ |--------|---------|---------|
330
+ | (direct) | ICGC DCC API | 直接 REST API — TU 非連携 |
331
+
332
+ ## パイプライン統合
333
+
334
+ ```
335
+ cancer-genomics → icgc-cancer-data → precision-oncology
336
+ (がんゲノム全般) (ICGC DCC API) (精密腫瘍学)
337
+ │ │ ↓
338
+ tcga-data ────────────┘ clinical-decision-support
339
+ (TCGA データ) │ (臨床意思決定)
340
+
341
+ variant-interpretation
342
+ (変異臨床解釈)
343
+ ```
344
+
345
+ ## パイプライン出力
346
+
347
+ | ファイル | 説明 | 次スキル |
348
+ |---------|------|---------|
349
+ | `results/projects.csv` | プロジェクト一覧 | → cancer-genomics |
350
+ | `results/mutations.csv` | 体細胞変異 | → variant-interpretation |
351
+ | `results/cancer_stats.csv` | がん種統計 | → precision-oncology |
@@ -0,0 +1,263 @@
1
+ ---
2
+ name: scientific-metabolic-atlas
3
+ description: |
4
+ 代謝アトラススキル。Metabolic Atlas / Human-GEM REST API による
5
+ 代謝反応・代謝産物・コンパートメント検索、フラックス解析統合、
6
+ 代謝ネットワーク可視化。K-Dense 連携: metabolic-atlas。
7
+ tu_tools: []
8
+ kdense_ref: metabolic-atlas
9
+ ---
10
+
11
+ # Scientific Metabolic Atlas
12
+
13
+ Metabolic Atlas REST API を活用したゲノムスケール代謝モデル
14
+ (GEM) 解析パイプラインを提供する。
15
+
16
+ ## When to Use
17
+
18
+ - ヒト代謝反応・代謝産物を検索するとき
19
+ - Human-GEM のコンパートメント情報を取得するとき
20
+ - 代謝経路のネットワーク構造を解析するとき
21
+ - フラックスバランス解析 (FBA) の入力を準備するとき
22
+ - 代謝産物コネクティビティを可視化するとき
23
+ - 組織特異的代謝モデルを構築するとき
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. 代謝反応検索
30
+
31
+ ```python
32
+ import requests
33
+ import pandas as pd
34
+ import numpy as np
35
+
36
+ MA_BASE = "https://metabolicatlas.org/api/v2"
37
+
38
+
39
def metabolic_atlas_search_reactions(query, model="Human-GEM",
                                     compartment=None, limit=50):
    """
    Metabolic Atlas — search metabolic reactions.

    Parameters:
        query: str — search query (e.g. "glycolysis", "citrate")
        model: str — GEM model name
        compartment: str — compartment substring to keep, matched
            case-insensitively (e.g. "cytosol", "mitochondria")
        limit: int — maximum number of results

    Returns:
        pandas.DataFrame — one row per reaction entry (empty when the
        response holds no usable entries).

    Raises:
        Whatever ``resp.raise_for_status()`` raises on an HTTP error status.
    """
    url = f"{MA_BASE}/search"
    params = {
        "query": query,
        "model": model,
        "type": "reaction",
        "limit": limit,
    }
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # Accept either {"results": [...]} or a bare list. Anything else is
    # treated as "no results": iterating a dict would walk its keys and
    # emit blank rows.
    entries = data.get("results") if isinstance(data, dict) else data
    if not isinstance(entries, (list, tuple)):
        entries = []

    wanted = compartment.lower() if compartment else None
    results = []
    for rxn in entries:
        if not isinstance(rxn, dict):
            continue  # skip malformed entries instead of adding blank rows
        row = {
            "reaction_id": rxn.get("id", ""),
            "name": rxn.get("name", ""),
            "equation": rxn.get("equation", ""),
            "subsystem": rxn.get("subsystem", ""),
            "compartment": rxn.get("compartment", ""),
            "gene_rule": rxn.get("geneRule", ""),
            "lower_bound": rxn.get("lowerBound", None),
            "upper_bound": rxn.get("upperBound", None),
        }
        if wanted and wanted not in str(row["compartment"]).lower():
            continue
        results.append(row)

    df = pd.DataFrame(results[:limit])
    print(f"Metabolic Atlas reactions: {len(df)} results "
          f"(query={query})")
    return df
83
+ ```
84
+
85
+ ## 2. 代謝産物検索
86
+
87
+ ```python
88
def metabolic_atlas_search_metabolites(query, model="Human-GEM",
                                       limit=50):
    """
    Metabolic Atlas — search metabolites.

    Parameters:
        query: str — search query (e.g. "glucose", "ATP")
        model: str — GEM model name
        limit: int — maximum number of results

    Returns:
        pandas.DataFrame — one row per metabolite entry (empty when the
        response holds no usable entries).

    Raises:
        Whatever ``resp.raise_for_status()`` raises on an HTTP error status.
    """
    url = f"{MA_BASE}/search"
    params = {
        "query": query,
        "model": model,
        "type": "metabolite",
        "limit": limit,
    }
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # Accept either {"results": [...]} or a bare list. Anything else is
    # treated as "no results": iterating a dict would walk its keys and
    # emit blank rows.
    entries = data.get("results") if isinstance(data, dict) else data
    if not isinstance(entries, (list, tuple)):
        entries = []

    results = [
        {
            "metabolite_id": met.get("id", ""),
            "name": met.get("name", ""),
            "formula": met.get("formula", ""),
            "charge": met.get("charge", None),
            "compartment": met.get("compartment", ""),
            "chebi_id": met.get("chebiId", ""),
            "kegg_id": met.get("keggId", ""),
        }
        for met in entries
        if isinstance(met, dict)  # skip malformed entries
    ]

    df = pd.DataFrame(results[:limit])
    print(f"Metabolic Atlas metabolites: {len(df)} results "
          f"(query={query})")
    return df
126
+ ```
127
+
128
+ ## 3. 代謝ネットワーク解析
129
+
130
+ ```python
131
+ import networkx as nx
132
+
133
+
134
def metabolic_atlas_network(subsystem, model="Human-GEM"):
    """
    Metabolic Atlas — build a metabolic network for a subsystem.

    Searches reactions for ``subsystem`` and builds a bipartite directed
    graph: metabolite -> reaction edges for the left-hand side of each
    equation and reaction -> metabolite edges for the right-hand side.

    Parameters:
        subsystem: str — subsystem name (e.g. "Glycolysis")
        model: str — GEM model name

    Returns:
        networkx.DiGraph — nodes carry ``type`` ("reaction" or
        "metabolite"); reaction nodes also carry ``name`` and
        ``reversible`` (True when the equation uses "<=>").
    """
    import re

    reactions = metabolic_atlas_search_reactions(
        subsystem, model=model, limit=200)

    G = nx.DiGraph()

    # "<=>" must be tried before "=>", otherwise the "<" stays glued to
    # the last substrate name.
    arrow = re.compile(r"(<=>|=>|=)")
    # Only a "+" surrounded by whitespace separates metabolites; a bare
    # "+" is part of names such as "H+[c]" or "NAD+".
    plus = re.compile(r"\s+\+\s+")

    def _split_side(side):
        return [m.strip() for m in plus.split(side.strip()) if m.strip()]

    for _, rxn in reactions.iterrows():
        rxn_id = rxn["reaction_id"]
        equation = str(rxn.get("equation", ""))

        # Simple parser: "A + B => C + D"
        parts = arrow.split(equation, maxsplit=1)
        if len(parts) != 3:
            continue  # no recognised arrow in the equation
        substrates_str, separator, products_str = parts

        substrates = _split_side(substrates_str)
        products = _split_side(products_str)

        G.add_node(rxn_id, type="reaction",
                   name=rxn.get("name", ""),
                   reversible=(separator == "<=>"))

        for s in substrates:
            G.add_node(s, type="metabolite")
            G.add_edge(s, rxn_id)

        for p in products:
            G.add_node(p, type="metabolite")
            G.add_edge(rxn_id, p)

    # Network statistics
    n_reactions = sum(1 for _, d in G.nodes(data=True)
                      if d.get("type") == "reaction")
    n_metabolites = sum(1 for _, d in G.nodes(data=True)
                        if d.get("type") == "metabolite")

    print(f"Metabolic network: {n_reactions} reactions, "
          f"{n_metabolites} metabolites, {G.number_of_edges()} edges")
    return G
184
+ ```
185
+
186
+ ## 4. 代謝アトラス統合パイプライン
187
+
188
+ ```python
189
def metabolic_atlas_pipeline(query, model="Human-GEM",
                             output_dir="results"):
    """
    Metabolic Atlas integrated pipeline.

    Runs the reaction search, the metabolite search and the network
    construction for ``query``, then ranks metabolite nodes by degree.
    Each result is written under ``output_dir``.

    Parameters:
        query: str — metabolic pathway / subsystem name
        model: str — GEM model name
        output_dir: str — output directory (created if missing)

    Returns:
        dict — ``reactions`` and ``metabolites`` (DataFrames), ``network``
        (networkx.DiGraph) and ``hubs`` (DataFrame with the 20
        highest-degree metabolites).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Reaction search
    reactions = metabolic_atlas_search_reactions(query, model=model)
    reactions.to_csv(output_dir / "reactions.csv", index=False)

    # 2) Metabolite search
    metabolites = metabolic_atlas_search_metabolites(query, model=model)
    metabolites.to_csv(output_dir / "metabolites.csv", index=False)

    # 3) Network construction
    G = metabolic_atlas_network(query, model=model)
    nx.write_graphml(G, str(output_dir / "metabolic_network.graphml"))

    # 4) Hub metabolites (highest total degree first)
    met_nodes = [n for n, d in G.nodes(data=True)
                 if d.get("type") == "metabolite"]
    hub_scores = {n: G.degree(n) for n in met_nodes}
    ranked = sorted(hub_scores.items(), key=lambda x: -x[1])[:20]
    # Explicit columns keep the CSV header present even when the network
    # contains no metabolite nodes.
    hub_df = pd.DataFrame(
        [{"metabolite": k, "degree": v} for k, v in ranked],
        columns=["metabolite", "degree"],
    )
    hub_df.to_csv(output_dir / "hub_metabolites.csv", index=False)

    print(f"Metabolic Atlas pipeline: {output_dir}")
    return {
        "reactions": reactions,
        "metabolites": metabolites,
        "network": G,
        "hubs": hub_df,
    }
233
+ ```
234
+
235
+ ---
236
+
237
+ ## K-Dense 連携
238
+
239
+ | K-Dense Key | 参照内容 |
240
+ |-------------|---------|
241
+ | `metabolic-atlas` | 代謝モデル構造・反応データベース |
242
+
243
+ ## パイプライン統合
244
+
245
+ ```
246
+ metabolic-modeling → metabolic-atlas → systems-biology
247
+ (COBRA/FBA) (Human-GEM) (統合モデリング)
248
+ │ │ ↓
249
+ pathway-enrichment ─────┘ gene-expression
250
+ (KEGG/Reactome) │ (発現データ)
251
+
252
+ multi-omics
253
+ (メタボロミクス統合)
254
+ ```
255
+
256
+ ## パイプライン出力
257
+
258
+ | ファイル | 説明 | 次スキル |
259
+ |---------|------|---------|
260
+ | `results/reactions.csv` | 代謝反応一覧 | → metabolic-modeling |
261
+ | `results/metabolites.csv` | 代謝産物一覧 | → pathway-enrichment |
262
+ | `results/metabolic_network.graphml` | 代謝ネットワーク | → systems-biology |
263
+ | `results/hub_metabolites.csv` | ハブ代謝産物 | → multi-omics |