@nahisaho/satori 0.19.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -23
- package/package.json +1 -1
- package/src/.github/skills/scientific-admet-pharmacokinetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-biobank-cohort/SKILL.md +268 -0
- package/src/.github/skills/scientific-cancer-genomics/SKILL.md +7 -0
- package/src/.github/skills/scientific-cell-line-resources/SKILL.md +4 -0
- package/src/.github/skills/scientific-chembl-assay-mining/SKILL.md +4 -0
- package/src/.github/skills/scientific-drug-target-profiling/SKILL.md +4 -0
- package/src/.github/skills/scientific-gdc-portal/SKILL.md +280 -0
- package/src/.github/skills/scientific-immunoinformatics/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolic-flux/SKILL.md +306 -0
- package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-microbiome-metagenomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-monarch-ontology/SKILL.md +260 -0
- package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +10 -0
- package/src/.github/skills/scientific-precision-oncology/SKILL.md +4 -0
- package/src/.github/skills/scientific-spatial-multiomics/SKILL.md +293 -0
- package/src/.github/skills/scientific-stitch-chemical-network/SKILL.md +318 -0
- package/src/.github/skills/scientific-string-network-api/SKILL.md +4 -0
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-gdc-portal
|
|
3
|
+
description: |
|
|
4
|
+
NCI Genomic Data Commons ポータルスキル。GDC REST API
|
|
5
|
+
を用いたがんゲノムプロジェクト横断検索・ケースメタデータ・
|
|
6
|
+
体細胞変異 (SSM)・遺伝子発現・ファイル取得。
|
|
7
|
+
ToolUniverse 連携: gdc。
|
|
8
|
+
tu_tools:
|
|
9
|
+
- key: gdc
|
|
10
|
+
name: GDC
|
|
11
|
+
description: NCI Genomic Data Commons REST API
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# Scientific GDC Portal
|
|
15
|
+
|
|
16
|
+
NCI Genomic Data Commons (GDC) REST API を活用した
|
|
17
|
+
がんゲノムプロジェクト横断検索・ケースメタデータ取得・
|
|
18
|
+
体細胞変異 (SSM)・遺伝子発現パイプラインを提供する。
|
|
19
|
+
|
|
20
|
+
## When to Use
|
|
21
|
+
|
|
22
|
+
- TCGA/TARGET 等のがんゲノムデータを横断検索するとき
|
|
23
|
+
- がん種別のケースメタデータを取得するとき
|
|
24
|
+
- 特定遺伝子の体細胞変異 (SSM) 頻度を調べるとき
|
|
25
|
+
- がんプロジェクトの統計サマリーを取得するとき
|
|
26
|
+
- GDC ファイルメタデータを検索してダウンロード URL を取得するとき
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
## 1. プロジェクト検索・統計
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
import requests
|
|
36
|
+
import pandas as pd
|
|
37
|
+
|
|
38
|
+
# Base URL of the NCI Genomic Data Commons (GDC) REST API; the functions
# in this skill append endpoint paths such as /projects, /cases and /ssms.
GDC_API = "https://api.gdc.cancer.gov"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def gdc_projects(disease_type=None, limit=50):
    """
    Search GDC projects and return them as a DataFrame.

    Parameters:
        disease_type: str — optional disease-type filter
            (e.g. "Breast Invasive Carcinoma"); no filter is sent
            when the value is falsy
        limit: int — maximum number of results (sent as ``size``)

    Returns:
        pandas.DataFrame with the columns project_id, name,
        primary_site, disease_type, case_count and file_count
        (one row per returned hit).

    Raises:
        requests.HTTPError: if the API responds with an error status.
    """
    import json

    url = f"{GDC_API}/projects"
    params = {
        "size": limit,
        "fields": ("project_id,name,primary_site,"
                   "disease_type,summary.case_count,"
                   "summary.file_count"),
    }

    if disease_type:
        # Serialize with json.dumps so that quotes or backslashes inside
        # the value are escaped instead of producing invalid JSON.
        params["filters"] = json.dumps({
            "op": "=",
            "content": {
                "field": "disease_type",
                "value": disease_type,
            },
        })

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    def as_text(value):
        # These fields are joined as sequences of strings; tolerate a
        # missing/None value and a plain string as well.
        if not value:
            return ""
        if isinstance(value, str):
            return value
        return "; ".join(value)

    rows = []
    for hit in data.get("data", {}).get("hits", []):
        summary = hit.get("summary") or {}
        rows.append({
            "project_id": hit.get("project_id", ""),
            "name": hit.get("name", ""),
            "primary_site": as_text(hit.get("primary_site")),
            "disease_type": as_text(hit.get("disease_type")),
            "case_count": summary.get("case_count", 0),
            "file_count": summary.get("file_count", 0),
        })

    df = pd.DataFrame(rows)
    print(f"GDC projects: {len(df)}")
    return df
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## 2. ケースメタデータ
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
def gdc_cases(project_id, limit=100):
    """
    Fetch case metadata for one GDC project.

    Parameters:
        project_id: str — project ID (e.g. "TCGA-BRCA")
        limit: int — maximum number of results (sent as ``size``)

    Returns:
        pandas.DataFrame with the columns case_id, submitter_id,
        gender, race, vital_status, diagnosis, stage and
        age_at_diagnosis. Only the first diagnosis entry of each
        case is used.

    Raises:
        requests.HTTPError: if the API responds with an error status.
    """
    import json

    url = f"{GDC_API}/cases"
    filters = {
        "op": "=",
        "content": {
            "field": "project.project_id",
            "value": project_id,
        },
    }
    params = {
        # json.dumps yields valid JSON for any value; rewriting the
        # Python repr by swapping quote characters breaks on values
        # that contain an apostrophe or a double quote.
        "filters": json.dumps(filters),
        "fields": ("case_id,submitter_id,"
                   "demographic.gender,"
                   "demographic.race,"
                   "demographic.vital_status,"
                   "diagnoses.primary_diagnosis,"
                   "diagnoses.tumor_stage,"
                   "diagnoses.age_at_diagnosis"),
        "size": limit,
    }
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    rows = []
    for hit in data.get("data", {}).get("hits", []):
        # Missing or null sub-documents fall back to empty mappings.
        demo = hit.get("demographic") or {}
        diagnoses = hit.get("diagnoses") or [{}]
        diag = diagnoses[0] or {}
        rows.append({
            "case_id": hit.get("case_id", ""),
            "submitter_id": hit.get("submitter_id", ""),
            "gender": demo.get("gender", ""),
            "race": demo.get("race", ""),
            "vital_status": demo.get("vital_status", ""),
            "diagnosis": diag.get("primary_diagnosis", ""),
            "stage": diag.get("tumor_stage", ""),
            "age_at_diagnosis": diag.get("age_at_diagnosis", ""),
        })

    df = pd.DataFrame(rows)
    print(f"GDC cases: {project_id} → {len(df)}")
    return df
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## 3. 体細胞変異 (SSM) 検索
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
def gdc_ssm_by_gene(gene_symbol, project_id=None,
                    limit=100):
    """
    Search GDC simple somatic mutations (SSM) by gene symbol.

    Parameters:
        gene_symbol: str — gene symbol (e.g. "TP53")
        project_id: str — optional project ID filter; combined with
            the gene filter using "and" when given
        limit: int — maximum number of results (sent as ``size``)

    Returns:
        pandas.DataFrame with one row per (mutation, consequence)
        pair and the columns ssm_id, genomic_change, gene, aa_change
        and consequence_type.

    Raises:
        requests.HTTPError: if the API responds with an error status.
    """
    import json

    url = f"{GDC_API}/ssms"
    conditions = [
        {
            "op": "=",
            "content": {
                "field":
                    "consequence.transcript."
                    "gene.symbol",
                "value": gene_symbol,
            },
        }
    ]
    if project_id:
        conditions.append({
            "op": "=",
            "content": {
                "field": "cases.project.project_id",
                "value": project_id,
            },
        })
    filters = {"op": "and", "content": conditions}

    params = {
        # json.dumps yields valid JSON for any value; rewriting the
        # Python repr by swapping quote characters does not.
        "filters": json.dumps(filters),
        "fields": ("ssm_id,genomic_dna_change,"
                   "consequence.transcript.aa_change,"
                   "consequence.transcript."
                   "consequence_type,"
                   "consequence.transcript."
                   "gene.symbol"),
        "size": limit,
    }
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    rows = []
    for hit in data.get("data", {}).get("hits", []):
        # Null sub-documents are treated like missing ones.
        for csq in hit.get("consequence") or []:
            tx = csq.get("transcript") or {}
            gene = tx.get("gene") or {}
            rows.append({
                "ssm_id": hit.get("ssm_id", ""),
                "genomic_change": hit.get(
                    "genomic_dna_change", ""),
                "gene": gene.get("symbol", ""),
                "aa_change": tx.get("aa_change", ""),
                "consequence_type": tx.get(
                    "consequence_type", ""),
            })

    df = pd.DataFrame(rows)
    print(f"GDC SSM: {gene_symbol} → {len(df)} variants")
    return df
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## 4. GDC 統合パイプライン
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
def gdc_pipeline(project_id, gene_symbol=None,
                 output_dir="results"):
    """
    Run the integrated GDC pipeline and write its tables as CSV files.

    Writes gdc_projects.csv and gdc_cases.csv into ``output_dir`` and,
    when ``gene_symbol`` is given, gdc_ssm.csv as well.

    Parameters:
        project_id: str — project ID
        gene_symbol: str — optional gene filter; the SSM step is
            skipped when it is falsy
        output_dir: str — output directory (created if missing)

    Returns:
        dict with the keys "cases" (case metadata DataFrame),
        "projects" (project list DataFrame) and "ssm" (SSM DataFrame,
        or None when no gene symbol was given).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Project information
    projects = gdc_projects()
    projects.to_csv(output_dir / "gdc_projects.csv",
                    index=False)

    # 2) Case metadata
    cases = gdc_cases(project_id)
    cases.to_csv(output_dir / "gdc_cases.csv",
                 index=False)

    # 3) Somatic mutations (only when a gene was requested)
    ssm = None
    if gene_symbol:
        ssm = gdc_ssm_by_gene(gene_symbol, project_id)
        ssm.to_csv(output_dir / "gdc_ssm.csv",
                   index=False)

    print(f"GDC pipeline: {project_id} → {output_dir}")
    # "cases" is kept as before; the other tables are returned too so
    # callers do not have to re-read the CSV files.
    return {"cases": cases, "projects": projects, "ssm": ssm}
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## ToolUniverse 連携
|
|
259
|
+
|
|
260
|
+
| TU Key | ツール名 | 連携内容 |
|
|
261
|
+
|--------|---------|---------|
|
|
262
|
+
| `gdc` | GDC | NCI Genomic Data Commons REST API |
|
|
263
|
+
|
|
264
|
+
## パイプライン統合
|
|
265
|
+
|
|
266
|
+
```
|
|
267
|
+
cancer-genomics → gdc-portal → precision-oncology
|
|
268
|
+
(COSMIC/DepMap) (GDC API) (MTB レポート)
|
|
269
|
+
│ │ ↓
|
|
270
|
+
icgc-cancer-data ───────┘ variant-interpretation
|
|
271
|
+
(ICGC DCC) (ClinVar/ACMG)
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## パイプライン出力
|
|
275
|
+
|
|
276
|
+
| ファイル | 説明 | 次スキル |
|
|
277
|
+
|---------|------|---------|
|
|
278
|
+
| `results/gdc_projects.csv` | プロジェクト一覧 | → cancer-genomics |
|
|
279
|
+
| `results/gdc_cases.csv` | ケースメタデータ | → precision-oncology |
|
|
280
|
+
| `results/gdc_ssm.csv` | 体細胞変異 | → variant-interpretation |
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-metabolic-flux
|
|
3
|
+
description: |
|
|
4
|
+
代謝フラックス解析スキル。13C/15N 安定同位体トレーサー
|
|
5
|
+
データを用いた代謝フラックス推定・EMU モデリング・
|
|
6
|
+
フラックスバランス制約統合パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Metabolic Flux
|
|
10
|
+
|
|
11
|
+
13C/15N 安定同位体トレーサー実験データを用いた代謝フラックス
|
|
12
|
+
推定・EMU (Elementary Metabolite Unit) フレームワーク・
|
|
13
|
+
フラックスバランス解析 (FBA) 制約統合パイプラインを提供する。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- 13C 安定同位体トレーサー実験データを解析するとき
|
|
18
|
+
- EMU/アイソトポマーモデルを構築するとき
|
|
19
|
+
- MID (Mass Isotopomer Distribution) データをフィッティングするとき
|
|
20
|
+
- 経路別の代謝フラックスを定量するとき
|
|
21
|
+
- FBA 制約とトレーサーデータを統合するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. MID (Mass Isotopomer Distribution) データ処理
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import numpy as np
|
|
31
|
+
import pandas as pd
|
|
32
|
+
from scipy.optimize import minimize
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def load_mid_data(mid_file, sep="\t"):
    """
    Load MID (Mass Isotopomer Distribution) data and normalize each row.

    Parameters:
        mid_file: str — path to the MID data file (or any object
            accepted by ``pandas.read_csv``)
            (TSV: metabolite, M+0, M+1, M+2, ...)
        sep: str — field separator

    Returns:
        pandas.DataFrame indexed by metabolite that contains only the
        columns whose name starts with "M+", as floats. Every row with
        a positive sum is scaled to sum to 1; other rows are returned
        unscaled.
    """
    df = pd.read_csv(mid_file, sep=sep,
                     index_col="metabolite")

    mid_cols = [c for c in df.columns
                if c.startswith("M+")]

    # Work on a float copy: raw intensities are often integer counts and
    # fractions cannot be stored in integer columns.
    mids = df[mid_cols].astype(float)

    # Vectorized row normalization; this also works when a metabolite
    # label occurs more than once in the index.
    row_sums = mids.sum(axis=1)
    divisors = row_sums.where(row_sums > 0, 1.0)
    mids = mids.div(divisors.to_numpy(), axis=0)

    print(f"MID data: {len(mids)} metabolites, "
          f"{len(mid_cols)} isotopomers")
    return mids
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def natural_abundance_correction(mid_df, n_carbons):
    """
    Correct MIDs for the natural abundance of 13C (simplified model).

    For each metabolite a binomial correction matrix M is built where
    M[j, i] is the probability that a molecule carrying i tracer-derived
    13C atoms is observed at mass shift M+j because k = j - i of its
    remaining (n_c - i) carbons are naturally 13C. The measured vector
    satisfies measured = M @ true, so the system is solved for ``true``.
    Negative solutions are clipped to 0 and the row is renormalized.

    Parameters:
        mid_df: DataFrame — normalized MID data (rows: metabolites,
            columns: M+0, M+1, ... in order)
        n_carbons: dict — metabolite name -> number of carbons
            (6 is used for metabolites missing from the mapping)

    Returns:
        DataFrame — corrected copy of ``mid_df`` as floats; the input
        is not modified. Rows whose corrected values do not have a
        positive sum are left unchanged.
    """
    from math import comb

    C13_NAT = 0.011  # natural abundance of 13C

    corrected = mid_df.astype(float)  # astype returns a new frame
    n_cols = corrected.shape[1]

    # Positional access keeps this correct for duplicate index labels.
    for pos, met in enumerate(corrected.index):
        n_c = n_carbons.get(met, 6)
        n_iso = min(n_cols, n_c + 1)
        if n_iso <= 0:
            continue
        raw = corrected.iloc[pos, :n_iso].to_numpy(dtype=float)

        # Correction matrix (simplified): lower triangular, column i is
        # the measured pattern of a molecule with i labeled carbons.
        corr_matrix = np.zeros((n_iso, n_iso))
        for i in range(n_iso):
            remain = n_c - i
            for j in range(i, n_iso):
                k = j - i  # always <= remain because j <= n_c
                corr_matrix[j, i] = (
                    comb(remain, k)
                    * C13_NAT ** k
                    * (1 - C13_NAT) ** (remain - k)
                )

        try:
            corrected_vals = np.linalg.solve(corr_matrix, raw)
        except np.linalg.LinAlgError:
            # Best effort: keep the uncorrected row.
            continue

        corrected_vals = np.maximum(corrected_vals, 0)
        total = corrected_vals.sum()
        if not total > 0:
            # All-zero (or NaN) rows cannot be renormalized; writing
            # 0/0 back would turn the row into NaN.
            continue
        corrected.iloc[pos, :n_iso] = corrected_vals / total

    print(f"NA correction: {len(corrected)} metabolites")
    return corrected
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## 2. EMU フラックスモデル
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
def build_emu_model(reactions, atom_transitions):
    """
    Build an EMU (Elementary Metabolite Unit) network.

    Parameters:
        reactions: list[dict] — reaction definitions
            [{id, substrates, products, reversible}]
        atom_transitions: dict — atom transition mapping
            {reaction_id: [(from_met, from_atoms,
                            to_met, to_atoms)]}

    Returns:
        dict keyed by (product metabolite, sorted tuple of product
        atoms); each value is the list of precursor records
        {reaction, precursor, precursor_atoms, reversible} that
        produce that EMU, in input order.
    """
    network = {}

    for reaction in reactions:
        reaction_id = reaction["id"]
        # Reactions without a transition entry contribute nothing.
        for source, source_atoms, target, target_atoms in (
            atom_transitions.get(reaction_id, [])
        ):
            # Atom order does not matter for the identity of an EMU.
            key = (target, tuple(sorted(target_atoms)))
            network.setdefault(key, []).append({
                "reaction": reaction_id,
                "precursor": source,
                "precursor_atoms": source_atoms,
                "reversible": reaction.get(
                    "reversible", False),
            })

    print(f"EMU model: {len(network)} EMUs, "
          f"{len(reactions)} reactions")
    return network
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def simulate_mid(fluxes, emu_model, substrate_labeling,
                 metabolite):
    """
    Simulate the MID of a metabolite from fluxes (simplified model).

    The result is the flux-weighted average of the labeling patterns
    of all precursors that feed EMUs of ``metabolite``.

    Parameters:
        fluxes: dict — {reaction_id: flux_value}; reactions missing
            from the mapping count as flux 0
        emu_model: dict — EMU network as returned by build_emu_model
        substrate_labeling: dict — substrate labeling patterns
            {metabolite: [M+0 fraction, M+1, ...]}; precursors missing
            from the mapping are treated as unlabeled
        metabolite: str — metabolite to simulate

    Returns:
        numpy.ndarray — MID that sums to 1. Its length is the largest
        EMU size of the metabolite plus one. ``[1.0]`` is returned when
        the metabolite has no EMU, and the unlabeled pattern
        ``[1, 0, ...]`` when no positive flux reaches it.
    """
    relevant_emus = {
        k: v for k, v in emu_model.items()
        if k[0] == metabolite
    }

    if not relevant_emus:
        return np.array([1.0])

    max_size = max(len(k[1]) for k in relevant_emus)
    mid = np.zeros(max_size + 1)

    for emu_key, precursors in relevant_emus.items():
        emu_size = len(emu_key[1])
        unlabeled = [1.0] + [0.0] * emu_size
        for prec in precursors:
            rxn_flux = fluxes.get(prec["reaction"], 0)
            prec_label = substrate_labeling.get(
                prec["precursor"], unlabeled)
            # At most emu_size + 1 entries, so this never exceeds mid.
            fracs = np.asarray(prec_label[:emu_size + 1],
                               dtype=float)
            mid[:len(fracs)] += rxn_flux * fracs

    mid_sum = mid.sum()
    if mid_sum > 0:
        return mid / mid_sum

    # Default: unlabeled. Applied only as a fallback; seeding M+0 with
    # 1.0 before accumulating would bias every simulated MID.
    mid[:] = 0.0
    mid[0] = 1.0
    return mid
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## 3. フラックス推定
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
def estimate_fluxes(observed_mids, emu_model,
                    substrate_labeling,
                    initial_fluxes,
                    metabolites):
    """
    Estimate fluxes by least squares against observed MIDs.

    Parameters:
        observed_mids: dict — {metabolite: np.array}
            observed MID data
        emu_model: dict — EMU network
        substrate_labeling: dict — substrate labeling
        initial_fluxes: dict — initial flux guesses
        metabolites: list — metabolites to fit; entries without an
            observed MID are skipped

    Returns:
        (estimated, result): dict {reaction_id: flux} and the
        optimizer result object returned by ``minimize``.
    """
    flux_names = list(initial_fluxes)
    start = [initial_fluxes[name] for name in flux_names]

    def objective(x):
        # Sum of squared residuals between observed and simulated MIDs,
        # compared over the overlapping leading entries only.
        candidate = dict(zip(flux_names, x))
        ssr = 0.0
        for met in metabolites:
            if met in observed_mids:
                measured = observed_mids[met]
                simulated = simulate_mid(
                    candidate, emu_model,
                    substrate_labeling, met)
                overlap = min(len(measured), len(simulated))
                ssr += np.sum(
                    (measured[:overlap] - simulated[:overlap]) ** 2)
        return ssr

    # Fluxes are constrained to be non-negative, with no upper bound.
    non_negative = [(0, None)] * len(flux_names)
    result = minimize(objective, start, method="L-BFGS-B",
                      bounds=non_negative)

    estimated = dict(zip(flux_names, result.x))
    print(f"Flux estimation: SSR={result.fun:.6f}, "
          f"converged={result.success}")
    return estimated, result
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## 4. 代謝フラックス統合パイプライン
|
|
238
|
+
|
|
239
|
+
```python
|
|
240
|
+
def metabolic_flux_pipeline(mid_file, reactions,
                            atom_transitions,
                            substrate_labeling,
                            n_carbons,
                            output_dir="results"):
    """
    Integrated metabolic flux pipeline.

    Loads and corrects the MID data, builds the EMU model, estimates
    fluxes, and writes mid_corrected.csv and fluxes.csv into
    ``output_dir``.

    Parameters:
        mid_file: str — MID data file
        reactions: list — reaction definitions
        atom_transitions: dict — atom transition mapping
        substrate_labeling: dict — substrate labeling
        n_carbons: dict — metabolite -> number of carbons
        output_dir: str — output directory (created if missing)

    Returns:
        dict with "fluxes" ({reaction_id: flux}) and "mid_corrected"
        (corrected MID DataFrame).
    """
    from pathlib import Path

    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # 1) Load and correct the MIDs
    corrected = natural_abundance_correction(
        load_mid_data(mid_file), n_carbons)
    corrected.to_csv(out_path / "mid_corrected.csv")

    # 2) Build the EMU model
    model = build_emu_model(reactions, atom_transitions)

    # 3) Estimate fluxes; every reaction starts from a flux of 1.0
    measured = {}
    for name in corrected.index:
        measured[name] = corrected.loc[name].values
    seeds = dict.fromkeys((r["id"] for r in reactions), 1.0)
    estimated, _ = estimate_fluxes(
        measured, model, substrate_labeling,
        seeds, list(measured.keys()))

    records = [
        {"reaction": reaction_id, "flux": value}
        for reaction_id, value in estimated.items()
    ]
    pd.DataFrame(records).to_csv(out_path / "fluxes.csv",
                                 index=False)

    print(f"Metabolic flux pipeline → {out_path}")
    return {"fluxes": estimated, "mid_corrected": corrected}
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
---
|
|
290
|
+
|
|
291
|
+
## パイプライン統合
|
|
292
|
+
|
|
293
|
+
```
|
|
294
|
+
metabolic-modeling → metabolic-flux → systems-biology
|
|
295
|
+
(FBA/COBRA) (13C MFA) (統合解析)
|
|
296
|
+
│ │ ↓
|
|
297
|
+
flux-balance-analysis ───┘ pathway-enrichment
|
|
298
|
+
(制約ベース) (パスウェイ集積)
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
## パイプライン出力
|
|
302
|
+
|
|
303
|
+
| ファイル | 説明 | 次スキル |
|
|
304
|
+
|---------|------|---------|
|
|
305
|
+
| `results/mid_corrected.csv` | 補正済み MID | → metabolic-modeling |
|
|
306
|
+
| `results/fluxes.csv` | 推定フラックス | → systems-biology |
|