@nahisaho/satori 0.18.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -39
- package/package.json +1 -1
- package/src/.github/skills/scientific-admet-pharmacokinetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-biobank-cohort/SKILL.md +268 -0
- package/src/.github/skills/scientific-cancer-genomics/SKILL.md +7 -0
- package/src/.github/skills/scientific-cell-line-resources/SKILL.md +4 -0
- package/src/.github/skills/scientific-chembl-assay-mining/SKILL.md +4 -0
- package/src/.github/skills/scientific-civic-evidence/SKILL.md +292 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +4 -0
- package/src/.github/skills/scientific-depmap-dependencies/SKILL.md +239 -0
- package/src/.github/skills/scientific-disease-research/SKILL.md +4 -0
- package/src/.github/skills/scientific-drug-target-profiling/SKILL.md +4 -0
- package/src/.github/skills/scientific-drugbank-resources/SKILL.md +269 -0
- package/src/.github/skills/scientific-gdc-portal/SKILL.md +280 -0
- package/src/.github/skills/scientific-gnomad-variants/SKILL.md +356 -0
- package/src/.github/skills/scientific-immunoinformatics/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolic-flux/SKILL.md +306 -0
- package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +4 -0
- package/src/.github/skills/scientific-microbiome-metagenomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-monarch-ontology/SKILL.md +260 -0
- package/src/.github/skills/scientific-opentargets-genetics/SKILL.md +299 -0
- package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +10 -0
- package/src/.github/skills/scientific-precision-oncology/SKILL.md +4 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +4 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-rcsb-pdb-search/SKILL.md +280 -0
- package/src/.github/skills/scientific-reactome-pathways/SKILL.md +242 -0
- package/src/.github/skills/scientific-spatial-multiomics/SKILL.md +293 -0
- package/src/.github/skills/scientific-stitch-chemical-network/SKILL.md +318 -0
- package/src/.github/skills/scientific-string-network-api/SKILL.md +4 -0
- package/src/.github/skills/scientific-uniprot-proteome/SKILL.md +273 -0
- package/src/.github/skills/scientific-variant-interpretation/SKILL.md +4 -0
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-gnomad-variants
|
|
3
|
+
description: |
|
|
4
|
+
gnomAD バリアントスキル。gnomAD (Genome Aggregation Database)
|
|
5
|
+
GraphQL API を用いた集団アレル頻度・遺伝子制約スコア
|
|
6
|
+
(pLI/LOEUF)・リージョンクエリ・トランスクリプトレベル
|
|
7
|
+
データ取得。ToolUniverse 連携: gnomad。
|
|
8
|
+
tu_tools:
|
|
9
|
+
- key: gnomad
|
|
10
|
+
name: gnomAD
|
|
11
|
+
description: ゲノム集約データベース GraphQL API
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# Scientific gnomAD Variants
|
|
15
|
+
|
|
16
|
+
gnomAD (Genome Aggregation Database) GraphQL API を活用した
|
|
17
|
+
集団アレル頻度取得・遺伝子制約スコア (pLI/LOEUF/Z-scores)・
|
|
18
|
+
リージョンクエリ・トランスクリプトレベルデータパイプラインを
|
|
19
|
+
提供する。
|
|
20
|
+
|
|
21
|
+
## When to Use
|
|
22
|
+
|
|
23
|
+
- バリアントの集団アレル頻度 (AF) を確認するとき
|
|
24
|
+
- 遺伝子の LoF 不耐性 (pLI/LOEUF) を評価するとき
|
|
25
|
+
- ゲノムリージョン内のバリアントを列挙するとき
|
|
26
|
+
- 集団別 (gnomAD v4 exome/genome) 頻度を比較するとき
|
|
27
|
+
- ClinVar/VEP アノテーションと頻度を統合するとき
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
## 1. バリアント集団頻度
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import requests
|
|
37
|
+
import pandas as pd
|
|
38
|
+
|
|
39
|
+
# gnomAD GraphQL endpoint; the query helpers in this file POST to this URL.
GNOMAD_API = "https://gnomad.broadinstitute.org/api"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def gnomad_variant(variant_id, dataset="gnomad_r4"):
    """
    Fetch population allele frequencies for one variant from gnomAD.

    Parameters:
        variant_id: str — variant ID in chrom-pos-ref-alt form
            (e.g. "1-55516888-G-A")
        dataset: str — gnomAD dataset ID
            (e.g. "gnomad_r4", "gnomad_r3")

    Returns:
        dict — flat record with variant_id/chrom/pos/ref/alt, "rsids"
        joined by "; ", overall exome_*/genome_* af/ac/an/hom values and
        one "<exome|genome>_<population id>_af" entry per population.
        Missing or null counts and frequencies are reported as 0.
        An empty dict is returned when the variant is not found.

    Raises:
        requests.HTTPError — when the API answers with a non-2xx status.
    """
    # NOTE(review): field names are taken as-is from the original query;
    # confirm them against the live gnomAD schema.
    query = """
    query gnomadVariant($variantId: String!,
                        $dataset: DatasetId!) {
      variant(variantId: $variantId,
              dataset: $dataset) {
        variant_id
        chrom
        pos
        ref
        alt
        exome {
          ac
          an
          af
          ac_hom
          populations {
            id
            ac
            an
            af
          }
        }
        genome {
          ac
          an
          af
          ac_hom
          populations {
            id
            ac
            an
            af
          }
        }
        rsids
        transcript_consequences {
          gene_symbol
          transcript_id
          consequence
          hgvsc
          hgvsp
          lof
          lof_filter
          polyphen_prediction
          sift_prediction
        }
      }
    }
    """
    variables = {"variantId": variant_id,
                 "dataset": dataset}
    resp = requests.post(GNOMAD_API,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()
    # A GraphQL error response carries "data": null, so a plain
    # .get("data", {}) would hand back None; normalise it to {} first.
    data = (resp.json().get("data") or {}).get("variant")

    if not data:
        print(f"gnomAD: {variant_id} not found")
        return {}

    # A variant seen in only one sequencing type has the other block null.
    blocks = {
        "exome": data.get("exome") or {},
        "genome": data.get("genome") or {},
    }

    result = {
        "variant_id": data["variant_id"],
        "chrom": data["chrom"],
        "pos": data["pos"],
        "ref": data["ref"],
        "alt": data["alt"],
        "rsids": "; ".join(data.get("rsids") or []),
    }

    # Overall counts; `or 0` also covers fields present but null, which
    # would otherwise break the ":.6f" formatting below.
    for label, block in blocks.items():
        result[f"{label}_af"] = block.get("af") or 0
        result[f"{label}_ac"] = block.get("ac") or 0
        result[f"{label}_an"] = block.get("an") or 0
        result[f"{label}_hom"] = block.get("ac_hom") or 0

    # Per-population frequencies (exome first, then genome).
    for label, block in blocks.items():
        for pop in block.get("populations") or []:
            result[f"{label}_{pop['id']}_af"] = pop.get("af") or 0

    print(f"gnomAD variant: {variant_id} "
          f"(exome AF={result['exome_af']:.6f})")
    return result
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## 2. 遺伝子制約スコア (pLI/LOEUF)
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
def gnomad_gene_constraint(gene_symbol,
                           dataset="gnomad_r4"):
    """
    Fetch gnomAD gene constraint scores (pLI / LOEUF / Z-scores).

    Parameters:
        gene_symbol: str — gene symbol (e.g. "BRCA1")
        dataset: str — kept for interface compatibility. The gene query
            takes no dataset argument, so the value is not sent.

    Returns:
        dict — gene_id, symbol, pLI, LOEUF (= oe_lof_upper), oe_lof,
        oe_mis, oe_syn, lof_z, mis_z, syn_z, exp_lof, obs_lof. Scores the
        API does not provide are None. An empty dict is returned when
        the gene is not found.

    Raises:
        requests.HTTPError — when the API answers with a non-2xx status.
    """
    # Only $gene is declared: GraphQL validation rejects an operation
    # that declares a variable it never uses, which is what the previous
    # unused $dataset declaration did.
    query = """
    query geneConstraint($gene: String!) {
      gene(gene_symbol: $gene,
           reference_genome: GRCh38) {
        gene_id
        symbol
        gnomad_constraint {
          exp_lof
          exp_mis
          exp_syn
          obs_lof
          obs_mis
          obs_syn
          oe_lof
          oe_lof_lower
          oe_lof_upper
          oe_mis
          oe_syn
          lof_z
          mis_z
          syn_z
          pLI
        }
      }
    }
    """
    variables = {"gene": gene_symbol}
    resp = requests.post(GNOMAD_API,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()
    # "data" is null on GraphQL errors; normalise before drilling in.
    gene = (resp.json().get("data") or {}).get("gene")

    if not gene:
        print(f"gnomAD gene: {gene_symbol} not found")
        return {}

    c = gene.get("gnomad_constraint") or {}
    result = {
        "gene_id": gene["gene_id"],
        "symbol": gene["symbol"],
        "pLI": c.get("pLI"),
        "LOEUF": c.get("oe_lof_upper"),
        "oe_lof": c.get("oe_lof"),
        "oe_mis": c.get("oe_mis"),
        "oe_syn": c.get("oe_syn"),
        "lof_z": c.get("lof_z"),
        "mis_z": c.get("mis_z"),
        "syn_z": c.get("syn_z"),
        "exp_lof": c.get("exp_lof"),
        "obs_lof": c.get("obs_lof"),
    }
    # Missing scores are shown as 0 in the log line only; the returned
    # dict keeps None.
    pli = result.get("pLI") or 0
    loeuf = result.get("LOEUF") or 0
    print(f"gnomAD constraint: {gene_symbol} "
          f"(pLI={pli:.3f}, LOEUF={loeuf:.3f})")
    return result
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## 3. リージョンクエリ
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
def gnomad_region(chrom, start, stop,
                  dataset="gnomad_r4", limit=500):
    """
    List gnomAD variants inside a genomic region.

    Parameters:
        chrom: str — chromosome (e.g. "1")
        start: int — start position (GRCh38)
        stop: int — end position
        dataset: str — gnomAD dataset ID
        limit: int — maximum number of variants kept from the response

    Returns:
        pandas.DataFrame — one row per variant with variant_id, pos,
        ref, alt, rsids (joined by "; "), exome_af and genome_af.
        A frequency is 0 when the variant is absent from that
        sequencing type or the API reports it as null. The frame is
        empty (no columns) when nothing is returned.

    Raises:
        requests.HTTPError — when the API answers with a non-2xx status.
    """
    query = """
    query regionVariants($chrom: String!,
                         $start: Int!,
                         $stop: Int!,
                         $dataset: DatasetId!) {
      region(chrom: $chrom, start: $start,
             stop: $stop,
             reference_genome: GRCh38) {
        variants(dataset: $dataset) {
          variant_id
          pos
          ref
          alt
          exome { af ac an }
          genome { af ac an }
          rsids
        }
      }
    }
    """
    variables = {"chrom": chrom, "start": start,
                 "stop": stop, "dataset": dataset}
    resp = requests.post(GNOMAD_API,
                         json={"query": query,
                               "variables": variables},
                         timeout=30)
    resp.raise_for_status()
    # "data", "region" and "variants" can each be null; a dict.get
    # default is not used for a key that is present but null, so every
    # level is normalised with `or`.
    payload = resp.json().get("data") or {}
    region = payload.get("region") or {}
    variants = region.get("variants") or []

    rows = []
    for v in variants[:limit]:
        exome = v.get("exome") or {}
        genome = v.get("genome") or {}
        rows.append({
            "variant_id": v["variant_id"],
            "pos": v["pos"],
            "ref": v["ref"],
            "alt": v["alt"],
            "rsids": "; ".join(v.get("rsids") or []),
            "exome_af": exome.get("af") or 0,
            "genome_af": genome.get("af") or 0,
        })

    df = pd.DataFrame(rows)
    print(f"gnomAD region: {chrom}:{start}-{stop} "
          f"→ {len(df)} variants")
    return df
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## 4. gnomAD 統合パイプライン
|
|
282
|
+
|
|
283
|
+
```python
|
|
284
|
+
def gnomad_pipeline(gene_symbol, chrom, start, stop,
                    output_dir="results",
                    rare_af_threshold=0.01):
    """
    Run the combined gnomAD pipeline and write the results as CSV.

    Parameters:
        gene_symbol: str — gene symbol
        chrom: str — chromosome
        start: int — start position
        stop: int — end position
        output_dir: str — output directory (created if missing)
        rare_af_threshold: float — a variant is rare when its highest
            allele frequency across exome and genome data is below
            this value

    Returns:
        dict — {"constraint": dict from gnomad_gene_constraint,
                "variants": DataFrame from gnomad_region}

    Files written to output_dir:
        gnomad_constraint.csv, gnomad_region.csv, and gnomad_rare.csv
        (the last only when the region returned at least one variant).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Gene constraint scores
    constraint = gnomad_gene_constraint(gene_symbol)
    pd.DataFrame([constraint]).to_csv(
        output_dir / "gnomad_constraint.csv",
        index=False)

    # 2) Variants in the region
    variants = gnomad_region(chrom, start, stop)
    variants.to_csv(
        output_dir / "gnomad_region.csv",
        index=False)

    # 3) Rare variants. A dataset that lacks the variant is recorded as
    #    AF 0, so OR-ing the two per-dataset tests would let a common
    #    variant seen in only one dataset pass. Thresholding the maximum
    #    AF requires the variant to be rare wherever it was observed.
    if not variants.empty:
        af = (variants[["exome_af", "genome_af"]]
              .apply(pd.to_numeric, errors="coerce")
              .fillna(0))
        rare = variants[af.max(axis=1) < rare_af_threshold]
        rare.to_csv(
            output_dir / "gnomad_rare.csv",
            index=False)
        print(f"  Rare variants: {len(rare)}")

    print(f"gnomAD pipeline: {gene_symbol} "
          f"→ {output_dir}")
    return {"constraint": constraint,
            "variants": variants}
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
---
|
|
330
|
+
|
|
331
|
+
## ToolUniverse 連携
|
|
332
|
+
|
|
333
|
+
| TU Key | ツール名 | 連携内容 |
|
|
334
|
+
|--------|---------|---------|
|
|
335
|
+
| `gnomad` | gnomAD | ゲノム集約データベース GraphQL (~7 tools) |
|
|
336
|
+
|
|
337
|
+
## パイプライン統合
|
|
338
|
+
|
|
339
|
+
```
|
|
340
|
+
variant-interpretation → gnomad-variants → variant-effect-prediction
|
|
341
|
+
(ClinVar バリアント) (gnomAD API) (VEP/CADD/REVEL)
|
|
342
|
+
│ │ ↓
|
|
343
|
+
civic-evidence ──────────────┘ rare-disease-genetics
|
|
344
|
+
(CIViC 臨床) │ (希少疾患遺伝学)
|
|
345
|
+
↓
|
|
346
|
+
opentargets-genetics
|
|
347
|
+
(OT 遺伝的関連)
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
## パイプライン出力
|
|
351
|
+
|
|
352
|
+
| ファイル | 説明 | 次スキル |
|
|
353
|
+
|---------|------|---------|
|
|
354
|
+
| `results/gnomad_constraint.csv` | 遺伝子制約 | → rare-disease-genetics |
|
|
355
|
+
| `results/gnomad_region.csv` | リージョンバリアント | → variant-interpretation |
|
|
356
|
+
| `results/gnomad_rare.csv` | レアバリアント | → variant-effect-prediction |
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-metabolic-flux
|
|
3
|
+
description: |
|
|
4
|
+
代謝フラックス解析スキル。13C/15N 安定同位体トレーサー
|
|
5
|
+
データを用いた代謝フラックス推定・EMU モデリング・
|
|
6
|
+
フラックスバランス制約統合パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Metabolic Flux
|
|
10
|
+
|
|
11
|
+
13C/15N 安定同位体トレーサー実験データを用いた代謝フラックス
|
|
12
|
+
推定・EMU (Elementary Metabolite Unit) フレームワーク・
|
|
13
|
+
フラックスバランス解析 (FBA) 制約統合パイプラインを提供する。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- 13C 安定同位体トレーサー実験データを解析するとき
|
|
18
|
+
- EMU/アイソトポマーモデルを構築するとき
|
|
19
|
+
- MID (Mass Isotopomer Distribution) データをフィッティングするとき
|
|
20
|
+
- 経路別の代謝フラックスを定量するとき
|
|
21
|
+
- FBA 制約とトレーサーデータを統合するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. MID (Mass Isotopomer Distribution) データ処理
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import numpy as np
|
|
31
|
+
import pandas as pd
|
|
32
|
+
from scipy.optimize import minimize
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def load_mid_data(mid_file, sep="\t"):
    """
    Load a mass isotopomer distribution (MID) table and normalise rows.

    Parameters:
        mid_file: str or file-like — delimited table with a
            "metabolite" column plus "M+0", "M+1", "M+2", ... columns
        sep: str — field delimiter

    Returns:
        pandas.DataFrame — indexed by metabolite, containing only the
        "M+" columns as floats. Each row with a positive total is scaled
        to sum to 1; rows whose total is not positive are left as read.
    """
    df = pd.read_csv(mid_file, sep=sep,
                     index_col="metabolite")

    mid_cols = [c for c in df.columns
                if c.startswith("M+")]

    # Cast once so integer count columns can hold fractions, then
    # normalise all rows in a single vectorised division.
    mids = df[mid_cols].astype(float)
    totals = mids.sum(axis=1)
    # Dividing by 1 leaves non-positive-total rows untouched.
    mids = mids.div(totals.where(totals > 0, 1.0), axis=0)

    print(f"MID data: {len(df)} metabolites, "
          f"{len(mid_cols)} isotopomers")
    return mids
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def natural_abundance_correction(mid_df, n_carbons):
    """
    Correct MIDs for naturally occurring 13C.

    Parameters:
        mid_df: DataFrame — normalised MID data (rows: metabolites,
            columns: M+0, M+1, ...)
        n_carbons: dict — metabolite name → number of carbons
            (6 is assumed for metabolites missing from the mapping)

    Returns:
        DataFrame — copy of mid_df in which the first
        min(n_columns, n_carbons + 1) values of each row are replaced by
        the corrected distribution, clipped at 0 and renormalised to
        sum to 1. A row is left unchanged when the correction cannot be
        solved or clips to all zeros. mid_df itself is not modified.
    """
    from math import comb

    C13_NAT = 0.011  # natural 13C abundance

    # astype returns a copy and lets integer input hold fractions.
    corrected = mid_df.astype(float)
    for met in corrected.index:
        n_c = n_carbons.get(met, 6)
        n_iso = min(corrected.shape[1], n_c + 1)
        if n_iso < 1:
            continue
        raw = corrected.loc[met].to_numpy(dtype=float)[:n_iso]

        # labeling[i, j] = P(observed as M+j | i carbons carry tracer):
        # k = j - i of the remaining n_c - i carbons are natural 13C.
        # j <= n_c always holds here, so k never exceeds `remain`.
        labeling = np.zeros((n_iso, n_iso))
        for i in range(n_iso):
            remain = n_c - i
            for j in range(i, n_iso):
                k = j - i
                labeling[i, j] = (
                    comb(remain, k)
                    * C13_NAT ** k
                    * (1 - C13_NAT) ** (remain - k)
                )

        # observed[j] = sum_i true[i] * labeling[i, j], i.e.
        # observed = labeling.T @ true, so the transposed system is
        # the one to solve.
        try:
            true_vals = np.linalg.solve(labeling.T, raw)
        except np.linalg.LinAlgError:
            print(f"NA correction: {met} skipped "
                  f"(singular correction matrix)")
            continue

        true_vals = np.maximum(true_vals, 0)
        total = true_vals.sum()
        if total > 0:  # avoid 0/0 → NaN when everything clipped to zero
            corrected.loc[met, corrected.columns[:n_iso]] = (
                true_vals / total)

    print(f"NA correction: {len(corrected)} metabolites")
    return corrected
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## 2. EMU フラックスモデル
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
def build_emu_model(reactions, atom_transitions):
    """
    Build an EMU (Elementary Metabolite Unit) network.

    Parameters:
        reactions: list[dict] — reaction definitions; "id" is required
            and "reversible" is optional (defaults to False)
        atom_transitions: dict — atom transition mapping
            {reaction_id: [(from_met, from_atoms,
                            to_met, to_atoms)]}

    Returns:
        dict — {(to_met, sorted tuple of to_atoms): [entry, ...]} where
        each entry is a dict with the keys "reaction", "precursor",
        "precursor_atoms" and "reversible". Reactions with no
        transitions contribute nothing.
    """
    emu_network = {}

    for rxn in reactions:
        rxn_id = rxn["id"]
        reversible = rxn.get("reversible", False)

        for from_met, f_atoms, to_met, t_atoms in (
            atom_transitions.get(rxn_id, [])
        ):
            # Sorting makes the key independent of atom listing order.
            emu_key = (to_met, tuple(sorted(t_atoms)))
            emu_network.setdefault(emu_key, []).append({
                "reaction": rxn_id,
                "precursor": from_met,
                "precursor_atoms": f_atoms,
                "reversible": reversible,
            })

    print(f"EMU model: {len(emu_network)} EMUs, "
          f"{len(reactions)} reactions")
    return emu_network
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def simulate_mid(fluxes, emu_model, substrate_labeling,
                 metabolite):
    """
    Simulate the MID of one metabolite from a set of fluxes.

    Parameters:
        fluxes: dict — {reaction_id: flux_value}; reactions missing
            from the dict count as flux 0
        emu_model: dict — EMU network keyed by (metabolite, atoms)
        substrate_labeling: dict — labeling pattern per precursor
            {metabolite: [M+0 fraction, M+1, ...]}; precursors missing
            from the dict are treated as unlabeled
        metabolite: str — metabolite to simulate

    Returns:
        numpy.ndarray — distribution of length (largest EMU size + 1),
        normalised to sum to 1 when its total is positive; array([1.0])
        when the model has no EMU for the metabolite.
    """
    target_emus = [(key, sources)
                   for key, sources in emu_model.items()
                   if key[0] == metabolite]
    if not target_emus:
        return np.array([1.0])

    widest = max(len(key[1]) for key, _ in target_emus)

    # Start from a fully unlabeled pool.
    # NOTE(review): this baseline unit of M+0 stays in the total even
    # when fluxes contribute — confirm that this is intended.
    mid = np.zeros(widest + 1)
    mid[0] = 1.0

    for key, sources in target_emus:
        n_atoms = len(key[1])
        unlabeled = [1.0] + [0.0] * n_atoms
        for source in sources:
            weight = fluxes.get(source["reaction"], 0)
            pattern = substrate_labeling.get(
                source["precursor"], unlabeled)
            # The slice keeps at most n_atoms + 1 <= widest + 1 entries,
            # so every slot is a valid index into `mid`.
            for slot, fraction in enumerate(pattern[:n_atoms + 1]):
                mid[slot] += weight * fraction

    total = mid.sum()
    if total > 0:
        mid /= total

    return mid
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## 3. フラックス推定
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
def estimate_fluxes(observed_mids, emu_model,
                    substrate_labeling,
                    initial_fluxes,
                    metabolites):
    """
    Estimate fluxes by least squares against observed MIDs.

    Parameters:
        observed_mids: dict — {metabolite: np.array} measured MIDs
        emu_model: dict — EMU network
        substrate_labeling: dict — substrate labeling patterns
        initial_fluxes: dict — starting flux value per reaction; its
            keys define which fluxes are fitted
        metabolites: list — metabolites to fit; names without an
            observed MID are ignored

    Returns:
        tuple — ({reaction_id: fitted flux}, the optimisation result
        returned by scipy.optimize.minimize)
    """
    reaction_ids = list(initial_fluxes.keys())
    start_point = [initial_fluxes[rid] for rid in reaction_ids]
    # Only metabolites that have a measurement contribute a residual.
    fitted_mets = [m for m in metabolites if m in observed_mids]

    def sum_sq_residuals(x):
        trial = dict(zip(reaction_ids, x))
        total = 0.0
        for met in fitted_mets:
            measured = observed_mids[met]
            predicted = simulate_mid(
                trial, emu_model,
                substrate_labeling, met)
            # Compare only the isotopomers both vectors have.
            overlap = min(len(measured), len(predicted))
            delta = measured[:overlap] - predicted[:overlap]
            total += np.sum(delta ** 2)
        return total

    # Fluxes are constrained to be non-negative.
    nonneg = [(0, None)] * len(reaction_ids)
    fit = minimize(sum_sq_residuals, start_point,
                   method="L-BFGS-B", bounds=nonneg)

    estimated = dict(zip(reaction_ids, fit.x))
    print(f"Flux estimation: SSR={fit.fun:.6f}, "
          f"converged={fit.success}")
    return estimated, fit
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## 4. 代謝フラックス統合パイプライン
|
|
238
|
+
|
|
239
|
+
```python
|
|
240
|
+
def metabolic_flux_pipeline(mid_file, reactions,
                            atom_transitions,
                            substrate_labeling,
                            n_carbons,
                            output_dir="results"):
    """
    Run the combined metabolic flux pipeline and write CSV results.

    Parameters:
        mid_file: str — MID data file
        reactions: list — reaction definitions
        atom_transitions: dict — atom transition mapping
        substrate_labeling: dict — substrate labeling patterns
        n_carbons: dict — metabolite → number of carbons
        output_dir: str — output directory (created if missing)

    Returns:
        dict — {"fluxes": {reaction_id: flux},
                "mid_corrected": corrected MID DataFrame}

    Files written to output_dir: mid_corrected.csv and fluxes.csv.
    """
    from pathlib import Path
    out_root = Path(output_dir)
    out_root.mkdir(parents=True, exist_ok=True)

    # 1) Load the MIDs and correct them for natural abundance
    corrected_mids = natural_abundance_correction(
        load_mid_data(mid_file), n_carbons)
    corrected_mids.to_csv(out_root / "mid_corrected.csv")

    # 2) Build the EMU model
    network = build_emu_model(reactions, atom_transitions)

    # 3) Fit the fluxes, starting every reaction at 1.0
    measured = {}
    for name in corrected_mids.index:
        measured[name] = corrected_mids.loc[name].values
    seeds = dict.fromkeys((r["id"] for r in reactions), 1.0)
    fluxes, _ = estimate_fluxes(
        measured, network, substrate_labeling,
        seeds, list(measured.keys()))

    records = [{"reaction": rid, "flux": value}
               for rid, value in fluxes.items()]
    pd.DataFrame(records).to_csv(out_root / "fluxes.csv",
                                 index=False)

    print(f"Metabolic flux pipeline → {out_root}")
    return {"fluxes": fluxes, "mid_corrected": corrected_mids}
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
---
|
|
290
|
+
|
|
291
|
+
## パイプライン統合
|
|
292
|
+
|
|
293
|
+
```
|
|
294
|
+
metabolic-modeling → metabolic-flux → systems-biology
|
|
295
|
+
(FBA/COBRA) (13C MFA) (統合解析)
|
|
296
|
+
│ │ ↓
|
|
297
|
+
flux-balance-analysis ───┘ pathway-enrichment
|
|
298
|
+
(制約ベース) (パスウェイ集積)
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
## パイプライン出力
|
|
302
|
+
|
|
303
|
+
| ファイル | 説明 | 次スキル |
|
|
304
|
+
|---------|------|---------|
|
|
305
|
+
| `results/mid_corrected.csv` | 補正済み MID | → metabolic-modeling |
|
|
306
|
+
| `results/fluxes.csv` | 推定フラックス | → systems-biology |
|
|
@@ -6,6 +6,10 @@ description: |
|
|
|
6
6
|
(NIH メタボロミクスリポジトリ) の 3 大メタボロミクス DB を統合した
|
|
7
7
|
代謝物同定、パスウェイマッピング、バイオマーカー発見、
|
|
8
8
|
RefMet 標準化命名パイプライン。13 の ToolUniverse SMCP ツールと連携。
|
|
9
|
+
tu_tools:
|
|
10
|
+
- key: metacyc
|
|
11
|
+
name: MetaCyc
|
|
12
|
+
description: 代謝パスウェイ・反応・化合物データベース
|
|
9
13
|
---
|
|
10
14
|
|
|
11
15
|
# Scientific Metabolomics Databases
|