@nahisaho/satori 0.20.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +70 -39
- package/package.json +1 -1
- package/src/.github/skills/scientific-biothings-idmapping/SKILL.md +4 -0
- package/src/.github/skills/scientific-cellxgene-census/SKILL.md +257 -0
- package/src/.github/skills/scientific-clingen-curation/SKILL.md +258 -0
- package/src/.github/skills/scientific-clinical-nlp/SKILL.md +250 -0
- package/src/.github/skills/scientific-clinical-pharmacology/SKILL.md +361 -0
- package/src/.github/skills/scientific-clinical-standards/SKILL.md +444 -0
- package/src/.github/skills/scientific-crispr-design/SKILL.md +369 -0
- package/src/.github/skills/scientific-drug-repurposing/SKILL.md +4 -0
- package/src/.github/skills/scientific-environmental-ecology/SKILL.md +5 -0
- package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +5 -0
- package/src/.github/skills/scientific-epigenomics-chromatin/SKILL.md +5 -0
- package/src/.github/skills/scientific-glycomics/SKILL.md +274 -0
- package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +5 -2
- package/src/.github/skills/scientific-hgnc-nomenclature/SKILL.md +282 -0
- package/src/.github/skills/scientific-human-cell-atlas/SKILL.md +3 -0
- package/src/.github/skills/scientific-human-protein-atlas/SKILL.md +4 -0
- package/src/.github/skills/scientific-immunoinformatics/SKILL.md +9 -0
- package/src/.github/skills/scientific-lipidomics/SKILL.md +284 -0
- package/src/.github/skills/scientific-metabolomics/SKILL.md +3 -0
- package/src/.github/skills/scientific-metabolomics-network/SKILL.md +311 -0
- package/src/.github/skills/scientific-metagenome-assembled-genomes/SKILL.md +299 -0
- package/src/.github/skills/scientific-model-organism-db/SKILL.md +8 -0
- package/src/.github/skills/scientific-pharmacogenomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-pharos-targets/SKILL.md +276 -0
- package/src/.github/skills/scientific-protein-structure-analysis/SKILL.md +4 -0
- package/src/.github/skills/scientific-public-health-data/SKILL.md +11 -0
- package/src/.github/skills/scientific-systems-biology/SKILL.md +11 -0
- package/src/.github/skills/scientific-variant-effect-prediction/SKILL.md +7 -0
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-hgnc-nomenclature
|
|
3
|
+
description: |
|
|
4
|
+
HGNC 遺伝子命名法スキル。HUGO Gene Nomenclature Committee
|
|
5
|
+
REST API による公式遺伝子シンボル検索・エイリアス解決・
|
|
6
|
+
遺伝子ファミリー/グループクエリ・ID クロスリファレンス
|
|
7
|
+
パイプライン。
|
|
8
|
+
TU 外スキル (直接 REST API)。
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Scientific HGNC Nomenclature
|
|
12
|
+
|
|
13
|
+
HGNC (HUGO Gene Nomenclature Committee) REST API を活用した
|
|
14
|
+
公式遺伝子シンボル検索・エイリアス/旧シンボル解決・
|
|
15
|
+
遺伝子ファミリー照会・マルチデータベース ID 相互参照
|
|
16
|
+
パイプラインを提供する。
|
|
17
|
+
|
|
18
|
+
## When to Use
|
|
19
|
+
|
|
20
|
+
- 遺伝子エイリアスから公式 HGNC シンボルを取得するとき
|
|
21
|
+
- 旧遺伝子シンボル (previous symbol) を最新名に変換するとき
|
|
22
|
+
- 遺伝子ファミリー/グループのメンバーリストを取得するとき
|
|
23
|
+
- HGNC ID ↔ Ensembl / NCBI Gene / UniProt のクロスリファレンスを行うとき
|
|
24
|
+
- 遺伝子座タイプ (protein-coding, ncRNA 等) でフィルタするとき
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Quick Start
|
|
29
|
+
|
|
30
|
+
## 1. HGNC シンボル検索
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import requests
|
|
34
|
+
import pandas as pd
|
|
35
|
+
|
|
36
|
+
# Root URL of the HGNC REST service; endpoint paths are appended to it.
HGNC_BASE = "https://rest.genenames.org"
|
|
37
|
+
# Sent with every HGNC request so that responses are requested as JSON.
HEADERS = {"Accept": "application/json"}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def hgnc_search(query):
    """
    Search HGNC for genes matching a symbol or name.

    Parameters:
        query: str — search term (symbol / name); it is interpolated
            as-is into the ``/search/`` URL path.

    Returns:
        pd.DataFrame — one row per hit with the columns ``hgnc_id``,
        ``symbol``, ``name``, ``locus_type`` and ``status``. Fields
        missing from a hit are stored as empty strings.

    Raises:
        requests.HTTPError — when the server answers with an error
            status.
    """
    # NOTE(review): the search endpoint may return only a subset of
    # these fields per hit (leaving the others blank) — confirm against
    # the HGNC REST documentation.
    wanted = ("hgnc_id", "symbol", "name", "locus_type", "status")

    response = requests.get(
        f"{HGNC_BASE}/search/{query}", headers=HEADERS, timeout=30)
    response.raise_for_status()
    hits = response.json().get("response", {}).get("docs", [])

    df = pd.DataFrame(
        [{key: hit.get(key, "") for key in wanted} for hit in hits])
    print(f"HGNC search '{query}': {len(df)} hits")
    return df
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def hgnc_fetch_symbol(symbol):
    """
    Fetch the HGNC record for an official gene symbol.

    Parameters:
        symbol: str — official gene symbol (e.g. "BRCA1")

    Returns:
        dict — selected fields of the first returned record
        (identifiers, name, locus type, location, alias and previous
        symbols, Ensembl / Entrez / UniProt cross-references and gene
        groups). Missing scalar fields default to ``""`` and missing
        list fields to ``[]``. An empty dict is returned when the
        response contains no record.

    Raises:
        requests.HTTPError — when the server answers with an error
            status.
    """
    response = requests.get(
        f"{HGNC_BASE}/fetch/symbol/{symbol}", headers=HEADERS, timeout=30)
    response.raise_for_status()
    records = response.json().get("response", {}).get("docs", [])

    # Guard clause: nothing to extract when the symbol is unknown.
    if not records:
        print(f"HGNC: {symbol} not found")
        return {}

    record = records[0]
    ordered_fields = (
        "hgnc_id", "symbol", "name", "locus_type", "location",
        "alias_symbol", "prev_symbol", "ensembl_gene_id", "entrez_id",
        "uniprot_ids", "gene_group",
    )
    list_fields = {"alias_symbol", "prev_symbol", "uniprot_ids",
                   "gene_group"}
    # A fresh list is created per missing list field so that results
    # never share a mutable default.
    info = {
        field: record.get(field, [] if field in list_fields else "")
        for field in ordered_fields
    }

    print(f"HGNC: {symbol} → {info['name']} ({info['locus_type']})")
    return info
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## 2. エイリアス/旧シンボル解決
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
def hgnc_resolve_alias(alias):
    """
    Resolve an alias or previous symbol to official HGNC symbol(s).

    Parameters:
        alias: str — alias or previous gene symbol

    Returns:
        list[str] — symbols of the records returned by the first lookup
        that yields any record; ``[]`` when neither lookup matches.

    Raises:
        requests.HTTPError — when the server answers with an error
            status.
    """
    # Look the term up as an alias first and as a previous symbol
    # second; the first lookup that returns records wins.
    lookups = (("alias_symbol", "alias"), ("prev_symbol", "prev"))
    for field, label in lookups:
        response = requests.get(
            f"{HGNC_BASE}/fetch/{field}/{alias}",
            headers=HEADERS, timeout=30)
        response.raise_for_status()
        matches = response.json().get("response", {}).get("docs", [])
        if not matches:
            continue

        symbols = [match["symbol"] for match in matches]
        print(f"HGNC {label} '{alias}' → {', '.join(symbols)}")
        return symbols

    print(f"HGNC: '{alias}' not resolved")
    return []
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def hgnc_resolve_batch(aliases):
    """
    Resolve a batch of aliases / previous symbols to official symbols.

    Parameters:
        aliases: list[str] — aliases or previous symbols to resolve;
            may be empty.

    Returns:
        pd.DataFrame — one row per input with the columns ``input``,
        ``resolved`` (first symbol returned by ``hgnc_resolve_alias``,
        or ``"UNRESOLVED"``) and ``ambiguous`` (True when more than one
        symbol came back). The columns are present even for an empty
        input.
    """
    columns = ["input", "resolved", "ambiguous"]

    results = []
    for alias in aliases:
        resolved = hgnc_resolve_alias(alias)
        results.append({
            "input": alias,
            "resolved": resolved[0] if resolved else "UNRESOLVED",
            "ambiguous": len(resolved) > 1,
        })

    # Explicit columns keep the frame well-formed when `aliases` is
    # empty; without them the "resolved" lookup below raises KeyError.
    df = pd.DataFrame(results, columns=columns)
    n_resolved = (df["resolved"] != "UNRESOLVED").sum()
    print(f"HGNC batch: {n_resolved}/{len(df)} resolved")
    return df
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## 3. 遺伝子ファミリー/グループ
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
def hgnc_gene_group(group_name):
    """
    List the genes HGNC returns for a gene family / group query.

    Parameters:
        group_name: str — group name
            (e.g. "Kinases", "Ion channels")

    Returns:
        pd.DataFrame — one row per hit with the columns ``symbol``,
        ``name``, ``locus_type`` and ``location`` (empty string when a
        field is absent).

    Raises:
        requests.HTTPError — when the server answers with an error
            status.
    """
    # The group name is wrapped in URL-encoded double quotes (%22) so
    # that it is sent as a single quoted phrase.
    # NOTE(review): confirm that `gene_group` is accepted as a search
    # field and that search hits carry the columns read below.
    endpoint = f"{HGNC_BASE}/search/gene_group:%22{group_name}%22"
    response = requests.get(endpoint, headers=HEADERS, timeout=30)
    response.raise_for_status()
    members = response.json().get("response", {}).get("docs", [])

    wanted = ("symbol", "name", "locus_type", "location")
    df = pd.DataFrame(
        [{key: member.get(key, "") for key in wanted}
         for member in members])
    print(f"HGNC group '{group_name}': {len(df)} members")
    return df
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## 4. HGNC 統合パイプライン
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
def hgnc_pipeline(symbols, aliases=None,
                  output_dir="results"):
    """
    Run the combined HGNC nomenclature pipeline and write CSV outputs.

    Parameters:
        symbols: list[str] — official gene symbols to look up
        aliases: list[str] | None — aliases to resolve (skipped when
            empty or None)
        output_dir: str — output directory (created if missing)

    Returns:
        dict — ``"details"`` (DataFrame of the records found),
        ``"xref"`` (DataFrame of ID cross-references) and ``"aliases"``
        (DataFrame from ``hgnc_resolve_batch``, or None when no aliases
        were given).

    Files written to ``output_dir``: ``hgnc_details.csv``,
    ``hgnc_xref.csv`` and, when aliases are given,
    ``hgnc_alias_resolved.csv``.
    """
    from pathlib import Path

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Symbol details — symbols that are not found are skipped.
    details = []
    for sym in symbols:
        info = hgnc_fetch_symbol(sym)
        if info:
            details.append(info)
    detail_df = pd.DataFrame(details)
    detail_df.to_csv(output_dir / "hgnc_details.csv", index=False)

    # 2) Alias resolution — kept so it can be returned, not only saved.
    alias_df = None
    if aliases:
        alias_df = hgnc_resolve_batch(aliases)
        alias_df.to_csv(
            output_dir / "hgnc_alias_resolved.csv", index=False)

    # 3) ID cross-reference — only the first UniProt ID is kept.
    xref_columns = ["symbol", "hgnc_id", "ensembl", "entrez", "uniprot"]
    xref_rows = []
    for d in details:
        uniprot_ids = d.get("uniprot_ids") or [""]
        xref_rows.append({
            "symbol": d.get("symbol", ""),
            "hgnc_id": d.get("hgnc_id", ""),
            "ensembl": d.get("ensembl_gene_id", ""),
            "entrez": d.get("entrez_id", ""),
            "uniprot": uniprot_ids[0],
        })
    # Explicit columns give the CSV a header row even when no symbol
    # was found, so the file stays readable downstream.
    xref_df = pd.DataFrame(xref_rows, columns=xref_columns)
    xref_df.to_csv(output_dir / "hgnc_xref.csv", index=False)

    print(f"HGNC pipeline → {output_dir}")
    return {"details": detail_df, "xref": xref_df, "aliases": alias_df}
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
---
|
|
265
|
+
|
|
266
|
+
## パイプライン統合
|
|
267
|
+
|
|
268
|
+
```
|
|
269
|
+
biothings-idmapping → hgnc-nomenclature → genome-sequence-tools
|
|
270
|
+
(MyGene/MyVariant) (公式シンボル) (配列解析)
|
|
271
|
+
│ │ ↓
|
|
272
|
+
gene-expression ────────────┘ variant-interpretation
|
|
273
|
+
(RNA-seq) (バリアント解釈)
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
## パイプライン出力
|
|
277
|
+
|
|
278
|
+
| ファイル | 説明 | 次スキル |
|
|
279
|
+
|---------|------|---------|
|
|
280
|
+
| `results/hgnc_details.csv` | 遺伝子詳細 | → gene-expression |
|
|
281
|
+
| `results/hgnc_alias_resolved.csv` | エイリアス解決 | → biothings-idmapping |
|
|
282
|
+
| `results/hgnc_xref.csv` | ID 相互参照 | → genome-sequence-tools |
|
|
@@ -4,6 +4,10 @@ description: |
|
|
|
4
4
|
Human Protein Atlas (HPA) 統合スキル。組織/細胞タンパク質発現、
|
|
5
5
|
がん予後バイオマーカー、RNA 発現プロファイル、細胞内局在、
|
|
6
6
|
タンパク質相互作用の包括的検索・解析パイプライン。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: hpa
|
|
9
|
+
name: Human Protein Atlas
|
|
10
|
+
description: 組織/細胞タンパク質発現・RNA 発現・がん予後
|
|
7
11
|
---
|
|
8
12
|
|
|
9
13
|
# Scientific Human Protein Atlas
|
|
@@ -9,6 +9,15 @@ tu_tools:
|
|
|
9
9
|
- key: iedb
|
|
10
10
|
name: IEDB
|
|
11
11
|
description: 免疫エピトープデータベース
|
|
12
|
+
- key: imgt
|
|
13
|
+
name: IMGT
|
|
14
|
+
description: 国際免疫遺伝学情報システム
|
|
15
|
+
- key: sabdab
|
|
16
|
+
name: SAbDab
|
|
17
|
+
description: 構造抗体データベース
|
|
18
|
+
- key: therasabdab
|
|
19
|
+
name: TheraSAbDab
|
|
20
|
+
description: 治療用抗体構造データベース
|
|
12
21
|
---
|
|
13
22
|
|
|
14
23
|
# Scientific Immunoinformatics
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-lipidomics
|
|
3
|
+
description: |
|
|
4
|
+
リピドミクス解析スキル。LipidMAPS / SwissLipids / LION
|
|
5
|
+
脂質データベース統合検索・脂質サブクラス分類・
|
|
6
|
+
脂質 MS/MS スペクトル同定・脂質パスウェイエンリッチメント・
|
|
7
|
+
脂質プロファイリングパイプライン。
|
|
8
|
+
TU 外スキル (直接 REST API + Python ライブラリ)。
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Scientific Lipidomics
|
|
12
|
+
|
|
13
|
+
LipidMAPS / SwissLipids / LION 脂質データベースを統合した
|
|
14
|
+
脂質構造検索・サブクラス分類・MS/MS 同定・
|
|
15
|
+
脂質パスウェイエンリッチメント解析パイプラインを提供する。
|
|
16
|
+
|
|
17
|
+
## When to Use
|
|
18
|
+
|
|
19
|
+
- LC-MS/MS リピドミクスデータの脂質同定を行うとき
|
|
20
|
+
- LipidMAPS で脂質構造・サブクラスを検索するとき
|
|
21
|
+
- 脂質プロファイルの差次解析 (fold change/p-value) を行うとき
|
|
22
|
+
- LION エンリッチメントで脂質機能解析を行うとき
|
|
23
|
+
- 脂質パスウェイ (スフィンゴ脂質/リン脂質代謝) を可視化するとき
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. LipidMAPS 構造検索
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import requests
|
|
33
|
+
import pandas as pd
|
|
34
|
+
|
|
35
|
+
# Root URL of the LIPID MAPS REST service; request paths are appended to it.
LIPIDMAPS_API = "https://www.lipidmaps.org/rest"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def lipidmaps_search(name=None, formula=None,
                     mass=None, tolerance=0.01):
    """
    Search LIPID MAPS for lipid structures.

    Exactly one criterion is used, in this order of precedence:
    ``mass`` (when not None), then ``name``, then ``formula``.

    Parameters:
        name: str | None — lipid name
        formula: str | None — molecular formula
        mass: float | None — exact mass
        tolerance: float — mass tolerance (Da), used with ``mass`` only

    Returns:
        pd.DataFrame — one row per record with the columns ``lm_id``,
        ``name``, ``sys_name``, ``formula``, ``mass``, ``main_class``
        and ``sub_class``. An empty frame is returned (without any
        request) when no criterion is given.

    Raises:
        requests.HTTPError — when the server answers with an error
            status.
    """
    from urllib.parse import quote

    # NOTE(review): the path layout below (".../compound/lm_id/<kind>/
    # <value>") and the record keys "systematic_name" / "mass" are kept
    # from the original — verify them against the LIPID MAPS REST docs.
    if mass is not None:
        url = (f"{LIPIDMAPS_API}/compound/lm_id/"
               f"mass/{mass}/{tolerance}")
    elif name:
        # Lipid names routinely contain "/", "(" or ":" (e.g.
        # "PC(16:0/18:1)"); escape them so they stay one path segment.
        url = (f"{LIPIDMAPS_API}/compound/lm_id/"
               f"name/{quote(str(name), safe='')}")
    elif formula:
        url = (f"{LIPIDMAPS_API}/compound/lm_id/"
               f"formula/{quote(str(formula), safe='')}")
    else:
        print("LipidMAPS: provide name, formula, "
              "or mass")
        return pd.DataFrame()

    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    if isinstance(data, dict):
        nested = list(data.values())
        if nested and all(isinstance(v, dict) for v in nested):
            # Presumably a multi-hit payload keyed by row label
            # ({"Row1": {...}, ...}) — TODO confirm. Treating it as one
            # record would only yield a single blank row.
            data = nested
        else:
            data = [data]
    elif not isinstance(data, list):
        # Scalars / null carry no records.
        data = []

    rows = [
        {
            "lm_id": item.get("lm_id", ""),
            "name": item.get("name", ""),
            "sys_name": item.get("systematic_name", ""),
            "formula": item.get("formula", ""),
            "mass": item.get("mass", 0),
            "main_class": item.get("main_class", ""),
            "sub_class": item.get("sub_class", ""),
        }
        for item in data
        if isinstance(item, dict)  # skip anything that is not a record
    ]

    df = pd.DataFrame(rows)
    print(f"LipidMAPS: {len(df)} lipids found")
    return df
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def lipidmaps_classification(lm_id):
    """
    Fetch the classification hierarchy of one LIPID MAPS entry.

    Parameters:
        lm_id: str — LIPID MAPS ID (e.g. "LMFA01010001")

    Returns:
        dict — ``lm_id``, ``category`` (read from the record's ``core``
        field), ``main_class``, ``sub_class``, ``class_level4``,
        ``smiles`` and ``inchi_key``; missing fields are empty strings.
        An empty dict is returned when the response is not a JSON
        object.

    Raises:
        requests.HTTPError — when the server answers with an error
            status.
    """
    url = f"{LIPIDMAPS_API}/compound/lm_id/{lm_id}/all"
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # A non-object payload has no fields to read; calling .get on it
    # would raise AttributeError. Presumably the service answers with an
    # empty list for unknown IDs — TODO confirm.
    if not isinstance(data, dict):
        print(f"LipidMAPS: {lm_id} not found")
        return {}

    classification = {
        "lm_id": data.get("lm_id", ""),
        "category": data.get("core", ""),
        "main_class": data.get("main_class", ""),
        "sub_class": data.get("sub_class", ""),
        "class_level4": data.get("class_level4", ""),
        "smiles": data.get("smiles", ""),
        "inchi_key": data.get("inchi_key", ""),
    }

    print(f"LipidMAPS: {lm_id} → "
          f"{classification['main_class']} / "
          f"{classification['sub_class']}")
    return classification
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## 2. 脂質差次解析
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
import numpy as np
|
|
123
|
+
from scipy import stats
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _benjamini_hochberg(pvalues):
    """Return Benjamini–Hochberg adjusted p-values; non-finite inputs stay NaN."""
    pvals = np.asarray(pvalues, dtype=float)
    adjusted = np.full(pvals.shape, np.nan)
    finite = np.isfinite(pvals)
    n_tests = int(finite.sum())
    if n_tests == 0:
        return adjusted

    tested = pvals[finite]
    order = np.argsort(tested)
    # p_(i) * m / i, then enforce monotonicity from the largest rank down.
    scaled = tested[order] * n_tests / np.arange(1, n_tests + 1)
    monotone = np.minimum.accumulate(scaled[::-1])[::-1]

    restored = np.empty(n_tests)
    restored[order] = np.minimum(monotone, 1.0)
    adjusted[finite] = restored
    return adjusted


def lipid_differential_analysis(data, groups,
                                fdr_threshold=0.05):
    """
    Differential lipid analysis (fold change + two-sample t-test).

    Parameters:
        data: pd.DataFrame — lipid concentration matrix
            (rows = samples, columns = lipids)
        groups: list[int] — group label per sample (0 or 1); group 0 is
            reported as control, group 1 as case
        fdr_threshold: float — FDR cut-off for ``significant``

    Returns:
        pd.DataFrame — one row per lipid, sorted by p-value, with the
        columns ``lipid``, ``mean_ctrl``, ``mean_case``,
        ``fold_change`` (case / control), ``log2FC``, ``pvalue``,
        ``fdr`` (Benjamini–Hochberg) and ``significant``.

    Conventions for degenerate values:
        * control mean 0 and case mean > 0 → fold change ``inf``
        * case mean 0 and control mean > 0 → fold change 0,
          log2FC ``-inf`` (so the lipid counts as "down")
        * any other non-positive / undefined ratio → NaN
        * lipids whose t-test p-value is NaN are left out of the FDR
          correction and get ``fdr`` NaN (never significant)
    """
    groups = np.array(groups)
    g0 = data[groups == 0]
    g1 = data[groups == 1]

    columns = ["lipid", "mean_ctrl", "mean_case",
               "fold_change", "log2FC", "pvalue"]
    results = []
    for lipid in data.columns:
        mean0 = g0[lipid].mean()
        mean1 = g1[lipid].mean()

        if mean0 > 0:
            fc = mean1 / mean0
        elif mean0 == 0 and mean1 > 0:
            fc = np.inf
        else:
            # 0/0, a negative baseline or a NaN mean: ratio is undefined.
            fc = np.nan

        if fc > 0:
            log2fc = np.log2(fc)
        elif fc == 0:
            log2fc = -np.inf
        else:
            log2fc = np.nan

        _, pval = stats.ttest_ind(g0[lipid], g1[lipid])
        results.append({
            "lipid": lipid,
            "mean_ctrl": round(mean0, 4),
            "mean_case": round(mean1, 4),
            "fold_change": round(fc, 4),
            "log2FC": round(log2fc, 4),
            "pvalue": pval,
        })

    # Explicit columns keep the frame usable when `data` has no columns.
    df = pd.DataFrame(results, columns=columns)
    # Correct over finite p-values only: a single NaN (e.g. from a
    # constant column) would otherwise turn every adjusted value into
    # NaN and silently suppress all significant calls.
    df["fdr"] = _benjamini_hochberg(df["pvalue"])
    df["significant"] = df["fdr"] < fdr_threshold

    n_sig = df["significant"].sum()
    n_up = ((df["significant"]) &
            (df["log2FC"] > 0)).sum()
    n_down = ((df["significant"]) &
              (df["log2FC"] < 0)).sum()
    print(f"Lipid DA: {n_sig} significant "
          f"({n_up} up, {n_down} down)")
    return df.sort_values("pvalue")
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## 3. 脂質サブクラスエンリッチメント
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
def lipid_subclass_enrichment(
        sig_lipids, all_lipids, class_map):
    """
    Lipid subclass over-representation test (one-sided Fisher exact).

    Parameters:
        sig_lipids: list[str] — significant lipids; entries that are
            not in ``all_lipids`` are ignored
        all_lipids: list[str] — all measured lipids (the universe)
        class_map: dict — {lipid: subclass} mapping

    Returns:
        pd.DataFrame — one row per subclass that contains at least one
        significant lipid, sorted by p-value, with the columns
        ``subclass``, ``sig_in_class``, ``total_in_class``, ``pvalue``
        and ``ratio`` (sig_in_class / total_in_class). The frame is
        empty, but keeps these columns, when no subclass has a hit.
    """
    from scipy.stats import fisher_exact

    all_set = set(all_lipids)
    # Restrict the hits to the universe; otherwise the fourth
    # contingency cell can go negative and fisher_exact raises.
    sig_set = set(sig_lipids) & all_set

    # Group the universe by subclass in a single pass over class_map.
    members = {}
    for lipid, subclass in class_map.items():
        if lipid in all_set:
            members.setdefault(subclass, set()).add(lipid)

    columns = ["subclass", "sig_in_class", "total_in_class",
               "pvalue", "ratio"]
    results = []
    for subclass, in_class in members.items():
        hits = len(in_class & sig_set)
        if hits == 0:
            continue
        sig_elsewhere = len(sig_set) - hits
        nonsig_in_class = len(in_class) - hits
        nonsig_elsewhere = (len(all_set) - hits
                            - sig_elsewhere - nonsig_in_class)
        _, pval = fisher_exact(
            [[hits, sig_elsewhere],
             [nonsig_in_class, nonsig_elsewhere]],
            alternative="greater")
        results.append({
            "subclass": subclass,
            "sig_in_class": hits,
            "total_in_class": len(in_class),
            "pvalue": pval,
            "ratio": round(hits / len(in_class), 3),
        })

    # Explicit columns: sorting a column-less frame raises KeyError
    # when no subclass contains a significant lipid.
    df = pd.DataFrame(results, columns=columns).sort_values("pvalue")
    print(f"Subclass enrichment: "
          f"{(df['pvalue'] < 0.05).sum()} "
          f"significant subclasses")
    return df
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## 4. リピドミクス統合パイプライン
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
def lipidomics_pipeline(data, groups,
                        output_dir="results",
                        max_annotations=30):
    """
    Run the combined lipidomics pipeline and write CSV outputs.

    Parameters:
        data: pd.DataFrame — lipid concentration matrix
        groups: list[int] — group label per sample
        output_dir: str — output directory (created if missing)
        max_annotations: int | None — number of leading columns of
            ``data`` to look up in LIPID MAPS (one request each);
            None annotates every column. Defaults to 30.

    Returns:
        dict — ``"da"`` (differential-analysis DataFrame) and
        ``"annotations"`` (DataFrame holding the first LIPID MAPS hit
        per annotated lipid plus a ``query`` column; empty when nothing
        matched).

    Files written to ``output_dir``: ``lipid_da.csv`` and, when at
    least one lipid was annotated, ``lipid_annotations.csv``.
    """
    from pathlib import Path

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Differential analysis
    da = lipid_differential_analysis(data, groups)
    da.to_csv(output_dir / "lipid_da.csv", index=False)

    # 2) LIPID MAPS annotation — keep only the first hit per lipid.
    annotations = []
    for lipid in data.columns[:max_annotations]:
        result = lipidmaps_search(name=lipid)
        if not result.empty:
            row = result.iloc[0].to_dict()
            row["query"] = lipid
            annotations.append(row)

    ann_df = pd.DataFrame(annotations)
    if annotations:
        ann_df.to_csv(
            output_dir / "lipid_annotations.csv", index=False)

    print(f"Lipidomics pipeline → {output_dir}")
    # The annotation table is returned as well, so callers do not have
    # to re-read the CSV.
    return {"da": da, "annotations": ann_df}
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
---
|
|
268
|
+
|
|
269
|
+
## パイプライン統合
|
|
270
|
+
|
|
271
|
+
```
|
|
272
|
+
metabolomics → lipidomics → pathway-enrichment
|
|
273
|
+
(LC-MS 全代謝物) (脂質特化) (脂質代謝パスウェイ)
|
|
274
|
+
│ │ ↓
|
|
275
|
+
metabolomics-network ─┘ multi-omics
|
|
276
|
+
(代謝物相関) (オミクス統合)
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
## パイプライン出力
|
|
280
|
+
|
|
281
|
+
| ファイル | 説明 | 次スキル |
|
|
282
|
+
|---------|------|---------|
|
|
283
|
+
| `results/lipid_da.csv` | 差次脂質 | → biomarker-discovery |
|
|
284
|
+
| `results/lipid_annotations.csv` | LipidMAPS 注釈 | → pathway-enrichment |
|