@nahisaho/satori 0.19.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -23
- package/package.json +1 -1
- package/src/.github/skills/scientific-admet-pharmacokinetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-biobank-cohort/SKILL.md +268 -0
- package/src/.github/skills/scientific-cancer-genomics/SKILL.md +7 -0
- package/src/.github/skills/scientific-cell-line-resources/SKILL.md +4 -0
- package/src/.github/skills/scientific-chembl-assay-mining/SKILL.md +4 -0
- package/src/.github/skills/scientific-drug-target-profiling/SKILL.md +4 -0
- package/src/.github/skills/scientific-gdc-portal/SKILL.md +280 -0
- package/src/.github/skills/scientific-immunoinformatics/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolic-flux/SKILL.md +306 -0
- package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-microbiome-metagenomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-monarch-ontology/SKILL.md +260 -0
- package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +10 -0
- package/src/.github/skills/scientific-precision-oncology/SKILL.md +4 -0
- package/src/.github/skills/scientific-spatial-multiomics/SKILL.md +293 -0
- package/src/.github/skills/scientific-stitch-chemical-network/SKILL.md +318 -0
- package/src/.github/skills/scientific-string-network-api/SKILL.md +4 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-monarch-ontology
|
|
3
|
+
description: |
|
|
4
|
+
Monarch Initiative 疾患-表現型オントロジースキル。
|
|
5
|
+
Monarch Initiative API を用いた疾患-遺伝子-表現型
|
|
6
|
+
アソシエーション・HPO フェノタイピング・
|
|
7
|
+
遺伝子-疾患推定・オントロジーセマンティック検索。
|
|
8
|
+
ToolUniverse 連携: monarch。
|
|
9
|
+
tu_tools:
|
|
10
|
+
- key: monarch
|
|
11
|
+
name: Monarch Initiative
|
|
12
|
+
description: 疾患-表現型-遺伝子オントロジー統合 API
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
# Scientific Monarch Initiative Ontology
|
|
16
|
+
|
|
17
|
+
Monarch Initiative API を活用した疾患-遺伝子-表現型
|
|
18
|
+
アソシエーション取得・HPO ベースフェノタイピング・
|
|
19
|
+
セマンティックオントロジー検索パイプラインを提供する。
|
|
20
|
+
|
|
21
|
+
## When to Use
|
|
22
|
+
|
|
23
|
+
- 疾患の関連遺伝子・表現型 (HPO) を検索するとき
|
|
24
|
+
- 遺伝子から関連疾患・表現型を逆引きするとき
|
|
25
|
+
- HPO 用語でフェノタイプマッチングするとき
|
|
26
|
+
- オントロジー用語間の意味的類似度を計算するとき
|
|
27
|
+
- 疾患-表現型-遺伝子の三者間アソシエーションを統合するとき
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
## 1. 疾患-遺伝子-表現型アソシエーション
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import requests
|
|
37
|
+
import pandas as pd
|
|
38
|
+
|
|
39
|
+
# Base URL of the Monarch Initiative REST API (v3); the functions in this
# document append endpoint paths such as "/association" and "/search" to it.
MONARCH_API = "https://api.monarchinitiative.org/v3/api"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def monarch_disease_genes(disease_id, limit=50):
    """
    Fetch the genes associated with a disease from the Monarch API.

    Sends one GET request to ``{MONARCH_API}/association`` restricted to the
    ``biolink:GeneToDiseaseAssociation`` category and flattens the returned
    ``items`` into a table.

    Parameters:
        disease_id: str — disease identifier
            (e.g. "MONDO:0007254" = breast cancer)
        limit: int — maximum number of associations to request

    Returns:
        pandas.DataFrame with the columns ``disease_id``, ``gene_id``,
        ``gene_label``, ``relation`` and ``source``, one row per
        association. The columns are present even when nothing matched.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    columns = ["disease_id", "gene_id", "gene_label", "relation", "source"]

    def _entity(item, role):
        # An association may carry the entity as a nested {"id", "label"}
        # dict or as a bare CURIE string with a sibling "<role>_label"
        # field; accept both so a string entity never hits `.get`.
        value = item.get(role) or ""
        if isinstance(value, dict):
            return value.get("id", ""), value.get("label", "")
        return value, item.get(f"{role}_label") or ""

    url = f"{MONARCH_API}/association"
    params = {
        # In a gene-to-disease association the gene is the subject and the
        # disease is the object, so the disease must be matched as "object";
        # filtering on "subject" would look for the disease in the gene slot.
        "object": disease_id,
        # NOTE(review): the live API may only index the more specific
        # Causal/Correlated gene-to-disease categories — confirm.
        "category": "biolink:GeneToDiseaseAssociation",
        "limit": limit,
    }
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    rows = []
    for item in data.get("items", []):
        gene_id, gene_label = _entity(item, "subject")
        # `provided_by` may be a single string or a list; joining a bare
        # string would interleave "; " between its characters.
        sources = item.get("provided_by") or []
        if isinstance(sources, str):
            sources = [sources]
        rows.append({
            "disease_id": disease_id,
            "gene_id": gene_id,
            "gene_label": gene_label,
            "relation": item.get("predicate", ""),
            "source": "; ".join(sources),
        })

    # Explicit columns keep the schema (and the CSV header) stable when the
    # API returns no items.
    df = pd.DataFrame(rows, columns=columns)
    print(f"Monarch disease→genes: {disease_id} "
          f"→ {len(df)} genes")
    return df
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def monarch_disease_phenotypes(disease_id, limit=100):
    """
    Fetch the phenotypes (HPO terms) associated with a disease from Monarch.

    Sends one GET request to ``{MONARCH_API}/association`` with the disease
    as subject and the ``biolink:DiseaseToPhenotypicFeatureAssociation``
    category, then flattens the returned ``items`` into a table.

    Parameters:
        disease_id: str — disease identifier
        limit: int — maximum number of associations to request

    Returns:
        pandas.DataFrame with the columns ``disease_id``, ``phenotype_id``,
        ``phenotype_label``, ``frequency`` and ``onset``, one row per
        association. The columns are present even when nothing matched.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    columns = ["disease_id", "phenotype_id", "phenotype_label",
               "frequency", "onset"]

    url = f"{MONARCH_API}/association"
    params = {
        "subject": disease_id,
        "category":
            "biolink:DiseaseToPhenotypicFeatureAssociation",
        "limit": limit,
    }
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    rows = []
    for item in data.get("items", []):
        # The phenotype may arrive as a nested {"id", "label"} dict or as a
        # bare CURIE string with a sibling "object_label" field; accept both
        # so a string entity never hits `.get`.
        obj = item.get("object") or ""
        if isinstance(obj, dict):
            phenotype_id = obj.get("id", "")
            phenotype_label = obj.get("label", "")
        else:
            phenotype_id = obj
            phenotype_label = item.get("object_label") or ""
        rows.append({
            "disease_id": disease_id,
            "phenotype_id": phenotype_id,
            "phenotype_label": phenotype_label,
            "frequency": item.get("frequency_qualifier",
                                  ""),
            "onset": item.get("onset_qualifier", ""),
        })

    # Explicit columns keep the schema (and the CSV header) stable when the
    # API returns no items.
    df = pd.DataFrame(rows, columns=columns)
    print(f"Monarch disease→phenotypes: {disease_id} "
          f"→ {len(df)} HPO terms")
    return df
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## 2. 遺伝子→疾患逆引き
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
def monarch_gene_diseases(gene_id, limit=50):
    """
    Fetch the diseases associated with a gene from the Monarch API.

    Sends one GET request to ``{MONARCH_API}/association`` with the gene as
    subject and the ``biolink:GeneToDiseaseAssociation`` category, then
    flattens the returned ``items`` into a table.

    Parameters:
        gene_id: str — gene identifier
            (e.g. "HGNC:1100" = BRCA1)
        limit: int — maximum number of associations to request

    Returns:
        pandas.DataFrame with the columns ``gene_id``, ``disease_id``,
        ``disease_label`` and ``relation``, one row per association. The
        columns are present even when nothing matched.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    columns = ["gene_id", "disease_id", "disease_label", "relation"]

    url = f"{MONARCH_API}/association"
    params = {
        "subject": gene_id,
        "category": "biolink:GeneToDiseaseAssociation",
        "limit": limit,
    }
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    rows = []
    for item in data.get("items", []):
        # The disease may arrive as a nested {"id", "label"} dict or as a
        # bare CURIE string with a sibling "object_label" field; accept both
        # so a string entity never hits `.get`.
        obj = item.get("object") or ""
        if isinstance(obj, dict):
            disease_id = obj.get("id", "")
            disease_label = obj.get("label", "")
        else:
            disease_id = obj
            disease_label = item.get("object_label") or ""
        rows.append({
            "gene_id": gene_id,
            "disease_id": disease_id,
            "disease_label": disease_label,
            "relation": item.get("predicate", ""),
        })

    # Explicit columns keep the schema (and the CSV header) stable when the
    # API returns no items.
    df = pd.DataFrame(rows, columns=columns)
    print(f"Monarch gene→diseases: {gene_id} "
          f"→ {len(df)} diseases")
    return df
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## 3. エンティティ検索・オントロジー用語
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
def monarch_search(query, category=None, limit=25):
    """
    Run a free-text entity search against the Monarch API.

    Parameters:
        query: str — search text
        category: str — optional category filter
            (e.g. "biolink:Disease", "biolink:Gene",
             "biolink:PhenotypicFeature"); omitted from the request
            when falsy
        limit: int — maximum number of hits to request

    Returns:
        pandas.DataFrame with one row per hit and the columns ``id``,
        ``label`` (taken from the hit's ``name``), ``category`` and
        ``description`` (cut to 200 characters).

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    query_args = {"q": query, "limit": limit}
    if category:
        query_args["category"] = category

    response = requests.get(f"{MONARCH_API}/search",
                            params=query_args, timeout=30)
    response.raise_for_status()
    payload = response.json()

    hits = [
        {
            "id": hit.get("id", ""),
            "label": hit.get("name", ""),
            "category": hit.get("category", ""),
            # A null description is normalised to "" before truncation.
            "description": (hit.get("description", "") or "")[:200],
        }
        for hit in payload.get("items", [])
    ]

    df = pd.DataFrame(hits)
    print(f"Monarch search: '{query}' → {len(df)}")
    return df
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## 4. Monarch 統合パイプライン
|
|
193
|
+
|
|
194
|
+
```python
|
|
195
|
+
def monarch_pipeline(disease_query,
                     output_dir="results"):
    """
    Run the combined Monarch workflow for one disease query.

    Searches for the disease, takes the first search hit, retrieves its
    associated genes and phenotypes, and writes each table as CSV into
    ``output_dir`` (created if missing):
    ``monarch_diseases.csv``, ``monarch_genes.csv`` and
    ``monarch_phenotypes.csv``.

    Parameters:
        disease_query: str — disease name or ID
        output_dir: str — output directory

    Returns:
        dict of DataFrames. Always contains ``"diseases"`` (the search
        hits). When the search found something it also contains
        ``"genes"`` and ``"phenotypes"``.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Disease search
    diseases = monarch_search(disease_query,
                              category="biolink:Disease")
    diseases.to_csv(output_dir / "monarch_diseases.csv",
                    index=False)

    if diseases.empty:
        print(f"Monarch: '{disease_query}' not found")
        return {"diseases": diseases}

    # The top-ranked hit is used as the disease for the remaining steps.
    disease_id = diseases.iloc[0]["id"]

    # 2) Associated genes
    genes = monarch_disease_genes(disease_id)
    genes.to_csv(output_dir / "monarch_genes.csv",
                 index=False)

    # 3) Phenotypes (HPO)
    phenotypes = monarch_disease_phenotypes(disease_id)
    phenotypes.to_csv(
        output_dir / "monarch_phenotypes.csv",
        index=False)

    print(f"Monarch pipeline: {disease_query} "
          f"→ {output_dir}")
    # "diseases" is returned on this path too, so callers can read the same
    # key whether or not the search matched.
    return {
        "diseases": diseases,
        "genes": genes,
        "phenotypes": phenotypes,
    }
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## ToolUniverse 連携
|
|
239
|
+
|
|
240
|
+
| TU Key | ツール名 | 連携内容 |
|
|
241
|
+
|--------|---------|---------|
|
|
242
|
+
| `monarch` | Monarch Initiative | 疾患-表現型-遺伝子オントロジー統合 |
|
|
243
|
+
|
|
244
|
+
## パイプライン統合
|
|
245
|
+
|
|
246
|
+
```
|
|
247
|
+
disease-research → monarch-ontology → rare-disease-genetics
|
|
248
|
+
(GWAS/DisGeNET) (Monarch API) (OMIM/Orphanet)
|
|
249
|
+
│ │ ↓
|
|
250
|
+
variant-interpretation ───┘ ontology-enrichment
|
|
251
|
+
(ClinVar/ACMG) (EFO/OLS/Enrichr)
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
## パイプライン出力
|
|
255
|
+
|
|
256
|
+
| ファイル | 説明 | 次スキル |
|
|
257
|
+
|---------|------|---------|
|
|
258
|
+
| `results/monarch_diseases.csv` | 疾患検索結果 | → disease-research |
|
|
259
|
+
| `results/monarch_genes.csv` | 関連遺伝子 | → variant-interpretation |
|
|
260
|
+
| `results/monarch_phenotypes.csv` | HPO 表現型 | → rare-disease-genetics |
|
|
@@ -4,6 +4,16 @@ description: |
|
|
|
4
4
|
薬理学的ターゲットプロファイリングスキル。BindingDB 結合親和性、
|
|
5
5
|
GPCRdb GPCR 構造-活性、GtoPdb 薬理学、BRENDA 酵素動態、
|
|
6
6
|
Pharos 未解明ターゲット(TDL)の統合解析パイプライン。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: bindingdb
|
|
9
|
+
name: BindingDB
|
|
10
|
+
description: 結合親和性データベース
|
|
11
|
+
- key: gtopdb
|
|
12
|
+
name: GtoPdb
|
|
13
|
+
description: Guide to PHARMACOLOGY
|
|
14
|
+
- key: brenda
|
|
15
|
+
name: BRENDA
|
|
16
|
+
description: 酵素動態データベース
|
|
7
17
|
---
|
|
8
18
|
|
|
9
19
|
# Scientific Pharmacology Targets
|
|
@@ -4,6 +4,10 @@ description: |
|
|
|
4
4
|
精密腫瘍学スキル。CIViC・OncoKB・cBioPortal・COSMIC・GDC/TCGA を統合し、
|
|
5
5
|
腫瘍ゲノムプロファイリング・分子標的選定・バイオマーカー評価・治療推奨を支援。
|
|
6
6
|
「がんゲノム解析して」「腫瘍プロファイリングして」「OncoKB で検索して」で発火。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: oncokb
|
|
9
|
+
name: OncoKB
|
|
10
|
+
description: 精密腫瘍学アノテーション
|
|
7
11
|
---
|
|
8
12
|
|
|
9
13
|
# Scientific Precision Oncology
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-spatial-multiomics
|
|
3
|
+
description: |
|
|
4
|
+
空間マルチオミクス統合スキル。MERFISH/Visium 等の空間
|
|
5
|
+
トランスクリプトームと空間プロテオミクスのマルチモーダル
|
|
6
|
+
統合・空間共検出解析・セル近傍グラフ構築パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Spatial Multi-omics
|
|
10
|
+
|
|
11
|
+
MERFISH・Visium・CODEX 等の空間マルチオミクスデータを統合し、
|
|
12
|
+
マルチモーダルアライメント・空間共検出解析・近傍グラフベースの
|
|
13
|
+
空間コミュニティ検出パイプラインを提供する。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- 空間トランスクリプトームと空間プロテオミクスを統合するとき
|
|
18
|
+
- MERFISH + CODEX 等マルチモーダル空間データをアライメントするとき
|
|
19
|
+
- 空間的に共局在する分子シグネチャを同定するとき
|
|
20
|
+
- セル近傍グラフからニッチ/コミュニティを抽出するとき
|
|
21
|
+
- 空間マルチオミクスの前処理パイプラインを構築するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. 空間マルチモーダルデータ読み込み
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import numpy as np
|
|
31
|
+
import pandas as pd
|
|
32
|
+
from scipy.spatial import cKDTree
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def load_spatial_modality(coord_file, expr_file,
                          modality_name="RNA"):
    """
    Load one spatial modality and keep only cells present in both tables.

    Parameters:
        coord_file: str — coordinate CSV (cell_id, x, y)
        expr_file: str — expression / protein CSV
            (cell_id, features...)
        modality_name: str — modality label used in the log message

    Returns:
        (coords, expr): two DataFrames indexed by ``cell_id`` and
        restricted to the cell IDs found in both files.
    """
    # Read coordinates first, then the feature matrix.
    position_table, feature_table = (
        pd.read_csv(source, index_col="cell_id")
        for source in (coord_file, expr_file)
    )

    shared_cells = position_table.index.intersection(feature_table.index)
    position_table = position_table.loc[shared_cells]
    feature_table = feature_table.loc[shared_cells]

    n_cells = len(shared_cells)
    n_features = feature_table.shape[1]
    print(f"Spatial {modality_name}: {n_cells} cells, {n_features} features")
    return position_table, feature_table
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def spatial_alignment(coords_a, coords_b,
                      max_distance=50.0):
    """
    Align two modalities by nearest-neighbour matching of cell coordinates.

    Each cell of modality A is paired with its single nearest cell of
    modality B; pairs at or beyond ``max_distance`` are discarded. Cells
    of A are matched independently, so one B cell can appear in several
    pairs.

    Parameters:
        coords_a: DataFrame — modality A coordinates (columns x, y)
        coords_b: DataFrame — modality B coordinates (columns x, y)
        max_distance: float — exclusive upper bound on the match distance,
            in coordinate units (the log message labels it μm)

    Returns:
        DataFrame with the columns ``cell_a``, ``cell_b`` and ``distance``.
    """
    axes = ["x", "y"]
    reference_tree = cKDTree(coords_b[axes].values)
    nn_dist, nn_pos = reference_tree.query(coords_a[axes].values, k=1)

    within_range = nn_dist < max_distance
    alignment = pd.DataFrame({
        "cell_a": coords_a.index[within_range],
        "cell_b": coords_b.index[nn_pos[within_range]],
        "distance": nn_dist[within_range],
    })

    n_pairs = len(alignment)
    print(f"Alignment: {n_pairs} matched pairs (max_dist={max_distance}μm)")
    return alignment
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## 2. 空間共検出解析
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
def spatial_codetection(expr_a, expr_b, alignment,
                        method="pearson", top_n=50,
                        max_features=100, min_cells=30):
    """
    Correlate features of two modalities across aligned cell pairs.

    For every pair (feature of A, feature of B) among the first
    ``max_features`` columns of each matrix, computes the correlation over
    the aligned cells where both values are finite, and returns the pairs
    with the largest absolute correlation.

    Parameters:
        expr_a: DataFrame — modality A expression matrix (cells × features)
        expr_b: DataFrame — modality B expression matrix (cells × features)
        alignment: DataFrame — alignment result with the columns
            ``cell_a`` and ``cell_b``
        method: str — correlation method
            ("spearman"; any other value uses Pearson)
        top_n: int — number of top pairs to return
        max_features: int — number of leading columns of each matrix that
            are compared (bounds the work to max_features² tests)
        min_cells: int — minimum number of finite paired observations
            required to test a feature pair

    Returns:
        DataFrame with the columns ``feature_a``, ``feature_b``,
        ``correlation`` and ``p_value``, sorted by descending absolute
        correlation and cut to ``top_n`` rows. Empty (columns kept) when
        no feature pair had enough paired observations.
    """
    from itertools import product
    from scipy import stats

    columns = ["feature_a", "feature_b", "correlation", "p_value"]
    correlate = stats.spearmanr if method == "spearman" else stats.pearsonr

    a_matched = expr_a.loc[alignment["cell_a"]]
    b_matched = expr_b.loc[alignment["cell_b"]]

    def _columns_as_arrays(frame):
        # Extract each column and its finite mask once, instead of once per
        # feature pair inside the product loop.
        arrays = {}
        for name in frame.columns[:max_features]:
            values = frame[name].values
            arrays[name] = (values, np.isfinite(values))
        return arrays

    side_a = _columns_as_arrays(a_matched)
    side_b = _columns_as_arrays(b_matched)

    results = []
    for fa, fb in product(side_a, side_b):
        va, finite_a = side_a[fa]
        vb, finite_b = side_b[fb]
        mask = finite_a & finite_b
        if mask.sum() < min_cells:
            continue

        r, p = correlate(va[mask], vb[mask])
        results.append({
            "feature_a": fa,
            "feature_b": fb,
            "correlation": r,
            "p_value": p,
        })

    # Explicit columns keep the frame sortable and the CSV header intact
    # even when nothing passed the min_cells filter.
    df = pd.DataFrame(results, columns=columns)
    if df.empty:
        # Without this guard, sorting on a missing column and reading row 0
        # of an empty frame would raise.
        print("Codetection: 0 pairs")
        return df

    df = df.sort_values("correlation", ascending=False, key=abs)
    top = df.head(top_n)

    print(f"Codetection: {len(df)} pairs, "
          f"top r={top.iloc[0]['correlation']:.3f}")
    return top
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## 3. セル近傍グラフ・コミュニティ検出
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
def cell_neighborhood_graph(coords, k_neighbors=15):
    """
    Build a k-nearest-neighbour edge list from cell coordinates.

    Parameters:
        coords: DataFrame — coordinates (columns x, y), indexed by cell ID
        k_neighbors: int — number of neighbours per cell; capped at
            ``len(coords) - 1`` when there are fewer cells than that

    Returns:
        DataFrame with the columns ``source``, ``target`` and ``distance``:
        one directed row per (cell, neighbour) pair, so a mutual pair
        appears twice. Empty (columns kept) when there are fewer than two
        cells or ``k_neighbors`` is below 1.
    """
    columns = ["source", "target", "distance"]
    n_cells = len(coords)
    # A cell has at most n_cells - 1 neighbours. Asking the tree for more
    # makes it pad the result with index n_cells and infinite distance,
    # which would index past the end of coords.index.
    k_eff = min(k_neighbors, n_cells - 1)

    edges = []
    if k_eff >= 1:
        xy = coords[["x", "y"]].values
        # k_eff + 1 because each cell is also returned as its own neighbour.
        dists, idxs = cKDTree(xy).query(xy, k=k_eff + 1)
        labels = coords.index

        for i, (row_d, row_j) in enumerate(zip(dists, idxs)):
            # Drop the self-hit by index rather than by position: with
            # duplicated coordinates the cell itself is not guaranteed to
            # be the first result.
            neighbours = [(d, j) for d, j in zip(row_d, row_j) if j != i]
            for d, j in neighbours[:k_eff]:
                edges.append({
                    "source": labels[i],
                    "target": labels[j],
                    "distance": d,
                })

    edge_df = pd.DataFrame(edges, columns=columns)
    print(f"Neighborhood graph: "
          f"{len(coords)} nodes, "
          f"{len(edge_df)} edges (k={k_neighbors})")
    return edge_df
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def spatial_community_detection(edge_df, coords,
                                resolution=1.0):
    """
    Detect spatial communities on a neighbourhood graph (Leiden).

    Parameters:
        edge_df: DataFrame — edge list with ``source`` / ``target`` columns
        coords: DataFrame — coordinates (columns x, y), indexed by cell ID
        resolution: float — Leiden resolution parameter

    Returns:
        DataFrame with the columns ``cell_id``, ``community``, ``x`` and
        ``y`` (one row per cell of ``coords``). When ``igraph`` or
        ``leidenalg`` cannot be imported, an install hint is printed and
        an empty DataFrame is returned.
    """
    try:
        import igraph as ig
        import leidenalg
    except ImportError:
        print("pip install igraph leidenalg")
        return pd.DataFrame()

    cell_ids = list(coords.index)
    # Vertex number of each cell = its position in coords.
    vertex_of = dict(zip(cell_ids, range(len(cell_ids))))

    graph = ig.Graph(directed=False)
    graph.add_vertices(len(cell_ids))

    links = []
    for _, row in edge_df.iterrows():
        # Edges touching a cell that is absent from coords are ignored.
        if row["source"] in vertex_of and row["target"] in vertex_of:
            links.append((vertex_of[row["source"]],
                          vertex_of[row["target"]]))
    graph.add_edges(links)

    partition = leidenalg.find_partition(
        graph,
        leidenalg.RBConfigurationVertexPartition,
        resolution_parameter=resolution,
    )

    result = pd.DataFrame({
        "cell_id": cell_ids,
        "community": partition.membership,
        "x": coords["x"].values,
        "y": coords["y"].values,
    })

    niche_count = result["community"].nunique()
    print(f"Communities: {niche_count} spatial niches (resolution={resolution})")
    return result
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## 4. 空間マルチオミクス統合パイプライン
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
def spatial_multiomics_pipeline(
    rna_coords, rna_expr,
    protein_coords, protein_expr,
    output_dir="results",
):
    """
    Run the spatial multi-omics integration workflow end to end.

    Loads both modalities, aligns RNA cells to protein cells, correlates
    features across aligned pairs, builds the RNA neighbourhood graph and
    detects communities on it. Writes ``alignment.csv``,
    ``codetection.csv`` and ``communities.csv`` into ``output_dir``
    (created if missing).

    Parameters:
        rna_coords: str — RNA coordinate file
        rna_expr: str — RNA expression file
        protein_coords: str — proteomics coordinate file
        protein_expr: str — proteomics expression file
        output_dir: str — output directory

    Returns:
        dict with the DataFrames ``"alignment"``, ``"codetection"`` and
        ``"communities"``.
    """
    from pathlib import Path

    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # 1) Load both modalities
    rna_xy, rna_matrix = load_spatial_modality(rna_coords, rna_expr, "RNA")
    protein_xy, protein_matrix = load_spatial_modality(
        protein_coords, protein_expr, "Protein")

    # 2) Spatial alignment (RNA cells matched onto protein cells)
    alignment = spatial_alignment(rna_xy, protein_xy)
    alignment.to_csv(out_path / "alignment.csv", index=False)

    # 3) Co-detection analysis
    codet = spatial_codetection(rna_matrix, protein_matrix, alignment)
    codet.to_csv(out_path / "codetection.csv", index=False)

    # 4) Neighbourhood graph + communities, both on the RNA coordinates
    knn_edges = cell_neighborhood_graph(rna_xy)
    comms = spatial_community_detection(knn_edges, rna_xy)
    comms.to_csv(out_path / "communities.csv", index=False)

    print(f"Spatial multiomics pipeline → {out_path}")
    return {
        "alignment": alignment,
        "codetection": codet,
        "communities": comms,
    }
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
---
|
|
276
|
+
|
|
277
|
+
## パイプライン統合
|
|
278
|
+
|
|
279
|
+
```
|
|
280
|
+
spatial-transcriptomics → spatial-multiomics → multi-omics
|
|
281
|
+
(Visium/MERFISH) (マルチモーダル統合) (統合オミクス)
|
|
282
|
+
│ │ ↓
|
|
283
|
+
human-cell-atlas ─────────────┘ single-cell-rnaseq
|
|
284
|
+
(HCA atlas) (scRNA-seq 参照)
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
## パイプライン出力
|
|
288
|
+
|
|
289
|
+
| ファイル | 説明 | 次スキル |
|
|
290
|
+
|---------|------|---------|
|
|
291
|
+
| `results/alignment.csv` | モダリティ間アライメント | → multi-omics |
|
|
292
|
+
| `results/codetection.csv` | 共検出ペア | → pathway-analysis |
|
|
293
|
+
| `results/communities.csv` | 空間コミュニティ | → spatial-transcriptomics |
|