@nahisaho/satori 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENCE +0 -0
- package/README.md +191 -0
- package/bin/satori.js +95 -0
- package/package.json +29 -0
- package/src/.github/skills/scientific-academic-writing/SKILL.md +361 -0
- package/src/.github/skills/scientific-academic-writing/assets/acs_article.md +199 -0
- package/src/.github/skills/scientific-academic-writing/assets/elsevier_article.md +244 -0
- package/src/.github/skills/scientific-academic-writing/assets/ieee_transactions.md +212 -0
- package/src/.github/skills/scientific-academic-writing/assets/imrad_standard.md +181 -0
- package/src/.github/skills/scientific-academic-writing/assets/nature_article.md +179 -0
- package/src/.github/skills/scientific-academic-writing/assets/qiita_technical_article.md +385 -0
- package/src/.github/skills/scientific-academic-writing/assets/science_research_article.md +169 -0
- package/src/.github/skills/scientific-bioinformatics/SKILL.md +220 -0
- package/src/.github/skills/scientific-biosignal-processing/SKILL.md +357 -0
- package/src/.github/skills/scientific-causal-inference/SKILL.md +347 -0
- package/src/.github/skills/scientific-cheminformatics/SKILL.md +196 -0
- package/src/.github/skills/scientific-data-preprocessing/SKILL.md +413 -0
- package/src/.github/skills/scientific-data-simulation/SKILL.md +244 -0
- package/src/.github/skills/scientific-doe/SKILL.md +360 -0
- package/src/.github/skills/scientific-eda-correlation/SKILL.md +141 -0
- package/src/.github/skills/scientific-feature-importance/SKILL.md +208 -0
- package/src/.github/skills/scientific-image-analysis/SKILL.md +310 -0
- package/src/.github/skills/scientific-materials-characterization/SKILL.md +368 -0
- package/src/.github/skills/scientific-meta-analysis/SKILL.md +352 -0
- package/src/.github/skills/scientific-metabolomics/SKILL.md +326 -0
- package/src/.github/skills/scientific-ml-classification/SKILL.md +265 -0
- package/src/.github/skills/scientific-ml-regression/SKILL.md +215 -0
- package/src/.github/skills/scientific-multi-omics/SKILL.md +303 -0
- package/src/.github/skills/scientific-network-analysis/SKILL.md +257 -0
- package/src/.github/skills/scientific-pca-tsne/SKILL.md +235 -0
- package/src/.github/skills/scientific-pipeline-scaffold/SKILL.md +331 -0
- package/src/.github/skills/scientific-process-optimization/SKILL.md +215 -0
- package/src/.github/skills/scientific-publication-figures/SKILL.md +208 -0
- package/src/.github/skills/scientific-sequence-analysis/SKILL.md +389 -0
- package/src/.github/skills/scientific-spectral-signal/SKILL.md +227 -0
- package/src/.github/skills/scientific-statistical-testing/SKILL.md +240 -0
- package/src/.github/skills/scientific-survival-clinical/SKILL.md +239 -0
- package/src/.github/skills/scientific-time-series/SKILL.md +291 -0
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-multi-omics
|
|
3
|
+
description: |
|
|
4
|
+
マルチオミクス統合解析スキル。ゲノム・トランスクリプトーム・プロテオーム・メタボローム
|
|
5
|
+
データの統合手法(MOFA/SNF/DIABLO)、オミクス間相関解析、CCA/PLS 統合、
|
|
6
|
+
パスウェイレベル統合、ネットワーク統合のテンプレートを提供。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Multi-Omics Integration
|
|
10
|
+
|
|
11
|
+
複数のオミクスレイヤー(ゲノミクス、トランスクリプトミクス、プロテオミクス、
|
|
12
|
+
メタボロミクス)のデータを統合的に解析するためのスキル。各オミクスの個別解析は
|
|
13
|
+
それぞれの専門スキル(bioinformatics, metabolomics)に委ねつつ、本スキルは
|
|
14
|
+
**統合**に特化する。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- 同一サンプルから得られた複数オミクスデータを統合するとき
|
|
19
|
+
- オミクス間の相関構造を解明したいとき
|
|
20
|
+
- マルチオミクスバイオマーカー発見が必要なとき
|
|
21
|
+
- パスウェイレベルでの統合解析が必要なとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. データ整合性チェック
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import numpy as np
|
|
31
|
+
import pandas as pd
|
|
32
|
+
|
|
33
|
+
def check_multiomics_alignment(omics_dict, sample_col="Sample_ID"):
    """
    Check sample alignment across omics layers and keep only shared samples.

    Parameters:
        omics_dict: mapping of layer name to DataFrame, e.g.
            {"transcriptomics": df, "proteomics": df, "metabolomics": df}.
            Every DataFrame must contain the ``sample_col`` column.
        sample_col: name of the sample identifier column.

    Returns:
        aligned: dict of layer name -> DataFrame restricted to the samples
            present in every layer, sorted by ``sample_col`` and re-indexed
            from 0.
        report: dict with the keys "n_omics_layers", "layers",
            "samples_per_layer", "common_samples" and "features_per_layer".
    """
    all_samples = {name: set(df[sample_col]) for name, df in omics_dict.items()}

    # set.intersection() needs at least one operand, so an empty omics_dict
    # is treated as having no common samples instead of raising TypeError.
    common = set.intersection(*all_samples.values()) if all_samples else set()

    report = {
        "n_omics_layers": len(omics_dict),
        "layers": list(omics_dict.keys()),
        "samples_per_layer": {k: len(v) for k, v in all_samples.items()},
        "common_samples": len(common),
        # Every column except the sample identifier counts as a feature.
        "features_per_layer": {k: df.shape[1] - 1 for k, df in omics_dict.items()},
    }

    # Keep only the shared samples and put every layer in the same row order.
    aligned = {
        name: df[df[sample_col].isin(common)]
        .sort_values(sample_col)
        .reset_index(drop=True)
        for name, df in omics_dict.items()
    }

    largest_layer = max(report["samples_per_layer"].values(), default=0)
    print("=== Multi-Omics Alignment ===")
    print(f"Common samples: {len(common)} / {largest_layer}")
    for k, v in report["features_per_layer"].items():
        print(f" {k}: {v} features")

    return aligned, report
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## 2. オミクス間相関解析
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from scipy.stats import spearmanr
|
|
71
|
+
|
|
72
|
+
def cross_omics_correlation(omics1_df, omics2_df, name1="Omics1", name2="Omics2",
                            top_n=50, method="spearman"):
    """
    Compute pairwise feature correlations between two omics layers.

    Every column of ``omics1_df`` is correlated with every column of
    ``omics2_df``; rows are paired by position, so both frames are expected
    to hold the same samples in the same order.

    Parameters:
        omics1_df: DataFrame (samples x features), without a sample column.
        omics2_df: DataFrame (samples x features), without a sample column.
        name1, name2: layer names used to build the "<name>_feature" columns.
        top_n: number of strongest pairs (by |correlation|) to return.
        method: "spearman" uses scipy.stats.spearmanr; any other value uses
            scipy.stats.pearsonr.

    Returns:
        DataFrame with the columns "<name1>_feature", "<name2>_feature",
        "correlation", "p_value" and "abs_correlation", sorted by
        "abs_correlation" in descending order and truncated to ``top_n``
        rows. Empty (but with those columns) when either layer has no
        features.
    """
    # Resolve the correlation function once instead of once per feature pair.
    if method == "spearman":
        corr_func = spearmanr
    else:
        from scipy.stats import pearsonr
        corr_func = pearsonr

    key1 = f"{name1}_feature"
    key2 = f"{name2}_feature"

    correlations = []
    for f1 in omics1_df.columns:
        values1 = omics1_df[f1]
        for f2 in omics2_df.columns:
            r, p = corr_func(values1, omics2_df[f2])
            correlations.append({
                key1: f1,
                key2: f2,
                "correlation": r,
                "p_value": p,
                "abs_correlation": abs(r),
            })

    if not correlations:
        # Without any record the frame would have no "abs_correlation"
        # column and sort_values would raise KeyError.
        columns = list(dict.fromkeys(
            [key1, key2, "correlation", "p_value", "abs_correlation"]
        ))
        return pd.DataFrame(columns=columns)

    corr_df = pd.DataFrame(correlations).sort_values("abs_correlation", ascending=False)

    return corr_df.head(top_n)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## 3. CCA (Canonical Correlation Analysis)
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from sklearn.cross_decomposition import CCA
|
|
110
|
+
|
|
111
|
+
def canonical_correlation_analysis(X1, X2, n_components=2):
    """
    Fit a CCA model on two omics matrices and report per-component correlations.

    Parameters:
        X1, X2: the two data matrices passed to ``CCA.fit_transform``.
        n_components: number of canonical components to extract.

    Returns:
        cca_model, scores_X1, scores_X2, canonical_correlations
        where ``canonical_correlations`` is a list holding, for each
        component, the Pearson correlation between the two score columns.
    """
    model = CCA(n_components=n_components)
    proj_1, proj_2 = model.fit_transform(X1, X2)

    # Correlation between matching score columns of the two projections.
    component_corrs = [
        np.corrcoef(proj_1[:, comp], proj_2[:, comp])[0, 1]
        for comp in range(n_components)
    ]

    return model, proj_1, proj_2, component_corrs
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def plot_cca_scores(scores_X1, scores_X2, labels, name1="Omics1",
                    name2="Omics2", figsize=(12, 5)):
    """
    Draw the CC1/CC2 score scatter plots of both omics layers side by side.

    Parameters:
        scores_X1, scores_X2: 2-D score arrays indexed as ``[sample, component]``;
            the first two columns are plotted.
        labels: one group label per sample; used for colouring and the legend.
        name1, name2: layer names shown in the panel titles.
        figsize: matplotlib figure size.

    The figure is written to ``figures/cca_scores.png`` (the directory is
    created when missing) and then closed. Nothing is returned.
    """
    import os

    import matplotlib.pyplot as plt

    # Comparing a plain Python list with ``==`` yields a single bool rather
    # than a per-sample mask, so normalise to an array first.
    labels = np.asarray(labels)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize)

    unique_labels = np.unique(labels)
    colors = plt.cm.Set1(np.linspace(0, 0.5, len(unique_labels)))

    for color, label in zip(colors, unique_labels):
        mask = labels == label
        for ax, scores in ((ax1, scores_X1), (ax2, scores_X2)):
            ax.scatter(scores[mask, 0], scores[mask, 1],
                       c=[color], label=label, alpha=0.7, edgecolors="black")

    ax1.set_title(f"CCA — {name1}", fontweight="bold")
    ax1.set_xlabel("CC1")
    ax1.set_ylabel("CC2")
    ax2.set_title(f"CCA — {name2}", fontweight="bold")
    ax2.set_xlabel("CC1")
    ax2.set_ylabel("CC2")
    ax1.legend()
    ax2.legend()
    plt.tight_layout()

    # savefig does not create missing parent directories.
    os.makedirs("figures", exist_ok=True)
    plt.savefig("figures/cca_scores.png", dpi=300, bbox_inches="tight")
    plt.close()
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## 4. SNF (Similarity Network Fusion)
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
def similarity_network_fusion(omics_list, k_neighbors=20, n_iterations=20):
    """
    Similarity Network Fusion: fuse per-omics sample similarity networks.

    Wang et al., Nature Methods 2014

    Parameters:
        omics_list: list of np.arrays [(n_samples, p1), (n_samples, p2), ...]
        k_neighbors: K used both for the kernel bandwidth and the KNN mask.
        n_iterations: number of cross-view diffusion iterations.

    Returns:
        fused_similarity: (n_samples, n_samples) symmetric fused similarity
            matrix. With a single view no diffusion is possible, so the
            symmetrised, row-normalised similarity of that view is returned.
    """
    from sklearn.metrics import pairwise_distances

    n = omics_list[0].shape[0]
    n_views = len(omics_list)

    similarities = []   # row-normalised full kernels, one per view
    local_kernels = []  # row-normalised KNN-restricted kernels, one per view
    for X in omics_list:
        # One distance matrix per view serves both the kernel and the mask.
        D = pairwise_distances(X, metric="euclidean")

        # Per-sample bandwidth: mean of the sorted distances in positions
        # 1..k (position 0 is presumably the zero self-distance).
        mu = np.mean(np.sort(D, axis=1)[:, 1:k_neighbors+1], axis=1)
        S = np.exp(-D**2 / (mu[:, None] * mu[None, :] + 1e-10))
        np.fill_diagonal(S, 0)
        # Row normalisation; the epsilon guards against all-zero rows.
        S = S / (S.sum(axis=1, keepdims=True) + 1e-10)
        similarities.append(S)

        # KNN mask: the k_neighbors + 1 smallest distances of every row.
        mask = np.zeros_like(D, dtype=bool)
        for i in range(n):
            nn = np.argsort(D[i])[:k_neighbors+1]
            mask[i, nn] = True

        # The local kernel does not change during fusion, so it is built
        # once here instead of once per iteration.
        S_local = S * mask
        S_local = S_local / (S_local.sum(axis=1, keepdims=True) + 1e-10)
        local_kernels.append(S_local)

    # Iterative fusion: each view is diffused through the average of the
    # other views' current state.
    P = [s.copy() for s in similarities]
    if n_views > 1:
        for _ in range(n_iterations):
            P_new = []
            for v in range(n_views):
                other_avg = np.mean([P[j] for j in range(n_views) if j != v], axis=0)
                P_updated = local_kernels[v] @ other_avg @ local_kernels[v].T
                P_updated = P_updated / (P_updated.sum(axis=1, keepdims=True) + 1e-10)
                P_new.append(P_updated)
            P = P_new
    # else: a single view has no "other" views to average (np.mean of an
    # empty list is not a matrix), so the diffusion loop is skipped.

    fused = np.mean(P, axis=0)
    fused = (fused + fused.T) / 2

    return fused
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## 5. パスウェイレベル統合
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
def pathway_level_integration(omics_dict, pathway_mapping, pathway_col="Pathway",
                              feature_col="Feature", layer_col="Omics_Layer"):
    """
    Integrate omics layers at the pathway level.

    For every pathway the mapped features found in each layer are
    concatenated column-wise and the first principal component of that
    matrix is used as the pathway activity score.

    Parameters:
        omics_dict: {"transcriptomics": df, "proteomics": df}; the features
            are looked up as DataFrame columns. Rows are concatenated by
            position, so all layers must hold the same samples in the same
            order.
        pathway_mapping: DataFrame (Feature, Pathway, Omics_Layer)
        pathway_col: column of ``pathway_mapping`` holding the pathway name.
        feature_col: column of ``pathway_mapping`` holding the feature name.
        layer_col: column of ``pathway_mapping`` holding the layer name; its
            values must match the keys of ``omics_dict``.

    Returns:
        dict mapping pathway -> {"score": 1-D array with one value per
        sample, "explained_variance": variance ratio of the first
        component, "n_features": number of features used}. Pathways with
        fewer than two available features are omitted.
    """
    from sklearn.decomposition import PCA

    pathway_scores = {}

    for pw in pathway_mapping[pathway_col].unique():
        pw_features = pathway_mapping[pathway_mapping[pathway_col] == pw]

        combined_data = []
        for omics_name, df in omics_dict.items():
            features_in_omics = pw_features[pw_features[layer_col] == omics_name][feature_col]
            # Only features actually measured in this layer can contribute.
            available = [f for f in features_in_omics if f in df.columns]
            if available:
                combined_data.append(df[available].values)

        if not combined_data:
            continue

        X = np.hstack(combined_data)
        if X.shape[1] < 2:
            # No score is computed for single-feature pathways.
            continue

        pca = PCA(n_components=1)
        score = pca.fit_transform(X).ravel()
        pathway_scores[pw] = {
            "score": score,
            "explained_variance": pca.explained_variance_ratio_[0],
            "n_features": X.shape[1],
        }

    return pathway_scores
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## 6. マルチオミクスクラスタリング
|
|
264
|
+
|
|
265
|
+
```python
|
|
266
|
+
from sklearn.cluster import SpectralClustering
|
|
267
|
+
|
|
268
|
+
def multiomics_clustering(fused_similarity, n_clusters, labels_true=None):
    """
    Spectral clustering on a precomputed (fused) similarity matrix.

    Parameters:
        fused_similarity: affinity matrix handed to SpectralClustering with
            ``affinity="precomputed"``.
        n_clusters: number of clusters to extract.
        labels_true: optional reference labels; when given, ARI and NMI
            against the predicted clusters are added to the metrics.

    Returns:
        cluster_labels, metrics, where metrics always contains
        "n_clusters" and additionally "ARI" and "NMI" when ``labels_true``
        is provided.
    """
    from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

    model = SpectralClustering(
        affinity="precomputed",
        n_clusters=n_clusters,
        random_state=42,
    )
    assigned = model.fit_predict(fused_similarity)

    if labels_true is None:
        return assigned, {"n_clusters": n_clusters}

    summary = {
        "n_clusters": n_clusters,
        "ARI": adjusted_rand_score(labels_true, assigned),
        "NMI": normalized_mutual_info_score(labels_true, assigned),
    }
    return assigned, summary
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
## References
|
|
285
|
+
|
|
286
|
+
### Output Files
|
|
287
|
+
|
|
288
|
+
| ファイル | 形式 |
|
|
289
|
+
|---|---|
|
|
290
|
+
| `results/cross_omics_correlation.csv` | CSV |
|
|
291
|
+
| `results/canonical_correlations.csv` | CSV |
|
|
292
|
+
| `results/pathway_activity_scores.csv` | CSV |
|
|
293
|
+
| `results/multiomics_clusters.csv` | CSV |
|
|
294
|
+
| `figures/cca_scores.png` | PNG |
|
|
295
|
+
| `figures/snf_heatmap.png` | PNG |
|
|
296
|
+
| `figures/multiomics_umap.png` | PNG |
|
|
297
|
+
|
|
298
|
+
#### 依存パッケージ
|
|
299
|
+
|
|
300
|
+
```
|
|
301
|
+
scikit-learn>=1.3
|
|
302
|
+
scipy>=1.10
|
|
303
|
+
```
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-network-analysis
|
|
3
|
+
description: |
|
|
4
|
+
ネットワーク解析・相関ネットワーク構築のスキル。NetworkX を用いたグラフ構築、
|
|
5
|
+
中心性解析、コミュニティ検出、ネットワーク可視化を行う際に使用。
|
|
6
|
+
Scientific Skills Exp-04, 07 で確立したパターン。PSP パスダイアグラムにも適用。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Network Analysis
|
|
10
|
+
|
|
11
|
+
NetworkX を用いたネットワーク解析パイプラインスキル。PPI ネットワーク、
|
|
12
|
+
相関ネットワーク、PSP パスダイアグラムなどの構築・解析・可視化を提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- タンパク質相互作用ネットワークを構築・解析したいとき
|
|
17
|
+
- 相関行列からネットワークを構築したいとき
|
|
18
|
+
- ノードの重要性(ハブ、ボトルネック)を評価したいとき
|
|
19
|
+
- コミュニティ(モジュール)を検出したいとき
|
|
20
|
+
- PSP パスダイアグラム(因果連鎖)を可視化したいとき
|
|
21
|
+
|
|
22
|
+
## Quick Start
|
|
23
|
+
|
|
24
|
+
## 標準パイプライン
|
|
25
|
+
|
|
26
|
+
### 1. ネットワーク構築
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import networkx as nx
|
|
30
|
+
import pandas as pd
|
|
31
|
+
import numpy as np
|
|
32
|
+
import matplotlib.pyplot as plt
|
|
33
|
+
|
|
34
|
+
def build_network_from_edgelist(edges_df, source_col, target_col,
                                weight_col=None, directed=False):
    """
    Build a NetworkX graph from an edge-list DataFrame.

    Parameters:
        edges_df: DataFrame with one edge per row.
        source_col, target_col: columns holding the two endpoints.
        weight_col: optional column copied into the "weight" edge attribute.
        directed: build a DiGraph when True, otherwise an undirected Graph.

    Returns:
        The constructed graph.
    """
    graph = nx.DiGraph() if directed else nx.Graph()

    for _, edge in edges_df.iterrows():
        src, dst = edge[source_col], edge[target_col]
        if weight_col and weight_col in edge:
            graph.add_edge(src, dst, weight=edge[weight_col])
        else:
            graph.add_edge(src, dst)

    return graph
|
|
49
|
+
|
|
50
|
+
def build_correlation_network(corr_matrix, threshold=0.5, absolute=True):
    """
    Build an undirected network from the upper triangle of a correlation matrix.

    Used for e.g. metabolomics correlation networks (Exp-07).

    Parameters:
        corr_matrix: square DataFrame; column labels become node names and
            cells are read by position.
        threshold: minimum correlation required to create an edge.
        absolute: when True, edges are kept if |r| >= threshold and carry
            weight=|r| plus "correlation" and "sign" attributes; when False,
            edges are kept if r >= threshold and carry weight=r only.

    Returns:
        nx.Graph containing only the variables that take part in an edge.
    """
    graph = nx.Graph()
    names = list(corr_matrix.columns)
    n_vars = len(names)

    for row in range(n_vars):
        # Visit each unordered pair once (strict upper triangle).
        for col in range(row + 1, n_vars):
            r = corr_matrix.iloc[row, col]
            if absolute:
                if abs(r) >= threshold:
                    sign = "+" if r > 0 else "-"
                    graph.add_edge(names[row], names[col], weight=abs(r),
                                   correlation=r, sign=sign)
            elif r >= threshold:
                graph.add_edge(names[row], names[col], weight=r)

    return graph
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### 2. 中心性解析
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
def comprehensive_centrality(G):
    """
    Compute four node-importance measures for a graph in one call.

    The returned DataFrame is indexed by node and has the columns "Degree"
    (raw degree from ``G.degree()``), "Betweenness", "Closeness" and
    "Eigenvector". "Eigenvector" is NaN for every node when the power
    iteration does not converge within 1000 iterations.

    The table is sorted by "Degree" in descending order and also written to
    ``results/centrality_measures.csv`` (the directory is created when
    missing).
    """
    import os

    centrality = pd.DataFrame(index=list(G.nodes()))
    centrality["Degree"] = pd.Series(dict(G.degree()))
    centrality["Betweenness"] = pd.Series(nx.betweenness_centrality(G))
    centrality["Closeness"] = pd.Series(nx.closeness_centrality(G))

    try:
        centrality["Eigenvector"] = pd.Series(
            nx.eigenvector_centrality(G, max_iter=1000)
        )
    except nx.PowerIterationFailedConvergence:
        centrality["Eigenvector"] = np.nan

    centrality = centrality.sort_values("Degree", ascending=False)

    # to_csv does not create missing parent directories.
    os.makedirs("results", exist_ok=True)
    centrality.to_csv("results/centrality_measures.csv")
    return centrality
|
|
93
|
+
|
|
94
|
+
def identify_hubs(centrality_df, top_n=10):
    """
    Identify hub nodes: nodes ranked in the top ``top_n`` by both
    "Degree" and "Betweenness".

    Parameters:
        centrality_df: DataFrame indexed by node with "Degree" and
            "Betweenness" columns.
        top_n: size of each top list that is intersected.

    Returns:
        (hubs, hub_table): the hub node labels ordered by descending Degree
        rank, and the matching rows of ``centrality_df`` in the same order.
    """
    degree_ranked = centrality_df.nlargest(top_n, "Degree").index
    between_top = set(centrality_df.nlargest(top_n, "Betweenness").index)

    # Walk the Degree ranking instead of intersecting two sets so that the
    # result order is reproducible; dict.fromkeys drops repeated labels.
    hubs = [node for node in dict.fromkeys(degree_ranked) if node in between_top]
    return hubs, centrality_df.loc[hubs]
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### 3. コミュニティ検出
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
def detect_communities(G, method="louvain"):
|
|
107
|
+
"""
|
|
108
|
+
コミュニティ検出を実行する。
|
|
109
|
+
method: 'louvain', 'greedy', 'label_propagation'
|
|
110
|
+
"""
|
|
111
|
+
if method == "louvain":
|
|
112
|
+
try:
|
|
113
|
+
import community as community_louvain
|
|
114
|
+
partition = community_louvain.best_partition(G, random_state=42)
|
|
115
|
+
except ImportError:
|
|
116
|
+
# フォールバック
|
|
117
|
+
from networkx.algorithms.community import greedy_modularity_communities
|
|
118
|
+
communities = list(greedy_modularity_communities(G))
|
|
119
|
+
partition = {node: i for i, comm in enumerate(communities)
|
|
120
|
+
for node in comm}
|
|
121
|
+
elif method == "greedy":
|
|
122
|
+
from networkx.algorithms.community import greedy_modularity_communities
|
|
123
|
+
communities = list(greedy_modularity_communities(G))
|
|
124
|
+
partition = {node: i for i, comm in enumerate(communities)
|
|
125
|
+
for node in comm}
|
|
126
|
+
elif method == "label_propagation":
|
|
127
|
+
from networkx.algorithms.community import label_propagation_communities
|
|
128
|
+
communities = list(label_propagation_communities(G))
|
|
129
|
+
partition = {node: i for i, comm in enumerate(communities)
|
|
130
|
+
for node in comm}
|
|
131
|
+
|
|
132
|
+
nx.set_node_attributes(G, partition, "community")
|
|
133
|
+
modularity = nx.community.modularity(
|
|
134
|
+
G, [{n for n, c in partition.items() if c == i}
|
|
135
|
+
for i in set(partition.values())]
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
return partition, modularity
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### 4. ネットワーク可視化
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
def visualize_network(G, partition=None, node_size_attr="Degree",
                      title="Network", figsize=(12, 12)):
    """
    Draw a graph with a spring layout and save it as a PNG.

    Parameters:
        G: NetworkX graph to draw.
        partition: optional {node: community index} dict used for node
            colours; nodes missing from it get colour index 0.
        node_size_attr: "Degree" sizes nodes by their degree; any other
            value is read as a node attribute (default 1 when absent).
        title: figure title.
        figsize: matplotlib figure size.

    The figure is written to ``figures/network_visualization.png`` (the
    directory is created when missing) and then closed. Nothing is returned.
    """
    import os

    fig, ax = plt.subplots(figsize=figsize)

    n_nodes = len(G.nodes())
    # An empty graph would divide by zero here; let NetworkX pick k instead.
    spacing = 2/np.sqrt(n_nodes) if n_nodes else None
    pos = nx.spring_layout(G, seed=42, k=spacing)

    # Node sizes
    if node_size_attr == "Degree":
        raw_sizes = np.array([G.degree(n) for n in G.nodes()])
    else:
        raw_sizes = np.array([G.nodes[n].get(node_size_attr, 1) for n in G.nodes()])

    peak = raw_sizes.max() if raw_sizes.size else 0
    if peak != 0:
        sizes = 100 + raw_sizes / peak * 500
    else:
        # All values are zero (or there are no nodes): scaling would be
        # 0/0, so fall back to the base size.
        sizes = np.full(raw_sizes.shape, 100.0)

    # Node colours (communities)
    if partition:
        colors = [partition.get(n, 0) for n in G.nodes()]
        cmap = plt.cm.Set2
    else:
        colors = "steelblue"
        cmap = None

    nx.draw_networkx_edges(G, pos, alpha=0.2, ax=ax)
    nx.draw_networkx_nodes(G, pos, node_size=sizes,
                           node_color=colors, cmap=cmap,
                           alpha=0.8, ax=ax)
    nx.draw_networkx_labels(G, pos, font_size=7, ax=ax)

    ax.set_title(title, fontsize=14, fontweight="bold")
    ax.axis("off")
    plt.tight_layout()

    # savefig does not create missing parent directories.
    os.makedirs("figures", exist_ok=True)
    plt.savefig("figures/network_visualization.png", dpi=300, bbox_inches="tight")
    plt.close()
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### 5. PSP パスダイアグラム(Exp-13 独自)
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
def psp_path_diagram(ps_corr, sp_corr, pp_corr,
                     threshold=0.3, figsize=(14, 10)):
    """
    Draw a Process -> Structure -> Property path diagram.

    Arrow colour: red = positive correlation, blue = negative correlation;
    arrow width is proportional to |r|.

    Parameters:
        ps_corr: DataFrame of correlations with process variables as the
            index and structure variables as the columns.
        sp_corr: DataFrame of correlations with structure variables as the
            index and property variables as the columns. Its index labels
            are looked up among the columns of ``ps_corr``.
        pp_corr: accepted for interface compatibility; not used by the
            drawing code.
        threshold: minimum |r| for an arrow to be drawn.
        figsize: matplotlib figure size.

    The figure is written to ``figures/psp_path_diagram.png`` (the directory
    is created when missing) and then closed. Nothing is returned.
    """
    import os

    fig, ax = plt.subplots(figsize=figsize)

    # Node placement (three columns)
    process_vars = list(ps_corr.index)
    structure_vars = list(ps_corr.columns)
    property_vars = list(sp_corr.columns)

    def place_nodes(names, x, color):
        """Draw one column of labelled nodes at ``x`` and return their positions."""
        positions = {}
        n = len(names)
        for i, name in enumerate(names):
            # Spread the nodes from y=1 (first) down to y=0 (last);
            # max(..., 1) avoids dividing by zero for a single node.
            y = (n - 1 - i) / max(n - 1, 1)
            positions[name] = (x, y)
            ax.scatter(x, y, s=300, c=color, zorder=5, edgecolors="black")
            ax.text(x, y, name, ha="center", va="center", fontsize=7,
                    fontweight="bold", zorder=6)
        return positions

    pos_p = place_nodes(process_vars, 0, "#FFB3BA")
    pos_s = place_nodes(structure_vars, 1, "#BAE1FF")
    pos_pr = place_nodes(property_vars, 2, "#BAFFC9")

    # Edge drawing
    def draw_edges(corr_df, pos_from, pos_to):
        """Draw an arrow for every index/column pair with |r| >= threshold."""
        for var1 in corr_df.index:
            for var2 in corr_df.columns:
                r = corr_df.loc[var1, var2]
                if abs(r) >= threshold:
                    color = "red" if r > 0 else "blue"
                    width = abs(r) * 3
                    ax.annotate("", xy=pos_to[var2], xytext=pos_from[var1],
                                arrowprops=dict(arrowstyle="->", color=color,
                                                lw=width, alpha=0.6))

    draw_edges(ps_corr, pos_p, pos_s)
    draw_edges(sp_corr, pos_s, pos_pr)

    # Column labels
    ax.text(0, -0.1, "Process", ha="center", fontsize=12,
            fontweight="bold", color="#FF6B6B")
    ax.text(1, -0.1, "Structure", ha="center", fontsize=12,
            fontweight="bold", color="#4ECDC4")
    ax.text(2, -0.1, "Property", ha="center", fontsize=12,
            fontweight="bold", color="#45B7D1")

    ax.set_xlim(-0.3, 2.3)
    ax.set_ylim(-0.2, 1.1)
    ax.axis("off")
    ax.set_title("PSP Linkage Path Diagram", fontweight="bold", fontsize=14)
    plt.tight_layout()

    # savefig does not create missing parent directories.
    os.makedirs("figures", exist_ok=True)
    plt.savefig("figures/psp_path_diagram.png", dpi=300, bbox_inches="tight")
    plt.close()
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## References
|
|
242
|
+
|
|
243
|
+
### Output Files
|
|
244
|
+
|
|
245
|
+
| ファイル | 形式 |
|
|
246
|
+
|---|---|
|
|
247
|
+
| `results/centrality_measures.csv` | CSV |
|
|
248
|
+
| `results/edge_list.csv` | CSV |
|
|
249
|
+
| `results/node_attributes.csv` | CSV |
|
|
250
|
+
| `figures/network_visualization.png` | PNG |
|
|
251
|
+
| `figures/psp_path_diagram.png` | PNG |
|
|
252
|
+
|
|
253
|
+
#### 参照実験
|
|
254
|
+
|
|
255
|
+
- **Exp-04**: PPI ネットワーク(71 タンパク質、4 種中心性、Louvain コミュニティ)
|
|
256
|
+
- **Exp-07**: Spearman 相関ネットワーク(メタボロミクス)
|
|
257
|
+
- **Exp-13**: PSP パスダイアグラム(Process→Structure→Property 因果連鎖)
|