@nahisaho/satori 0.23.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,238 @@
1
+ ---
2
+ name: scientific-multi-task-learning
3
+ description: |
4
+ マルチタスク学習スキル。Hard/Soft Parameter Sharing・
5
+ GradNorm 勾配正規化・PCGrad 勾配投影・
6
+ タスクバランシング・補助タスク設計。
7
+ ---
8
+
9
+ # Scientific Multi-Task Learning
10
+
11
+ 複数の関連タスクを同時に学習し、共有表現を活用して
12
+ 各タスクの汎化性能を向上させるパイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - 複数の関連予測タスクを同時に実行するとき
17
+ - 共有表現を学習してデータ効率を高めたいとき
18
+ - 主タスク + 補助タスクの構成で学習するとき
19
+ - タスク間の勾配干渉を解消するとき
20
+ - マルチ出力回帰・分類を設計するとき
21
+
22
+ ---
23
+
24
+ ## Quick Start
25
+
26
+ ## 1. Hard Parameter Sharing MTL
27
+
28
+ ```python
29
+ import torch
30
+ import torch.nn as nn
31
+ from typing import Dict, List, Tuple
32
+
33
+
34
class HardSharingMTL(nn.Module):
    """
    Hard-parameter-sharing multi-task model.

    One shared encoder feeds an independent prediction head per task.
    """

    def __init__(self, input_dim, shared_dims, task_configs):
        """
        Parameters:
            input_dim: int — number of input features
            shared_dims: list[int] — width of each shared encoder layer
            task_configs: dict — {task_name: {"output_dim": int, "head_dims": [int]}}
                ("head_dims" is optional and defaults to [64])
        """
        super().__init__()

        # Shared encoder: one Linear -> ReLU -> BatchNorm1d -> Dropout(0.2)
        # stage per requested width.
        encoder_widths = [input_dim, *shared_dims]
        encoder_modules = []
        for fan_in, fan_out in zip(encoder_widths[:-1], encoder_widths[1:]):
            encoder_modules += [
                nn.Linear(fan_in, fan_out),
                nn.ReLU(),
                nn.BatchNorm1d(fan_out),
                nn.Dropout(0.2),
            ]
        self.shared_encoder = nn.Sequential(*encoder_modules)
        encoder_out = encoder_widths[-1]

        # Task-specific heads: hidden Linear -> ReLU stages followed by a
        # final Linear projecting to the task's output dimension.
        self.task_heads = nn.ModuleDict()
        for task_name, task_cfg in task_configs.items():
            head_widths = [encoder_out, *task_cfg.get("head_dims", [64])]
            head_modules = []
            for fan_in, fan_out in zip(head_widths[:-1], head_widths[1:]):
                head_modules.append(nn.Linear(fan_in, fan_out))
                head_modules.append(nn.ReLU())
            head_modules.append(
                nn.Linear(head_widths[-1], task_cfg["output_dim"]))
            self.task_heads[task_name] = nn.Sequential(*head_modules)

    def forward(self, x):
        """Encode x once and return {task_name: head output}."""
        features = self.shared_encoder(x)
        predictions = {}
        for task_name, head in self.task_heads.items():
            predictions[task_name] = head(features)
        return predictions
73
+
74
+
75
def train_mtl_model(model, train_loader, task_losses,
                    task_weights=None, epochs=50,
                    lr=1e-3, device="cpu"):
    """
    Train a multi-task model on a weighted sum of per-task losses.

    Parameters:
        model: nn.Module — called as model(X); must return {task_name: prediction}
        train_loader: sized iterable of dict batches — each batch maps "X" to
            the input tensor and every task name to that task's target tensor
        task_losses: dict — {task_name: loss_fn(prediction, target)}
        task_weights: dict | None — {task_name: float}; any task not listed
            (or every task when None) uses weight 1.0
        epochs: int — number of training epochs
        lr: float — Adam learning rate
        device: str — device the model and batches are moved to

    Returns:
        pd.DataFrame — one row per epoch with columns "epoch" and
        "loss_<task_name>" (mean unweighted task loss over the batches)

    Raises:
        ValueError — if task_losses is empty or train_loader has no batches
    """
    import pandas as pd

    if not task_losses:
        raise ValueError("task_losses must contain at least one task")
    n_batches = len(train_loader)
    if n_batches == 0:
        raise ValueError("train_loader must contain at least one batch")

    # Fill in 1.0 for every task the caller did not weight explicitly so a
    # partial task_weights dict no longer fails with KeyError mid-training.
    supplied_weights = task_weights or {}
    weights = {name: supplied_weights.get(name, 1.0) for name in task_losses}

    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    history = []

    for epoch in range(epochs):
        model.train()
        epoch_losses = {name: 0.0 for name in task_losses}

        for batch in train_loader:
            optimizer.zero_grad()
            X = batch["X"].to(device)
            outputs = model(X)

            total_loss = 0
            for name, loss_fn in task_losses.items():
                y = batch[name].to(device)
                task_loss = loss_fn(outputs[name], y)
                total_loss += weights[name] * task_loss
                # History tracks the raw (unweighted) task loss.
                epoch_losses[name] += task_loss.item()

            total_loss.backward()
            optimizer.step()

        record = {"epoch": epoch + 1}
        for name in task_losses:
            record[f"loss_{name}"] = epoch_losses[name] / n_batches
        history.append(record)

        if (epoch + 1) % 10 == 0:
            losses_str = " | ".join(
                f"{n}={epoch_losses[n]/n_batches:.4f}"
                for n in task_losses)
            print(f"Epoch {epoch+1}: {losses_str}")

    return pd.DataFrame(history)
130
+ ```
131
+
132
+ ## 2. GradNorm — 動的タスクバランシング
133
+
134
+ ```python
135
def gradnorm_balance(model, task_losses, train_loader,
                     alpha=1.5, epochs=50, lr=1e-3, device="cpu"):
    """
    Train a multi-task model with GradNorm dynamic task weighting.

    Task weights are softmax(log_weights) * n_tasks, so they stay positive
    and always sum to n_tasks. Model parameters are updated from the
    weighted task loss with the weights held constant; the weights are
    updated only from the GradNorm loss, which pulls each task's weighted
    gradient norm (measured on the last parameter of model.shared_encoder)
    toward a target derived from that task's relative training rate.

    Parameters:
        model: nn.Module — must expose a `shared_encoder` submodule and
            return {task_name: prediction} from model(X)
        task_losses: dict — {task_name: loss_fn(prediction, target)};
            each loss_fn must return a scalar tensor
        train_loader: sized iterable of dict batches — each batch maps "X"
            to the input tensor and every task name to its target tensor
        alpha: float — GradNorm asymmetry parameter
        epochs: int — number of training epochs
        lr: float — Adam learning rate for the model parameters
        device: str — device the model and batches are moved to

    Returns:
        pd.DataFrame — one row per epoch with "epoch", and per task
        "loss_<name>" (mean unweighted loss) and "weight_<name>"
        (task weight at the end of the epoch)

    Raises:
        ValueError — if task_losses is empty, train_loader has no batches,
            or model.shared_encoder has no parameters
    """
    import pandas as pd

    task_names = list(task_losses.keys())
    n_tasks = len(task_names)
    if n_tasks == 0:
        raise ValueError("task_losses must contain at least one task")
    n_batches = len(train_loader)
    if n_batches == 0:
        raise ValueError("train_loader must contain at least one batch")

    model.to(device)
    shared_params = list(model.shared_encoder.parameters())
    if not shared_params:
        raise ValueError("model.shared_encoder has no parameters")
    # Gradient norms are measured on the last shared parameter only.
    ref_param = shared_params[-1]

    log_weights = torch.zeros(n_tasks, requires_grad=True, device=device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    weight_optimizer = torch.optim.Adam([log_weights], lr=0.025)

    eps = 1e-12  # guards the loss-ratio divisions against zero losses
    initial_losses = None
    history = []

    for epoch in range(epochs):
        model.train()
        epoch_losses = {n: 0.0 for n in task_names}

        for batch in train_loader:
            X = batch["X"].to(device)
            outputs = model(X)

            weights = torch.softmax(log_weights, dim=0) * n_tasks
            losses = []
            for name in task_names:
                y = batch[name].to(device)
                task_loss = task_losses[name](outputs[name], y)
                losses.append(task_loss)
                epoch_losses[name] += task_loss.item()

            if initial_losses is None:
                initial_losses = [l.item() for l in losses]

            optimizer.zero_grad()
            weight_optimizer.zero_grad()

            # Per-task gradient norm of the unweighted loss. These are
            # constants with respect to the task weights; because every
            # weight is positive, ||grad(w_i * L_i)|| == w_i * ||grad(L_i)||,
            # so multiplying by `weights` below keeps the dependency on
            # log_weights without needing a second-order graph.
            raw_norms = []
            for task_loss in losses:
                g = torch.autograd.grad(task_loss, ref_param,
                                        retain_graph=True,
                                        allow_unused=True)[0]
                raw_norms.append(torch.norm(g) if g is not None
                                 else ref_param.new_zeros(()))
            weighted_norms = weights * torch.stack(raw_norms)

            loss_ratios = torch.tensor(
                [l.item() / max(il, eps) for l, il in
                 zip(losses, initial_losses)],
                device=device, dtype=weighted_norms.dtype)
            relative_inv = loss_ratios / loss_ratios.mean().clamp_min(eps)
            # The target is treated as a constant, as GradNorm prescribes.
            target_norms = (weighted_norms.mean()
                            * (relative_inv ** alpha)).detach()

            # Only log_weights receives gradient from this loss.
            gradnorm_loss = torch.abs(weighted_norms - target_norms).sum()
            gradnorm_loss.backward()

            # Model update uses the current weights as fixed coefficients,
            # so the task loss never pushes gradient into log_weights.
            total_loss = sum(w * l for w, l in
                             zip(weights.detach(), losses))
            total_loss.backward()

            optimizer.step()
            weight_optimizer.step()

        record = {"epoch": epoch + 1}
        current_weights = torch.softmax(log_weights, 0) * n_tasks
        for i, name in enumerate(task_names):
            record[f"loss_{name}"] = epoch_losses[name] / n_batches
            record[f"weight_{name}"] = current_weights[i].item()
        history.append(record)

    return pd.DataFrame(history)
218
+ ```
219
+
220
+ ---
221
+
222
+ ## パイプライン統合
223
+
224
+ ```
225
+ [複数タスク定義] → multi-task-learning → feature-importance
226
+ (共有表現学習) (特徴量解釈)
227
+
228
+ deep-learning ← transfer-learning
229
+ (基盤 NN) (転移学習)
230
+ ```
231
+
232
+ ## パイプライン出力
233
+
234
+ | ファイル | 説明 | 次スキル |
235
+ |---------|------|---------|
236
+ | `mtl_model.pt` | MTL モデル | → 推論 |
237
+ | `mtl_history.csv` | タスク別学習履歴 | → 可視化 |
238
+ | `gradnorm_weights.csv` | 動的タスク重み推移 | → バランシング分析 |
@@ -0,0 +1,278 @@
1
+ ---
2
+ name: scientific-network-visualization
3
+ description: |
4
+ ネットワーク解析・可視化スキル。NetworkX グラフ構築・
5
+ コミュニティ検出 (Louvain/Leiden)・中心性指標・
6
+ PyVis インタラクティブ・ネットワーク統計量・動的ネットワーク。
7
+ ---
8
+
9
+ # Scientific Network Visualization
10
+
11
+ ネットワーク/グラフデータの解析・コミュニティ検出・
12
+ インタラクティブ可視化パイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - 関連性・相互作用のネットワーク構造を解析するとき
17
+ - コミュニティ (クラスタ) を検出するとき
18
+ - 中心性指標でハブ・ブリッジノードを特定するとき
19
+ - PyVis でインタラクティブなネットワーク図を作成するとき
20
+ - 相関行列からネットワークを構築するとき
21
+ - 時間発展するネットワークを解析するとき
22
+
23
+ > **Note**: タンパク質 PPI ネットワークは `scientific-protein-interaction-network` を参照。
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. ネットワーク構築・基本統計
30
+
31
+ ```python
32
+ import numpy as np
33
+ import pandas as pd
34
+ import networkx as nx
35
+
36
+
37
def build_network_from_edgelist(edges_df, source_col, target_col,
                                weight_col=None, directed=False):
    """
    Build a network from an edge list and compute basic statistics.

    Parameters:
        edges_df: pd.DataFrame — edge list, one edge per row
        source_col: str — column holding the source node
        target_col: str — column holding the target node
        weight_col: str | None — column holding the edge weight; rows whose
            weight is missing get an edge without a "weight" attribute
        directed: bool — build a DiGraph instead of a Graph

    Returns:
        (G, stats) — the graph and a dict with n_nodes, n_edges, density,
        is_connected, n_components, avg_degree, avg_clustering (None for
        directed graphs) and, for connected undirected graphs only,
        avg_path_length and diameter. Directed graphs use weak
        connectivity. An empty edge list yields an empty graph with
        zeroed statistics instead of raising.
    """
    G = nx.DiGraph() if directed else nx.Graph()

    for _, row in edges_df.iterrows():
        kwargs = {}
        if weight_col and pd.notna(row[weight_col]):
            kwargs["weight"] = row[weight_col]
        G.add_edge(row[source_col], row[target_col], **kwargs)

    if G.number_of_nodes() == 0:
        # Connectivity and clustering are undefined on the null graph
        # (networkx raises), so report neutral values explicitly.
        stats = {
            "n_nodes": 0,
            "n_edges": 0,
            "density": nx.density(G),
            "is_connected": False,
            "n_components": 0,
            "avg_degree": 0.0,
            "avg_clustering": 0.0 if not directed else None,
        }
    else:
        stats = {
            "n_nodes": G.number_of_nodes(),
            "n_edges": G.number_of_edges(),
            "density": nx.density(G),
            "is_connected": nx.is_connected(G) if not directed else nx.is_weakly_connected(G),
            "n_components": nx.number_connected_components(G) if not directed
                            else nx.number_weakly_connected_components(G),
            "avg_degree": np.mean([d for _, d in G.degree()]),
            "avg_clustering": nx.average_clustering(G) if not directed else None,
        }

    # Path-length metrics are only defined on a connected undirected graph.
    if stats["is_connected"] and not directed:
        stats["avg_path_length"] = nx.average_shortest_path_length(G)
        stats["diameter"] = nx.diameter(G)

    print(f"Network: {stats['n_nodes']} nodes, {stats['n_edges']} edges, "
          f"density={stats['density']:.4f}")
    return G, stats
78
+
79
+
80
def build_network_from_correlation(df, threshold=0.5,
                                   method="pearson"):
    """
    Build an undirected network from a correlation matrix.

    Every column of df becomes a node. Two columns are linked when the
    absolute value of their correlation is at least `threshold`; the edge
    stores |r| as "weight" and the signed value as "correlation".

    Parameters:
        df: pd.DataFrame — numeric data
        threshold: float — correlation cutoff (edge when |r| >= threshold)
        method: str — "pearson" / "spearman"
    """
    corr = df.corr(method=method)
    names = list(corr.columns)
    n_cols = len(names)
    G = nx.Graph()

    for row_idx, name_a in enumerate(names):
        G.add_node(name_a)
        # Visit only the upper triangle so each pair is considered once.
        for col_idx in range(row_idx + 1, n_cols):
            r = corr.iloc[row_idx, col_idx]
            if abs(r) >= threshold:
                G.add_edge(name_a, names[col_idx],
                           weight=abs(r),
                           correlation=r)

    print(f"Correlation Network (|r|≥{threshold}): "
          f"{G.number_of_nodes()} nodes, {G.number_of_edges()} edges")
    return G
104
+ ```
105
+
106
+ ## 2. コミュニティ検出
107
+
108
+ ```python
109
def detect_communities(G, method="louvain", resolution=1.0):
    """
    Detect communities, annotate the graph, and save a plot.

    Each node of G receives a "community" attribute (integer id), and a
    spring-layout figure coloured by community is written to
    "network_communities.png".

    Parameters:
        G: nx.Graph — network
        method: str — "louvain" / "leiden" / "label_propagation" / "girvan_newman"
            ("leiden" falls back to Louvain when leidenalg/igraph are not
            installed; "girvan_newman" uses only the first split)
        resolution: float — resolution parameter (Louvain/Leiden)

    Returns:
        dict — "communities", "modularity", "node_community" ({node: id})
        and "fig" (path of the saved image)

    Raises:
        ValueError — if method is not one of the supported names
    """
    supported = ("louvain", "leiden", "label_propagation", "girvan_newman")
    if method not in supported:
        # Fail fast with a clear message rather than hitting an unbound
        # `communities` variable further down.
        raise ValueError(
            f"Unknown community detection method {method!r}; "
            f"expected one of {supported}")

    import matplotlib.pyplot as plt

    if method == "louvain":
        communities = nx.community.louvain_communities(
            G, resolution=resolution, seed=42)
    elif method == "leiden":
        try:
            import leidenalg
            import igraph as ig
            ig_graph = ig.Graph.from_networkx(G)
            partition = leidenalg.find_partition(
                ig_graph, leidenalg.RBConfigurationVertexPartition,
                resolution_parameter=resolution, seed=42)
            # Map igraph vertex indices back to the original node names.
            communities = [set(ig_graph.vs[c]["_nx_name"] for c in comm)
                           for comm in partition]
        except ImportError:
            communities = nx.community.louvain_communities(
                G, resolution=resolution, seed=42)
    elif method == "label_propagation":
        communities = list(nx.community.label_propagation_communities(G))
    else:  # "girvan_newman"
        comp = nx.community.girvan_newman(G)
        communities = next(comp)  # first split only

    # Assign a community id to every node.
    node_community = {}
    for i, comm in enumerate(communities):
        for node in comm:
            node_community[node] = i
    nx.set_node_attributes(G, node_community, "community")

    # Modularity of the partition.
    modularity = nx.community.modularity(G, communities)

    # Visualization.
    fig, ax = plt.subplots(figsize=(12, 10))
    pos = nx.spring_layout(G, k=1/np.sqrt(G.number_of_nodes()), seed=42)
    colors = [node_community.get(n, 0) for n in G.nodes()]

    nx.draw_networkx(G, pos, ax=ax, node_color=colors,
                     cmap=plt.cm.Set3, node_size=100,
                     font_size=6, edge_color="gray", alpha=0.7,
                     with_labels=G.number_of_nodes() < 100)
    ax.set_title(f"Communities ({method}): {len(communities)} clusters, "
                 f"Q={modularity:.4f}")
    plt.tight_layout()

    path = "network_communities.png"
    plt.savefig(path, dpi=150, bbox_inches="tight")
    plt.close(fig)

    print(f"Communities ({method}): {len(communities)} clusters, "
          f"modularity={modularity:.4f}")
    return {"communities": communities, "modularity": modularity,
            "node_community": node_community, "fig": path}
173
+ ```
174
+
175
+ ## 3. 中心性指標
176
+
177
+ ```python
178
def centrality_analysis(G, top_n=20):
    """
    Compute several centrality measures and a combined hub ranking.

    Parameters:
        G: nx.Graph — network
        top_n: int — number of top nodes kept per measure

    Returns:
        dict — "centrality_df": DataFrame indexed by node with one column
        per measure, a "<measure>_rank" column each, and "hub_score" (mean
        rank, lower is more central), sorted by hub_score;
        "rankings": {measure: top_n node list}
    """
    centralities = {}
    centralities["degree"] = nx.degree_centrality(G)
    centralities["betweenness"] = nx.betweenness_centrality(G)
    centralities["closeness"] = nx.closeness_centrality(G)
    centralities["eigenvector"] = nx.eigenvector_centrality(G, max_iter=1000)
    centralities["pagerank"] = nx.pagerank(G)
    metric_names = list(centralities)

    # One row per node, one column per measure.
    cent_df = pd.DataFrame(centralities)
    cent_df.index.name = "node"

    # Top-N nodes for each individual measure.
    rankings = {
        metric: cent_df[metric].nlargest(top_n).index.tolist()
        for metric in metric_names
    }

    # Hub score: average of the per-measure ranks (rank 1 = highest value).
    rank_columns = []
    for metric in metric_names:
        rank_col = f"{metric}_rank"
        cent_df[rank_col] = cent_df[metric].rank(ascending=False)
        rank_columns.append(rank_col)
    cent_df["hub_score"] = cent_df[rank_columns].mean(axis=1)
    cent_df = cent_df.sort_values("hub_score")

    print(f"Centrality: {len(G.nodes())} nodes analyzed")
    print(f" Top hubs: {cent_df.head(5).index.tolist()}")
    return {"centrality_df": cent_df, "rankings": rankings}
213
+ ```
214
+
215
+ ## 4. PyVis インタラクティブ可視化
216
+
217
+ ```python
218
def interactive_network(G, output="network_interactive.html",
                        height="700px", width="100%"):
    """
    Render the network as an interactive PyVis HTML page.

    Nodes are coloured by their "community" attribute (0 when absent) and
    sized by degree; edges carry their "weight" (1 when absent) as value.

    Parameters:
        G: nx.Graph — network
        output: str — path of the HTML file to write
        height: str — canvas height
        width: str — canvas width

    Returns:
        str — the output path
    """
    from pyvis.network import Network

    nt = Network(height=height, width=width, notebook=False,
                 bgcolor="#ffffff", font_color="black")

    # Fixed palette; community ids wrap around when there are more
    # communities than colours.
    palette = ["#e41a1c", "#377eb8", "#4daf4a", "#984ea3",
               "#ff7f00", "#ffff33", "#a65628", "#f781bf"]
    community_map = nx.get_node_attributes(G, "community")

    for node in G.nodes():
        comm = community_map.get(node, 0)
        degree = G.degree(node)
        # Node size grows with degree, clamped to the range [5, 50].
        node_size = min(degree * 3, 50)
        node_size = max(5, node_size)
        node_id = str(node)
        nt.add_node(node_id, label=node_id,
                    color=palette[comm % len(palette)],
                    size=node_size,
                    title=f"{node}\nDegree: {degree}\nCommunity: {comm}")

    for src, dst, attrs in G.edges(data=True):
        nt.add_edge(str(src), str(dst), value=attrs.get("weight", 1))

    nt.toggle_physics(True)
    nt.show_buttons(filter_=["physics"])
    nt.save_graph(output)

    print(f"Interactive Network → {output} "
          f"({G.number_of_nodes()} nodes, {G.number_of_edges()} edges)")
    return output
258
+ ```
259
+
260
+ ---
261
+
262
+ ## パイプライン統合
263
+
264
+ ```
265
+ eda-correlation → network-visualization → advanced-visualization
266
+ (相関解析) (ネットワーク解析) (高度可視化)
267
+ │ │ ↓
268
+ graph-neural-networks ───┘ interactive-dashboard
269
+ (GNN) (ダッシュボード)
270
+ ```
271
+
272
+ ## パイプライン出力
273
+
274
+ | ファイル | 説明 | 次スキル |
275
+ |---------|------|---------|
276
+ | `network_communities.png` | コミュニティ構造 | → presentation |
277
+ | `centrality_analysis.csv` | 中心性指標 | → feature-importance |
278
+ | `network_interactive.html` | PyVis 図 | → dashboard |