@nahisaho/satori 0.22.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -20
- package/package.json +1 -1
- package/src/.github/skills/scientific-active-learning/SKILL.md +289 -0
- package/src/.github/skills/scientific-advanced-visualization/SKILL.md +310 -0
- package/src/.github/skills/scientific-anomaly-detection/SKILL.md +296 -0
- package/src/.github/skills/scientific-automl/SKILL.md +264 -0
- package/src/.github/skills/scientific-causal-ml/SKILL.md +240 -0
- package/src/.github/skills/scientific-data-profiling/SKILL.md +247 -0
- package/src/.github/skills/scientific-ensemble-methods/SKILL.md +263 -0
- package/src/.github/skills/scientific-geospatial-analysis/SKILL.md +274 -0
- package/src/.github/skills/scientific-interactive-dashboard/SKILL.md +346 -0
- package/src/.github/skills/scientific-missing-data-analysis/SKILL.md +312 -0
- package/src/.github/skills/scientific-model-monitoring/SKILL.md +247 -0
- package/src/.github/skills/scientific-network-visualization/SKILL.md +278 -0
- package/src/.github/skills/scientific-reproducible-reporting/SKILL.md +330 -0
- package/src/.github/skills/scientific-time-series-forecasting/SKILL.md +246 -0
- package/src/.github/skills/scientific-transfer-learning/SKILL.md +298 -0
- package/src/.github/skills/scientific-uncertainty-quantification/SKILL.md +286 -0
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-network-visualization
|
|
3
|
+
description: |
|
|
4
|
+
ネットワーク解析・可視化スキル。NetworkX グラフ構築・
|
|
5
|
+
コミュニティ検出 (Louvain/Leiden)・中心性指標・
|
|
6
|
+
PyVis インタラクティブ・ネットワーク統計量・動的ネットワーク。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Network Visualization
|
|
10
|
+
|
|
11
|
+
ネットワーク/グラフデータの解析・コミュニティ検出・
|
|
12
|
+
インタラクティブ可視化パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- 関連性・相互作用のネットワーク構造を解析するとき
|
|
17
|
+
- コミュニティ (クラスタ) を検出するとき
|
|
18
|
+
- 中心性指標でハブ・ブリッジノードを特定するとき
|
|
19
|
+
- PyVis でインタラクティブなネットワーク図を作成するとき
|
|
20
|
+
- 相関行列からネットワークを構築するとき
|
|
21
|
+
- 時間発展するネットワークを解析するとき
|
|
22
|
+
|
|
23
|
+
> **Note**: タンパク質 PPI ネットワークは `scientific-protein-interaction-network` を参照。
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. ネットワーク構築・基本統計
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import numpy as np
|
|
33
|
+
import pandas as pd
|
|
34
|
+
import networkx as nx
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def build_network_from_edgelist(edges_df, source_col, target_col,
                                weight_col=None, directed=False):
    """
    Build a NetworkX graph from an edge list and compute basic statistics.

    Parameters:
        edges_df: pd.DataFrame — one row per edge
        source_col: str — column holding the source node
        target_col: str — column holding the target node
        weight_col: str | None — optional column holding edge weights;
            rows whose weight is missing get an edge without a weight
        directed: bool — build a DiGraph instead of a Graph

    Returns:
        (G, stats): the graph and a dict with n_nodes, n_edges, density,
        is_connected, n_components, avg_degree and avg_clustering
        (None for directed graphs). Connected undirected graphs also get
        avg_path_length and diameter.
    """
    G = nx.DiGraph() if directed else nx.Graph()

    # Walk the columns instead of using iterrows(): iterrows() packs every
    # row into one Series, so a float weight column would upcast integer
    # node ids to floats.
    sources = edges_df[source_col].tolist()
    targets = edges_df[target_col].tolist()
    if weight_col:
        weights = edges_df[weight_col].tolist()
    else:
        weights = [None] * len(sources)

    for u, v, w in zip(sources, targets, weights):
        if weight_col and pd.notna(w):
            G.add_edge(u, v, weight=w)
        else:
            G.add_edge(u, v)

    if G.number_of_nodes() == 0:
        # Connectivity and clustering are undefined on the null graph and
        # the NetworkX helpers raise there, so report neutral values.
        stats = {
            "n_nodes": 0,
            "n_edges": 0,
            "density": nx.density(G),
            "is_connected": False,
            "n_components": 0,
            "avg_degree": 0.0,
            "avg_clustering": None if directed else 0.0,
        }
    else:
        if directed:
            connected = nx.is_weakly_connected(G)
            n_components = nx.number_weakly_connected_components(G)
        else:
            connected = nx.is_connected(G)
            n_components = nx.number_connected_components(G)
        stats = {
            "n_nodes": G.number_of_nodes(),
            "n_edges": G.number_of_edges(),
            "density": nx.density(G),
            "is_connected": connected,
            "n_components": n_components,
            "avg_degree": np.mean([d for _, d in G.degree()]),
            "avg_clustering": nx.average_clustering(G) if not directed else None,
        }

    # Path-length metrics are only computed for connected undirected graphs.
    if stats["is_connected"] and not directed:
        stats["avg_path_length"] = nx.average_shortest_path_length(G)
        stats["diameter"] = nx.diameter(G)

    print(f"Network: {stats['n_nodes']} nodes, {stats['n_edges']} edges, "
          f"density={stats['density']:.4f}")
    return G, stats
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def build_network_from_correlation(df, threshold=0.5,
                                   method="pearson"):
    """
    Build an undirected network whose edges are strong pairwise correlations.

    Parameters:
        df: pd.DataFrame — numeric data; each column becomes a node
        threshold: float — an edge is added when |r| >= threshold
        method: str — correlation method, "pearson" / "spearman"

    Returns:
        nx.Graph with edge attributes ``weight`` (|r|) and
        ``correlation`` (signed r).
    """
    corr_matrix = df.corr(method=method)
    labels = list(corr_matrix.columns)
    n_cols = len(labels)
    graph = nx.Graph()

    for row_idx in range(n_cols):
        name_a = labels[row_idx]
        # Register the node even if it ends up with no edges.
        graph.add_node(name_a)
        # Only the upper triangle is visited, so each pair is seen once.
        for col_idx in range(row_idx + 1, n_cols):
            r = corr_matrix.iloc[row_idx, col_idx]
            if abs(r) >= threshold:
                graph.add_edge(name_a, labels[col_idx],
                               weight=abs(r),
                               correlation=r)

    n_nodes = graph.number_of_nodes()
    n_edges = graph.number_of_edges()
    print(f"Correlation Network (|r|≥{threshold}): "
          f"{n_nodes} nodes, {n_edges} edges")
    return graph
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## 2. コミュニティ検出
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
def detect_communities(G, method="louvain", resolution=1.0,
                       output="network_communities.png"):
    """
    Detect communities, tag each node with its community id and plot them.

    Parameters:
        G: nx.Graph — network; a "community" node attribute is written to it
        method: str — "louvain" / "leiden" / "label_propagation" / "girvan_newman"
        resolution: float — resolution parameter (Louvain/Leiden only)
        output: str — path of the PNG figure to write

    Returns:
        dict with "communities", "modularity", "node_community"
        (node -> community index) and "fig" (path of the saved figure).

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    """
    import warnings

    import matplotlib.pyplot as plt

    def _louvain():
        return nx.community.louvain_communities(
            G, resolution=resolution, seed=42)

    # Name of the algorithm that actually ran; differs from ``method`` only
    # when Leiden is requested but its optional dependencies are missing.
    algorithm = method

    if method == "louvain":
        communities = _louvain()
    elif method == "leiden":
        try:
            import leidenalg
            import igraph as ig
        except ImportError:
            warnings.warn(
                "leidenalg/igraph not installed; falling back to Louvain",
                RuntimeWarning, stacklevel=2)
            algorithm = "louvain"
            communities = _louvain()
        else:
            ig_graph = ig.Graph.from_networkx(G)
            partition = leidenalg.find_partition(
                ig_graph, leidenalg.RBConfigurationVertexPartition,
                resolution_parameter=resolution, seed=42)
            # Map igraph vertex indices back to the original node names.
            communities = [set(ig_graph.vs[c]["_nx_name"] for c in comm)
                           for comm in partition]
    elif method == "label_propagation":
        communities = list(nx.community.label_propagation_communities(G))
    elif method == "girvan_newman":
        comp = nx.community.girvan_newman(G)
        communities = next(comp)  # first split only
    else:
        raise ValueError(
            f"Unknown community detection method: {method!r} "
            "(expected 'louvain', 'leiden', 'label_propagation' "
            "or 'girvan_newman')")

    # Assign a community id to every node.
    node_community = {
        node: idx
        for idx, comm in enumerate(communities)
        for node in comm
    }
    nx.set_node_attributes(G, node_community, "community")

    modularity = nx.community.modularity(G, communities)

    # Static visualization.
    fig, ax = plt.subplots(figsize=(12, 10))
    pos = nx.spring_layout(G, k=1/np.sqrt(G.number_of_nodes()), seed=42)
    colors = [node_community.get(n, 0) for n in G.nodes()]

    nx.draw_networkx(G, pos, ax=ax, node_color=colors,
                     cmap=plt.cm.Set3, node_size=100,
                     font_size=6, edge_color="gray", alpha=0.7,
                     with_labels=G.number_of_nodes() < 100)
    ax.set_title(f"Communities ({algorithm}): {len(communities)} clusters, "
                 f"Q={modularity:.4f}")
    plt.tight_layout()

    path = output
    plt.savefig(path, dpi=150, bbox_inches="tight")
    plt.close(fig)

    print(f"Communities ({algorithm}): {len(communities)} clusters, "
          f"modularity={modularity:.4f}")
    return {"communities": communities, "modularity": modularity,
            "node_community": node_community, "fig": path}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## 3. 中心性指標
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
def centrality_analysis(G, top_n=20, output="centrality_analysis.csv"):
    """
    Compute several centrality measures and combine them into a hub score.

    Parameters:
        G: nx.Graph — network
        top_n: int — number of top nodes kept per metric in the rankings
        output: str | None — CSV path for the centrality table
            (None skips writing the file)

    Returns:
        dict with "centrality_df" (one row per node, sorted by hub_score;
        a lower hub_score means a better average rank), "rankings"
        (metric -> top_n node list) and "csv" (the CSV path, or None).
    """
    centralities = {
        "degree": nx.degree_centrality(G),
        "betweenness": nx.betweenness_centrality(G),
        "closeness": nx.closeness_centrality(G),
        "eigenvector": nx.eigenvector_centrality(G, max_iter=1000),
        "pagerank": nx.pagerank(G)
    }

    cent_df = pd.DataFrame(centralities)
    cent_df.index.name = "node"

    # Per-metric top-N lists and rank columns (rank 1 = most central).
    rankings = {}
    rank_columns = []
    for metric in centralities:
        rankings[metric] = cent_df[metric].nlargest(top_n).index.tolist()
        rank_col = f"{metric}_rank"
        cent_df[rank_col] = cent_df[metric].rank(ascending=False)
        rank_columns.append(rank_col)

    # Hub score = mean rank across all metrics.
    cent_df["hub_score"] = cent_df[rank_columns].mean(axis=1)
    cent_df = cent_df.sort_values("hub_score")

    # Persist the table so the documented pipeline artifact actually exists.
    if output:
        cent_df.to_csv(output)

    print(f"Centrality: {len(G.nodes())} nodes analyzed")
    print(f" Top hubs: {cent_df.head(5).index.tolist()}")
    return {"centrality_df": cent_df, "rankings": rankings,
            "csv": output if output else None}
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
## 4. PyVis インタラクティブ可視化
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
def interactive_network(G, output="network_interactive.html",
                        height="700px", width="100%"):
    """
    Render the network as an interactive PyVis HTML page.

    Nodes are colored by their "community" attribute (0 when absent) and
    sized by degree; edges carry their "weight" (1 when absent) as value.

    Parameters:
        G: nx.Graph — network
        output: str — output HTML path
        height: str — canvas height
        width: str — canvas width

    Returns:
        str — the output path.
    """
    from pyvis.network import Network

    nt = Network(height=height, width=width, notebook=False,
                 bgcolor="#ffffff", font_color="black")

    # Community coloring; the palette is cycled when there are more
    # communities than colors.
    community_map = nx.get_node_attributes(G, "community")
    colors = ["#e41a1c", "#377eb8", "#4daf4a", "#984ea3",
              "#ff7f00", "#ffff33", "#a65628", "#f781bf"]

    for node in G.nodes():
        comm = community_map.get(node, 0)
        degree = G.degree(node)
        nt.add_node(str(node), label=str(node),
                    color=colors[comm % len(colors)],
                    size=max(5, min(degree * 3, 50)),  # clamp to 5..50
                    title=f"{node}\nDegree: {degree}\nCommunity: {comm}")

    for u, v, data in G.edges(data=True):
        # Coerce to a plain float: weights read from pandas/NumPy may be
        # NumPy integers, which the JSON export of the graph cannot encode.
        weight = float(data.get("weight", 1))
        nt.add_edge(str(u), str(v), value=weight)

    nt.toggle_physics(True)
    nt.show_buttons(filter_=["physics"])
    nt.save_graph(output)

    print(f"Interactive Network → {output} "
          f"({G.number_of_nodes()} nodes, {G.number_of_edges()} edges)")
    return output
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
---
|
|
261
|
+
|
|
262
|
+
## パイプライン統合
|
|
263
|
+
|
|
264
|
+
```
|
|
265
|
+
eda-correlation → network-visualization → advanced-visualization
|
|
266
|
+
(相関解析) (ネットワーク解析) (高度可視化)
|
|
267
|
+
│ │ ↓
|
|
268
|
+
graph-neural-networks ───┘ interactive-dashboard
|
|
269
|
+
(GNN) (ダッシュボード)
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
## パイプライン出力
|
|
273
|
+
|
|
274
|
+
| ファイル | 説明 | 次スキル |
|
|
275
|
+
|---------|------|---------|
|
|
276
|
+
| `network_communities.png` | コミュニティ構造 | → presentation |
|
|
277
|
+
| `centrality_analysis.csv` | 中心性指標 | → feature-importance |
|
|
278
|
+
| `network_interactive.html` | PyVis 図 | → dashboard |
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-reproducible-reporting
|
|
3
|
+
description: |
|
|
4
|
+
再現可能レポーティングスキル。Quarto 科学文書・
|
|
5
|
+
Jupyter Book 多章構成・Papermill パラメトリック実行・
|
|
6
|
+
nbconvert 自動変換・Sphinx-Gallery コード例ドキュメント。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Reproducible Reporting
|
|
10
|
+
|
|
11
|
+
再現可能な科学レポート・文書生成パイプラインを提供し、
|
|
12
|
+
コード → 実行 → 文書化の自動化を実現する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- Quarto で再現可能な科学文書を作成するとき
|
|
17
|
+
- Jupyter Book で多章構成の文書を構築するとき
|
|
18
|
+
- Papermill でパラメトリック実行を自動化するとき
|
|
19
|
+
- nbconvert でノートブックを各種形式に変換するとき
|
|
20
|
+
- CI/CD で解析レポートを自動生成するとき
|
|
21
|
+
- 複数パラメータセットで解析を繰り返し実行するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. Quarto 科学文書テンプレート
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import os
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def generate_quarto_document(title="Scientific Analysis Report",
                             author="SATORI",
                             format_type="html",
                             output_dir="quarto_project"):
    """
    Generate a Quarto scientific-document project skeleton.

    Writes ``_quarto.yml``, ``report.qmd`` and ``references.bib`` into
    ``output_dir`` (created if missing). All files are written as UTF-8.

    Parameters:
        title: str — document title
        author: str — author name
        format_type: str — "html" / "pdf" / "docx" / "revealjs"
        output_dir: str — output directory

    Returns:
        str — ``output_dir``.
    """
    def _yaml_escape(text):
        # Escape for a YAML double-quoted scalar.
        return str(text).replace("\\", "\\\\").replace('"', '\\"')

    os.makedirs(output_dir, exist_ok=True)
    safe_title = _yaml_escape(title)
    safe_author = _yaml_escape(author)

    # _quarto.yml
    # NOTE(review): nature.csl is referenced below but not generated here —
    # confirm it is supplied before rendering.
    quarto_config = f"""project:
  type: default
  output-dir: _output

format:
  {format_type}:
    toc: true
    toc-depth: 3
    number-sections: true
    code-fold: true
    code-tools: true
    theme: cosmo

execute:
  echo: true
  warning: false
  cache: true

bibliography: references.bib
csl: nature.csl
"""

    # Main document
    main_qmd = f"""---
title: "{safe_title}"
author: "{safe_author}"
date: today
format:
  {format_type}:
    code-fold: true
    code-tools: true
jupyter: python3
---

## はじめに

このレポートは SATORI スキルを用いた再現可能な科学解析文書です。

```{{python}}
#| label: setup
#| echo: false

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

# パラメータ (Papermill 互換)
n_samples = 1000
random_seed = 42
```

## データ概要

```{{python}}
#| label: data-summary
#| tbl-cap: "データセット概要"

np.random.seed(random_seed)
df = pd.DataFrame({{
    "x": np.random.randn(n_samples),
    "y": np.random.randn(n_samples),
    "group": np.random.choice(["A", "B", "C"], n_samples)
}})
df.describe()
```

## 可視化

```{{python}}
#| label: fig-scatter
#| fig-cap: "散布図"

fig, ax = plt.subplots(figsize=(8, 6))
for g, sub in df.groupby("group"):
    ax.scatter(sub["x"], sub["y"], label=g, alpha=0.6, s=20)
ax.legend()
ax.set_xlabel("X")
ax.set_ylabel("Y")
plt.show()
```

## 結論

解析結果のサマリーを記載する。

## References
"""

    # references.bib (placeholder entry)
    bib_template = """@article{example2024,
  title={Example Reference},
  author={Author, A.},
  journal={Journal},
  year={2024}
}
"""

    # The templates contain non-ASCII text, so never rely on the platform
    # default encoding.
    files = {
        "_quarto.yml": quarto_config,
        "report.qmd": main_qmd,
        "references.bib": bib_template,
    }
    for filename, content in files.items():
        with open(os.path.join(output_dir, filename), "w",
                  encoding="utf-8") as f:
            f.write(content)

    print(f"Quarto project → {output_dir}/")
    print(f" Build: cd {output_dir} && quarto render report.qmd")
    return output_dir
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## 2. Papermill パラメトリック実行
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
def papermill_parametric_run(template_notebook, output_dir,
                             parameter_sets, kernel="python3"):
    """
    Execute one template notebook once per parameter set with Papermill.

    Each run is written to ``output_dir/run_NNN.ipynb``. A failing run is
    recorded and does not stop the remaining runs.

    Parameters:
        template_notebook: str — path of the template notebook
        output_dir: str — output directory (created if missing)
        parameter_sets: list[dict] — one parameter dict per run
        kernel: str — kernel name

    Returns:
        pd.DataFrame with columns run, params, output, status; status is
        "success" or "error: <message>". Empty (but with the same columns)
        when ``parameter_sets`` is empty.
    """
    import pandas as pd
    import papermill as pm

    os.makedirs(output_dir, exist_ok=True)
    results = []

    for i, params in enumerate(parameter_sets):
        output_path = os.path.join(output_dir, f"run_{i:03d}.ipynb")
        try:
            pm.execute_notebook(
                template_notebook,
                output_path,
                parameters=params,
                kernel_name=kernel)
        except Exception as e:
            # Deliberately broad: a batch run should record the failure
            # and carry on with the next parameter set.
            status = f"error: {str(e)}"
        else:
            status = "success"
        results.append({
            "run": i, "params": params,
            "output": output_path, "status": status})

    # Fix the columns explicitly so an empty batch still has a "status"
    # column instead of raising KeyError below.
    results_df = pd.DataFrame(
        results, columns=["run", "params", "output", "status"])
    n_success = (results_df["status"] == "success").sum()
    print(f"Papermill: {n_success}/{len(parameter_sets)} runs succeeded")
    return results_df
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## 3. Jupyter Book 多章構成
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
def generate_jupyter_book(title="Scientific Analysis Book",
                          chapters=None,
                          output_dir="jupyter_book"):
    """
    Generate a Jupyter Book project skeleton.

    Writes ``_config.yml``, ``_toc.yml`` and one Markdown stub per chapter
    (existing chapter files are left untouched). The first chapter becomes
    the book root; the remaining ones are listed under ``chapters:``.
    All files are written as UTF-8.

    Parameters:
        title: str — book title
        chapters: list[dict] | None — chapter info [{"title": ..., "file": ...}];
            None or an empty list selects the default five chapters
        output_dir: str — output directory

    Returns:
        str — ``output_dir``.
    """
    os.makedirs(output_dir, exist_ok=True)

    if not chapters:
        chapters = [
            {"title": "Introduction", "file": "intro"},
            {"title": "Data Loading", "file": "ch01_data"},
            {"title": "Exploratory Analysis", "file": "ch02_eda"},
            {"title": "Modeling", "file": "ch03_model"},
            {"title": "Results", "file": "ch04_results"},
        ]

    # Escape for a YAML double-quoted scalar.
    safe_title = str(title).replace("\\", "\\\\").replace('"', '\\"')

    # _config.yml
    config = f"""title: "{safe_title}"
author: SATORI
execute:
  execute_notebooks: auto
  timeout: 600
repository:
  url: ""
launch_buttons:
  binderhub_url: ""
sphinx:
  extra_extensions:
    - sphinx_proof
"""

    # _toc.yml — the root must not be repeated under ``chapters:`` (a file
    # may appear only once in the TOC), and it has to be a file that this
    # function actually creates.
    root_file = chapters[0]["file"]
    toc_lines = ["format: jb-book", f"root: {root_file}"]
    remaining = chapters[1:]
    if remaining:
        toc_lines.append("chapters:")
        toc_lines.extend(f"  - file: {ch['file']}" for ch in remaining)
    toc = "\n".join(toc_lines) + "\n"

    with open(os.path.join(output_dir, "_config.yml"), "w",
              encoding="utf-8") as f:
        f.write(config)
    with open(os.path.join(output_dir, "_toc.yml"), "w",
              encoding="utf-8") as f:
        f.write(toc)

    # Chapter stubs; never overwrite content the user already wrote.
    for ch in chapters:
        filepath = os.path.join(output_dir, f"{ch['file']}.md")
        if not os.path.exists(filepath):
            content = f"# {ch['title']}\n\nThis chapter covers {ch['title'].lower()}.\n"
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(content)

    print(f"Jupyter Book → {output_dir}/")
    print(f" Build: jupyter-book build {output_dir}")
    return output_dir
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
## 4. nbconvert 自動変換
|
|
270
|
+
|
|
271
|
+
```python
|
|
272
|
+
def batch_convert_notebooks(notebook_dir, output_format="html",
                            output_dir=None, execute=True):
    """
    Convert every ``*.ipynb`` in a directory with ``jupyter nbconvert``.

    A failing conversion is recorded and does not stop the remaining ones.

    Parameters:
        notebook_dir: str — directory containing the notebooks
        output_format: str — "html" / "pdf" / "markdown" / "script"
        output_dir: str | None — destination (None = next to each notebook)
        execute: bool — execute the notebook before converting

    Returns:
        pd.DataFrame with columns notebook, status; status is "success" or
        "error: <message>". Empty (but with the same columns) when the
        directory holds no notebooks.
    """
    import subprocess
    import glob

    import pandas as pd

    notebooks = sorted(glob.glob(os.path.join(notebook_dir, "*.ipynb")))
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Everything except the notebook path is the same for every run.
    base_cmd = ["jupyter", "nbconvert", f"--to={output_format}"]
    if execute:
        base_cmd.append("--execute")
    if output_dir:
        base_cmd.extend(["--output-dir", output_dir])

    results = []
    for nb_path in notebooks:
        try:
            subprocess.run([*base_cmd, nb_path], check=True,
                           capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            status = f"error: {e.stderr[:100]}"
        except OSError as e:
            # e.g. the ``jupyter`` executable is not on PATH.
            status = f"error: {e}"
        else:
            status = "success"
        results.append({"notebook": nb_path, "status": status})

    # Fix the columns explicitly so an empty directory still yields a
    # "status" column instead of raising KeyError below.
    results_df = pd.DataFrame(results, columns=["notebook", "status"])
    n_ok = (results_df["status"] == "success").sum()
    print(f"nbconvert ({output_format}): {n_ok}/{len(notebooks)} converted")
    return results_df
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
---
|
|
313
|
+
|
|
314
|
+
## パイプライン統合
|
|
315
|
+
|
|
316
|
+
```
|
|
317
|
+
[解析完了] → reproducible-reporting → presentation-design
|
|
318
|
+
(レポート自動生成) (プレゼン作成)
|
|
319
|
+
│ ↓
|
|
320
|
+
interactive-dashboard academic-writing
|
|
321
|
+
(ダッシュボード) (論文執筆)
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
## パイプライン出力
|
|
325
|
+
|
|
326
|
+
| ファイル | 説明 | 次スキル |
|
|
327
|
+
|---------|------|---------|
|
|
328
|
+
| `quarto_project/` | Quarto プロジェクト | → quarto render |
|
|
329
|
+
| `papermill_runs/` | パラメトリック実行結果 | → 集計 |
|
|
330
|
+
| `jupyter_book/` | Jupyter Book プロジェクト | → jb build |
|