@nahisaho/satori 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/LICENCE +0 -0
  2. package/README.md +191 -0
  3. package/bin/satori.js +95 -0
  4. package/package.json +29 -0
  5. package/src/.github/skills/scientific-academic-writing/SKILL.md +361 -0
  6. package/src/.github/skills/scientific-academic-writing/assets/acs_article.md +199 -0
  7. package/src/.github/skills/scientific-academic-writing/assets/elsevier_article.md +244 -0
  8. package/src/.github/skills/scientific-academic-writing/assets/ieee_transactions.md +212 -0
  9. package/src/.github/skills/scientific-academic-writing/assets/imrad_standard.md +181 -0
  10. package/src/.github/skills/scientific-academic-writing/assets/nature_article.md +179 -0
  11. package/src/.github/skills/scientific-academic-writing/assets/qiita_technical_article.md +385 -0
  12. package/src/.github/skills/scientific-academic-writing/assets/science_research_article.md +169 -0
  13. package/src/.github/skills/scientific-bioinformatics/SKILL.md +220 -0
  14. package/src/.github/skills/scientific-biosignal-processing/SKILL.md +357 -0
  15. package/src/.github/skills/scientific-causal-inference/SKILL.md +347 -0
  16. package/src/.github/skills/scientific-cheminformatics/SKILL.md +196 -0
  17. package/src/.github/skills/scientific-data-preprocessing/SKILL.md +413 -0
  18. package/src/.github/skills/scientific-data-simulation/SKILL.md +244 -0
  19. package/src/.github/skills/scientific-doe/SKILL.md +360 -0
  20. package/src/.github/skills/scientific-eda-correlation/SKILL.md +141 -0
  21. package/src/.github/skills/scientific-feature-importance/SKILL.md +208 -0
  22. package/src/.github/skills/scientific-image-analysis/SKILL.md +310 -0
  23. package/src/.github/skills/scientific-materials-characterization/SKILL.md +368 -0
  24. package/src/.github/skills/scientific-meta-analysis/SKILL.md +352 -0
  25. package/src/.github/skills/scientific-metabolomics/SKILL.md +326 -0
  26. package/src/.github/skills/scientific-ml-classification/SKILL.md +265 -0
  27. package/src/.github/skills/scientific-ml-regression/SKILL.md +215 -0
  28. package/src/.github/skills/scientific-multi-omics/SKILL.md +303 -0
  29. package/src/.github/skills/scientific-network-analysis/SKILL.md +257 -0
  30. package/src/.github/skills/scientific-pca-tsne/SKILL.md +235 -0
  31. package/src/.github/skills/scientific-pipeline-scaffold/SKILL.md +331 -0
  32. package/src/.github/skills/scientific-process-optimization/SKILL.md +215 -0
  33. package/src/.github/skills/scientific-publication-figures/SKILL.md +208 -0
  34. package/src/.github/skills/scientific-sequence-analysis/SKILL.md +389 -0
  35. package/src/.github/skills/scientific-spectral-signal/SKILL.md +227 -0
  36. package/src/.github/skills/scientific-statistical-testing/SKILL.md +240 -0
  37. package/src/.github/skills/scientific-survival-clinical/SKILL.md +239 -0
  38. package/src/.github/skills/scientific-time-series/SKILL.md +291 -0
@@ -0,0 +1,360 @@
1
+ ---
2
+ name: scientific-doe
3
+ description: |
4
+ 実験計画法(DOE)スキル。直交配列表(L9/L16/L27)、中心複合計画(CCD)、
5
+ Box-Behnken 設計、D-最適計画、応答曲面法(RSM)、交互作用解析、
6
+ ベイズ最適化(Gaussian Process)、効果プロット(主効果/交互作用/Pareto)の
7
+ テンプレートを提供。
8
+ ---
9
+
10
+ # Scientific Design of Experiments (DOE)
11
+
12
+ 体系的な実験計画と最適化のためのスキル。直交表による因子スクリーニングから
13
+ RSM による最適条件探索、ベイズ最適化による逐次最適化まで、実験の各段階に
14
+ 対応するテンプレートを提供する。
15
+
16
+ ## When to Use
17
+
18
+ - 多因子実験の計画(因子・水準の設計)が必要なとき
19
+ - 直交表やCCD で実験回数を最小化したいとき
20
+ - 主効果・交互作用の寄与率を定量化するとき
21
+ - 応答曲面で最適条件を探索するとき
22
+ - ベイズ最適化で逐次実験を行いたいとき
23
+
24
+ ---
25
+
26
+ ## Quick Start
27
+
28
+ ## 1. 因子設計テンプレート
29
+
30
+ ```python
31
+ import numpy as np
32
+ import pandas as pd
33
+
34
def define_factors(factor_dict):
    """
    Summarize a factor definition as a table, print it, and return it.

    Example ``factor_dict``:
        {
            "Temperature": {"levels": [200, 250, 300], "unit": "°C", "type": "continuous"},
            "Pressure": {"levels": [1, 5, 10], "unit": "mTorr", "type": "continuous"},
            "Gas_Ratio": {"levels": [0.2, 0.5, 0.8], "unit": "-", "type": "continuous"},
            "Material": {"levels": ["ZnO", "ITO", "TiO2"], "unit": "-", "type": "categorical"},
        }

    Returns:
        DataFrame with one row per factor and the columns
        Factor, Levels (number of levels), Values (levels as text), Unit, Type.
    """
    rows = []
    for name, spec in factor_dict.items():
        level_values = spec["levels"]
        rows.append({
            "Factor": name,
            "Levels": len(level_values),
            "Values": str(level_values),
            "Unit": spec["unit"],
            "Type": spec["type"],
        })

    summary = pd.DataFrame(rows)
    print("=== Factor Design ===")
    print(summary.to_string(index=False))
    return summary
54
+ ```
55
+
56
+ ## 2. 直交配列表
57
+
58
+ ```python
59
# Taguchi L9 orthogonal array (3 columns x 3 levels); entries are 0-based level indices.
L9 = np.array([
    [0, 0, 0],
    [0, 1, 1],
    [0, 2, 2],
    [1, 0, 1],
    [1, 1, 2],
    [1, 2, 0],
    [2, 0, 2],
    [2, 1, 0],
    [2, 2, 1],
])


def generate_taguchi_design(factor_dict, array="L9"):
    """
    Generate an experimental design from a Taguchi orthogonal array.

    Available arrays: "L9" (9 runs, 3 columns, 3 levels) and
    "L4" (4 runs, 3 columns, 2 levels).

    Parameters:
        factor_dict: {factor_name: {"levels": [...], ...}}; factors are
            assigned to array columns in dict order.
        array: name of the orthogonal array. An unknown name falls back to
            "L9" (kept for backward compatibility) and emits a warning.

    Returns:
        DataFrame with one row per run (index "Run", starting at 1) and one
        column per assigned factor, holding the actual level values.

    Warnings are emitted when the design silently deviates from the request:
    unknown array name, more factors than array columns (extra factors are
    dropped), or a factor with fewer levels than the array uses (level
    indices wrap around with a modulo).
    """
    import warnings

    arrays = {
        "L9": L9,
        "L4": np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0]]),
    }
    if array not in arrays:
        warnings.warn(
            f"Unknown orthogonal array {array!r}; falling back to 'L9'. "
            f"Available arrays: {sorted(arrays)}",
            stacklevel=2,
        )
    oa = arrays.get(array, L9)

    factors = list(factor_dict.keys())
    n_columns = oa.shape[1]
    if len(factors) > n_columns:
        warnings.warn(
            f"The array has only {n_columns} columns; "
            f"factors {factors[n_columns:]} are not included in the design.",
            stacklevel=2,
        )
    used_factors = factors[:n_columns]

    # Number of distinct levels the array addresses (indices are 0-based).
    n_levels = int(oa.max()) + 1
    levels_by_factor = {}
    for factor in used_factors:
        levels = factor_dict[factor]["levels"]
        if len(levels) < n_levels:
            warnings.warn(
                f"Factor {factor!r} has {len(levels)} levels but the array "
                f"uses {n_levels}; level indices will wrap around.",
                stacklevel=2,
            )
        levels_by_factor[factor] = levels

    runs = []
    for row in oa:
        run = {}
        for col, factor in enumerate(used_factors):
            levels = levels_by_factor[factor]
            # Modulo keeps the index valid when a factor has fewer levels.
            run[factor] = levels[row[col] % len(levels)]
        runs.append(run)

    design_df = pd.DataFrame(runs)
    design_df.index.name = "Run"
    design_df.index += 1  # 1-based run numbers
    return design_df
97
+ ```
98
+
99
+ ## 3. 中心複合計画 (CCD)
100
+
101
+ ```python
102
+ from itertools import product
103
+
104
def central_composite_design(factor_dict, alpha="rotatable", center_points=3):
    """
    Build a Central Composite Design (CCD) in real (uncoded) units.

    Only factors whose "type" is "continuous" are used. The design consists of
        - 2^k full-factorial cube points,
        - 2k axial (star) points,
        - ``center_points`` center points,
    in that order, with a "_type" column tagging each run as
    "cube", "axial" or "center".

    alpha:
        "rotatable" -> alpha = (2^k)^(1/4)
        "face"      -> alpha = 1 (face-centered)
        anything else is converted with float() and used as given

    The coded levels -1 / 0 / +1 map to min / midpoint / max of each
    factor's "levels" list.

    Returns:
        DataFrame with index "Run" starting at 1.
    """
    names = [name for name, spec in factor_dict.items()
             if spec["type"] == "continuous"]
    k = len(names)

    if alpha == "rotatable":
        axial_distance = (2 ** k) ** 0.25
    elif alpha == "face":
        axial_distance = 1.0
    else:
        axial_distance = float(alpha)

    # Center and half-width of every factor range, used to decode -1/0/+1.
    centers = {}
    spans = {}
    for name in names:
        levels = factor_dict[name]["levels"]
        low, high = min(levels), max(levels)
        centers[name] = (high + low) / 2
        spans[name] = (high - low) / 2

    runs = []

    # Cube points: every +/-1 combination.
    for signs in product([-1, 1], repeat=k):
        point = {name: centers[name] + sign * spans[name]
                 for name, sign in zip(names, signs)}
        point["_type"] = "cube"
        runs.append(point)

    # Axial points: one factor at +/-alpha, all others at their center.
    for name in names:
        for direction in (-1, 1):
            point = dict(centers)
            point[name] = centers[name] + direction * axial_distance * spans[name]
            point["_type"] = "axial"
            runs.append(point)

    # Replicated center points.
    runs.extend({**centers, "_type": "center"} for _ in range(center_points))

    design_df = pd.DataFrame(runs)
    design_df.index.name = "Run"
    design_df.index += 1
    return design_df
167
+ ```
168
+
169
+ ## 4. 分散分析 (ANOVA) — 因子効果解析
170
+
171
+ ```python
172
+ from scipy.stats import f_oneway
173
+
174
def anova_factor_effects(design_df, response_col, factor_cols):
    """
    Evaluate the main effect of each factor with a one-way ANOVA.

    Parameters:
        design_df: DataFrame holding the factor columns and the response.
        response_col: name of the response column.
        factor_cols: iterable of factor column names to analyse.

    Returns:
        DataFrame with the columns Factor, SS, DF, MS, F_value, p_value and
        Contribution_pct (SS_factor / SS_total * 100), sorted by
        Contribution_pct in descending order. Factors with fewer than two
        levels are skipped; if nothing remains, an empty DataFrame with the
        same columns is returned.
    """
    columns = ["Factor", "SS", "DF", "MS", "F_value", "p_value", "Contribution_pct"]

    response = design_df[response_col]
    grand_mean = response.mean()
    ss_total = np.sum((response - grand_mean) ** 2)

    results = []
    for factor in factor_cols:
        groups = [group[response_col].values
                  for _, group in design_df.groupby(factor)]
        if len(groups) < 2:
            # A single level carries no between-group variation.
            continue
        f_val, p_val = f_oneway(*groups)

        # Between-group sum of squares for this factor.
        ss_factor = sum(len(g) * (np.mean(g) - grand_mean) ** 2 for g in groups)
        df_factor = len(groups) - 1
        ms_factor = ss_factor / df_factor

        results.append({
            "Factor": factor,
            "SS": ss_factor,
            "DF": df_factor,
            "MS": ms_factor,
            "F_value": f_val,
            "p_value": p_val,
            "Contribution_pct": ss_factor / ss_total * 100 if ss_total > 0 else 0,
        })

    # Passing explicit columns keeps sort_values valid when results is empty.
    return pd.DataFrame(results, columns=columns).sort_values(
        "Contribution_pct", ascending=False
    )
208
+ ```
209
+
210
+ ## 5. 主効果プロット
211
+
212
+ ```python
213
+ import matplotlib.pyplot as plt
214
+
215
def main_effects_plot(design_df, response_col, factor_cols, figsize=None):
    """
    Draw a main-effects plot with one panel per factor.

    Each panel shows the mean response at every level of the factor and a
    dashed horizontal line at the grand mean. The figure is written to
    "figures/main_effects_plot.png" and then closed.

    figsize defaults to (4 * number_of_factors, 4).
    """
    n_factors = len(factor_cols)
    if figsize is None:
        figsize = (4 * n_factors, 4)

    # squeeze=False always yields a 2-D grid, so the single row can be
    # indexed uniformly regardless of the number of factors.
    fig, axes_grid = plt.subplots(1, n_factors, figsize=figsize, sharey=True,
                                  squeeze=False)
    axes = axes_grid[0]

    overall_mean = design_df[response_col].mean()

    for panel, factor in zip(axes, factor_cols):
        level_means = design_df.groupby(factor)[response_col].mean()
        positions = range(len(level_means))
        panel.plot(positions, level_means.values, "bo-", linewidth=2, markersize=8)
        panel.axhline(overall_mean, color="gray", linestyle="--", alpha=0.5)
        panel.set_xticks(positions)
        panel.set_xticklabels(level_means.index, rotation=45)
        panel.set_xlabel(factor)
        panel.grid(alpha=0.3)

    axes[0].set_ylabel(response_col)
    plt.suptitle("Main Effects Plot", fontweight="bold", y=1.02)
    plt.tight_layout()
    plt.savefig("figures/main_effects_plot.png", dpi=300, bbox_inches="tight")
    plt.close()
241
+ ```
242
+
243
+ ## 6. ベイズ最適化(Gaussian Process)
244
+
245
+ ```python
246
+ from sklearn.gaussian_process import GaussianProcessRegressor
247
+ from sklearn.gaussian_process.kernels import Matern
248
+
249
def bayesian_optimization(objective_func, bounds, n_initial=5,
                          n_iterations=20, kappa=2.576, random_state=None):
    """
    Bayesian optimization with a Gaussian-process surrogate and an
    Upper Confidence Bound (UCB) acquisition function (maximization).

    Parameters:
        objective_func: callable receiving a dict {"param": value} and
            returning the scalar response to maximize.
        bounds: dict {"param": (low, high)}.
        n_initial: number of uniformly random initial evaluations (>= 1).
        n_iterations: number of sequential optimization steps.
        kappa: exploration/exploitation trade-off; the acquisition is
            mu(x) + kappa * sigma(x).
        random_state: optional seed for the sampling of initial points and
            acquisition restarts. None (default) uses NumPy's global random
            state, as before.

    Returns:
        dict with "best_params", "best_value", "X_history", "y_history"
        and the fitted "gp_model".

    Raises:
        ValueError: if n_initial is smaller than 1.
    """
    from scipy.optimize import minimize as scipy_minimize

    if n_initial < 1:
        raise ValueError("n_initial must be at least 1 to fit the surrogate model")

    # np.random (module) and RandomState expose the same uniform() API.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    param_names = list(bounds.keys())
    lows = np.array([bounds[p][0] for p in param_names])
    highs = np.array([bounds[p][1] for p in param_names])
    search_bounds = list(zip(lows, highs))

    # Initial random sampling of the search space.
    X_init = rng.uniform(lows, highs, size=(n_initial, len(param_names)))
    y_init = np.array([objective_func(dict(zip(param_names, x))) for x in X_init])

    X_observed = X_init.tolist()
    y_observed = y_init.tolist()

    gp = GaussianProcessRegressor(kernel=Matern(nu=2.5), n_restarts_optimizer=5,
                                  random_state=42)

    def neg_ucb(x):
        """Negated UCB so that a minimizer maximizes the acquisition."""
        mu, sigma = gp.predict(x.reshape(1, -1), return_std=True)
        # Return a plain float: the optimizer expects a scalar objective.
        return -float(mu[0] + kappa * sigma[0])

    for _ in range(n_iterations):
        gp.fit(np.array(X_observed), np.array(y_observed))

        # Multi-start local optimization of the acquisition function.
        best_x = None
        best_val = float("inf")
        for _ in range(10):
            x0 = rng.uniform(lows, highs)
            res = scipy_minimize(neg_ucb, x0, bounds=search_bounds,
                                 method="L-BFGS-B")
            if res.fun < best_val:
                best_val = res.fun
                best_x = res.x

        if best_x is None:
            # Every restart produced a non-finite acquisition value;
            # fall back to a random point instead of failing on None.
            best_x = rng.uniform(lows, highs)

        # Evaluate the objective at the proposed point.
        y_new = objective_func(dict(zip(param_names, best_x)))
        X_observed.append(best_x.tolist())
        y_observed.append(y_new)

    # Best observed point.
    best_idx = np.argmax(y_observed)
    best_params = dict(zip(param_names, X_observed[best_idx]))
    best_y = y_observed[best_idx]

    return {
        "best_params": best_params,
        "best_value": best_y,
        "X_history": np.array(X_observed),
        "y_history": np.array(y_observed),
        "gp_model": gp,
    }
316
+ ```
317
+
318
+ ## 7. 交互作用プロット
319
+
320
+ ```python
321
def interaction_plot(design_df, response_col, factor1, factor2, figsize=(8, 6)):
    """
    Draw an interaction plot for two factors.

    One line is drawn per level of ``factor2``, showing the mean response at
    each level of ``factor1``. The x axis is shared: positions and tick
    labels are derived from all ``factor1`` levels in the data, so lines stay
    aligned even when a ``factor2`` level does not contain every ``factor1``
    level (e.g. fractional or unbalanced designs).

    The figure is written to "figures/interaction_plot.png" and then closed.
    """
    fig, ax = plt.subplots(figsize=figsize)

    # Common x axis built from every factor1 level, in groupby order.
    all_levels = list(design_df.groupby(factor1)[response_col].mean().index)
    position_of = {level: idx for idx, level in enumerate(all_levels)}

    for level2, group in design_df.groupby(factor2):
        means = group.groupby(factor1)[response_col].mean()
        # Place each mean at the position of its own factor1 level.
        xs = [position_of[level] for level in means.index]
        ax.plot(xs, means.values, "o-", linewidth=2,
                markersize=8, label=f"{factor2}={level2}")

    ax.set_xticks(range(len(all_levels)))
    ax.set_xticklabels(all_levels)
    ax.set_xlabel(factor1)
    ax.set_ylabel(response_col)
    ax.set_title(f"Interaction Plot: {factor1} × {factor2}", fontweight="bold")
    ax.legend()
    ax.grid(alpha=0.3)
    plt.tight_layout()
    plt.savefig("figures/interaction_plot.png", dpi=300, bbox_inches="tight")
    plt.close()
340
+ ```
341
+
342
+ ## References
343
+
344
+ ### Output Files
345
+
346
+ | ファイル | 形式 |
347
+ |---|---|
348
+ | `results/experimental_design.csv` | CSV |
349
+ | `results/anova_factor_effects.csv` | CSV |
350
+ | `results/bayesian_optimization_history.csv` | CSV |
351
+ | `figures/main_effects_plot.png` | PNG |
352
+ | `figures/interaction_plot.png` | PNG |
353
+ | `figures/bayesian_convergence.png` | PNG |
354
+
355
+ #### 依存パッケージ
356
+
357
+ ```
358
+ scipy>=1.10
359
+ scikit-learn>=1.3
360
+ ```
@@ -0,0 +1,141 @@
1
+ ---
2
+ name: scientific-eda-correlation
3
+ description: |
4
+ 探索的データ解析(EDA)と相関分析のスキル。データの分布可視化、相関ヒートマップ、
5
+ 散布図行列の作成を行う際に使用。Scientific Skills Exp-02, 12, 13 で確立したパターン。
6
+ ---
7
+
8
+ # Scientific EDA & Correlation Analysis
9
+
10
+ 探索的データ解析(Exploratory Data Analysis)のパイプラインスキル。
11
+ データ理解の初期段階で使用し、分布・外れ値・変数間相関を把握する。
12
+
13
+ ## When to Use
14
+
15
+ - 新しいデータセットを受け取ったとき
16
+ - 変数間の関係性を把握したいとき
17
+ - 相関ヒートマップを作成したいとき
18
+ - 材料別・群別のボックスプロット比較が必要なとき
19
+
20
+ ## Quick Start
21
+
22
+ ## 標準パイプライン
23
+
24
+ ### 1. 記述統計量の算出
25
+
26
+ ```python
27
+ import pandas as pd
28
+ import numpy as np
29
+
30
def descriptive_statistics(df, numeric_cols, group_col=None):
    """
    Compute descriptive statistics, save them as CSV, and return them.

    When ``group_col`` is given (truthy), statistics are computed per group;
    otherwise over the whole frame. The result of ``describe()`` is written
    to "results/descriptive_statistics.csv".
    """
    source = df.groupby(group_col) if group_col else df
    stats = source[numeric_cols].describe()
    stats.to_csv("results/descriptive_statistics.csv")
    return stats
38
+ ```
39
+
40
+ ### 2. 分布可視化(ボックスプロット + バイオリンプロット)
41
+
42
+ ```python
43
+ import matplotlib.pyplot as plt
44
+ import seaborn as sns
45
+
46
def plot_distributions(df, variables, group_col, figsize=(20, 16), ncols=3):
    """
    Draw one grouped box plot per variable on a grid of subplots.

    Parameters:
        df: source DataFrame.
        variables: column names to plot (one panel each).
        group_col: column used for the x axis grouping.
        figsize: overall figure size.
        ncols: number of grid columns; rows are derived from len(variables).

    The figure is written to "figures/distribution_boxplots.png" and closed.

    Raises:
        ValueError: if ``variables`` is empty.
    """
    n_vars = len(variables)
    if n_vars == 0:
        raise ValueError("variables must contain at least one column name")

    nrows = (n_vars + ncols - 1) // ncols
    # squeeze=False guarantees an array of Axes even for a 1x1 grid,
    # where plt.subplots would otherwise return a bare Axes object.
    fig, axes = plt.subplots(nrows, ncols, figsize=figsize, squeeze=False)
    axes = axes.flatten()

    for ax, var in zip(axes, variables):
        sns.boxplot(data=df, x=group_col, y=var, ax=ax,
                    palette="Set2", showfliers=True)
        ax.set_title(var, fontsize=12, fontweight="bold")
        ax.tick_params(axis="x", rotation=45)

    # Hide the grid cells that have no variable assigned.
    for ax in axes[n_vars:]:
        ax.set_visible(False)

    plt.tight_layout()
    plt.savefig("figures/distribution_boxplots.png", dpi=300, bbox_inches="tight")
    plt.close()
64
+ ```
65
+
66
+ ### 3. 相関ヒートマップ(Exp-02 / Exp-13 パターン)
67
+
68
+ ```python
69
def plot_correlation_heatmap(df, numeric_cols, block_boundaries=None,
                             figsize=(14, 12), method="pearson"):
    """
    Draw an annotated correlation heat map and return the correlation matrix.

    Only the lower triangle (including the diagonal) is shown.

    block_boundaries: optional list of positions at which black horizontal
        and vertical separator lines are drawn (e.g. to mark PSP blocks).

    The figure is written to "figures/correlation_heatmap.png" and closed.
    """
    corr = df[numeric_cols].corr(method=method)

    fig, ax = plt.subplots(figsize=figsize)

    # Mask everything strictly above the diagonal.
    upper_triangle = np.triu(np.ones_like(corr, dtype=bool), k=1)
    heatmap_options = dict(
        mask=upper_triangle,
        annot=True,
        fmt=".2f",
        cmap="RdBu_r",
        center=0,
        vmin=-1,
        vmax=1,
        square=True,
        linewidths=0.5,
        ax=ax,
        annot_kws={"size": 8},
    )
    sns.heatmap(corr, **heatmap_options)

    # Separator lines between variable blocks.
    for boundary in block_boundaries or ():
        ax.axhline(y=boundary, color="black", linewidth=2)
        ax.axvline(x=boundary, color="black", linewidth=2)

    ax.set_title("Correlation Heatmap", fontsize=14, fontweight="bold")
    plt.tight_layout()
    plt.savefig("figures/correlation_heatmap.png", dpi=300, bbox_inches="tight")
    plt.close()
    return corr
95
+ ```
96
+
97
+ ### 4. 散布図行列
98
+
99
+ ```python
100
def plot_scatter_matrix(df, variables, hue_col, figsize=(16, 14)):
    """
    Draw a scatter-plot matrix (pair plot) of the given variables.

    Parameters:
        df: source DataFrame.
        variables: sequence of column names to plot.
        hue_col: column used to colour the points.
        figsize: requested (width, height) of the grid in inches. It is
            translated into seaborn's per-facet ``height`` and ``aspect``,
            because ``pairplot`` has no ``figsize`` argument.

    The figure is written to "figures/scatter_matrix.png" and closed.
    """
    variables = list(variables)  # accept tuples / Index objects as well

    size_options = {}
    if variables:
        width, height = figsize
        # pairplot builds a figure of (n * height * aspect, n * height).
        size_options = {
            "height": height / len(variables),
            "aspect": width / height,
        }

    g = sns.pairplot(df[variables + [hue_col]], hue=hue_col,
                     diag_kind="kde", palette="Set2",
                     plot_kws={"alpha": 0.6, "s": 30},
                     **size_options)
    g.fig.suptitle("Scatter Matrix", y=1.02, fontsize=14, fontweight="bold")
    plt.savefig("figures/scatter_matrix.png", dpi=300, bbox_inches="tight")
    plt.close()
108
+ ```
109
+
110
+ ### 5. PSP ブロック相関分析(Exp-13 独自)
111
+
112
+ ```python
113
def psp_block_correlation(df, process_cols, structure_cols, property_cols):
    """
    Compute the three Process / Structure / Property cross-correlation blocks.

    Returns the tuple (process-structure, structure-property,
    process-property); each block has the first group as rows and the second
    as columns. Every block is also written to a CSV file under "results/".
    The column arguments are concatenated with ``+`` (lists expected).
    """
    def cross_block(row_cols, col_cols):
        # Correlate the union of both groups, then keep the rows x cols block.
        return df[row_cols + col_cols].corr().loc[row_cols, col_cols]

    ps_corr = cross_block(process_cols, structure_cols)
    sp_corr = cross_block(structure_cols, property_cols)
    pp_corr = cross_block(process_cols, property_cols)

    outputs = (
        (ps_corr, "results/PSP_process_structure_corr.csv"),
        (sp_corr, "results/PSP_structure_property_corr.csv"),
        (pp_corr, "results/PSP_process_property_corr.csv"),
    )
    for block, path in outputs:
        block.to_csv(path)

    return ps_corr, sp_corr, pp_corr
124
+ ```
125
+
126
+ ## References
127
+
128
+ ### Output Files
129
+
130
+ | ファイル | 形式 |
131
+ |---|---|
132
+ | `results/descriptive_statistics.csv` | CSV |
133
+ | `figures/distribution_boxplots.png` | PNG (300 DPI) |
134
+ | `figures/correlation_heatmap.png` | PNG (300 DPI) |
135
+ | `figures/scatter_matrix.png` | PNG (300 DPI) |
136
+
137
+ #### 参照実験
138
+
139
+ - **Exp-02**: `sns.heatmap` 相関ヒートマップの基本パターン
140
+ - **Exp-12**: 8 プロセスパラメータの EDA
141
+ - **Exp-13**: PSP 3 ブロック相関行列
@@ -0,0 +1,208 @@
1
+ ---
2
+ name: scientific-feature-importance
3
+ description: |
4
+ 特徴量重要度分析のスキル。Tree-based Feature Importance と Permutation Importance を
5
+ 用いて予測モデルの説明可能性を向上させる際に使用。
6
+ Scientific Skills Exp-05, 12, 13 で確立したパターン。
7
+ ---
8
+
9
+ # Scientific Feature Importance Analysis
10
+
11
+ 機械学習モデルの「どの特徴量が予測に最も寄与しているか」を定量化するスキル。
12
+ Tree-based Importance(MDI)と Permutation Importance の 2 手法を併用して
13
+ ロバストな解釈を提供する。
14
+
15
+ ## When to Use
16
+
17
+ - 機械学習モデルの予測結果を解釈したいとき
18
+ - どのプロセスパラメータが最も影響力を持つか知りたいとき
19
+ - 特徴量選択の根拠が必要なとき
20
+ - 複数ターゲット変数に対する重要度の比較
21
+
22
+ ## Quick Start
23
+
24
+ ## 標準パイプライン
25
+
26
+ ### 1. Tree-based Feature Importance(MDI)
27
+
28
+ ```python
29
+ import numpy as np
30
+ import pandas as pd
31
+ import matplotlib.pyplot as plt
32
+
33
def tree_feature_importance(model, feature_names, target_name,
                            top_n=10, figsize=(10, 6)):
    """
    Plot the ``feature_importances_`` of a fitted model as a horizontal bar
    chart and return all importances.

    Works with any estimator exposing ``feature_importances_``
    (e.g. RandomForest, GradientBoosting, ExtraTrees).

    Returns:
        DataFrame with the columns Feature and Importance, sorted by
        Importance in descending order (all features, not only the top_n).

    The chart of the ``top_n`` features is written to
    "figures/feature_importance_<target_name>.png" and closed.
    """
    fi_df = pd.DataFrame(
        {"Feature": feature_names, "Importance": model.feature_importances_}
    ).sort_values("Importance", ascending=False)

    # Reverse the top rows so the most important feature ends up at the top.
    shown = fi_df.head(top_n).iloc[::-1]
    bar_positions = range(len(shown))

    fig, ax = plt.subplots(figsize=figsize)
    ax.barh(bar_positions, shown["Importance"].values,
            color="steelblue", edgecolor="black")
    ax.set_yticks(bar_positions)
    ax.set_yticklabels(shown["Feature"].values)
    ax.set_xlabel("Feature Importance (MDI)")
    ax.set_title(f"Feature Importance: {target_name}", fontweight="bold")
    plt.tight_layout()
    plt.savefig(f"figures/feature_importance_{target_name}.png",
                dpi=300, bbox_inches="tight")
    plt.close()

    return fi_df
59
+ ```
60
+
61
+ ### 2. Permutation Importance
62
+
63
+ ```python
64
+ from sklearn.inspection import permutation_importance
65
+
66
def permutation_feature_importance(model, X_test, y_test, feature_names,
                                   target_name, n_repeats=10,
                                   top_n=10, figsize=(10, 6)):
    """
    Compute permutation importance, plot it, and return the full table.

    ``permutation_importance`` is run with ``random_state=42``.

    Returns:
        DataFrame with the columns Feature, Importance_mean and
        Importance_std, sorted by Importance_mean in descending order.

    The chart of the ``top_n`` features (with standard-deviation error bars)
    is written to "figures/permutation_importance_<target_name>.png".
    """
    result = permutation_importance(model, X_test, y_test,
                                    n_repeats=n_repeats, random_state=42)
    pi_df = pd.DataFrame({
        "Feature": feature_names,
        "Importance_mean": result.importances_mean,
        "Importance_std": result.importances_std,
    })
    pi_df = pi_df.sort_values("Importance_mean", ascending=False)

    # Reverse the top rows so the most important feature ends up at the top.
    shown = pi_df.head(top_n).iloc[::-1]
    bar_positions = range(len(shown))

    fig, ax = plt.subplots(figsize=figsize)
    ax.barh(bar_positions, shown["Importance_mean"].values,
            xerr=shown["Importance_std"].values,
            color="coral", edgecolor="black", capsize=3)
    ax.set_yticks(bar_positions)
    ax.set_yticklabels(shown["Feature"].values)
    ax.set_xlabel("Permutation Importance")
    ax.set_title(f"Permutation Importance: {target_name}", fontweight="bold")
    plt.tight_layout()
    plt.savefig(f"figures/permutation_importance_{target_name}.png",
                dpi=300, bbox_inches="tight")
    plt.close()

    return pi_df
95
+ ```
96
+
97
+ ### 3. マルチターゲット重要度パネル(Exp-13 パターン)
98
+
99
+ ```python
100
def multi_target_importance_panel(models_dict, feature_names,
                                  top_n=10, ncols=3, figsize=(20, 16)):
    """
    Draw the feature importances of several targets in one figure.

    Parameters:
        models_dict: {target_name: fitted_model}. Models without
            ``feature_importances_`` get a "(No FI available)" placeholder
            panel and are left out of the returned table.
        feature_names: names matching each model's importances.
        top_n: number of features shown per panel.
        ncols: number of grid columns; rows are derived from the target count.
        figsize: overall figure size.

    Returns:
        DataFrame with the columns Feature, Importance and Target for all
        models that expose importances (also written to
        "results/feature_importance.csv"), or an empty DataFrame when there
        is nothing to report.

    The figure is written to "figures/feature_importance_panel.png".
    """
    targets = list(models_dict.keys())
    if not targets:
        # Nothing to draw; a 0-row subplot grid cannot be created.
        return pd.DataFrame()

    nrows = (len(targets) + ncols - 1) // ncols
    # squeeze=False guarantees an array of Axes even for a 1x1 grid.
    fig, axes = plt.subplots(nrows, ncols, figsize=figsize, squeeze=False)
    axes = axes.flatten()

    all_importances = []

    for ax, target in zip(axes, targets):
        model = models_dict[target]
        if not hasattr(model, "feature_importances_"):
            ax.text(0.5, 0.5, f"{target}\n(No FI available)",
                    ha="center", va="center", transform=ax.transAxes)
            continue

        fi_df = pd.DataFrame({
            "Feature": feature_names,
            "Importance": model.feature_importances_,
            "Target": target,
        }).sort_values("Importance", ascending=False)
        all_importances.append(fi_df)

        # Reversed so the most important feature is drawn at the top.
        top = fi_df.head(top_n)
        ax.barh(range(len(top)), top["Importance"].values[::-1],
                color="steelblue", edgecolor="black")
        ax.set_yticks(range(len(top)))
        ax.set_yticklabels(top["Feature"].values[::-1], fontsize=8)
        ax.set_xlabel("Importance", fontsize=9)
        ax.set_title(target, fontweight="bold", fontsize=10)

    # Hide the grid cells that have no target assigned.
    for ax in axes[len(targets):]:
        ax.set_visible(False)

    plt.suptitle("Feature Importance by Target", fontsize=14, fontweight="bold")
    plt.tight_layout()
    plt.savefig("figures/feature_importance_panel.png", dpi=300, bbox_inches="tight")
    plt.close()

    # Persist all importances as a single CSV.
    if all_importances:
        combined = pd.concat(all_importances, ignore_index=True)
        combined.to_csv("results/feature_importance.csv", index=False)
        return combined
    return pd.DataFrame()
151
+ ```
152
+
153
+ ### 4. 部分依存プロット(PDP)
154
+
155
+ ```python
156
+ from sklearn.inspection import PartialDependenceDisplay
157
+
158
def partial_dependence_plots(model, X_train, feature_names,
                             top_features, target_name, figsize=(16, 10)):
    """
    Draw partial dependence plots for the selected features.

    Entries of ``top_features`` that are not present in ``feature_names``
    are ignored. The figure is written to "figures/pdp_<target_name>.png"
    and closed.
    """
    # Translate the requested feature names into column positions.
    feature_indices = []
    for feature in top_features:
        if feature in feature_names:
            feature_indices.append(list(feature_names).index(feature))

    fig, ax = plt.subplots(figsize=figsize)
    PartialDependenceDisplay.from_estimator(
        model,
        X_train,
        feature_indices,
        feature_names=feature_names,
        ax=ax,
    )
    plt.suptitle(f"Partial Dependence: {target_name}", fontweight="bold")
    plt.tight_layout()
    plt.savefig(f"figures/pdp_{target_name}.png", dpi=300, bbox_inches="tight")
    plt.close()
173
+ ```
174
+
175
+ ## パラメータ–物性マッピング表の自動生成
176
+
177
+ ```python
178
def generate_importance_mapping_table(all_fi_df, top_n=3):
    """
    Build a table listing the top-N features of every target.

    Parameters:
        all_fi_df: DataFrame with the columns Target, Feature and Importance
            (long format, one row per target/feature pair).
        top_n: number of features to report per target.

    Returns:
        DataFrame with one row per target and the columns
        Target, Rank_1, Importance_1, ..., Rank_N, Importance_N.
        Importances are formatted as strings with four decimals.
        An empty input yields an empty table with only the Target column.
    """
    if all_fi_df.empty:
        return pd.DataFrame(columns=["Target"])

    mapping = []
    for target in all_fi_df["Target"].unique():
        subset = all_fi_df[all_fi_df["Target"] == target].nlargest(top_n, "Importance")
        # Collect every rank into ONE row per target (wide format).
        entry = {"Target": target}
        for rank, (_, row) in enumerate(subset.iterrows(), 1):
            entry[f"Rank_{rank}"] = row["Feature"]
            entry[f"Importance_{rank}"] = f"{row['Importance']:.4f}"
        mapping.append(entry)
    return pd.DataFrame(mapping)
190
+ ```
191
+
192
+ ## References
193
+
194
+ ### Output Files
195
+
196
+ | ファイル | 形式 |
197
+ |---|---|
198
+ | `results/feature_importance.csv` | CSV |
199
+ | `figures/feature_importance_*.png` | PNG |
200
+ | `figures/permutation_importance_*.png` | PNG |
201
+ | `figures/feature_importance_panel.png` | PNG |
202
+ | `figures/pdp_*.png` | PNG |
203
+
204
+ #### 参照実験
205
+
206
+ - **Exp-05**: Tree-based + Permutation Importance(毒性予測)
207
+ - **Exp-12**: 6 モデルの特徴量重要度比較(エッチング)
208
+ - **Exp-13**: マルチターゲットパネル + パラメータ–物性マッピング表