@nahisaho/satori 0.21.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. package/README.md +67 -39
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-active-learning/SKILL.md +289 -0
  4. package/src/.github/skills/scientific-advanced-visualization/SKILL.md +310 -0
  5. package/src/.github/skills/scientific-automl/SKILL.md +264 -0
  6. package/src/.github/skills/scientific-clinical-pharmacology/SKILL.md +361 -0
  7. package/src/.github/skills/scientific-clinical-standards/SKILL.md +444 -0
  8. package/src/.github/skills/scientific-crispr-design/SKILL.md +369 -0
  9. package/src/.github/skills/scientific-ensemble-methods/SKILL.md +263 -0
  10. package/src/.github/skills/scientific-environmental-ecology/SKILL.md +5 -0
  11. package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +5 -0
  12. package/src/.github/skills/scientific-epigenomics-chromatin/SKILL.md +5 -0
  13. package/src/.github/skills/scientific-glycomics/SKILL.md +274 -0
  14. package/src/.github/skills/scientific-immunoinformatics/SKILL.md +9 -0
  15. package/src/.github/skills/scientific-interactive-dashboard/SKILL.md +346 -0
  16. package/src/.github/skills/scientific-lipidomics/SKILL.md +284 -0
  17. package/src/.github/skills/scientific-metabolomics/SKILL.md +3 -0
  18. package/src/.github/skills/scientific-metagenome-assembled-genomes/SKILL.md +299 -0
  19. package/src/.github/skills/scientific-missing-data-analysis/SKILL.md +312 -0
  20. package/src/.github/skills/scientific-model-organism-db/SKILL.md +8 -0
  21. package/src/.github/skills/scientific-public-health-data/SKILL.md +11 -0
  22. package/src/.github/skills/scientific-systems-biology/SKILL.md +11 -0
  23. package/src/.github/skills/scientific-transfer-learning/SKILL.md +298 -0
  24. package/src/.github/skills/scientific-uncertainty-quantification/SKILL.md +286 -0
@@ -0,0 +1,310 @@
1
+ ---
2
+ name: scientific-advanced-visualization
3
+ description: |
4
+ 科学データ高度可視化スキル。Plotly インタラクティブ 3D ・
5
+ Altair 宣言的可視化・Seaborn 統計プロット・
6
+ アニメーション・Parallel Coordinates・出版品質図。
7
+ ---
8
+
9
+ # Scientific Advanced Visualization
10
+
11
+ 科学データのインタラクティブ可視化・3D レンダリング・
12
+ 出版品質図・アニメーションを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - インタラクティブな 3D 散布図・サーフェスプロットを描くとき
17
+ - Plotly / Altair で動的可視化を作成するとき
18
+ - 多変量データを Parallel Coordinates / Radar で可視化するとき
19
+ - 論文投稿用の出版品質 (Nature/Science style) 図を作成するとき
20
+ - 時系列・シミュレーション結果のアニメーションを作成するとき
21
+ - 複数パネルの複合図を作成するとき
22
+
23
+ ---
24
+
25
+ ## Quick Start
26
+
27
+ ## 1. Plotly インタラクティブ 3D
28
+
29
+ ```python
30
+ import numpy as np
31
+ import pandas as pd
32
+
33
+
34
def plotly_3d_scatter(df, x, y, z, color=None, size=None,
                      title="3D Scatter Plot"):
    """
    Build an interactive Plotly 3D scatter plot and save it as HTML.

    Parameters:
        df: pd.DataFrame — input data
        x, y, z: str — column names mapped to the three axes
        color: str | None — column used for colouring
        size: str | None — column used for marker size
        title: str — figure title

    Returns:
        The Plotly figure. As a side effect the figure is written to
        "3d_scatter.html" in the working directory and a one-line
        summary is printed.
    """
    import plotly.express as px

    output_html = "3d_scatter.html"
    # Axis titles simply echo the column names.
    axis_titles = dict(xaxis_title=x, yaxis_title=y, zaxis_title=z)

    fig = px.scatter_3d(
        df, x=x, y=y, z=z,
        color=color, size=size,
        title=title, opacity=0.7,
    )
    fig.update_layout(scene=axis_titles, width=900, height=700)
    fig.write_html(output_html)

    print(f"3D Scatter: {len(df)} points → {output_html}")
    return fig
59
+
60
+
61
def plotly_surface(X_grid, Y_grid, Z_grid, title="Surface Plot"):
    """
    Build a Plotly 3D surface plot and save it as HTML.

    Parameters:
        X_grid, Y_grid, Z_grid: np.ndarray — mesh-grid coordinates/values
        title: str — figure title

    Returns:
        The Plotly figure. As a side effect the figure is written to
        "surface_plot.html" in the working directory and a one-line
        summary (using Z_grid.shape) is printed.
    """
    import plotly.graph_objects as go

    output_html = "surface_plot.html"

    surface = go.Surface(x=X_grid, y=Y_grid, z=Z_grid,
                         colorscale="Viridis")
    fig = go.Figure(data=[surface])

    scene_axes = dict(xaxis_title="X", yaxis_title="Y", zaxis_title="Z")
    fig.update_layout(title=title, scene=scene_axes,
                      width=900, height=700)
    fig.write_html(output_html)

    print(f"Surface: {Z_grid.shape} grid → {output_html}")
    return fig
82
+ ```
83
+
84
+ ## 2. Altair 宣言的可視化
85
+
86
+ ```python
87
def altair_faceted_chart(df, x, y, color, facet_col=None,
                         chart_type="scatter"):
    """
    Build a declarative Altair chart, optionally faceted, and save it as HTML.

    Parameters:
        df: pd.DataFrame — input data
        x, y: str — axis columns
        color: str — column used for colouring
        facet_col: str | None — column to facet on (3 panels per row)
        chart_type: str — "scatter" / "line" / "bar" / "box";
            any other value falls back to a plain circle mark

    Returns:
        The Altair chart. As a side effect the chart is written to
        "altair_chart.html" in the working directory and a one-line
        summary is printed.
    """
    import altair as alt

    base = alt.Chart(df).encode(
        x=alt.X(x, scale=alt.Scale(zero=False)),
        y=alt.Y(y, scale=alt.Scale(zero=False)),
        color=color)

    if chart_type == "scatter":
        chart = base.mark_circle(size=60, opacity=0.7)
    elif chart_type == "line":
        chart = base.mark_line()
    elif chart_type == "bar":
        chart = base.mark_bar()
    elif chart_type == "box":
        chart = base.mark_boxplot()
    else:
        chart = base.mark_circle()

    # Panel size and pan/zoom must be set on the unit chart *before*
    # faceting: a faceted chart has no top-level width/height, so setting
    # them afterwards makes the spec fail validation on save.
    chart = chart.properties(width=300, height=250).interactive()

    if facet_col:
        chart = chart.facet(facet_col, columns=3)

    path = "altair_chart.html"
    chart.save(path)
    print(f"Altair {chart_type}: {len(df)} rows → {path}")
    return chart
126
+ ```
127
+
128
+ ## 3. 多変量可視化
129
+
130
+ ```python
131
def parallel_coordinates_plot(df, class_col, features=None,
                              title="Parallel Coordinates"):
    """
    Draw a Plotly parallel-coordinates plot and save it as HTML.

    Parameters:
        df: pd.DataFrame — input data
        class_col: str — column used to colour the lines
        features: list[str] | None — dimensions to show; when None, every
            numeric column except class_col is used
        title: str — figure title

    Returns:
        The Plotly figure. As a side effect the figure is written to
        "parallel_coordinates.html" in the working directory and a
        one-line summary is printed.
    """
    import plotly.express as px

    if features is None:
        # Default to all numeric columns, leaving out the colour column.
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        features = numeric_cols.tolist()
        if class_col in features:
            features.remove(class_col)

    colour_scale = px.colors.diverging.Tealrose
    fig = px.parallel_coordinates(
        df,
        color=class_col,
        dimensions=features,
        title=title,
        color_continuous_scale=colour_scale,
    )
    fig.update_layout(width=1000, height=500)

    output_html = "parallel_coordinates.html"
    fig.write_html(output_html)

    print(f"Parallel Coordinates: {len(features)} dims → {output_html}")
    return fig
159
+
160
+
161
def radar_chart(categories, values_dict, title="Radar Chart"):
    """
    Draw a radar (spider) chart comparing several groups and save it as HTML.

    Parameters:
        categories: list[str] — axis labels (any sequence is accepted)
        values_dict: dict[str, list[float]] — {group name: values}, one
            value per category (lists, tuples, arrays and Series all work)
        title: str — figure title

    Returns:
        The Plotly figure. As a side effect the figure is written to
        "radar_chart.html" in the working directory and a one-line
        summary is printed.
    """
    import plotly.graph_objects as go

    # Work on plain lists: `ndarray + [x]` would add element-wise instead
    # of appending, and tuple + list raises TypeError.
    axis_labels = list(categories)
    # Repeat the first label so the polygon closes; [:1] keeps an empty
    # input empty instead of raising IndexError.
    closed_theta = axis_labels + axis_labels[:1]

    fig = go.Figure()

    for name, vals in values_dict.items():
        radii = list(vals)
        fig.add_trace(go.Scatterpolar(
            r=radii + radii[:1],
            theta=closed_theta,
            fill="toself", name=name, opacity=0.6))

    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True)),
        title=title, width=600, height=500)

    path = "radar_chart.html"
    fig.write_html(path)
    print(f"Radar: {len(values_dict)} groups × {len(categories)} axes → {path}")
    return fig
188
+ ```
189
+
190
+ ## 4. 出版品質図 (Nature/Science style)
191
+
192
+ ```python
193
def publication_figure(plot_func, figsize=(3.5, 2.8),
                       dpi=300, style="nature",
                       output="publication_fig.pdf"):
    """
    Render a publication-quality (Nature/Science style) matplotlib figure.

    Parameters:
        plot_func: callable — drawing function that receives the Axes
        figsize: tuple — figure size in inches (Nature 1 column = 3.5 in)
        dpi: int — output resolution
        style: str — "nature" / "science" / "acs"; unknown values fall
            back to the "nature" settings
        output: str — output path (.pdf / .svg / .png)

    Returns:
        The output path. The figure is saved to that path and a one-line
        summary is printed.
    """
    import matplotlib.pyplot as plt
    import matplotlib as mpl

    # Per-journal rcParams overrides.
    style_params = {
        "nature": {
            "font.family": "Arial",
            "font.size": 7,
            "axes.linewidth": 0.5,
            "xtick.major.width": 0.5,
            "ytick.major.width": 0.5,
            "lines.linewidth": 1.0,
            "lines.markersize": 3,
        },
        "science": {
            "font.family": "Helvetica",
            "font.size": 8,
            "axes.linewidth": 0.6,
            "xtick.major.width": 0.6,
            "ytick.major.width": 0.6,
            "lines.linewidth": 1.2,
            "lines.markersize": 4,
        },
        "acs": {
            "font.family": "Arial",
            "font.size": 9,
            "axes.linewidth": 0.5,
            "xtick.major.width": 0.5,
            "ytick.major.width": 0.5,
            "lines.linewidth": 1.0,
            "lines.markersize": 4,
        },
    }
    rc_overrides = style_params.get(style, style_params["nature"])

    with mpl.rc_context(rc_overrides):
        fig, ax = plt.subplots(figsize=figsize)
        try:
            plot_func(ax)
            ax.spines["top"].set_visible(False)
            ax.spines["right"].set_visible(False)
            # Operate on `fig` explicitly: plot_func may have made another
            # figure current, which the pyplot-level calls would hit instead.
            fig.tight_layout()
            fig.savefig(output, dpi=dpi, bbox_inches="tight")
        finally:
            # Always release the figure, even if drawing or saving fails.
            plt.close(fig)

    print(f"Publication figure ({style}): {figsize} @ {dpi}dpi → {output}")
    return output
251
+ ```
252
+
253
+ ## 5. アニメーション
254
+
255
+ ```python
256
def _padded_axis_range(values, pad=0.1):
    """Return [low, high] for *values* with each bound pushed outward by pad * |bound|."""
    low, high = values.min(), values.max()
    # Subtract/add the magnitude so negative bounds also move outward
    # (a plain `low * 0.9` would pull a negative minimum toward zero and clip data).
    return [low - abs(low) * pad, high + abs(high) * pad]


def create_animation(data_frames, x_col, y_col, time_col,
                     title="Animation", fps=10):
    """
    Build a Plotly scatter animation and save it as HTML.

    Parameters:
        data_frames: pd.DataFrame — data including the time column
        x_col, y_col: str — axis columns
        time_col: str — time / frame column
        title: str — figure title
        fps: int — frame rate (frame duration is 1000 // fps ms)

    Returns:
        The Plotly figure. As a side effect the figure is written to
        "animation.html" in the working directory and a one-line
        summary is printed.
    """
    import plotly.express as px

    # Fixed axis ranges across frames, padded by 10% of each bound.
    fig = px.scatter(data_frames, x=x_col, y=y_col,
                     animation_frame=time_col,
                     title=title, opacity=0.7,
                     range_x=_padded_axis_range(data_frames[x_col]),
                     range_y=_padded_axis_range(data_frames[y_col]))

    fig.update_layout(
        width=800, height=600,
        updatemenus=[dict(type="buttons",
                          buttons=[dict(label="▶ Play",
                                        method="animate",
                                        args=[None, {"frame": {"duration": 1000 // fps}}])])])

    path = "animation.html"
    fig.write_html(path)
    print(f"Animation: {data_frames[time_col].nunique()} frames @ {fps}fps → {path}")
    return fig
289
+ ```
290
+
291
+ ---
292
+
293
+ ## パイプライン統合
294
+
295
+ ```
296
+ eda-correlation → advanced-visualization → presentation-design
297
+ (探索的解析) (高度可視化) (プレゼンテーション)
298
+ │ │ ↓
299
+ pca-tsne ───────────────┘ interactive-dashboard
300
+ (次元削減) (ダッシュボード)
301
+ ```
302
+
303
+ ## パイプライン出力
304
+
305
+ | ファイル | 説明 | 次スキル |
306
+ |---------|------|---------|
307
+ | `3d_scatter.html` | インタラクティブ 3D 散布図 | → dashboard |
308
+ | `publication_fig.pdf` | 出版品質図 | → presentation |
309
+ | `parallel_coordinates.html` | 多変量可視化 | → reporting |
310
+ | `animation.html` | アニメーション | → presentation |
@@ -0,0 +1,264 @@
1
+ ---
2
+ name: scientific-automl
3
+ description: |
4
+ AutoML パイプラインスキル。Optuna ハイパーパラメータ最適化・
5
+ FLAML 高速 AutoML・Auto-sklearn モデル選択・
6
+ NAS (Neural Architecture Search)・
7
+ 特徴量エンジニアリング自動化・モデル比較パイプライン。
8
+ ---
9
+
10
+ # Scientific AutoML
11
+
12
+ ハイパーパラメータ最適化・モデル選択・特徴量エンジニアリングを
13
+ 自動化する AutoML パイプラインを提供する。
14
+
15
+ ## When to Use
16
+
17
+ - Optuna/Hyperopt でハイパーパラメータを最適化するとき
18
+ - 複数モデルの自動比較・選択を行うとき
19
+ - FLAML/Auto-sklearn で高速な AutoML を実行するとき
20
+ - 特徴量エンジニアリングを自動化するとき
21
+ - Neural Architecture Search (NAS) を設計するとき
22
+ - モデル選択根拠のレポートを生成するとき
23
+
24
+ ---
25
+
26
+ ## Quick Start
27
+
28
+ ## 1. Optuna ハイパーパラメータ最適化
29
+
30
+ ```python
31
+ import optuna
32
+ import numpy as np
33
+ import pandas as pd
34
+ from sklearn.model_selection import cross_val_score
35
+ from sklearn.ensemble import (
36
+ RandomForestClassifier, GradientBoostingClassifier)
37
+ from sklearn.svm import SVC
38
+ from sklearn.metrics import make_scorer, f1_score
39
+
40
+
41
def optuna_optimize(X, y, model_type="rf", n_trials=100,
                    cv=5, scoring="f1_macro", direction="maximize"):
    """
    Optuna-based hyperparameter optimisation.

    Parameters:
        X: np.ndarray — features
        y: np.ndarray — labels
        model_type: str — "rf" / "gbm" / "svm"
        n_trials: int — number of trials
        cv: int — number of CV folds
        scoring: str — scikit-learn scoring name
        direction: str — "maximize" / "minimize"

    Returns:
        The finished optuna study. The best score and parameters are printed.

    Raises:
        ValueError: if model_type is not one of the supported values.
    """
    supported_models = ("rf", "gbm", "svm")
    # Fail fast: otherwise an unknown model type only surfaces inside the
    # first trial as an unbound-variable error.
    if model_type not in supported_models:
        raise ValueError(
            f"Unknown model_type {model_type!r}; "
            f"expected one of {supported_models}")

    def objective(trial):
        """Sample hyperparameters for the chosen model and return the mean CV score."""
        if model_type == "rf":
            params = {
                "n_estimators": trial.suggest_int("n_estimators", 50, 500),
                "max_depth": trial.suggest_int("max_depth", 3, 20),
                "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
                "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
                "max_features": trial.suggest_categorical(
                    "max_features", ["sqrt", "log2", None]),
            }
            model = RandomForestClassifier(**params, random_state=42)

        elif model_type == "gbm":
            params = {
                "n_estimators": trial.suggest_int("n_estimators", 50, 500),
                "max_depth": trial.suggest_int("max_depth", 3, 10),
                "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
                "subsample": trial.suggest_float("subsample", 0.5, 1.0),
                "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
            }
            model = GradientBoostingClassifier(**params, random_state=42)

        else:  # "svm" — guaranteed by the validation above
            params = {
                "C": trial.suggest_float("C", 0.01, 100, log=True),
                "kernel": trial.suggest_categorical(
                    "kernel", ["rbf", "poly", "sigmoid"]),
                "gamma": trial.suggest_categorical("gamma", ["scale", "auto"]),
            }
            model = SVC(**params, probability=True, random_state=42)

        scores = cross_val_score(model, X, y, cv=cv, scoring=scoring)
        return scores.mean()

    # Silence per-trial logging; only the summary below is printed.
    optuna.logging.set_verbosity(optuna.logging.WARNING)
    study = optuna.create_study(direction=direction)
    study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

    print(f"Optuna ({model_type}): best {scoring} = {study.best_value:.4f}")
    print(f"  Best params: {study.best_params}")
    return study
96
+ ```
97
+
98
+ ## 2. マルチモデル AutoML パイプライン
99
+
100
+ ```python
101
+ from sklearn.linear_model import LogisticRegression
102
+ from sklearn.neighbors import KNeighborsClassifier
103
+ from sklearn.tree import DecisionTreeClassifier
104
+ from sklearn.neural_network import MLPClassifier
105
+
106
+
107
def automl_model_selection(X, y, cv=5, scoring="f1_macro",
                           n_trials_per_model=50, output_path=None):
    """
    AutoML multi-model selection pipeline.

    Tunes "rf", "gbm" and "svm" with optuna_optimize, scores three untuned
    baselines (logistic regression, k-NN, decision tree) with
    cross-validation, and ranks everything by score.

    Parameters:
        X: np.ndarray — features
        y: np.ndarray — labels
        cv: int — number of CV folds
        scoring: str — scikit-learn scoring name
        n_trials_per_model: int — Optuna trials per tuned model
        output_path: str | None — when given, the comparison table is also
            written there as CSV (e.g. "model_comparison.csv")

    Returns:
        pd.DataFrame with columns model_type / best_score / best_params /
        n_trials, sorted by best_score in descending order.
    """
    model_types = ["rf", "gbm", "svm"]
    results = []

    # Tuned models.
    for mt in model_types:
        study = optuna_optimize(
            X, y, model_type=mt,
            n_trials=n_trials_per_model, cv=cv, scoring=scoring)
        results.append({
            "model_type": mt,
            "best_score": round(study.best_value, 4),
            "best_params": study.best_params,
            "n_trials": len(study.trials),
        })

    # Simple models with default settings, used as baselines.
    baselines = [
        ("logistic", LogisticRegression(max_iter=1000, random_state=42)),
        ("knn", KNeighborsClassifier()),
        ("dt", DecisionTreeClassifier(random_state=42)),
    ]
    for name, model in baselines:
        scores = cross_val_score(model, X, y, cv=cv, scoring=scoring)
        results.append({
            "model_type": name,
            "best_score": round(scores.mean(), 4),
            "best_params": {},
            "n_trials": 1,
        })

    df = pd.DataFrame(results).sort_values("best_score", ascending=False)

    # Optional export of the comparison table; off by default so existing
    # callers see no new files.
    if output_path is not None:
        df.to_csv(output_path, index=False)

    best = df.iloc[0]
    print(f"AutoML: best = {best['model_type']} "
          f"({scoring} = {best['best_score']})")
    return df
153
+ ```
154
+
155
+ ## 3. 自動特徴量エンジニアリング
156
+
157
+ ```python
158
+ from sklearn.preprocessing import PolynomialFeatures, StandardScaler
159
+ from sklearn.feature_selection import SelectKBest, mutual_info_classif
160
+
161
+
162
def auto_feature_engineering(X, y, max_poly_degree=2,
                             top_k=None, interactions_only=False):
    """
    Automatic feature engineering: scale, expand polynomially, select.

    Parameters:
        X: np.ndarray — original features
        y: np.ndarray — labels
        max_poly_degree: int — polynomial degree
        top_k: int | None — number of features to keep; when None, up to
            three times the original feature count
        interactions_only: bool — generate interaction terms only

    Returns:
        (X_selected, poly, selector): the selected feature matrix, the
        fitted PolynomialFeatures and the fitted SelectKBest.
    """
    from functools import partial

    # NOTE(review): the fitted scaler is not part of the return value, so
    # callers cannot reproduce this scaling on new data from the returned
    # objects alone; the return shape is kept as-is for compatibility.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Polynomial / interaction features.
    poly = PolynomialFeatures(
        degree=max_poly_degree,
        interaction_only=interactions_only,
        include_bias=False)
    X_poly = poly.fit_transform(X_scaled)

    # Feature selection.
    if top_k is None:
        top_k = min(X_poly.shape[1], X.shape[1] * 3)

    # Seed the mutual-information estimator so the selected features are
    # reproducible, matching the fixed random_state used elsewhere.
    score_func = partial(mutual_info_classif, random_state=42)
    selector = SelectKBest(score_func, k=min(top_k, X_poly.shape[1]))
    X_selected = selector.fit_transform(X_poly, y)

    print(f"Feature engineering: {X.shape[1]} → {X_poly.shape[1]} "
          f"→ {X_selected.shape[1]} features")
    return X_selected, poly, selector
194
+ ```
195
+
196
+ ## 4. Optuna 可視化レポート
197
+
198
+ ```python
199
def automl_report(study, output_dir="results"):
    """
    Write a report for an Optuna study.

    Parameters:
        study: optuna.Study — optimisation result
        output_dir: str — output directory (created if missing)

    Returns:
        dict with best_value, best_params and n_trials.

    Side effects:
        Writes "optuna_trials.csv" and, when it can be computed,
        "param_importance.png" into output_dir.
    """
    from pathlib import Path
    import matplotlib.pyplot as plt

    out = Path(output_dir)
    out.mkdir(parents=True, exist_ok=True)

    # Parameter importance plot. Best-effort: a failure here must not stop
    # the rest of the report, but it is reported instead of being hidden.
    fig = None
    try:
        importances = optuna.importance.get_param_importances(study)
        fig, ax = plt.subplots(figsize=(8, 5))
        params = list(importances.keys())
        values = list(importances.values())
        ax.barh(params, values)
        ax.set_xlabel("Importance")
        ax.set_title("Hyperparameter Importance")
        fig.tight_layout()
        fig.savefig(out / "param_importance.png", dpi=150)
    except Exception as exc:
        print(f"AutoML report: parameter importance plot skipped ({exc})")
    finally:
        # Release the figure even when plotting or saving failed.
        if fig is not None:
            plt.close(fig)

    # Optimisation history.
    trials_df = study.trials_dataframe()
    trials_df.to_csv(out / "optuna_trials.csv", index=False)

    # Best result summary.
    best = {
        "best_value": study.best_value,
        "best_params": study.best_params,
        "n_trials": len(study.trials),
    }

    print(f"AutoML report → {out}")
    return best
241
+ ```
242
+
243
+ ---
244
+
245
+ ## パイプライン統合
246
+
247
+ ```
248
+ eda-correlation → automl → ensemble-methods
249
+ (データ探索) (モデル選択) (アンサンブル)
250
+ │ │ ↓
251
+ feature-importance ──┘ uncertainty-quantification
252
+ (特徴量解釈) (不確実性定量化)
253
+
254
+ active-learning
255
+ (能動学習)
256
+ ```
257
+
258
+ ## パイプライン出力
259
+
260
+ | ファイル | 説明 | 次スキル |
261
+ |---------|------|---------|
262
+ | `optuna_trials.csv` | 試行履歴 | → 可視化 |
263
+ | `param_importance.png` | パラメータ重要度 | → レポート |
264
+ | `model_comparison.csv` | モデル比較 | → ensemble-methods |