npm - @nahisaho/satori - Versions diffs - 0.22.0 → 0.23.0 - Mend

@nahisaho/satori 0.22.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/src/.github/skills/scientific-ensemble-methods/SKILL.md ADDED Viewed

@@ -0,0 +1,263 @@
+---
+name: scientific-ensemble-methods
+description: |
+  アンサンブル学習スキル。Stacking/Blending 多段積層・
+  Boosting (XGBoost/LightGBM/CatBoost) 勾配ブースティング・
+  Bagging/Random Subspace・Voting 分類器/回帰器・
+  アンサンブル多様性評価・モデル統合パイプライン。
+---
+# Scientific Ensemble Methods
+複数モデルの組み合わせによる予測精度向上・安定化を実現する
+アンサンブル学習手法の設計・評価パイプラインを提供する。
+## When to Use
+- XGBoost/LightGBM/CatBoost で勾配ブースティングを実行するとき
+- Stacking/Blending で多段アンサンブルを構築するとき
+- 複数モデルの Voting/Averaging で安定予測を得るとき
+- アンサンブルの多様性を評価するとき
+- Out-of-Fold 予測でリーク防止を行うとき
+- モデルの寄与度を分析するとき
+---
+## Quick Start
+## 1. 勾配ブースティング比較
+```python
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import cross_val_score
def compare_boosting(X, y, cv=5, scoring="f1_macro",
                     task="classification"):
    """
    Cross-validate XGBoost / LightGBM / CatBoost and rank them by mean score.

    Every library is an optional dependency: one that cannot be imported is
    skipped, so the result holds one row per installed library.

    Parameters:
        X: np.ndarray — feature matrix
        y: np.ndarray — target labels (classification) or values (regression)
        cv: int — number of cross-validation folds
        scoring: str — scikit-learn scoring name; the default "f1_macro" is a
            classification metric, so pass a regression metric together with
            task="regression"
        task: str — "classification" / "regression"; any other value is
            treated as regression
    Returns:
        pd.DataFrame — columns "model", "mean", "std", sorted by "mean" in
        descending order. Empty (columns still present) when none of the
        three libraries is installed.
    """
    import importlib

    is_classification = task == "classification"
    # (display name, module, classifier class, regressor class,
    #  constructor kwargs shared by both, classifier-only kwargs)
    # NOTE(review): `use_label_encoder` is deprecated in recent XGBoost
    # releases — confirm the minimum supported version before removing it.
    candidates = [
        ("XGBoost", "xgboost", "XGBClassifier", "XGBRegressor",
         {"n_estimators": 200, "max_depth": 6, "learning_rate": 0.1,
          "random_state": 42},
         {"use_label_encoder": False, "eval_metric": "logloss"}),
        ("LightGBM", "lightgbm", "LGBMClassifier", "LGBMRegressor",
         {"n_estimators": 200, "max_depth": 6, "learning_rate": 0.1,
          "random_state": 42, "verbose": -1},
         {}),
        ("CatBoost", "catboost", "CatBoostClassifier", "CatBoostRegressor",
         {"iterations": 200, "depth": 6, "learning_rate": 0.1,
          "random_seed": 42, "verbose": 0},
         {}),
    ]

    results = []
    for label, module_name, clf_name, reg_name, params, clf_only in candidates:
        # Only the import is guarded: an ImportError raised later, during
        # cross-validation, is a real failure and must not be swallowed.
        try:
            module = importlib.import_module(module_name)
        except ImportError:
            continue
        if is_classification:
            model = getattr(module, clf_name)(**params, **clf_only)
        else:
            model = getattr(module, reg_name)(**params)
        scores = cross_val_score(model, X, y, cv=cv, scoring=scoring)
        results.append({"model": label, "mean": scores.mean(),
                        "std": scores.std()})

    # Naming the columns up front keeps sort_values("mean") valid even when
    # no library was available and `results` is empty.
    df = pd.DataFrame(results, columns=["model", "mean", "std"])
    df = df.sort_values("mean", ascending=False)
    if not df.empty:
        print(f"Boosting: best = {df.iloc[0]['model']} "
              f"({scoring} = {df.iloc[0]['mean']:.4f})")
    return df
+```
+## 2. Stacking アンサンブル
+```python
+from sklearn.model_selection import StratifiedKFold
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
+from sklearn.svm import SVC
def stacking_ensemble(X_train, y_train, X_test,
                      base_models=None, meta_model=None,
                      n_folds=5):
    """
    Two-level stacking classifier built on out-of-fold (OOF) predictions.

    Each base model is refitted on every training fold. Its class
    probabilities for the held-out fold become meta-features for the
    training rows, so the meta-model never sees a prediction made by a model
    that was trained on that same row. Test meta-features are the average of
    the per-fold models' probabilities.

    Parameters:
        X_train: np.ndarray — training features
        y_train: np.ndarray — training labels
        X_test: np.ndarray — test features
        base_models: list | None — (name, estimator) pairs; every estimator
            must provide predict_proba. Defaults to random forest, gradient
            boosting and a probability-enabled SVC.
        meta_model: classifier | None — second-level model, must provide
            predict_proba. Defaults to logistic regression.
        n_folds: int — number of stratified CV folds
    Returns:
        tuple — (final_pred, final_proba, meta_model): predicted labels and
        class probabilities for X_test, and the fitted meta-model.
    """
    from sklearn.base import clone

    if base_models is None:
        base_models = [
            ("rf", RandomForestClassifier(n_estimators=200, random_state=42)),
            ("gbm", GradientBoostingClassifier(n_estimators=200, random_state=42)),
            ("svm", SVC(probability=True, random_state=42)),
        ]
    if meta_model is None:
        meta_model = LogisticRegression(max_iter=1000, random_state=42)

    # Fold indices are positional, so DataFrame/Series inputs are converted
    # to arrays before being indexed with them.
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test)

    kf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
    # The splitter is seeded, so every base model gets the same folds;
    # compute them once instead of once per model.
    folds = list(kf.split(X_train, y_train))
    n_classes = len(np.unique(y_train))

    # One block of `n_classes` probability columns per base model.
    oof_preds = np.zeros((len(y_train), len(base_models) * n_classes))
    test_preds = np.zeros((len(X_test), len(base_models) * n_classes))
    for i, (name, model) in enumerate(base_models):
        col_start = i * n_classes
        col_end = (i + 1) * n_classes
        test_fold_preds = np.zeros((len(X_test), n_classes, n_folds))
        for fold, (train_idx, val_idx) in enumerate(folds):
            # clone() yields an unfitted copy and, unlike
            # model.__class__(**model.get_params()), also works for composite
            # estimators whose get_params() contains nested "a__b" keys.
            m = clone(model).fit(X_train[train_idx], y_train[train_idx])
            oof_preds[val_idx, col_start:col_end] = m.predict_proba(
                X_train[val_idx])
            test_fold_preds[:, :, fold] = m.predict_proba(X_test)
        test_preds[:, col_start:col_end] = test_fold_preds.mean(axis=2)
        print(f"  Stacking base: {name} done")

    # Second level: learn how to combine the base models' probabilities.
    meta_model.fit(oof_preds, y_train)
    final_pred = meta_model.predict(test_preds)
    final_proba = meta_model.predict_proba(test_preds)
    print(f"Stacking: {len(base_models)} base models → meta-model")
    return final_pred, final_proba, meta_model
+```
+## 3. Voting アンサンブル
+```python
+from sklearn.ensemble import VotingClassifier, VotingRegressor
def voting_ensemble(X, y, models=None, voting="soft",
                    cv=5, scoring="f1_macro"):
    """
    Cross-validate each model on its own and as one voting classifier.

    Parameters:
        X: np.ndarray — feature matrix
        y: np.ndarray — labels
        models: list | None — (name, model) pairs; defaults to random forest,
            gradient boosting and logistic regression
        voting: str — "soft" / "hard"
        cv: int — number of cross-validation folds
        scoring: str — scikit-learn scoring name
    Returns:
        pd.DataFrame — one row per individual model plus one row for the
        voting ensemble, with columns "model", "mean", "std", sorted by
        "mean" in descending order.
    """
    def _row(label, cv_scores):
        # Summarise one cross-validation run as a result-table row.
        return {"model": label, "mean": cv_scores.mean(),
                "std": cv_scores.std()}

    estimators = models
    if estimators is None:
        estimators = [
            ("rf", RandomForestClassifier(n_estimators=200, random_state=42)),
            ("gbm", GradientBoostingClassifier(n_estimators=200, random_state=42)),
            ("lr", LogisticRegression(max_iter=1000, random_state=42)),
        ]

    # Baselines: every member evaluated individually.
    rows = [
        _row(label, cross_val_score(est, X, y, cv=cv, scoring=scoring))
        for label, est in estimators
    ]

    # The combined voting classifier, evaluated with the same CV settings.
    ensemble = VotingClassifier(estimators=estimators, voting=voting)
    ensemble_scores = cross_val_score(ensemble, X, y, cv=cv, scoring=scoring)
    rows.append(_row(f"Voting({voting})", ensemble_scores))

    ranking = pd.DataFrame(rows)
    ranking = ranking.sort_values("mean", ascending=False)
    print(f"Voting ensemble: {scoring} = {ensemble_scores.mean():.4f} ± {ensemble_scores.std():.4f}")
    return ranking
+```
+## 4. アンサンブル多様性評価
+```python
def ensemble_diversity(models, X, y):
    """
    Pairwise ensemble diversity: Yule's Q-statistic and disagreement rate.

    For every pair of models the samples are counted by whether each model
    classified them correctly:
        n11 both correct, n00 both wrong, n10 / n01 exactly one correct.
    Then Q = (n11*n00 - n10*n01) / (n11*n00 + n10*n01), taken as 0 when the
    denominator is 0, and disagreement = (n10 + n01) / n_samples.
    Both measures are averaged over all model pairs.

    Parameters:
        models: list — fitted models exposing predict(X)
        X: np.ndarray — evaluation features
        y: np.ndarray — true labels (any array-like; flattened to 1-D)
    Returns:
        dict — "mean_q_statistic" and "mean_disagreement" (rounded to four
        decimals) and "n_models". Both means are NaN when fewer than two
        models are given, since no pair exists.
    """
    from itertools import combinations

    # Flatten so a column vector or a pandas Series compares element-wise
    # with the 1-D predictions.
    y_true = np.asarray(y).ravel()
    n_samples = y_true.size
    n_models = len(models)
    # One boolean vector per model: True where the prediction is correct.
    hits = [np.asarray(m.predict(X)).ravel() == y_true for m in models]

    nan = float("nan")
    q_stats = []
    disagree_rates = []
    for a, b in combinations(hits, 2):
        n11 = int(np.sum(a & b))
        n00 = int(np.sum(~a & ~b))
        n10 = int(np.sum(a & ~b))
        n01 = int(np.sum(~a & b))
        numer = n11 * n00 - n10 * n01
        denom = n11 * n00 + n10 * n01
        q_stats.append(numer / denom if denom != 0 else 0)
        disagree_rates.append((n10 + n01) / n_samples if n_samples else nan)

    # No pair means no diversity value; report NaN without the RuntimeWarning
    # that np.mean([]) would emit.
    mean_q = round(float(np.mean(q_stats)), 4) if q_stats else nan
    mean_disagree = (round(float(np.mean(disagree_rates)), 4)
                     if disagree_rates else nan)
    result = {
        "mean_q_statistic": mean_q,
        "mean_disagreement": mean_disagree,
        "n_models": n_models,
    }
    print(f"Diversity: Q={result['mean_q_statistic']:.3f}, "
          f"Disagree={result['mean_disagreement']:.3f}")
    return result
+```
+---
+## パイプライン統合
+```
+automl → ensemble-methods → uncertainty-quantification
+  (モデル選択)  (アンサンブル)     (不確実性定量化)
+       │            │                    ↓
+  feature-importance ┘         explainable-ai
+    (特徴量重要度)               (説明可能 AI)
+```
+## パイプライン出力
+| ファイル | 説明 | 次スキル |
+|---------|------|---------|
+| `stacking_meta.pkl` | Stacking メタモデル | → 予測 |
+| `boosting_comparison.csv` | ブースティング比較 | → レポート |
+| `ensemble_diversity.json` | 多様性指標 | → モデル改善 |

package/src/.github/skills/scientific-interactive-dashboard/SKILL.md ADDED Viewed

@@ -0,0 +1,346 @@
+---
+name: scientific-interactive-dashboard
+description: |
+  インタラクティブダッシュボードスキル。
+  Streamlit / Dash / Panel / Voilà による
+  科学データダッシュボード構築・リアルタイムパラメータ探索 UI ・
+  ウィジェット連動・データアップロード・解析パイプライン UI 化。
+---
+# Scientific Interactive Dashboard
+科学データのインタラクティブダッシュボードを構築し、
+パラメータ探索・結果共有・リアルタイム解析を実現する。
+## When to Use
+- Streamlit で迅速にデータ探索ダッシュボードを構築するとき
+- Dash でカスタマイズ性の高い解析 UI を作成するとき
+- Panel / Voilà で Jupyter ノートブックをダッシュボード化するとき
+- パラメータスライダー + リアルタイム更新の UI を実装するとき
+- 複数人で解析結果を共有するとき
+- 非プログラマーに解析ツールを提供するとき
+---
+## Quick Start
+## 1. Streamlit 科学データダッシュボード
+```python
def generate_streamlit_dashboard(output_path="dashboard_app.py"):
    """
    Write a ready-to-run Streamlit dashboard script to disk.

    The generated app lets the user upload a CSV/Excel file (falling back to
    random demo data) and offers four tabs: scatter explorer, histogram,
    correlation heatmap and a raw-data table with CSV download.

    Parameters:
        output_path: str — path of the Python file to create (overwritten if
            it already exists)
    Returns:
        str — `output_path`, unchanged.
    """
    code = '''
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
st.set_page_config(page_title="Scientific Data Dashboard",
                   layout="wide", page_icon="🔬")
st.title("🔬 Scientific Data Dashboard")
# --- サイドバー: データアップロード & パラメータ ---
st.sidebar.header("Settings")
uploaded_file = st.sidebar.file_uploader(
    "Upload CSV / Excel", type=["csv", "xlsx"])
if uploaded_file is not None:
    if uploaded_file.name.endswith(".csv"):
        df = pd.read_csv(uploaded_file)
    else:
        df = pd.read_excel(uploaded_file)
else:
    # デモデータ
    np.random.seed(42)
    n = 500
    df = pd.DataFrame({
        "x": np.random.randn(n),
        "y": np.random.randn(n),
        "z": np.random.randn(n),
        "category": np.random.choice(["A", "B", "C"], n),
        "value": np.random.exponential(2, n)
    })
    st.sidebar.info("Demo data loaded (upload your own CSV)")
# --- データ概要 ---
col1, col2, col3 = st.columns(3)
col1.metric("Rows", len(df))
col2.metric("Columns", len(df.columns))
col3.metric("Missing", int(df.isnull().sum().sum()))
# --- タブ ---
tab1, tab2, tab3, tab4 = st.tabs(
    ["📊 Explorer", "📈 Distribution", "🔗 Correlation", "📋 Data"])
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
cat_cols = df.select_dtypes(exclude=[np.number]).columns.tolist()
with tab1:
    st.subheader("Interactive Explorer")
    c1, c2 = st.columns(2)
    x_col = c1.selectbox("X axis", numeric_cols, index=0)
    y_col = c2.selectbox("Y axis", numeric_cols,
                          index=min(1, len(numeric_cols)-1))
    color_col = st.selectbox("Color", [None] + cat_cols + numeric_cols)
    fig = px.scatter(df, x=x_col, y=y_col, color=color_col,
                     opacity=0.7, title=f"{x_col} vs {y_col}")
    st.plotly_chart(fig, use_container_width=True)
with tab2:
    st.subheader("Distribution Analysis")
    dist_col = st.selectbox("Column", numeric_cols, key="dist")
    n_bins = st.slider("Bins", 10, 100, 30)
    fig2 = px.histogram(df, x=dist_col, nbins=n_bins,
                        marginal="box", title=f"Distribution: {dist_col}")
    st.plotly_chart(fig2, use_container_width=True)
with tab3:
    st.subheader("Correlation Matrix")
    corr = df[numeric_cols].corr()
    fig3 = px.imshow(corr, text_auto=".2f", color_continuous_scale="RdBu_r",
                     title="Correlation Heatmap")
    st.plotly_chart(fig3, use_container_width=True)
with tab4:
    st.subheader("Raw Data")
    st.dataframe(df, use_container_width=True)
    csv = df.to_csv(index=False)
    st.download_button("Download CSV", csv, "data.csv", "text/csv")
'''
    # The template contains emoji and Japanese comments, so the encoding is
    # pinned to UTF-8; the platform default (e.g. cp1252 on Windows) would
    # raise UnicodeEncodeError.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(code)
    print(f"Streamlit dashboard → {output_path}")
    print(f"  Run: streamlit run {output_path}")
    return output_path
+```
+## 2. Dash コールバックダッシュボード
+```python
def generate_dash_dashboard(output_path="dash_app.py"):
    """
    Write a ready-to-run Dash dashboard script to disk.

    The generated app builds random demo data and wires three dropdowns
    (X axis, Y axis, colour) to a single callback that refreshes a scatter
    plot, a histogram and a summary-statistics table.

    Parameters:
        output_path: str — path of the Python file to create (overwritten if
            it already exists)
    Returns:
        str — `output_path`, unchanged.
    """
    code = '''
from dash import Dash, html, dcc, Input, Output, dash_table
import pandas as pd
import numpy as np
import plotly.express as px
app = Dash(__name__)
# デモデータ
np.random.seed(42)
n = 500
df = pd.DataFrame({
    "x": np.random.randn(n),
    "y": np.random.randn(n),
    "z": np.random.randn(n),
    "group": np.random.choice(["Control", "Treatment A", "Treatment B"], n),
    "response": np.random.exponential(2, n)
})
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
app.layout = html.Div([
    html.H1("Scientific Data Dashboard", style={"textAlign": "center"}),
    html.Div([
        html.Div([
            html.Label("X Axis"),
            dcc.Dropdown(id="x-col", options=numeric_cols,
                         value=numeric_cols[0])
        ], style={"width": "30%", "display": "inline-block"}),
        html.Div([
            html.Label("Y Axis"),
            dcc.Dropdown(id="y-col", options=numeric_cols,
                         value=numeric_cols[1])
        ], style={"width": "30%", "display": "inline-block"}),
        html.Div([
            html.Label("Color"),
            dcc.Dropdown(id="color-col",
                         options=df.columns.tolist(),
                         value="group")
        ], style={"width": "30%", "display": "inline-block"}),
    ], style={"padding": "20px"}),
    html.Div([
        html.Div([dcc.Graph(id="scatter-plot")],
                 style={"width": "50%", "display": "inline-block"}),
        html.Div([dcc.Graph(id="histogram")],
                 style={"width": "50%", "display": "inline-block"}),
    ]),
    html.Div([
        html.H3("Summary Statistics"),
        dash_table.DataTable(
            id="summary-table",
            columns=[{"name": c, "id": c}
                     for c in ["stat"] + numeric_cols],
            style_table={"overflowX": "auto"})
    ], style={"padding": "20px"})
])
@app.callback(
    [Output("scatter-plot", "figure"),
     Output("histogram", "figure"),
     Output("summary-table", "data")],
    [Input("x-col", "value"),
     Input("y-col", "value"),
     Input("color-col", "value")]
)
def update_plots(x_col, y_col, color_col):
    fig1 = px.scatter(df, x=x_col, y=y_col, color=color_col,
                      opacity=0.7, title=f"{x_col} vs {y_col}")
    fig2 = px.histogram(df, x=x_col, color=color_col,
                        marginal="box", barmode="overlay", opacity=0.7)
    stats = df[numeric_cols].describe().reset_index()
    stats.columns = ["stat"] + numeric_cols
    return fig1, fig2, stats.to_dict("records")
if __name__ == "__main__":
    app.run(debug=True, port=8050)
'''
    # The template contains Japanese comments, so the encoding is pinned to
    # UTF-8 rather than left to the platform default, which may be unable to
    # encode them.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(code)
    print(f"Dash dashboard → {output_path}")
    print(f"  Run: python {output_path}")
    return output_path
+```
+## 3. Panel ダッシュボード
+```python
def generate_panel_dashboard(output_path="panel_app.py"):
    """
    Write a ready-to-run Panel dashboard script to disk.

    The generated app builds random demo data and binds two axis selectors
    and a bin-count slider to a scatter plot and a histogram, laid out in a
    FastListTemplate next to a summary table.

    Parameters:
        output_path: str — path of the Python file to create (overwritten if
            it already exists)
    Returns:
        str — `output_path`, unchanged.
    """
    code = '''
import panel as pn
import pandas as pd
import numpy as np
import plotly.express as px
pn.extension("plotly")
# デモデータ
np.random.seed(42)
n = 500
df = pd.DataFrame({
    "x": np.random.randn(n),
    "y": np.random.randn(n),
    "z": np.random.randn(n),
    "group": np.random.choice(["A", "B", "C"], n),
    "value": np.random.exponential(2, n)
})
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
# ウィジェット
x_select = pn.widgets.Select(name="X Axis", options=numeric_cols, value="x")
y_select = pn.widgets.Select(name="Y Axis", options=numeric_cols, value="y")
n_bins = pn.widgets.IntSlider(name="Histogram Bins", start=10, end=100, value=30)
@pn.depends(x_select, y_select)
def scatter_plot(x_col, y_col):
    fig = px.scatter(df, x=x_col, y=y_col, color="group",
                     opacity=0.7, title=f"{x_col} vs {y_col}")
    return fig
@pn.depends(x_select, n_bins)
def hist_plot(x_col, bins):
    fig = px.histogram(df, x=x_col, nbins=bins, color="group",
                       barmode="overlay", opacity=0.7)
    return fig
dashboard = pn.template.FastListTemplate(
    title="Scientific Data Dashboard",
    sidebar=[x_select, y_select, n_bins],
    main=[
        pn.Row(pn.pane.Plotly(scatter_plot, sizing_mode="stretch_width"),
               pn.pane.Plotly(hist_plot, sizing_mode="stretch_width")),
        pn.pane.DataFrame(df.describe().T, sizing_mode="stretch_width")
    ]
)
dashboard.servable()
'''
    # The template contains Japanese comments, so the encoding is pinned to
    # UTF-8 rather than left to the platform default, which may be unable to
    # encode them.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(code)
    print(f"Panel dashboard → {output_path}")
    print(f"  Run: panel serve {output_path}")
    return output_path
+```
+## 4. ダッシュボード比較ガイド
+```python
def compare_dashboard_frameworks():
    """
    Print and return a comparison table of Streamlit / Dash / Panel / Voilà.

    Returns:
        pd.DataFrame — one row per framework with star ratings
        (Ease_of_Use, Customization, Interactivity, Performance), typical
        deployment targets and the use case each framework suits best.
    """
    # Imported locally: the other snippets in this skill only import pandas
    # inside the generated app templates, so `pd` is not otherwise in scope.
    import pandas as pd

    comparison = pd.DataFrame({
        "Framework": ["Streamlit", "Dash", "Panel", "Voilà"],
        "Ease_of_Use": ["★★★★★", "★★★☆☆", "★★★★☆", "★★★★★"],
        "Customization": ["★★★☆☆", "★★★★★", "★★★★☆", "★★☆☆☆"],
        "Interactivity": ["★★★★☆", "★★★★★", "★★★★★", "★★★☆☆"],
        "Performance": ["★★★☆☆", "★★★★★", "★★★★☆", "★★★☆☆"],
        "Deployment": ["Streamlit Cloud", "Heroku/AWS", "Any ASGI", "Binder/Hub"],
        "Best_For": [
            "Rapid prototyping, data exploration",
            "Production apps, complex callbacks",
            "Jupyter integration, scientific viz",
            "Notebook → dashboard conversion"
        ]
    })
    print("=== Dashboard Framework Comparison ===")
    print(comparison.to_string(index=False))
    return comparison
+```
+---
+## パイプライン統合
+```
+advanced-visualization → interactive-dashboard → presentation-design
+    (高度可視化)           (ダッシュボード)          (プレゼン)
+         │                       │                      ↓
+  missing-data-analysis ────────┘            scientific-schematics
+    (欠損値解析)                                (図式デザイン)
+```
+## パイプライン出力
+| ファイル | 説明 | 次スキル |
+|---------|------|---------|
+| `dashboard_app.py` | Streamlit ダッシュボード | → deployment |
+| `dash_app.py` | Dash ダッシュボード | → deployment |
+| `panel_app.py` | Panel ダッシュボード | → deployment |
+| `framework_comparison.csv` | フレームワーク比較 | → 選択指針 |