@nahisaho/satori 0.22.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -20
- package/package.json +1 -1
- package/src/.github/skills/scientific-active-learning/SKILL.md +289 -0
- package/src/.github/skills/scientific-advanced-visualization/SKILL.md +310 -0
- package/src/.github/skills/scientific-anomaly-detection/SKILL.md +296 -0
- package/src/.github/skills/scientific-automl/SKILL.md +264 -0
- package/src/.github/skills/scientific-causal-ml/SKILL.md +240 -0
- package/src/.github/skills/scientific-data-profiling/SKILL.md +247 -0
- package/src/.github/skills/scientific-ensemble-methods/SKILL.md +263 -0
- package/src/.github/skills/scientific-geospatial-analysis/SKILL.md +274 -0
- package/src/.github/skills/scientific-interactive-dashboard/SKILL.md +346 -0
- package/src/.github/skills/scientific-missing-data-analysis/SKILL.md +312 -0
- package/src/.github/skills/scientific-model-monitoring/SKILL.md +247 -0
- package/src/.github/skills/scientific-network-visualization/SKILL.md +278 -0
- package/src/.github/skills/scientific-reproducible-reporting/SKILL.md +330 -0
- package/src/.github/skills/scientific-time-series-forecasting/SKILL.md +246 -0
- package/src/.github/skills/scientific-transfer-learning/SKILL.md +298 -0
- package/src/.github/skills/scientific-uncertainty-quantification/SKILL.md +286 -0
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-missing-data-analysis
|
|
3
|
+
description: |
|
|
4
|
+
欠損データ解析スキル。欠損パターン診断 (MCAR/MAR/MNAR) ・
|
|
5
|
+
Little's MCAR テスト・多重代入法 (MICE) ・KNN 補完・
|
|
6
|
+
MissForest・VAE/GAIN 補完・欠損パターン可視化・Rubin's Rules。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Missing Data Analysis
|
|
10
|
+
|
|
11
|
+
欠損データの診断・補完・感度分析パイプラインを提供し、
|
|
12
|
+
バイアスのない統計推論を実現する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- データセットの欠損パターンを診断するとき
|
|
17
|
+
- MCAR / MAR / MNAR のメカニズムを判定するとき
|
|
18
|
+
- 多重代入法 (MICE) で欠損値を補完するとき
|
|
19
|
+
- KNN / MissForest / 深層学習ベースの補完をするとき
|
|
20
|
+
- 複数の補完結果を Rubin's Rules で統合するとき
|
|
21
|
+
- 欠損パターンを可視化するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. 欠損パターン診断
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import numpy as np
|
|
31
|
+
import pandas as pd
|
|
32
|
+
import matplotlib.pyplot as plt
|
|
33
|
+
import seaborn as sns
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def diagnose_missing_patterns(df, output_prefix="missing"):
    """
    Summarise and plot the missing-value structure of a DataFrame.

    Builds a per-column summary table and saves a 2x2 diagnostic figure
    (missingness matrix of the first 200 rows, per-column missing rate,
    correlation between missingness indicators, ten most frequent row-wise
    missing patterns) to support an MCAR / MAR / MNAR judgement.

    Parameters:
        df: pd.DataFrame — input data
        output_prefix: str — prefix of the output image file name

    Returns:
        dict — "summary": per-column DataFrame sorted by missing rate
        (descending); "fig": path of the saved PNG
        (``f"{output_prefix}_diagnosis.png"``).
    """
    n_rows, n_cols = df.shape
    # Compute the null mask once; every panel below is derived from it.
    null_mask = df.isnull()
    missing_counts = null_mask.sum()
    missing_pct = (missing_counts / n_rows * 100).round(2)

    summary = pd.DataFrame({
        "column": df.columns,
        "n_missing": missing_counts.values,
        "pct_missing": missing_pct.values,
        "dtype": df.dtypes.values,
    }).sort_values("pct_missing", ascending=False)

    path = f"{output_prefix}_diagnosis.png"
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    try:
        # (1) Missingness matrix (msno-like), limited to the first 200 rows.
        ax = axes[0, 0]
        ax.imshow(null_mask.to_numpy().astype(int)[:200], aspect="auto",
                  cmap="Greys", interpolation="none")
        ax.set_xlabel("Features")
        ax.set_ylabel("Samples")
        ax.set_title("Missing Pattern Matrix (first 200 rows)")

        # (2) Missing rate per column (only columns that have missing values).
        ax = axes[0, 1]
        cols_with_missing = summary[summary["pct_missing"] > 0]
        ax.barh(cols_with_missing["column"], cols_with_missing["pct_missing"])
        ax.set_xlabel("Missing %")
        ax.set_title("Missing Rate per Column")

        # (3) Correlation between the per-column missingness indicators.
        ax = axes[1, 0]
        miss_corr = null_mask.corr()
        sns.heatmap(miss_corr, ax=ax, cmap="RdBu_r", center=0,
                    square=True, cbar_kws={"shrink": 0.8})
        ax.set_title("Missing Correlation")

        # (4) Ten most frequent row-wise missing patterns.
        ax = axes[1, 1]
        patterns = null_mask.apply(tuple, axis=1)
        pattern_counts = patterns.value_counts().head(10)
        positions = range(len(pattern_counts))
        ax.barh(positions, pattern_counts.values)
        ax.set_yticks(positions)
        # Pattern tuples get long quickly; truncate the tick labels.
        ax.set_yticklabels([str(p)[:40] for p in pattern_counts.index],
                           fontsize=7)
        ax.set_xlabel("Count")
        ax.set_title("Top 10 Missing Patterns")

        plt.tight_layout()
        plt.savefig(path, dpi=150, bbox_inches="tight")
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)

    total_missing = missing_counts.sum()
    total_cells = n_rows * n_cols
    # An empty frame has no cells; report 0% instead of dividing by zero.
    total_pct = total_missing / total_cells * 100 if total_cells else 0.0
    print(f"Missing Diagnosis: {n_cols} cols, "
          f"{total_missing} total missing ({total_pct:.1f}%)")
    return {"summary": summary, "fig": path}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## 2. Little's MCAR テスト
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
def littles_mcar_test(df):
    """
    Approximate Little's MCAR test (are values missing completely at random?).

    Rows are grouped by missing-value pattern. For every pattern with at
    least two rows, the mean of its observed columns is compared with the
    overall column means, using the pairwise-complete covariance of the whole
    frame scaled by the group size. The squared distances are summed into a
    chi-square statistic with ``sum(observed columns per pattern) - n_cols``
    degrees of freedom.

    NOTE(review): this uses pairwise-complete mean/covariance estimates, not
    the EM/ML estimates of the textbook test — treat the p-value as
    approximate.

    Parameters:
        df: pd.DataFrame — only numeric columns are used

    Returns:
        dict — "chi2", "df", "p_value", "conclusion". "chi2" and "p_value"
        are NaN when the degrees of freedom are not positive.
    """
    from scipy import stats

    numeric_df = df.select_dtypes(include=[np.number])
    n_cols = numeric_df.shape[1]

    values = numeric_df.to_numpy(dtype=float, na_value=np.nan)
    null_mask = np.isnan(values)

    # Overall (pairwise-complete) mean and covariance.
    global_mean = numeric_df.mean().to_numpy(dtype=float)
    global_cov = numeric_df.cov().to_numpy(dtype=float)

    chi2_stat = 0.0
    df_stat = 0

    # Distinct row-wise missing patterns; compared as boolean rows so the
    # grouping does not depend on how pandas compares a Series with a tuple.
    unique_patterns = np.unique(null_mask, axis=0) if null_mask.size else []

    for pattern in unique_patterns:
        rows = (null_mask == pattern).all(axis=1)
        n_j = int(rows.sum())
        if n_j < 2:
            continue

        # Columns that are observed under this pattern.
        obs_cols = np.flatnonzero(~pattern)
        if obs_cols.size == 0:
            continue

        diff = values[rows][:, obs_cols].mean(axis=0) - global_mean[obs_cols]

        obs_cov = global_cov[np.ix_(obs_cols, obs_cols)]
        if not np.isfinite(obs_cov).all():
            # Covariance undefined (too few jointly observed rows); this
            # pattern cannot contribute a meaningful distance.
            continue
        try:
            cov_inv = np.linalg.pinv(obs_cov / n_j)
        except np.linalg.LinAlgError:
            continue

        chi2_stat += float(diff @ cov_inv @ diff)
        df_stat += int(obs_cols.size)

    df_stat -= n_cols  # degrees-of-freedom correction

    if df_stat <= 0:
        # Same keys as the regular result so callers can read "df" safely.
        return {"chi2": np.nan, "df": df_stat, "p_value": np.nan,
                "conclusion": "判定不能 (自由度不足)"}

    # Survival function keeps precision for very small p-values.
    p_value = float(stats.chi2.sf(chi2_stat, df_stat))
    conclusion = "MCAR (p > 0.05)" if p_value > 0.05 else "Not MCAR (p ≤ 0.05)"

    print(f"Little's MCAR test: χ²={chi2_stat:.2f}, df={df_stat}, "
          f"p={p_value:.4f} → {conclusion}")
    return {"chi2": chi2_stat, "df": df_stat,
            "p_value": p_value, "conclusion": conclusion}
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## 3. 多重代入法 (MICE)
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
def mice_imputation(df, n_imputations=5, max_iter=10, random_state=42):
    """
    Multiple imputation by chained equations (MICE).

    Numeric columns are imputed ``n_imputations`` times with scikit-learn's
    ``IterativeImputer`` (``sample_posterior=True``, seed ``random_state + i``
    for the i-th dataset). Non-numeric columns are filled with their most
    frequent value, or "UNKNOWN" when a column has no observed value.
    Numeric columns without any observed value cannot be imputed and are
    returned as all-NaN.

    Parameters:
        df: pd.DataFrame — data containing missing values
        n_imputations: int — number of imputed datasets to generate
        max_iter: int — number of chained-equation iterations
        random_state: int — base random seed

    Returns:
        list[pd.DataFrame] — one completed copy of ``df`` per imputation,
        with the columns in the same order as ``df``.
    """
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    cat_cols = df.select_dtypes(exclude=[np.number]).columns

    # Mode filling is deterministic, so do it once instead of per imputation.
    filled_cat = {}
    for col in cat_cols:
        modes = df[col].mode()
        fill_value = modes.iloc[0] if not modes.empty else "UNKNOWN"
        filled_cat[col] = df[col].fillna(fill_value)
    cat_block = pd.DataFrame(filled_cat, index=df.index)

    # The imputer drops columns that are entirely missing, which would break
    # the column alignment below; fit only on columns with observed values.
    fit_cols = [c for c in numeric_cols if df[c].notna().any()]
    if fit_cols:
        # Imported only when there is something to impute.
        from sklearn.experimental import enable_iterative_imputer  # noqa
        from sklearn.impute import IterativeImputer

    imputed_datasets = []

    for i in range(n_imputations):
        numeric_block = df[numeric_cols].astype(float)
        if fit_cols:
            imputer = IterativeImputer(
                max_iter=max_iter,
                random_state=random_state + i,
                sample_posterior=True)
            numeric_block[fit_cols] = imputer.fit_transform(df[fit_cols])

        imputed_df = pd.concat([numeric_block, cat_block], axis=1)
        # Restore the caller's column order.
        imputed_datasets.append(imputed_df[list(df.columns)])

    print(f"MICE: {n_imputations} datasets × {max_iter} iterations, "
          f"{len(numeric_cols)} numeric cols")
    return imputed_datasets
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def rubins_rules(estimates, variances):
    """
    Pool multiply-imputed results with Rubin's Rules.

    Parameters:
        estimates: list[float] — point estimate from each imputed dataset
        variances: list[float] — variance of that estimate in each dataset

    Returns:
        dict — "pooled_estimate" (mean estimate), "within_variance" (mean of
        the variances), "between_variance" (sample variance of the
        estimates), "total_variance" (within + (1 + 1/m) * between) and "df"
        (Rubin's large-sample degrees of freedom).

    Raises:
        ValueError: if ``estimates`` is empty or the two inputs differ in
        length.

    With a single imputation the between-imputation variance cannot be
    estimated; it is reported as 0.0 so the result reduces to that dataset's
    own estimate and variance.
    """
    estimates = np.atleast_1d(np.asarray(estimates, dtype=float))
    variances = np.atleast_1d(np.asarray(variances, dtype=float))

    m = len(estimates)
    if m == 0:
        raise ValueError("estimates must contain at least one value")
    if len(variances) != m:
        raise ValueError(
            f"estimates and variances differ in length ({m} vs {len(variances)})")

    Q_bar = float(np.mean(estimates))
    U_bar = float(np.mean(variances))  # Within-imputation variance
    # ddof=1 is undefined for one value (NaN), so fall back to 0.0.
    B = float(np.var(estimates, ddof=1)) if m > 1 else 0.0  # Between-imputation variance
    T = U_bar + (1 + 1 / m) * B  # Total variance

    # Degrees of freedom: classic Rubin (1987) formula
    # (m - 1) * (1 + 1/r)^2, where r is the relative increase in variance.
    # This is not the Barnard-Rubin small-sample adjustment, which also
    # needs the complete-data degrees of freedom.
    r = (1 + 1 / m) * B / U_bar if U_bar > 0 else np.inf
    df_old = (m - 1) * (1 + 1 / r) ** 2 if r > 0 else np.inf

    print(f"Rubin's Rules: Q̄={Q_bar:.4f}, T={T:.4f}, "
          f"within={U_bar:.4f}, between={B:.4f}")
    return {"pooled_estimate": Q_bar, "total_variance": T,
            "within_variance": U_bar, "between_variance": B,
            "df": df_old}
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## 4. KNN / MissForest 補完
|
|
240
|
+
|
|
241
|
+
```python
|
|
242
|
+
def knn_imputation(df, n_neighbors=5):
    """
    Impute missing numeric values with scikit-learn's ``KNNImputer``.

    Only numeric columns are processed and returned. Numeric columns without
    any observed value cannot be imputed and are returned as all-NaN.

    Parameters:
        df: pd.DataFrame — data containing missing values
        n_neighbors: int — number of neighbours used for each imputation

    Returns:
        pd.DataFrame — float copy of the numeric columns of ``df`` (same
        index and column order) with missing values filled in.
    """
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    imputed = df[numeric_cols].astype(float)

    # The imputer drops columns that are entirely missing, which would break
    # the column alignment; fit only on columns with observed values.
    fit_cols = [c for c in numeric_cols if df[c].notna().any()]
    n_imputed = int(df[fit_cols].isnull().sum().sum()) if fit_cols else 0

    if fit_cols:
        # Imported only when there is something to impute.
        from sklearn.impute import KNNImputer

        imputer = KNNImputer(n_neighbors=n_neighbors)
        imputed[fit_cols] = imputer.fit_transform(df[fit_cols])

    print(f"KNN Imputation (k={n_neighbors}): {n_imputed} values imputed")
    return imputed
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def missforest_imputation(df, n_estimators=100, max_iter=10, random_state=42):
    """
    MissForest-style imputation: ``IterativeImputer`` driven by a
    ``RandomForestRegressor``.

    Only numeric columns are processed and returned. Numeric columns without
    any observed value cannot be imputed and are returned as all-NaN.

    Parameters:
        df: pd.DataFrame — data containing missing values
        n_estimators: int — number of trees in each random forest
        max_iter: int — number of imputation rounds
        random_state: int — seed for both the forest and the imputer
            (defaults to the previously hard-coded 42)

    Returns:
        pd.DataFrame — float copy of the numeric columns of ``df`` (same
        index and column order) with missing values filled in.
    """
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    imputed = df[numeric_cols].astype(float)

    # The imputer drops columns that are entirely missing, which would break
    # the column alignment; fit only on columns with observed values.
    fit_cols = [c for c in numeric_cols if df[c].notna().any()]
    n_imputed = int(df[fit_cols].isnull().sum().sum()) if fit_cols else 0

    if fit_cols:
        # Imported only when there is something to impute.
        from sklearn.experimental import enable_iterative_imputer  # noqa
        from sklearn.impute import IterativeImputer
        from sklearn.ensemble import RandomForestRegressor

        forest = RandomForestRegressor(n_estimators=n_estimators,
                                       random_state=random_state, n_jobs=-1)
        imputer = IterativeImputer(estimator=forest, max_iter=max_iter,
                                   random_state=random_state)
        imputed[fit_cols] = imputer.fit_transform(df[fit_cols])

    print(f"MissForest (n_trees={n_estimators}, iter={max_iter}): "
          f"{n_imputed} values imputed")
    return imputed
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
---
|
|
294
|
+
|
|
295
|
+
## パイプライン統合
|
|
296
|
+
|
|
297
|
+
```
|
|
298
|
+
eda-correlation → missing-data-analysis → ml-classification
|
|
299
|
+
(探索的解析) (欠損診断・補完) (モデリング)
|
|
300
|
+
│ │ ↓
|
|
301
|
+
statistical-testing ────┘ advanced-visualization
|
|
302
|
+
(統計検定) (結果可視化)
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
## パイプライン出力
|
|
306
|
+
|
|
307
|
+
| ファイル | 説明 | 次スキル |
|
|
308
|
+
|---------|------|---------|
|
|
309
|
+
| `missing_diagnosis.png` | 欠損パターン可視化 | → reporting |
|
|
310
|
+
| `mcar_test_result.json` | Little's MCAR テスト | → 補完戦略選択 |
|
|
311
|
+
| `imputed_datasets/` | MICE 多重代入データ | → ml-classification |
|
|
312
|
+
| `imputation_comparison.csv` | 補完手法比較 | → 最終選択 |
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-model-monitoring
|
|
3
|
+
description: |
|
|
4
|
+
MLOps モデル監視スキル。データドリフト検出 (Evidently/NannyML)・
|
|
5
|
+
モデル性能劣化検出・特徴量ドリフト・コンセプトドリフト・
|
|
6
|
+
A/B テスト統計・モデルレジストリ・再学習トリガー。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Model Monitoring
|
|
10
|
+
|
|
11
|
+
本番環境の ML モデル監視パイプラインを提供し、
|
|
12
|
+
データドリフト・性能劣化を検出して再学習トリガーを実現する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- デプロイ済みモデルの予測品質を継続監視するとき
|
|
17
|
+
- データドリフト (共変量シフト) を検出するとき
|
|
18
|
+
- コンセプトドリフト (P(Y|X) の変化) を検出するとき
|
|
19
|
+
- A/B テストで新旧モデルを比較するとき
|
|
20
|
+
- 特徴量分布の変化を追跡するとき
|
|
21
|
+
- 再学習トリガーの自動化ルールを設定するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. データドリフト検出
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import numpy as np
|
|
31
|
+
import pandas as pd
|
|
32
|
+
from scipy import stats
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def detect_data_drift(reference_df, current_df,
                      method="ks", threshold=None):
    """
    Detect per-feature data drift between a reference and a current frame.

    Every numeric column of ``reference_df`` that also exists in
    ``current_df`` is compared after dropping nulls. Features that have no
    non-null value in either frame cannot be compared and are omitted from
    the result.

    Parameters:
        reference_df: pd.DataFrame — training-time (reference) data
        current_df: pd.DataFrame — inference-time (current) data
        method: str — "ks" (two-sample KS test), "psi" (Population Stability
            Index) or "wasserstein" (distance divided by the reference std)
        threshold: float | None — drift cut-off. ``None`` selects the
            per-method default: 0.05 (KS significance level, drift when
            p < threshold), 0.2 (PSI, drift when psi > threshold) or 0.1
            (normalised Wasserstein, drift when value > threshold).

    Returns:
        pd.DataFrame — one row per compared feature with "feature",
        "is_drift" and the method-specific columns. Empty (but still carrying
        "feature" and "is_drift") when nothing could be compared.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    """
    default_thresholds = {"ks": 0.05, "psi": 0.2, "wasserstein": 0.1}
    if method not in default_thresholds:
        raise ValueError(
            f"Unknown method {method!r}; expected one of "
            f"{sorted(default_thresholds)}")
    if threshold is None:
        threshold = default_thresholds[method]

    numeric_cols = reference_df.select_dtypes(include=[np.number]).columns
    common_cols = [c for c in numeric_cols if c in current_df.columns]

    drift_results = []

    for col in common_cols:
        ref_vals = reference_df[col].dropna().values
        cur_vals = current_df[col].dropna().values
        if len(ref_vals) == 0 or len(cur_vals) == 0:
            # Nothing to compare; the statistical tests reject empty samples.
            continue

        if method == "ks":
            stat, p_value = stats.ks_2samp(ref_vals, cur_vals)
            drift_results.append({
                "feature": col, "statistic": stat,
                "p_value": p_value, "is_drift": bool(p_value < threshold)})

        elif method == "psi":
            # Population Stability Index
            psi_val = _compute_psi(ref_vals, cur_vals)
            drift_results.append({
                "feature": col, "psi": psi_val,
                "is_drift": bool(psi_val > threshold),
                "severity": "high" if psi_val > 0.25 else
                            "medium" if psi_val > 0.1 else "low"})

        else:  # "wasserstein"
            w_dist = stats.wasserstein_distance(ref_vals, cur_vals)
            ref_std = np.std(ref_vals)
            # Scale by the reference spread so the cut-off is unit-free; a
            # constant reference column falls back to the raw distance.
            normalized = w_dist / ref_std if ref_std > 0 else w_dist
            drift_results.append({
                "feature": col, "wasserstein": w_dist,
                "normalized": normalized,
                "is_drift": bool(normalized > threshold)})

    if drift_results:
        result_df = pd.DataFrame(drift_results)
    else:
        # Keep the columns callers rely on even when there are no rows.
        result_df = pd.DataFrame(columns=["feature", "is_drift"])
    n_drift = int(result_df["is_drift"].sum())
    print(f"Data Drift ({method}): {n_drift}/{len(common_cols)} features drifted")
    return result_df


def _compute_psi(expected, actual, n_bins=10):
    """
    Compute the Population Stability Index of ``actual`` against ``expected``.

    Bin edges are the ``n_bins`` quantile boundaries of ``expected`` with the
    outer edges opened to ±inf, so every ``actual`` value lands in a bin.
    Bin shares are floored at 1e-4 to keep the logarithm finite.

    Returns NaN when either sample is empty.
    """
    expected = np.asarray(expected)
    actual = np.asarray(actual)
    if expected.size == 0 or actual.size == 0:
        return float("nan")

    breakpoints = np.quantile(expected, np.linspace(0, 1, n_bins + 1))
    breakpoints[0] = -np.inf
    breakpoints[-1] = np.inf

    expected_pct = np.histogram(expected, bins=breakpoints)[0] / len(expected)
    actual_pct = np.histogram(actual, bins=breakpoints)[0] / len(actual)

    expected_pct = np.clip(expected_pct, 1e-4, None)
    actual_pct = np.clip(actual_pct, 1e-4, None)

    psi = np.sum((actual_pct - expected_pct) * np.log(actual_pct / expected_pct))
    return float(psi)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## 2. モデル性能劣化検出
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
def detect_performance_degradation(y_true_batches, y_pred_batches,
                                   metric="accuracy",
                                   window_size=10, alert_threshold=0.05,
                                   output_path="performance_monitoring.png"):
    """
    Detect model performance degradation over a sequence of batches.

    Each batch is scored with ``metric``; the mean of the first
    ``window_size`` scores (baseline) is compared with the mean of the last
    ``window_size`` scores (current). A drop larger than ``alert_threshold``
    is reported as degradation. A plot of the batch scores, the moving
    average, the baseline and the alert level is saved to ``output_path``.

    "rmse" is scored as the negative RMSE so that, like the other metrics,
    a higher score is better and a drop means degradation.

    NOTE(review): with fewer than ``2 * window_size`` batches the baseline
    and current windows overlap, which understates the drop.

    Parameters:
        y_true_batches: list[np.ndarray] — ground truth per batch
        y_pred_batches: list[np.ndarray] — predictions per batch
        metric: str — "accuracy" / "f1" / "rmse" / "auc"
        window_size: int — moving-average window (in batches)
        alert_threshold: float — score drop that triggers the alert
        output_path: str — where the PNG is written

    Returns:
        dict — "baseline", "current", "degradation", "is_degraded",
        "scores" (per-batch list) and "fig" (path of the saved PNG).

    Raises:
        ValueError: if the batch lists are empty or differ in length, or if
        ``window_size`` is smaller than 1.
        KeyError: if ``metric`` is not a supported name.
    """
    y_true_batches = list(y_true_batches)
    y_pred_batches = list(y_pred_batches)
    # zip() would silently drop the unmatched tail, hiding a data problem.
    if len(y_true_batches) != len(y_pred_batches):
        raise ValueError(
            f"y_true_batches and y_pred_batches differ in length "
            f"({len(y_true_batches)} vs {len(y_pred_batches)})")
    if not y_true_batches:
        raise ValueError("at least one batch is required")
    if window_size < 1:
        raise ValueError("window_size must be >= 1")

    from sklearn.metrics import accuracy_score, f1_score, mean_squared_error
    from sklearn.metrics import roc_auc_score
    import matplotlib.pyplot as plt

    metric_funcs = {
        "accuracy": accuracy_score,
        "f1": lambda y, p: f1_score(y, p, average="macro"),
        "rmse": lambda y, p: -np.sqrt(mean_squared_error(y, p)),
        "auc": roc_auc_score,
    }

    func = metric_funcs[metric]
    scores = [func(yt, yp) for yt, yp in zip(y_true_batches, y_pred_batches)]

    # Moving average; only defined once a full window of scores exists.
    scores_arr = np.array(scores)
    if len(scores_arr) >= window_size:
        ma = np.convolve(scores_arr, np.ones(window_size) / window_size,
                         mode="valid")
    else:
        ma = np.array([])

    # Baseline = first window_size batches, current = last window_size.
    baseline = np.mean(scores_arr[:window_size])
    current = np.mean(scores_arr[-window_size:])
    degradation = baseline - current

    is_degraded = degradation > alert_threshold

    # Visualisation
    fig, ax = plt.subplots(figsize=(12, 5))
    try:
        ax.plot(scores, "b-o", markersize=3, alpha=0.5, label="Batch score")
        if len(ma) > 0:
            # The k-th average covers batches k .. k + window_size - 1, so it
            # is drawn at the last batch of its window.
            ax.plot(range(window_size - 1, window_size - 1 + len(ma)),
                    ma, "r-", linewidth=2, label=f"MA({window_size})")
        ax.axhline(baseline, color="g", linestyle="--",
                   label=f"Baseline={baseline:.4f}")
        ax.axhline(baseline - alert_threshold, color="orange", linestyle="--",
                   label=f"Alert={baseline - alert_threshold:.4f}")
        ax.set_xlabel("Batch")
        ax.set_ylabel(metric)
        ax.set_title(f"Model Performance Monitoring ({metric})")
        ax.legend()

        path = output_path
        plt.savefig(path, dpi=150, bbox_inches="tight")
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)

    status = "DEGRADED ⚠️" if is_degraded else "OK ✓"
    print(f"Performance ({metric}): baseline={baseline:.4f}, "
          f"current={current:.4f}, Δ={degradation:.4f} → {status}")
    return {"baseline": baseline, "current": current,
            "degradation": degradation, "is_degraded": is_degraded,
            "scores": scores, "fig": path}
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## 3. A/B テスト統計
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
def ab_test_models(y_true, preds_a, preds_b, metric="accuracy",
                   n_bootstrap=10000, alpha=0.05):
    """
    Compare two models on the same labelled sample with a paired bootstrap.

    The sample is resampled with replacement ``n_bootstrap`` times (fixed
    seed 42); each resample yields the score difference B - A. The
    ``1 - alpha`` percentile interval of those differences decides the
    winner: the interval must lie entirely on B's or A's "better" side,
    otherwise the result is a tie. The direction of "better" depends on the
    metric: higher for "accuracy" / "f1", lower for "rmse".

    Parameters:
        y_true: np.ndarray — ground truth
        preds_a: np.ndarray — predictions of model A
        preds_b: np.ndarray — predictions of model B
        metric: str — "accuracy" / "f1" / "rmse"
        n_bootstrap: int — number of bootstrap resamples
        alpha: float — significance level

    Returns:
        dict — "score_a", "score_b", "diff" (raw score_b - score_a), "ci"
        (percentile interval of the raw difference), "p_value" (share of
        resamples in which B was not better than A) and "winner"
        ("A" / "B" / "Tie").

    Raises:
        ValueError: if the inputs are empty or differ in length.
        KeyError: if ``metric`` is not a supported name.
    """
    # Positional arrays: bootstrap indexing must not depend on a pandas index
    # and must also work for plain lists.
    y_true = np.asarray(y_true)
    preds_a = np.asarray(preds_a)
    preds_b = np.asarray(preds_b)

    n = len(y_true)
    if n == 0:
        raise ValueError("y_true must not be empty")
    if len(preds_a) != n or len(preds_b) != n:
        raise ValueError(
            f"y_true, preds_a and preds_b differ in length "
            f"({n}, {len(preds_a)}, {len(preds_b)})")

    from sklearn.metrics import accuracy_score, f1_score, mean_squared_error

    metric_funcs = {
        "accuracy": accuracy_score,
        "f1": lambda y, p: f1_score(y, p, average="macro"),
        "rmse": lambda y, p: np.sqrt(mean_squared_error(y, p))
    }
    # Metrics for which a smaller score means a better model.
    lower_is_better = {"rmse"}

    func = metric_funcs[metric]
    score_a = func(y_true, preds_a)
    score_b = func(y_true, preds_b)

    # Bootstrap distribution of the raw difference B - A.
    rng = np.random.RandomState(42)
    diffs = np.empty(n_bootstrap, dtype=float)
    for b in range(n_bootstrap):
        idx = rng.choice(n, n, replace=True)
        sa = func(y_true[idx], preds_a[idx])
        sb = func(y_true[idx], preds_b[idx])
        diffs[b] = sb - sa

    ci_lower = np.percentile(diffs, 100 * alpha / 2)
    ci_upper = np.percentile(diffs, 100 * (1 - alpha / 2))

    if metric in lower_is_better:
        # B is better when its error is smaller, i.e. B - A < 0.
        p_value = np.mean(diffs >= 0)  # P(B is not better than A)
        winner = "B" if ci_upper < 0 else ("A" if ci_lower > 0 else "Tie")
    else:
        p_value = np.mean(diffs <= 0)  # P(B is not better than A)
        winner = "B" if ci_lower > 0 else ("A" if ci_upper < 0 else "Tie")

    print(f"A/B Test ({metric}): A={score_a:.4f}, B={score_b:.4f}")
    # Report the confidence level that alpha actually implies.
    print(f"  Δ(B-A)={score_b - score_a:.4f}, "
          f"{100 * (1 - alpha):g}% CI=[{ci_lower:.4f}, {ci_upper:.4f}], "
          f"p={p_value:.4f} → Winner: {winner}")
    return {"score_a": score_a, "score_b": score_b,
            "diff": score_b - score_a, "ci": (ci_lower, ci_upper),
            "p_value": p_value, "winner": winner}
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## パイプライン統合
|
|
232
|
+
|
|
233
|
+
```
|
|
234
|
+
ensemble-methods → model-monitoring → anomaly-detection
|
|
235
|
+
(モデル構築) (監視) (異常検知)
|
|
236
|
+
│ │ ↓
|
|
237
|
+
automl ──────────────┘ active-learning
|
|
238
|
+
(AutoML) (再学習)
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## パイプライン出力
|
|
242
|
+
|
|
243
|
+
| ファイル | 説明 | 次スキル |
|
|
244
|
+
|---------|------|---------|
|
|
245
|
+
| `drift_report.csv` | ドリフト検出結果 | → 再学習判断 |
|
|
246
|
+
| `performance_monitoring.png` | 性能推移 | → reporting |
|
|
247
|
+
| `ab_test_result.json` | A/B テスト結果 | → デプロイ判断 |
|