@nahisaho/satori 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -16
- package/package.json +1 -1
- package/src/.github/skills/scientific-anomaly-detection/SKILL.md +296 -0
- package/src/.github/skills/scientific-causal-ml/SKILL.md +240 -0
- package/src/.github/skills/scientific-data-profiling/SKILL.md +247 -0
- package/src/.github/skills/scientific-geospatial-analysis/SKILL.md +274 -0
- package/src/.github/skills/scientific-model-monitoring/SKILL.md +247 -0
- package/src/.github/skills/scientific-network-visualization/SKILL.md +278 -0
- package/src/.github/skills/scientific-reproducible-reporting/SKILL.md +330 -0
- package/src/.github/skills/scientific-time-series-forecasting/SKILL.md +246 -0
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-geospatial-analysis
|
|
3
|
+
description: |
|
|
4
|
+
地理空間データ解析スキル。GeoPandas ベクターデータ処理・
|
|
5
|
+
Rasterio ラスター解析・Folium/Kepler.gl インタラクティブ地図・
|
|
6
|
+
空間自己相関 (Moran's I)・クリギング補間・CRS 変換。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Geospatial Analysis
|
|
10
|
+
|
|
11
|
+
地理空間データの前処理・空間統計・インタラクティブ地図可視化
|
|
12
|
+
パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- GeoPandas でベクターデータ (Shapefile/GeoJSON) を処理するとき
|
|
17
|
+
- ラスターデータ (GeoTIFF) を読み込み解析するとき
|
|
18
|
+
- 空間自己相関 (Moran's I / LISA) を検定するとき
|
|
19
|
+
- クリギング (Kriging) で空間補間するとき
|
|
20
|
+
- Folium/Kepler.gl でインタラクティブ地図を作成するとき
|
|
21
|
+
- CRS (座標参照系) 変換・空間結合をするとき
|
|
22
|
+
|
|
23
|
+
> **Note**: 環境特化 GIS (SoilGrids/WorldClim) は `scientific-environmental-geodata` を参照。
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. GeoPandas ベクターデータ処理
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import numpy as np
|
|
33
|
+
import pandas as pd
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def load_and_process_geodata(filepath, target_crs="EPSG:4326"):
    """
    Read a vector dataset with GeoPandas and reproject it when needed.

    A short summary (feature count, CRS change, bounding box and geometry
    type counts) is printed before the frame is returned.

    Parameters:
        filepath: str — path to a Shapefile / GeoJSON / GPKG file
        target_crs: str — coordinate reference system to convert to

    Returns:
        GeoDataFrame expressed in ``target_crs``.
    """
    import geopandas as gpd

    frame = gpd.read_file(filepath)
    source_crs = frame.crs

    # Only reproject when the file's CRS differs from the requested one.
    needs_reprojection = source_crs != target_crs
    if needs_reprojection:
        frame = frame.to_crs(target_crs)

    # total_bounds is ordered [minx, miny, maxx, maxy].
    min_x, min_y, max_x, max_y = frame.total_bounds
    type_counts = frame.geometry.geom_type.value_counts().to_dict()

    print(f"GeoData: {len(frame)} features, CRS: {source_crs} → {target_crs}")
    print(f" Bounds: [{min_x:.4f}, {min_y:.4f}] "
          f"to [{max_x:.4f}, {max_y:.4f}]")
    print(f" Geometry types: {type_counts}")
    return frame
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def spatial_join(gdf_left, gdf_right, how="inner", predicate="intersects"):
    """
    Spatially join two GeoDataFrames.

    The right-hand frame is reprojected to the left-hand CRS first when the
    two differ, then ``geopandas.sjoin`` is applied.

    Parameters:
        gdf_left: GeoDataFrame — left table
        gdf_right: GeoDataFrame — right table
        how: str — "inner" / "left" / "right"
        predicate: str — "intersects" / "within" / "contains"

    Returns:
        GeoDataFrame produced by the join.
    """
    import geopandas as gpd

    # Work on a local alias so the caller's frame is never rebound.
    right_aligned = gdf_right
    if gdf_left.crs != right_aligned.crs:
        right_aligned = right_aligned.to_crs(gdf_left.crs)

    result = gpd.sjoin(gdf_left, right_aligned, how=how, predicate=predicate)

    n_left, n_right, n_out = len(gdf_left), len(right_aligned), len(result)
    print(f"Spatial Join ({predicate}, {how}): "
          f"{n_left} × {n_right} → {n_out}")
    return result
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## 2. 空間自己相関 (Moran's I / LISA)
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
def spatial_autocorrelation(gdf, value_col, weight_type="queen", k=5):
    """
    Test for spatial autocorrelation with Global Moran's I and LISA.

    Builds a spatial weights matrix, computes the global and local Moran
    statistics for ``value_col``, saves a two-panel figure to
    ``spatial_autocorrelation.png`` and returns the results.

    Parameters:
        gdf: GeoDataFrame — geometries plus attribute data
        value_col: str — column to analyse
        weight_type: str — "queen" / "rook" / "knn"
        k: int — number of neighbours used when weight_type is "knn"

    Returns:
        dict with keys "moran_i", "p_value", "gdf" (a copy of the input with
        "lisa_cluster", "lisa_significant" and "local_moran_i" columns added)
        and "fig" (path of the saved image).

    Raises:
        ValueError: if ``weight_type`` is not one of the supported names.
    """
    # Validate first: previously an unknown name left `w` unbound and the
    # function failed later with an UnboundLocalError.
    supported = ("queen", "rook", "knn")
    if weight_type not in supported:
        raise ValueError(
            f"Unknown weight_type {weight_type!r}; expected one of {supported}")

    from libpysal.weights import Queen, Rook, KNN
    from esda.moran import Moran, Moran_Local
    import matplotlib.pyplot as plt

    # Spatial weights matrix
    if weight_type == "queen":
        w = Queen.from_dataframe(gdf)
    elif weight_type == "rook":
        w = Rook.from_dataframe(gdf)
    else:
        w = KNN.from_dataframe(gdf, k=k)

    w.transform = "r"  # row-standardised weights
    y = gdf[value_col].values

    # Global Moran's I
    moran_global = Moran(y, w)

    # LISA (Local Indicators of Spatial Association)
    moran_local = Moran_Local(y, w)

    gdf = gdf.copy()  # do not mutate the caller's frame
    gdf["lisa_cluster"] = moran_local.q  # 1=HH, 2=LH, 3=LL, 4=HL
    gdf["lisa_significant"] = moran_local.p_sim < 0.05
    gdf["local_moran_i"] = moran_local.Is

    # Visualisation: raw values on the left, significant LISA clusters right
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 7))

    gdf.plot(column=value_col, ax=ax1, legend=True,
             cmap="RdYlBu_r", edgecolor="gray", linewidth=0.3)
    ax1.set_title(f"{value_col} (Moran's I={moran_global.I:.4f}, "
                  f"p={moran_global.p_sim:.4f})")

    cluster_labels = {1: "High-High", 2: "Low-High",
                      3: "Low-Low", 4: "High-Low"}
    sig_gdf = gdf[gdf["lisa_significant"]]
    if len(sig_gdf) > 0:
        # Plot readable quadrant names instead of the raw 1-4 codes; the
        # label column exists only on this temporary frame.
        labelled = sig_gdf.assign(
            lisa_label=sig_gdf["lisa_cluster"].map(cluster_labels))
        labelled.plot(column="lisa_label", ax=ax2,
                      categorical=True, legend=True,
                      edgecolor="gray", linewidth=0.3)
    ax2.set_title("LISA Clusters (p < 0.05)")

    plt.tight_layout()
    path = "spatial_autocorrelation.png"
    plt.savefig(path, dpi=150, bbox_inches="tight")
    plt.close()

    print(f"Moran's I = {moran_global.I:.4f}, p = {moran_global.p_sim:.4f}")
    print(f"LISA: {gdf['lisa_significant'].sum()} significant clusters")
    return {"moran_i": moran_global.I, "p_value": moran_global.p_sim,
            "gdf": gdf, "fig": path}
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## 3. クリギング空間補間
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
def kriging_interpolation(points_df, x_col, y_col, value_col,
                          grid_resolution=100,
                          variogram_model="spherical"):
    """
    Interpolate point observations onto a regular grid with Ordinary Kriging.

    The grid spans the min/max of the observed coordinates. A two-panel
    figure (prediction and kriging variance) is written to
    ``kriging_result.png``.

    Parameters:
        points_df: pd.DataFrame — observation points
        x_col, y_col: str — coordinate columns
        value_col: str — column to interpolate
        grid_resolution: int — number of grid nodes along each axis
        variogram_model: str — "spherical" / "exponential" / "gaussian"

    Returns:
        dict with keys "z_pred", "variance", "grid_x", "grid_y" and "fig".
    """
    from pykrige.ok import OrdinaryKriging
    import matplotlib.pyplot as plt

    xs = points_df[x_col].values
    ys = points_df[y_col].values
    zs = points_df[value_col].values

    model = OrdinaryKriging(
        xs, ys, zs,
        variogram_model=variogram_model,
        verbose=False, enable_plotting=False)

    # Regular grid covering the bounding box of the observations.
    grid_x = np.linspace(xs.min(), xs.max(), grid_resolution)
    grid_y = np.linspace(ys.min(), ys.max(), grid_resolution)
    z_pred, ss_pred = model.execute("grid", grid_x, grid_y)

    fig, (pred_ax, var_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: predicted surface with the observation points on top.
    pred_img = pred_ax.imshow(
        z_pred, origin="lower",
        extent=[xs.min(), xs.max(), ys.min(), ys.max()],
        cmap="viridis")
    pred_ax.scatter(xs, ys, c="red", s=10, edgecolors="black", linewidths=0.5)
    pred_ax.set_title(f"Kriging Prediction ({variogram_model})")
    plt.colorbar(pred_img, ax=pred_ax)

    # Right panel: kriging variance returned alongside the prediction.
    var_img = var_ax.imshow(
        ss_pred, origin="lower",
        extent=[xs.min(), xs.max(), ys.min(), ys.max()],
        cmap="Reds")
    var_ax.set_title("Kriging Variance (Uncertainty)")
    plt.colorbar(var_img, ax=var_ax)

    plt.tight_layout()
    path = "kriging_result.png"
    plt.savefig(path, dpi=150, bbox_inches="tight")
    plt.close()

    n_obs = len(xs)
    print(f"Kriging ({variogram_model}): {grid_resolution}×{grid_resolution} grid, "
          f"{n_obs} observation points")

    outcome = {"z_pred": z_pred, "variance": ss_pred}
    outcome["grid_x"] = grid_x
    outcome["grid_y"] = grid_y
    outcome["fig"] = path
    return outcome
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
## 4. Folium インタラクティブ地図
|
|
210
|
+
|
|
211
|
+
```python
|
|
212
|
+
def interactive_map(gdf, value_col=None, popup_cols=None,
                    tiles="CartoDB positron",
                    output="interactive_map.html"):
    """
    Render a GeoDataFrame as an interactive Folium map saved to HTML.

    Polygon data with ``value_col`` set is drawn as a choropleth; anything
    else is drawn as one circle marker per feature centroid.

    Parameters:
        gdf: GeoDataFrame — geospatial data
        value_col: str | None — column used for the choropleth
        popup_cols: list[str] | None — columns shown in marker popups
        tiles: str — tile layer name
        output: str — output HTML path

    Returns:
        str — the path the map was saved to.

    Raises:
        ValueError: if ``gdf`` contains no features.
    """
    if len(gdf) == 0:
        raise ValueError("interactive_map requires at least one feature")

    from html import escape
    import folium

    # Folium positions everything by latitude/longitude, so projected data
    # must be converted before centroids are read.
    if gdf.crs is not None and gdf.crs != "EPSG:4326":
        gdf = gdf.to_crs("EPSG:4326")

    centroids = gdf.geometry.centroid
    center = [centroids.y.mean(), centroids.x.mean()]
    m = folium.Map(location=center, zoom_start=8, tiles=tiles)

    if value_col and gdf.geometry.geom_type.iloc[0] in ["Polygon", "MultiPolygon"]:
        # __geo_interface__ stores each feature id as str(index value), so
        # build an explicit string key column to join on. The previous code
        # referenced an "index" column that does not exist in the frame.
        key_col = "__feature_id"
        data = gdf[[value_col]].copy()
        data[key_col] = [str(idx) for idx in gdf.index]
        folium.Choropleth(
            geo_data=gdf.__geo_interface__,
            data=data, columns=[key_col, value_col],
            key_on="feature.id",
            fill_color="YlOrRd",
            legend_name=value_col
        ).add_to(m)
    else:
        for _, row in gdf.iterrows():
            geom = row.geometry
            # Features without a usable geometry cannot be placed on the map.
            if geom is None or geom.is_empty:
                continue
            popup_text = ""
            if popup_cols:
                # Escape values so attribute text is shown literally rather
                # than interpreted as HTML in the popup.
                popup_text = "<br>".join(
                    [f"<b>{escape(str(c))}</b>: {escape(str(row[c]))}"
                     for c in popup_cols])
            point = geom.centroid
            folium.CircleMarker(
                location=[point.y, point.x],
                radius=5, popup=popup_text or None,
                color="blue", fill=True
            ).add_to(m)

    m.save(output)
    print(f"Interactive map → {output} ({len(gdf)} features)")
    return output
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## パイプライン統合
|
|
259
|
+
|
|
260
|
+
```
|
|
261
|
+
environmental-geodata → geospatial-analysis → advanced-visualization
|
|
262
|
+
(環境データ取得) (空間解析) (高度可視化)
|
|
263
|
+
│ │ ↓
|
|
264
|
+
epidemiology ───────────────┘ interactive-dashboard
|
|
265
|
+
(空間疫学) (ダッシュボード)
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
## パイプライン出力
|
|
269
|
+
|
|
270
|
+
| ファイル | 説明 | 次スキル |
|
|
271
|
+
|---------|------|---------|
|
|
272
|
+
| `spatial_autocorrelation.png` | Moran's I + LISA | → reporting |
|
|
273
|
+
| `kriging_result.png` | クリギング補間 | → visualization |
|
|
274
|
+
| `interactive_map.html` | Folium 地図 | → dashboard |
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-model-monitoring
|
|
3
|
+
description: |
|
|
4
|
+
MLOps モデル監視スキル。データドリフト検出 (Evidently/NannyML)・
|
|
5
|
+
モデル性能劣化検出・特徴量ドリフト・コンセプトドリフト・
|
|
6
|
+
A/B テスト統計・モデルレジストリ・再学習トリガー。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Model Monitoring
|
|
10
|
+
|
|
11
|
+
本番環境の ML モデル監視パイプラインを提供し、
|
|
12
|
+
データドリフト・性能劣化を検出して再学習トリガーを実現する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- デプロイ済みモデルの予測品質を継続監視するとき
|
|
17
|
+
- データドリフト (共変量シフト) を検出するとき
|
|
18
|
+
- コンセプトドリフト (P(Y|X) の変化) を検出するとき
|
|
19
|
+
- A/B テストで新旧モデルを比較するとき
|
|
20
|
+
- 特徴量分布の変化を追跡するとき
|
|
21
|
+
- 再学習トリガーの自動化ルールを設定するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. データドリフト検出
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import numpy as np
|
|
31
|
+
import pandas as pd
|
|
32
|
+
from scipy import stats
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def detect_data_drift(reference_df, current_df,
                      method="ks", threshold=0.05):
    """
    Detect per-feature data drift between reference and current data.

    Only numeric columns of ``reference_df`` that also exist in
    ``current_df`` are compared; missing values are dropped per column.

    Parameters:
        reference_df: pd.DataFrame — training-time (reference) data
        current_df: pd.DataFrame — inference-time (current) data
        method: str — "ks" (KS test) / "psi" (PSI) / "wasserstein"
        threshold: float — significance level for the "ks" method. The
            other methods use fixed cut-offs: PSI > 0.2, and Wasserstein
            distance divided by the reference std > 0.1.

    Returns:
        pd.DataFrame with one row per compared feature. It always has the
        columns "feature" and "is_drift"; the remaining columns depend on
        ``method``. The frame is empty when no numeric column is shared.

    Raises:
        ValueError: if ``method`` is not a supported name.
    """
    supported = ("ks", "psi", "wasserstein")
    if method not in supported:
        raise ValueError(
            f"Unknown method {method!r}; expected one of {supported}")

    numeric_cols = reference_df.select_dtypes(include=[np.number]).columns
    common_cols = [c for c in numeric_cols if c in current_df.columns]

    drift_results = []

    for col in common_cols:
        ref_vals = reference_df[col].dropna().values
        cur_vals = current_df[col].dropna().values

        if method == "ks":
            stat, p_value = stats.ks_2samp(ref_vals, cur_vals)
            is_drift = p_value < threshold
            drift_results.append({
                "feature": col, "statistic": stat,
                "p_value": p_value, "is_drift": is_drift})

        elif method == "psi":
            # Population Stability Index
            psi_val = _compute_psi(ref_vals, cur_vals)
            is_drift = psi_val > 0.2  # >0.2 = significant shift
            drift_results.append({
                "feature": col, "psi": psi_val,
                "is_drift": is_drift,
                "severity": "high" if psi_val > 0.25 else
                            "medium" if psi_val > 0.1 else "low"})

        else:  # "wasserstein"
            w_dist = stats.wasserstein_distance(ref_vals, cur_vals)
            ref_std = np.std(ref_vals)
            # Scale by the reference spread; fall back to the raw distance
            # for constant reference features.
            normalized = w_dist / ref_std if ref_std > 0 else w_dist
            is_drift = normalized > 0.1
            drift_results.append({
                "feature": col, "wasserstein": w_dist,
                "normalized": normalized, "is_drift": is_drift})

    if drift_results:
        result_df = pd.DataFrame(drift_results)
    else:
        # No comparable columns: keep the "is_drift" column so the summary
        # below (and callers) do not hit a KeyError on an empty frame.
        result_df = pd.DataFrame(columns=["feature", "is_drift"])

    n_drift = int(result_df["is_drift"].sum())
    print(f"Data Drift ({method}): {n_drift}/{len(common_cols)} features drifted")
    return result_df
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _compute_psi(expected, actual, n_bins=10):
|
|
88
|
+
"""PSI (Population Stability Index) 計算。"""
|
|
89
|
+
breakpoints = np.quantile(expected, np.linspace(0, 1, n_bins + 1))
|
|
90
|
+
breakpoints[0] = -np.inf
|
|
91
|
+
breakpoints[-1] = np.inf
|
|
92
|
+
|
|
93
|
+
expected_pct = np.histogram(expected, bins=breakpoints)[0] / len(expected)
|
|
94
|
+
actual_pct = np.histogram(actual, bins=breakpoints)[0] / len(actual)
|
|
95
|
+
|
|
96
|
+
expected_pct = np.clip(expected_pct, 1e-4, None)
|
|
97
|
+
actual_pct = np.clip(actual_pct, 1e-4, None)
|
|
98
|
+
|
|
99
|
+
psi = np.sum((actual_pct - expected_pct) * np.log(actual_pct / expected_pct))
|
|
100
|
+
return psi
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## 2. モデル性能劣化検出
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
def detect_performance_degradation(y_true_batches, y_pred_batches,
                                   metric="accuracy",
                                   window_size=10, alert_threshold=0.05):
    """
    Detect model performance degradation over a sequence of batches.

    Each batch is scored with ``metric``. The mean of the first
    ``window_size`` scores is the baseline and the mean of the last
    ``window_size`` scores is the current level; degradation is
    ``baseline - current``. A chart is saved to
    ``performance_monitoring.png``.

    Parameters:
        y_true_batches: list[np.ndarray] — ground truth per batch
        y_pred_batches: list[np.ndarray] — predictions per batch
        metric: str — "accuracy" / "f1" / "rmse" / "auc"
        window_size: int — moving-average window
        alert_threshold: float — drop in score that triggers an alert

    Returns:
        dict with keys "baseline", "current", "degradation", "is_degraded",
        "scores" and "fig".

    Raises:
        ValueError: if the batch lists are empty or differ in length, or
            if ``window_size`` is smaller than 1.
        KeyError: if ``metric`` is not a supported name.
    """
    # zip() would silently drop the unmatched batches, and an empty input
    # would be reported as "OK" with NaN scores, so reject both up front.
    if len(y_true_batches) != len(y_pred_batches):
        raise ValueError(
            "y_true_batches and y_pred_batches must have the same length "
            f"({len(y_true_batches)} != {len(y_pred_batches)})")
    if len(y_true_batches) == 0:
        raise ValueError("at least one batch is required")
    if window_size < 1:
        raise ValueError("window_size must be >= 1")

    from sklearn.metrics import accuracy_score, f1_score, mean_squared_error
    from sklearn.metrics import roc_auc_score
    import matplotlib.pyplot as plt

    metric_funcs = {
        "accuracy": accuracy_score,
        "f1": lambda y, p: f1_score(y, p, average="macro"),
        # RMSE is negated so that, like the other metrics, higher is better.
        "rmse": lambda y, p: -np.sqrt(mean_squared_error(y, p)),
        "auc": lambda y, p: roc_auc_score(y, p)
    }

    func = metric_funcs[metric]
    scores = [func(yt, yp) for yt, yp in zip(y_true_batches, y_pred_batches)]

    # Moving average
    scores_arr = np.array(scores)
    if len(scores_arr) >= window_size:
        ma = np.convolve(scores_arr, np.ones(window_size)/window_size, mode="valid")
        # A "valid" window first becomes complete at batch window_size - 1.
        ma_start = window_size - 1
    else:
        # Too few batches for a full window: fall back to the raw scores,
        # which line up with batch 0 (they were previously drawn shifted).
        ma = scores_arr
        ma_start = 0

    # Baseline (first window_size batches) vs. current (last window_size)
    baseline = np.mean(scores_arr[:window_size])
    current = np.mean(scores_arr[-window_size:])
    degradation = baseline - current

    is_degraded = degradation > alert_threshold

    # Visualisation
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(scores, "b-o", markersize=3, alpha=0.5, label="Batch score")
    ax.plot(range(ma_start, ma_start + len(ma)),
            ma, "r-", linewidth=2, label=f"MA({window_size})")
    ax.axhline(baseline, color="g", linestyle="--",
               label=f"Baseline={baseline:.4f}")
    ax.axhline(baseline - alert_threshold, color="orange", linestyle="--",
               label=f"Alert={baseline - alert_threshold:.4f}")
    ax.set_xlabel("Batch")
    ax.set_ylabel(metric)
    ax.set_title(f"Model Performance Monitoring ({metric})")
    ax.legend()

    path = "performance_monitoring.png"
    plt.savefig(path, dpi=150, bbox_inches="tight")
    plt.close()

    status = "DEGRADED ⚠️" if is_degraded else "OK ✓"
    print(f"Performance ({metric}): baseline={baseline:.4f}, "
          f"current={current:.4f}, Δ={degradation:.4f} → {status}")
    return {"baseline": baseline, "current": current,
            "degradation": degradation, "is_degraded": is_degraded,
            "scores": scores, "fig": path}
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## 3. A/B テスト統計
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
def ab_test_models(y_true, preds_a, preds_b, metric="accuracy",
                   n_bootstrap=10000, alpha=0.05):
    """
    Compare two models on the same labelled data with a paired bootstrap.

    Rows are resampled with replacement (fixed seed 42) and the score
    difference B − A is recorded for every resample. The percentile
    interval of those differences decides the winner.

    Parameters:
        y_true: np.ndarray — ground truth
        preds_a: np.ndarray — predictions of model A
        preds_b: np.ndarray — predictions of model B
        metric: str — "accuracy" / "f1" / "rmse"
        n_bootstrap: int — number of bootstrap resamples
        alpha: float — significance level (interval covers 1 − alpha)

    Returns:
        dict with keys "score_a", "score_b", "diff" (B − A), "ci"
        (lower, upper bounds of B − A), "p_value" (bootstrap share of
        resamples in which B is not better than A) and "winner"
        ("A" / "B" / "Tie").

    Raises:
        ValueError: if the inputs are empty or differ in length.
        KeyError: if ``metric`` is not a supported name.
    """
    # Positional fancy indexing below needs real arrays (lists fail, and a
    # pandas Series would index by label instead of position).
    y_true = np.asarray(y_true)
    preds_a = np.asarray(preds_a)
    preds_b = np.asarray(preds_b)

    n = len(y_true)
    if len(preds_a) != n or len(preds_b) != n:
        raise ValueError(
            "y_true, preds_a and preds_b must have the same length "
            f"({n}, {len(preds_a)}, {len(preds_b)})")
    if n == 0:
        raise ValueError("at least one sample is required")

    from sklearn.metrics import accuracy_score, f1_score, mean_squared_error

    metric_funcs = {
        "accuracy": accuracy_score,
        "f1": lambda y, p: f1_score(y, p, average="macro"),
        "rmse": lambda y, p: np.sqrt(mean_squared_error(y, p))
    }

    func = metric_funcs[metric]
    # RMSE is an error: a *lower* value is the better model, so the sign of
    # the B − A difference must be read the other way round.
    higher_is_better = metric != "rmse"

    score_a = func(y_true, preds_a)
    score_b = func(y_true, preds_b)

    # Bootstrap confidence interval for the difference B − A
    rng = np.random.RandomState(42)
    diffs = []
    for _ in range(n_bootstrap):
        idx = rng.choice(n, n, replace=True)
        sa = func(y_true[idx], preds_a[idx])
        sb = func(y_true[idx], preds_b[idx])
        diffs.append(sb - sa)

    diffs = np.array(diffs)
    ci_lower = np.percentile(diffs, 100 * alpha / 2)
    ci_upper = np.percentile(diffs, 100 * (1 - alpha / 2))

    if higher_is_better:
        p_value = np.mean(diffs <= 0)  # share of resamples with B ≤ A
        b_wins, a_wins = ci_lower > 0, ci_upper < 0
    else:
        p_value = np.mean(diffs >= 0)  # share of resamples with error B ≥ A
        b_wins, a_wins = ci_upper < 0, ci_lower > 0

    winner = "B" if b_wins else ("A" if a_wins else "Tie")

    ci_level = 100 * (1 - alpha)
    print(f"A/B Test ({metric}): A={score_a:.4f}, B={score_b:.4f}")
    print(f" Δ(B-A)={score_b - score_a:.4f}, "
          f"{ci_level:g}% CI=[{ci_lower:.4f}, {ci_upper:.4f}], "
          f"p={p_value:.4f} → Winner: {winner}")
    return {"score_a": score_a, "score_b": score_b,
            "diff": score_b - score_a, "ci": (ci_lower, ci_upper),
            "p_value": p_value, "winner": winner}
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## パイプライン統合
|
|
232
|
+
|
|
233
|
+
```
|
|
234
|
+
ensemble-methods → model-monitoring → anomaly-detection
|
|
235
|
+
(モデル構築) (監視) (異常検知)
|
|
236
|
+
│ │ ↓
|
|
237
|
+
automl ──────────────┘ active-learning
|
|
238
|
+
(AutoML) (再学習)
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## パイプライン出力
|
|
242
|
+
|
|
243
|
+
| ファイル | 説明 | 次スキル |
|
|
244
|
+
|---------|------|---------|
|
|
245
|
+
| `drift_report.csv` | ドリフト検出結果 (`detect_data_drift` の戻り値 DataFrame を `to_csv` で保存) | → 再学習判断 |
|
|
246
|
+
| `performance_monitoring.png` | 性能推移 | → reporting |
|
|
247
|
+
| `ab_test_result.json` | A/B テスト結果 (`ab_test_models` の戻り値を JSON として保存) | → デプロイ判断 |
|