@nahisaho/satori 0.22.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -20
- package/package.json +1 -1
- package/src/.github/skills/scientific-active-learning/SKILL.md +289 -0
- package/src/.github/skills/scientific-advanced-visualization/SKILL.md +310 -0
- package/src/.github/skills/scientific-anomaly-detection/SKILL.md +296 -0
- package/src/.github/skills/scientific-automl/SKILL.md +264 -0
- package/src/.github/skills/scientific-causal-ml/SKILL.md +240 -0
- package/src/.github/skills/scientific-data-profiling/SKILL.md +247 -0
- package/src/.github/skills/scientific-ensemble-methods/SKILL.md +263 -0
- package/src/.github/skills/scientific-geospatial-analysis/SKILL.md +274 -0
- package/src/.github/skills/scientific-interactive-dashboard/SKILL.md +346 -0
- package/src/.github/skills/scientific-missing-data-analysis/SKILL.md +312 -0
- package/src/.github/skills/scientific-model-monitoring/SKILL.md +247 -0
- package/src/.github/skills/scientific-network-visualization/SKILL.md +278 -0
- package/src/.github/skills/scientific-reproducible-reporting/SKILL.md +330 -0
- package/src/.github/skills/scientific-time-series-forecasting/SKILL.md +246 -0
- package/src/.github/skills/scientific-transfer-learning/SKILL.md +298 -0
- package/src/.github/skills/scientific-uncertainty-quantification/SKILL.md +286 -0
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-advanced-visualization
|
|
3
|
+
description: |
|
|
4
|
+
科学データ高度可視化スキル。Plotly インタラクティブ 3D ・
|
|
5
|
+
Altair 宣言的可視化・Seaborn 統計プロット・
|
|
6
|
+
アニメーション・Parallel Coordinates・出版品質図。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Advanced Visualization
|
|
10
|
+
|
|
11
|
+
科学データのインタラクティブ可視化・3D レンダリング・
|
|
12
|
+
出版品質図・アニメーションを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- インタラクティブな 3D 散布図・サーフェスプロットを描くとき
|
|
17
|
+
- Plotly / Altair で動的可視化を作成するとき
|
|
18
|
+
- 多変量データを Parallel Coordinates / Radar で可視化するとき
|
|
19
|
+
- 論文投稿用の出版品質 (Nature/Science style) 図を作成するとき
|
|
20
|
+
- 時系列・シミュレーション結果のアニメーションを作成するとき
|
|
21
|
+
- 複数パネルの複合図を作成するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. Plotly インタラクティブ 3D
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import numpy as np
|
|
31
|
+
import pandas as pd
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def plotly_3d_scatter(df, x, y, z, color=None, size=None,
                      title="3D Scatter Plot"):
    """
    Build an interactive Plotly 3D scatter plot and save it as HTML.

    The figure is written to ``3d_scatter.html`` in the current working
    directory and a one-line summary is printed.

    Parameters:
        df: pd.DataFrame — data to plot
        x, y, z: str — column names used for the three axes
        color: str | None — column used to colour the markers
        size: str | None — column used to size the markers
        title: str — figure title

    Returns:
        The Plotly figure object.
    """
    import plotly.express as px

    out_file = "3d_scatter.html"
    # Axis titles simply echo the column names chosen for each axis.
    scene_titles = {"xaxis_title": x, "yaxis_title": y, "zaxis_title": z}

    figure = px.scatter_3d(
        df,
        x=x,
        y=y,
        z=z,
        color=color,
        size=size,
        title=title,
        opacity=0.7,
    )
    figure.update_layout(scene=scene_titles, width=900, height=700)
    figure.write_html(out_file)

    print(f"3D Scatter: {len(df)} points → {out_file}")
    return figure
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def plotly_surface(X_grid, Y_grid, Z_grid, title="Surface Plot"):
    """
    Build an interactive Plotly 3D surface plot and save it as HTML.

    The figure is written to ``surface_plot.html`` in the current working
    directory and a one-line summary is printed.

    Parameters:
        X_grid, Y_grid, Z_grid: np.ndarray — mesh-grid coordinates
            (``Z_grid`` must expose ``.shape``, which is printed)
        title: str — figure title

    Returns:
        The Plotly figure object.
    """
    import plotly.graph_objects as go

    surface_trace = go.Surface(
        x=X_grid, y=Y_grid, z=Z_grid, colorscale="Viridis",
    )
    figure = go.Figure(data=[surface_trace])
    figure.update_layout(
        title=title,
        scene={"xaxis_title": "X", "yaxis_title": "Y", "zaxis_title": "Z"},
        width=900,
        height=700,
    )

    out_file = "surface_plot.html"
    figure.write_html(out_file)

    print(f"Surface: {Z_grid.shape} grid → {out_file}")
    return figure
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## 2. Altair 宣言的可視化
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
def altair_faceted_chart(df, x, y, color, facet_col=None,
                         chart_type="scatter"):
    """
    Build a declarative Altair chart, optionally faceted, and save it as HTML.

    The chart is written to ``altair_chart.html`` in the current working
    directory and a one-line summary is printed.

    Parameters:
        df: pd.DataFrame — data to plot
        x, y: str — columns (or Altair shorthand) for the axes
        color: str — column (or Altair shorthand) used for colour
        facet_col: str | None — column to facet by (wrapped at 3 columns);
            no faceting when falsy
        chart_type: str — "scatter" / "line" / "bar" / "box"; any other
            value falls back to a plain circle mark

    Returns:
        The Altair chart object (a faceted chart when ``facet_col`` is set).
    """
    import altair as alt

    base = alt.Chart(df).encode(
        x=alt.X(x, scale=alt.Scale(zero=False)),
        y=alt.Y(y, scale=alt.Scale(zero=False)),
        color=color)

    if chart_type == "scatter":
        chart = base.mark_circle(size=60, opacity=0.7)
    elif chart_type == "line":
        chart = base.mark_line()
    elif chart_type == "bar":
        chart = base.mark_bar()
    elif chart_type == "box":
        chart = base.mark_boxplot()
    else:
        chart = base.mark_circle()

    # Size and interactivity must be applied to the unit chart BEFORE
    # faceting: a Vega-Lite facet spec has no top-level width/height, so
    # setting them on the faceted chart fails schema validation on save.
    chart = chart.properties(width=300, height=250).interactive()

    if facet_col:
        chart = chart.facet(facet_col, columns=3)

    path = "altair_chart.html"
    chart.save(path)
    print(f"Altair {chart_type}: {len(df)} rows → {path}")
    return chart
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## 3. 多変量可視化
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
def parallel_coordinates_plot(df, class_col, features=None,
                              title="Parallel Coordinates"):
    """
    Build a Plotly parallel-coordinates plot and save it as HTML.

    The figure is written to ``parallel_coordinates.html`` in the current
    working directory and a one-line summary is printed.

    Parameters:
        df: pd.DataFrame — data to plot
        class_col: str — column used to colour the lines
            (NOTE(review): Plotly's parallel-coordinates colour scale is
            continuous, so this column presumably needs to be numeric —
            confirm against callers)
        features: list[str] | None — columns to show as dimensions
            (None selects every numeric column); the caller's list is
            never modified
        title: str — figure title

    Returns:
        The Plotly figure object.
    """
    import plotly.express as px

    if features is None:
        features = df.select_dtypes(include=[np.number]).columns.tolist()

    # Build a fresh list rather than calling .remove() on `features`, which
    # would silently mutate a list owned by the caller.
    dimensions = [col for col in features if col != class_col]

    fig = px.parallel_coordinates(
        df, color=class_col, dimensions=dimensions,
        title=title, color_continuous_scale=px.colors.diverging.Tealrose)

    fig.update_layout(width=1000, height=500)

    path = "parallel_coordinates.html"
    fig.write_html(path)
    print(f"Parallel Coordinates: {len(dimensions)} dims → {path}")
    return fig
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def radar_chart(categories, values_dict, title="Radar Chart"):
    """
    Build a radar (spider) chart comparing several groups and save it as HTML.

    The figure is written to ``radar_chart.html`` in the current working
    directory and a one-line summary is printed.

    Parameters:
        categories: sequence[str] — axis labels
        values_dict: dict[str, sequence[float]] — {group name: values},
            one value per category; lists, tuples and NumPy arrays are
            all accepted
        title: str — figure title

    Returns:
        The Plotly figure object.
    """
    import plotly.graph_objects as go

    # Normalise to lists first: `array + [array[0]]` on a NumPy array is an
    # element-wise addition (not a concatenation) and tuples cannot be
    # concatenated with a list at all.
    axis_labels = list(categories)
    # Repeat the first label so the polygon closes; slicing (rather than
    # indexing) keeps empty input from raising IndexError.
    closed_labels = axis_labels + axis_labels[:1]

    fig = go.Figure()

    for name, vals in values_dict.items():
        radii = list(vals)
        fig.add_trace(go.Scatterpolar(
            r=radii + radii[:1],
            theta=closed_labels,
            fill="toself", name=name, opacity=0.6))

    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True)),
        title=title, width=600, height=500)

    path = "radar_chart.html"
    fig.write_html(path)
    print(f"Radar: {len(values_dict)} groups × {len(axis_labels)} axes → {path}")
    return fig
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## 4. 出版品質図 (Nature/Science style)
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
def publication_figure(plot_func, figsize=(3.5, 2.8),
                       dpi=300, style="nature",
                       output="publication_fig.pdf"):
    """
    Render a publication-quality (Nature/Science style) matplotlib figure.

    A single-axes figure is created inside a temporary rc context, the
    caller's ``plot_func`` draws on it, the top and right spines are hidden
    and the result is saved to ``output``.

    Parameters:
        plot_func: callable — drawing function that receives the Axes
        figsize: tuple — figure size in inches (Nature 1 column = 3.5 in)
        dpi: int — output resolution
        style: str — "nature" / "science" / "acs"; any other value falls
            back to the "nature" settings
        output: str — output path (.pdf / .svg / .png)

    Returns:
        The ``output`` path that was written.
    """
    import matplotlib.pyplot as plt
    import matplotlib as mpl

    # Per-journal rc overrides (fonts, line widths, marker sizes).
    style_params = {
        "nature": {
            "font.family": "Arial",
            "font.size": 7,
            "axes.linewidth": 0.5,
            "xtick.major.width": 0.5,
            "ytick.major.width": 0.5,
            "lines.linewidth": 1.0,
            "lines.markersize": 3,
        },
        "science": {
            "font.family": "Helvetica",
            "font.size": 8,
            "axes.linewidth": 0.6,
            "xtick.major.width": 0.6,
            "ytick.major.width": 0.6,
            "lines.linewidth": 1.2,
            "lines.markersize": 4,
        },
        "acs": {
            "font.family": "Arial",
            "font.size": 9,
            "axes.linewidth": 0.5,
            "xtick.major.width": 0.5,
            "ytick.major.width": 0.5,
            "lines.linewidth": 1.0,
            "lines.markersize": 4,
        },
    }

    with mpl.rc_context(style_params.get(style, style_params["nature"])):
        fig, ax = plt.subplots(figsize=figsize)
        try:
            plot_func(ax)
            ax.spines["top"].set_visible(False)
            ax.spines["right"].set_visible(False)
            # Use the figure-bound calls: plot_func may have changed
            # pyplot's "current figure", in which case plt.tight_layout()
            # and plt.close() would act on the wrong figure.
            fig.tight_layout()
            fig.savefig(output, dpi=dpi, bbox_inches="tight")
        finally:
            # Always release this figure, even if drawing or saving raised.
            plt.close(fig)

    print(f"Publication figure ({style}): {figsize} @ {dpi}dpi → {output}")
    return output
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
## 5. アニメーション
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
def create_animation(data_frames, x_col, y_col, time_col,
                     title="Animation", fps=10):
    """
    Build a Plotly animated scatter plot and save it as HTML.

    The figure is written to ``animation.html`` in the current working
    directory and a one-line summary is printed.

    Parameters:
        data_frames: pd.DataFrame — data including the time/frame column
        x_col, y_col: str — axis columns
        time_col: str — column whose distinct values define the frames
        title: str — figure title
        fps: int — frame rate; must be positive

    Returns:
        The Plotly figure object.

    Raises:
        ValueError: if ``fps`` is not positive.
    """
    import plotly.express as px

    if fps <= 0:
        raise ValueError(f"fps must be a positive number, got {fps!r}")

    def _padded_range(series):
        # Fixed axis limits keep the view stable across frames. Pad each
        # bound OUTWARD by 10% of its magnitude: for positive data this is
        # the same as min*0.9 / max*1.1, but unlike plain scaling it does
        # not shrink the range (and clip points) when a bound is negative.
        low, high = series.min(), series.max()
        return [low - 0.1 * abs(low), high + 0.1 * abs(high)]

    fig = px.scatter(data_frames, x=x_col, y=y_col,
                     animation_frame=time_col,
                     title=title, opacity=0.7,
                     range_x=_padded_range(data_frames[x_col]),
                     range_y=_padded_range(data_frames[y_col]))

    fig.update_layout(
        width=800, height=600,
        updatemenus=[dict(type="buttons",
                          buttons=[dict(label="▶ Play",
                                        method="animate",
                                        # Per-frame duration in milliseconds.
                                        args=[None, {"frame": {"duration": 1000 // fps}}])])])

    path = "animation.html"
    fig.write_html(path)
    print(f"Animation: {data_frames[time_col].nunique()} frames @ {fps}fps → {path}")
    return fig
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
---
|
|
292
|
+
|
|
293
|
+
## パイプライン統合
|
|
294
|
+
|
|
295
|
+
```
|
|
296
|
+
eda-correlation → advanced-visualization → presentation-design
|
|
297
|
+
(探索的解析) (高度可視化) (プレゼンテーション)
|
|
298
|
+
│ │ ↓
|
|
299
|
+
pca-tsne ───────────────┘ interactive-dashboard
|
|
300
|
+
(次元削減) (ダッシュボード)
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
## パイプライン出力
|
|
304
|
+
|
|
305
|
+
| ファイル | 説明 | 次スキル |
|
|
306
|
+
|---------|------|---------|
|
|
307
|
+
| `3d_scatter.html` | インタラクティブ 3D 散布図 | → dashboard |
|
|
308
|
+
| `publication_fig.pdf` | 出版品質図 | → presentation |
|
|
309
|
+
| `parallel_coordinates.html` | 多変量可視化 | → reporting |
|
|
310
|
+
| `animation.html` | アニメーション | → presentation |
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-anomaly-detection
|
|
3
|
+
description: |
|
|
4
|
+
異常検知・外れ値検出スキル。Isolation Forest・LOF・
|
|
5
|
+
One-Class SVM・Autoencoder 異常検知・統計的工程管理 (SPC)・
|
|
6
|
+
多変量異常検知・異常スコアリング・閾値最適化。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Anomaly Detection
|
|
10
|
+
|
|
11
|
+
科学データにおける異常値・外れ値・異常パターンの検出と
|
|
12
|
+
統計的工程管理 (SPC) パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- 実験データの外れ値を統計的に検出するとき
|
|
17
|
+
- 製造プロセスの異常監視 (SPC) をするとき
|
|
18
|
+
- 多変量データで異常パターンを発見するとき
|
|
19
|
+
- Autoencoder で複雑な異常を検出するとき
|
|
20
|
+
- 異常スコアの閾値を最適化するとき
|
|
21
|
+
- 複数手法のアンサンブル異常検知をするとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. 統計的異常検知アンサンブル
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import numpy as np
|
|
31
|
+
import pandas as pd
|
|
32
|
+
from sklearn.ensemble import IsolationForest
|
|
33
|
+
from sklearn.neighbors import LocalOutlierFactor
|
|
34
|
+
from sklearn.svm import OneClassSVM
|
|
35
|
+
from sklearn.preprocessing import StandardScaler
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def anomaly_detection_ensemble(X, contamination=0.05,
                               methods=None, threshold_vote=2):
    """
    Detect anomalies by majority vote over several unsupervised detectors.

    The input is standardised, each requested detector is fitted on it, and
    a row is flagged when at least ``threshold_vote`` detectors mark it as
    an outlier. A summary is printed.

    Parameters:
        X: np.ndarray | pd.DataFrame — input data, 2-D (rows × features)
        contamination: float — expected anomaly fraction (also used as
            ``nu`` for the One-Class SVM)
        methods: list[str] | None — detectors to use, any of
            "iforest", "lof", "ocsvm" (None uses all three); unknown
            names are skipped with a warning
        threshold_vote: int — minimum number of detectors that must agree

    Returns:
        (result_df, results):
            result_df — DataFrame with the original feature values plus
                one ``anomaly_<method>`` 0/1 column per detector,
                ``ensemble_votes`` and the final ``is_anomaly`` flag
            results — {method: {"n_anomalies": int, "scores": ndarray}},
                where larger scores mean "more anomalous"

    Raises:
        ValueError: if none of the requested methods is supported.
    """
    import warnings

    supported = ("iforest", "lof", "ocsvm")

    if methods is None:
        methods = ["iforest", "lof", "ocsvm"]

    # Validate the request before doing any fitting so that typos are
    # reported instead of being silently dropped from the vote.
    unknown = [m for m in methods if m not in supported]
    if unknown:
        warnings.warn(
            f"Ignoring unsupported anomaly detection methods: {unknown}; "
            f"supported methods are {list(supported)}",
            stacklevel=2)
    if len(unknown) == len(methods):
        raise ValueError(
            f"No supported anomaly detection method requested; "
            f"choose from {list(supported)}")

    if isinstance(X, pd.DataFrame):
        feature_names = X.columns.tolist()
        X_arr = X.values
    else:
        feature_names = [f"f{i}" for i in range(X.shape[1])]
        X_arr = X

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_arr)

    results = {}
    predictions = {}

    for method in methods:
        # Every branch negates the library's score so that, uniformly,
        # a higher value means "more anomalous".
        if method == "iforest":
            model = IsolationForest(
                contamination=contamination, random_state=42, n_jobs=-1)
            preds = model.fit_predict(X_scaled)
            scores = -model.score_samples(X_scaled)
        elif method == "lof":
            model = LocalOutlierFactor(
                n_neighbors=20, contamination=contamination)
            preds = model.fit_predict(X_scaled)
            scores = -model.negative_outlier_factor_
        elif method == "ocsvm":
            model = OneClassSVM(kernel="rbf", nu=contamination)
            preds = model.fit_predict(X_scaled)
            scores = -model.decision_function(X_scaled)
        else:
            continue

        # The detectors label outliers as -1; convert to a 0/1 flag.
        is_anomaly = (preds == -1).astype(int)
        predictions[method] = is_anomaly
        results[method] = {
            "n_anomalies": int(is_anomaly.sum()),
            "scores": scores
        }

    if threshold_vote > len(predictions):
        # With more required votes than detectors nothing can ever be
        # flagged (e.g. a single method with the default threshold of 2).
        warnings.warn(
            f"threshold_vote={threshold_vote} exceeds the number of "
            f"detectors in use ({len(predictions)}); no sample can be "
            f"flagged as an anomaly",
            stacklevel=2)

    # Ensemble majority vote
    vote_matrix = np.column_stack(list(predictions.values()))
    ensemble_votes = vote_matrix.sum(axis=1)
    ensemble_anomaly = (ensemble_votes >= threshold_vote).astype(int)

    result_df = pd.DataFrame(X_arr, columns=feature_names)
    for method, preds in predictions.items():
        result_df[f"anomaly_{method}"] = preds
    result_df["ensemble_votes"] = ensemble_votes
    result_df["is_anomaly"] = ensemble_anomaly

    n_ens = ensemble_anomaly.sum()
    # Report the number of detectors that actually voted, not the raw
    # length of the request (which may contain skipped or repeated names).
    print(f"Anomaly Ensemble ({len(predictions)} methods, vote≥{threshold_vote}): "
          f"{n_ens}/{len(X_arr)} anomalies ({n_ens/len(X_arr)*100:.1f}%)")

    for m, r in results.items():
        print(f"  {m}: {r['n_anomalies']} anomalies")

    return result_df, results
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## 2. Autoencoder 異常検知
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
def autoencoder_anomaly(X, encoding_dim=8, epochs=100,
                        threshold_percentile=95):
    """
    Autoencoder-based anomaly detection via reconstruction error.

    The data is standardised, a small fully connected autoencoder
    (n_features → 64 → 32 → encoding_dim → 32 → 64 → n_features) is
    trained to reconstruct it, and rows whose mean squared reconstruction
    error exceeds the chosen percentile of all errors are flagged.

    Because the threshold is a percentile of the errors on ``X`` itself,
    roughly ``100 - threshold_percentile`` percent of the rows are always
    flagged.

    Parameters:
        X: np.ndarray — input data (intended to be normal data for training)
        encoding_dim: int — size of the latent representation
        epochs: int — number of training epochs
        threshold_percentile: float — percentile of the reconstruction
            error used as the anomaly threshold

    Returns:
        dict with keys:
            "reconstruction_error" — per-row mean squared error (ndarray)
            "threshold" — error value at ``threshold_percentile``
            "is_anomaly" — 0/1 flag per row (ndarray)
            "model" — the trained autoencoder, left in eval mode
    """
    import torch
    import torch.nn as nn
    from torch.utils.data import DataLoader, TensorDataset

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    n_features = X_scaled.shape[1]

    # Autoencoder definition
    class AE(nn.Module):
        def __init__(self):
            super().__init__()
            self.encoder = nn.Sequential(
                nn.Linear(n_features, 64), nn.ReLU(),
                nn.Linear(64, 32), nn.ReLU(),
                nn.Linear(32, encoding_dim))
            self.decoder = nn.Sequential(
                nn.Linear(encoding_dim, 32), nn.ReLU(),
                nn.Linear(32, 64), nn.ReLU(),
                nn.Linear(64, n_features))

        def forward(self, x):
            z = self.encoder(x)
            return self.decoder(z)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = AE().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    X_tensor = torch.as_tensor(X_scaled, dtype=torch.float32).to(device)
    # Input and target are the same tensor: the network learns to
    # reproduce its own input.
    dataset = TensorDataset(X_tensor, X_tensor)
    loader = DataLoader(dataset, batch_size=64, shuffle=True)

    model.train()
    for _ in range(epochs):
        for batch_x, _ in loader:
            optimizer.zero_grad()
            recon = model(batch_x)
            loss = criterion(recon, batch_x)
            loss.backward()
            optimizer.step()
            # No running loss is accumulated: it was never read, and
            # loss.item() would force a device sync on every batch.

    # Reconstruction error
    model.eval()
    with torch.no_grad():
        recon = model(X_tensor).cpu().numpy()

    recon_errors = np.mean((X_scaled - recon) ** 2, axis=1)
    threshold = np.percentile(recon_errors, threshold_percentile)
    is_anomaly = (recon_errors > threshold).astype(int)

    print(f"Autoencoder Anomaly: threshold={threshold:.4f} (P{threshold_percentile}), "
          f"{is_anomaly.sum()} anomalies")
    return {"reconstruction_error": recon_errors, "threshold": threshold,
            "is_anomaly": is_anomaly, "model": model}
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## 3. 統計的工程管理 (SPC)
|
|
186
|
+
|
|
187
|
+
```python
|
|
188
|
+
def spc_control_chart(data, column, subgroup_size=1,
                      chart_type="individuals"):
    """
    Draw an SPC control chart (Individuals-MR, X-bar/R or CUSUM).

    The chart is saved as a PNG in the current working directory and a
    one-line summary is printed.

    Parameters:
        data: pd.DataFrame | pd.Series — time-ordered data
        column: str — column to chart (used only when ``data`` is a
            DataFrame)
        subgroup_size: int — subgroup size; used only by "xbar_r", where
            it must be between 2 and 10 (a trailing incomplete subgroup
            is dropped)
        chart_type: str — "individuals" / "xbar_r" / "cusum"

    Returns:
        dict, depending on ``chart_type``:
            "individuals" — cl, ucl, lcl, ooc_indices, fig
                (``spc_control_chart.png``)
            "xbar_r" — cl, ucl, lcl, r_bar, r_ucl, r_lcl, ooc_indices
                (subgroup indices), fig (``xbar_r_chart.png``)
            "cusum" — target, k, h, cusum_pos, cusum_neg, ooc_indices,
                fig (``cusum_chart.png``)

    Raises:
        ValueError: for an unknown ``chart_type``, or for "xbar_r" with an
            unsupported ``subgroup_size`` or too few observations.
    """
    import matplotlib.pyplot as plt

    if isinstance(data, pd.DataFrame):
        values = data[column].values
    else:
        values = data.values

    if chart_type == "individuals":
        x_bar = np.mean(values)
        mr = np.abs(np.diff(values))
        mr_bar = np.mean(mr)
        d2 = 1.128  # d2 for n=2 (moving range of two consecutive points)

        # Sigma is estimated from the average moving range: MR-bar / d2.
        ucl = x_bar + 3 * (mr_bar / d2)
        lcl = x_bar - 3 * (mr_bar / d2)

        # Out-of-control points: observations outside the 3-sigma limits.
        ooc = np.where((values > ucl) | (values < lcl))[0]

        path = "spc_control_chart.png"
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True)
        try:
            # Individuals chart
            ax1.plot(values, "b-o", markersize=3)
            ax1.axhline(x_bar, color="g", linestyle="-", label=f"CL={x_bar:.3f}")
            ax1.axhline(ucl, color="r", linestyle="--", label=f"UCL={ucl:.3f}")
            ax1.axhline(lcl, color="r", linestyle="--", label=f"LCL={lcl:.3f}")
            if len(ooc) > 0:
                ax1.scatter(ooc, values[ooc], c="red", s=50, zorder=5,
                            label=f"OOC ({len(ooc)})")
            ax1.set_title("Individuals Chart")
            ax1.legend(fontsize=8)

            # Moving Range chart. mr[i] spans observations i and i+1, so it
            # is drawn at x = i+1 to line up with the shared x axis.
            mr_ucl = 3.267 * mr_bar  # D4 for n=2
            ax2.plot(np.arange(1, len(values)), mr, "b-o", markersize=3)
            ax2.axhline(mr_bar, color="g", linestyle="-")
            ax2.axhline(mr_ucl, color="r", linestyle="--")
            ax2.set_title("Moving Range Chart")

            fig.tight_layout()
            fig.savefig(path, dpi=150, bbox_inches="tight")
        finally:
            # Close this specific figure even if plotting or saving failed.
            plt.close(fig)

        print(f"SPC Individuals: CL={x_bar:.3f}, UCL={ucl:.3f}, "
              f"LCL={lcl:.3f}, OOC={len(ooc)}")
        return {"cl": x_bar, "ucl": ucl, "lcl": lcl,
                "ooc_indices": ooc, "fig": path}

    elif chart_type == "xbar_r":
        # Standard Shewhart control-chart constants by subgroup size n:
        # n -> (A2, D3, D4).
        constants = {
            2: (1.880, 0.0, 3.267),
            3: (1.023, 0.0, 2.574),
            4: (0.729, 0.0, 2.282),
            5: (0.577, 0.0, 2.114),
            6: (0.483, 0.0, 2.004),
            7: (0.419, 0.076, 1.924),
            8: (0.373, 0.136, 1.864),
            9: (0.337, 0.184, 1.816),
            10: (0.308, 0.223, 1.777),
        }
        if subgroup_size not in constants:
            raise ValueError(
                f"xbar_r requires subgroup_size between 2 and 10, "
                f"got {subgroup_size!r}")

        n_groups = len(values) // subgroup_size
        if n_groups < 1:
            raise ValueError(
                f"xbar_r needs at least {subgroup_size} observations, "
                f"got {len(values)}")

        # Consecutive observations form a subgroup; a trailing incomplete
        # subgroup is dropped.
        groups = np.asarray(
            values[:n_groups * subgroup_size], dtype=float
        ).reshape(n_groups, subgroup_size)
        means = groups.mean(axis=1)
        ranges = groups.max(axis=1) - groups.min(axis=1)

        a2, d3, d4 = constants[subgroup_size]
        grand_mean = means.mean()
        r_bar = ranges.mean()
        ucl = grand_mean + a2 * r_bar
        lcl = grand_mean - a2 * r_bar
        r_ucl = d4 * r_bar
        r_lcl = d3 * r_bar

        # Out-of-control subgroups: subgroup means outside the limits.
        ooc = np.where((means > ucl) | (means < lcl))[0]

        path = "xbar_r_chart.png"
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True)
        try:
            # X-bar chart
            ax1.plot(means, "b-o", markersize=3)
            ax1.axhline(grand_mean, color="g", linestyle="-",
                        label=f"CL={grand_mean:.3f}")
            ax1.axhline(ucl, color="r", linestyle="--", label=f"UCL={ucl:.3f}")
            ax1.axhline(lcl, color="r", linestyle="--", label=f"LCL={lcl:.3f}")
            if len(ooc) > 0:
                ax1.scatter(ooc, means[ooc], c="red", s=50, zorder=5,
                            label=f"OOC ({len(ooc)})")
            ax1.set_title("X-bar Chart")
            ax1.legend(fontsize=8)

            # R chart
            ax2.plot(ranges, "b-o", markersize=3)
            ax2.axhline(r_bar, color="g", linestyle="-")
            ax2.axhline(r_ucl, color="r", linestyle="--")
            ax2.axhline(r_lcl, color="r", linestyle="--")
            ax2.set_title("R Chart")

            fig.tight_layout()
            fig.savefig(path, dpi=150, bbox_inches="tight")
        finally:
            plt.close(fig)

        print(f"SPC X-bar/R (n={subgroup_size}): CL={grand_mean:.3f}, "
              f"UCL={ucl:.3f}, LCL={lcl:.3f}, OOC={len(ooc)}")
        return {"cl": grand_mean, "ucl": ucl, "lcl": lcl,
                "r_bar": r_bar, "r_ucl": r_ucl, "r_lcl": r_lcl,
                "ooc_indices": ooc, "fig": path}

    elif chart_type == "cusum":
        target = np.mean(values)
        se = np.std(values)
        k = 0.5 * se  # allowance (slack): half a standard deviation
        h = 5 * se    # decision interval: five standard deviations

        cusum_pos = np.zeros(len(values))
        cusum_neg = np.zeros(len(values))

        # Both sums start from 0 and every observation — including the
        # first one — contributes to the running statistic.
        pos = 0.0
        neg = 0.0
        for i, value in enumerate(values):
            pos = max(0, pos + (value - target) - k)
            neg = min(0, neg + (value - target) + k)
            cusum_pos[i] = pos
            cusum_neg[i] = neg

        # Points where either one-sided sum crosses its decision interval.
        ooc = np.where((cusum_pos > h) | (cusum_neg < -h))[0]

        path = "cusum_chart.png"
        fig, ax = plt.subplots(figsize=(12, 5))
        try:
            ax.plot(cusum_pos, "b-", label="CUSUM+")
            ax.plot(cusum_neg, "r-", label="CUSUM-")
            ax.axhline(h, color="b", linestyle="--", alpha=0.5)
            ax.axhline(-h, color="r", linestyle="--", alpha=0.5)
            ax.set_title("CUSUM Control Chart")
            ax.legend()

            fig.savefig(path, dpi=150, bbox_inches="tight")
        finally:
            plt.close(fig)

        print(f"CUSUM: target={target:.3f}, k={k:.3f}, h={h:.3f}")
        return {"target": target, "k": k, "h": h,
                "cusum_pos": cusum_pos, "cusum_neg": cusum_neg,
                "ooc_indices": ooc, "fig": path}

    # Fail loudly instead of silently returning None for a mistyped type.
    raise ValueError(
        f"Unsupported chart_type {chart_type!r}; "
        f"expected 'individuals', 'xbar_r' or 'cusum'")
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
---
|
|
279
|
+
|
|
280
|
+
## パイプライン統合
|
|
281
|
+
|
|
282
|
+
```
|
|
283
|
+
eda-correlation → anomaly-detection → ml-classification
|
|
284
|
+
(探索的解析) (外れ値検出) (モデリング)
|
|
285
|
+
│ │ ↓
|
|
286
|
+
data-profiling ────────┘ model-monitoring
|
|
287
|
+
(データ品質) (モデル監視)
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## パイプライン出力
|
|
291
|
+
|
|
292
|
+
| ファイル | 説明 | 次スキル |
|
|
293
|
+
|---------|------|---------|
|
|
294
|
+
| `anomaly_ensemble.csv` | アンサンブル異常検知結果 | → EDA |
|
|
295
|
+
| `autoencoder_anomaly.json` | AE 異常スコア | → reporting |
|
|
296
|
+
| `spc_control_chart.png` | SPC 管理図 | → process-optimization |
|