@nahisaho/satori 0.22.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -20
- package/package.json +1 -1
- package/src/.github/skills/scientific-active-learning/SKILL.md +289 -0
- package/src/.github/skills/scientific-advanced-visualization/SKILL.md +310 -0
- package/src/.github/skills/scientific-anomaly-detection/SKILL.md +296 -0
- package/src/.github/skills/scientific-automl/SKILL.md +264 -0
- package/src/.github/skills/scientific-causal-ml/SKILL.md +240 -0
- package/src/.github/skills/scientific-data-profiling/SKILL.md +247 -0
- package/src/.github/skills/scientific-ensemble-methods/SKILL.md +263 -0
- package/src/.github/skills/scientific-geospatial-analysis/SKILL.md +274 -0
- package/src/.github/skills/scientific-interactive-dashboard/SKILL.md +346 -0
- package/src/.github/skills/scientific-missing-data-analysis/SKILL.md +312 -0
- package/src/.github/skills/scientific-model-monitoring/SKILL.md +247 -0
- package/src/.github/skills/scientific-network-visualization/SKILL.md +278 -0
- package/src/.github/skills/scientific-reproducible-reporting/SKILL.md +330 -0
- package/src/.github/skills/scientific-time-series-forecasting/SKILL.md +246 -0
- package/src/.github/skills/scientific-transfer-learning/SKILL.md +298 -0
- package/src/.github/skills/scientific-uncertainty-quantification/SKILL.md +286 -0
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-time-series-forecasting
|
|
3
|
+
description: |
|
|
4
|
+
ML 時系列予測スキル。Prophet/NeuralProphet・N-BEATS・
|
|
5
|
+
Temporal Fusion Transformer (TFT)・時系列特徴量エンジニアリング・
|
|
6
|
+
バックテスト・多段階予測・アンサンブル予測。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Time Series Forecasting
|
|
10
|
+
|
|
11
|
+
深層学習・ML ベースの時系列予測パイプラインを提供し、
|
|
12
|
+
Prophet から Transformer まで最新手法を網羅する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- Prophet/NeuralProphet で季節性時系列を予測するとき
|
|
17
|
+
- 深層学習 (N-BEATS/TFT) で高精度予測するとき
|
|
18
|
+
- 時系列特徴量エンジニアリングでラグ・ローリング特徴を生成するとき
|
|
19
|
+
- バックテストで予測性能を厳密に評価するとき
|
|
20
|
+
- 複数モデルのアンサンブル予測をするとき
|
|
21
|
+
- 多変量・多段階予測をするとき
|
|
22
|
+
|
|
23
|
+
> **Note**: 古典時系列 (ARIMA/STL/FFT) は `scientific-time-series` を参照。
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. Prophet / NeuralProphet
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import numpy as np
|
|
33
|
+
import pandas as pd
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def prophet_forecast(df, date_col, value_col, periods=30,
                     freq="D", yearly=True, weekly=True,
                     changepoint_prior=0.05):
    """
    Fit a Prophet model and forecast ``periods`` steps beyond the data.

    Parameters:
        df: pd.DataFrame — time series data
        date_col: str — name of the date column
        value_col: str — name of the value column
        periods: int — number of future steps to forecast
        freq: str — step frequency ("D" / "H" / "M")
        yearly: bool — enable yearly seasonality
        weekly: bool — enable weekly seasonality
        changepoint_prior: float — changepoint sensitivity
            (passed as Prophet's ``changepoint_prior_scale``)

    Returns:
        dict with keys:
            "forecast" — DataFrame returned by ``model.predict``
            "model"    — the fitted Prophet instance
            "mae"      — mean absolute error on rows that have an observed y
            "mape"     — mean absolute percentage error on those rows,
                         skipping rows whose observed value is exactly 0
                         (NaN when no such row exists)

    Side effects:
        Saves "prophet_forecast.png" and "prophet_components.png" (relative
        paths) and prints a one-line summary.
    """
    from prophet import Prophet

    # Prophet expects the columns to be named "ds" (timestamp) and "y" (value).
    prophet_df = df[[date_col, value_col]].rename(
        columns={date_col: "ds", value_col: "y"})

    model = Prophet(
        yearly_seasonality=yearly,
        weekly_seasonality=weekly,
        changepoint_prior_scale=changepoint_prior)
    model.fit(prophet_df)

    future = model.make_future_dataframe(periods=periods, freq=freq)
    forecast = model.predict(future)

    # Evaluation: only forecast rows that match an observed value can be
    # scored. Those rows come from the data the model was fitted on, so the
    # metrics describe in-sample fit, not out-of-sample accuracy.
    merged = forecast.merge(prophet_df, on="ds", how="left")
    valid = merged.dropna(subset=["y"])
    residuals = valid["y"] - valid["yhat"]
    mae = np.mean(np.abs(residuals))

    # A percentage error is undefined for an actual value of 0; dividing by
    # it would turn the whole MAPE into inf/NaN, so such rows are skipped.
    nonzero = valid["y"] != 0
    if nonzero.any():
        mape = np.mean(
            np.abs(residuals[nonzero] / valid.loc[nonzero, "y"])) * 100
    else:
        mape = float("nan")

    fig1 = model.plot(forecast)
    fig1.savefig("prophet_forecast.png", dpi=150, bbox_inches="tight")

    fig2 = model.plot_components(forecast)
    fig2.savefig("prophet_components.png", dpi=150, bbox_inches="tight")

    print(f"Prophet: {periods} periods, MAE={mae:.4f}, MAPE={mape:.1f}%")
    return {"forecast": forecast, "model": model,
            "mae": mae, "mape": mape}
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def neuralprophet_forecast(df, date_col, value_col, periods=30,
                           n_lags=60, n_forecasts=30, freq="D"):
    """
    Fit a NeuralProphet model (AR-Net) and forecast future values.

    Parameters:
        df: pd.DataFrame — time series data
        date_col: str — name of the date column
        value_col: str — name of the value column
        periods: int — number of future steps requested
        n_lags: int — number of autoregressive lags
        n_forecasts: int — number of multi-step forecast steps
        freq: str — data frequency passed to ``fit`` (default "D", which is
            the value that used to be hard-coded)

    Returns:
        dict with keys:
            "forecast" — DataFrame returned by ``model.predict``
            "model"    — the fitted NeuralProphet instance
            "metrics"  — whatever ``model.fit`` returned

    Side effects:
        Saves "neuralprophet_forecast.png" (relative path) and prints a
        one-line summary.
    """
    from neuralprophet import NeuralProphet

    # NeuralProphet expects the columns to be named "ds" and "y".
    np_df = df[[date_col, value_col]].rename(
        columns={date_col: "ds", value_col: "y"})

    model = NeuralProphet(
        n_lags=n_lags, n_forecasts=n_forecasts,
        yearly_seasonality=True, weekly_seasonality=True,
        learning_rate=0.01, epochs=100)

    metrics = model.fit(np_df, freq=freq)

    # NOTE(review): with n_lags > 0 the library may tie the forecast length
    # to n_forecasts rather than periods — confirm against the installed
    # NeuralProphet version.
    future = model.make_future_dataframe(
        np_df, periods=periods, n_historic_predictions=True)
    forecast = model.predict(future)

    fig = model.plot(forecast)
    fig.savefig("neuralprophet_forecast.png", dpi=150, bbox_inches="tight")

    print(f"NeuralProphet: lags={n_lags}, forecasts={n_forecasts}")
    return {"forecast": forecast, "model": model, "metrics": metrics}
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## 2. 時系列特徴量エンジニアリング・バックテスト
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
def create_ts_features(df, date_col, value_col,
                       lags=None, rolling_windows=None):
    """
    Build calendar, cyclical, lag, rolling and difference features.

    Parameters:
        df: pd.DataFrame — time series data (not modified; a copy is used)
        date_col: str — name of the date column
        value_col: str — name of the value column
        lags: list[int] | None — lag offsets (default [1, 3, 7, 14, 28])
        rolling_windows: list[int] | None — rolling window sizes
            (default [7, 14, 30])

    Returns:
        pd.DataFrame — copy of ``df`` sorted by ``date_col`` with the
        feature columns appended.
    """
    lags = [1, 3, 7, 14, 28] if lags is None else lags
    rolling_windows = [7, 14, 30] if rolling_windows is None else rolling_windows

    result = df.copy()
    result[date_col] = pd.to_datetime(result[date_col])
    result = result.sort_values(date_col)

    # Calendar features
    for attr in ("dayofweek", "dayofyear", "month", "quarter"):
        result[attr] = getattr(result[date_col].dt, attr)
    result["is_weekend"] = result[date_col].dt.dayofweek.ge(5).astype(int)

    # Cyclical encoding: map each periodic quantity onto the unit circle
    for label, source, period in (("day", "dayofyear", 365.25),
                                  ("week", "dayofweek", 7)):
        angle = 2 * np.pi * result[source] / period
        result[f"sin_{label}"] = np.sin(angle)
        result[f"cos_{label}"] = np.cos(angle)

    # Lag features
    for offset in lags:
        result[f"lag_{offset}"] = result[value_col].shift(offset)

    # Rolling statistics (each window includes the current row)
    for size in rolling_windows:
        roller = result[value_col].rolling(size)
        for stat in ("mean", "std", "min", "max"):
            result[f"rolling_{stat}_{size}"] = getattr(roller, stat)()

    # Difference features
    for step in (1, 7):
        result[f"diff_{step}"] = result[value_col].diff(step)

    n_features = result.shape[1] - df.shape[1]
    print(f"TS Features: {n_features} features created "
          f"(lags={lags}, windows={rolling_windows})")
    return result
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def ts_backtest(df, date_col, value_col, model_fn,
                n_splits=5, horizon=30, gap=0):
    """
    Walk-forward (expanding window) backtest of a forecasting function.

    The data is sorted by ``date_col``. Fold ``i`` trains on the first
    ``fold_size * (i + 1)`` rows and is scored on up to ``horizon`` rows
    starting ``gap`` rows after the end of the training slice. Predictions
    are matched to the test rows by position.

    Parameters:
        df: pd.DataFrame — time series data
        date_col: str — name of the date column
        value_col: str — name of the value column
        model_fn: callable — takes the training DataFrame and returns a
            1-D sequence of predictions for the following steps
        n_splits: int — number of folds
        horizon: int — forecast horizon (rows per test window)
        gap: int — number of rows skipped between train and test

    Returns:
        pd.DataFrame — one row per evaluated fold with columns
        "fold", "train_size", "test_size", "mae", "rmse", "mape".
        Folds whose test window would start past the end of the data are
        not evaluated, so the frame may have fewer than ``n_splits`` rows.

    Raises:
        ValueError: if ``n_splits`` < 1, if the data is too short to give
            every fold at least one training row, or if ``model_fn``
            returns an empty or non 1-D forecast.
    """
    if n_splits < 1:
        raise ValueError("n_splits must be at least 1")

    sorted_df = df.sort_values(date_col).reset_index(drop=True)
    n = len(sorted_df)
    fold_size = (n - horizon) // n_splits
    if fold_size < 1:
        raise ValueError(
            f"not enough rows ({n}) for n_splits={n_splits} "
            f"and horizon={horizon}")

    columns = ["fold", "train_size", "test_size", "mae", "rmse", "mape"]
    results = []

    for i in range(n_splits):
        train_end = fold_size * (i + 1)
        test_start = train_end + gap
        # Training slices only grow, so once the test window starts past the
        # end of the data no later fold can be scored either.
        if test_start >= n:
            break
        # The last window may be shorter than ``horizon`` when gap > 0.
        test_end = min(test_start + horizon, n)

        train_df = sorted_df.iloc[:train_end]
        test_df = sorted_df.iloc[test_start:test_end]

        forecast = np.atleast_1d(
            np.squeeze(np.asarray(model_fn(train_df), dtype=float)))
        if forecast.ndim != 1 or forecast.size == 0:
            raise ValueError(
                "model_fn must return a non-empty 1-D sequence of predictions")

        # Score only the overlap of forecast and test window.
        y_true = test_df[value_col].to_numpy(dtype=float)[:len(forecast)]
        y_pred = forecast[:len(y_true)]

        errors = y_true - y_pred
        mae = float(np.mean(np.abs(errors)))
        rmse = float(np.sqrt(np.mean(errors ** 2)))
        # The small epsilon keeps the ratio finite when an actual value is 0.
        mape = float(np.mean(np.abs(errors / (y_true + 1e-10))) * 100)

        results.append({
            "fold": i, "train_size": train_end,
            "test_size": test_end - test_start,
            "mae": mae, "rmse": rmse, "mape": mape})

    results_df = pd.DataFrame(results, columns=columns)
    if results_df.empty:
        print(f"Backtest (0 folds, h={horizon}): no fold could be evaluated")
        return results_df

    print(f"Backtest ({len(results_df)} folds, h={horizon}): "
          f"MAE={results_df['mae'].mean():.4f} ± {results_df['mae'].std():.4f}")
    return results_df
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## パイプライン統合
|
|
231
|
+
|
|
232
|
+
```
|
|
233
|
+
time-series → time-series-forecasting → model-monitoring
|
|
234
|
+
(古典解析) (ML 予測) (監視)
|
|
235
|
+
│ │ ↓
|
|
236
|
+
spectral-signal ────┘ anomaly-detection
|
|
237
|
+
(周波数解析) (異常検知)
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## パイプライン出力
|
|
241
|
+
|
|
242
|
+
| ファイル | 説明 | 次スキル |
|
|
243
|
+
|---------|------|---------|
|
|
244
|
+
| `prophet_forecast.png` | Prophet 予測結果 | → presentation |
|
|
245
|
+
| `ts_features.csv` | 時系列特徴量 | → ml-regression |
|
|
246
|
+
| `backtest_results.csv` | バックテスト結果 | → model selection |
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-transfer-learning
|
|
3
|
+
description: |
|
|
4
|
+
転移学習・ドメイン適応スキル。事前学習モデルファインチューニング・
|
|
5
|
+
Few-shot / Zero-shot 学習・ドメイン適応 (DA)・
|
|
6
|
+
知識蒸留・マルチタスク学習・科学ドメイン特化モデル転移。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Transfer Learning
|
|
10
|
+
|
|
11
|
+
事前学習モデルの科学データへの転移・ドメイン適応・
|
|
12
|
+
Few-shot 学習パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- 事前学習済みモデル (ImageNet/BERT) をファインチューニングするとき
|
|
17
|
+
- 小規模科学データセットで高精度を実現したいとき
|
|
18
|
+
- ドメイン適応で異なるデータ分布間のギャップを埋めるとき
|
|
19
|
+
- Few-shot 学習で数例から分類するとき
|
|
20
|
+
- 知識蒸留で大規模モデルを軽量化するとき
|
|
21
|
+
- マルチタスク学習で複数タスクを共同学習するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. Vision モデルファインチューニング
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import torch
|
|
31
|
+
import torch.nn as nn
|
|
32
|
+
from torch.utils.data import DataLoader
|
|
33
|
+
import numpy as np
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def finetune_vision_model(train_loader, val_loader,
                          model_name="resnet50",
                          num_classes=10, epochs=20,
                          lr=1e-4, freeze_backbone=True):
    """
    Fine-tune a pretrained torchvision classifier on a new label set.

    Parameters:
        train_loader: DataLoader — training data, yielding (inputs, labels)
        val_loader: DataLoader — validation data, yielding (inputs, labels)
        model_name: str — "resnet50" / "vit_b_16" / "efficientnet_b0"
        num_classes: int — number of output classes
        epochs: int — number of epochs
        lr: float — learning rate
        freeze_backbone: bool — train only the classification head

    Returns:
        (model, history) — the model as it is after the last epoch, and a
        list of per-epoch dicts with keys "epoch", "train_loss", "val_acc".

    Raises:
        ValueError: if the loaded model has none of the ``fc``,
            ``classifier`` or ``heads`` attributes, so no head can be
            replaced.
    """
    import torchvision.models as models

    # Load the model. The string "DEFAULT" is passed straight to the builder
    # (this is what the previous enum-name lookup always fell back to,
    # because names such as "Resnet50_Weights" do not exist).
    model = getattr(models, model_name)(weights="DEFAULT")

    # Replace the final layer and remember the module that must stay
    # trainable when the backbone is frozen.
    if hasattr(model, "fc"):
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        head = model.fc
    elif hasattr(model, "classifier"):
        if isinstance(model.classifier, nn.Sequential):
            in_features = model.classifier[-1].in_features
            model.classifier[-1] = nn.Linear(in_features, num_classes)
        else:
            model.classifier = nn.Linear(
                model.classifier.in_features, num_classes)
        head = model.classifier
    elif hasattr(model, "heads"):
        model.heads.head = nn.Linear(
            model.heads.head.in_features, num_classes)
        head = model.heads
    else:
        raise ValueError(
            f"cannot locate the classification head of {model_name!r}")

    # Freeze the backbone. Freezing by module rather than by substring of
    # the parameter name avoids leaving backbone layers trainable just
    # because their name contains "fc" (e.g. squeeze-excitation fc1/fc2).
    if freeze_backbone:
        for param in model.parameters():
            param.requires_grad = False
        for param in head.parameters():
            param.requires_grad = True

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    optimizer = torch.optim.AdamW(
        filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
    criterion = nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)

    best_acc = 0.0
    history = []

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        n_batches = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            n_batches += 1
        scheduler.step()

        # Validation
        model.eval()
        correct = total = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                _, predicted = outputs.max(1)
                total += y_batch.size(0)
                correct += predicted.eq(y_batch).sum().item()

        # Empty loaders yield NaN instead of raising ZeroDivisionError.
        val_acc = correct / total if total else float("nan")
        mean_loss = train_loss / n_batches if n_batches else float("nan")
        history.append({"epoch": epoch, "train_loss": mean_loss,
                        "val_acc": val_acc})
        if val_acc > best_acc:
            best_acc = val_acc

    print(f"Finetune {model_name}: best val acc = {best_acc:.4f}")
    return model, history
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## 2. NLP モデルファインチューニング
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
def finetune_text_classifier(train_texts, train_labels,
                             val_texts, val_labels,
                             model_name="dmis-lab/biobert-base-cased-v1.2",
                             num_labels=2, epochs=5, lr=2e-5):
    """
    Fine-tune a BERT/BioBERT sequence classifier with the HF Trainer.

    Parameters:
        train_texts: list[str] — training texts
        train_labels: list[int] — training labels
        val_texts: list[str] — validation texts
        val_labels: list[int] — validation labels
        model_name: str — Hugging Face model name
        num_labels: int — number of labels
        epochs: int — number of epochs
        lr: float — learning rate

    Returns:
        (model, tokenizer, metrics) — the fine-tuned model, its tokenizer,
        and the dict returned by ``trainer.evaluate()`` on the validation
        set.

    Side effects:
        Writes checkpoints under "./ft_output" and prints the validation
        accuracy.
    """
    from transformers import (
        AutoTokenizer, AutoModelForSequenceClassification,
        TrainingArguments, Trainer)
    from datasets import Dataset

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name, num_labels=num_labels)

    def tokenize(examples):
        return tokenizer(examples["text"], truncation=True,
                         padding="max_length", max_length=512)

    train_ds = Dataset.from_dict({"text": train_texts, "label": train_labels})
    val_ds = Dataset.from_dict({"text": val_texts, "label": val_labels})
    train_ds = train_ds.map(tokenize, batched=True)
    val_ds = val_ds.map(tokenize, batched=True)

    shared_args = dict(
        output_dir="./ft_output", num_train_epochs=epochs,
        per_device_train_batch_size=16, learning_rate=lr,
        save_strategy="epoch",
        load_best_model_at_end=True, metric_for_best_model="accuracy")
    # Newer transformers releases call this argument ``eval_strategy``;
    # older ones only know ``evaluation_strategy``. Try the new name first
    # and fall back when the installed version rejects it.
    try:
        args = TrainingArguments(eval_strategy="epoch", **shared_args)
    except TypeError:
        args = TrainingArguments(evaluation_strategy="epoch", **shared_args)

    def compute_metrics(eval_pred):
        preds = np.argmax(eval_pred.predictions, axis=-1)
        acc = (preds == eval_pred.label_ids).mean()
        return {"accuracy": acc}

    trainer = Trainer(model=model, args=args, train_dataset=train_ds,
                      eval_dataset=val_ds, compute_metrics=compute_metrics)
    trainer.train()

    metrics = trainer.evaluate()
    print(f"Finetune {model_name}: val acc = {metrics['eval_accuracy']:.4f}")
    return model, tokenizer, metrics
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## 3. Few-shot 学習
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
def prototypical_network(support_X, support_y, query_X,
                         feature_extractor=None):
    """
    Prototypical Network style few-shot classification.

    Each class prototype is the mean embedding of its support samples, and
    every query is assigned to the class of the nearest prototype
    (Euclidean distance).

    Parameters:
        support_X: np.ndarray — support set features, one row per sample
            (array-likes such as nested lists are accepted)
        support_y: np.ndarray — support labels, one per support sample
        query_X: np.ndarray — query set features, one row per sample
        feature_extractor: callable | None — maps raw features to
            embeddings; when None the features are used as-is

    Returns:
        (predictions, confidences) — the predicted label for each query and
        ``exp(-distance)`` to its nearest prototype (1.0 means the query
        sits exactly on the prototype).

    Raises:
        ValueError: if the support set is empty or the number of support
            samples does not match the number of support labels.
    """
    if feature_extractor is not None:
        support_emb = feature_extractor(support_X)
        query_emb = feature_extractor(query_X)
    else:
        support_emb = support_X
        query_emb = query_X

    # Coerce to arrays so that list inputs support boolean-mask indexing.
    support_emb = np.asarray(support_emb)
    query_emb = np.asarray(query_emb)
    labels = np.asarray(support_y)

    if labels.size == 0:
        raise ValueError("support set is empty")
    if len(support_emb) != len(labels):
        raise ValueError(
            f"support_X has {len(support_emb)} samples but support_y has "
            f"{len(labels)} labels")

    classes = np.unique(labels)
    prototypes = np.array([
        support_emb[labels == c].mean(axis=0) for c in classes])

    # Euclidean distance; after the transpose rows are queries and columns
    # are classes.
    dists = np.array([
        np.linalg.norm(query_emb - p, axis=1) for p in prototypes]).T

    predictions = classes[np.argmin(dists, axis=1)]
    confidences = np.exp(-dists.min(axis=1))

    print(f"Few-shot: {len(classes)} classes, "
          f"{len(support_y)} support → {len(query_X)} query")
    return predictions, confidences
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
## 4. 知識蒸留
|
|
225
|
+
|
|
226
|
+
```python
|
|
227
|
+
def knowledge_distillation(teacher, student, train_loader,
                           epochs=20, temperature=4.0, alpha=0.7,
                           lr=1e-3):
    """
    Knowledge distillation (Teacher → Student).

    The student is trained on a weighted sum of a soft loss (KL divergence
    between temperature-scaled student and teacher distributions) and a
    hard loss (cross-entropy against the true labels).

    Parameters:
        teacher: nn.Module — teacher model (put in eval mode, never updated)
        student: nn.Module — student model (trained in place)
        train_loader: DataLoader — training data, yielding (inputs, labels)
        epochs: int — number of epochs
        temperature: float — distillation temperature, must be > 0
        alpha: float — weight of the soft loss; the hard loss gets 1 - alpha
        lr: float — learning rate

    Returns:
        nn.Module — the trained student.

    Raises:
        ValueError: if ``temperature`` is not positive.
    """
    if temperature <= 0:
        # Logits are divided by the temperature, so 0 would produce inf/NaN.
        raise ValueError("temperature must be positive")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    teacher = teacher.to(device).eval()
    student = student.to(device)

    optimizer = torch.optim.AdamW(student.parameters(), lr=lr)
    ce_loss = nn.CrossEntropyLoss()
    kl_loss = nn.KLDivLoss(reduction="batchmean")

    for epoch in range(epochs):
        student.train()
        total_loss = 0.0
        n_batches = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            # The teacher only provides targets; no gradients are needed.
            with torch.no_grad():
                teacher_logits = teacher(X_batch)

            student_logits = student(X_batch)

            # KLDivLoss takes log-probabilities as input and probabilities
            # as target. The T**2 factor keeps the soft-loss gradient scale
            # comparable across temperatures.
            soft_loss = kl_loss(
                nn.functional.log_softmax(student_logits / temperature, dim=1),
                nn.functional.softmax(teacher_logits / temperature, dim=1)
            ) * (temperature ** 2)

            hard_loss = ce_loss(student_logits, y_batch)
            loss = alpha * soft_loss + (1 - alpha) * hard_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            n_batches += 1

        # An empty loader reports NaN instead of raising ZeroDivisionError.
        mean_loss = total_loss / n_batches if n_batches else float("nan")
        print(f"  Epoch {epoch}: loss = {mean_loss:.4f}")

    print(f"Distillation: T={temperature}, α={alpha}, {epochs} epochs")
    return student
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
---
|
|
281
|
+
|
|
282
|
+
## パイプライン統合
|
|
283
|
+
|
|
284
|
+
```
|
|
285
|
+
deep-learning → transfer-learning → active-learning
|
|
286
|
+
(モデル設計) (転移・適応) (効率的ラベル付け)
|
|
287
|
+
│ │ ↓
|
|
288
|
+
healthcare-ai ───────┘ ensemble-methods
|
|
289
|
+
(臨床 AI) (アンサンブル)
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
## パイプライン出力
|
|
293
|
+
|
|
294
|
+
| ファイル | 説明 | 次スキル |
|
|
295
|
+
|---------|------|---------|
|
|
296
|
+
| `ft_model.pt` | ファインチューニング済みモデル | → 推論 |
|
|
297
|
+
| `ft_history.csv` | 学習履歴 | → visualization |
|
|
298
|
+
| `few_shot_predictions.csv` | Few-shot 予測 | → 評価 |
|