@nahisaho/satori 0.22.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,312 @@
1
+ ---
2
+ name: scientific-missing-data-analysis
3
+ description: |
4
+ 欠損データ解析スキル。欠損パターン診断 (MCAR/MAR/MNAR)・
5
+ Little's MCAR テスト・多重代入法 (MICE)・KNN 補完・
6
+ MissForest・VAE/GAIN 補完・欠損パターン可視化・Rubin's Rules。
7
+ ---
8
+
9
+ # Scientific Missing Data Analysis
10
+
11
+ 欠損データの診断・補完・感度分析パイプラインを提供し、
12
+ バイアスのない統計推論を実現する。
13
+
14
+ ## When to Use
15
+
16
+ - データセットの欠損パターンを診断するとき
17
+ - MCAR / MAR / MNAR のメカニズムを判定するとき
18
+ - 多重代入法 (MICE) で欠損値を補完するとき
19
+ - KNN / MissForest / 深層学習ベースの補完をするとき
20
+ - 複数の補完結果を Rubin's Rules で統合するとき
21
+ - 欠損パターンを可視化するとき
22
+
23
+ ---
24
+
25
+ ## Quick Start
26
+
27
+ ## 1. 欠損パターン診断
28
+
29
+ ```python
30
+ import numpy as np
31
+ import pandas as pd
32
+ import matplotlib.pyplot as plt
33
+ import seaborn as sns
34
+
35
+
36
def diagnose_missing_patterns(df, output_prefix="missing"):
    """
    Summarise and plot the missingness structure of a DataFrame.

    Builds a per-column summary table and a 2x2 diagnostic figure
    (missingness matrix of the first 200 rows, per-column missing rate,
    correlation between missingness indicators, and the ten most frequent
    row-wise missingness patterns). The figure is written to
    ``"{output_prefix}_diagnosis.png"``. The plots only support a manual
    MCAR / MAR / MNAR judgement; no statistical test is run here.

    Parameters:
        df: pd.DataFrame — input data
        output_prefix: str — prefix of the output image file name
    Returns:
        dict — "summary" (pd.DataFrame sorted by descending missing
        percentage) and "fig" (path of the saved PNG)
    """
    is_null = df.isnull()
    n_rows, n_cols = df.shape

    missing_counts = is_null.sum()
    missing_pct = (missing_counts / n_rows * 100).round(2)

    summary = pd.DataFrame({
        "column": df.columns,
        "n_missing": missing_counts.values,
        "pct_missing": missing_pct.values,
        "dtype": df.dtypes.values
    })
    summary = summary.sort_values("pct_missing", ascending=False)

    # 2x2 grid of diagnostics, loosely modelled on missingno-style plots.
    _, axes = plt.subplots(2, 2, figsize=(16, 12))
    (ax_matrix, ax_rate), (ax_corr, ax_patterns) = axes

    # (1) Missingness matrix: 1 = missing, 0 = observed.
    indicator = is_null.astype(int).values
    ax_matrix.imshow(indicator[:200], aspect="auto", cmap="Greys",
                     interpolation="none")
    ax_matrix.set_xlabel("Features")
    ax_matrix.set_ylabel("Samples")
    ax_matrix.set_title("Missing Pattern Matrix (first 200 rows)")

    # (2) Missing rate, restricted to columns that have any missing value.
    affected = summary[summary["pct_missing"] > 0]
    ax_rate.barh(affected["column"], affected["pct_missing"])
    ax_rate.set_xlabel("Missing %")
    ax_rate.set_title("Missing Rate per Column")

    # (3) Correlation between the per-column missingness indicators.
    sns.heatmap(is_null.corr(), ax=ax_corr, cmap="RdBu_r", center=0,
                square=True, cbar_kws={"shrink": 0.8})
    ax_corr.set_title("Missing Correlation")

    # (4) Ten most frequent row-wise patterns (one boolean tuple per row).
    row_patterns = is_null.apply(lambda row: tuple(row), axis=1)
    top_patterns = row_patterns.value_counts().head(10)
    positions = range(len(top_patterns))
    ax_patterns.barh(positions, top_patterns.values)
    ax_patterns.set_yticks(positions)
    # Labels are truncated to 40 characters to keep the axis readable.
    ax_patterns.set_yticklabels([str(p)[:40] for p in top_patterns.index],
                                fontsize=7)
    ax_patterns.set_xlabel("Count")
    ax_patterns.set_title("Top 10 Missing Patterns")

    plt.tight_layout()
    path = f"{output_prefix}_diagnosis.png"
    plt.savefig(path, dpi=150, bbox_inches="tight")
    plt.close()

    total_missing = missing_counts.sum()
    overall_pct = total_missing / (n_rows * n_cols) * 100
    print(f"Missing Diagnosis: {n_cols} cols, "
          f"{total_missing} total missing ({overall_pct:.1f}%)")
    return {"summary": summary, "fig": path}
101
+ ```
102
+
103
+ ## 2. Little's MCAR テスト
104
+
105
+ ```python
106
def littles_mcar_test(df):
    """
    Little's MCAR test — test whether values are missing completely at random.

    Rows are grouped by their missingness pattern. For every pattern the
    mean of the observed columns is compared with the overall mean through
    a Mahalanobis-type distance, and the distances are summed into a
    chi-square statistic with ``sum(observed columns per pattern) - n_cols``
    degrees of freedom.

    NOTE: the overall mean and covariance are the available-case
    (pairwise-complete) pandas estimates rather than the EM maximum
    likelihood estimates of Little's original formulation, so the result
    is an approximation. Patterns with fewer than two rows, with no
    observed column, or whose covariance block is not finite are skipped.

    Parameters:
        df: pd.DataFrame — input data; only numeric columns are used
    Returns:
        dict — "chi2", "df", "p_value", "conclusion". When the test cannot
        be computed (no rows, no numeric columns, or non-positive degrees
        of freedom) "chi2" and "p_value" are NaN.
    """
    from scipy import stats

    numeric_df = df.select_dtypes(include=[np.number])
    n_rows, n_cols = numeric_df.shape

    if n_rows == 0 or n_cols == 0:
        return {"chi2": np.nan, "df": 0, "p_value": np.nan,
                "conclusion": "判定不能 (自由度不足)"}

    # Label each row with the id of its missingness pattern.
    null_mask = numeric_df.isnull().to_numpy()
    unique_patterns, pattern_ids = np.unique(
        null_mask, axis=0, return_inverse=True)
    pattern_ids = np.asarray(pattern_ids).ravel()

    # Overall (available-case) mean and covariance.
    global_mean = numeric_df.mean()
    global_cov = numeric_df.cov()

    chi2_stat = 0.0
    df_stat = 0

    for pattern_id, pattern in enumerate(unique_patterns):
        sub_df = numeric_df.loc[pattern_ids == pattern_id]
        n_j = len(sub_df)
        if n_j < 2:
            continue

        # Positions of the columns observed under this pattern.
        obs_cols = np.flatnonzero(~pattern)
        if obs_cols.size == 0:
            continue

        obs_cov = global_cov.iloc[obs_cols, obs_cols].to_numpy()
        if not np.all(np.isfinite(obs_cov)):
            # Some pair of columns has too few joint observations.
            continue

        diff = (sub_df.iloc[:, obs_cols].mean().to_numpy()
                - global_mean.iloc[obs_cols].to_numpy())
        try:
            # Covariance of the pattern mean is Sigma / n_j; pinv tolerates
            # singular blocks.
            cov_inv = np.linalg.pinv(obs_cov / n_j)
        except np.linalg.LinAlgError:
            continue

        chi2_stat += float(diff @ cov_inv @ diff)
        df_stat += int(obs_cols.size)

    df_stat -= n_cols  # degrees-of-freedom correction

    if df_stat <= 0:
        return {"chi2": np.nan, "df": df_stat, "p_value": np.nan,
                "conclusion": "判定不能 (自由度不足)"}

    # The survival function keeps precision for very small p-values,
    # where 1 - cdf would round to zero.
    p_value = float(stats.chi2.sf(chi2_stat, df_stat))
    conclusion = "MCAR (p > 0.05)" if p_value > 0.05 else "Not MCAR (p ≤ 0.05)"

    print(f"Little's MCAR test: χ²={chi2_stat:.2f}, df={df_stat}, "
          f"p={p_value:.4f} → {conclusion}")
    return {"chi2": chi2_stat, "df": df_stat,
            "p_value": p_value, "conclusion": conclusion}
169
+ ```
170
+
171
+ ## 3. 多重代入法 (MICE)
172
+
173
+ ```python
174
def mice_imputation(df, n_imputations=5, max_iter=10, random_state=42):
    """
    MICE (Multiple Imputation by Chained Equations).

    Creates ``n_imputations`` completed copies of the data. Numeric columns
    are imputed with scikit-learn's ``IterativeImputer`` using
    ``sample_posterior=True`` and seed ``random_state + i`` for the i-th
    copy, so the copies differ. Non-numeric columns are filled with their
    mode ("UNKNOWN" when a column has no mode); this fill is deterministic
    and therefore identical in every copy.

    Numeric columns with no observed value cannot be imputed and are
    returned as all-NaN. In each returned frame the numeric columns come
    first, followed by the non-numeric columns.

    Parameters:
        df: pd.DataFrame — data containing missing values
        n_imputations: int — number of imputed datasets
        max_iter: int — maximum number of imputation rounds
        random_state: int — base random seed
    Returns:
        list[pd.DataFrame] — the imputed datasets
    """
    from sklearn.experimental import enable_iterative_imputer  # noqa
    from sklearn.impute import IterativeImputer

    numeric_cols = df.select_dtypes(include=[np.number]).columns
    cat_cols = df.select_dtypes(exclude=[np.number]).columns

    # IterativeImputer drops features that are entirely missing, which
    # would make its output narrower than numeric_cols; fit only on
    # columns that have at least one observed value.
    fit_cols = [col for col in numeric_cols if df[col].notna().any()]

    # The mode fill does not depend on the imputation index: compute once.
    filled_categoricals = {}
    for col in cat_cols:
        modes = df[col].mode()
        fill_value = modes.iloc[0] if not modes.empty else "UNKNOWN"
        filled_categoricals[col] = df[col].fillna(fill_value)

    imputed_datasets = []

    for i in range(n_imputations):
        if fit_cols:
            imputer = IterativeImputer(
                max_iter=max_iter,
                random_state=random_state + i,
                sample_posterior=True)
            imputed_numeric = pd.DataFrame(
                imputer.fit_transform(df[fit_cols]),
                columns=fit_cols, index=df.index)
        else:
            imputed_numeric = pd.DataFrame(index=df.index)

        # Restore the original numeric column order; all-missing columns
        # reappear as NaN.
        imputed_df = imputed_numeric.reindex(columns=numeric_cols)
        for col in cat_cols:
            imputed_df[col] = filled_categoricals[col]

        imputed_datasets.append(imputed_df)

    print(f"MICE: {n_imputations} datasets × {max_iter} iterations, "
          f"{len(numeric_cols)} numeric cols")
    return imputed_datasets
212
+
213
+
214
def rubins_rules(estimates, variances):
    """
    Rubin's Rules — pool results obtained from multiply imputed datasets.

    Parameters:
        estimates: list[float] — point estimate from each imputed dataset
        variances: list[float] — variance of that estimate in each dataset
    Returns:
        dict — "pooled_estimate", "total_variance", "within_variance",
        "between_variance" and "df" (degrees of freedom)
    Raises:
        ValueError — inputs are empty or have different lengths

    With a single imputation the between-imputation variance cannot be
    estimated; it is reported as 0, so the total variance equals the
    within-imputation variance.
    """
    estimates = np.asarray(estimates, dtype=float)
    variances = np.asarray(variances, dtype=float)
    if estimates.size == 0:
        raise ValueError("estimates must contain at least one value")
    if estimates.shape != variances.shape:
        raise ValueError("estimates and variances must have the same length")

    m = estimates.size
    Q_bar = float(np.mean(estimates))
    U_bar = float(np.mean(variances))  # Within-imputation variance
    # Between-imputation variance (sample variance needs m >= 2).
    B = float(np.var(estimates, ddof=1)) if m > 1 else 0.0
    T = U_bar + (1 + 1 / m) * B  # Total variance

    # Degrees of freedom: classic large-sample formula of Rubin (1987),
    # nu = (m - 1) * (1 + 1/r)^2, where r is the relative increase in
    # variance due to missingness. This is not the Barnard-Rubin
    # small-sample adjustment, which also needs the complete-data df.
    r = (1 + 1 / m) * B / U_bar if U_bar > 0 else np.inf
    df_old = (m - 1) * (1 + 1 / r) ** 2 if r > 0 else np.inf

    print(f"Rubin's Rules: Q̄={Q_bar:.4f}, T={T:.4f}, "
          f"within={U_bar:.4f}, between={B:.4f}")
    return {"pooled_estimate": Q_bar, "total_variance": T,
            "within_variance": U_bar, "between_variance": B,
            "df": df_old}
237
+ ```
238
+
239
+ ## 4. KNN / MissForest 補完
240
+
241
+ ```python
242
def knn_imputation(df, n_neighbors=5):
    """
    KNN imputation of missing values in the numeric columns.

    Only numeric columns are processed and returned; non-numeric columns
    are not part of the result. Numeric columns with no observed value
    cannot be imputed and are returned as all-NaN.

    Parameters:
        df: pd.DataFrame — data containing missing values
        n_neighbors: int — number of neighbours
    Returns:
        pd.DataFrame — numeric columns of ``df`` with missing values filled
    """
    from sklearn.impute import KNNImputer

    numeric_cols = df.select_dtypes(include=[np.number]).columns

    # KNNImputer drops features that are entirely missing, which would make
    # its output narrower than numeric_cols; fit only on columns that have
    # at least one observed value.
    fit_cols = [col for col in numeric_cols if df[col].notna().any()]

    if fit_cols:
        imputer = KNNImputer(n_neighbors=n_neighbors)
        filled = pd.DataFrame(
            imputer.fit_transform(df[fit_cols]),
            columns=fit_cols, index=df.index)
    else:
        filled = pd.DataFrame(index=df.index)

    # Restore the original column order; all-missing columns stay NaN.
    imputed = filled.reindex(columns=numeric_cols)

    # Count only the cells that were actually filled.
    n_imputed = int(df[fit_cols].isnull().sum().sum())
    print(f"KNN Imputation (k={n_neighbors}): {n_imputed} values imputed")
    return imputed
261
+
262
+
263
def missforest_imputation(df, n_estimators=100, max_iter=10, random_state=42):
    """
    MissForest-style imputation (iterative imputation with random forests).

    Implemented as scikit-learn's ``IterativeImputer`` with a
    ``RandomForestRegressor`` estimator. Only numeric columns are processed
    and returned. Numeric columns with no observed value cannot be imputed
    and are returned as all-NaN.

    Parameters:
        df: pd.DataFrame — data containing missing values
        n_estimators: int — number of trees in each random forest
        max_iter: int — maximum number of imputation rounds
        random_state: int — seed for both the forest and the imputer
    Returns:
        pd.DataFrame — numeric columns of ``df`` with missing values filled
    """
    from sklearn.experimental import enable_iterative_imputer  # noqa
    from sklearn.impute import IterativeImputer
    from sklearn.ensemble import RandomForestRegressor

    numeric_cols = df.select_dtypes(include=[np.number]).columns

    # IterativeImputer drops features that are entirely missing, which
    # would make its output narrower than numeric_cols; fit only on
    # columns that have at least one observed value.
    fit_cols = [col for col in numeric_cols if df[col].notna().any()]

    if fit_cols:
        imputer = IterativeImputer(
            estimator=RandomForestRegressor(n_estimators=n_estimators,
                                            random_state=random_state,
                                            n_jobs=-1),
            max_iter=max_iter, random_state=random_state)
        filled = pd.DataFrame(
            imputer.fit_transform(df[fit_cols]),
            columns=fit_cols, index=df.index)
    else:
        filled = pd.DataFrame(index=df.index)

    # Restore the original column order; all-missing columns stay NaN.
    imputed = filled.reindex(columns=numeric_cols)

    # Count only the cells that were actually filled.
    n_imputed = int(df[fit_cols].isnull().sum().sum())
    print(f"MissForest (n_trees={n_estimators}, iter={max_iter}): "
          f"{n_imputed} values imputed")
    return imputed
291
+ ```
292
+
293
+ ---
294
+
295
+ ## パイプライン統合
296
+
297
+ ```
298
+ eda-correlation → missing-data-analysis → ml-classification
299
+ (探索的解析) (欠損診断・補完) (モデリング)
300
+ │ │ ↓
301
+ statistical-testing ────┘ advanced-visualization
302
+ (統計検定) (結果可視化)
303
+ ```
304
+
305
+ ## パイプライン出力
306
+
307
+ | ファイル | 説明 | 次スキル |
308
+ |---------|------|---------|
309
+ | `missing_diagnosis.png` | 欠損パターン可視化 | → reporting |
310
+ | `mcar_test_result.json` | Little's MCAR テスト | → 補完戦略選択 |
311
+ | `imputed_datasets/` | MICE 多重代入データ | → ml-classification |
312
+ | `imputation_comparison.csv` | 補完手法比較 | → 最終選択 |
@@ -0,0 +1,298 @@
1
+ ---
2
+ name: scientific-transfer-learning
3
+ description: |
4
+ 転移学習・ドメイン適応スキル。事前学習モデルファインチューニング・
5
+ Few-shot / Zero-shot 学習・ドメイン適応 (DA)・
6
+ 知識蒸留・マルチタスク学習・科学ドメイン特化モデル転移。
7
+ ---
8
+
9
+ # Scientific Transfer Learning
10
+
11
+ 事前学習モデルの科学データへの転移・ドメイン適応・
12
+ Few-shot 学習パイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - 事前学習済みモデル (ImageNet/BERT) をファインチューニングするとき
17
+ - 小規模科学データセットで高精度を実現したいとき
18
+ - ドメイン適応で異なるデータ分布間のギャップを埋めるとき
19
+ - Few-shot 学習で数例から分類するとき
20
+ - 知識蒸留で大規模モデルを軽量化するとき
21
+ - マルチタスク学習で複数タスクを共同学習するとき
22
+
23
+ ---
24
+
25
+ ## Quick Start
26
+
27
+ ## 1. Vision モデルファインチューニング
28
+
29
+ ```python
30
+ import torch
31
+ import torch.nn as nn
32
+ from torch.utils.data import DataLoader
33
+ import numpy as np
34
+
35
+
36
def finetune_vision_model(train_loader, val_loader,
                          model_name="resnet50",
                          num_classes=10, epochs=20,
                          lr=1e-4, freeze_backbone=True):
    """
    Fine-tune a pretrained torchvision classifier on a new label set.

    The model is built with its default pretrained weights, its final
    classification layer is replaced by a fresh ``nn.Linear`` with
    ``num_classes`` outputs, and it is trained with AdamW, cross-entropy
    loss and a cosine-annealed learning rate.

    Parameters:
        train_loader: DataLoader — training batches of (inputs, labels)
        val_loader: DataLoader — validation batches of (inputs, labels)
        model_name: str — "resnet50" / "vit_b_16" / "efficientnet_b0"
        num_classes: int — number of target classes
        epochs: int — number of epochs
        lr: float — learning rate
        freeze_backbone: bool — train only the classification head
    Returns:
        tuple — (model, history). ``model`` holds the weights of the final
        epoch (not of the best epoch); ``history`` is a list of dicts with
        "epoch", "train_loss" and "val_acc".
    Raises:
        ValueError — the model exposes none of ``fc`` / ``classifier`` /
        ``heads``, so its head cannot be replaced.
    """
    import torchvision.models as models

    # Load the model. torchvision resolves the string "DEFAULT" to the
    # model's own weights enum, so the enum class name need not be guessed.
    model_fn = getattr(models, model_name)
    model = model_fn(weights="DEFAULT")

    # Replace the final layer and remember which attribute is the head.
    if hasattr(model, "fc"):
        head_attr = "fc"
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    elif hasattr(model, "classifier"):
        head_attr = "classifier"
        if isinstance(model.classifier, nn.Sequential):
            in_features = model.classifier[-1].in_features
            model.classifier[-1] = nn.Linear(in_features, num_classes)
        else:
            in_features = model.classifier.in_features
            model.classifier = nn.Linear(in_features, num_classes)
    elif hasattr(model, "heads"):
        head_attr = "heads"
        in_features = model.heads.head.in_features
        model.heads.head = nn.Linear(in_features, num_classes)
    else:
        raise ValueError(
            f"Unsupported architecture '{model_name}': "
            "no 'fc', 'classifier' or 'heads' attribute to replace")

    # Freeze the backbone: freeze everything, then re-enable the head
    # module. Matching on parameter-name substrings would also leave
    # backbone layers such as squeeze-excitation "fc1"/"fc2" trainable.
    if freeze_backbone:
        for param in model.parameters():
            param.requires_grad = False
        for param in getattr(model, head_attr).parameters():
            param.requires_grad = True

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    optimizer = torch.optim.AdamW(
        filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
    criterion = nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)

    best_acc = 0.0
    history = []

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        n_batches = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            n_batches += 1
        scheduler.step()

        # Validation
        model.eval()
        correct = total = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                _, predicted = outputs.max(1)
                total += y_batch.size(0)
                correct += predicted.eq(y_batch).sum().item()

        # Empty loaders must not raise ZeroDivisionError; an empty
        # validation set yields NaN rather than a misleading 0.0.
        val_acc = correct / total if total else float("nan")
        history.append({"epoch": epoch,
                        "train_loss": train_loss / max(n_batches, 1),
                        "val_acc": val_acc})
        if val_acc > best_acc:
            best_acc = val_acc

    print(f"Finetune {model_name}: best val acc = {best_acc:.4f}")
    return model, history
127
+ ```
128
+
129
+ ## 2. NLP モデルファインチューニング
130
+
131
+ ```python
132
def finetune_text_classifier(train_texts, train_labels,
                             val_texts, val_labels,
                             model_name="dmis-lab/biobert-base-cased-v1.2",
                             num_labels=2, epochs=5, lr=2e-5):
    """
    Fine-tune a BERT / BioBERT sequence classifier with the HF Trainer.

    Texts are tokenised with truncation and padding to 512 tokens. The
    model is evaluated and checkpointed after every epoch under
    ``./ft_output``, and the checkpoint with the best validation accuracy
    is loaded at the end of training.

    Parameters:
        train_texts: list[str] — training texts
        train_labels: list[int] — training labels
        val_texts: list[str] — validation texts
        val_labels: list[int] — validation labels
        model_name: str — Hugging Face model name
        num_labels: int — number of labels
        epochs: int — number of epochs
        lr: float — learning rate
    Returns:
        tuple — (model, tokenizer, metrics), where ``metrics`` is the dict
        returned by ``Trainer.evaluate()`` on the validation set
    """
    import inspect

    from transformers import (
        AutoTokenizer, AutoModelForSequenceClassification,
        TrainingArguments, Trainer)
    from datasets import Dataset

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name, num_labels=num_labels)

    def tokenize(examples):
        return tokenizer(examples["text"], truncation=True,
                         padding="max_length", max_length=512)

    train_ds = Dataset.from_dict({"text": train_texts, "label": train_labels})
    val_ds = Dataset.from_dict({"text": val_texts, "label": val_labels})
    train_ds = train_ds.map(tokenize, batched=True)
    val_ds = val_ds.map(tokenize, batched=True)

    # Newer transformers releases renamed `evaluation_strategy` to
    # `eval_strategy`; use whichever keyword this version accepts.
    accepted = inspect.signature(TrainingArguments.__init__).parameters
    eval_key = ("eval_strategy" if "eval_strategy" in accepted
                else "evaluation_strategy")

    args = TrainingArguments(
        output_dir="./ft_output", num_train_epochs=epochs,
        per_device_train_batch_size=16, learning_rate=lr,
        save_strategy="epoch",
        load_best_model_at_end=True, metric_for_best_model="accuracy",
        **{eval_key: "epoch"})

    def compute_metrics(eval_pred):
        preds = np.argmax(eval_pred.predictions, axis=-1)
        acc = (preds == eval_pred.label_ids).mean()
        return {"accuracy": float(acc)}

    trainer = Trainer(model=model, args=args, train_dataset=train_ds,
                      eval_dataset=val_ds, compute_metrics=compute_metrics)
    trainer.train()

    metrics = trainer.evaluate()
    print(f"Finetune {model_name}: val acc = {metrics['eval_accuracy']:.4f}")
    return model, tokenizer, metrics
185
+ ```
186
+
187
+ ## 3. Few-shot 学習
188
+
189
+ ```python
190
def prototypical_network(support_X, support_y, query_X,
                         feature_extractor=None):
    """
    Prototypical Network — few-shot classification by nearest prototype.

    Each class prototype is the mean embedding of its support examples;
    every query is assigned to the class whose prototype is closest in
    Euclidean distance.

    Parameters:
        support_X: np.ndarray — support features, one row per example
        support_y: np.ndarray — support labels
        query_X: np.ndarray — query features, one row per example
        feature_extractor: callable | None — maps raw inputs to embeddings;
            when None the inputs are used as embeddings directly
    Returns:
        tuple — (predictions, confidences). ``confidences`` is
        ``exp(-distance to the nearest prototype)``: a score in (0, 1],
        not a calibrated probability.
    Raises:
        ValueError — the support set is empty, or the number of support
        embeddings differs from the number of support labels
    """
    if feature_extractor is not None:
        support_emb = feature_extractor(support_X)
        query_emb = feature_extractor(query_X)
    else:
        support_emb = support_X
        query_emb = query_X

    # Accept plain sequences as well as arrays; boolean-mask indexing
    # below needs real ndarrays.
    support_emb = np.asarray(support_emb)
    query_emb = np.asarray(query_emb)
    support_y = np.asarray(support_y)

    if support_y.shape[0] == 0:
        raise ValueError("support set must contain at least one example")
    if support_emb.shape[0] != support_y.shape[0]:
        raise ValueError(
            "support_X and support_y must have the same number of examples")

    classes = np.unique(support_y)
    prototypes = np.array([
        support_emb[support_y == c].mean(axis=0) for c in classes])

    # Euclidean distance of every query to every prototype,
    # shape (n_query, n_classes).
    dists = np.array([
        np.linalg.norm(query_emb - p, axis=1) for p in prototypes]).T

    predictions = classes[np.argmin(dists, axis=1)]
    confidences = np.exp(-dists.min(axis=1))

    print(f"Few-shot: {len(classes)} classes, "
          f"{len(support_y)} support → {len(query_X)} query")
    return predictions, confidences
222
+ ```
223
+
224
+ ## 4. 知識蒸留
225
+
226
+ ```python
227
def knowledge_distillation(teacher, student, train_loader,
                           epochs=20, temperature=4.0, alpha=0.7,
                           lr=1e-3):
    """
    Knowledge distillation: train ``student`` to imitate ``teacher``.

    The per-batch loss is
    ``alpha * soft_loss + (1 - alpha) * hard_loss``, where the soft loss is
    the KL divergence between the temperature-softened student and teacher
    distributions (scaled by ``temperature ** 2``) and the hard loss is the
    cross-entropy against the true labels. The teacher is put in eval mode
    and only evaluated under ``torch.no_grad()``; the student is optimised
    with AdamW.

    Parameters:
        teacher: nn.Module — teacher model (not updated)
        student: nn.Module — student model
        train_loader: DataLoader — training batches of (inputs, labels)
        epochs: int — number of epochs
        temperature: float — distillation temperature
        alpha: float — weight of the soft loss
        lr: float — learning rate
    Returns:
        nn.Module — the trained student
    """
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)

    teacher = teacher.to(device)
    teacher = teacher.eval()
    student = student.to(device)

    hard_criterion = nn.CrossEntropyLoss()
    soft_criterion = nn.KLDivLoss(reduction="batchmean")
    optimizer = torch.optim.AdamW(student.parameters(), lr=lr)

    functional = nn.functional
    # The T^2 factor compensates for the 1/T^2 scaling of the gradients of
    # the softened distributions.
    t_squared = temperature ** 2

    for epoch in range(epochs):
        student.train()
        batch_losses = []

        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            # The teacher only provides targets; no gradient is needed.
            with torch.no_grad():
                teacher_logits = teacher(inputs)
            student_logits = student(inputs)

            student_log_probs = functional.log_softmax(
                student_logits / temperature, dim=1)
            teacher_probs = functional.softmax(
                teacher_logits / temperature, dim=1)
            soft_loss = soft_criterion(student_log_probs,
                                       teacher_probs) * t_squared
            hard_loss = hard_criterion(student_logits, targets)

            loss = alpha * soft_loss + (1 - alpha) * hard_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            batch_losses.append(loss.item())

        mean_loss = sum(batch_losses) / len(train_loader)
        print(f" Epoch {epoch}: loss = {mean_loss:.4f}")

    print(f"Distillation: T={temperature}, α={alpha}, {epochs} epochs")
    return student
278
+ ```
279
+
280
+ ---
281
+
282
+ ## パイプライン統合
283
+
284
+ ```
285
+ deep-learning → transfer-learning → active-learning
286
+ (モデル設計) (転移・適応) (効率的ラベル付け)
287
+ │ │ ↓
288
+ healthcare-ai ───────┘ ensemble-methods
289
+ (臨床 AI) (アンサンブル)
290
+ ```
291
+
292
+ ## パイプライン出力
293
+
294
+ | ファイル | 説明 | 次スキル |
295
+ |---------|------|---------|
296
+ | `ft_model.pt` | ファインチューニング済みモデル | → 推論 |
297
+ | `ft_history.csv` | 学習履歴 | → visualization |
298
+ | `few_shot_predictions.csv` | Few-shot 予測 | → 評価 |