@nahisaho/satori 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,285 @@
1
+ ---
2
+ name: scientific-radiology-ai
3
+ description: |
4
+ 放射線診断支援 AI スキル。CADe/CADx パイプライン・
5
+ CT/MRI 分類・セグメンテーション・Grad-CAM 説明可能性・
6
+ 構造化レポート・AI-RADS グレーディング。
7
+ ※ scientific-medical-imaging (DICOM/WSI/Radiomics) の
8
+ 放射線診断 AI 特化拡張。
9
+ ---
10
+
11
+ # Scientific Radiology AI
12
+
13
+ 放射線画像(CT/MRI/X 線)に対する AI 診断支援
14
+ パイプラインを提供する。MONAI ベースの学習・推論・
15
+ 説明可能性・構造化レポート生成を含む。
16
+
17
+ ## When to Use
18
+
19
+ - CT/MRI/X 線画像の AI 分類・セグメンテーションを行うとき
20
+ - CADe (検出) / CADx (診断) パイプラインを構築するとき
21
+ - Grad-CAM で AI 判断の説明可能性を付与するとき
22
+ - 構造化放射線レポートを自動生成するとき
23
+ - AI-RADS スコアリングを実装するとき
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. MONAI 放射線 AI 分類パイプライン
30
+
31
+ ```python
32
+ import numpy as np
33
+ import torch
34
+ import torch.nn as nn
35
+
36
+
37
def build_radiology_classifier(in_channels=1, num_classes=2,
                               spatial_dims=3,
                               architecture="densenet121"):
    """
    Build a MONAI classification network for radiology images.

    Only the requested network is instantiated; the other architectures
    are never constructed.

    Parameters:
        in_channels: int — number of input channels (CT=1, multimodal MRI=4)
        num_classes: int — number of output classes
        spatial_dims: int — 2 (2D slices) or 3 (3D volumes)
        architecture: str — "densenet121" / "resnet50" / "efficientnet".
            Any other value falls back to "densenet121" and emits a
            ``UserWarning`` naming the unsupported value.

    Returns:
        The constructed MONAI network.
    """
    import warnings

    import monai.networks.nets as nets

    # Factories instead of instances: building every network just to pick
    # one wastes memory, and a constructor failure in an architecture the
    # caller did not ask for would break every call.
    builders = {
        "densenet121": lambda: nets.DenseNet121(
            spatial_dims=spatial_dims,
            in_channels=in_channels,
            out_channels=num_classes),
        "resnet50": lambda: nets.ResNet(
            block="bottleneck", layers=[3, 4, 6, 3],
            block_inplanes=[64, 128, 256, 512],
            spatial_dims=spatial_dims,
            n_input_channels=in_channels,
            num_classes=num_classes),
        "efficientnet": lambda: nets.EfficientNetBN(
            "efficientnet-b0",
            spatial_dims=spatial_dims,
            in_channels=in_channels,
            num_classes=num_classes),
    }

    if architecture not in builders:
        # Keep the historical fallback, but make it visible and make sure
        # the log line below reports the network that was actually built.
        warnings.warn(
            f"Unknown architecture {architecture!r}; "
            f"falling back to 'densenet121' "
            f"(supported: {sorted(builders)})",
            stacklevel=2)
        architecture = "densenet121"

    model = builders[architecture]()
    total_params = sum(p.numel() for p in model.parameters())
    print(f"Radiology classifier: {architecture} | "
          f"{total_params:,} params | {spatial_dims}D")
    return model
73
+
74
+
75
def _ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is 0."""
    return numerator / denominator if denominator else 0.0


def train_radiology_model(model, train_loader, val_loader,
                          epochs=50, lr=1e-4, device="cuda",
                          checkpoint_path="best_radiology_model.pt"):
    """
    Train a radiology classification model and keep the best checkpoint.

    Each batch yielded by the loaders must be a mapping with an "image"
    tensor and a "label" tensor. The model is optimised with AdamW and
    cross-entropy loss under a cosine-annealing learning-rate schedule.

    Parameters:
        model: nn.Module — classification model
        train_loader: iterable of batches — training data
        val_loader: iterable of batches — validation data
        epochs: int — number of training epochs
        lr: float — learning rate
        device: str — device the model and batches are moved to
        checkpoint_path: str — where the best ``state_dict`` is saved

    Returns:
        pandas.DataFrame with one row per epoch and the columns
        ``epoch``, ``train_loss``, ``train_acc``, ``val_loss``, ``val_acc``.
        Metrics of an empty loader are reported as 0.0.
    """
    import pandas as pd
    from monai.utils import set_determinism
    set_determinism(seed=42)

    model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr,
                                  weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=epochs)
    criterion = nn.CrossEntropyLoss()
    history = []

    best_val_acc = 0.0
    checkpoint_saved = False
    for epoch in range(epochs):
        model.train()
        train_loss, correct, total, train_batches = 0.0, 0, 0, 0
        for batch in train_loader:
            images = batch["image"].to(device)
            labels = batch["label"].to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            correct += (outputs.argmax(1) == labels).sum().item()
            total += len(labels)
            # Count batches here so loaders without __len__ also work.
            train_batches += 1

        scheduler.step()

        # Validation
        model.eval()
        val_loss, val_correct, val_total, val_batches = 0.0, 0, 0, 0
        with torch.no_grad():
            for batch in val_loader:
                images = batch["image"].to(device)
                labels = batch["label"].to(device)
                outputs = model(images)
                val_loss += criterion(outputs, labels).item()
                val_correct += (outputs.argmax(1) == labels).sum().item()
                val_total += len(labels)
                val_batches += 1

        # Guarded ratios: an empty loader must not abort the whole run
        # with ZeroDivisionError.
        train_acc = _ratio(correct, total)
        val_acc = _ratio(val_correct, val_total)

        # Also save on the first epoch so a checkpoint exists even when
        # validation accuracy never rises above 0.
        if val_acc > best_val_acc or not checkpoint_saved:
            best_val_acc = max(best_val_acc, val_acc)
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True

        history.append({
            "epoch": epoch + 1,
            "train_loss": _ratio(train_loss, train_batches),
            "train_acc": train_acc,
            "val_loss": _ratio(val_loss, val_batches),
            "val_acc": val_acc,
        })

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}: train_acc={train_acc:.3f}, "
                  f"val_acc={val_acc:.3f}")

    print(f"Best val_acc: {best_val_acc:.4f}")
    return pd.DataFrame(history)
149
+ ```
150
+
151
+ ## 2. Grad-CAM 説明可能性
152
+
153
+ ```python
154
def radiology_gradcam(model, image_tensor, target_layer=None,
                      target_class=None, device="cuda"):
    """
    Compute and plot a Grad-CAM map for a radiology image.

    Saves a three-panel figure (original, CAM, overlay) to
    ``gradcam_radiology.png``. For volumetric input the middle slice
    along the first spatial axis is plotted. The first channel of the
    input is used as the background image.

    Parameters:
        model: nn.Module — trained classification model
        image_tensor: torch.Tensor — input of shape [1, C, H, W] or
            [1, C, D, H, W]
        target_layer: str | None — name of the layer to explain. When
            None, the last module whose name contains "features" or
            "layer4" is used, else the second-to-last named module.
        target_class: int | None — class to explain (None = predicted class)
        device: str — device used for inference

    Returns:
        numpy.ndarray — the squeezed CAM produced by MONAI's GradCAM.
    """
    # Imported locally so the snippet runs on its own.
    import matplotlib.pyplot as plt
    import torch
    from monai.visualize import GradCAM

    model.to(device).eval()
    image_tensor = image_tensor.to(device)

    if target_layer is None:
        # Keep overwriting so the *last* matching module wins.
        for name, _module in model.named_modules():
            if "features" in name or "layer4" in name:
                target_layer = name
        if target_layer is None:
            target_layer = list(model.named_modules())[-2][0]

    cam = GradCAM(nn_module=model, target_layers=target_layer)

    if target_class is None:
        with torch.no_grad():
            target_class = model(image_tensor).argmax(1).item()

    result = cam(x=image_tensor, class_idx=target_class)
    cam_map = result.squeeze().detach().cpu().numpy()

    # Select batch 0 / channel 0 explicitly. A plain squeeze() keeps the
    # channel axis for multi-channel input, so indexing with the slice
    # number would pick a channel instead of a slice.
    image_np = image_tensor[0, 0].detach().cpu().numpy()

    # 2D slice for visualisation
    if cam_map.ndim == 3:
        mid_slice = cam_map.shape[0] // 2
        cam_map_2d = cam_map[mid_slice]
        img_2d = image_np[mid_slice]
    else:
        cam_map_2d = cam_map
        img_2d = image_np

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    try:
        axes[0].imshow(img_2d, cmap="gray")
        axes[0].set_title("Original")
        axes[1].imshow(cam_map_2d, cmap="jet")
        axes[1].set_title(f"Grad-CAM (class={target_class})")
        axes[2].imshow(img_2d, cmap="gray")
        axes[2].imshow(cam_map_2d, cmap="jet", alpha=0.4)
        axes[2].set_title("Overlay")
        for ax in axes:
            ax.axis("off")
        fig.tight_layout()
        fig.savefig("gradcam_radiology.png", dpi=150, bbox_inches="tight")
    finally:
        # pyplot keeps figures alive until closed; without this, repeated
        # calls accumulate open figures.
        plt.close(fig)
    print(f"Grad-CAM saved → gradcam_radiology.png (class={target_class})")
    return cam_map
212
+ ```
213
+
214
+ ## 3. 構造化放射線レポート
215
+
216
+ ```python
217
def generate_structured_report(predictions, patient_info=None,
                               modality="CT", body_part="Chest"):
    """
    Generate an AI-assisted structured radiology report in Markdown.

    The report lists every finding with probability >= 0.5 (labelled
    "High" confidence at >= 0.8, otherwise "Moderate"), followed by a
    table of all findings sorted by descending probability with a 1-5
    AI-RADS grade (5: >= 0.9, 4: >= 0.7, 3: >= 0.5, 2: >= 0.3, else 1).
    The report is printed and returned; it is not written to disk.

    Parameters:
        predictions: dict — maps each finding name to its probability,
            e.g. {"Nodule": 0.92, "Effusion": 0.10}
        patient_info: dict | None — optional "id", "age" and "sex" keys;
            missing keys are shown as "N/A"
        modality: str — "CT" / "MRI" / "XR"
        body_part: str — examined body part

    Returns:
        str — the Markdown report.
    """
    if patient_info is None:
        patient_info = {"id": "ANON", "age": "N/A", "sex": "N/A"}

    findings = []
    for finding, prob in predictions.items():
        if prob >= 0.5:
            confidence = "High" if prob >= 0.8 else "Moderate"
            findings.append(
                f"- {finding}: {prob:.1%} ({confidence} confidence)")
    findings_text = ("\n".join(findings) if findings
                     else "- No significant findings detected")

    rows = []
    ranked = sorted(predictions.items(),
                    key=lambda item: item[1], reverse=True)
    for finding, prob in ranked:
        if prob >= 0.9:
            rads = 5
        elif prob >= 0.7:
            rads = 4
        elif prob >= 0.5:
            rads = 3
        elif prob >= 0.3:
            rads = 2
        else:
            rads = 1
        # A raw "|" in a finding name would split the table cell.
        cell = str(finding).replace("|", "\\|")
        rows.append(f"| {cell} | {prob:.1%} | {rads} |\n")

    header = (
        "## Structured Radiology Report (AI-Assisted)\n"
        "\n"
        f"**Patient**: {patient_info.get('id', 'N/A')} | "
        f"Age: {patient_info.get('age', 'N/A')} | "
        f"Sex: {patient_info.get('sex', 'N/A')}\n"
        f"**Modality**: {modality} | **Body Part**: {body_part}\n"
        "\n"
        "### AI Findings\n"
        "\n"
        f"{findings_text}\n"
        "\n"
        "### AI Confidence Summary\n"
        "\n"
        "| Finding | Probability | AI-RADS |\n"
        "|---------|:-----------:|:-------:|\n"
    )
    disclaimer = (
        "\n"
        "### Disclaimer\n"
        "> This report was generated with AI assistance and requires\n"
        "> review by a qualified radiologist before clinical use.\n"
    )

    report = header + "".join(rows) + disclaimer
    print(report)
    return report
265
+ ```
266
+
267
+ ---
268
+
269
+ ## パイプライン統合
270
+
271
+ ```
272
+ [DICOM 取得] → medical-imaging → radiology-ai → clinical-report
273
+ (前処理/Radiomics) (AI 診断) (臨床レポート)
274
+
275
+ explainable-ai ← deep-learning
276
+ (説明可能性) (基盤学習)
277
+ ```
278
+
279
+ ## パイプライン出力
280
+
281
+ | ファイル | 説明 | 次スキル |
282
+ |---------|------|---------|
283
+ | `best_radiology_model.pt` | 学習済み分類モデル | → 推論 |
284
+ | `gradcam_radiology.png` | Grad-CAM 可視化 | → レポート |
285
+ | `structured_report.md` | 構造化レポート (`generate_structured_report` の戻り値を保存) | → clinical-report |
@@ -0,0 +1,210 @@
1
+ ---
2
+ name: scientific-semi-supervised-learning
3
+ description: |
4
+ 半教師あり学習スキル。Self-Training・Label Propagation・
5
+ MixMatch/FixMatch・Pseudo-Labeling・ラベル効率評価。
6
+ ---
7
+
8
+ # Scientific Semi-Supervised Learning
9
+
10
+ 少量のラベル付きデータと大量の未ラベルデータを活用する
11
+ 半教師あり学習パイプラインを提供する。
12
+
13
+ ## When to Use
14
+
15
+ - ラベル付きデータが少量しかないとき
16
+ - アノテーションコストが高く全量ラベリングが困難なとき
17
+ - Self-Training で反復的にラベルを拡張するとき
18
+ - グラフベースの Label Propagation を適用するとき
19
+ - Pseudo-Labeling の信頼度閾値を設計するとき
20
+
21
+ ---
22
+
23
+ ## Quick Start
24
+
25
+ ## 1. Self-Training パイプライン
26
+
27
+ ```python
28
+ import numpy as np
29
+ import pandas as pd
30
+ from sklearn.base import clone
31
+ from sklearn.metrics import accuracy_score, classification_report
32
+
33
+
34
def self_training_pipeline(X_labeled, y_labeled, X_unlabeled,
                           base_estimator=None, threshold=0.95,
                           max_iterations=10, batch_size=None,
                           X_test=None, y_test=None):
    """
    Self-training semi-supervised learning.

    Each iteration fits a fresh clone of the base estimator on the
    current training set, pseudo-labels the pool samples whose highest
    class probability reaches ``threshold``, and moves them into the
    training set. The loop stops when the pool is empty, no sample is
    confident enough, or ``max_iterations`` is reached. A final clone is
    then fitted on the enlarged training set.

    Parameters:
        X_labeled: array-like — labeled features
        y_labeled: array-like — labels
        X_unlabeled: array-like — unlabeled features
        base_estimator: sklearn estimator | None — base classifier
            (None = GradientBoostingClassifier with 100 estimators)
        threshold: float — minimum probability to accept a pseudo-label
        max_iterations: int — maximum number of iterations
        batch_size: int | None — cap on samples added per iteration
        X_test: array-like | None — test features
        y_test: array-like | None — test labels

    Returns:
        (final_model, history) — the fitted estimator and a
        pandas.DataFrame with one row per iteration. ``test_accuracy``
        is measured with the model fitted *before* that iteration's
        pseudo-labels were added.
    """
    if base_estimator is None:
        # Imported only when the default is actually needed.
        from sklearn.ensemble import GradientBoostingClassifier
        base_estimator = GradientBoostingClassifier(
            n_estimators=100, random_state=42)

    # np.array copies, so the caller's arrays are never mutated.
    X_train = np.array(X_labeled)
    y_train = np.array(y_labeled)
    X_pool = np.array(X_unlabeled)
    history = []

    for iteration in range(max_iterations):
        if len(X_pool) == 0:
            print(f"Iteration {iteration}: Pool exhausted")
            break

        model = clone(base_estimator)
        model.fit(X_train, y_train)
        proba = model.predict_proba(X_pool)
        max_proba = proba.max(axis=1)
        # predict_proba columns follow model.classes_. argmax alone gives
        # column indices, which are only correct labels when the classes
        # happen to be 0..K-1.
        pseudo_labels = model.classes_[proba.argmax(axis=1)]

        confident_mask = max_proba >= threshold
        n_confident = confident_mask.sum()

        if batch_size and n_confident > batch_size:
            # More confident samples than the cap: keep the top ones.
            top_idx = np.argsort(max_proba)[-batch_size:]
            confident_mask = np.zeros(len(X_pool), dtype=bool)
            confident_mask[top_idx] = True
            n_confident = batch_size

        if n_confident == 0:
            print(f"Iteration {iteration}: No confident samples")
            break

        X_train = np.vstack([X_train, X_pool[confident_mask]])
        y_train = np.concatenate([
            y_train, pseudo_labels[confident_mask]])
        X_pool = X_pool[~confident_mask]

        record = {"iteration": iteration,
                  "n_labeled": len(X_train),
                  "n_pool": len(X_pool),
                  "n_added": int(n_confident),
                  "mean_confidence": float(max_proba[confident_mask].mean())}

        if X_test is not None and y_test is not None:
            test_acc = accuracy_score(y_test, model.predict(X_test))
            record["test_accuracy"] = test_acc

        history.append(record)
        print(f"Iter {iteration}: +{n_confident} samples, "
              f"total={len(X_train)}, pool={len(X_pool)}")

    final_model = clone(base_estimator)
    final_model.fit(X_train, y_train)
    return final_model, pd.DataFrame(history)
109
+ ```
110
+
111
+ ## 2. Label Propagation
112
+
113
+ ```python
114
def label_propagation_ssl(X_all, y_partial, kernel="rbf",
                          gamma=20, n_neighbors=7,
                          max_iter=1000):
    """
    Graph-based label propagation with two scikit-learn models.

    Fits both ``LabelPropagation`` and ``LabelSpreading`` (alpha=0.2) on
    the same data and returns their transductive predictions.

    Parameters:
        X_all: array-like — features of all samples (labeled + unlabeled)
        y_partial: array-like — labels, with -1 marking unlabeled samples
        kernel: str — "rbf" / "knn"
        gamma: float — gamma passed to both models
        n_neighbors: int — n_neighbors passed to both models
        max_iter: int — maximum number of iterations

    Returns:
        dict keyed by "propagation" and "spreading"; each value holds
        ``model``, ``predictions`` (``transduction_``), ``n_propagated``
        (number of samples that were unlabeled on input) and
        ``label_distributions``.
    """
    # Imported locally so the snippet runs on its own.
    import numpy as np
    from sklearn.semi_supervised import (
        LabelPropagation, LabelSpreading)

    # Convert once up front: comparing a plain list with -1 yields a bool,
    # so the count below would fail only after both models had been fitted.
    y_partial = np.asarray(y_partial)
    n_propagated = int((y_partial == -1).sum())

    models = {
        "propagation": LabelPropagation(
            kernel=kernel, gamma=gamma,
            n_neighbors=n_neighbors, max_iter=max_iter),
        "spreading": LabelSpreading(
            kernel=kernel, gamma=gamma,
            n_neighbors=n_neighbors, max_iter=max_iter, alpha=0.2),
    }

    results = {}
    for name, model in models.items():
        model.fit(X_all, y_partial)
        results[name] = {
            "model": model,
            "predictions": model.transduction_,
            "n_propagated": n_propagated,
            "label_distributions": model.label_distributions_,
        }
        print(f"{name}: propagated {n_propagated} labels")

    return results
154
+ ```
155
+
156
+ ## 3. Pseudo-Labeling 品質評価
157
+
158
+ ```python
159
def evaluate_pseudo_labels(y_true_unlabeled, pseudo_labels,
                           confidences, thresholds=None):
    """
    Evaluate pseudo-label quality across confidence thresholds.

    For every threshold, the samples whose confidence is at least the
    threshold are selected and the share of correct pseudo-labels among
    them is computed. Thresholds that select no sample are skipped.

    Parameters:
        y_true_unlabeled: array-like — true labels (for evaluation)
        pseudo_labels: array-like — predicted pseudo-labels
        confidences: array-like — confidence of each prediction
        thresholds: list[float] | None — thresholds to evaluate
            (None = [0.5, 0.7, 0.8, 0.9, 0.95, 0.99])

    Returns:
        pandas.DataFrame with the columns ``threshold``, ``n_selected``,
        ``coverage`` and ``pseudo_accuracy``. The columns are present
        even when no threshold selects any sample.

    Raises:
        ValueError: if the three inputs do not have the same shape.
    """
    # Imported locally so the snippet runs on its own.
    import numpy as np
    import pandas as pd

    if thresholds is None:
        thresholds = [0.5, 0.7, 0.8, 0.9, 0.95, 0.99]

    # Plain lists support neither ">= t" nor boolean-mask indexing.
    y_true = np.asarray(y_true_unlabeled)
    y_pseudo = np.asarray(pseudo_labels)
    conf = np.asarray(confidences, dtype=float)
    if not (y_true.shape == y_pseudo.shape == conf.shape):
        raise ValueError(
            "y_true_unlabeled, pseudo_labels and confidences must have "
            f"the same shape, got {y_true.shape}, {y_pseudo.shape} "
            f"and {conf.shape}")

    columns = ["threshold", "n_selected", "coverage", "pseudo_accuracy"]
    records = []
    for t in thresholds:
        mask = conf >= t
        n_selected = int(mask.sum())
        if n_selected == 0:
            continue
        # Accuracy = share of selected samples whose pseudo-label matches.
        acc = float(np.mean(y_true[mask] == y_pseudo[mask]))
        coverage = float(mask.mean())
        records.append({
            "threshold": t,
            "n_selected": n_selected,
            "coverage": coverage,
            "pseudo_accuracy": acc,
        })
        print(f"τ={t:.2f}: {n_selected} samples, "
              f"coverage={coverage:.1%}, acc={acc:.3f}")

    # Explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(records, columns=columns)
190
+ ```
191
+
192
+ ---
193
+
194
+ ## パイプライン統合
195
+
196
+ ```
197
+ [少量ラベル] → semi-supervised-learning → ml-classification
198
+ (ラベル拡張) (本分類)
199
+
200
+ active-learning ← data-profiling
201
+ (能動学習) (データ品質)
202
+ ```
203
+
204
+ ## パイプライン出力
205
+
206
+ | ファイル | 説明 | 次スキル |
207
+ |---------|------|---------|
208
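+ | `self_training_history.csv` | 反復学習履歴 (`self_training_pipeline` が返す DataFrame を保存) | → 収束分析 |
208
+ | `pseudo_label_quality.csv` | 疑似ラベル品質 (`evaluate_pseudo_labels` が返す DataFrame を保存) | → 閾値選択 |
209
+ | `propagated_labels.npy` | 伝播ラベル (`label_propagation_ssl` の `predictions` を保存) | → ml-classification |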