claude-turing 3.2.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +8 -2
- package/commands/curriculum.md +43 -0
- package/commands/feature.md +42 -0
- package/commands/merge.md +24 -0
- package/commands/prune.md +26 -0
- package/commands/quantize.md +24 -0
- package/commands/surgery.md +27 -0
- package/commands/turing.md +12 -0
- package/package.json +1 -1
- package/src/install.js +2 -0
- package/src/verify.js +6 -0
- package/templates/scripts/__pycache__/architecture_surgery.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/curriculum_optimizer.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/feature_intelligence.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_merger.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_pruning.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_quantization.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/architecture_surgery.py +238 -0
- package/templates/scripts/curriculum_optimizer.py +337 -0
- package/templates/scripts/feature_intelligence.py +369 -0
- package/templates/scripts/model_merger.py +277 -0
- package/templates/scripts/model_pruning.py +182 -0
- package/templates/scripts/model_quantization.py +177 -0
- package/templates/scripts/scaffold.py +12 -0
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Training curriculum optimization for the autoresearch pipeline.
|
|
3
|
+
|
|
4
|
+
Orders training data by difficulty and measures whether curriculum
|
|
5
|
+
learning improves convergence speed or final performance. Tests
|
|
6
|
+
easy-to-hard, hard-to-easy, self-paced, and random strategies.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python scripts/curriculum_optimizer.py exp-042
|
|
10
|
+
python scripts/curriculum_optimizer.py --strategies easy_to_hard,random
|
|
11
|
+
python scripts/curriculum_optimizer.py --json
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
import json
|
|
18
|
+
import sys
|
|
19
|
+
from datetime import datetime, timezone
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
import numpy as np
|
|
23
|
+
import yaml
|
|
24
|
+
|
|
25
|
+
from scripts.turing_io import load_config
|
|
26
|
+
|
|
27
|
+
# Strategy names understood by apply_curriculum(); "random" doubles as the
# baseline that compare_strategies() measures every other strategy against.
DEFAULT_STRATEGIES = ["random", "easy_to_hard", "hard_to_easy", "self_paced"]
# Default cutoff for detect_impossible_samples(): a sample whose difficulty
# score is strictly greater than this is reported as "impossible".
IMPOSSIBLE_THRESHOLD = 0.9
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# --- Difficulty Scoring ---
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def score_difficulty_by_loss(
    losses: np.ndarray,
) -> np.ndarray:
    """Turn per-sample losses into difficulty scores (larger loss = harder).

    The losses are min-max scaled so the smallest loss maps to 0 and the
    largest to 1.

    Args:
        losses: Per-sample loss values.

    Returns:
        Difficulty scores in [0, 1], one per sample. An empty input yields an
        empty array; if every loss is identical, all scores are 0.
    """
    n_samples = len(losses)
    if n_samples == 0:
        return np.array([])

    lowest, highest = np.min(losses), np.max(losses)
    if highest != lowest:
        return (losses - lowest) / (highest - lowest)

    # No spread to normalize over: every sample is treated as equally easy.
    return np.zeros(n_samples)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def score_difficulty_by_margin(
    margins: np.ndarray,
) -> np.ndarray:
    """Turn decision margins into difficulty scores (smaller margin = harder).

    Margins are min-max scaled and then inverted, so the smallest margin maps
    to difficulty 1 and the largest to difficulty 0.

    Args:
        margins: Per-sample distance from the decision boundary.

    Returns:
        Difficulty scores in [0, 1], one per sample. An empty input yields an
        empty array; if every margin is identical, all scores are 0.5.
    """
    n_samples = len(margins)
    if n_samples == 0:
        return np.array([])

    smallest, largest = np.min(margins), np.max(margins)
    if largest != smallest:
        scaled = (margins - smallest) / (largest - smallest)
        # Flip the scale: being close to the boundary means high difficulty.
        return 1.0 - scaled

    # No spread to normalize over: fall back to the neutral midpoint.
    return np.full(n_samples, 0.5)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def score_difficulty_by_disagreement(
    multi_seed_predictions: list[np.ndarray],
    labels: np.ndarray,
) -> np.ndarray:
    """Score difficulty as the fraction of seeds that mispredict each sample.

    Samples that many seeds get wrong are "hard" (and possibly mislabeled).

    Args:
        multi_seed_predictions: One prediction array per training seed.
            Arrays whose length differs from ``labels`` are skipped.
        labels: Ground-truth label per sample.

    Returns:
        Array of difficulties in [0, 1], one per sample: 0 means every usable
        seed predicted the label, 1 means none did. Returns an empty array
        when there are no predictions or no labels. If no prediction array
        has a usable length, every sample scores 1.0.
    """
    labels = np.asarray(labels)
    if len(multi_seed_predictions) == 0 or len(labels) == 0:
        return np.array([])

    n_samples = len(labels)
    correct_counts = np.zeros(n_samples)
    n_used = 0
    for preds in multi_seed_predictions:
        preds = np.asarray(preds)
        if len(preds) != n_samples:
            continue
        correct_counts += (preds == labels).astype(float)
        n_used += 1

    if n_used == 0:
        # Nothing comparable to the labels: keep the historical "all hard" result.
        return np.ones(n_samples)

    # Divide by the seeds actually counted, not by the number supplied;
    # otherwise every skipped array would be scored as a wrong prediction.
    return 1.0 - correct_counts / n_used
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# --- Curriculum Strategies ---
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def apply_curriculum(
    indices: np.ndarray,
    difficulties: np.ndarray,
    strategy: str,
) -> np.ndarray:
    """Reorder sample indices according to a curriculum strategy.

    Args:
        indices: Original sample indices.
        difficulties: Difficulty score per sample, aligned with ``indices``.
        strategy: One of "random", "easy_to_hard", "hard_to_easy" or
            "self_paced". Hyphenated spellings ("easy-to-hard") are accepted.

    Returns:
        The reordered indices as a new array; the input is never modified.
        An unrecognized strategy (or an empty input) returns the indices in
        their original order.
    """
    if len(indices) == 0:
        return indices

    indices = np.asarray(indices)
    strategy = strategy.replace("-", "_")

    if strategy == "random":
        # Shuffle a copy so the caller's array keeps its order.
        shuffled = indices.copy()
        np.random.shuffle(shuffled)
        return shuffled

    if strategy == "easy_to_hard":
        return indices[np.argsort(difficulties)]

    if strategy == "hard_to_easy":
        return indices[np.argsort(difficulties)[::-1]]

    if strategy == "self_paced":
        # Five difficulty bands, easiest first; order is random within a band.
        order = np.argsort(difficulties)
        n = len(order)
        edges = [k * n // 5 for k in range(6)]
        bands = []
        for lo, hi in zip(edges, edges[1:]):
            band = order[lo:hi].copy()
            np.random.shuffle(band)
            bands.append(band)
        # `order` holds positions into `indices`; map them back to the actual
        # sample indices, as the sorted strategies above do.
        return indices[np.concatenate(bands)]

    return indices
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def detect_impossible_samples(
    difficulties: np.ndarray,
    threshold: float = IMPOSSIBLE_THRESHOLD,
) -> list[int]:
    """List the positions of samples whose difficulty exceeds ``threshold``.

    Such samples are reported as "impossible" (likely mislabeled).

    Args:
        difficulties: Difficulty score per sample.
        threshold: Scores strictly greater than this are flagged.

    Returns:
        Positions (as plain ints) of the flagged samples, in ascending order.
    """
    flagged = []
    for position, score in enumerate(difficulties):
        if score > threshold:
            flagged.append(int(position))
    return flagged
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# --- Strategy Comparison ---
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def compare_strategies(
    strategy_results: dict[str, dict],
    primary_metric: str = "accuracy",
) -> dict:
    """Compare curriculum strategy results against the "random" baseline.

    The comparison treats a larger ``metric_value`` as better.

    Args:
        strategy_results: ``{strategy_name: {"metric_value": ...,
            "convergence_epoch": ...}}``. Either key may be absent.
        primary_metric: Name of the metric. Accepted for interface
            compatibility; it does not influence the comparison.

    Returns:
        ``{"results": [...], "best_strategy": name | None, "verdict": str}``.
        Each result row carries the strategy's metric, convergence epoch,
        metric delta versus random, and fractional epoch speedup versus
        random (``None`` where not computable). The verdict is
        "curriculum_helps" when the best strategy beats random by more than
        0.005, "faster_convergence" when it instead converges more than 10%
        sooner, and otherwise "no_improvement". Empty input returns
        ``{"best_strategy": None, "verdict": "no_data"}``.
    """
    if not strategy_results:
        return {"best_strategy": None, "verdict": "no_data"}

    baseline = strategy_results.get("random", {})
    baseline_metric = baseline.get("metric_value")
    baseline_epochs = baseline.get("convergence_epoch")

    results = []
    for name, data in strategy_results.items():
        metric = data.get("metric_value")
        epochs = data.get("convergence_epoch")

        # Epoch handling keeps the original truthiness rule: an epoch of 0 or
        # None is treated as "no convergence information".
        speedup = None
        if epochs and baseline_epochs and baseline_epochs > 0:
            speedup = round(1 - epochs / baseline_epochs, 4)

        # Compare against None explicitly: 0.0 is a legitimate metric value
        # and must not be mistaken for a missing one.
        delta = None
        if metric is not None and baseline_metric is not None:
            delta = round(metric - baseline_metric, 6)

        results.append({
            "strategy": name,
            "metric_value": round(metric, 6) if metric is not None else None,
            "convergence_epoch": epochs,
            "delta_vs_random": delta,
            "speedup": speedup,
        })

    with_metric = [r for r in results if r["metric_value"] is not None]
    best = max(with_metric, key=lambda r: r["metric_value"]) if with_metric else None

    verdict = "no_improvement"
    if best is not None:
        best_delta = best["delta_vs_random"]
        best_speedup = best["speedup"]
        if best_delta is not None and best_delta > 0.005:
            verdict = "curriculum_helps"
        elif best_speedup is not None and best_speedup > 0.1:
            verdict = "faster_convergence"

    return {
        "results": results,
        "best_strategy": best["strategy"] if best else None,
        "verdict": verdict,
    }
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# --- Full Pipeline ---
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def curriculum_analysis(
    difficulties: np.ndarray | None = None,
    strategy_results: dict[str, dict] | None = None,
    exp_id: str | None = None,
    config_path: str = "config.yaml",
) -> dict:
    """Build the curriculum analysis report.

    Args:
        difficulties: Optional per-sample difficulty scores; when given, the
            report gains a "difficulty_stats" section.
        strategy_results: Optional per-strategy results to pass to
            compare_strategies(); when absent, the report instead carries a
            note and the list of available strategies.
        exp_id: Experiment identifier stored in the report.
        config_path: Config file handed to load_config(); only
            ``evaluation.primary_metric`` is read (default "accuracy").

    Returns:
        Report dict with "generated_at", "experiment_id" and
        "primary_metric", plus the optional sections described above.
    """
    config = load_config(config_path)
    # `or {}` also covers an "evaluation:" key that is present but empty.
    evaluation_cfg = config.get("evaluation") or {}
    primary_metric = evaluation_cfg.get("primary_metric", "accuracy")

    report = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "experiment_id": exp_id,
        "primary_metric": primary_metric,
    }

    if difficulties is not None:
        impossible = detect_impossible_samples(difficulties)
        n_samples = len(difficulties)
        # np.mean of an empty array is NaN (with a RuntimeWarning), which then
        # leaks into the saved report; report 0.0 for the empty case instead.
        mean_difficulty = float(np.mean(difficulties)) if n_samples else 0.0
        report["difficulty_stats"] = {
            "n_samples": n_samples,
            "mean_difficulty": round(mean_difficulty, 4),
            "n_impossible": len(impossible),
            # Only the first 20 flagged samples are listed in the report.
            "impossible_indices": impossible[:20],
        }

    if strategy_results:
        report["comparison"] = compare_strategies(strategy_results, primary_metric)
    else:
        report["note"] = "Provide strategy_results for comparison. Use /turing:curriculum to run strategies."
        # Copy so that edits to the report cannot alter the module constant.
        report["available_strategies"] = list(DEFAULT_STRATEGIES)

    return report
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
# --- Report Formatting ---
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def save_curriculum_report(report: dict, output_dir: str = "experiments/curriculum") -> Path:
    """Write the report to ``<output_dir>/<experiment_id>-curriculum.yaml``.

    Args:
        report: Report dict, as produced by curriculum_analysis().
        output_dir: Destination directory; created if missing.

    Returns:
        Path of the written YAML file.
    """
    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)
    # The key is normally present but may hold None (no experiment given), so
    # a .get() default alone would yield a file named "None-curriculum.yaml".
    exp_id = report.get("experiment_id") or "unknown"
    filepath = out_path / f"{exp_id}-curriculum.yaml"
    # JSON round-trip reduces the report to plain built-in types before the
    # YAML dump; anything json cannot encode is stringified.
    clean = json.loads(json.dumps(report, default=str))
    with open(filepath, "w", encoding="utf-8") as f:
        yaml.dump(clean, f, default_flow_style=False, sort_keys=False)
    return filepath
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def format_curriculum_report(report: dict) -> str:
    """Render a curriculum report as Markdown.

    Args:
        report: Report dict, as produced by curriculum_analysis(). A report
            containing an "error" key is rendered as a one-line error.

    Returns:
        Markdown text with a title, optional difficulty statistics, and
        either the strategy comparison table plus verdict or the report's
        note.
    """
    if "error" in report:
        return f"ERROR: {report['error']}"

    # The key may be present with a None value, so a .get() default is not enough.
    exp_id = report.get("experiment_id") or "?"
    metric = report.get("primary_metric", "metric")
    # Trim the ISO timestamp to whole seconds.
    generated = str(report.get("generated_at") or "N/A")[:19]

    lines = [f"# Curriculum Analysis: {exp_id}", "",
             f"*Generated {generated}*", ""]

    # Difficulty stats
    diff_stats = report.get("difficulty_stats")
    if diff_stats:
        lines.extend([
            "## Difficulty Distribution",
            f"- **Samples:** {diff_stats['n_samples']}",
            f"- **Mean difficulty:** {diff_stats['mean_difficulty']:.4f}",
            f"- **Impossible samples:** {diff_stats['n_impossible']} (likely mislabeled)",
            "",
        ])

    # Strategy comparison
    comparison = report.get("comparison")
    if comparison:
        results = comparison.get("results", [])
        if results:
            lines.extend(["## Strategy Comparison", "",
                          f"| Strategy | {metric} | Δ vs Random | Speedup |",
                          "|----------|--------|-------------|---------|"])
            best_name = comparison.get("best_strategy")
            for r in results:
                val = f"{r['metric_value']:.4f}" if r.get("metric_value") is not None else "N/A"
                delta = f"{r['delta_vs_random']:+.4f}" if r.get("delta_vs_random") is not None else "—"
                speedup = f"{r['speedup']:+.0%}" if r.get("speedup") is not None else "—"
                # The marker lives inside the first cell: text after the row's
                # closing pipe is an extra cell, which Markdown tables drop.
                marker = " ← BEST" if r["strategy"] == best_name else ""
                lines.append(f"| {r['strategy']}{marker} | {val} | {delta} | {speedup} |")
            lines.append("")

        verdict_labels = {
            "curriculum_helps": "Curriculum learning improves final performance",
            "faster_convergence": "Curriculum learning converges faster (similar final performance)",
            "no_improvement": "No significant improvement from curriculum ordering",
        }
        verdict = comparison.get("verdict", "?")
        lines.extend(["## Verdict", "", f"**{verdict_labels.get(verdict, verdict.upper())}**"])
    elif report.get("note"):
        lines.append(f"*{report['note']}*")

    return "\n".join(lines)
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def main() -> None:
    """CLI entry point: build, save and print the curriculum report.

    The report is saved as YAML (its path is announced on stderr) and then
    printed to stdout as JSON when ``--json`` is given, otherwise as Markdown.
    """
    parser = argparse.ArgumentParser(description="Training curriculum optimization")
    parser.add_argument("exp_id", nargs="?", help="Experiment ID")
    parser.add_argument("--strategies", help="Comma-separated strategies")
    parser.add_argument("--config", default="config.yaml")
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args()

    report = curriculum_analysis(exp_id=args.exp_id, config_path=args.config)

    # --strategies used to be parsed and then dropped. Record the request in
    # the report and warn about names that are not known strategies.
    if args.strategies and "error" not in report:
        requested = [
            name.strip().replace("-", "_")
            for name in args.strategies.split(",")
            if name.strip()
        ]
        unknown = [name for name in requested if name not in DEFAULT_STRATEGIES]
        if unknown:
            print(f"Warning: ignoring unknown strategies: {', '.join(unknown)}", file=sys.stderr)
        report["requested_strategies"] = [name for name in requested if name in DEFAULT_STRATEGIES]

    if "error" not in report:
        filepath = save_curriculum_report(report)
        print(f"Saved to {filepath}", file=sys.stderr)

    if args.json:
        print(json.dumps(report, indent=2, default=str))
    else:
        print(format_curriculum_report(report))


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Automated feature selection and generation for the autoresearch pipeline.
|
|
3
|
+
|
|
4
|
+
Runs multiple feature importance methods (mutual information,
|
|
5
|
+
L1, tree-based), computes consensus ranking, detects redundancy, and
|
|
6
|
+
generates candidate interaction features.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python scripts/feature_intelligence.py
|
|
10
|
+
python scripts/feature_intelligence.py --method all
|
|
11
|
+
python scripts/feature_intelligence.py --method importance --top-k 15
|
|
12
|
+
python scripts/feature_intelligence.py --json
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import json
|
|
19
|
+
import sys
|
|
20
|
+
from datetime import datetime, timezone
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
import numpy as np
|
|
24
|
+
import yaml
|
|
25
|
+
|
|
26
|
+
from scripts.turing_io import load_config
|
|
27
|
+
|
|
28
|
+
# Default number of top-ranked features per method that count toward the
# consensus vote in compute_consensus().
DEFAULT_TOP_K = 20
# Default cutoff for detect_redundancy(): feature pairs whose absolute
# correlation is strictly greater than this are reported as redundant.
REDUNDANCY_THRESHOLD = 0.95
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# --- Importance Methods ---
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def mutual_information_ranking(
    X: np.ndarray,
    y: np.ndarray,
    feature_names: list[str] | None = None,
) -> list[dict]:
    """Rank features by their mutual information with the target.

    Args:
        X: Feature matrix with one column per feature.
        y: Target values.
        feature_names: Column names; defaults to ``feature_0``, ``feature_1``, ...

    Returns:
        One ``{"feature", "score", "rank"}`` dict per feature, sorted by
        descending score, with ranks starting at 1.
    """
    from sklearn.feature_selection import mutual_info_classif, mutual_info_regression

    names = feature_names
    if names is None:
        names = [f"feature_{i}" for i in range(X.shape[1])]

    # Heuristic: a target with at most 20 distinct values is treated as a
    # classification target, anything else as regression.
    is_classification = len(np.unique(y)) <= 20
    estimator = mutual_info_classif if is_classification else mutual_info_regression
    scores = estimator(X, y, random_state=42)

    entries = [
        {"feature": names[idx], "score": round(float(value), 6), "rank": 0}
        for idx, value in enumerate(scores)
    ]
    entries.sort(key=lambda entry: entry["score"], reverse=True)
    for position, entry in enumerate(entries, start=1):
        entry["rank"] = position

    return entries
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def l1_ranking(
    X: np.ndarray,
    y: np.ndarray,
    feature_names: list[str] | None = None,
) -> list[dict]:
    """Rank features by the magnitude of their L1-regularized coefficients.

    Features are standardized first so coefficient magnitudes are comparable.
    An L1-penalized logistic regression is fitted for classification-like
    targets and a Lasso for everything else.

    Args:
        X: Feature matrix with one column per feature.
        y: Target values.
        feature_names: Column names; defaults to ``feature_0``, ``feature_1``, ...

    Returns:
        One ``{"feature", "score", "rank"}`` dict per feature, sorted by
        descending absolute coefficient, with ranks starting at 1.
    """
    from sklearn.linear_model import Lasso, LogisticRegression
    from sklearn.preprocessing import StandardScaler

    if feature_names is None:
        feature_names = [f"feature_{i}" for i in range(X.shape[1])]

    X_scaled = StandardScaler().fit_transform(X)

    # Heuristic: a target with at most 20 distinct values is treated as a
    # classification target.
    if len(np.unique(y)) <= 20:
        # liblinear shuffles the data internally; pin the seed so the ranking
        # is reproducible, as the other ranking functions in this file do.
        model = LogisticRegression(
            penalty="l1", solver="liblinear", C=1.0, max_iter=1000, random_state=42,
        )
        model.fit(X_scaled, y)
        # coef_ has one row per class (a single row for binary targets);
        # average the magnitudes across rows.
        coefs = np.abs(model.coef_).mean(axis=0)
    else:
        model = Lasso(alpha=0.01, max_iter=1000)
        model.fit(X_scaled, y)
        coefs = np.abs(model.coef_)

    ranked = sorted(
        [{"feature": feature_names[i], "score": round(float(coef), 6), "rank": 0}
         for i, coef in enumerate(coefs)],
        key=lambda x: x["score"], reverse=True,
    )
    for rank, entry in enumerate(ranked, start=1):
        entry["rank"] = rank

    return ranked
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def tree_importance_ranking(
    X: np.ndarray,
    y: np.ndarray,
    feature_names: list[str] | None = None,
) -> list[dict]:
    """Rank features by random-forest feature importance.

    Args:
        X: Feature matrix with one column per feature.
        y: Target values.
        feature_names: Column names; defaults to ``feature_0``, ``feature_1``, ...

    Returns:
        One ``{"feature", "score", "rank"}`` dict per feature, sorted by
        descending importance, with ranks starting at 1.
    """
    from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

    names = feature_names
    if names is None:
        names = [f"feature_{i}" for i in range(X.shape[1])]

    # Heuristic: a target with at most 20 distinct values is treated as a
    # classification target, anything else as regression.
    forest_cls = RandomForestClassifier if len(np.unique(y)) <= 20 else RandomForestRegressor
    forest = forest_cls(n_estimators=100, random_state=42, n_jobs=-1)
    forest.fit(X, y)

    entries = [
        {"feature": names[idx], "score": round(float(value), 6), "rank": 0}
        for idx, value in enumerate(forest.feature_importances_)
    ]
    entries.sort(key=lambda entry: entry["score"], reverse=True)
    for position, entry in enumerate(entries, start=1):
        entry["rank"] = position

    return entries
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# --- Consensus ---
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def compute_consensus(
    rankings: dict[str, list[dict]],
    top_k: int = DEFAULT_TOP_K,
) -> list[dict]:
    """Compute a consensus ranking across multiple importance methods.

    A feature's consensus score is the number of methods that place it among
    their first ``top_k`` entries.

    Args:
        rankings: ``{method_name: ranking}`` where each ranking is a list of
            dicts with "feature" and "rank" keys, best feature first.
        top_k: How many leading entries of each ranking earn a vote.

    Returns:
        One dict per feature seen in any ranking, sorted by descending
        consensus and then feature name, with keys "feature", "methods",
        "consensus" and "consensus_str" (e.g. "2/3"; suffixed with a star
        when every method voted for it, or "DROP" when none did). For a
        feature with at least one vote, "methods" holds its rank only in the
        methods that voted for it; for a feature with no votes it holds the
        rank in every method.
    """
    n_methods = len(rankings)

    # One feature -> rank table per method, built once so the lookups below
    # are O(1) instead of a scan of the ranking per feature. setdefault keeps
    # the first occurrence if a feature is listed twice.
    rank_tables = {}
    for method_name, ranking in rankings.items():
        table = {}
        for entry in ranking:
            table.setdefault(entry["feature"], entry["rank"])
        rank_tables[method_name] = table

    feature_scores = {}
    for method_name, ranking in rankings.items():
        voted = {entry["feature"] for entry in ranking[:top_k]}
        for feat in voted:
            record = feature_scores.setdefault(
                feat, {"feature": feat, "methods": {}, "consensus": 0}
            )
            record["methods"][method_name] = rank_tables[method_name].get(feat)
            record["consensus"] += 1

    # Features that no method placed in its top-k still appear, with zero votes.
    for table in rank_tables.values():
        for feat in table:
            if feat not in feature_scores:
                feature_scores[feat] = {
                    "feature": feat,
                    "methods": {m: t.get(feat) for m, t in rank_tables.items()},
                    "consensus": 0,
                }

    result = sorted(feature_scores.values(), key=lambda x: (-x["consensus"], x["feature"]))

    for r in result:
        r["consensus_str"] = f"{r['consensus']}/{n_methods}"
        if r["consensus"] == n_methods:
            r["consensus_str"] += " ★"
        elif r["consensus"] == 0:
            r["consensus_str"] += " — DROP"

    return result
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# --- Redundancy Detection ---
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def detect_redundancy(
    X: np.ndarray,
    feature_names: list[str] | None = None,
    threshold: float = REDUNDANCY_THRESHOLD,
) -> list[dict]:
    """Detect highly correlated feature pairs.

    Args:
        X: Feature matrix with one column per feature.
        feature_names: Column names; defaults to ``feature_0``, ``feature_1``, ...
        threshold: Pairs whose absolute correlation is strictly greater than
            this are reported.

    Returns:
        ``{"feature_a", "feature_b", "correlation"}`` dicts (correlation is
        the absolute value, rounded to 4 places), strongest pair first.
        Returns an empty list when there are fewer than two features. Pairs
        with an undefined correlation (e.g. a constant column) are skipped.
    """
    if feature_names is None:
        feature_names = [f"feature_{i}" for i in range(X.shape[1])]

    n = X.shape[1]
    if n < 2:
        return []

    # Constant columns give NaN correlations; those are filtered out below,
    # so the accompanying floating-point warnings are noise.
    with np.errstate(invalid="ignore", divide="ignore"):
        corr = np.corrcoef(X.T)
        # Each unordered pair (i < j) once, in row-major order.
        rows, cols = np.triu_indices(n, k=1)
        strength = np.abs(corr[rows, cols])
        keep = ~np.isnan(strength) & (strength > threshold)

    redundant = [
        {
            "feature_a": feature_names[i],
            "feature_b": feature_names[j],
            "correlation": round(float(c), 4),
        }
        for i, j, c in zip(rows[keep], cols[keep], strength[keep])
    ]

    return sorted(redundant, key=lambda x: -x["correlation"])
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# --- Feature Generation ---
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def generate_interaction_features(
    top_features: list[str],
    max_interactions: int = 10,
) -> list[dict]:
    """Generate candidate interaction features from top consensus features.

    Every unordered pair drawn from the first six features contributes a
    product candidate followed by a ratio candidate, until the cap is reached.

    Args:
        top_features: Feature names, most important first.
        max_interactions: Maximum number of candidates to return.

    Returns:
        Up to ``max_interactions`` dicts with keys "name" (``a_x_b`` or
        ``a_div_b``), "type" ("product" or "ratio") and "features" (the pair).
    """
    from itertools import combinations, islice

    if max_interactions <= 0:
        return []

    # Each pair yields two candidates, so only this many pairs are needed.
    pairs_needed = (max_interactions + 1) // 2

    candidates = []
    for fa, fb in islice(combinations(top_features[:6], 2), pairs_needed):
        candidates.append({"name": f"{fa}_x_{fb}", "type": "product", "features": [fa, fb]})
        candidates.append({"name": f"{fa}_div_{fb}", "type": "ratio", "features": [fa, fb]})

    # An odd cap keeps the last pair's product and drops its ratio.
    return candidates[:max_interactions]
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# --- Full Pipeline ---
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def feature_analysis(
    X: np.ndarray | None = None,
    y: np.ndarray | None = None,
    feature_names: list[str] | None = None,
    method: str = "all",
    top_k: int = DEFAULT_TOP_K,
    config_path: str = "config.yaml",
) -> dict:
    """Run the feature intelligence analysis.

    Runs the mutual-information, L1 and tree rankings, combines them into a
    consensus, detects redundant pairs and proposes interaction features.

    Args:
        X: Feature matrix with one column per feature.
        y: Target values, one per row of ``X``.
        feature_names: Column names; defaults to ``feature_0``, ``feature_1``, ...
        method: "all" or "importance"; any other value yields an error report.
        top_k: Number of top features per method used for the consensus.
        config_path: Config file handed to load_config().

    Returns:
        The report dict, or ``{"error": ..., ...}`` when the inputs are
        missing or inconsistent, or the method is not supported.
    """
    # The loaded config is not consulted below. The call is kept so that any
    # failure load_config raises still surfaces here.
    # NOTE(review): confirm whether this load is still wanted.
    load_config(config_path)

    if X is None or y is None:
        return {"error": "Provide X and y arrays for feature analysis",
                "note": "Run with --data train.npz to analyze features"}

    # Validate shapes up front and report problems in the function's usual
    # error-dict style rather than failing later with an IndexError.
    X = np.asarray(X)
    y = np.asarray(y)
    if X.ndim != 2:
        return {"error": f"X must be a 2-D array (samples x features); got {X.ndim} dimension(s)"}
    if y.ndim == 0 or len(y) != X.shape[0]:
        return {"error": "X and y must contain the same number of samples"}
    if feature_names is not None and len(feature_names) != X.shape[1]:
        return {"error": f"Got {len(feature_names)} feature names for {X.shape[1]} features"}

    if feature_names is None:
        feature_names = [f"feature_{i}" for i in range(X.shape[1])]

    rankings = {}

    if method in ("all", "importance"):
        rankings["mutual_info"] = mutual_information_ranking(X, y, feature_names)
        rankings["l1"] = l1_ranking(X, y, feature_names)
        rankings["tree"] = tree_importance_ranking(X, y, feature_names)

    if not rankings:
        return {"error": f"Unknown method: {method}",
                "note": "Supported methods: all, importance"}

    consensus = compute_consensus(rankings, top_k)
    redundant = detect_redundancy(X, feature_names)

    top_consensus_features = [c["feature"] for c in consensus if c["consensus"] > 0][:top_k]
    interactions = generate_interaction_features(top_consensus_features)

    # Features that no method placed in its top-k.
    drop_candidates = [c for c in consensus if c["consensus"] == 0]

    if drop_candidates:
        recommendation = (
            f"Drop {len(drop_candidates)} features with 0/{len(rankings)} consensus "
            f"({len(drop_candidates)/X.shape[1]*100:.0f}% of features)"
        )
    else:
        recommendation = "All features contribute to at least one method"

    report = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "n_features": X.shape[1],
        "top_k": top_k,
        "rankings": {k: v[:top_k] for k, v in rankings.items()},
        "consensus": consensus[:top_k * 2],
        "drop_candidates": drop_candidates,
        "n_drop": len(drop_candidates),
        "redundant_pairs": redundant,
        "interaction_candidates": interactions,
        "recommendation": recommendation,
    }

    return report
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
# --- Report Formatting ---
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def save_feature_report(report: dict, output_dir: str = "experiments/features") -> Path:
    """Write the report to ``<output_dir>/features-<UTC date>.yaml``.

    Args:
        report: Report dict, as produced by feature_analysis().
        output_dir: Destination directory; created if missing.

    Returns:
        Path of the written YAML file.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    filepath = target_dir / f"features-{today}.yaml"

    # JSON round-trip reduces the report to plain built-in types before the
    # YAML dump; anything json cannot encode is stringified.
    serializable = json.loads(json.dumps(report, default=str))
    with open(filepath, "w") as handle:
        yaml.dump(serializable, handle, default_flow_style=False, sort_keys=False)

    return filepath
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def format_feature_report(report: dict) -> str:
    """Render a feature intelligence report as Markdown.

    Args:
        report: Report dict, as produced by feature_analysis(). A report
            containing an "error" key is rendered as the error plus its note.

    Returns:
        Markdown text with the consensus table (first 15 rows), up to five
        redundant pairs, up to five candidate interactions, and the
        recommendation.
    """
    if "error" in report:
        return f"ERROR: {report['error']}\n{report.get('note', '')}"

    lines = ["# Feature Intelligence", "",
             f"*Generated {report.get('generated_at', 'N/A')[:19]}*",
             f"**{report.get('n_features', 0)} features analyzed, top-{report.get('top_k', 20)}**", ""]

    # Consensus table
    consensus = report.get("consensus", [])
    if consensus:
        methods = set()
        for c in consensus:
            methods.update(c.get("methods", {}).keys())
        method_names = sorted(methods)

        header = "| Feature |" + "|".join(f" {m} Rank " for m in method_names) + "| Consensus |"
        sep = "|---------|" + "|".join("-------" for _ in method_names) + "|-----------|"
        lines.extend(["## Consensus Ranking", "", header, sep])
        for c in consensus[:15]:
            method_ranks = c.get("methods", {})
            cells = []
            for m in method_names:
                rank = method_ranks.get(m)
                # A missing or None rank is shown as a dash, never as "None".
                cells.append(f" {'—' if rank is None else rank} ")
            ranks = "|".join(cells)
            lines.append(f"| {c['feature']} |{ranks}| {c['consensus_str']} |")
        lines.append("")

    # Redundancy
    redundant = report.get("redundant_pairs", [])
    if redundant:
        lines.extend(["## Redundant Pairs", ""])
        for r in redundant[:5]:
            lines.append(f"- **{r['feature_a']}** ↔ **{r['feature_b']}**: r={r['correlation']}")
        lines.append("")

    # Interactions
    interactions = report.get("interaction_candidates", [])
    if interactions:
        lines.extend(["## Candidate Interactions", ""])
        for i in interactions[:5]:
            # Show ratios with a division sign; a product sign would misstate them.
            joiner = " ÷ " if i["type"] == "ratio" else " × "
            lines.append(f"- `{i['name']}` ({i['type']}: {joiner.join(i['features'])})")
        lines.append("")

    # Recommendation
    lines.extend(["## Recommendation", "", report.get("recommendation", "")])

    return "\n".join(lines)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def main() -> None:
    """CLI entry point: run the feature analysis, save it and print it.

    On success the report is saved as YAML (its path is announced on stderr).
    It is then printed to stdout as JSON when ``--json`` is given, otherwise
    as Markdown.
    """
    parser = argparse.ArgumentParser(description="Automated feature selection")
    parser.add_argument("--method", choices=["all", "importance", "selection", "generation"], default="all")
    parser.add_argument("--top-k", type=int, default=DEFAULT_TOP_K)
    parser.add_argument("--config", default="config.yaml")
    # Without this option the analysis never receives data and can only
    # return its "Provide X and y" error, whose note already names --data.
    # NOTE(review): the archive keys "X", "y" and "feature_names" are an
    # assumed layout - confirm against whatever produces train.npz.
    parser.add_argument(
        "--data",
        help="Path to an .npz archive with arrays 'X' and 'y' (and optionally 'feature_names')",
    )
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args()

    X = y = feature_names = None
    if args.data:
        try:
            # allow_pickle=False: never unpickle objects from a data file.
            with np.load(args.data, allow_pickle=False) as archive:
                X = archive["X"]
                y = archive["y"]
                if "feature_names" in archive.files:
                    feature_names = [str(name) for name in archive["feature_names"]]
        except (OSError, KeyError, ValueError) as exc:
            parser.error(f"could not load --data {args.data!r}: {exc}")

    report = feature_analysis(
        X=X,
        y=y,
        feature_names=feature_names,
        method=args.method,
        top_k=args.top_k,
        config_path=args.config,
    )

    if "error" not in report:
        filepath = save_feature_report(report)
        print(f"Saved to {filepath}", file=sys.stderr)

    if args.json:
        print(json.dumps(report, indent=2, default=str))
    else:
        print(format_feature_report(report))


if __name__ == "__main__":
    main()
|