claude-turing 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +7 -2
- package/commands/calibrate.md +47 -0
- package/commands/curriculum.md +43 -0
- package/commands/feature.md +42 -0
- package/commands/sensitivity.md +41 -0
- package/commands/turing.md +10 -0
- package/commands/xray.md +43 -0
- package/package.json +1 -1
- package/src/install.js +2 -0
- package/src/verify.js +5 -0
- package/templates/scripts/__pycache__/calibration.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/curriculum_optimizer.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/feature_intelligence.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_xray.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/sensitivity_analysis.cpython-314.pyc +0 -0
- package/templates/scripts/calibration.py +364 -0
- package/templates/scripts/curriculum_optimizer.py +337 -0
- package/templates/scripts/feature_intelligence.py +369 -0
- package/templates/scripts/model_xray.py +317 -0
- package/templates/scripts/scaffold.py +10 -0
- package/templates/scripts/sensitivity_analysis.py +335 -0
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Automated feature selection and generation for the autoresearch pipeline.
|
|
3
|
+
|
|
4
|
+
Runs multiple feature importance methods (mutual information, permutation,
|
|
5
|
+
L1, tree-based), computes consensus ranking, detects redundancy, and
|
|
6
|
+
generates candidate interaction features.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python scripts/feature_intelligence.py
|
|
10
|
+
python scripts/feature_intelligence.py --method all
|
|
11
|
+
python scripts/feature_intelligence.py --method importance --top-k 15
|
|
12
|
+
python scripts/feature_intelligence.py --json
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import json
|
|
19
|
+
import sys
|
|
20
|
+
from datetime import datetime, timezone
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
import numpy as np
|
|
24
|
+
import yaml
|
|
25
|
+
|
|
26
|
+
from scripts.turing_io import load_config
|
|
27
|
+
|
|
28
|
+
DEFAULT_TOP_K = 20
|
|
29
|
+
REDUNDANCY_THRESHOLD = 0.95
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# --- Importance Methods ---
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def mutual_information_ranking(
    X: np.ndarray,
    y: np.ndarray,
    feature_names: list[str] | None = None,
) -> list[dict]:
    """Rank features by mutual information with the target.

    The classification vs. regression estimator is chosen by the same
    heuristic used throughout this module: 20 or fewer distinct target
    values is treated as a classification task.
    """
    from sklearn.feature_selection import mutual_info_classif, mutual_info_regression

    names = feature_names if feature_names is not None else [
        f"feature_{i}" for i in range(X.shape[1])
    ]

    # Few unique target values -> classification heuristic.
    if len(np.unique(y)) <= 20:
        scores = mutual_info_classif(X, y, random_state=42)
    else:
        scores = mutual_info_regression(X, y, random_state=42)

    entries = [
        {"feature": name, "score": round(float(score), 6), "rank": 0}
        for name, score in zip(names, scores)
    ]
    entries.sort(key=lambda e: e["score"], reverse=True)
    for position, entry in enumerate(entries, start=1):
        entry["rank"] = position

    return entries
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def l1_ranking(
    X: np.ndarray,
    y: np.ndarray,
    feature_names: list[str] | None = None,
) -> list[dict]:
    """Rank features by the magnitude of L1-regularized coefficients.

    Features are standardized first so coefficient magnitudes are
    comparable; the classification/regression split follows the <= 20
    unique target values heuristic shared by the other ranking helpers.
    """
    from sklearn.linear_model import Lasso, LogisticRegression
    from sklearn.preprocessing import StandardScaler

    names = feature_names if feature_names is not None else [
        f"feature_{i}" for i in range(X.shape[1])
    ]

    X_scaled = StandardScaler().fit_transform(X)

    if len(np.unique(y)) <= 20:
        clf = LogisticRegression(penalty="l1", solver="liblinear", C=1.0, max_iter=1000)
        clf.fit(X_scaled, y)
        # Multiclass models expose one coefficient row per class; average
        # absolute values across classes to get a single score per feature.
        if clf.coef_.ndim > 1:
            coefs = np.abs(clf.coef_).mean(axis=0)
        else:
            coefs = np.abs(clf.coef_.ravel())
    else:
        reg = Lasso(alpha=0.01, max_iter=1000)
        reg.fit(X_scaled, y)
        coefs = np.abs(reg.coef_)

    entries = [
        {"feature": name, "score": round(float(value), 6), "rank": 0}
        for name, value in zip(names, coefs)
    ]
    entries.sort(key=lambda e: e["score"], reverse=True)
    for position, entry in enumerate(entries, start=1):
        entry["rank"] = position

    return entries
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def tree_importance_ranking(
    X: np.ndarray,
    y: np.ndarray,
    feature_names: list[str] | None = None,
) -> list[dict]:
    """Rank features by random-forest impurity-based importance.

    Uses the same <= 20 unique target values heuristic as the other
    ranking helpers to pick a classifier vs. a regressor.
    """
    from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

    names = feature_names if feature_names is not None else [
        f"feature_{i}" for i in range(X.shape[1])
    ]

    if len(np.unique(y)) <= 20:
        forest = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
    else:
        forest = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)

    forest.fit(X, y)

    entries = [
        {"feature": name, "score": round(float(value), 6), "rank": 0}
        for name, value in zip(names, forest.feature_importances_)
    ]
    entries.sort(key=lambda e: e["score"], reverse=True)
    for position, entry in enumerate(entries, start=1):
        entry["rank"] = position

    return entries
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# --- Consensus ---
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def compute_consensus(
    rankings: dict[str, list[dict]],
    top_k: int = DEFAULT_TOP_K,
) -> list[dict]:
    """Compute consensus ranking across multiple methods.

    A feature's consensus score = number of methods that place it in top-K.
    Features absent from every top-K are still returned (consensus 0) so
    callers can treat them as drop candidates.

    Args:
        rankings: Mapping of method name -> ranked entries, each entry a
            dict with "feature" and "rank" keys (as produced by the
            *_ranking helpers in this module).
        top_k: How many leading entries of each method count toward
            consensus.

    Returns:
        Entries sorted by descending consensus (ties broken by feature
        name), each carrying "feature", "methods" (method -> rank),
        "consensus" and a display-ready "consensus_str".
    """
    n_methods = len(rankings)

    # Precompute feature -> rank per method once, instead of re-scanning
    # each ranking list with next(...) for every feature (the original
    # approach was O(features * methods * ranking_length)). setdefault
    # keeps the FIRST occurrence, matching next()'s first-match semantics.
    rank_maps: dict[str, dict[str, int]] = {}
    for method_name, ranking in rankings.items():
        first_rank: dict[str, int] = {}
        for r in ranking:
            first_rank.setdefault(r["feature"], r["rank"])
        rank_maps[method_name] = first_rank

    feature_scores: dict[str, dict] = {}
    for method_name, ranking in rankings.items():
        seen: set[str] = set()
        for entry in ranking[:top_k]:
            feat = entry["feature"]
            if feat in seen:  # count each feature once per method
                continue
            seen.add(feat)
            score = feature_scores.setdefault(
                feat, {"feature": feat, "methods": {}, "consensus": 0}
            )
            # Only methods that voted the feature into their top-K are
            # recorded here; the report renders missing methods as "—".
            score["methods"][method_name] = rank_maps[method_name].get(feat)
            score["consensus"] += 1

    # Features outside every top-K are kept with consensus 0 and a full
    # per-method rank map (drop candidates).
    for method_rank_map in rank_maps.values():
        for feat in method_rank_map:
            if feat not in feature_scores:
                feature_scores[feat] = {
                    "feature": feat,
                    "methods": {m: rm.get(feat) for m, rm in rank_maps.items()},
                    "consensus": 0,
                }

    result = sorted(feature_scores.values(), key=lambda x: (-x["consensus"], x["feature"]))

    for r in result:
        r["consensus_str"] = f"{r['consensus']}/{n_methods}"
        if r["consensus"] == n_methods:
            r["consensus_str"] += " ★"
        elif r["consensus"] == 0:
            r["consensus_str"] += " — DROP"

    return result
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# --- Redundancy Detection ---
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def detect_redundancy(
    X: np.ndarray,
    feature_names: list[str] | None = None,
    threshold: float = REDUNDANCY_THRESHOLD,
) -> list[dict]:
    """Find feature pairs whose absolute Pearson correlation exceeds *threshold*.

    NaN correlations (e.g. from constant columns) are ignored. Pairs are
    returned sorted from most to least correlated.
    """
    names = feature_names if feature_names is not None else [
        f"feature_{i}" for i in range(X.shape[1])
    ]

    n_features = X.shape[1]
    if n_features < 2:
        return []

    corr_matrix = np.corrcoef(X.T)

    pairs = []
    for i in range(n_features):
        for j in range(i + 1, n_features):
            strength = abs(corr_matrix[i, j])
            if np.isnan(strength) or strength <= threshold:
                continue
            pairs.append({
                "feature_a": names[i],
                "feature_b": names[j],
                "correlation": round(float(strength), 4),
            })

    pairs.sort(key=lambda p: -p["correlation"])
    return pairs
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# --- Feature Generation ---
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def generate_interaction_features(
    top_features: list[str],
    max_interactions: int = 10,
) -> list[dict]:
    """Propose product and ratio interactions among the leading features.

    Only the first few consensus features are paired (a 5x6 window) and
    the output is capped at *max_interactions* candidates.
    """
    proposals: list[dict] = []

    for idx, first in enumerate(top_features[:5]):
        for second in top_features[idx + 1:6]:
            if len(proposals) >= max_interactions:
                break
            proposals.append(
                {"name": f"{first}_x_{second}", "type": "product", "features": [first, second]}
            )
            proposals.append(
                {"name": f"{first}_div_{second}", "type": "ratio", "features": [first, second]}
            )

    # The pair loop may overshoot by one ratio entry; trim to the cap.
    return proposals[:max_interactions]
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# --- Full Pipeline ---
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def feature_analysis(
    X: np.ndarray | None = None,
    y: np.ndarray | None = None,
    feature_names: list[str] | None = None,
    method: str = "all",
    top_k: int = DEFAULT_TOP_K,
    config_path: str = "config.yaml",
) -> dict:
    """Run the full feature-intelligence pipeline and build a report dict.

    Computes the per-method rankings, the cross-method consensus,
    redundancy pairs, and candidate interaction features. Returns an
    error dict when no data arrays are supplied.
    """
    # Loaded for its validation side effects; the analysis itself is
    # driven entirely by the arrays passed in.
    load_config(config_path)

    if X is None or y is None:
        return {"error": "Provide X and y arrays for feature analysis",
                "note": "Run with --data train.npz to analyze features"}

    if feature_names is None:
        feature_names = [f"feature_{i}" for i in range(X.shape[1])]

    rankings: dict[str, list[dict]] = {}
    if method in ("all", "importance"):
        rankings["mutual_info"] = mutual_information_ranking(X, y, feature_names)
        rankings["l1"] = l1_ranking(X, y, feature_names)
        rankings["tree"] = tree_importance_ranking(X, y, feature_names)

    if not rankings:
        return {"error": f"Unknown method: {method}"}

    consensus = compute_consensus(rankings, top_k)
    redundant = detect_redundancy(X, feature_names)

    # Interactions are proposed only from features with at least one vote.
    keep = [c["feature"] for c in consensus if c["consensus"] > 0][:top_k]
    interactions = generate_interaction_features(keep)

    drop_candidates = [c for c in consensus if c["consensus"] == 0]
    if drop_candidates:
        recommendation = (
            f"Drop {len(drop_candidates)} features with 0/{len(rankings)} consensus "
            f"({len(drop_candidates)/X.shape[1]*100:.0f}% of features)"
        )
    else:
        recommendation = "All features contribute to at least one method"

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "n_features": X.shape[1],
        "top_k": top_k,
        "rankings": {name: ranked[:top_k] for name, ranked in rankings.items()},
        "consensus": consensus[:top_k * 2],
        "drop_candidates": drop_candidates,
        "n_drop": len(drop_candidates),
        "redundant_pairs": redundant,
        "interaction_candidates": interactions,
        "recommendation": recommendation,
    }
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
# --- Report Formatting ---
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def save_feature_report(report: dict, output_dir: str = "experiments/features") -> Path:
    """Persist the report as date-stamped YAML under *output_dir*.

    Creates the directory if needed and returns the written path.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    destination = target_dir / f"features-{stamp}.yaml"

    # Round-trip through JSON so numpy scalars and other non-native types
    # are coerced to plain Python values before YAML serialization.
    serializable = json.loads(json.dumps(report, default=str))
    with destination.open("w") as handle:
        yaml.dump(serializable, handle, default_flow_style=False, sort_keys=False)

    return destination
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def format_feature_report(report: dict) -> str:
    """Render a feature-intelligence report dict as Markdown."""
    if "error" in report:
        return f"ERROR: {report['error']}\n{report.get('note', '')}"

    lines = [
        "# Feature Intelligence",
        "",
        f"*Generated {report.get('generated_at', 'N/A')[:19]}*",
        f"**{report.get('n_features', 0)} features analyzed, top-{report.get('top_k', 20)}**",
        "",
    ]

    # Consensus table: one rank column per method seen in the entries.
    consensus = report.get("consensus", [])
    if consensus:
        method_names = sorted({m for c in consensus for m in c.get("methods", {})})

        header = "| Feature |" + "|".join(f" {m} Rank " for m in method_names) + "| Consensus |"
        sep = "|---------|" + "|".join("-------" for _ in method_names) + "|-----------|"
        lines += ["## Consensus Ranking", "", header, sep]
        for entry in consensus[:15]:
            rank_cells = "|".join(f" {entry['methods'].get(m, '—')} " for m in method_names)
            lines.append(f"| {entry['feature']} |{rank_cells}| {entry['consensus_str']} |")
        lines.append("")

    redundant = report.get("redundant_pairs", [])
    if redundant:
        lines += ["## Redundant Pairs", ""]
        lines += [
            f"- **{pair['feature_a']}** ↔ **{pair['feature_b']}**: r={pair['correlation']}"
            for pair in redundant[:5]
        ]
        lines.append("")

    interactions = report.get("interaction_candidates", [])
    if interactions:
        lines += ["## Candidate Interactions", ""]
        lines += [
            f"- `{cand['name']}` ({cand['type']}: {' × '.join(cand['features'])})"
            for cand in interactions[:5]
        ]
        lines.append("")

    lines += ["## Recommendation", "", report.get("recommendation", "")]

    return "\n".join(lines)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def main() -> None:
    """CLI entry point: parse arguments, run the analysis, emit the report."""
    parser = argparse.ArgumentParser(description="Automated feature selection")
    parser.add_argument("--method", choices=["all", "importance", "selection", "generation"], default="all")
    parser.add_argument("--top-k", type=int, default=DEFAULT_TOP_K)
    parser.add_argument("--config", default="config.yaml")
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args()

    report = feature_analysis(method=args.method, top_k=args.top_k, config_path=args.config)

    # Persist successful runs; the status message goes to stderr so stdout
    # carries only the report itself.
    if "error" not in report:
        saved_to = save_feature_report(report)
        print(f"Saved to {saved_to}", file=sys.stderr)

    if args.json:
        print(json.dumps(report, indent=2, default=str))
    else:
        print(format_feature_report(report))


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Internal model diagnostics for the autoresearch pipeline.
|
|
3
|
+
|
|
4
|
+
Inspects model internals: gradient flow per layer, activation statistics,
|
|
5
|
+
dead neurons, weight distributions, decision path analysis. Answers
|
|
6
|
+
"what is the model doing internally?" rather than "what are its predictions?"
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python scripts/model_xray.py exp-042
|
|
10
|
+
python scripts/model_xray.py exp-042 --layer "encoder.layer.2"
|
|
11
|
+
python scripts/model_xray.py --compare exp-042 exp-053
|
|
12
|
+
python scripts/model_xray.py --json
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import json
|
|
19
|
+
import math
|
|
20
|
+
import sys
|
|
21
|
+
from datetime import datetime, timezone
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
import numpy as np
|
|
25
|
+
import yaml
|
|
26
|
+
|
|
27
|
+
from scripts.turing_io import load_config, load_experiments
|
|
28
|
+
|
|
29
|
+
DEFAULT_LOG_PATH = "experiments/log.jsonl"
|
|
30
|
+
DEAD_NEURON_THRESHOLD = 0.01 # Activation below this = dead
|
|
31
|
+
EXPLODING_GRADIENT_RATIO = 100 # Gradient > N * mean = exploding
|
|
32
|
+
NEAR_ZERO_WEIGHT = 0.001 # Weight below this = pruning candidate
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# --- Neural Network Diagnostics ---
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def diagnose_neural_layers(layer_stats: list[dict]) -> dict:
    """Analyze neural network layer statistics.

    Args:
        layer_stats: List of dicts with name, grad_mean, grad_max, act_mean,
            act_std, dead_pct, weight_mean, weight_std. Any field may be
            missing or explicitly None; both are treated as "no data" (0).

    Returns:
        Diagnosis dict with per-layer analysis and detected issues.
    """
    if not layer_stats:
        return {"layers": [], "issues": [], "model_type": "neural"}

    issues = []
    analyzed = []

    # Compute global gradient mean for relative comparison. The explicit
    # None guard here shows None values are expected in the input.
    grad_means = [abs(l.get("grad_mean", 0)) for l in layer_stats if l.get("grad_mean") is not None]
    global_grad_mean = np.mean(grad_means) if grad_means else 0

    for layer in layer_stats:
        name = layer.get("name", "?")
        analysis = {"name": name}

        # Gradient analysis. `or 0` guards against a key present with value
        # None — abs(None) would raise TypeError (the original code only
        # handled the missing-key case via .get's default).
        grad_mean = abs(layer.get("grad_mean") or 0)
        grad_max = abs(layer.get("grad_max") or 0)
        analysis["grad_mean"] = grad_mean
        analysis["grad_max"] = grad_max

        if grad_mean == 0 and grad_max == 0:
            issues.append({"layer": name, "issue": "dead_gradient", "severity": "high",
                           "message": f"{name}: zero gradients — layer is not learning"})
        elif global_grad_mean > 0 and grad_mean < global_grad_mean / EXPLODING_GRADIENT_RATIO:
            ratio = global_grad_mean / grad_mean if grad_mean > 0 else float("inf")
            issues.append({"layer": name, "issue": "vanishing_gradient", "severity": "high",
                           "message": f"{name}: gradient {ratio:.0f}x weaker than average — possible vanishing gradient"})
        elif global_grad_mean > 0 and grad_max > EXPLODING_GRADIENT_RATIO * global_grad_mean:
            issues.append({"layer": name, "issue": "exploding_gradient", "severity": "critical",
                           "message": f"{name}: gradient max {grad_max:.2e} is {grad_max/global_grad_mean:.0f}x the average — exploding gradient"})

        # Activation analysis (`or 0` again shields the comparisons below
        # from an explicit None value).
        dead_pct = layer.get("dead_pct") or 0
        analysis["dead_pct"] = dead_pct
        if dead_pct > 20:
            issues.append({"layer": name, "issue": "dead_neurons", "severity": "high",
                           "message": f"{name}: {dead_pct:.0f}% dead neurons — consider batch norm or layer width reduction"})
        elif dead_pct > 5:
            issues.append({"layer": name, "issue": "dying_neurons", "severity": "medium",
                           "message": f"{name}: {dead_pct:.0f}% near-dead neurons"})

        # Weight analysis
        weight_std = layer.get("weight_std") or 0
        near_zero_pct = layer.get("near_zero_pct") or 0
        analysis["weight_std"] = weight_std
        analysis["near_zero_pct"] = near_zero_pct
        if near_zero_pct > 50:
            issues.append({"layer": name, "issue": "sparse_weights", "severity": "medium",
                           "message": f"{name}: {near_zero_pct:.0f}% near-zero weights — pruning candidate"})

        analyzed.append(analysis)

    return {"layers": analyzed, "issues": issues, "model_type": "neural"}
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# --- Tree Model Diagnostics ---
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def diagnose_tree_model(tree_stats: dict) -> dict:
    """Analyze tree-based model statistics.

    Args:
        tree_stats: Dict with n_trees, avg_depth, max_depth_allowed,
            feature_split_freq, leaf_purity.

    Returns:
        Diagnosis dict with the raw stats echoed back plus detected issues.
    """
    found: list[dict] = []

    n_trees = tree_stats.get("n_trees", 0)
    avg_depth = tree_stats.get("avg_depth", 0)
    depth_cap = tree_stats.get("max_depth_allowed", 0)
    split_freq = tree_stats.get("feature_split_freq", {})
    purity = tree_stats.get("leaf_purity", 0)

    # How much of the permitted depth the trees actually use.
    if depth_cap > 0 and avg_depth > 0:
        used = avg_depth / depth_cap
        if used < 0.5:
            found.append({"issue": "underutilized_depth", "severity": "medium",
                          "message": f"Trees use only {used:.0%} of allowed depth ({avg_depth:.1f}/{depth_cap}) — consider reducing max_depth"})
        elif used > 0.95:
            found.append({"issue": "depth_saturated", "severity": "medium",
                          "message": f"Trees use {used:.0%} of allowed depth — consider increasing max_depth"})

    # A single feature absorbing most splits hints at leakage (or an
    # engineering opportunity).
    total_splits = sum(split_freq.values()) if split_freq else 0
    if total_splits > 0:
        dominant = max(split_freq, key=split_freq.get)
        share = split_freq[dominant] / total_splits
        if share > 0.5:
            found.append({"issue": "feature_dominance", "severity": "medium",
                          "message": f"Feature '{dominant}' dominates {share:.0%} of splits — check for leakage or engineering opportunity"})

    # Near-perfect leaves usually mean the trees memorized the training set.
    if purity > 0.99:
        found.append({"issue": "overfitting_risk", "severity": "medium",
                      "message": f"Leaf purity {purity:.4f} — model may be overfitting"})

    return {
        "model_type": "tree",
        "n_trees": n_trees,
        "avg_depth": avg_depth,
        "max_depth_allowed": depth_cap,
        "depth_utilization": round(avg_depth / depth_cap, 3) if depth_cap > 0 else None,
        "feature_split_freq": split_freq,
        "leaf_purity": purity,
        "issues": found,
    }
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# --- sklearn Diagnostics ---
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def diagnose_sklearn_model(model_stats: dict) -> dict:
    """Analyze scikit-learn model statistics.

    Args:
        model_stats: Dict with model_type, coefficients, feature_importances.

    Returns:
        Diagnosis dict with counts of the supplied stats and detected issues.
    """
    found: list[dict] = []
    kind = model_stats.get("model_type", "unknown")

    coefs = model_stats.get("coefficients", [])
    if coefs:
        magnitudes = np.abs(np.array(coefs))
        largest = float(np.max(magnitudes))
        zero_share = float(np.mean(magnitudes < NEAR_ZERO_WEIGHT))

        if largest > 100:
            found.append({"issue": "large_coefficients", "severity": "high",
                          "message": f"Max coefficient magnitude {largest:.1f} — consider regularization"})
        if zero_share > 0.5:
            found.append({"issue": "sparse_coefficients", "severity": "medium",
                          "message": f"{zero_share:.0%} near-zero coefficients — feature selection may help"})

    importances = model_stats.get("feature_importances", [])
    if importances:
        imp = np.array(importances)
        if imp.size > 0 and np.std(imp) > 0:
            # Check whether a handful of features carry most of the signal.
            k = min(3, imp.size)
            leading = float(np.sum(np.sort(imp)[-k:]))
            if leading > 0.8:
                found.append({"issue": "importance_concentrated", "severity": "medium",
                              "message": f"Top {k} features account for {leading:.0%} of importance"})

    return {
        "model_type": kind,
        "n_coefficients": len(coefs),
        "n_importances": len(importances),
        "issues": found,
    }
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
# --- Full X-Ray Pipeline ---
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def xray_model(
    exp_id: str | None = None,
    layer_stats: list[dict] | None = None,
    tree_stats: dict | None = None,
    sklearn_stats: dict | None = None,
    config_path: str = "config.yaml",
    log_path: str = DEFAULT_LOG_PATH,
) -> dict:
    """Run model diagnostics for whichever stats payload was supplied.

    Exactly one of layer_stats / tree_stats / sklearn_stats is used,
    checked in that order; with none supplied a placeholder diagnosis is
    returned.
    """
    config = load_config(config_path)
    # Read for parity with the other pipeline scripts; the dispatch below
    # keys on which stats argument was provided, not on this hint.
    model_type_hint = config.get("model", {}).get("type", "")

    if layer_stats is not None:
        diagnosis = diagnose_neural_layers(layer_stats)
    elif tree_stats is not None:
        diagnosis = diagnose_tree_model(tree_stats)
    elif sklearn_stats is not None:
        diagnosis = diagnose_sklearn_model(sklearn_stats)
    else:
        diagnosis = {"model_type": "unknown", "issues": [],
                     "note": "No model stats provided. Run with model-specific stats for full diagnostics."}

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "experiment_id": exp_id,
        "diagnosis": diagnosis,
        "n_issues": len(diagnosis.get("issues", [])),
    }
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
# --- Report Formatting ---
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def save_xray_report(report: dict, output_dir: str = "experiments/xrays") -> Path:
    """Write an x-ray report to <output_dir>/<exp_id>-xray.yaml.

    Creates the directory if needed and returns the written path.
    """
    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)
    # xray_model always sets "experiment_id" (possibly to None), so a
    # .get(..., "unknown") default never fired and the file was named
    # "None-xray.yaml". `or` covers both the missing and the None case.
    exp_id = report.get("experiment_id") or "unknown"
    filepath = out_path / f"{exp_id}-xray.yaml"
    # Round-trip through JSON (matching save_feature_report) so numpy
    # scalars and other non-native types are coerced before YAML dumping.
    clean = json.loads(json.dumps(report, default=str))
    with open(filepath, "w") as f:
        yaml.dump(clean, f, default_flow_style=False, sort_keys=False)
    return filepath
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def format_xray_report(report: dict) -> str:
    """Render an x-ray report dict as Markdown."""
    if "error" in report:
        return f"ERROR: {report['error']}"

    diagnosis = report.get("diagnosis", {})
    kind = diagnosis.get("model_type", "?")
    experiment = report.get("experiment_id", "?")
    problems = diagnosis.get("issues", [])

    out = [f"# X-Ray: {experiment} ({kind})", "",
           f"*Generated {report.get('generated_at', 'N/A')[:19]}*", ""]

    # Per-layer table (present only for neural diagnoses).
    layer_rows = diagnosis.get("layers", [])
    if layer_rows:
        out += ["## Layer Analysis", "",
                "| Layer | Grad Mean | Grad Max | Dead % | Weight Std |",
                "|-------|-----------|----------|--------|------------|"]
        for row in layer_rows:
            out.append(
                f"| {row['name']} | {row.get('grad_mean', 0):.2e} | {row.get('grad_max', 0):.2e} "
                f"| {row.get('dead_pct', 0):.0f}% | {row.get('weight_std', 0):.4f} |"
            )
        out.append("")

    if kind == "tree":
        out += ["## Tree Statistics", "",
                f"- **Trees:** {diagnosis.get('n_trees', '?')}",
                f"- **Avg depth:** {diagnosis.get('avg_depth', '?')}/{diagnosis.get('max_depth_allowed', '?')}",
                f"- **Leaf purity:** {diagnosis.get('leaf_purity', '?')}", ""]

    out += ["## Issues Detected", ""]
    if problems:
        for item in problems:
            out.append(f"- **[{item.get('severity', '?').upper()}]** {item.get('message', 'N/A')}")
    else:
        out.append("No issues found.")

    note = diagnosis.get("note")
    if note:
        out += ["", f"*{note}*"]

    return "\n".join(out)
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def main() -> None:
    """CLI entry point: parse arguments, run the x-ray, emit the report."""
    parser = argparse.ArgumentParser(description="Internal model diagnostics")
    parser.add_argument("exp_id", nargs="?", help="Experiment ID")
    parser.add_argument("--config", default="config.yaml")
    parser.add_argument("--log", default=DEFAULT_LOG_PATH)
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args()

    report = xray_model(exp_id=args.exp_id, config_path=args.config, log_path=args.log)

    # Persist successful runs; the status message goes to stderr so stdout
    # carries only the report itself.
    if "error" not in report:
        saved_to = save_xray_report(report)
        print(f"Saved to {saved_to}", file=sys.stderr)

    if args.json:
        print(json.dumps(report, indent=2, default=str))
    else:
        print(format_xray_report(report))


if __name__ == "__main__":
    main()
|