claude-turing 4.2.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +7 -2
- package/commands/doctor.md +30 -0
- package/commands/plan.md +27 -0
- package/commands/postmortem.md +28 -0
- package/commands/registry.md +31 -0
- package/commands/turing.md +10 -0
- package/commands/update.md +27 -0
- package/package.json +1 -1
- package/src/install.js +2 -0
- package/src/verify.js +5 -0
- package/templates/scripts/__pycache__/failure_postmortem.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_model_card.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/harness_doctor.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/incremental_update.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_lifecycle.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/research_planner.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/failure_postmortem.py +510 -0
- package/templates/scripts/generate_brief.py +122 -0
- package/templates/scripts/generate_model_card.py +154 -3
- package/templates/scripts/harness_doctor.py +466 -0
- package/templates/scripts/incremental_update.py +586 -0
- package/templates/scripts/model_lifecycle.py +549 -0
- package/templates/scripts/research_planner.py +470 -0
- package/templates/scripts/scaffold.py +10 -0
|
@@ -18,6 +18,8 @@ import sys
|
|
|
18
18
|
from datetime import datetime, timezone
|
|
19
19
|
from pathlib import Path
|
|
20
20
|
|
|
21
|
+
import yaml
|
|
22
|
+
|
|
21
23
|
from scripts.turing_io import load_config, load_experiments
|
|
22
24
|
|
|
23
25
|
|
|
@@ -93,22 +95,113 @@ def load_model_contract(contract_path: str) -> dict:
|
|
|
93
95
|
return {"version": version, "bundle_format": bundle_format, "raw": text}
|
|
94
96
|
|
|
95
97
|
|
|
98
|
+
def load_registry_status(registry_path: str = "experiments/registry.yaml") -> dict | None:
|
|
99
|
+
"""Load registry status for the best model."""
|
|
100
|
+
path = Path(registry_path)
|
|
101
|
+
if not path.exists():
|
|
102
|
+
return None
|
|
103
|
+
try:
|
|
104
|
+
with open(path) as f:
|
|
105
|
+
data = yaml.safe_load(f)
|
|
106
|
+
if isinstance(data, dict) and data.get("models"):
|
|
107
|
+
return data
|
|
108
|
+
except (Exception,):
|
|
109
|
+
pass
|
|
110
|
+
return None
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def compute_fairness_metrics(
|
|
114
|
+
predictions: list | None = None,
|
|
115
|
+
labels: list | None = None,
|
|
116
|
+
protected_attribute: list | None = None,
|
|
117
|
+
group_names: list[str] | None = None,
|
|
118
|
+
) -> dict | None:
|
|
119
|
+
"""Compute demographic parity and equal opportunity metrics.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
predictions: Model predictions.
|
|
123
|
+
labels: True labels.
|
|
124
|
+
protected_attribute: Group membership for each sample.
|
|
125
|
+
group_names: Names of groups.
|
|
126
|
+
|
|
127
|
+
Returns:
|
|
128
|
+
Fairness metrics dict or None if insufficient data.
|
|
129
|
+
"""
|
|
130
|
+
if predictions is None or protected_attribute is None:
|
|
131
|
+
return None
|
|
132
|
+
if len(predictions) != len(protected_attribute):
|
|
133
|
+
return None
|
|
134
|
+
if len(predictions) == 0:
|
|
135
|
+
return None
|
|
136
|
+
|
|
137
|
+
import numpy as np
|
|
138
|
+
|
|
139
|
+
preds = np.array(predictions)
|
|
140
|
+
groups = np.array(protected_attribute)
|
|
141
|
+
unique_groups = sorted(set(groups))
|
|
142
|
+
|
|
143
|
+
if group_names is None:
|
|
144
|
+
group_names = [str(g) for g in unique_groups]
|
|
145
|
+
|
|
146
|
+
# Demographic parity: P(Y_hat=1 | G=g) for each group
|
|
147
|
+
group_positive_rates = {}
|
|
148
|
+
for g, name in zip(unique_groups, group_names):
|
|
149
|
+
mask = groups == g
|
|
150
|
+
if mask.sum() == 0:
|
|
151
|
+
continue
|
|
152
|
+
rate = float(preds[mask].mean()) if preds[mask].size > 0 else 0
|
|
153
|
+
group_positive_rates[name] = round(rate, 4)
|
|
154
|
+
|
|
155
|
+
# Demographic parity difference
|
|
156
|
+
rates = list(group_positive_rates.values())
|
|
157
|
+
dp_diff = round(max(rates) - min(rates), 4) if len(rates) >= 2 else 0
|
|
158
|
+
|
|
159
|
+
result = {
|
|
160
|
+
"group_positive_rates": group_positive_rates,
|
|
161
|
+
"demographic_parity_difference": dp_diff,
|
|
162
|
+
"n_groups": len(unique_groups),
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
# Equal opportunity (if labels available): P(Y_hat=1 | Y=1, G=g)
|
|
166
|
+
if labels is not None and len(labels) == len(predictions):
|
|
167
|
+
labs = np.array(labels)
|
|
168
|
+
group_tpr = {}
|
|
169
|
+
for g, name in zip(unique_groups, group_names):
|
|
170
|
+
mask = (groups == g) & (labs == 1)
|
|
171
|
+
if mask.sum() == 0:
|
|
172
|
+
continue
|
|
173
|
+
tpr = float(preds[mask].mean()) if preds[mask].size > 0 else 0
|
|
174
|
+
group_tpr[name] = round(tpr, 4)
|
|
175
|
+
|
|
176
|
+
result["group_true_positive_rates"] = group_tpr
|
|
177
|
+
tpr_vals = list(group_tpr.values())
|
|
178
|
+
result["equal_opportunity_difference"] = round(max(tpr_vals) - min(tpr_vals), 4) if len(tpr_vals) >= 2 else 0
|
|
179
|
+
|
|
180
|
+
return result
|
|
181
|
+
|
|
182
|
+
|
|
96
183
|
def generate_card(
|
|
97
184
|
config_path: str = "config.yaml",
|
|
98
185
|
log_path: str = "experiments/log.jsonl",
|
|
99
186
|
contract_path: str = "model_contract.md",
|
|
100
187
|
output_path: str | None = None,
|
|
188
|
+
include_fairness: bool = False,
|
|
189
|
+
fairness_data: dict | None = None,
|
|
190
|
+
registry_path: str = "experiments/registry.yaml",
|
|
101
191
|
) -> str:
|
|
102
192
|
"""Produce a model card markdown document.
|
|
103
193
|
|
|
104
194
|
Combines information from the project config, experiment log,
|
|
105
|
-
|
|
195
|
+
model contract, registry, and optional fairness data.
|
|
106
196
|
|
|
107
197
|
Args:
|
|
108
198
|
config_path: Path to config.yaml.
|
|
109
199
|
log_path: Path to experiments/log.jsonl.
|
|
110
200
|
contract_path: Path to model_contract.md.
|
|
111
201
|
output_path: If given, write the card to this file.
|
|
202
|
+
include_fairness: If True, add fairness section.
|
|
203
|
+
fairness_data: Pre-computed fairness data {predictions, labels, protected_attribute}.
|
|
204
|
+
registry_path: Path to registry YAML.
|
|
112
205
|
|
|
113
206
|
Returns:
|
|
114
207
|
The model card as a markdown string.
|
|
@@ -247,7 +340,6 @@ def generate_card(
|
|
|
247
340
|
if best:
|
|
248
341
|
seed_study_path = Path("experiments/seed_studies") / f"{best.get('experiment_id', 'unknown')}-seeds.yaml"
|
|
249
342
|
if seed_study_path.exists():
|
|
250
|
-
import yaml
|
|
251
343
|
with open(seed_study_path) as f:
|
|
252
344
|
seed_study = yaml.safe_load(f) or {}
|
|
253
345
|
if seed_study and "mean" in seed_study:
|
|
@@ -306,6 +398,57 @@ def generate_card(
|
|
|
306
398
|
"- Not intended for: <placeholder for user to fill>",
|
|
307
399
|
])
|
|
308
400
|
|
|
401
|
+
# --- Registry Status ---
|
|
402
|
+
registry_data = load_registry_status(registry_path)
|
|
403
|
+
if registry_data and best:
|
|
404
|
+
exp_id = best.get("experiment_id", "")
|
|
405
|
+
for model in registry_data.get("models", []):
|
|
406
|
+
if model.get("exp_id") == exp_id:
|
|
407
|
+
lines.extend([
|
|
408
|
+
"",
|
|
409
|
+
"## Registry Status",
|
|
410
|
+
"",
|
|
411
|
+
f"- **Stage:** {model.get('stage', 'unregistered')}",
|
|
412
|
+
f"- **Version:** {model.get('version', 'N/A')}",
|
|
413
|
+
f"- **Registered:** {model.get('registered_at', 'N/A')[:10]}",
|
|
414
|
+
f"- **Gates passed:** {', '.join(model.get('gates_passed', [])) or 'none'}",
|
|
415
|
+
])
|
|
416
|
+
break
|
|
417
|
+
|
|
418
|
+
# --- Fairness ---
|
|
419
|
+
if include_fairness:
|
|
420
|
+
lines.extend([
|
|
421
|
+
"",
|
|
422
|
+
"## Fairness Analysis",
|
|
423
|
+
"",
|
|
424
|
+
])
|
|
425
|
+
if fairness_data:
|
|
426
|
+
fairness = compute_fairness_metrics(
|
|
427
|
+
predictions=fairness_data.get("predictions"),
|
|
428
|
+
labels=fairness_data.get("labels"),
|
|
429
|
+
protected_attribute=fairness_data.get("protected_attribute"),
|
|
430
|
+
group_names=fairness_data.get("group_names"),
|
|
431
|
+
)
|
|
432
|
+
if fairness:
|
|
433
|
+
lines.append("### Demographic Parity")
|
|
434
|
+
lines.append("")
|
|
435
|
+
for group, rate in fairness.get("group_positive_rates", {}).items():
|
|
436
|
+
lines.append(f"- **{group}:** {rate:.4f}")
|
|
437
|
+
lines.append(f"- **Parity difference:** {fairness['demographic_parity_difference']:.4f}")
|
|
438
|
+
|
|
439
|
+
if "group_true_positive_rates" in fairness:
|
|
440
|
+
lines.append("")
|
|
441
|
+
lines.append("### Equal Opportunity")
|
|
442
|
+
lines.append("")
|
|
443
|
+
for group, tpr in fairness["group_true_positive_rates"].items():
|
|
444
|
+
lines.append(f"- **{group}:** {tpr:.4f}")
|
|
445
|
+
lines.append(f"- **Opportunity difference:** {fairness['equal_opportunity_difference']:.4f}")
|
|
446
|
+
else:
|
|
447
|
+
lines.append("- Fairness analysis requested but insufficient data provided")
|
|
448
|
+
else:
|
|
449
|
+
lines.append("- Fairness analysis requested but no protected attribute data available")
|
|
450
|
+
lines.append("- Provide `--fairness-data` with predictions, labels, and protected attributes")
|
|
451
|
+
|
|
309
452
|
# --- Ethical Considerations ---
|
|
310
453
|
lines.extend([
|
|
311
454
|
"",
|
|
@@ -354,9 +497,17 @@ def main() -> None:
|
|
|
354
497
|
parser.add_argument("--log", default="experiments/log.jsonl", help="Path to experiment log")
|
|
355
498
|
parser.add_argument("--contract", default="model_contract.md", help="Path to model contract")
|
|
356
499
|
parser.add_argument("--output", default=None, help="Output path (default: print to stdout)")
|
|
500
|
+
parser.add_argument("--include", default=None, help="Include extra sections (e.g., 'fairness')")
|
|
501
|
+
parser.add_argument("--registry", default="experiments/registry.yaml", help="Path to model registry")
|
|
357
502
|
args = parser.parse_args()
|
|
358
503
|
|
|
359
|
-
|
|
504
|
+
include_fairness = args.include and "fairness" in args.include
|
|
505
|
+
|
|
506
|
+
card = generate_card(
|
|
507
|
+
args.config, args.log, args.contract, args.output,
|
|
508
|
+
include_fairness=include_fairness,
|
|
509
|
+
registry_path=args.registry,
|
|
510
|
+
)
|
|
360
511
|
if args.output:
|
|
361
512
|
print(f"Model card written to {args.output}")
|
|
362
513
|
else:
|
|
@@ -0,0 +1,466 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Harness self-diagnosis for the autoresearch pipeline.
|
|
3
|
+
|
|
4
|
+
Checks environment health, project integrity, resource availability,
|
|
5
|
+
and git state. Identifies common issues and auto-fixes where safe.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python scripts/harness_doctor.py
|
|
9
|
+
python scripts/harness_doctor.py --fix
|
|
10
|
+
python scripts/harness_doctor.py --verbose
|
|
11
|
+
python scripts/harness_doctor.py --json
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
import ast
|
|
18
|
+
import json
|
|
19
|
+
import shutil
|
|
20
|
+
import sys
|
|
21
|
+
from datetime import datetime, timezone
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
import yaml
|
|
25
|
+
|
|
26
|
+
from scripts.turing_io import load_config, load_experiments
|
|
27
|
+
|
|
28
|
+
# Default location of the JSONL experiment log that doctor inspects/repairs.
DEFAULT_LOG_PATH = "experiments/log.jsonl"
# Free-space floor (in MB) below which check_disk_space reports FAIL.
MIN_DISK_MB = 1024  # 1 GB

# Project scripts that check_scripts expects to find and syntax-check.
REQUIRED_SCRIPTS = ["train.py", "prepare.py", "evaluate.py"]
# Top-level keys that check_config requires in config.yaml (missing -> WARN).
REQUIRED_CONFIG_FIELDS = ["evaluation"]

# Names of the diagnostic categories.
# NOTE(review): not referenced by the visible code in this file — presumably
# consumed by external tooling/docs; confirm before removing.
CHECK_CATEGORIES = ["environment", "dependencies", "config", "experiment_log",
                    "scripts", "disk_space", "git_state"]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# --- Individual Checks ---
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def check_environment() -> dict:
    """Check Python environment health."""
    problems: list[str] = []
    ver = sys.version_info

    # Old interpreters only warn — the harness still mostly works on them.
    if ver < (3, 10):
        problems.append(f"Python {ver.major}.{ver.minor} — recommend 3.10+")

    # Detect a virtualenv: legacy marker, or a base prefix that diverges.
    venv_active = hasattr(sys, "real_prefix") or (
        hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix
    )

    detail = (
        f"Python {ver.major}.{ver.minor}.{ver.micro}, "
        f"venv={'active' if venv_active else 'not active'}"
    )
    return {
        "name": "Python environment",
        "status": "WARN" if problems else "PASS",
        "detail": detail,
        "issues": problems,
    }
60
|
+
def check_dependencies(required: list[str] | None = None) -> dict:
|
|
61
|
+
"""Check that required packages are importable."""
|
|
62
|
+
if required is None:
|
|
63
|
+
required = ["yaml", "numpy", "sklearn", "pandas", "scipy"]
|
|
64
|
+
|
|
65
|
+
missing = []
|
|
66
|
+
for pkg in required:
|
|
67
|
+
try:
|
|
68
|
+
__import__(pkg)
|
|
69
|
+
except ImportError:
|
|
70
|
+
missing.append(pkg)
|
|
71
|
+
|
|
72
|
+
if missing:
|
|
73
|
+
return {
|
|
74
|
+
"name": "Dependencies",
|
|
75
|
+
"status": "FAIL",
|
|
76
|
+
"detail": f"{len(missing)} packages missing: {', '.join(missing)}",
|
|
77
|
+
"issues": [f"Cannot import: {pkg}" for pkg in missing],
|
|
78
|
+
"fix": f"pip install {' '.join(missing)}",
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
return {
|
|
82
|
+
"name": "Dependencies",
|
|
83
|
+
"status": "PASS",
|
|
84
|
+
"detail": f"All {len(required)} packages importable",
|
|
85
|
+
"issues": [],
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def check_config(config_path: str = "config.yaml") -> dict:
    """Check config.yaml validity and required fields."""
    cfg_file = Path(config_path)

    if not cfg_file.exists():
        return {
            "name": "Config",
            "status": "FAIL",
            "detail": f"{config_path} not found",
            "issues": [f"{config_path} missing"],
            "fix": "Run /turing:init to scaffold the project",
        }

    # A parse error is a hard failure: nothing downstream can use the config.
    try:
        with open(cfg_file) as fh:
            parsed = yaml.safe_load(fh)
    except yaml.YAMLError as exc:
        return {
            "name": "Config",
            "status": "FAIL",
            "detail": f"{config_path} has YAML parse error",
            "issues": [str(exc)],
        }

    if not isinstance(parsed, dict):
        return {
            "name": "Config",
            "status": "FAIL",
            "detail": f"{config_path} is not a YAML mapping",
            "issues": ["Config must be a YAML dict"],
        }

    # Missing required fields only downgrade the result to WARN.
    missing = [
        f"Missing required field: {field}"
        for field in REQUIRED_CONFIG_FIELDS
        if field not in parsed
    ]

    return {
        "name": "Config",
        "status": "PASS" if not missing else "WARN",
        "detail": f"{config_path} valid, {len(parsed)} top-level keys",
        "issues": missing,
    }
|
135
|
+
def check_experiment_log(log_path: str = DEFAULT_LOG_PATH) -> dict:
    """Check experiment log integrity."""
    log_file = Path(log_path)

    # A missing log is expected on a fresh project — warn rather than fail.
    if not log_file.exists():
        return {
            "name": "Experiment log",
            "status": "WARN",
            "detail": "No experiment log yet — run /turing:train first",
            "issues": [],
        }

    total = 0
    valid = 0
    corrupt: list[int] = []      # 1-based line numbers that fail to parse
    no_metrics: list[int] = []   # entries that parse but lack 'metrics'

    with open(log_file) as fh:
        for lineno, raw in enumerate(fh, 1):
            total += 1
            stripped = raw.strip()
            if not stripped:
                continue
            try:
                record = json.loads(stripped)
            except json.JSONDecodeError:
                corrupt.append(lineno)
                continue
            valid += 1
            if "metrics" not in record:
                no_metrics.append(lineno)

    problems = []
    if corrupt:
        problems.append(f"{len(corrupt)} corrupt lines: {corrupt[:5]}")
    if no_metrics:
        problems.append(f"{len(no_metrics)} entries missing 'metrics' field")

    # Corruption is a FAIL; schema drift alone is only a WARN.
    if corrupt:
        status = "FAIL"
    elif no_metrics:
        status = "WARN"
    else:
        status = "PASS"

    return {
        "name": "Experiment log",
        "status": status,
        "detail": f"{valid}/{total} valid entries",
        "issues": problems,
        "corrupt_lines": corrupt,
        "fixable": len(corrupt) > 0,
    }
184
|
+
def check_scripts(script_dir: str = ".") -> dict:
    """Check that required scripts exist and are syntactically valid."""
    problems: list[str] = []
    valid_count = 0

    for name in REQUIRED_SCRIPTS:
        script_path = Path(script_dir) / name
        if not script_path.exists():
            problems.append(f"{name} not found")
            continue

        # Parse (never execute) the script to catch syntax errors cheaply.
        try:
            ast.parse(script_path.read_text(encoding="utf-8"), filename=name)
        except SyntaxError as exc:
            problems.append(f"{name} has syntax error: {exc.msg} (line {exc.lineno})")
        else:
            valid_count += 1

    # All clean -> PASS; some valid -> WARN; nothing usable -> FAIL.
    if not problems:
        status = "PASS"
    elif valid_count > 0:
        status = "WARN"
    else:
        status = "FAIL"

    return {
        "name": "Scripts",
        "status": status,
        "detail": f"{valid_count}/{len(REQUIRED_SCRIPTS)} scripts valid",
        "issues": problems,
    }
|
|
211
|
+
def check_disk_space(project_dir: str = ".", min_mb: int = MIN_DISK_MB) -> dict:
    """Check available disk space."""
    try:
        stats = shutil.disk_usage(project_dir)
    except OSError as exc:
        # Cannot stat the filesystem — report a warning, not a failure.
        return {
            "name": "Disk space",
            "status": "WARN",
            "detail": f"Could not check disk: {exc}",
            "issues": [str(exc)],
        }

    mb = 1024 * 1024
    free_mb = stats.free / mb
    total_mb = stats.total / mb

    if free_mb >= min_mb:
        return {
            "name": "Disk space",
            "status": "PASS",
            "detail": f"{free_mb:.0f} MB free",
            "issues": [],
            "free_mb": round(free_mb),
        }

    # Below threshold: fail and point at the space-reclaiming command.
    return {
        "name": "Disk space",
        "status": "FAIL",
        "detail": f"{free_mb:.0f} MB remaining — below {min_mb} MB threshold",
        "issues": [f"Low disk space: {free_mb:.0f} MB free of {total_mb:.0f} MB"],
        "fix": "Run /turing:archive to reclaim space",
        "free_mb": round(free_mb),
    }
+
|
|
244
|
+
def check_git_state(project_dir: str = ".") -> dict:
    """Check git working tree state."""
    import subprocess

    try:
        proc = subprocess.run(
            ["git", "status", "--porcelain"],
            capture_output=True, text=True, timeout=10,
            cwd=project_dir,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return {
            "name": "Git state",
            "status": "WARN",
            "detail": "Git check skipped (timeout or not available)",
            "issues": [],
        }

    if proc.returncode != 0:
        return {
            "name": "Git state",
            "status": "WARN",
            "detail": "Not a git repository or git not available",
            "issues": [],
        }

    out = proc.stdout.strip()
    dirty = out.split("\n") if out else []

    # Flag uncommitted edits to evaluation-critical files — they put the
    # reproducibility of reported metrics at risk.
    critical = {"evaluate.py", "prepare.py"}
    problems = []
    for entry in dirty:
        if len(entry) < 3:
            continue
        # Porcelain format: 2 status chars + space, then the path.
        changed = entry[3:].strip()
        if any(c in changed for c in critical):
            problems.append(f"Uncommitted changes to {changed} — evaluation integrity at risk")

    return {
        "name": "Git state",
        "status": "WARN" if problems else "PASS",
        "detail": f"{len(dirty)} modified files" if dirty else "Working tree clean",
        "issues": problems,
    }
290
|
+
|
|
291
|
+
# --- Fix Operations ---
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def fix_corrupt_log(log_path: str = DEFAULT_LOG_PATH) -> dict:
    """Remove corrupt lines from experiment log."""
    log_file = Path(log_path)
    if not log_file.exists():
        return {"fixed": False, "reason": "Log not found"}

    keep: list[str] = []
    dropped = 0

    with open(log_file) as fh:
        for raw in fh:
            text = raw.strip()
            if not text:
                continue  # blank lines are silently dropped
            try:
                json.loads(text)
            except json.JSONDecodeError:
                dropped += 1
            else:
                keep.append(raw)

    if dropped == 0:
        return {"fixed": False, "reason": "No corrupt lines found"}

    # Back up the original before rewriting it in place.
    backup = log_file.with_suffix(".jsonl.bak")
    shutil.copy2(log_file, backup)
    with open(log_file, "w") as fh:
        fh.writelines(keep)
    return {"fixed": True, "removed": dropped, "backup": str(backup)}
|
+
|
|
325
|
+
# --- Full Doctor ---
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def run_doctor(
    config_path: str = "config.yaml",
    log_path: str = DEFAULT_LOG_PATH,
    fix: bool = False,
    verbose: bool = False,
) -> dict:
    """Run all diagnostic checks.

    Args:
        config_path: Path to config.yaml.
        log_path: Path to experiment log.
        fix: If True, auto-fix safe issues.
        verbose: Include detailed info.

    Returns:
        Doctor report with all check results and score.
    """
    # NOTE(review): `verbose` is accepted but not consulted here — presumably
    # reserved for future per-check detail; confirm with callers.
    checks = [
        check_environment(),
        check_dependencies(),
        check_config(config_path),
        check_experiment_log(log_path),
        check_scripts(),
        check_disk_space(),
        check_git_state(),
    ]

    fixes_applied: list[str] = []
    if fix:
        # The only auto-fix currently supported: pruning corrupt log lines.
        log_check = next((c for c in checks if c["name"] == "Experiment log"), None)
        if log_check and log_check.get("fixable"):
            outcome = fix_corrupt_log(log_path)
            if outcome.get("fixed"):
                fixes_applied.append(
                    f"Removed {outcome['removed']} corrupt log entries (backup: {outcome['backup']})"
                )
                # Re-run the log check so the report reflects the repaired file.
                for idx, c in enumerate(checks):
                    if c["name"] == "Experiment log":
                        checks[idx] = check_experiment_log(log_path)
                        break

    # Tally per-status counts for the summary score.
    tally = {s: sum(1 for c in checks if c["status"] == s) for s in ("PASS", "WARN", "FAIL")}
    warned, failed = tally["WARN"], tally["FAIL"]

    if failed:
        overall = "UNHEALTHY"
    elif warned:
        overall = "DEGRADED"
    else:
        overall = "HEALTHY"

    return {
        "checks": checks,
        "score": {
            "passed": tally["PASS"],
            "warned": warned,
            "failed": failed,
            "total": len(checks),
        },
        "fixes_applied": fixes_applied,
        "overall": overall,
        "generated_at": datetime.now(timezone.utc).isoformat(),
    }
|
383
|
+
|
|
384
|
+
# --- Report Formatting ---
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def save_doctor_report(report: dict, output_dir: str = "experiments/doctor") -> Path:
    """Save doctor report to YAML."""
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    # Timestamped filename so successive runs never overwrite each other.
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    report_path = target_dir / f"doctor-{stamp}.yaml"
    with open(report_path, "w") as fh:
        yaml.dump(report, fh, default_flow_style=False, sort_keys=False)
    return report_path
|
+
|
|
398
|
+
def format_doctor_report(report: dict) -> str:
    """Format doctor report as readable text."""
    icons = {"PASS": "✓ PASS ", "WARN": "⚠ WARN ", "FAIL": "✗ FAIL "}
    out = ["# Turing Doctor Report", ""]

    # One line per check, indented sub-lines for its issues and fix hint.
    for check in report.get("checks", []):
        prefix = icons.get(check["status"], "? ")
        out.append(f"{prefix} {check['name']} ({check.get('detail', '')})")
        out.extend(f"  {issue}" for issue in check.get("issues", []))
        suggestion = check.get("fix")
        if suggestion:
            out.append(f"  Fix: {suggestion}")

    # Summary score with correct singular/plural forms.
    score = report.get("score", {})
    warned = score.get("warned", 0)
    failed = score.get("failed", 0)
    out.append("")
    out.append(
        f"Score: {score.get('passed', 0)}/{score.get('total', 0)} pass, "
        f"{warned} warning{'s' if warned != 1 else ''}, "
        f"{failed} failure{'s' if failed != 1 else ''}"
    )
    out.append(f"Overall: {report.get('overall', 'UNKNOWN')}")

    applied = report.get("fixes_applied", [])
    if applied:
        out.append("")
        out.append("Fixes applied:")
        out.extend(f"  - {entry}" for entry in applied)

    out.append("")
    out.append(f"*Generated: {report.get('generated_at', 'N/A')}*")
    return "\n".join(out)
432
|
+
|
|
433
|
+
# --- CLI ---
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def main():
    """CLI entry point: run the diagnostics, print the report, then save it."""
    parser = argparse.ArgumentParser(
        description="Harness self-diagnosis — check environment, project, and resource health"
    )
    parser.add_argument("--fix", action="store_true", help="Auto-fix safe issues")
    parser.add_argument("--verbose", action="store_true", help="Show detailed info")
    parser.add_argument("--config", default="config.yaml", help="Path to config.yaml")
    parser.add_argument("--log", default=DEFAULT_LOG_PATH, help="Path to experiment log")
    parser.add_argument("--json", action="store_true", help="Output raw JSON")
    args = parser.parse_args()

    report = run_doctor(
        config_path=args.config,
        log_path=args.log,
        fix=args.fix,
        verbose=args.verbose,
    )

    # Machine-readable mode emits only the JSON document, no chrome.
    if args.json:
        print(json.dumps(report, indent=2))
    else:
        print(format_doctor_report(report))

    saved = save_doctor_report(report)
    if not args.json:
        print(f"\nSaved: {saved}")


if __name__ == "__main__":
    main()