claude-turing 4.0.0 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +8 -2
- package/commands/counterfactual.md +27 -0
- package/commands/onboard.md +20 -0
- package/commands/review.md +20 -0
- package/commands/share.md +20 -0
- package/commands/simulate.md +28 -0
- package/commands/turing.md +12 -0
- package/commands/whatif.md +31 -0
- package/package.json +1 -1
- package/src/install.js +2 -0
- package/src/verify.js +6 -0
- package/templates/scripts/__pycache__/counterfactual_explanation.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_simulator.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_onboarding.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/package_experiments.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/simulate_review.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/whatif_engine.cpython-314.pyc +0 -0
- package/templates/scripts/counterfactual_explanation.py +485 -0
- package/templates/scripts/experiment_simulator.py +463 -0
- package/templates/scripts/generate_brief.py +64 -0
- package/templates/scripts/generate_onboarding.py +284 -0
- package/templates/scripts/package_experiments.py +285 -0
- package/templates/scripts/scaffold.py +11 -0
- package/templates/scripts/simulate_review.py +342 -0
- package/templates/scripts/whatif_engine.py +763 -0

package/templates/scripts/generate_onboarding.py

@@ -0,0 +1,284 @@
+#!/usr/bin/env python3
+"""Project onboarding generator for new collaborators.
+
+Reads config, experiments, annotations, and hypotheses to produce a
+structured walkthrough: task description, what's been tried (grouped
+by family), key decisions, where the project is heading, and how to start.
+
+Usage:
+    python scripts/generate_onboarding.py --audience researcher --depth full
+    python scripts/generate_onboarding.py --audience stakeholder --depth brief --json
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+import yaml
+from scripts.turing_io import load_config, load_experiments, load_hypotheses
+
+VALID_AUDIENCES = ["researcher", "engineer", "stakeholder"]
+VALID_DEPTHS = ["brief", "full"]
+DEFAULT_LOG = "experiments/log.jsonl"
+DEFAULT_ANNOTATIONS = "experiments/annotations.yaml"
+DEFAULT_HYPOTHESES = "hypotheses.yaml"
+
+
+def _load_yaml_list(path: str) -> list[dict]:
+    p = Path(path)
+    if not p.exists() or p.stat().st_size == 0:
+        return []
+    with open(p) as f:
+        data = yaml.safe_load(f)
+    return data if isinstance(data, list) else []
+
+
+def _load_yaml_dir(directory: str, glob: str) -> list[dict]:
+    path = Path(directory)
+    if not path.exists():
+        return []
+    items = []
+    for f in sorted(path.glob(glob)):
+        try:
+            with open(f) as fh:
+                d = yaml.safe_load(fh)
+            if d and isinstance(d, dict):
+                items.append(d)
+        except (yaml.YAMLError, OSError):
+            continue
+    return items
+
+
+def _family_summary(exps: list[dict], metric: str, lower_is_better: bool) -> dict:
+    total = len(exps)
+    kept = [e for e in exps if e.get("status") == "kept"]
+    best_val, best_id = None, None
+    for e in kept:
+        val = e.get("metrics", {}).get(metric)
+        if val is None:
+            continue
+        if best_val is None or (lower_is_better and val < best_val) or (not lower_is_better and val > best_val):
+            best_val, best_id = val, e.get("experiment_id")
+    return {"total": total, "kept": len(kept), "keep_rate": round(len(kept) / total, 2) if total else 0,
+            "best_metric": best_val, "best_experiment": best_id}
+
+
+def _find_best(experiments: list[dict], metric: str, lower_is_better: bool) -> dict | None:
+    best, best_val = None, float("inf") if lower_is_better else float("-inf")
+    for e in experiments:
+        if e.get("status") != "kept":
+            continue
+        val = e.get("metrics", {}).get(metric)
+        if val is not None and ((lower_is_better and val < best_val) or (not lower_is_better and val > best_val)):
+            best_val, best = val, e
+    return best
+
+
+def _extract_decisions(packets: list[dict], annotations: list[dict]) -> list[dict]:
+    decisions = []
+    for pkt in packets:
+        if pkt.get("action") in ("promote", "abandon", "replicate"):
+            decisions.append({"type": "decision", "experiment": pkt.get("experiment_id", "?"),
+                              "action": pkt["action"], "reason": pkt.get("reason", ""),
+                              "date": pkt.get("timestamp", "")[:10]})
+    key_tags = {"decision", "key", "important", "milestone"}
+    for ann in annotations:
+        if set(t.lower() for t in ann.get("tags", [])) & key_tags:
+            decisions.append({"type": "annotation", "experiment": ann.get("experiment_id", "?"),
+                              "text": ann.get("text", ""), "date": ann.get("date", "")[:10]})
+    decisions.sort(key=lambda d: d.get("date", ""), reverse=True)
+    return decisions
+
+
+def _project_direction(hypotheses: list[dict], experiments: list[dict]) -> dict:
+    queued = [h for h in hypotheses if h.get("status") == "queued"]
+    promising = [h for h in hypotheses if h.get("status") == "promising"]
+    recent = experiments[-5:] if len(experiments) >= 5 else experiments
+    recent_kept = sum(1 for e in recent if e.get("status") == "kept")
+    if not experiments:
+        phase = "not_started"
+    elif not queued and not promising:
+        phase = "exhausted"
+    elif recent_kept == 0 and len(recent) >= 3:
+        phase = "plateaued"
+    elif promising:
+        phase = "promising_leads"
+    else:
+        phase = "active_exploration"
+    return {"phase": phase, "queued": queued[:5], "promising": promising[:3],
+            "n_queued": len(queued), "n_promising": len(promising)}
+
+
+def format_onboarding_report(config, experiments, families, best, decisions,
+                             direction, annotations, seeds, audience, depth,
+                             metric, lower_is_better) -> str:
+    d = "lower" if lower_is_better else "higher"
+    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
+    L = ["# Project Onboarding", "", f"*Generated {now} for audience: {audience}*", "", "---", "",
+         "## 1. What This Project Does", ""]
+    data_cfg, eval_cfg = config.get("data", {}), config.get("evaluation", {})
+    L.append(f"**Task:** {config.get('task_description', 'N/A')}")
+    L.append(f"**Dataset:** {data_cfg.get('source', 'unknown')}")
+    L.append(f"**Primary metric:** `{metric}` ({d} is better)")
+    extra = [m for m in eval_cfg.get("metrics", []) if m != metric]
+    if extra:
+        L.append(f"**Additional metrics:** {', '.join(f'`{m}`' for m in extra)}")
+    if depth == "full" and audience != "stakeholder":
+        sr = data_cfg.get("split_ratios", {})
+        if sr:
+            L.append(f"**Data splits:** {' / '.join(f'{k}: {int(v*100)}%' for k, v in sr.items())}")
+        if data_cfg.get("target_column"):
+            L.append(f"**Target column:** `{data_cfg['target_column']}`")
+    L.extend(["", "## 2. What's Been Tried", ""])
+    total, kept_n = len(experiments), sum(1 for e in experiments if e.get("status") == "kept")
+    if total == 0:
+        L.append("No experiments yet. Start with `/turing:train`.")
+    else:
+        L.append(f"**{total} experiments**, **{kept_n} kept** ({round(kept_n/total*100)}% keep rate).")
+        L.append("")
+        if best:
+            ms = ", ".join(f"{k}={v:.4f}" if isinstance(v, float) else f"{k}={v}"
+                           for k, v in best.get("metrics", {}).items())
+            L.extend([f"**Champion:** `{best.get('experiment_id','?')}` "
+                      f"({best.get('config',{}).get('model_type','?')}) — {ms}", ""])
+        L.extend(["### By Family", "", "| Family | Exps | Kept | Best | Status |",
+                  "|--------|------|------|------|--------|"])
+        for name, s in sorted(families.items()):
+            bv = f"{s['best_metric']:.4f}" if s["best_metric"] is not None else "---"
+            st = "Exhausted" if s["keep_rate"] == 0 and s["total"] >= 3 else (
+                "Productive" if s["keep_rate"] >= 0.5 else "Mixed")
+            L.append(f"| {name} | {s['total']} | {s['kept']} | {bv} | {st} |")
+        L.append("")
+        if depth == "full" and audience in ("researcher", "engineer"):
+            for name, s in sorted(families.items()):
+                if not s["total"]:
+                    continue
+                L.append(f"#### {name}")
+                if s["best_experiment"]:
+                    L.append(f"- Best: `{s['best_experiment']}` ({metric}={s['best_metric']:.4f})")
+                L.append(f"- {s['kept']}/{s['total']} kept ({s['keep_rate']:.0%})")
+                fam_ids = {e.get("experiment_id") for e in experiments if (e.get("family") or "untagged") == name}
+                notes = [a for a in annotations if a.get("experiment_id") in fam_ids]
+                for n in notes[:3]:
+                    L.append(f"  - {n.get('text','')[:80]}")
+                L.append("")
+    L.extend(["## 3. Key Decisions", ""])
+    if not decisions:
+        L.append("No major decisions recorded yet.")
+    else:
+        lim = 5 if depth == "brief" else 15
+        for dec in decisions[:lim]:
+            if dec["type"] == "decision":
+                L.append(f"- **{dec['date']}** `{dec['experiment']}`: **{dec['action']}** — {dec['reason']}")
+            else:
+                L.append(f"- **{dec['date']}** `{dec['experiment']}`: {dec['text'][:100]}")
+        if len(decisions) > lim:
+            L.append(f"  *...and {len(decisions)-lim} more*")
+    L.extend(["", "## 4. Where We're Heading", ""])
+    phases = {"not_started": "Project has not started experiments yet.",
+              "exhausted": "All hypotheses tested. Need fresh ideas.",
+              "plateaued": "Recent experiments not improving. Consider pivoting.",
+              "promising_leads": "Promising directions identified and being pursued.",
+              "active_exploration": "Actively exploring hypothesis space."}
+    L.extend([phases.get(direction["phase"], "Unknown phase."), ""])
+    if direction["n_queued"]:
+        L.append(f"**{direction['n_queued']} hypotheses queued:**")
+        for h in direction["queued"]:
+            p = " (HIGH)" if h.get("priority") == "high" else ""
+            L.append(f"- {h.get('id','?')}: {h.get('description','?')}{p}")
+        L.append("")
+    if direction["n_promising"]:
+        L.append(f"**{direction['n_promising']} promising lead(s):**")
+        for h in direction["promising"]:
+            L.append(f"- {h.get('id','?')}: {h.get('description','?')}")
+        L.append("")
+    sensitive = [s for s in seeds if s.get("seed_sensitive")]
+    if sensitive and audience != "stakeholder":
+        L.append("**Seed sensitivity warnings:**")
+        for s in sensitive:
+            L.append(f"- `{s.get('experiment_id','?')}`: CV={s.get('cv_percent',0):.1f}%")
+        L.append("")
+    L.extend(["## 5. How to Get Started", ""])
+    cmds = {"researcher": [
+        "1. Read `config.yaml` for task and evaluation setup",
+        "2. `/turing:status` — current experiment state",
+        "3. `/turing:brief` — full research intelligence report",
+        "4. Review `hypotheses.yaml` for queued ideas",
+        "5. `/turing:try \"your hypothesis\"` — inject ideas",
+        "6. `/turing:train` — run next experiment",
+    ], "engineer": [
+        "1. `pip install -r requirements.txt`",
+        "2. Review `config.yaml` for data paths",
+        "3. `/turing:status` — where things stand",
+        "4. Check `train.py` for current model",
+        "5. `/turing:train` — execute experiments",
+    ], "stakeholder": [
+        "1. `/turing:brief` — high-level summary",
+        "2. Check champion performance above",
+        "3. Review 'Where We're Heading' for next steps",
+    ]}
+    L.extend(cmds.get(audience, []))
+    L.extend(["", "---", f"*Generated by `/turing:onboard` — {audience}, {depth}*"])
+    return "\n".join(L)
+
+
+def save_onboarding_report(content: str, path: str = "ONBOARDING.md") -> Path:
+    p = Path(path)
+    p.parent.mkdir(parents=True, exist_ok=True)
+    p.write_text(content)
+    return p
+
+
+def generate_onboarding(config_path="config.yaml", log_path=DEFAULT_LOG,
+                        hypotheses_path=DEFAULT_HYPOTHESES,
+                        annotations_path=DEFAULT_ANNOTATIONS,
+                        audience="researcher", depth="full") -> dict:
+    """Generate full onboarding report. Returns dict with report and metadata."""
+    config = load_config(config_path)
+    metric = config.get("evaluation", {}).get("primary_metric", "accuracy")
+    lower = config.get("evaluation", {}).get("lower_is_better", False)
+    experiments = load_experiments(log_path)
+    hypotheses = load_hypotheses(hypotheses_path)
+    annotations = _load_yaml_list(annotations_path)
+    packets = _load_yaml_dir("experiments/decisions", "*.yaml")
+    seeds = _load_yaml_dir("experiments/seed_studies", "*-seeds.yaml")
+    fam_groups = {}
+    for e in experiments:
+        fam_groups.setdefault(e.get("family") or "untagged", []).append(e)
+    families = {n: _family_summary(exps, metric, lower) for n, exps in fam_groups.items()}
+    best = _find_best(experiments, metric, lower)
+    decisions = _extract_decisions(packets, annotations)
+    direction = _project_direction(hypotheses, experiments)
+    report = format_onboarding_report(config, experiments, families, best, decisions,
+                                      direction, annotations, seeds, audience, depth, metric, lower)
+    return {"timestamp": datetime.now(timezone.utc).isoformat(), "audience": audience,
+            "depth": depth, "total_experiments": len(experiments),
+            "project_phase": direction["phase"], "report": report}
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Generate project onboarding for new collaborators")
+    parser.add_argument("--config", default="config.yaml")
+    parser.add_argument("--log", default=DEFAULT_LOG)
+    parser.add_argument("--hypotheses", default=DEFAULT_HYPOTHESES)
+    parser.add_argument("--annotations", default=DEFAULT_ANNOTATIONS)
+    parser.add_argument("--audience", default="researcher", choices=VALID_AUDIENCES)
+    parser.add_argument("--depth", default="full", choices=VALID_DEPTHS)
+    parser.add_argument("--output", default="ONBOARDING.md")
+    parser.add_argument("--json", action="store_true", help="Output raw JSON")
+    args = parser.parse_args()
+    result = generate_onboarding(args.config, args.log, args.hypotheses,
+                                 args.annotations, args.audience, args.depth)
+    if args.json:
+        print(json.dumps(result, indent=2, default=str))
+    else:
+        saved = save_onboarding_report(result["report"], args.output)
+        print(result["report"])
+        print(f"\nSaved to {saved}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()
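
Both new scripts import their data loaders from `scripts/turing_io`, which is not touched by this diff. For orientation, a minimal sketch of what those loaders would need to do, inferred purely from the call sites above; the signatures and file formats are assumptions, not the package's actual implementation:

```python
# Hypothetical sketch of scripts/turing_io.py, inferred from how the
# loaders are called in generate_onboarding.py. The real module ships
# with the package and may differ.
from __future__ import annotations

import json
from pathlib import Path

import yaml


def load_config(path: str = "config.yaml") -> dict:
    """Project config as a dict; empty dict if the file is missing."""
    p = Path(path)
    if not p.exists():
        return {}
    return yaml.safe_load(p.read_text()) or {}


def load_experiments(path: str = "experiments/log.jsonl") -> list[dict]:
    """One experiment record per JSONL line, in log order."""
    p = Path(path)
    if not p.exists():
        return []
    return [json.loads(line) for line in p.read_text().splitlines() if line.strip()]


def load_hypotheses(path: str = "hypotheses.yaml") -> list[dict]:
    """Hypothesis entries, each with at least `id`, `status`, `description`."""
    p = Path(path)
    if not p.exists():
        return []
    data = yaml.safe_load(p.read_text())
    return data if isinstance(data, list) else []
```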

package/templates/scripts/package_experiments.py

@@ -0,0 +1,285 @@
+#!/usr/bin/env python3
+"""Package experiments into portable archive for sharing.
+
+Collects config, metrics, seed studies, annotations, decision packets
+per experiment. Generates manifest.yaml and README.md inside the
+package directory. Does NOT create tar.gz -- just organizes files.
+
+Usage:
+    python scripts/package_experiments.py
+    python scripts/package_experiments.py --experiments exp-042,exp-043
+    python scripts/package_experiments.py --include model,data-hash,figures,code
+    python scripts/package_experiments.py --json
+"""
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+import shutil
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+import yaml
+from scripts.turing_io import load_config, load_experiments
+
+DEFAULT_LOG = "experiments/log.jsonl"
+DEFAULT_OUTPUT = "exports/packages"
+VALID_INCLUDES = ["model", "data-hash", "figures", "code"]
+
+
+def _load_yaml_list(path: str) -> list[dict]:
+    p = Path(path)
+    if not p.exists() or p.stat().st_size == 0:
+        return []
+    with open(p) as f:
+        data = yaml.safe_load(f)
+    return data if isinstance(data, list) else []
+
+
+def _load_yaml_file(path: Path) -> dict | None:
+    if not path.exists():
+        return None
+    try:
+        with open(path) as f:
+            d = yaml.safe_load(f)
+        return d if isinstance(d, dict) else None
+    except (yaml.YAMLError, OSError):
+        return None
+
+
+def _file_hash(filepath: str) -> str | None:
+    p = Path(filepath)
+    if not p.exists():
+        return None
+    h = hashlib.sha256()
+    with open(p, "rb") as f:
+        for chunk in iter(lambda: f.read(8192), b""):
+            h.update(chunk)
+    return h.hexdigest()
+
+
+def collect_experiment_artifacts(exp: dict, includes: list[str]) -> dict:
+    """Collect all artifacts for a single experiment."""
+    eid = exp.get("experiment_id", "unknown")
+    art: dict = {"experiment_id": eid, "status": exp.get("status", "unknown"),
+                 "metrics": exp.get("metrics", {}), "config": exp.get("config", {}),
+                 "description": exp.get("description", ""), "timestamp": exp.get("timestamp", ""),
+                 "family": exp.get("family")}
+    # Seed study
+    seed = _load_yaml_file(Path(f"experiments/seed_studies/{eid}-seeds.yaml"))
+    if seed:
+        art["seed_study"] = {"mean": seed.get("mean"), "std": seed.get("std"),
+                             "cv_percent": seed.get("cv_percent"),
+                             "seed_sensitive": seed.get("seed_sensitive", False)}
+    # Decision packet
+    dec = _load_yaml_file(Path(f"experiments/decisions/{eid}-decision.yaml"))
+    if dec:
+        art["decision"] = {"action": dec.get("action"), "reason": dec.get("reason", "")}
+    # Ablation
+    abl = _load_yaml_file(Path(f"experiments/ablations/{eid}-ablation.yaml"))
+    if abl:
+        art["ablation"] = {"metric": abl.get("metric"),
+                           "n_ablations": len(abl.get("results", []))}
+    # Reproduction
+    repro = _load_yaml_file(Path(f"experiments/reproductions/{eid}-repro.yaml"))
+    if repro:
+        art["reproduction"] = {"verdict": repro.get("verdict"), "reason": repro.get("reason", "")}
+    # Optional includes
+    if "model" in includes:
+        for pat in [f"models/{eid}", f"models/{eid}.*", f"checkpoints/{eid}/*"]:
+            matches = list(Path(".").glob(pat))
+            if matches:
+                art["model_path"] = str(matches[0])
+                break
+    if "data-hash" in includes:
+        dp = exp.get("config", {}).get("data", {}).get("path")
+        if dp:
+            h = _file_hash(dp)
+            if h:
+                art["data_hash"] = h
+    if "figures" in includes:
+        fig_dir = Path(f"experiments/figures/{eid}")
+        art["figures"] = [str(f) for f in fig_dir.glob("*") if f.is_file()] if fig_dir.exists() else []
+    if "code" in includes:
+        art["train_py_hash"] = _file_hash("train.py")
+        snap = Path(f"experiments/code/{eid}")
+        if snap.exists():
+            art["code_snapshot_path"] = str(snap)
+    return art
+
+
+def build_manifest(name: str, config: dict, artifacts: list[dict], includes: list[str]) -> dict:
+    eval_cfg = config.get("evaluation", {})
+    return {
+        "package": {"name": name, "created": datetime.now(timezone.utc).isoformat(),
+                    "generator": "turing:share", "version": "1.0"},
+        "project": {"task": config.get("task_description", ""),
+                    "primary_metric": eval_cfg.get("primary_metric", "accuracy"),
+                    "lower_is_better": eval_cfg.get("lower_is_better", False)},
+        "contents": {"experiments": len(artifacts), "includes": includes,
+                     "has_seed_studies": any(a.get("seed_study") for a in artifacts),
+                     "has_decisions": any(a.get("decision") for a in artifacts)},
+        "experiments": [{"id": a["experiment_id"], "status": a["status"],
+                         "family": a.get("family"), "metrics": a["metrics"]} for a in artifacts],
+    }
+
+
+def build_package_readme(config: dict, artifacts: list[dict], manifest: dict) -> str:
+    metric = manifest["project"]["primary_metric"]
+    d = "lower" if manifest["project"]["lower_is_better"] else "higher"
+    L = [f"# Experiment Package: {manifest['package']['name']}", "",
+         f"*Packaged {manifest['package']['created'][:19]} UTC*", "",
+         "## Project", "", f"- **Task:** {config.get('task_description', 'N/A')}",
+         f"- **Primary metric:** `{metric}` ({d} is better)", "", "## Experiments", "",
+         f"| ID | Status | Family | {metric} |",
+         f"|----|--------|--------|{'---'*max(len(metric)//3,1)}--|"]
+    for a in artifacts:
+        v = a.get("metrics", {}).get(metric)
+        vs = f"{v:.4f}" if isinstance(v, (int, float)) else "---"
+        L.append(f"| {a['experiment_id']} | {a['status']} | {a.get('family','---')} | {vs} |")
+    seeds = [a for a in artifacts if a.get("seed_study")]
+    if seeds:
+        L.extend(["", "## Seed Studies", ""])
+        for a in seeds:
+            s = a["seed_study"]
+            tag = "SEED-SENSITIVE" if s["seed_sensitive"] else "stable"
+            L.append(f"- `{a['experiment_id']}`: mean={s['mean']:.4f} +/- {s['std']:.4f} [{tag}]")
+    decs = [a for a in artifacts if a.get("decision")]
+    if decs:
+        L.extend(["", "## Decisions", ""])
+        for a in decs:
+            L.append(f"- `{a['experiment_id']}`: **{a['decision']['action']}** — {a['decision']['reason']}")
+    L.extend(["", "## Files", "", "- `manifest.yaml` — Machine-readable manifest",
+              "- `README.md` — This file", "- `experiments/` — Per-experiment artifacts",
+              "- `config.yaml` — Project config snapshot", "", "---",
+              "*Generated by `/turing:share`*"])
+    return "\n".join(L)
+
+
+def write_package(pkg_dir: Path, config: dict, artifacts: list[dict],
+                  manifest: dict, readme: str, includes: list[str]) -> None:
+    """Write all package files to the directory."""
+    pkg_dir.mkdir(parents=True, exist_ok=True)
+    with open(pkg_dir / "manifest.yaml", "w") as f:
+        yaml.dump(manifest, f, default_flow_style=False, sort_keys=False)
+    (pkg_dir / "README.md").write_text(readme)
+    with open(pkg_dir / "config.yaml", "w") as f:
+        yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+    exp_dir = pkg_dir / "experiments"
+    exp_dir.mkdir(exist_ok=True)
+    for art in artifacts:
+        sub = exp_dir / art["experiment_id"]
+        sub.mkdir(exist_ok=True)
+        with open(sub / "artifact.yaml", "w") as f:
+            yaml.dump(art, f, default_flow_style=False, sort_keys=False)
+        if "figures" in includes and art.get("figures"):
+            fd = sub / "figures"
+            fd.mkdir(exist_ok=True)
+            for fp in art["figures"]:
+                src = Path(fp)
+                if src.exists():
+                    shutil.copy2(src, fd / src.name)
+        if "code" in includes and art.get("code_snapshot_path"):
+            cs = Path(art["code_snapshot_path"])
+            if cs.exists() and cs.is_dir():
+                shutil.copytree(cs, sub / "code", dirs_exist_ok=True)
+        if "model" in includes and art.get("model_path"):
+            ms = Path(art["model_path"])
+            if ms.exists():
+                md = sub / "model"
+                md.mkdir(exist_ok=True)
+                if ms.is_dir():
+                    shutil.copytree(ms, md, dirs_exist_ok=True)
+                else:
+                    shutil.copy2(ms, md / ms.name)
+
+
+def save_package_report(result: dict, pkg_dir: Path) -> Path:
+    rp = pkg_dir / "package-report.yaml"
+    with open(rp, "w") as f:
+        yaml.dump({"timestamp": result["timestamp"], "package_name": result["package_name"],
+                   "package_dir": str(result["package_dir"]),
+                   "experiments_packaged": result["experiments_packaged"],
+                   "includes": result["includes"]}, f, default_flow_style=False, sort_keys=False)
+    return rp
+
+
+def format_package_report(result: dict) -> str:
+    L = ["# Package Summary", "",
+         f"- **Package:** {result['package_name']}",
+         f"- **Location:** `{result['package_dir']}`",
+         f"- **Experiments:** {result['experiments_packaged']}",
+         f"- **Includes:** {', '.join(result['includes']) or 'metrics only'}", "", "## Contents", ""]
+    for a in result.get("artifacts", []):
+        extras = [k for k in ("seed_study", "decision", "ablation", "reproduction") if a.get(k)]
+        es = f" [{', '.join(extras)}]" if extras else ""
+        L.append(f"- `{a['experiment_id']}` ({a['status']}){es}")
+    return "\n".join(L)
+
+
+def package_experiments(experiment_ids=None, includes=None, config_path="config.yaml",
+                        log_path=DEFAULT_LOG, output_dir=DEFAULT_OUTPUT) -> dict:
+    """Package experiments into a portable directory."""
+    includes = includes or []
+    config = load_config(config_path)
+    experiments = load_experiments(log_path)
+    annotations = _load_yaml_list("experiments/annotations.yaml")
+    if experiment_ids:
+        selected = [e for e in experiments if e.get("experiment_id") in experiment_ids]
+        if not selected:
+            return {"error": f"No matching experiments for: {experiment_ids}"}
+    else:
+        selected = [e for e in experiments if e.get("status") == "kept"]
+        if not selected:
+            return {"error": "No kept experiments to package."}
+    artifacts = [collect_experiment_artifacts(e, includes) for e in selected]
+    for art in artifacts:
+        eid = art["experiment_id"]
+        anns = [a for a in annotations if a.get("experiment_id") == eid]
+        if anns:
+            art["annotations"] = anns
+    ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+    pkg_name = f"package-{len(artifacts)}exp-{ts}"
+    pkg_dir = Path(output_dir) / pkg_name
+    manifest = build_manifest(pkg_name, config, artifacts, includes)
+    readme = build_package_readme(config, artifacts, manifest)
+    write_package(pkg_dir, config, artifacts, manifest, readme, includes)
+    result = {"timestamp": datetime.now(timezone.utc).isoformat(), "package_name": pkg_name,
+              "package_dir": str(pkg_dir), "experiments_packaged": len(artifacts),
+              "includes": includes, "artifacts": artifacts}
+    save_package_report(result, pkg_dir)
+    return result
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Package experiments into portable archive")
+    parser.add_argument("--experiments", default=None, help="Comma-separated experiment IDs")
+    parser.add_argument("--include", default=None, help="Extras: model,data-hash,figures,code")
+    parser.add_argument("--config", default="config.yaml")
+    parser.add_argument("--log", default=DEFAULT_LOG)
+    parser.add_argument("--output", default=DEFAULT_OUTPUT)
+    parser.add_argument("--json", action="store_true", help="Output raw JSON")
+    args = parser.parse_args()
+    exp_ids = [e.strip() for e in args.experiments.split(",")] if args.experiments else None
+    includes = []
+    if args.include:
+        includes = [i.strip() for i in args.include.split(",")]
+        bad = [i for i in includes if i not in VALID_INCLUDES]
+        if bad:
+            print(f"ERROR: Invalid include(s): {bad}. Valid: {VALID_INCLUDES}", file=sys.stderr)
+            sys.exit(1)
+    result = package_experiments(exp_ids, includes, args.config, args.log, args.output)
+    if "error" in result:
+        print(f"ERROR: {result['error']}", file=sys.stderr)
+        sys.exit(1)
+    if args.json:
+        print(json.dumps(result, indent=2, default=str))
+    else:
+        print(format_package_report(result))
+        print(f"\nPackage saved to: {result['package_dir']}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()
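
Besides the CLI shown in the docstring, `package_experiments()` can be called directly. A minimal sketch, assuming the scaffolded project layout so the `scripts.` import path resolves, and reusing the placeholder experiment IDs from the docstring:

```python
# Hypothetical driver for the packager above; run from the project root.
from scripts.package_experiments import package_experiments

result = package_experiments(
    experiment_ids=["exp-042", "exp-043"],  # placeholder IDs from the docstring
    includes=["figures", "code"],           # extras beyond metrics/config
)
if "error" in result:
    raise SystemExit(result["error"])
print(f"{result['experiments_packaged']} experiments -> {result['package_dir']}")
```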

package/templates/scripts/scaffold.py

@@ -140,6 +140,12 @@ TEMPLATE_DIRS = {
         "citation_manager.py",
         "generate_figures.py",
         "generate_changelog.py",
+        "generate_onboarding.py",
+        "package_experiments.py",
+        "simulate_review.py",
+        "whatif_engine.py",
+        "counterfactual_explanation.py",
+        "experiment_simulator.py",
     ],
     "tests": ["__init__.py", "conftest.py"],
 }

@@ -189,6 +195,11 @@ DIRECTORIES_TO_CREATE = [
     "experiments/replays",
     "experiments/citations",
     "paper/figures",
+    "exports/packages",
+    "experiments/reviews",
+    "experiments/whatif",
+    "experiments/counterfactuals",
+    "experiments/simulations",
     "experiments/logs",
     "models/best",
     "models/archive",