claude-turing 3.4.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/.claude-plugin/plugin.json +2 -2
  2. package/README.md +12 -2
  3. package/commands/annotate.md +23 -0
  4. package/commands/archive.md +23 -0
  5. package/commands/changelog.md +22 -0
  6. package/commands/cite.md +23 -0
  7. package/commands/flashback.md +22 -0
  8. package/commands/present.md +23 -0
  9. package/commands/replay.md +23 -0
  10. package/commands/search.md +22 -0
  11. package/commands/template.md +22 -0
  12. package/commands/trend.md +21 -0
  13. package/commands/turing.md +20 -0
  14. package/package.json +1 -1
  15. package/src/install.js +2 -0
  16. package/src/verify.js +10 -0
  17. package/templates/scripts/__pycache__/citation_manager.cpython-314.pyc +0 -0
  18. package/templates/scripts/__pycache__/experiment_annotations.cpython-314.pyc +0 -0
  19. package/templates/scripts/__pycache__/experiment_archive.cpython-314.pyc +0 -0
  20. package/templates/scripts/__pycache__/experiment_replay.cpython-314.pyc +0 -0
  21. package/templates/scripts/__pycache__/experiment_search.cpython-314.pyc +0 -0
  22. package/templates/scripts/__pycache__/experiment_templates.cpython-314.pyc +0 -0
  23. package/templates/scripts/__pycache__/generate_changelog.cpython-314.pyc +0 -0
  24. package/templates/scripts/__pycache__/generate_figures.cpython-314.pyc +0 -0
  25. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  26. package/templates/scripts/__pycache__/session_flashback.cpython-314.pyc +0 -0
  27. package/templates/scripts/__pycache__/trend_analysis.cpython-314.pyc +0 -0
  28. package/templates/scripts/citation_manager.py +436 -0
  29. package/templates/scripts/experiment_annotations.py +392 -0
  30. package/templates/scripts/experiment_archive.py +534 -0
  31. package/templates/scripts/experiment_replay.py +592 -0
  32. package/templates/scripts/experiment_search.py +451 -0
  33. package/templates/scripts/experiment_templates.py +501 -0
  34. package/templates/scripts/generate_changelog.py +464 -0
  35. package/templates/scripts/generate_figures.py +597 -0
  36. package/templates/scripts/scaffold.py +17 -0
  37. package/templates/scripts/session_flashback.py +461 -0
  38. package/templates/scripts/trend_analysis.py +503 -0
package/templates/scripts/experiment_archive.py
@@ -0,0 +1,534 @@
+ #!/usr/bin/env python3
+ """Experiment lifecycle cleanup and archival for the autoresearch pipeline.
+
+ Identifies archivable experiments (older than threshold, not Pareto-optimal,
+ not current best), compresses artifacts, and creates a summary index.
+ Keeps the experiment directory lean without losing institutional knowledge.
+
+ Usage:
+     python scripts/experiment_archive.py --dry-run
+     python scripts/experiment_archive.py --older-than 30
+     python scripts/experiment_archive.py --keep-best 5
+     python scripts/experiment_archive.py --json
+ """
+
+ from __future__ import annotations
+
+ import argparse
+ import gzip
+ import json
+ import shutil
+ import sys
+ from datetime import datetime, timedelta, timezone
+ from pathlib import Path
+
+ import yaml
+
+ from scripts.turing_io import load_config, load_experiments
+
+ DEFAULT_LOG_PATH = "experiments/log.jsonl"
+ DEFAULT_OLDER_THAN_DAYS = 30
+ DEFAULT_KEEP_BEST = 3
+ ARCHIVE_INDEX_PATH = "experiments/archive/index.yaml"
+ ARTIFACT_DIRS = ["checkpoints", "predictions", "profiles", "diagnoses"]
+
+
+ # --- Identification ---
+
+
+ def find_current_best(
+     experiments: list[dict],
+     metric: str,
+     lower_is_better: bool,
+     keep_best: int = DEFAULT_KEEP_BEST,
+ ) -> set[str]:
+     """Find the top-N best kept experiments by primary metric.
+
+     Returns set of experiment IDs that should never be archived.
+     """
+     kept = []
+     for exp in experiments:
+         if exp.get("status") != "kept":
+             continue
+         val = exp.get("metrics", {}).get(metric)
+         if val is None:
+             continue
+         try:
+             val = float(val)
+         except (ValueError, TypeError):
+             continue
+         kept.append((exp.get("experiment_id", "?"), val))
+
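+     # Sort best-first: ascending when lower is better, descending otherwise.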
+     kept.sort(key=lambda x: x[1], reverse=not lower_is_better)
+     return {eid for eid, _ in kept[:keep_best]}
+
+
+ def find_pareto_optimal(
+     experiments: list[dict],
+     metrics: list[str],
+     lower_is_better_map: dict[str, bool],
+ ) -> set[str]:
+     """Find Pareto-optimal experiments across all configured metrics.
+
+     Returns set of experiment IDs that should never be archived.
+     """
+     kept = []
+     for exp in experiments:
+         if exp.get("status") != "kept":
+             continue
+         exp_metrics = exp.get("metrics", {})
+         values = {}
+         complete = True
+         for m in metrics:
+             v = exp_metrics.get(m)
+             if v is None:
+                 complete = False
+                 break
+             try:
+                 values[m] = float(v)
+             except (ValueError, TypeError):
+                 complete = False
+                 break
+         if complete:
+             kept.append((exp.get("experiment_id", "?"), values))
+
+     if not kept:
+         return set()
+
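+     # Experiment i is dominated if some other kept experiment j is at least as
+     # good on every metric (respecting each metric's direction) and strictly
+     # better on at least one; the non-dominated survivors form the Pareto front.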
+     pareto_ids = set()
+     for i, (eid_i, vals_i) in enumerate(kept):
+         dominated = False
+         for j, (eid_j, vals_j) in enumerate(kept):
+             if i == j:
+                 continue
+             all_ge = True
+             strictly_better = False
+             for m in metrics:
+                 lib = lower_is_better_map.get(m, False)
+                 if lib:
+                     if vals_j[m] > vals_i[m]:
+                         all_ge = False
+                         break
+                     if vals_j[m] < vals_i[m]:
+                         strictly_better = True
+                 else:
+                     if vals_j[m] < vals_i[m]:
+                         all_ge = False
+                         break
+                     if vals_j[m] > vals_i[m]:
+                         strictly_better = True
+             if all_ge and strictly_better:
+                 dominated = True
+                 break
+         if not dominated:
+             pareto_ids.add(eid_i)
+
+     return pareto_ids
+
+
+ def identify_archivable(
+     experiments: list[dict],
+     metric: str,
+     lower_is_better: bool,
+     older_than_days: int = DEFAULT_OLDER_THAN_DAYS,
+     keep_best: int = DEFAULT_KEEP_BEST,
+     metrics_list: list[str] | None = None,
+ ) -> tuple[list[dict], set[str]]:
+     """Identify experiments that can be safely archived.
+
+     An experiment is archivable if ALL of:
+     - older than older_than_days
+     - not in the top-N best
+     - not Pareto-optimal
+     - not the most recent experiment
+
+     Returns (archivable_experiments, protected_ids).
+     """
+     cutoff = datetime.now(timezone.utc) - timedelta(days=older_than_days)
+     cutoff_str = cutoff.isoformat()
+
+     # Protected sets
+     best_ids = find_current_best(experiments, metric, lower_is_better, keep_best)
+
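+     # Direction heuristic for secondary metrics: names in this set are treated
+     # as lower-is-better; anything else defaults to higher-is-better.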
+     lower_metrics = {"train_seconds", "latency", "latency_ms", "n_params",
+                      "mse", "rmse", "mae", "loss", "log_loss", "error_rate"}
+     if metrics_list and len(metrics_list) >= 2:
+         lib_map = {}
+         for m in metrics_list:
+             if m == metric:
+                 lib_map[m] = lower_is_better
+             else:
+                 lib_map[m] = m in lower_metrics
+         pareto_ids = find_pareto_optimal(experiments, metrics_list, lib_map)
+     else:
+         pareto_ids = set()
+
+     # Most recent experiment is always protected
+     most_recent_id = experiments[-1].get("experiment_id", "") if experiments else ""
+
+     protected = best_ids | pareto_ids | {most_recent_id}
+
+     archivable = []
+     for exp in experiments:
+         eid = exp.get("experiment_id", "?")
+         ts = exp.get("timestamp", "")
+
+         if eid in protected:
+             continue
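+         # ISO-8601 timestamps with a consistent UTC offset compare correctly
+         # as plain strings, so this lexicographic check is a recency test.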
+         if ts >= cutoff_str:
+             continue
+
+         archivable.append(exp)
+
+     return archivable, protected
+
+
+ # --- Artifact Discovery ---
+
+
+ def find_experiment_artifacts(experiment_id: str) -> list[dict]:
+     """Find all artifact files associated with an experiment.
+
+     Scans known artifact directories for files matching the experiment ID.
+     """
+     artifacts = []
+     for dirname in ARTIFACT_DIRS:
+         dirpath = Path(f"experiments/{dirname}")
+         if not dirpath.exists():
+             continue
+         for f in dirpath.iterdir():
+             if experiment_id in f.name and f.is_file():
+                 artifacts.append({
+                     "path": str(f),
+                     "size_bytes": f.stat().st_size,
+                     "directory": dirname,
+                 })
+
+     return artifacts
+
+
+ # --- Archival Operations ---
+
+
+ def compress_artifact(filepath: str) -> dict:
+     """Compress a single artifact file with gzip.
+
+     Returns dict with original/compressed sizes and the compressed path.
+     """
+     src = Path(filepath)
+     if not src.exists():
+         return {"error": f"File not found: {filepath}"}
+     if src.suffix == ".gz":
+         return {"skipped": True, "path": filepath, "reason": "Already compressed"}
+
+     dst = Path(f"{filepath}.gz")
+     original_size = src.stat().st_size
+
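+     # Stream through gzip so large checkpoints never need to fit in memory;
+     # the source file is removed only after the compressed copy is written.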
+     with open(src, "rb") as f_in:
+         with gzip.open(dst, "wb") as f_out:
+             shutil.copyfileobj(f_in, f_out)
+
+     compressed_size = dst.stat().st_size
+     src.unlink()
+
+     return {
+         "original_path": filepath,
+         "compressed_path": str(dst),
+         "original_size": original_size,
+         "compressed_size": compressed_size,
+         "ratio": round(compressed_size / original_size, 3) if original_size > 0 else 0,
+     }
+
+
+ def create_experiment_summary(exp: dict) -> dict:
+     """Create a compact summary of an experiment for the archive index."""
+     return {
+         "experiment_id": exp.get("experiment_id", "?"),
+         "timestamp": exp.get("timestamp", ""),
+         "status": exp.get("status", "?"),
+         "model_type": exp.get("config", {}).get("model_type", "?"),
+         "family": exp.get("family"),
+         "description": exp.get("description", ""),
+         "metrics": exp.get("metrics", {}),
+         "config_summary": {
+             "model_type": exp.get("config", {}).get("model_type"),
+             "experiment_type": exp.get("config", {}).get("experiment_type"),
+         },
+     }
+
+
+ def load_archive_index(path: str = ARCHIVE_INDEX_PATH) -> dict:
+     """Load existing archive index."""
+     p = Path(path)
+     if not p.exists():
+         return {
+             "created": datetime.now(timezone.utc).isoformat(),
+             "archived_experiments": [],
+             "total_space_saved_bytes": 0,
+         }
+     try:
+         with open(p) as f:
+             data = yaml.safe_load(f)
+         return data if isinstance(data, dict) else {
+             "created": datetime.now(timezone.utc).isoformat(),
+             "archived_experiments": [],
+             "total_space_saved_bytes": 0,
+         }
+     except (yaml.YAMLError, OSError):
+         return {
+             "created": datetime.now(timezone.utc).isoformat(),
+             "archived_experiments": [],
+             "total_space_saved_bytes": 0,
+         }
+
+
+ def save_archive_index(index: dict, path: str = ARCHIVE_INDEX_PATH) -> Path:
+     """Save archive index to YAML."""
+     p = Path(path)
+     p.parent.mkdir(parents=True, exist_ok=True)
+     with open(p, "w") as f:
+         yaml.dump(index, f, default_flow_style=False, sort_keys=False)
+     return p
+
+
+ def archive_experiments(
+     archivable: list[dict],
+     dry_run: bool = False,
+ ) -> dict:
+     """Archive a list of experiments: compress artifacts, update index.
+
+     Args:
+         archivable: List of experiment dicts to archive.
+         dry_run: If True, report what would happen without changing anything.
+
+     Returns:
+         Archive operation result dict.
+     """
+     results = []
+     total_saved = 0
+
+     for exp in archivable:
+         eid = exp.get("experiment_id", "?")
+         artifacts = find_experiment_artifacts(eid)
+         summary = create_experiment_summary(exp)
+
+         entry = {
+             "experiment_id": eid,
+             "summary": summary,
+             "artifacts_found": len(artifacts),
+             "artifacts": [],
+         }
+
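+         # Dry runs only record the intended action per artifact; real runs
+         # compress in place and count bytes saved by successful compressions.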
+         for artifact in artifacts:
+             if dry_run:
+                 entry["artifacts"].append({
+                     "path": artifact["path"],
+                     "size_bytes": artifact["size_bytes"],
+                     "action": "would_compress",
+                 })
+             else:
+                 result = compress_artifact(artifact["path"])
+                 entry["artifacts"].append(result)
+                 if "original_size" in result and "compressed_size" in result:
+                     total_saved += result["original_size"] - result["compressed_size"]
+
+         results.append(entry)
+
+     # Update index
+     if not dry_run and results:
+         index = load_archive_index()
+         for entry in results:
+             index["archived_experiments"].append({
+                 "experiment_id": entry["experiment_id"],
+                 "archived_at": datetime.now(timezone.utc).isoformat(),
+                 "summary": entry["summary"],
+                 "artifacts_compressed": len(entry["artifacts"]),
+             })
+         index["total_space_saved_bytes"] = (
+             index.get("total_space_saved_bytes", 0) + total_saved
+         )
+         index["last_archive"] = datetime.now(timezone.utc).isoformat()
+         save_archive_index(index)
+
+     return {
+         "archived": len(results),
+         "total_artifacts": sum(e["artifacts_found"] for e in results),
+         "space_saved_bytes": total_saved,
+         "dry_run": dry_run,
+         "entries": results,
+     }
+
+
+ # --- Report ---
+
+
+ def format_archive_report(report: dict) -> str:
+     """Format archive operation as markdown report."""
+     if "error" in report:
+         return f"ERROR: {report['error']}"
+
+     lines = [
+         "# Experiment Archive",
+         "",
+         f"*Generated {report.get('timestamp', '?')[:19]} UTC*",
+         "",
+     ]
+
+     summary = report.get("summary", {})
+     lines.extend([
+         "## Summary",
+         "",
+         f"| Metric | Value |",
+         f"|--------|-------|",
+         f"| Total experiments | {summary.get('total_experiments', 0)} |",
+         f"| Archivable | {summary.get('archivable', 0)} |",
+         f"| Protected | {summary.get('protected', 0)} |",
+     ])
+
+     protected_reasons = summary.get("protected_reasons", {})
+     if protected_reasons:
+         lines.extend([
+             "",
+             "**Protected experiments:**",
+         ])
+         for reason, ids in protected_reasons.items():
+             lines.append(f"- {reason}: {', '.join(ids)}")
+         lines.append("")
+
+     # Archive results
+     archive = report.get("archive", {})
+     if archive.get("dry_run"):
+         lines.extend(["## Dry Run (no changes made)", ""])
+     else:
+         lines.extend(["## Archived", ""])
+
+     if archive.get("archived", 0) > 0:
+         lines.append(f"**{archive['archived']}** experiments, "
+                      f"**{archive['total_artifacts']}** artifacts")
+         if archive.get("space_saved_bytes", 0) > 0:
+             saved_mb = archive["space_saved_bytes"] / (1024 * 1024)
+             lines.append(f"**{saved_mb:.1f} MB** space saved by compression")
+         lines.append("")
+
+         lines.append("| Experiment | Status | Artifacts |")
+         lines.append("|------------|--------|-----------|")
+         for entry in archive.get("entries", []):
+             eid = entry["experiment_id"]
+             n_art = entry["artifacts_found"]
+             status = entry.get("summary", {}).get("status", "?")
+             lines.append(f"| {eid} | {status} | {n_art} files |")
+     else:
+         lines.append("No experiments to archive.")
+
+     lines.extend(["", "---"])
+
+     if archive.get("dry_run"):
+         lines.append("*Run without `--dry-run` to execute archival.*")
+     else:
+         lines.append(f"*Archive index saved to `{ARCHIVE_INDEX_PATH}`*")
+
+     return "\n".join(lines)
+
+
+ def run_archive(
+     config_path: str = "config.yaml",
+     log_path: str = DEFAULT_LOG_PATH,
+     older_than: int = DEFAULT_OLDER_THAN_DAYS,
+     keep_best: int = DEFAULT_KEEP_BEST,
+     dry_run: bool = False,
+ ) -> dict:
+     """Run the archive workflow.
+
+     Args:
+         config_path: Path to config.yaml.
+         log_path: Path to experiment log.
+         older_than: Archive experiments older than this many days.
+         keep_best: Never archive the top-N best experiments.
+         dry_run: If True, report without making changes.
+
+     Returns:
+         Archive result dict.
+     """
+     config = load_config(config_path)
+     eval_cfg = config.get("evaluation", {})
+     metric = eval_cfg.get("primary_metric", "accuracy")
+     lower_is_better = eval_cfg.get("lower_is_better", False)
+     metrics_list = eval_cfg.get("metrics", [metric])
+
+     experiments = load_experiments(log_path)
+     if not experiments:
+         return {
+             "timestamp": datetime.now(timezone.utc).isoformat(),
+             "error": "No experiments found",
+             "log_path": log_path,
+         }
+
+     archivable, protected = identify_archivable(
+         experiments, metric, lower_is_better,
+         older_than_days=older_than,
+         keep_best=keep_best,
+         metrics_list=metrics_list if len(metrics_list) >= 2 else None,
+     )
+
+     # Categorize protected experiments
+     best_ids = find_current_best(experiments, metric, lower_is_better, keep_best)
+     most_recent = experiments[-1].get("experiment_id", "") if experiments else ""
+     protected_reasons: dict[str, list[str]] = {}
+     if best_ids:
+         protected_reasons[f"top-{keep_best} best"] = sorted(best_ids)
+     if most_recent:
+         protected_reasons["most recent"] = [most_recent]
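+     # Whatever is protected but neither top-N nor most recent must be
+     # Pareto-optimal (an experiment can be protected for several reasons).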
+     pareto_only = protected - best_ids - {most_recent}
+     if pareto_only:
+         protected_reasons["Pareto-optimal"] = sorted(pareto_only)
+
+     archive_result = archive_experiments(archivable, dry_run=dry_run)
+
+     return {
+         "timestamp": datetime.now(timezone.utc).isoformat(),
+         "config": {
+             "older_than_days": older_than,
+             "keep_best": keep_best,
+             "metric": metric,
+             "lower_is_better": lower_is_better,
+         },
+         "summary": {
+             "total_experiments": len(experiments),
+             "archivable": len(archivable),
+             "protected": len(protected),
+             "protected_reasons": protected_reasons,
+         },
+         "archive": archive_result,
+     }
+
+
+ def main() -> None:
+     """CLI entry point."""
+     parser = argparse.ArgumentParser(description="Experiment lifecycle cleanup and archival")
+     parser.add_argument("--config", default="config.yaml", help="Path to config.yaml")
+     parser.add_argument("--log", default=DEFAULT_LOG_PATH, help="Path to experiment log")
+     parser.add_argument("--older-than", type=int, default=DEFAULT_OLDER_THAN_DAYS,
+                         help=f"Archive experiments older than N days (default: {DEFAULT_OLDER_THAN_DAYS})")
+     parser.add_argument("--keep-best", type=int, default=DEFAULT_KEEP_BEST,
+                         help=f"Never archive the top-N best experiments (default: {DEFAULT_KEEP_BEST})")
+     parser.add_argument("--dry-run", action="store_true",
+                         help="Report what would be archived without making changes")
+     parser.add_argument("--json", action="store_true", help="Output raw JSON")
+     args = parser.parse_args()
+
+     report = run_archive(
+         config_path=args.config,
+         log_path=args.log,
+         older_than=args.older_than,
+         keep_best=args.keep_best,
+         dry_run=args.dry_run,
+     )
+
+     if args.json:
+         print(json.dumps(report, indent=2, default=str))
+     else:
+         print(format_archive_report(report))
+
+
+ if __name__ == "__main__":
+     main()
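
For reference, after a non-dry run the script maintains experiments/archive/index.yaml with roughly the shape below. This is an illustrative sketch: the keys and their order follow load_archive_index and archive_experiments above, while the IDs, dates, and metric values are invented, and the summary block is abridged.

    created: "2025-01-02T10:00:00+00:00"
    archived_experiments:
      - experiment_id: exp_0042            # invented ID
        archived_at: "2025-02-01T09:30:00+00:00"
        summary:                           # abridged create_experiment_summary output
          experiment_id: exp_0042
          status: kept
          model_type: xgboost              # invented value
          metrics:
            accuracy: 0.81
        artifacts_compressed: 2
    total_space_saved_bytes: 73400320
    last_archive: "2025-02-01T09:30:00+00:00"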