claude-turing 3.4.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/.claude-plugin/plugin.json +2 -2
  2. package/README.md +12 -2
  3. package/commands/annotate.md +23 -0
  4. package/commands/archive.md +23 -0
  5. package/commands/changelog.md +22 -0
  6. package/commands/cite.md +23 -0
  7. package/commands/flashback.md +22 -0
  8. package/commands/present.md +23 -0
  9. package/commands/replay.md +23 -0
  10. package/commands/search.md +22 -0
  11. package/commands/template.md +22 -0
  12. package/commands/trend.md +21 -0
  13. package/commands/turing.md +20 -0
  14. package/package.json +1 -1
  15. package/src/install.js +2 -0
  16. package/src/verify.js +10 -0
  17. package/templates/scripts/__pycache__/citation_manager.cpython-314.pyc +0 -0
  18. package/templates/scripts/__pycache__/experiment_annotations.cpython-314.pyc +0 -0
  19. package/templates/scripts/__pycache__/experiment_archive.cpython-314.pyc +0 -0
  20. package/templates/scripts/__pycache__/experiment_replay.cpython-314.pyc +0 -0
  21. package/templates/scripts/__pycache__/experiment_search.cpython-314.pyc +0 -0
  22. package/templates/scripts/__pycache__/experiment_templates.cpython-314.pyc +0 -0
  23. package/templates/scripts/__pycache__/generate_changelog.cpython-314.pyc +0 -0
  24. package/templates/scripts/__pycache__/generate_figures.cpython-314.pyc +0 -0
  25. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  26. package/templates/scripts/__pycache__/session_flashback.cpython-314.pyc +0 -0
  27. package/templates/scripts/__pycache__/trend_analysis.cpython-314.pyc +0 -0
  28. package/templates/scripts/citation_manager.py +436 -0
  29. package/templates/scripts/experiment_annotations.py +392 -0
  30. package/templates/scripts/experiment_archive.py +534 -0
  31. package/templates/scripts/experiment_replay.py +592 -0
  32. package/templates/scripts/experiment_search.py +451 -0
  33. package/templates/scripts/experiment_templates.py +501 -0
  34. package/templates/scripts/generate_changelog.py +464 -0
  35. package/templates/scripts/generate_figures.py +597 -0
  36. package/templates/scripts/scaffold.py +17 -0
  37. package/templates/scripts/session_flashback.py +461 -0
  38. package/templates/scripts/trend_analysis.py +503 -0
@@ -0,0 +1,461 @@
1
+ #!/usr/bin/env python3
2
+ """Session flashback — context restoration after time away.
3
+
4
+ Reads recent experiments, current best, pending hypotheses, annotations,
5
+ and budget status to produce a compact "where was I?" summary. Designed
6
+ for the moment you return to a project after hours or days away.
7
+
8
+ Usage:
9
+ python scripts/session_flashback.py [--config config.yaml] [--log experiments/log.jsonl]
10
+ python scripts/session_flashback.py --last 10 # Last 10 experiments
11
+ python scripts/session_flashback.py --days 3 # Last 3 days
12
+ python scripts/session_flashback.py --json
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import argparse
18
+ import json
19
+ import sys
20
+ from datetime import datetime, timedelta, timezone
21
+ from pathlib import Path
22
+
23
+ import yaml
24
+
25
+ from scripts.turing_io import load_config, load_experiments, load_hypotheses
26
+
27
# Default path of the JSONL experiment log (default for --log and for the
# log_path parameters below).
DEFAULT_LOG_PATH = "experiments/log.jsonl"
# Number of most-recent experiments shown when no explicit window is given.
DEFAULT_LAST_N = 10
# Default look-back window in days.
# NOTE(review): not referenced anywhere in the visible code — confirm it is still needed.
DEFAULT_DAYS = 7
30
+
31
+
32
+ # --- Data Loading ---
33
+
34
+
35
def load_annotations(path: str = "experiments/annotations.yaml") -> list[dict]:
    """Load experiment annotations from a YAML file (best-effort).

    A missing, empty, unreadable, or malformed file is not an error: the
    flashback simply proceeds without annotations.

    Args:
        path: Location of the annotations YAML file.

    Returns:
        The mapping entries of the file's top-level list. Returns ``[]`` when
        the file is missing, empty, unreadable, invalid YAML, or its top-level
        value is not a list. Non-mapping list entries are dropped so callers
        can safely use ``.get`` on every element.
    """
    p = Path(path)
    try:
        # exists()/stat() can raise OSError themselves (e.g. permission
        # problems), so they belong inside the guarded region as well.
        if not p.exists() or p.stat().st_size == 0:
            return []
        with open(p, encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except (yaml.YAMLError, OSError):
        return []

    if not isinstance(data, list):
        return []
    return [entry for entry in data if isinstance(entry, dict)]
46
+
47
+
48
def load_budget_status(
    state_path: str = "experiment_state.yaml",
    log_path: str = DEFAULT_LOG_PATH,
) -> dict | None:
    """Return the current budget status, or ``None`` when it is unavailable.

    The lookup is deliberately best-effort: a flashback must still be
    produced when the budget module is absent or fails.

    Args:
        state_path: Path passed through to ``get_budget_status``.
        log_path: Experiment log path passed through to ``get_budget_status``.

    Returns:
        The dict returned by ``scripts.budget_manager.get_budget_status``, or
        ``None`` if the module cannot be imported, the call raises, the
        result is not a dict, or the result contains an ``"error"`` key.
    """
    try:
        # Imported lazily so a missing budget module does not break this script.
        from scripts.budget_manager import get_budget_status

        result = get_budget_status(state_path, log_path)
    except Exception:
        # ImportError is an Exception subclass, so one clause covers both the
        # missing-module case and any failure inside the budget manager.
        return None

    # Guard the membership test: only a dict result is meaningful here.
    if not isinstance(result, dict) or "error" in result:
        return None
    return result
61
+
62
+
63
def load_experiment_state(path: str = "experiment_state.yaml") -> dict:
    """Load the experiment state file (best-effort).

    Args:
        path: Location of the state YAML file.

    Returns:
        The parsed top-level mapping. Returns ``{}`` when the file is missing,
        unreadable, invalid YAML, empty, or its top-level value is not a
        mapping, so callers can always use ``.get`` on the result.
    """
    p = Path(path)
    try:
        if not p.exists():
            return {}
        with open(p, encoding="utf-8") as f:
            state = yaml.safe_load(f)
    except (yaml.YAMLError, OSError):
        return {}

    # A YAML document may legally be a list or scalar; only a mapping is
    # usable as state.
    return state if isinstance(state, dict) else {}
73
+
74
+
75
+ # --- Filtering ---
76
+
77
+
78
+ def filter_recent_experiments(
79
+ experiments: list[dict],
80
+ last_n: int | None = None,
81
+ days: int | None = None,
82
+ ) -> list[dict]:
83
+ """Filter experiments to recent ones by count or time window.
84
+
85
+ If both are given, uses whichever returns more experiments.
86
+ """
87
+ if not experiments:
88
+ return []
89
+
90
+ by_count = []
91
+ by_time = []
92
+
93
+ if last_n is not None:
94
+ by_count = experiments[-last_n:]
95
+
96
+ if days is not None:
97
+ cutoff = datetime.now(timezone.utc) - timedelta(days=days)
98
+ cutoff_str = cutoff.isoformat()
99
+ by_time = [
100
+ e for e in experiments
101
+ if e.get("timestamp", "") >= cutoff_str
102
+ ]
103
+
104
+ if by_count and by_time:
105
+ return by_count if len(by_count) >= len(by_time) else by_time
106
+ return by_count or by_time or experiments[-DEFAULT_LAST_N:]
107
+
108
+
109
+ def find_current_best(
110
+ experiments: list[dict],
111
+ metric: str,
112
+ lower_is_better: bool,
113
+ ) -> dict | None:
114
+ """Find the current best kept experiment."""
115
+ best = None
116
+ best_val = float("inf") if lower_is_better else float("-inf")
117
+ for exp in experiments:
118
+ if exp.get("status") != "kept":
119
+ continue
120
+ val = exp.get("metrics", {}).get(metric)
121
+ if val is None:
122
+ continue
123
+ try:
124
+ val = float(val)
125
+ except (ValueError, TypeError):
126
+ continue
127
+ if (lower_is_better and val < best_val) or (not lower_is_better and val > best_val):
128
+ best_val = val
129
+ best = exp
130
+ return best
131
+
132
+
133
+ # --- Flashback Assembly ---
134
+
135
+
136
def assemble_flashback(
    config_path: str = "config.yaml",
    log_path: str = DEFAULT_LOG_PATH,
    hypotheses_path: str = "hypotheses.yaml",
    annotations_path: str = "experiments/annotations.yaml",
    last_n: int | None = None,
    days: int | None = None,
) -> dict:
    """Assemble all context for a session flashback.

    Args:
        config_path: Config file; ``evaluation.primary_metric`` (default
            ``"accuracy"``) and ``evaluation.lower_is_better`` (default
            ``False``) are read from it.
        log_path: Experiment log to load; also passed to the budget lookup.
        hypotheses_path: Hypotheses file to load.
        annotations_path: Annotations file to load.
        last_n: Count window forwarded to ``filter_recent_experiments``.
        days: Time window forwarded to ``filter_recent_experiments``.

    Returns:
        A structured dict with everything needed to resume work. When the log
        has no experiments, a dict containing only ``timestamp``, ``error``
        and ``log_path`` is returned instead.

    Note:
        The experiment state is always read from the default path of
        ``load_experiment_state``; it is not configurable here.
    """
    config = load_config(config_path)
    # "evaluation" may be present but null in the config file.
    eval_cfg = config.get("evaluation") or {}
    metric = eval_cfg.get("primary_metric", "accuracy")
    lower_is_better = eval_cfg.get("lower_is_better", False)

    all_experiments = load_experiments(log_path)
    if not all_experiments:
        return {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "error": "No experiments found",
            "log_path": log_path,
        }

    recent = filter_recent_experiments(all_experiments, last_n, days)
    best = find_current_best(all_experiments, metric, lower_is_better)
    hypotheses = load_hypotheses(hypotheses_path) or []
    annotations = load_annotations(annotations_path)
    budget = load_budget_status(log_path=log_path)
    state = load_experiment_state()
    if not isinstance(state, dict):
        state = {}

    # Summarize recent activity
    recent_kept = sum(1 for e in recent if e.get("status") == "kept")
    recent_discarded = sum(1 for e in recent if e.get("status") == "discarded")

    # Time since last experiment
    last_ts = all_experiments[-1].get("timestamp", "")
    time_away = None
    if isinstance(last_ts, str) and last_ts:
        try:
            last_dt = datetime.fromisoformat(last_ts.replace("Z", "+00:00"))
        except ValueError:
            last_dt = None
        if last_dt is not None:
            if last_dt.tzinfo is None:
                # Assumption: offset-less timestamps were written in UTC.
                # Without this, subtracting from an aware "now" raises and the
                # time-away line silently disappears.
                last_dt = last_dt.replace(tzinfo=timezone.utc)
            delta = datetime.now(timezone.utc) - last_dt
            time_away = {
                "hours": round(delta.total_seconds() / 3600, 1),
                "human": _format_timedelta(delta),
            }

    # Pending hypotheses
    pending = [h for h in hypotheses if h.get("status") == "queued"]
    high_priority = [h for h in pending if h.get("priority") == "high"]

    # Recent annotations. Drop None so an annotation without an experiment_id
    # cannot match an experiment that also lacks one.
    recent_ids = {e.get("experiment_id") for e in recent}
    recent_ids.discard(None)
    relevant_annotations = [
        a for a in annotations
        if isinstance(a, dict) and a.get("experiment_id") in recent_ids
    ]

    # Current research direction from state
    current_direction = state.get("current_direction")
    iteration = state.get("iteration", 0)

    return {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "metric": metric,
        "lower_is_better": lower_is_better,
        "total_experiments": len(all_experiments),
        "time_away": time_away,
        "current_best": {
            "experiment_id": best.get("experiment_id", "?"),
            "model_type": (best.get("config") or {}).get("model_type", "?"),
            "metrics": best.get("metrics") or {},
            "description": best.get("description", ""),
        } if best else None,
        "recent": {
            "count": len(recent),
            "kept": recent_kept,
            "discarded": recent_discarded,
            "experiments": [
                {
                    "experiment_id": e.get("experiment_id", "?"),
                    "status": e.get("status", "?"),
                    "metric_value": (e.get("metrics") or {}).get(metric),
                    "description": e.get("description", ""),
                    "timestamp": e.get("timestamp", ""),
                }
                for e in recent
            ],
        },
        "hypotheses": {
            "total_pending": len(pending),
            "high_priority": len(high_priority),
            "queue": [
                {
                    "id": h.get("id", "?"),
                    "description": h.get("description", ""),
                    "priority": h.get("priority", "normal"),
                    "source": h.get("source", ""),
                }
                # Only the first ten pending hypotheses are included.
                for h in pending[:10]
            ],
        },
        "annotations": relevant_annotations,
        "budget": _summarize_budget(budget) if budget else None,
        "iteration": iteration,
        "current_direction": current_direction,
    }
246
+
247
+
248
def _summarize_budget(budget: dict) -> dict:
    """Extract a compact budget summary.

    Args:
        budget: Budget status dict; ``phase``, ``exhausted`` and the nested
            ``usage`` mapping are read from it.

    Returns:
        A dict with ``phase``, ``fraction_used``, ``experiments_remaining``,
        ``hours_remaining`` and ``exhausted``. A missing, null, or non-mapping
        ``usage`` is treated as empty, and a missing or null
        ``budget_fraction`` becomes ``0`` so the value can always be formatted
        as a percentage.
    """
    usage = budget.get("usage")
    if not isinstance(usage, dict):
        usage = {}
    return {
        "phase": budget.get("phase", "?"),
        "fraction_used": usage.get("budget_fraction") or 0,
        "experiments_remaining": usage.get("experiments_remaining"),
        "hours_remaining": usage.get("hours_remaining"),
        "exhausted": budget.get("exhausted", False),
    }
258
+
259
+
260
def _format_timedelta(delta: timedelta) -> str:
    """Render a timedelta as a short human-readable string.

    Under one hour the result is whole minutes, under a day it is hours with
    one decimal, otherwise it is ``"<days>d <hours>h"``.
    """
    seconds = delta.total_seconds()
    hours_total = seconds / 3600
    if hours_total >= 24:
        # delta.seconds is the sub-day remainder, so this is hours past the
        # last whole day.
        return f"{delta.days}d {delta.seconds // 3600}h"
    if hours_total >= 1:
        return f"{hours_total:.1f} hours"
    return f"{int(seconds / 60)} minutes"
271
+
272
+
273
+ # --- Report ---
274
+
275
+
276
def save_flashback_report(report: dict, output_dir: str = "experiments/flashbacks") -> Path:
    """Write the flashback report as YAML and return the file's path.

    The output directory is created if needed. The file is named
    ``flashback-<UTC date>_<UTC time>.yaml``.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H%M%S")
    destination = target_dir / f"flashback-{stamp}.yaml"

    # sort_keys=False keeps the report's own key order in the file.
    with destination.open("w") as handle:
        yaml.dump(report, handle, default_flow_style=False, sort_keys=False)
    return destination
285
+
286
+
287
def format_flashback_report(report: dict) -> str:
    """Format a flashback report as a compact markdown summary.

    Args:
        report: Dict in the shape produced by ``assemble_flashback``.

    Returns:
        ``"ERROR: <message>"`` when the report has an ``error`` key; otherwise
        markdown with sections for time away, current best, recent activity,
        annotations, pending hypotheses, budget, research state, and
        suggested next steps. Sections with no data are omitted, except
        "Current Best" and "Pending Hypotheses", which print a placeholder.
        Metric values that are missing or ``None`` are shown as ``?``.
    """
    if "error" in report:
        return f"ERROR: {report['error']}"

    lines = [
        "# Session Flashback",
        "",
        # Trim to seconds precision; tolerate a missing/None timestamp.
        f"*Generated {str(report.get('timestamp') or '?')[:19]} UTC*",
        "",
    ]

    # Time away
    time_away = report.get("time_away")
    if time_away:
        lines.append(f"You've been away **{time_away['human']}**.")
        lines.append("")

    # Current best
    best = report.get("current_best")
    if best:
        metric = report.get("metric", "?")
        direction = "lower" if report.get("lower_is_better") else "higher"
        best_metrics = best.get("metrics") or {}
        metric_val = best_metrics.get(metric)
        if metric_val is None:
            metric_val = "?"
        lines.extend([
            "## Current Best",
            "",
            f"**{best['experiment_id']}** ({best['model_type']})",
            f"- {metric} = **{metric_val}** ({direction} is better)",
        ])
        other_metrics = {k: v for k, v in best_metrics.items() if k != metric}
        if other_metrics:
            parts = ", ".join(f"{k}={v}" for k, v in other_metrics.items())
            lines.append(f"- Other: {parts}")
        if best.get("description"):
            lines.append(f"- *{best['description']}*")
        lines.append("")
    else:
        lines.extend(["## Current Best", "", "No kept experiments yet.", ""])

    # Recent activity
    recent = report.get("recent", {})
    if recent.get("count", 0) > 0:
        lines.extend([
            "## Recent Activity",
            "",
            f"**{recent['count']}** recent experiments: "
            f"{recent['kept']} kept, {recent['discarded']} discarded",
            "",
        ])
        # Only the five most recent experiments are listed individually.
        for exp in recent.get("experiments", [])[-5:]:
            status_icon = "+" if exp["status"] == "kept" else "-"
            # The key is normally present with value None when the metric was
            # not recorded, so a .get() default alone would print "None".
            val = exp.get("metric_value")
            if val is None:
                val = "?"
            desc = f" — {exp['description']}" if exp.get("description") else ""
            lines.append(f" {status_icon} **{exp['experiment_id']}**: {val}{desc}")
        if recent["count"] > 5:
            lines.append(f" *...and {recent['count'] - 5} more*")
        lines.append("")

    # Annotations
    annotations = report.get("annotations", [])
    if annotations:
        lines.extend(["## Notes & Annotations", ""])
        for ann in annotations[:5]:
            exp_id = ann.get("experiment_id", "?")
            # Annotation text is read from "note", falling back to "text".
            note = ann.get("note", ann.get("text", ""))
            lines.append(f"- **{exp_id}**: {note}")
        lines.append("")

    # Hypothesis queue
    hyp = report.get("hypotheses", {})
    if hyp.get("total_pending", 0) > 0:
        lines.extend([
            "## Pending Hypotheses",
            "",
            f"**{hyp['total_pending']}** queued"
            + (f" ({hyp['high_priority']} high priority)" if hyp.get("high_priority") else ""),
            "",
        ])
        for h in hyp.get("queue", [])[:5]:
            priority = " **(HIGH)**" if h.get("priority") == "high" else ""
            source = f" [{h['source']}]" if h.get("source") else ""
            lines.append(f"- {h['id']}: {h['description']}{priority}{source}")
        if hyp["total_pending"] > 5:
            lines.append(f" *...and {hyp['total_pending'] - 5} more*")
        lines.append("")
    else:
        lines.extend([
            "## Pending Hypotheses",
            "",
            "Queue is empty. Use `/turing:try` to inject ideas.",
            "",
        ])

    # Budget
    budget = report.get("budget")
    if budget:
        lines.extend(["## Budget", ""])
        if budget.get("exhausted"):
            lines.append("**EXHAUSTED** — no more experiments will run.")
        else:
            pct = budget.get("fraction_used") or 0
            lines.append(f"**{pct:.0%} used** (phase: {budget.get('phase', '?')})")
            if budget.get("experiments_remaining") is not None:
                lines.append(f"- {budget['experiments_remaining']} experiments remaining")
            if budget.get("hours_remaining") is not None:
                lines.append(f"- {budget['hours_remaining']:.1f}h remaining")
        lines.append("")

    # Research state
    direction = report.get("current_direction")
    iteration = report.get("iteration", 0)
    if direction or iteration:
        lines.extend(["## Research State", ""])
        if iteration:
            lines.append(f"- Iteration: {iteration}")
        if direction:
            lines.append(f"- Direction: {direction}")
        lines.append("")

    # Suggested next actions
    lines.extend([
        "---",
        "",
        "**Next steps:**",
    ])
    if hyp.get("high_priority"):
        lines.append("- Run `/turing:train` to execute high-priority hypotheses")
    elif hyp.get("total_pending", 0) > 0:
        lines.append("- Run `/turing:train` to execute queued hypotheses")
    else:
        lines.append("- Run `/turing:try` to inject a new hypothesis")
    lines.append("- Run `/turing:brief` for full research intelligence report")
    lines.append("- Run `/turing:trend` for long-term trend analysis")

    return "\n".join(lines)
423
+
424
+
425
def main() -> None:
    """CLI entry point.

    Parses the command line, assembles the flashback, saves it as YAML
    (unless the report is an error), and prints it as markdown or, with
    ``--json``, as JSON. The "Saved to" notice goes to stderr so stdout
    carries only the report.
    """

    def _positive_int(raw: str) -> int:
        """Parse a strictly positive integer for --last / --days."""
        try:
            value = int(raw)
        except ValueError:
            raise argparse.ArgumentTypeError(f"invalid int value: {raw!r}") from None
        # Zero or negative windows are meaningless: a slice by -0 returns the
        # whole log, and negative days put the cutoff in the future.
        if value <= 0:
            raise argparse.ArgumentTypeError(f"must be a positive integer, got {value}")
        return value

    parser = argparse.ArgumentParser(description="Session flashback — where was I?")
    parser.add_argument("--config", default="config.yaml", help="Path to config.yaml")
    parser.add_argument("--log", default=DEFAULT_LOG_PATH, help="Path to experiment log")
    parser.add_argument("--hypotheses", default="hypotheses.yaml", help="Path to hypotheses file")
    parser.add_argument("--annotations", default="experiments/annotations.yaml",
                        help="Path to annotations file")
    parser.add_argument("--last", type=_positive_int, default=None, help="Show last N experiments")
    parser.add_argument("--days", type=_positive_int, default=None,
                        help="Show experiments from last N days")
    parser.add_argument("--json", action="store_true", help="Output raw JSON")
    args = parser.parse_args()

    # Apply the default count window only when neither --last nor --days was
    # given; an explicit --days alone means "time window only".
    last_n = args.last if args.last is not None else (None if args.days is not None else DEFAULT_LAST_N)
    days = args.days

    report = assemble_flashback(
        config_path=args.config,
        log_path=args.log,
        hypotheses_path=args.hypotheses,
        annotations_path=args.annotations,
        last_n=last_n,
        days=days,
    )

    if "error" not in report:
        filepath = save_flashback_report(report)
        print(f"Saved to {filepath}", file=sys.stderr)

    if args.json:
        # default=str stringifies any value json cannot serialise natively.
        print(json.dumps(report, indent=2, default=str))
    else:
        print(format_flashback_report(report))


if __name__ == "__main__":
    main()