claude-turing 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/.claude-plugin/plugin.json +34 -0
  2. package/LICENSE +21 -0
  3. package/README.md +457 -0
  4. package/agents/ml-evaluator.md +43 -0
  5. package/agents/ml-researcher.md +74 -0
  6. package/bin/cli.js +46 -0
  7. package/bin/turing-init.sh +57 -0
  8. package/commands/brief.md +83 -0
  9. package/commands/compare.md +24 -0
  10. package/commands/design.md +97 -0
  11. package/commands/init.md +123 -0
  12. package/commands/logbook.md +51 -0
  13. package/commands/mode.md +43 -0
  14. package/commands/poster.md +89 -0
  15. package/commands/preflight.md +75 -0
  16. package/commands/report.md +97 -0
  17. package/commands/rules/loop-protocol.md +91 -0
  18. package/commands/status.md +24 -0
  19. package/commands/suggest.md +95 -0
  20. package/commands/sweep.md +45 -0
  21. package/commands/train.md +66 -0
  22. package/commands/try.md +63 -0
  23. package/commands/turing.md +54 -0
  24. package/commands/validate.md +34 -0
  25. package/config/defaults.yaml +45 -0
  26. package/config/experiment_archetypes.yaml +127 -0
  27. package/config/lifecycle.toml +31 -0
  28. package/config/novelty_aliases.yaml +107 -0
  29. package/config/relationships.toml +125 -0
  30. package/config/state.toml +24 -0
  31. package/config/task_taxonomy.yaml +110 -0
  32. package/config/taxonomy.toml +37 -0
  33. package/package.json +54 -0
  34. package/src/claude-md.js +55 -0
  35. package/src/install.js +107 -0
  36. package/src/paths.js +20 -0
  37. package/src/postinstall.js +22 -0
  38. package/src/verify.js +109 -0
  39. package/templates/MEMORY.md +36 -0
  40. package/templates/README.md +93 -0
  41. package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
  42. package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
  43. package/templates/config.yaml +48 -0
  44. package/templates/evaluate.py +237 -0
  45. package/templates/features/__init__.py +0 -0
  46. package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
  47. package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
  48. package/templates/features/featurizers.py +138 -0
  49. package/templates/prepare.py +171 -0
  50. package/templates/program.md +216 -0
  51. package/templates/pyproject.toml +8 -0
  52. package/templates/requirements.txt +8 -0
  53. package/templates/scripts/__init__.py +0 -0
  54. package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  55. package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
  56. package/templates/scripts/__pycache__/classify_task.cpython-314.pyc +0 -0
  57. package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
  58. package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
  59. package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
  60. package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
  61. package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
  62. package/templates/scripts/__pycache__/manage_hypotheses.cpython-314.pyc +0 -0
  63. package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
  64. package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
  65. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  66. package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
  67. package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
  68. package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
  69. package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
  70. package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
  71. package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
  72. package/templates/scripts/__pycache__/turing_io.cpython-314.pyc +0 -0
  73. package/templates/scripts/__pycache__/update_state.cpython-314.pyc +0 -0
  74. package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
  75. package/templates/scripts/check_convergence.py +230 -0
  76. package/templates/scripts/compare_runs.py +124 -0
  77. package/templates/scripts/critique_hypothesis.py +350 -0
  78. package/templates/scripts/experiment_index.py +288 -0
  79. package/templates/scripts/generate_brief.py +389 -0
  80. package/templates/scripts/generate_logbook.py +423 -0
  81. package/templates/scripts/log_experiment.py +243 -0
  82. package/templates/scripts/manage_hypotheses.py +543 -0
  83. package/templates/scripts/novelty_guard.py +343 -0
  84. package/templates/scripts/parse_metrics.py +139 -0
  85. package/templates/scripts/post-train-hook.sh +74 -0
  86. package/templates/scripts/preflight.py +549 -0
  87. package/templates/scripts/scaffold.py +409 -0
  88. package/templates/scripts/show_environment.py +92 -0
  89. package/templates/scripts/show_experiment_tree.py +144 -0
  90. package/templates/scripts/show_families.py +133 -0
  91. package/templates/scripts/show_metrics.py +157 -0
  92. package/templates/scripts/statistical_compare.py +259 -0
  93. package/templates/scripts/stop-hook.sh +34 -0
  94. package/templates/scripts/suggest_next.py +301 -0
  95. package/templates/scripts/sweep.py +276 -0
  96. package/templates/scripts/synthesize_decision.py +300 -0
  97. package/templates/scripts/turing_io.py +76 -0
  98. package/templates/scripts/update_state.py +296 -0
  99. package/templates/scripts/validate_stability.py +167 -0
  100. package/templates/scripts/verify_placeholders.py +119 -0
  101. package/templates/sweep_config.yaml +14 -0
  102. package/templates/tests/__init__.py +0 -0
  103. package/templates/tests/conftest.py +91 -0
  104. package/templates/train.py +240 -0
@@ -0,0 +1,409 @@
1
+ #!/usr/bin/env python3
2
+ """Unified project scaffolding for the autoresearch pipeline.
3
+
4
+ Single implementation of project scaffolding used by both:
5
+ - /turing:init (Claude Code) — calls with --interactive or pre-filled args
6
+ - bin/turing-init.sh (CLI) — calls with explicit --project-name etc.
7
+
8
+ Eliminates the dual-path divergence where init.md and turing-init.sh
9
+ had different capabilities (substitution, hooks, venv, memory).
10
+
11
+ Usage:
12
+ python scripts/scaffold.py --project-name sentiment --target-metric accuracy \\
13
+ --task-description "Predict sentiment" --ml-dir ml/sentiment \\
14
+ --data-source data/reviews.csv --metric-direction higher
15
+
16
+ python scripts/scaffold.py --interactive # Prompt for each value
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import argparse
22
+ import json
23
+ import os
24
+ import re
25
+ import shutil
26
+ import subprocess
27
+ import sys
28
+ from pathlib import Path
29
+
30
# Maps {{PLACEHOLDER}} tokens found in template files to the corresponding
# key of the `values` dict built from CLI args / interactive prompts.
PLACEHOLDER_MAP = {
    "PROJECT_NAME": "project_name",
    "TARGET_METRIC": "target_metric",
    "TASK_DESCRIPTION": "task_description",
    "ML_DIR": "ml_dir",
    "DATA_SOURCE": "data_source",
    "METRIC_DIRECTION": "metric_direction",
}

# Files to copy from templates/ to the ML directory
TEMPLATE_FILES = [
    "prepare.py",
    "evaluate.py",
    "train.py",
    "config.yaml",
    "sweep_config.yaml",
    "program.md",
    "README.md",
    "MEMORY.md",
    "requirements.txt",
    "pyproject.toml",
]

# Subdirectories of templates/ to copy, with the specific files taken from
# each (anything else found in those directories is ignored).
TEMPLATE_DIRS = {
    "features": ["__init__.py", "featurizers.py"],
    "scripts": [
        "__init__.py",
        "log_experiment.py",
        "show_metrics.py",
        "compare_runs.py",
        "sweep.py",
        "post-train-hook.sh",
        "stop-hook.sh",
        "check_convergence.py",
        "verify_placeholders.py",
        "manage_hypotheses.py",
        "generate_brief.py",
        "show_experiment_tree.py",
        "statistical_compare.py",
        "suggest_next.py",
        "update_state.py",
        "parse_metrics.py",
        "scaffold.py",
        "novelty_guard.py",
        "synthesize_decision.py",
        "show_families.py",
        "critique_hypothesis.py",
        "experiment_index.py",
        "generate_logbook.py",
        "validate_stability.py",
        "show_environment.py",
        "turing_io.py",
        "preflight.py",
    ],
    "tests": ["__init__.py", "conftest.py"],
}

# Empty working directories created inside the target ML directory.
DIRECTORIES_TO_CREATE = [
    "data/splits",
    "experiments",
    "models/best",
    "models/archive",
]

# Scripts (paths relative to the target ML dir) that get execute bits set.
SHELL_SCRIPTS = [
    "scripts/post-train-hook.sh",
    "scripts/stop-hook.sh",
]
98
+
99
+
100
def find_templates_dir() -> Path | None:
    """Locate the templates directory relative to this script or plugin root.

    Search order:
      1. Parent of this script's directory (running from templates/scripts/).
      2. <plugin root>/templates (two levels up from scripts/).
      3. Any installed plugin under ~/.claude/plugins/*/templates.

    Returns:
        Path to a directory containing prepare.py, or None when not found.
    """
    script_dir = Path(__file__).parent

    # Check: are we inside the plugin's templates/scripts/ ?
    candidate = script_dir.parent  # templates/
    if (candidate / "prepare.py").exists():
        return candidate

    # Check: plugin root (two levels up from scripts/)
    plugin_root = script_dir.parent.parent
    candidate = plugin_root / "templates"
    if candidate.exists() and (candidate / "prepare.py").exists():
        return candidate

    # Search common plugin locations.
    # BUG FIX: the original iterated `pattern.parent.glob(pattern.name)`,
    # which looked for a "templates" entry inside a directory literally
    # named "*" and therefore never matched. Glob the wildcard pattern
    # relative to home instead.
    home = Path.home()
    for match in sorted(home.glob(".claude/plugins/*/templates")):
        if (match / "prepare.py").exists():
            return match

    return None
126
+
127
+
128
def replace_placeholders(text: str, values: dict[str, str]) -> str:
    """Substitute every known {{PLACEHOLDER}} token in *text*.

    Tokens are mapped to entries of *values* via PLACEHOLDER_MAP; a missing
    value substitutes the empty string.
    """
    result = text
    for token, key in PLACEHOLDER_MAP.items():
        result = result.replace("{{" + token + "}}", values.get(key, ""))
    return result
134
+
135
+
136
def scaffold_project(
    templates_dir: Path,
    ml_dir: str,
    values: dict[str, str],
    setup_venv: bool = True,
    setup_hooks: bool = True,
) -> dict[str, int]:
    """Scaffold a complete ML project.

    Args:
        templates_dir: Path to the templates/ directory.
        ml_dir: Target ML directory (relative to cwd).
        values: Dict mapping arg names to values for placeholder substitution.
        setup_venv: Whether to create and populate a Python venv.
        setup_hooks: Whether to configure Claude Code hooks.

    Returns:
        Dict with counts: files_copied, placeholders_replaced, dirs_created.
    """
    target = Path(ml_dir)
    target.mkdir(parents=True, exist_ok=True)

    stats = {"files_copied": 0, "placeholders_replaced": 0, "dirs_created": 0}

    def _substitute_and_write(src: Path, dest: Path) -> None:
        # Copy one template with substitution. Count known placeholders
        # BEFORE substituting so stats["placeholders_replaced"] is accurate
        # (the original never updated it and carried a dead scan loop whose
        # result was discarded — both fixed here).
        content = src.read_text(encoding="utf-8")
        stats["placeholders_replaced"] += sum(
            content.count(f"{{{{{p}}}}}") for p in PLACEHOLDER_MAP
        )
        dest.write_text(replace_placeholders(content, values), encoding="utf-8")
        stats["files_copied"] += 1

    # Copy and substitute top-level template files
    for filename in TEMPLATE_FILES:
        src = templates_dir / filename
        if not src.exists():
            # BUG FIX: message previously read "template (unknown)" because
            # the filename was never interpolated into the f-string.
            print(f" Warning: template {filename} not found, skipping", file=sys.stderr)
            continue
        _substitute_and_write(src, target / filename)

    # Copy and substitute template directories
    for dirname, files in TEMPLATE_DIRS.items():
        dir_target = target / dirname
        dir_target.mkdir(parents=True, exist_ok=True)
        for filename in files:
            src = templates_dir / dirname / filename
            if not src.exists():
                continue
            _substitute_and_write(src, dir_target / filename)

    # Create the standard working-directory layout
    for d in DIRECTORIES_TO_CREATE:
        (target / d).mkdir(parents=True, exist_ok=True)
        stats["dirs_created"] += 1

    # Make shell scripts executable (add u/g/o execute bits)
    for script in SHELL_SCRIPTS:
        script_path = target / script
        if script_path.exists():
            script_path.chmod(script_path.stat().st_mode | 0o111)

    # Setup agent memory (written under the current working directory,
    # not under ml_dir — Claude Code reads it from .claude/)
    memory_dir = Path(".claude") / "agent-memory" / "ml-researcher"
    memory_dir.mkdir(parents=True, exist_ok=True)
    memory_src = templates_dir / "MEMORY.md"
    if memory_src.exists():
        content = memory_src.read_text(encoding="utf-8")
        content = replace_placeholders(content, values)
        (memory_dir / "MEMORY.md").write_text(content, encoding="utf-8")

    # Setup hooks
    if setup_hooks:
        _setup_hooks(ml_dir)

    # Setup venv
    if setup_venv:
        _setup_venv(target)

    return stats
225
+
226
+
227
+ def _setup_hooks(ml_dir: str) -> None:
228
+ """Configure Claude Code hooks in .claude/settings.local.json."""
229
+ settings_path = Path(".claude") / "settings.local.json"
230
+ settings_path.parent.mkdir(parents=True, exist_ok=True)
231
+
232
+ settings = {}
233
+ if settings_path.exists():
234
+ try:
235
+ settings = json.loads(settings_path.read_text())
236
+ except (json.JSONDecodeError, FileNotFoundError):
237
+ settings = {}
238
+
239
+ hooks = settings.get("hooks", {})
240
+
241
+ # PostToolUse hook for auto-logging
242
+ post_hooks = hooks.get("PostToolUse", [])
243
+ post_hook_cmd = f"bash {ml_dir}/scripts/post-train-hook.sh"
244
+ if not any(post_hook_cmd in str(h) for h in post_hooks):
245
+ post_hooks.append({
246
+ "matcher": "Bash",
247
+ "hooks": [{"type": "command", "command": post_hook_cmd}],
248
+ })
249
+ hooks["PostToolUse"] = post_hooks
250
+
251
+ # Stop hook for convergence
252
+ stop_hooks = hooks.get("Stop", [])
253
+ stop_hook_cmd = f"bash {ml_dir}/scripts/stop-hook.sh"
254
+ if not any(stop_hook_cmd in str(h) for h in stop_hooks):
255
+ stop_hooks.append({
256
+ "type": "command",
257
+ "command": stop_hook_cmd,
258
+ })
259
+ hooks["Stop"] = stop_hooks
260
+
261
+ settings["hooks"] = hooks
262
+ settings_path.write_text(json.dumps(settings, indent=2))
263
+
264
+
265
+ def _setup_venv(target: Path) -> None:
266
+ """Create Python venv and install requirements."""
267
+ venv_path = target / ".venv"
268
+ if venv_path.exists():
269
+ print(" Venv already exists, skipping creation.", file=sys.stderr)
270
+ return
271
+
272
+ print(" Creating virtual environment...", file=sys.stderr)
273
+ try:
274
+ subprocess.run(
275
+ [sys.executable, "-m", "venv", str(venv_path)],
276
+ check=True, capture_output=True,
277
+ )
278
+ pip = str(venv_path / "bin" / "pip")
279
+ req = str(target / "requirements.txt")
280
+ if Path(req).exists():
281
+ print(" Installing requirements...", file=sys.stderr)
282
+ subprocess.run(
283
+ [pip, "install", "-r", req],
284
+ check=True, capture_output=True,
285
+ )
286
+ except subprocess.CalledProcessError as e:
287
+ print(f" Warning: venv setup failed: {e}", file=sys.stderr)
288
+
289
+
290
def verify_placeholders(ml_dir: str) -> list[tuple[str, int, str]]:
    """Check for unreplaced placeholders in scaffolded files.

    Scans text-like files under *ml_dir* (skipping anything inside .venv)
    for {{NAME}} tokens whose NAME is a known placeholder.

    Returns:
        List of (filepath, line_number, placeholder) tuples; filepath is
        relative to ml_dir and line_number is 1-based.
    """
    target = Path(ml_dir)
    placeholder_re = re.compile(r"\{\{([A-Z_]+)\}\}")
    known = set(PLACEHOLDER_MAP.keys())
    findings = []

    text_suffixes = (".py", ".yaml", ".yml", ".md", ".sh", ".txt", ".toml")
    for path in sorted(target.rglob("*")):
        if not path.is_file():
            continue
        if path.suffix not in text_suffixes:
            continue
        if ".venv" in path.parts:
            continue
        try:
            # CONSISTENCY FIX: read as UTF-8 explicitly like every other
            # read/write in this module (the default locale encoding could
            # differ and mis-decode scaffolded files).
            lines = path.read_text(encoding="utf-8").splitlines()
        except (UnicodeDecodeError, OSError):
            continue
        for i, line in enumerate(lines, 1):
            for match in placeholder_re.finditer(line):
                name = match.group(1)
                if name in known:
                    findings.append((str(path.relative_to(target)), i, name))

    return findings
317
+
318
+
319
def interactive_prompt() -> dict[str, str]:
    """Prompt the user on stdin for all scaffold values, in fixed order."""
    print("\nTuring ML Research Harness — Project Setup\n")

    # Prompts whose text does not depend on earlier answers.
    leading_prompts = [
        ("project_name", "Project name (e.g., sentiment, churn): "),
        ("target_metric", "Primary metric (accuracy, f1, mae, mse, auc): "),
        ("metric_direction", "Is lower better or higher better? (lower/higher): "),
        ("task_description", "Task description (e.g., Predict customer churn): "),
    ]
    values: dict[str, str] = {}
    for key, prompt in leading_prompts:
        values[key] = input(prompt).strip()

    # The ml_dir example embeds the project name answered above.
    values["ml_dir"] = input(f"ML directory (e.g., ml/{values['project_name']}): ").strip()
    values["data_source"] = input("Data source path (e.g., data/training.csv): ").strip()

    return values
332
+
333
+
334
def main() -> None:
    """CLI entry point: parse args, scaffold, verify placeholders, report."""
    parser = argparse.ArgumentParser(description="Scaffold an ML project")
    parser.add_argument("--interactive", action="store_true", help="Prompt for values")
    for opt in (
        "--project-name", "--target-metric", "--metric-direction",
        "--task-description", "--ml-dir", "--data-source",
    ):
        parser.add_argument(opt, default=None)
    parser.add_argument("--no-venv", action="store_true", help="Skip venv creation")
    parser.add_argument("--no-hooks", action="store_true", help="Skip hook configuration")
    parser.add_argument("--templates-dir", default=None, help="Override templates directory")
    args = parser.parse_args()

    # Collect scaffold values, falling back to defaults for missing args.
    if args.interactive:
        values = interactive_prompt()
    else:
        defaults = {
            "project_name": "my-project",
            "target_metric": "accuracy",
            "metric_direction": "higher",
            "task_description": "ML task",
            "ml_dir": ".",
            "data_source": "data/training.csv",
        }
        values = {key: getattr(args, key) or fallback for key, fallback in defaults.items()}

    # Resolve the templates directory (explicit flag wins over auto-detect).
    templates_dir = Path(args.templates_dir) if args.templates_dir else find_templates_dir()
    if templates_dir is None or not templates_dir.exists():
        print("Error: Cannot find templates directory.", file=sys.stderr)
        print("Use --templates-dir to specify the path.", file=sys.stderr)
        sys.exit(1)

    ml_dir = values["ml_dir"]

    print(f"\nScaffolding project: {values['project_name']}")
    print(f"Directory: {ml_dir}")
    print(f"Metric: {values['target_metric']} ({values['metric_direction']} is better)")
    print()

    stats = scaffold_project(
        templates_dir=templates_dir,
        ml_dir=ml_dir,
        values=values,
        setup_venv=not args.no_venv,
        setup_hooks=not args.no_hooks,
    )

    print(f"\nScaffolded {stats['files_copied']} files, {stats['dirs_created']} directories.")

    # Fail loudly if any known placeholder survived substitution.
    findings = verify_placeholders(ml_dir)
    if findings:
        print(f"\nWarning: {len(findings)} unreplaced placeholder(s):", file=sys.stderr)
        for filepath, line_num, placeholder in findings:
            print(f" {filepath}:{line_num} — {{{{{placeholder}}}}}", file=sys.stderr)
        sys.exit(1)
    print("All placeholders replaced successfully.")

    print(f"\nNext steps:")
    print(f" 1. Add training data to {values['data_source']}")
    print(f" 2. cd {ml_dir} && source .venv/bin/activate")
    print(f" 3. python prepare.py")
    print(f" 4. /turing:train (or: python train.py > run.log 2>&1)")
406
+
407
+
408
+ if __name__ == "__main__":
409
+ main()
@@ -0,0 +1,92 @@
1
+ """Display the runtime environment from train_metadata.json.
2
+
3
+ Shows python version, package versions, seeds, hardware, and config hash.
4
+ Useful for debugging reproducibility issues or comparing environments
5
+ across experiments.
6
+
7
+ Usage:
8
+ python scripts/show_environment.py # Current experiment
9
+ python scripts/show_environment.py --file path/to/metadata.json
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import argparse
15
+ import json
16
+ import sys
17
+ from pathlib import Path
18
+
19
# Default location where training writes its run metadata (see --file flag).
DEFAULT_METADATA_PATH = "train_metadata.json"
20
+
21
+
22
def format_environment(metadata: dict) -> str:
    """Render the "environment" section of *metadata* as a readable report.

    Covers platform info, GPU, seeds, package versions, and the config hash.
    Returns a short message when no environment data was recorded.
    """
    env = metadata.get("environment", {})
    if not env:
        return "No environment data recorded."

    out: list = ["Environment", "=" * 50]

    # Platform block — every field falls back to "?" when absent.
    for label, key in (
        ("Python", "python_version"),
        ("Platform", "platform"),
        ("Machine", "machine"),
        ("OS", "os"),
    ):
        out.append(f" {label}: {env.get(key, '?')}")

    # GPU block (optional).
    gpu = env.get("gpu")
    if gpu:
        out.append(f" GPU: {gpu.get('name', '?')} (CUDA {gpu.get('cuda_version', '?')})")
        out.append(f" GPUs: {gpu.get('device_count', '?')}")
    else:
        out.append(" GPU: none")

    # Seeds block — header always printed, entries sorted by name.
    out += ["", "Seeds", "-" * 50]
    out += [f" {key}: {value}" for key, value in sorted(env.get("seeds", {}).items())]

    # Packages block (only when recorded), sorted by package name.
    packages = env.get("packages", {})
    if packages:
        out += ["", "Packages", "-" * 50]
        out += [f" {pkg}: {version}" for pkg, version in sorted(packages.items())]

    # Config hash (only when recorded).
    config_hash = env.get("config_hash")
    if config_hash:
        out += ["", f"Config hash: {config_hash}"]

    return "\n".join(out)
69
+
70
+
71
def main() -> None:
    """CLI entry point: load the metadata file and print its environment."""
    parser = argparse.ArgumentParser(description="Show experiment environment")
    parser.add_argument(
        "--file",
        default=DEFAULT_METADATA_PATH,
        help=f"Path to metadata file (default: {DEFAULT_METADATA_PATH})",
    )
    args = parser.parse_args()

    path = Path(args.file)
    if not path.exists():
        print(f"No metadata file at {path}. Run training first.", file=sys.stderr)
        sys.exit(1)

    metadata = json.loads(path.read_text())
    print(format_environment(metadata))
89
+
90
+
91
+ if __name__ == "__main__":
92
+ main()
@@ -0,0 +1,144 @@
1
+ #!/usr/bin/env python3
2
+ """Experiment dependency tree visualizer.
3
+
4
+ Displays the experiment lineage — which experiments inspired which —
5
+ as a text tree. Makes the agent's reasoning chain visible to the human.
6
+
7
+ Usage:
8
+ python scripts/show_experiment_tree.py [--log experiments/log.jsonl]
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import argparse
14
+ import json
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ from scripts.turing_io import load_experiments
19
+
20
+
21
def build_tree(experiments: list[dict]) -> dict[str, list[str]]:
    """Build a parent -> children adjacency map from experiment records.

    Every experiment id appears as a key, even leaves with no children.
    """
    adjacency: dict[str, list[str]] = {}
    for record in experiments:
        exp_id = record.get("experiment_id", "")
        parent_id = record.get("parent_experiment")
        if parent_id:
            adjacency.setdefault(parent_id, []).append(exp_id)
        # Guarantee the node itself has an entry, even without children.
        adjacency.setdefault(exp_id, [])
    return adjacency
32
+
33
+
34
def find_roots(experiments: list[dict], children: dict[str, list[str]]) -> list[str]:
    """Find experiments with no (known) parent — the tree roots.

    An experiment is a root when its parent_experiment is None OR its parent
    id does not appear anywhere in the log (orphaned lineage still displays).

    Args:
        experiments: Raw experiment records.
        children: Parent -> children map. Kept for interface compatibility;
            not needed for root detection.

    Returns:
        Root experiment ids, in log order.
    """
    all_ids = {e.get("experiment_id", "") for e in experiments}

    # DEAD CODE REMOVED: the original also accumulated a child_ids set from
    # `children` here but never read it.
    roots = []
    for exp in experiments:
        eid = exp.get("experiment_id", "")
        parent = exp.get("parent_experiment")
        if parent is None or parent not in all_ids:
            roots.append(eid)
    return roots
50
+
51
+
52
def format_tree(
    node: str,
    children: dict[str, list[str]],
    experiments_by_id: dict[str, dict],
    prefix: str = "",
    is_last: bool = True,
    metric_name: str = "accuracy",
) -> list[str]:
    """Recursively format a tree node and its subtree as ASCII-art lines.

    Args:
        node: Experiment id to render.
        children: Parent -> children adjacency map.
        experiments_by_id: Lookup from experiment id to its record.
        prefix: Indentation inherited from ancestor levels.
        is_last: Whether this node is its parent's final child (affects
            connector glyphs and the prefix passed to descendants).
        metric_name: Which metric to display next to each node.

    Returns:
        Formatted lines for this node followed by its whole subtree.
    """
    exp = experiments_by_id.get(node, {})
    status = exp.get("status", "?")
    metric_val = exp.get("metrics", {}).get(metric_name)
    model_type = exp.get("config", {}).get("model_type", "?")
    desc = exp.get("description", "")[:40]  # truncate long descriptions

    # CLEANUP: dropped the `status_marker` variable computed in the original
    # but never used; the [+]/[-]/[?] icon below carries the status.
    metric_str = f"{metric_name}={metric_val:.4f}" if isinstance(metric_val, (int, float)) else ""
    status_icon = "+" if status == "kept" else "-" if status == "discarded" else "?"

    connector = "`-- " if is_last else "|-- "
    lines = [f"{prefix}{connector}[{status_icon}] {node} ({model_type}, {metric_str}) {desc}"]

    # Child prefixes are 4 columns wide to align under the 4-char connectors.
    child_prefix = prefix + ("    " if is_last else "|   ")
    kids = children.get(node, [])
    for i, child in enumerate(kids):
        lines.extend(format_tree(
            child, children, experiments_by_id, child_prefix,
            is_last=(i == len(kids) - 1), metric_name=metric_name,
        ))

    return lines
86
+
87
+
88
def show_tree(log_path: str, metric_name: str = "accuracy") -> str:
    """Generate the full experiment tree as display-ready text."""
    experiments = load_experiments(log_path)
    if not experiments:
        return "No experiments logged yet."

    by_id = {e.get("experiment_id", ""): e for e in experiments}
    adjacency = build_tree(experiments)
    roots = find_roots(experiments, adjacency)
    if not roots:
        return "No root experiments found."

    lines = ["Experiment Tree", "=" * 60]
    last_index = len(roots) - 1
    for i, root in enumerate(roots):
        lines.extend(format_tree(
            root, adjacency, by_id,
            prefix="", is_last=(i == last_index),
            metric_name=metric_name,
        ))

    # Trailing summary and icon legend.
    kept_count = sum(1 for e in experiments if e.get("status") == "kept")
    lines += [
        "",
        f"Total: {len(experiments)} experiments, {kept_count} kept, depth={_max_depth(roots, adjacency)}",
        f"[+] = kept, [-] = discarded, [?] = other",
    ]

    return "\n".join(lines)
121
+
122
+
123
+ def _max_depth(roots: list[str], children: dict[str, list[str]], depth: int = 1) -> int:
124
+ """Compute maximum tree depth."""
125
+ max_d = depth
126
+ for root in roots:
127
+ kids = children.get(root, [])
128
+ if kids:
129
+ max_d = max(max_d, _max_depth(kids, children, depth + 1))
130
+ return max_d
131
+
132
+
133
def main() -> None:
    """CLI entry point."""
    parser = argparse.ArgumentParser(description="Show experiment dependency tree")
    parser.add_argument("--log", default="experiments/log.jsonl")
    parser.add_argument("--metric", default="accuracy", help="Metric to display")
    opts = parser.parse_args()

    print(show_tree(opts.log, opts.metric))
141
+
142
+
143
+ if __name__ == "__main__":
144
+ main()