claude-turing 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +34 -0
- package/LICENSE +21 -0
- package/README.md +457 -0
- package/agents/ml-evaluator.md +43 -0
- package/agents/ml-researcher.md +74 -0
- package/bin/cli.js +46 -0
- package/bin/turing-init.sh +57 -0
- package/commands/brief.md +83 -0
- package/commands/compare.md +24 -0
- package/commands/design.md +97 -0
- package/commands/init.md +123 -0
- package/commands/logbook.md +51 -0
- package/commands/mode.md +43 -0
- package/commands/poster.md +89 -0
- package/commands/preflight.md +75 -0
- package/commands/report.md +97 -0
- package/commands/rules/loop-protocol.md +91 -0
- package/commands/status.md +24 -0
- package/commands/suggest.md +95 -0
- package/commands/sweep.md +45 -0
- package/commands/train.md +66 -0
- package/commands/try.md +63 -0
- package/commands/turing.md +54 -0
- package/commands/validate.md +34 -0
- package/config/defaults.yaml +45 -0
- package/config/experiment_archetypes.yaml +127 -0
- package/config/lifecycle.toml +31 -0
- package/config/novelty_aliases.yaml +107 -0
- package/config/relationships.toml +125 -0
- package/config/state.toml +24 -0
- package/config/task_taxonomy.yaml +110 -0
- package/config/taxonomy.toml +37 -0
- package/package.json +54 -0
- package/src/claude-md.js +55 -0
- package/src/install.js +107 -0
- package/src/paths.js +20 -0
- package/src/postinstall.js +22 -0
- package/src/verify.js +109 -0
- package/templates/MEMORY.md +36 -0
- package/templates/README.md +93 -0
- package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
- package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
- package/templates/config.yaml +48 -0
- package/templates/evaluate.py +237 -0
- package/templates/features/__init__.py +0 -0
- package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
- package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
- package/templates/features/featurizers.py +138 -0
- package/templates/prepare.py +171 -0
- package/templates/program.md +216 -0
- package/templates/pyproject.toml +8 -0
- package/templates/requirements.txt +8 -0
- package/templates/scripts/__init__.py +0 -0
- package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/classify_task.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/manage_hypotheses.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/turing_io.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/update_state.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
- package/templates/scripts/check_convergence.py +230 -0
- package/templates/scripts/compare_runs.py +124 -0
- package/templates/scripts/critique_hypothesis.py +350 -0
- package/templates/scripts/experiment_index.py +288 -0
- package/templates/scripts/generate_brief.py +389 -0
- package/templates/scripts/generate_logbook.py +423 -0
- package/templates/scripts/log_experiment.py +243 -0
- package/templates/scripts/manage_hypotheses.py +543 -0
- package/templates/scripts/novelty_guard.py +343 -0
- package/templates/scripts/parse_metrics.py +139 -0
- package/templates/scripts/post-train-hook.sh +74 -0
- package/templates/scripts/preflight.py +549 -0
- package/templates/scripts/scaffold.py +409 -0
- package/templates/scripts/show_environment.py +92 -0
- package/templates/scripts/show_experiment_tree.py +144 -0
- package/templates/scripts/show_families.py +133 -0
- package/templates/scripts/show_metrics.py +157 -0
- package/templates/scripts/statistical_compare.py +259 -0
- package/templates/scripts/stop-hook.sh +34 -0
- package/templates/scripts/suggest_next.py +301 -0
- package/templates/scripts/sweep.py +276 -0
- package/templates/scripts/synthesize_decision.py +300 -0
- package/templates/scripts/turing_io.py +76 -0
- package/templates/scripts/update_state.py +296 -0
- package/templates/scripts/validate_stability.py +167 -0
- package/templates/scripts/verify_placeholders.py +119 -0
- package/templates/sweep_config.yaml +14 -0
- package/templates/tests/__init__.py +0 -0
- package/templates/tests/conftest.py +91 -0
- package/templates/train.py +240 -0
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Unified project scaffolding for the autoresearch pipeline.
|
|
3
|
+
|
|
4
|
+
Single implementation of project scaffolding used by both:
|
|
5
|
+
- /turing:init (Claude Code) — calls with --interactive or pre-filled args
|
|
6
|
+
- bin/turing-init.sh (CLI) — calls with explicit --project-name etc.
|
|
7
|
+
|
|
8
|
+
Eliminates the dual-path divergence where init.md and turing-init.sh
|
|
9
|
+
had different capabilities (substitution, hooks, venv, memory).
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
python scripts/scaffold.py --project-name sentiment --target-metric accuracy \\
|
|
13
|
+
--task-description "Predict sentiment" --ml-dir ml/sentiment \\
|
|
14
|
+
--data-source data/reviews.csv --metric-direction higher
|
|
15
|
+
|
|
16
|
+
python scripts/scaffold.py --interactive # Prompt for each value
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
import re
|
|
25
|
+
import shutil
|
|
26
|
+
import subprocess
|
|
27
|
+
import sys
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
|
|
30
|
+
# Maps template placeholder tokens (written as {{NAME}} in template files)
# to the key in the scaffold values dict that supplies the replacement text.
PLACEHOLDER_MAP = {
    "PROJECT_NAME": "project_name",
    "TARGET_METRIC": "target_metric",
    "TASK_DESCRIPTION": "task_description",
    "ML_DIR": "ml_dir",
    "DATA_SOURCE": "data_source",
    "METRIC_DIRECTION": "metric_direction",
}

# Files to copy from templates/ to the ML directory
TEMPLATE_FILES = [
    "prepare.py",
    "evaluate.py",
    "train.py",
    "config.yaml",
    "sweep_config.yaml",
    "program.md",
    "README.md",
    "MEMORY.md",
    "requirements.txt",
    "pyproject.toml",
]

# Subdirectories of templates/ to copy, with the specific files taken from
# each one (files missing from the templates dir are skipped silently).
TEMPLATE_DIRS = {
    "features": ["__init__.py", "featurizers.py"],
    "scripts": [
        "__init__.py",
        "log_experiment.py",
        "show_metrics.py",
        "compare_runs.py",
        "sweep.py",
        "post-train-hook.sh",
        "stop-hook.sh",
        "check_convergence.py",
        "verify_placeholders.py",
        "manage_hypotheses.py",
        "generate_brief.py",
        "show_experiment_tree.py",
        "statistical_compare.py",
        "suggest_next.py",
        "update_state.py",
        "parse_metrics.py",
        "scaffold.py",
        "novelty_guard.py",
        "synthesize_decision.py",
        "show_families.py",
        "critique_hypothesis.py",
        "experiment_index.py",
        "generate_logbook.py",
        "validate_stability.py",
        "show_environment.py",
        "turing_io.py",
        "preflight.py",
    ],
    "tests": ["__init__.py", "conftest.py"],
}

# Empty directories created inside the target ML directory.
DIRECTORIES_TO_CREATE = [
    "data/splits",
    "experiments",
    "models/best",
    "models/archive",
]

# Scripts that receive the executable bit after copying
# (paths relative to the target ML directory).
SHELL_SCRIPTS = [
    "scripts/post-train-hook.sh",
    "scripts/stop-hook.sh",
]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def find_templates_dir() -> Path | None:
    """Locate the templates directory relative to this script or plugin root.

    Search order:
      1. The parent of this script's directory (templates/scripts/ layout).
      2. A ``templates/`` directory at the plugin root (two levels up).
      3. ``~/.claude/plugins/*/templates`` (first match in sorted order).

    Returns:
        A directory containing ``prepare.py``, or None if nothing matches.
    """
    script_dir = Path(__file__).parent

    # Case 1: we are inside the plugin's templates/scripts/
    candidate = script_dir.parent  # templates/
    if (candidate / "prepare.py").exists():
        return candidate

    # Case 2: plugin root (two levels up from scripts/)
    candidate = script_dir.parent.parent / "templates"
    if candidate.exists() and (candidate / "prepare.py").exists():
        return candidate

    # Case 3: installed plugin locations under the home directory.
    # BUG FIX: the original built a Path containing a literal "*" component
    # and then globbed only its final name ("templates"), so the wildcard
    # was never expanded and this branch always found nothing. Glob the
    # wildcard segment instead.
    plugins_root = Path.home() / ".claude" / "plugins"
    for match in sorted(plugins_root.glob("*/templates")):
        if (match / "prepare.py").exists():
            return match

    return None
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def replace_placeholders(text: str, values: dict[str, str]) -> str:
    """Substitute every known {{PLACEHOLDER}} marker in *text*.

    Markers whose corresponding key is missing from *values* are replaced
    with the empty string.
    """
    result = text
    for marker, key in PLACEHOLDER_MAP.items():
        token = "{{" + marker + "}}"
        result = result.replace(token, values.get(key, ""))
    return result
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def scaffold_project(
    templates_dir: Path,
    ml_dir: str,
    values: dict[str, str],
    setup_venv: bool = True,
    setup_hooks: bool = True,
) -> dict[str, int]:
    """Scaffold a complete ML project.

    Args:
        templates_dir: Path to the templates/ directory.
        ml_dir: Target ML directory (relative to cwd).
        values: Dict mapping arg names to values for placeholder substitution.
        setup_venv: Whether to create and populate a Python venv.
        setup_hooks: Whether to configure Claude Code hooks.

    Returns:
        Dict with counts: files_copied, placeholders_replaced, dirs_created.
    """
    target = Path(ml_dir)
    target.mkdir(parents=True, exist_ok=True)

    stats = {"files_copied": 0, "placeholders_replaced": 0, "dirs_created": 0}
    placeholder_re = re.compile(r"\{\{([A-Z_]+)\}\}")

    def _copy_with_substitution(src: Path, dest: Path) -> None:
        # Copy one template file, substituting placeholders and updating stats.
        content = src.read_text(encoding="utf-8")
        # Known placeholders present before substitution are exactly the
        # ones replace_placeholders() will replace.
        # BUG FIX: the original scanned for leftover placeholders after
        # copying but discarded the result, so placeholders_replaced was
        # always reported as 0.
        replaced = sum(
            1 for m in placeholder_re.finditer(content)
            if m.group(1) in PLACEHOLDER_MAP
        )
        dest.write_text(replace_placeholders(content, values), encoding="utf-8")
        stats["files_copied"] += 1
        stats["placeholders_replaced"] += replaced

    # Copy and substitute top-level template files
    for filename in TEMPLATE_FILES:
        src = templates_dir / filename
        if not src.exists():
            # BUG FIX: the warning previously printed the literal text
            # "(unknown)" instead of the missing filename.
            print(f" Warning: template {filename} not found, skipping", file=sys.stderr)
            continue
        _copy_with_substitution(src, target / filename)

    # Copy and substitute template subdirectories
    for dirname, files in TEMPLATE_DIRS.items():
        dir_target = target / dirname
        dir_target.mkdir(parents=True, exist_ok=True)
        for filename in files:
            src = templates_dir / dirname / filename
            if not src.exists():
                continue
            _copy_with_substitution(src, dir_target / filename)

    # Create the empty project directories
    for d in DIRECTORIES_TO_CREATE:
        (target / d).mkdir(parents=True, exist_ok=True)
        stats["dirs_created"] += 1

    # Make shell scripts executable (add u+g+o execute bits)
    for script in SHELL_SCRIPTS:
        script_path = target / script
        if script_path.exists():
            script_path.chmod(script_path.stat().st_mode | 0o111)

    # Setup agent memory (written under the caller's cwd, not the ML dir;
    # intentionally not counted in files_copied, matching prior behavior)
    memory_dir = Path(".claude") / "agent-memory" / "ml-researcher"
    memory_dir.mkdir(parents=True, exist_ok=True)
    memory_src = templates_dir / "MEMORY.md"
    if memory_src.exists():
        content = memory_src.read_text(encoding="utf-8")
        content = replace_placeholders(content, values)
        (memory_dir / "MEMORY.md").write_text(content, encoding="utf-8")

    # Optional integrations
    if setup_hooks:
        _setup_hooks(ml_dir)
    if setup_venv:
        _setup_venv(target)

    return stats
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _setup_hooks(ml_dir: str) -> None:
    """Register the project's train/stop hooks in .claude/settings.local.json.

    Existing settings are preserved; each hook command is appended only if
    it is not already present, so repeated calls are idempotent.
    """
    settings_file = Path(".claude") / "settings.local.json"
    settings_file.parent.mkdir(parents=True, exist_ok=True)

    settings: dict = {}
    if settings_file.exists():
        try:
            settings = json.loads(settings_file.read_text())
        except (json.JSONDecodeError, FileNotFoundError):
            settings = {}

    hooks = settings.get("hooks", {})

    # PostToolUse hook for auto-logging after Bash tool calls.
    post_cmd = f"bash {ml_dir}/scripts/post-train-hook.sh"
    post_entries = hooks.get("PostToolUse", [])
    if not any(post_cmd in str(entry) for entry in post_entries):
        post_entries.append(
            {"matcher": "Bash", "hooks": [{"type": "command", "command": post_cmd}]}
        )
    hooks["PostToolUse"] = post_entries

    # Stop hook for convergence checking.
    stop_cmd = f"bash {ml_dir}/scripts/stop-hook.sh"
    stop_entries = hooks.get("Stop", [])
    if not any(stop_cmd in str(entry) for entry in stop_entries):
        stop_entries.append({"type": "command", "command": stop_cmd})
    hooks["Stop"] = stop_entries

    settings["hooks"] = hooks
    settings_file.write_text(json.dumps(settings, indent=2))
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _setup_venv(target: Path) -> None:
    """Create a Python venv inside *target* and install its requirements.

    Best-effort: subprocess failures are downgraded to a stderr warning so
    scaffolding can still complete without a working venv.
    """
    venv_dir = target / ".venv"
    if venv_dir.exists():
        print(" Venv already exists, skipping creation.", file=sys.stderr)
        return

    print(" Creating virtual environment...", file=sys.stderr)
    try:
        subprocess.run(
            [sys.executable, "-m", "venv", str(venv_dir)],
            check=True,
            capture_output=True,
        )
        requirements = target / "requirements.txt"
        if requirements.exists():
            print(" Installing requirements...", file=sys.stderr)
            pip_exe = str(venv_dir / "bin" / "pip")
            subprocess.run(
                [pip_exe, "install", "-r", str(requirements)],
                check=True,
                capture_output=True,
            )
    except subprocess.CalledProcessError as e:
        print(f" Warning: venv setup failed: {e}", file=sys.stderr)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def verify_placeholders(ml_dir: str) -> list[tuple[str, int, str]]:
    """Check for unreplaced placeholders in scaffolded files.

    Scans text files under *ml_dir* (skipping anything inside .venv) for
    {{NAME}} markers whose NAME is one of the known placeholders.

    Returns:
        List of (filepath, line_number, placeholder) tuples; filepath is
        relative to *ml_dir* and line_number is 1-based.
    """
    target = Path(ml_dir)
    placeholder_re = re.compile(r"\{\{([A-Z_]+)\}\}")
    known = set(PLACEHOLDER_MAP.keys())
    text_suffixes = (".py", ".yaml", ".yml", ".md", ".sh", ".txt", ".toml")
    findings: list[tuple[str, int, str]] = []

    for path in sorted(target.rglob("*")):
        if not path.is_file():
            continue
        if path.suffix not in text_suffixes:
            continue
        if ".venv" in path.parts:
            continue
        try:
            # CONSISTENCY FIX: read as UTF-8 explicitly, matching how the
            # files were written during scaffolding (the platform default
            # encoding may differ and mis-decode them).
            text = path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            continue
        for i, line in enumerate(text.splitlines(), 1):
            for match in placeholder_re.finditer(line):
                name = match.group(1)
                if name in known:
                    findings.append((str(path.relative_to(target)), i, name))

    return findings
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def interactive_prompt() -> dict[str, str]:
    """Collect all scaffold values from stdin prompts."""
    print("\nTuring ML Research Harness — Project Setup\n")

    fixed_prompts = [
        ("project_name", "Project name (e.g., sentiment, churn): "),
        ("target_metric", "Primary metric (accuracy, f1, mae, mse, auc): "),
        ("metric_direction", "Is lower better or higher better? (lower/higher): "),
        ("task_description", "Task description (e.g., Predict customer churn): "),
    ]
    values: dict[str, str] = {}
    for key, prompt in fixed_prompts:
        values[key] = input(prompt).strip()

    # The ml_dir prompt embeds the project name collected just above.
    values["ml_dir"] = input(f"ML directory (e.g., ml/{values['project_name']}): ").strip()
    values["data_source"] = input("Data source path (e.g., data/training.csv): ").strip()

    return values
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def main() -> None:
    """CLI entry point: parse args, scaffold, verify, print next steps."""
    parser = argparse.ArgumentParser(description="Scaffold an ML project")
    parser.add_argument("--interactive", action="store_true", help="Prompt for values")
    parser.add_argument("--project-name", default=None)
    parser.add_argument("--target-metric", default=None)
    parser.add_argument("--metric-direction", default=None)
    parser.add_argument("--task-description", default=None)
    parser.add_argument("--ml-dir", default=None)
    parser.add_argument("--data-source", default=None)
    parser.add_argument("--no-venv", action="store_true", help="Skip venv creation")
    parser.add_argument("--no-hooks", action="store_true", help="Skip hook configuration")
    parser.add_argument("--templates-dir", default=None, help="Override templates directory")
    args = parser.parse_args()

    # Gather scaffold values either interactively or from flags (with defaults).
    if args.interactive:
        values = interactive_prompt()
    else:
        fallbacks = {
            "project_name": "my-project",
            "target_metric": "accuracy",
            "metric_direction": "higher",
            "task_description": "ML task",
            "ml_dir": ".",
            "data_source": "data/training.csv",
        }
        values = {key: getattr(args, key) or default for key, default in fallbacks.items()}

    # Resolve the templates directory (an explicit flag wins over discovery).
    templates_dir = Path(args.templates_dir) if args.templates_dir else find_templates_dir()
    if templates_dir is None or not templates_dir.exists():
        print("Error: Cannot find templates directory.", file=sys.stderr)
        print("Use --templates-dir to specify the path.", file=sys.stderr)
        sys.exit(1)

    ml_dir = values["ml_dir"]

    print(f"\nScaffolding project: {values['project_name']}")
    print(f"Directory: {ml_dir}")
    print(f"Metric: {values['target_metric']} ({values['metric_direction']} is better)")
    print()

    stats = scaffold_project(
        templates_dir=templates_dir,
        ml_dir=ml_dir,
        values=values,
        setup_venv=not args.no_venv,
        setup_hooks=not args.no_hooks,
    )

    print(f"\nScaffolded {stats['files_copied']} files, {stats['dirs_created']} directories.")

    # Fail loudly if any known placeholder survived substitution.
    findings = verify_placeholders(ml_dir)
    if findings:
        print(f"\nWarning: {len(findings)} unreplaced placeholder(s):", file=sys.stderr)
        for filepath, line_num, placeholder in findings:
            print(f" {filepath}:{line_num} — {{{{{placeholder}}}}}", file=sys.stderr)
        sys.exit(1)
    print("All placeholders replaced successfully.")

    print("\nNext steps:")
    print(f" 1. Add training data to {values['data_source']}")
    print(f" 2. cd {ml_dir} && source .venv/bin/activate")
    print(" 3. python prepare.py")
    print(" 4. /turing:train (or: python train.py > run.log 2>&1)")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Display the runtime environment from train_metadata.json.
|
|
2
|
+
|
|
3
|
+
Shows python version, package versions, seeds, hardware, and config hash.
|
|
4
|
+
Useful for debugging reproducibility issues or comparing environments
|
|
5
|
+
across experiments.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python scripts/show_environment.py # Current experiment
|
|
9
|
+
python scripts/show_environment.py --file path/to/metadata.json
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
import json
|
|
16
|
+
import sys
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
# Default metadata filename, looked up relative to the current directory.
DEFAULT_METADATA_PATH = "train_metadata.json"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def format_environment(metadata: dict) -> str:
    """Render the 'environment' section of *metadata* as readable text.

    Covers platform info, GPU, seeds, package versions, and the config
    hash. Returns a placeholder message when no environment was recorded.
    """
    env = metadata.get("environment", {})
    if not env:
        return "No environment data recorded."

    out: list[str] = ["Environment", "=" * 50]

    # Platform block
    for label, key in (
        ("Python", "python_version"),
        ("Platform", "platform"),
        ("Machine", "machine"),
        ("OS", "os"),
    ):
        out.append(f" {label}: {env.get(key, '?')}")

    # GPU block (a missing/None entry means CPU-only)
    gpu = env.get("gpu")
    if gpu:
        out.append(f" GPU: {gpu.get('name', '?')} (CUDA {gpu.get('cuda_version', '?')})")
        out.append(f" GPUs: {gpu.get('device_count', '?')}")
    else:
        out.append(" GPU: none")

    # Seeds (always shown, even if empty)
    out.append("")
    out.append("Seeds")
    out.append("-" * 50)
    for key, value in sorted(env.get("seeds", {}).items()):
        out.append(f" {key}: {value}")

    # Installed package versions
    packages = env.get("packages", {})
    if packages:
        out.extend(["", "Packages", "-" * 50])
        out.extend(f" {pkg}: {version}" for pkg, version in sorted(packages.items()))

    # Config hash, if recorded
    config_hash = env.get("config_hash")
    if config_hash:
        out.append("")
        out.append(f"Config hash: {config_hash}")

    return "\n".join(out)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def main() -> None:
    """Load the metadata file and print its environment section."""
    parser = argparse.ArgumentParser(description="Show experiment environment")
    parser.add_argument(
        "--file",
        default=DEFAULT_METADATA_PATH,
        help=f"Path to metadata file (default: {DEFAULT_METADATA_PATH})",
    )
    args = parser.parse_args()

    metadata_path = Path(args.file)
    if not metadata_path.exists():
        print(f"No metadata file at {metadata_path}. Run training first.", file=sys.stderr)
        sys.exit(1)

    metadata = json.loads(metadata_path.read_text())
    print(format_environment(metadata))


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Experiment dependency tree visualizer.
|
|
3
|
+
|
|
4
|
+
Displays the experiment lineage — which experiments inspired which —
|
|
5
|
+
as a text tree. Makes the agent's reasoning chain visible to the human.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python scripts/show_experiment_tree.py [--log experiments/log.jsonl]
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import argparse
|
|
14
|
+
import json
|
|
15
|
+
import sys
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
from scripts.turing_io import load_experiments
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def build_tree(experiments: list[dict]) -> dict[str, list[str]]:
    """Build a parent-id -> child-ids mapping from experiment records.

    Every experiment id appears as a key, even when it has no children;
    children are listed in log order.
    """
    tree: dict[str, list[str]] = {}
    for record in experiments:
        exp_id = record.get("experiment_id", "")
        parent_id = record.get("parent_experiment")
        if parent_id:
            tree.setdefault(parent_id, []).append(exp_id)
        # Guarantee the id exists as a key, childless or not.
        tree.setdefault(exp_id, [])
    return tree
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def find_roots(experiments: list[dict], children: dict[str, list[str]]) -> list[str]:
    """Find experiments with no parent (tree roots).

    An experiment is a root when it has no parent, or when its parent id
    does not appear anywhere in the log (orphaned lineage).

    Args:
        experiments: Experiment records from the log.
        children: Parent -> children mapping (accepted for interface
            compatibility; roots are determined purely from parent links).

    Returns:
        Root experiment ids in log order.
    """
    all_ids = {e.get("experiment_id", "") for e in experiments}
    # CLEANUP: the original also built a set of all child ids from
    # `children` but never used it; that dead computation is removed.
    roots = []
    for exp in experiments:
        parent = exp.get("parent_experiment")
        if parent is None or parent not in all_ids:
            roots.append(exp.get("experiment_id", ""))
    return roots
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def format_tree(
    node: str,
    children: dict[str, list[str]],
    experiments_by_id: dict[str, dict],
    prefix: str = "",
    is_last: bool = True,
    metric_name: str = "accuracy",
) -> list[str]:
    """Recursively format a tree node and its children as ASCII-art lines.

    Args:
        node: Experiment id at the root of this subtree.
        children: Parent -> children mapping.
        experiments_by_id: Experiment id -> full record.
        prefix: Indentation prefix accumulated from ancestor levels.
        is_last: Whether *node* is the last sibling (controls connectors).
        metric_name: Metric to display next to each node.

    Returns:
        One formatted line per node in the subtree, depth-first.
    """
    exp = experiments_by_id.get(node, {})
    status = exp.get("status", "?")
    metric_val = exp.get("metrics", {}).get(metric_name)
    model_type = exp.get("config", {}).get("model_type", "?")
    desc = exp.get("description", "")[:40]  # truncate long descriptions

    # CLEANUP: removed an unused `status_marker` variable from the original.
    metric_str = f"{metric_name}={metric_val:.4f}" if isinstance(metric_val, (int, float)) else ""
    status_icon = "+" if status == "kept" else "-" if status == "discarded" else "?"

    connector = "`-- " if is_last else "|-- "
    lines = [f"{prefix}{connector}[{status_icon}] {node} ({model_type}, {metric_str}) {desc}"]

    # Children extend the prefix with a spacer, or a pipe when more
    # siblings still follow below this node.
    child_prefix = prefix + ("    " if is_last else "|   ")
    kids = children.get(node, [])
    for i, child in enumerate(kids):
        lines.extend(format_tree(
            child, children, experiments_by_id, child_prefix,
            is_last=(i == len(kids) - 1), metric_name=metric_name,
        ))

    return lines
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def show_tree(log_path: str, metric_name: str = "accuracy") -> str:
    """Generate the full experiment tree as text.

    Loads the experiment log, renders each root's subtree, then appends a
    summary with totals, kept count, and maximum depth.
    """
    experiments = load_experiments(log_path)
    if not experiments:
        return "No experiments logged yet."

    by_id = {e.get("experiment_id", ""): e for e in experiments}
    children = build_tree(experiments)
    roots = find_roots(experiments, children)
    if not roots:
        return "No root experiments found."

    out = ["Experiment Tree", "=" * 60]
    last_index = len(roots) - 1
    for i, root in enumerate(roots):
        out.extend(format_tree(
            root, children, by_id,
            prefix="", is_last=(i == last_index),
            metric_name=metric_name,
        ))

    # Summary footer with aggregate stats and icon legend.
    kept_count = sum(e.get("status") == "kept" for e in experiments)
    out.append("")
    out.append(f"Total: {len(experiments)} experiments, {kept_count} kept, depth={_max_depth(roots, children)}")
    out.append("[+] = kept, [-] = discarded, [?] = other")

    return "\n".join(out)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _max_depth(roots: list[str], children: dict[str, list[str]], depth: int = 1) -> int:
    """Compute maximum tree depth reachable from *roots* (a lone node = 1)."""
    deepest = depth
    for node in roots:
        kids = children.get(node, [])
        if kids:
            deepest = max(deepest, _max_depth(kids, children, depth + 1))
    return deepest
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def main() -> None:
    """CLI entry point."""
    parser = argparse.ArgumentParser(description="Show experiment dependency tree")
    parser.add_argument("--log", default="experiments/log.jsonl")
    parser.add_argument("--metric", default="accuracy", help="Metric to display")
    opts = parser.parse_args()

    print(show_tree(opts.log, opts.metric))


if __name__ == "__main__":
    main()
|