claude-turing 4.8.0 → 4.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +1 -1
- package/agents/ml-evaluator.md +4 -4
- package/agents/ml-researcher.md +2 -2
- package/bin/turing-init.sh +2 -2
- package/commands/ablate.md +3 -3
- package/commands/annotate.md +2 -2
- package/commands/archive.md +2 -2
- package/commands/audit.md +3 -3
- package/commands/baseline.md +3 -3
- package/commands/brief.md +5 -5
- package/commands/budget.md +3 -3
- package/commands/calibrate.md +3 -3
- package/commands/card.md +3 -3
- package/commands/changelog.md +2 -2
- package/commands/checkpoint.md +3 -3
- package/commands/cite.md +2 -2
- package/commands/compare.md +1 -1
- package/commands/counterfactual.md +2 -2
- package/commands/curriculum.md +3 -3
- package/commands/design.md +3 -3
- package/commands/diagnose.md +4 -4
- package/commands/diff.md +3 -3
- package/commands/distill.md +3 -3
- package/commands/doctor.md +2 -2
- package/commands/ensemble.md +3 -3
- package/commands/explore.md +4 -4
- package/commands/export.md +3 -3
- package/commands/feature.md +3 -3
- package/commands/flashback.md +2 -2
- package/commands/fork.md +3 -3
- package/commands/frontier.md +3 -3
- package/commands/init.md +5 -5
- package/commands/leak.md +3 -3
- package/commands/lit.md +3 -3
- package/commands/logbook.md +5 -5
- package/commands/merge.md +2 -2
- package/commands/mode.md +1 -1
- package/commands/onboard.md +2 -2
- package/commands/paper.md +3 -3
- package/commands/plan.md +2 -2
- package/commands/poster.md +3 -3
- package/commands/postmortem.md +2 -2
- package/commands/preflight.md +5 -5
- package/commands/present.md +2 -2
- package/commands/profile.md +3 -3
- package/commands/prune.md +2 -2
- package/commands/quantize.md +2 -2
- package/commands/queue.md +3 -3
- package/commands/registry.md +2 -2
- package/commands/regress.md +3 -3
- package/commands/replay.md +2 -2
- package/commands/report.md +3 -3
- package/commands/reproduce.md +3 -3
- package/commands/retry.md +3 -3
- package/commands/review.md +2 -2
- package/commands/rules/loop-protocol.md +11 -11
- package/commands/sanity.md +3 -3
- package/commands/scale.md +4 -4
- package/commands/search.md +2 -2
- package/commands/seed.md +3 -3
- package/commands/sensitivity.md +3 -3
- package/commands/share.md +2 -2
- package/commands/simulate.md +2 -2
- package/commands/status.md +1 -1
- package/commands/stitch.md +3 -3
- package/commands/suggest.md +5 -5
- package/commands/surgery.md +2 -2
- package/commands/sweep.md +8 -8
- package/commands/template.md +2 -2
- package/commands/train.md +5 -5
- package/commands/transfer.md +3 -3
- package/commands/trend.md +2 -2
- package/commands/try.md +4 -4
- package/commands/update.md +2 -2
- package/commands/validate.md +4 -4
- package/commands/warm.md +3 -3
- package/commands/watch.md +4 -4
- package/commands/whatif.md +2 -2
- package/commands/xray.md +3 -3
- package/config/commands.yaml +1 -1
- package/package.json +1 -1
- package/skills/turing/ablate/SKILL.md +3 -3
- package/skills/turing/annotate/SKILL.md +2 -2
- package/skills/turing/archive/SKILL.md +2 -2
- package/skills/turing/audit/SKILL.md +3 -3
- package/skills/turing/baseline/SKILL.md +3 -3
- package/skills/turing/brief/SKILL.md +5 -5
- package/skills/turing/budget/SKILL.md +3 -3
- package/skills/turing/calibrate/SKILL.md +3 -3
- package/skills/turing/card/SKILL.md +3 -3
- package/skills/turing/changelog/SKILL.md +2 -2
- package/skills/turing/checkpoint/SKILL.md +3 -3
- package/skills/turing/cite/SKILL.md +2 -2
- package/skills/turing/compare/SKILL.md +1 -1
- package/skills/turing/counterfactual/SKILL.md +2 -2
- package/skills/turing/curriculum/SKILL.md +3 -3
- package/skills/turing/design/SKILL.md +3 -3
- package/skills/turing/diagnose/SKILL.md +4 -4
- package/skills/turing/diff/SKILL.md +3 -3
- package/skills/turing/distill/SKILL.md +3 -3
- package/skills/turing/doctor/SKILL.md +2 -2
- package/skills/turing/ensemble/SKILL.md +3 -3
- package/skills/turing/explore/SKILL.md +4 -4
- package/skills/turing/export/SKILL.md +3 -3
- package/skills/turing/feature/SKILL.md +3 -3
- package/skills/turing/flashback/SKILL.md +2 -2
- package/skills/turing/fork/SKILL.md +3 -3
- package/skills/turing/frontier/SKILL.md +3 -3
- package/skills/turing/init/SKILL.md +5 -5
- package/skills/turing/leak/SKILL.md +3 -3
- package/skills/turing/lit/SKILL.md +3 -3
- package/skills/turing/logbook/SKILL.md +5 -5
- package/skills/turing/merge/SKILL.md +2 -2
- package/skills/turing/mode/SKILL.md +1 -1
- package/skills/turing/onboard/SKILL.md +2 -2
- package/skills/turing/paper/SKILL.md +3 -3
- package/skills/turing/plan/SKILL.md +2 -2
- package/skills/turing/poster/SKILL.md +3 -3
- package/skills/turing/postmortem/SKILL.md +2 -2
- package/skills/turing/preflight/SKILL.md +5 -5
- package/skills/turing/present/SKILL.md +2 -2
- package/skills/turing/profile/SKILL.md +3 -3
- package/skills/turing/prune/SKILL.md +2 -2
- package/skills/turing/quantize/SKILL.md +2 -2
- package/skills/turing/queue/SKILL.md +3 -3
- package/skills/turing/registry/SKILL.md +2 -2
- package/skills/turing/regress/SKILL.md +3 -3
- package/skills/turing/replay/SKILL.md +2 -2
- package/skills/turing/report/SKILL.md +3 -3
- package/skills/turing/reproduce/SKILL.md +3 -3
- package/skills/turing/retry/SKILL.md +3 -3
- package/skills/turing/review/SKILL.md +2 -2
- package/skills/turing/rules/loop-protocol.md +11 -11
- package/skills/turing/sanity/SKILL.md +3 -3
- package/skills/turing/scale/SKILL.md +4 -4
- package/skills/turing/search/SKILL.md +2 -2
- package/skills/turing/seed/SKILL.md +3 -3
- package/skills/turing/sensitivity/SKILL.md +3 -3
- package/skills/turing/share/SKILL.md +2 -2
- package/skills/turing/simulate/SKILL.md +2 -2
- package/skills/turing/status/SKILL.md +1 -1
- package/skills/turing/stitch/SKILL.md +3 -3
- package/skills/turing/suggest/SKILL.md +5 -5
- package/skills/turing/surgery/SKILL.md +2 -2
- package/skills/turing/sweep/SKILL.md +8 -8
- package/skills/turing/template/SKILL.md +2 -2
- package/skills/turing/train/SKILL.md +5 -5
- package/skills/turing/transfer/SKILL.md +3 -3
- package/skills/turing/trend/SKILL.md +2 -2
- package/skills/turing/try/SKILL.md +4 -4
- package/skills/turing/update/SKILL.md +2 -2
- package/skills/turing/validate/SKILL.md +4 -4
- package/skills/turing/warm/SKILL.md +3 -3
- package/skills/turing/watch/SKILL.md +4 -4
- package/skills/turing/whatif/SKILL.md +2 -2
- package/skills/turing/xray/SKILL.md +3 -3
- package/templates/README.md +5 -8
- package/templates/program.md +18 -18
- package/templates/pyproject.toml +10 -0
- package/templates/requirements.txt +4 -1
- package/templates/scripts/generate_onboarding.py +1 -1
- package/templates/scripts/post-train-hook.sh +7 -8
- package/templates/scripts/scaffold.py +24 -26
- package/templates/scripts/stop-hook.sh +2 -3
- package/templates/scripts/turing-run-python.sh +9 -0
|
@@ -9,9 +9,9 @@ See inside the model. When it underperforms, the fix depends on *why*.
|
|
|
9
9
|
|
|
10
10
|
## Steps
|
|
11
11
|
|
|
12
|
-
1. **
|
|
12
|
+
1. **Sync environment:**
|
|
13
13
|
```bash
|
|
14
|
-
|
|
14
|
+
uv sync
|
|
15
15
|
```
|
|
16
16
|
|
|
17
17
|
2. **Parse arguments from `$ARGUMENTS`:**
|
|
@@ -22,7 +22,7 @@ See inside the model. When it underperforms, the fix depends on *why*.
|
|
|
22
22
|
|
|
23
23
|
3. **Run model diagnostics:**
|
|
24
24
|
```bash
|
|
25
|
-
python scripts/model_xray.py $ARGUMENTS
|
|
25
|
+
uv run python scripts/model_xray.py $ARGUMENTS
|
|
26
26
|
```
|
|
27
27
|
|
|
28
28
|
4. **Diagnostics by model type:**
|
package/templates/README.md
CHANGED
|
@@ -21,23 +21,21 @@ This separation is the invariant that makes experiment comparisons valid.
|
|
|
21
21
|
|
|
22
22
|
```bash
|
|
23
23
|
# 1. Set up the environment
|
|
24
|
-
|
|
25
|
-
source .venv/bin/activate
|
|
26
|
-
pip install -r requirements.txt
|
|
24
|
+
uv sync
|
|
27
25
|
|
|
28
26
|
# 2. Add your training data to {{DATA_SOURCE}}
|
|
29
27
|
|
|
30
28
|
# 3. Create train/val/test splits
|
|
31
|
-
python prepare.py
|
|
29
|
+
uv run python prepare.py
|
|
32
30
|
|
|
33
31
|
# 4. Run training
|
|
34
|
-
python train.py > run.log 2>&1
|
|
32
|
+
uv run python train.py > run.log 2>&1
|
|
35
33
|
|
|
36
34
|
# 5. Check results
|
|
37
35
|
grep -A 10 "^---" run.log
|
|
38
36
|
|
|
39
37
|
# 6. View experiment history
|
|
40
|
-
python scripts/show_metrics.py
|
|
38
|
+
uv run python scripts/show_metrics.py
|
|
41
39
|
```
|
|
42
40
|
|
|
43
41
|
## Using the Autoresearch Agent
|
|
@@ -88,6 +86,5 @@ For hands-off mode: `/loop 5m /turing:train`
|
|
|
88
86
|
## Running Tests
|
|
89
87
|
|
|
90
88
|
```bash
|
|
91
|
-
|
|
92
|
-
python -m pytest tests/ -v
|
|
89
|
+
uv run pytest tests/ -v
|
|
93
90
|
```
|
package/templates/program.md
CHANGED
|
@@ -54,11 +54,11 @@ Update it after each experiment with:
|
|
|
54
54
|
|
|
55
55
|
For systematic hyperparameter search:
|
|
56
56
|
1. Edit `sweep_config.yaml` with parameter ranges
|
|
57
|
-
2. Generate queue: `python scripts/sweep.py`
|
|
58
|
-
3. Check status: `python scripts/sweep.py --status`
|
|
59
|
-
4. Get next: `python scripts/sweep.py --next`
|
|
57
|
+
2. Generate queue: `uv run python scripts/sweep.py`
|
|
58
|
+
3. Check status: `uv run python scripts/sweep.py --status`
|
|
59
|
+
4. Get next: `uv run python scripts/sweep.py --next`
|
|
60
60
|
5. Apply overrides, create branch, run training
|
|
61
|
-
6. Mark done: `python scripts/sweep.py --mark <name> complete|failed`
|
|
61
|
+
6. Mark done: `uv run python scripts/sweep.py --mark <name> complete|failed`
|
|
62
62
|
|
|
63
63
|
## THE LOOP
|
|
64
64
|
|
|
@@ -66,8 +66,8 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
66
66
|
|
|
67
67
|
1. **OBSERVE** — Read recent results, check hypothesis queue, research plan, and review failed diffs:
|
|
68
68
|
```bash
|
|
69
|
-
python scripts/show_metrics.py --last 5
|
|
70
|
-
python scripts/manage_hypotheses.py next 2>/dev/null || echo "No queued hypotheses"
|
|
69
|
+
uv run python scripts/show_metrics.py --last 5
|
|
70
|
+
uv run python scripts/manage_hypotheses.py next 2>/dev/null || echo "No queued hypotheses"
|
|
71
71
|
cat RESEARCH_PLAN.md 2>/dev/null || true
|
|
72
72
|
```
|
|
73
73
|
|
|
@@ -88,12 +88,12 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
88
88
|
|
|
89
89
|
**If using a queued hypothesis:**
|
|
90
90
|
```bash
|
|
91
|
-
python scripts/manage_hypotheses.py mark hyp-NNN in-progress
|
|
91
|
+
uv run python scripts/manage_hypotheses.py mark hyp-NNN in-progress
|
|
92
92
|
```
|
|
93
93
|
|
|
94
94
|
**If generating your own hypothesis**, register it with structured detail:
|
|
95
95
|
```bash
|
|
96
|
-
python scripts/manage_hypotheses.py add "your hypothesis description" \
|
|
96
|
+
uv run python scripts/manage_hypotheses.py add "your hypothesis description" \
|
|
97
97
|
--priority medium --source agent \
|
|
98
98
|
--model-type xgboost \
|
|
99
99
|
--hyperparams '{"max_depth": 8, "n_estimators": 200}' \
|
|
@@ -101,7 +101,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
101
101
|
--tags "depth,estimators" \
|
|
102
102
|
--parent exp-NNN \
|
|
103
103
|
--expected "deeper trees should capture feature interactions"
|
|
104
|
-
python scripts/manage_hypotheses.py mark hyp-NNN in-progress
|
|
104
|
+
uv run python scripts/manage_hypotheses.py mark hyp-NNN in-progress
|
|
105
105
|
```
|
|
106
106
|
|
|
107
107
|
This creates both an index entry in `hypotheses.yaml` and a detailed file at `hypotheses/hyp-NNN.yaml` with full architecture, hyperparameters, expected outcome, and lineage.
|
|
@@ -110,7 +110,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
110
110
|
|
|
111
111
|
To read a hypothesis's full detail:
|
|
112
112
|
```bash
|
|
113
|
-
python scripts/manage_hypotheses.py show hyp-NNN
|
|
113
|
+
uv run python scripts/manage_hypotheses.py show hyp-NNN
|
|
114
114
|
```
|
|
115
115
|
|
|
116
116
|
3. **PREPARE** — Modify `config.yaml` for hyperparameter changes. Only modify `train.py` for structural code changes.
|
|
@@ -122,7 +122,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
122
122
|
|
|
123
123
|
5. **EXECUTE** training:
|
|
124
124
|
```bash
|
|
125
|
-
|
|
125
|
+
uv run python train.py > run.log 2>&1
|
|
126
126
|
```
|
|
127
127
|
|
|
128
128
|
6. **MEASURE** — Parse metrics from run.log:
|
|
@@ -144,7 +144,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
144
144
|
|
|
145
145
|
8. **RECORD** — Log the experiment (kept or discarded):
|
|
146
146
|
```bash
|
|
147
|
-
python scripts/log_experiment.py experiments/log.jsonl exp-NNN kept|discarded \
|
|
147
|
+
uv run python scripts/log_experiment.py experiments/log.jsonl exp-NNN kept|discarded \
|
|
148
148
|
'{"{{TARGET_METRIC}}": X.XX, ...}' \
|
|
149
149
|
'{"model_type": "xgboost", "hyperparams": {...}}' \
|
|
150
150
|
models/model.joblib "Description of hypothesis and outcome"
|
|
@@ -152,7 +152,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
152
152
|
|
|
153
153
|
Update the hypothesis status with result metrics:
|
|
154
154
|
```bash
|
|
155
|
-
python scripts/manage_hypotheses.py mark hyp-NNN tested \
|
|
155
|
+
uv run python scripts/manage_hypotheses.py mark hyp-NNN tested \
|
|
156
156
|
--result exp-NNN \
|
|
157
157
|
--metrics '{"{{TARGET_METRIC}}": X.XX, ...}' \
|
|
158
158
|
--notes "Brief explanation of what happened and why"
|
|
@@ -162,7 +162,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
162
162
|
|
|
163
163
|
Then synthesize a decision packet and auto-queue follow-ups:
|
|
164
164
|
```bash
|
|
165
|
-
python scripts/synthesize_decision.py --experiment exp-NNN --auto-queue
|
|
165
|
+
uv run python scripts/synthesize_decision.py --experiment exp-NNN --auto-queue
|
|
166
166
|
```
|
|
167
167
|
This produces a verdict (promote/branch_followup/abandon/fix_and_retry) and automatically queues follow-up hypotheses for `branch_followup` and `fix_and_retry` outcomes.
|
|
168
168
|
|
|
@@ -172,7 +172,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
172
172
|
- Report final best model and recommend next steps
|
|
173
173
|
- **Before declaring final results**, run a seed study to verify robustness:
|
|
174
174
|
```bash
|
|
175
|
-
python scripts/seed_runner.py --quick
|
|
175
|
+
uv run python scripts/seed_runner.py --quick
|
|
176
176
|
```
|
|
177
177
|
If CV > 5%, the result is seed-sensitive — report mean ± std, not a single-seed number.
|
|
178
178
|
|
|
@@ -180,9 +180,9 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
180
180
|
|
|
181
181
|
## Execution Rules
|
|
182
182
|
|
|
183
|
-
- **ALWAYS redirect output:** `python train.py > run.log 2>&1`
|
|
183
|
+
- **ALWAYS redirect output:** `uv run python train.py > run.log 2>&1`
|
|
184
184
|
- **ALWAYS parse with grep:** `grep -A 10 "^---" run.log | head -10`
|
|
185
|
-
- **ALWAYS
|
|
185
|
+
- **ALWAYS run Python through uv:** `uv run python ...`
|
|
186
186
|
- **NEVER install packages** without human approval
|
|
187
187
|
|
|
188
188
|
## Strategy Escalation Protocol
|
|
@@ -219,5 +219,5 @@ Starting suggestions (ordered by expected impact):
|
|
|
219
219
|
## Comparing Runs
|
|
220
220
|
|
|
221
221
|
```bash
|
|
222
|
-
python scripts/compare_runs.py exp-001 exp-002
|
|
222
|
+
uv run python scripts/compare_runs.py exp-001 exp-002
|
|
223
223
|
```
|
package/templates/pyproject.toml
CHANGED
|
@@ -2,6 +2,16 @@
|
|
|
2
2
|
name = "{{PROJECT_NAME}}-ml"
|
|
3
3
|
version = "0.1.0"
|
|
4
4
|
requires-python = ">=3.12"
|
|
5
|
+
dependencies = [
|
|
6
|
+
"scikit-learn>=1.6",
|
|
7
|
+
"xgboost>=3.2",
|
|
8
|
+
"lightgbm>=4.6",
|
|
9
|
+
"pandas>=2.2",
|
|
10
|
+
"numpy>=2.0",
|
|
11
|
+
"joblib>=1.4",
|
|
12
|
+
"pyyaml>=6.0",
|
|
13
|
+
"pytest>=8.0",
|
|
14
|
+
]
|
|
5
15
|
|
|
6
16
|
[tool.pytest.ini_options]
|
|
7
17
|
testpaths = ["tests"]
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# Compatibility export only. pyproject.toml is canonical for dependencies.
|
|
2
|
+
# Prefer: uv sync
|
|
3
|
+
|
|
1
4
|
scikit-learn>=1.6
|
|
2
5
|
xgboost>=3.2
|
|
3
6
|
lightgbm>=4.6
|
|
@@ -8,5 +11,5 @@ pyyaml>=6.0
|
|
|
8
11
|
pytest>=8.0
|
|
9
12
|
|
|
10
13
|
# Optional: tree-search-guided hypothesis exploration
|
|
11
|
-
# Install with:
|
|
14
|
+
# Install with: uv add "treequest[all]"
|
|
12
15
|
# treequest>=0.1
|
|
@@ -210,7 +210,7 @@ def format_onboarding_report(config, experiments, families, best, decisions,
|
|
|
210
210
|
"5. `/turing:try \"your hypothesis\"` — inject ideas",
|
|
211
211
|
"6. `/turing:train` — run next experiment",
|
|
212
212
|
], "engineer": [
|
|
213
|
-
"1. `
|
|
213
|
+
"1. `uv sync`",
|
|
214
214
|
"2. Review `config.yaml` for data paths",
|
|
215
215
|
"3. `/turing:status` — where things stand",
|
|
216
216
|
"4. Check `train.py` for current model",
|
|
@@ -18,24 +18,23 @@ EXPERIMENT_LOG="${ML_DIR}/experiments/log.jsonl"
|
|
|
18
18
|
if [[ -f "${ML_DIR}/run.log" ]]; then
|
|
19
19
|
LOG_FILE="${ML_DIR}/run.log"
|
|
20
20
|
elif [[ -f "run.log" ]]; then
|
|
21
|
-
LOG_FILE="run.log"
|
|
21
|
+
LOG_FILE="$(pwd)/run.log"
|
|
22
22
|
else
|
|
23
23
|
echo "post-train-hook: No run.log found, skipping."
|
|
24
24
|
exit 0
|
|
25
25
|
fi
|
|
26
26
|
|
|
27
|
-
# Activate venv and delegate to Python
|
|
28
27
|
cd "$ML_DIR"
|
|
29
|
-
source .
|
|
28
|
+
source "${SCRIPT_DIR}/turing-run-python.sh"
|
|
30
29
|
|
|
31
30
|
# Parse metrics using the canonical parser
|
|
32
|
-
PARSED=$(
|
|
31
|
+
PARSED=$(run_python scripts/parse_metrics.py "$LOG_FILE" --raw 2>/dev/null) || {
|
|
33
32
|
echo "post-train-hook: No metrics block found in run.log, skipping."
|
|
34
33
|
exit 0
|
|
35
34
|
}
|
|
36
35
|
|
|
37
36
|
# Extract metrics and metadata via Python (avoids bash JSON construction)
|
|
38
|
-
METRICS_JSON=$(
|
|
37
|
+
METRICS_JSON=$(run_python -c "
|
|
39
38
|
import json, sys
|
|
40
39
|
data = json.loads(sys.argv[1])
|
|
41
40
|
metadata_keys = {'model_type', 'train_seconds'}
|
|
@@ -43,7 +42,7 @@ metrics = {k: v for k, v in data.items() if k not in metadata_keys}
|
|
|
43
42
|
print(json.dumps(metrics))
|
|
44
43
|
" "$PARSED")
|
|
45
44
|
|
|
46
|
-
CONFIG_JSON=$(
|
|
45
|
+
CONFIG_JSON=$(run_python -c "
|
|
47
46
|
import json, sys
|
|
48
47
|
data = json.loads(sys.argv[1])
|
|
49
48
|
metadata_keys = {'model_type', 'train_seconds'}
|
|
@@ -55,14 +54,14 @@ print(json.dumps(config))
|
|
|
55
54
|
GIT_COMMIT=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")
|
|
56
55
|
|
|
57
56
|
# Get next experiment ID
|
|
58
|
-
NEXT_ID=$(
|
|
57
|
+
NEXT_ID=$(run_python -c "
|
|
59
58
|
import sys; sys.path.insert(0, 'scripts')
|
|
60
59
|
from log_experiment import get_next_experiment_id
|
|
61
60
|
print(get_next_experiment_id('$EXPERIMENT_LOG'))
|
|
62
61
|
")
|
|
63
62
|
|
|
64
63
|
# Log the experiment
|
|
65
|
-
|
|
64
|
+
run_python scripts/log_experiment.py \
|
|
66
65
|
"$EXPERIMENT_LOG" \
|
|
67
66
|
"$NEXT_ID" \
|
|
68
67
|
"kept" \
|
|
@@ -22,6 +22,7 @@ import argparse
|
|
|
22
22
|
import json
|
|
23
23
|
import os
|
|
24
24
|
import re
|
|
25
|
+
import shlex
|
|
25
26
|
import shutil
|
|
26
27
|
import subprocess
|
|
27
28
|
import sys
|
|
@@ -64,6 +65,7 @@ TEMPLATE_DIRS = {
|
|
|
64
65
|
"sweep.py",
|
|
65
66
|
"post-train-hook.sh",
|
|
66
67
|
"stop-hook.sh",
|
|
68
|
+
"turing-run-python.sh",
|
|
67
69
|
"check_convergence.py",
|
|
68
70
|
"verify_placeholders.py",
|
|
69
71
|
"manage_hypotheses.py",
|
|
@@ -220,6 +222,7 @@ DIRECTORIES_TO_CREATE = [
|
|
|
220
222
|
SHELL_SCRIPTS = [
|
|
221
223
|
"scripts/post-train-hook.sh",
|
|
222
224
|
"scripts/stop-hook.sh",
|
|
225
|
+
"scripts/turing-run-python.sh",
|
|
223
226
|
]
|
|
224
227
|
|
|
225
228
|
|
|
@@ -289,7 +292,7 @@ def scaffold_project(
|
|
|
289
292
|
templates_dir: Path to the templates/ directory.
|
|
290
293
|
ml_dir: Target ML directory (relative to cwd).
|
|
291
294
|
values: Dict mapping arg names to values for placeholder substitution.
|
|
292
|
-
setup_venv: Whether to create and populate
|
|
295
|
+
setup_venv: Whether to create and populate the uv environment.
|
|
293
296
|
setup_hooks: Whether to configure Claude Code hooks.
|
|
294
297
|
|
|
295
298
|
Returns:
|
|
@@ -361,9 +364,9 @@ def scaffold_project(
|
|
|
361
364
|
if setup_hooks:
|
|
362
365
|
_setup_hooks(ml_dir)
|
|
363
366
|
|
|
364
|
-
# Setup
|
|
367
|
+
# Setup Python environment
|
|
365
368
|
if setup_venv:
|
|
366
|
-
|
|
369
|
+
_setup_environment(target)
|
|
367
370
|
|
|
368
371
|
return stats
|
|
369
372
|
|
|
@@ -390,16 +393,18 @@ def _setup_hooks(ml_dir: str) -> None:
|
|
|
390
393
|
|
|
391
394
|
hooks = settings.get("hooks", {})
|
|
392
395
|
|
|
396
|
+
target = Path(ml_dir).resolve()
|
|
397
|
+
|
|
393
398
|
# PostToolUse hook for auto-logging
|
|
394
399
|
post_hooks = hooks.get("PostToolUse", [])
|
|
395
|
-
post_hook_cmd = f"bash {
|
|
400
|
+
post_hook_cmd = f"bash {shlex.quote(str(target / 'scripts' / 'post-train-hook.sh'))}"
|
|
396
401
|
if not any(post_hook_cmd in str(h) for h in post_hooks):
|
|
397
402
|
post_hooks.append(make_command_hook_group(post_hook_cmd, matcher="Bash"))
|
|
398
403
|
hooks["PostToolUse"] = post_hooks
|
|
399
404
|
|
|
400
405
|
# Stop hook for convergence
|
|
401
406
|
stop_hooks = hooks.get("Stop", [])
|
|
402
|
-
stop_hook_cmd = f"bash {
|
|
407
|
+
stop_hook_cmd = f"bash {shlex.quote(str(target / 'scripts' / 'stop-hook.sh'))}"
|
|
403
408
|
if not any(stop_hook_cmd in str(h) for h in stop_hooks):
|
|
404
409
|
stop_hooks.append(make_command_hook_group(stop_hook_cmd))
|
|
405
410
|
hooks["Stop"] = stop_hooks
|
|
@@ -408,29 +413,22 @@ def _setup_hooks(ml_dir: str) -> None:
|
|
|
408
413
|
settings_path.write_text(json.dumps(settings, indent=2))
|
|
409
414
|
|
|
410
415
|
|
|
411
|
-
def
|
|
412
|
-
"""Create
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
print(" Venv already exists, skipping creation.", file=sys.stderr)
|
|
416
|
+
def _setup_environment(target: Path) -> None:
|
|
417
|
+
"""Create the uv-managed Python environment."""
|
|
418
|
+
if shutil.which("uv") is None:
|
|
419
|
+
print(" Warning: uv not found; run `uv sync` from the ML directory after installing uv.", file=sys.stderr)
|
|
416
420
|
return
|
|
417
421
|
|
|
418
|
-
print("
|
|
422
|
+
print(" Syncing uv environment...", file=sys.stderr)
|
|
419
423
|
try:
|
|
420
424
|
subprocess.run(
|
|
421
|
-
[
|
|
422
|
-
|
|
425
|
+
["uv", "sync"],
|
|
426
|
+
cwd=target,
|
|
427
|
+
check=True,
|
|
428
|
+
capture_output=True,
|
|
423
429
|
)
|
|
424
|
-
pip = str(venv_path / "bin" / "pip")
|
|
425
|
-
req = str(target / "requirements.txt")
|
|
426
|
-
if Path(req).exists():
|
|
427
|
-
print(" Installing requirements...", file=sys.stderr)
|
|
428
|
-
subprocess.run(
|
|
429
|
-
[pip, "install", "-r", req],
|
|
430
|
-
check=True, capture_output=True,
|
|
431
|
-
)
|
|
432
430
|
except subprocess.CalledProcessError as e:
|
|
433
|
-
print(f" Warning:
|
|
431
|
+
print(f" Warning: uv environment setup failed: {e}", file=sys.stderr)
|
|
434
432
|
|
|
435
433
|
|
|
436
434
|
def verify_placeholders(ml_dir: str) -> list[tuple[str, int, str]]:
|
|
@@ -487,7 +485,7 @@ def main() -> None:
|
|
|
487
485
|
parser.add_argument("--task-description", default=None)
|
|
488
486
|
parser.add_argument("--ml-dir", default=None)
|
|
489
487
|
parser.add_argument("--data-source", default=None)
|
|
490
|
-
parser.add_argument("--no-venv", action="store_true", help="Skip
|
|
488
|
+
parser.add_argument("--no-venv", action="store_true", help="Skip uv environment setup")
|
|
491
489
|
parser.add_argument("--no-hooks", action="store_true", help="Skip hook configuration")
|
|
492
490
|
parser.add_argument("--templates-dir", default=None, help="Override templates directory")
|
|
493
491
|
args = parser.parse_args()
|
|
@@ -546,9 +544,9 @@ def main() -> None:
|
|
|
546
544
|
|
|
547
545
|
print(f"\nNext steps:")
|
|
548
546
|
print(f" 1. Add training data to {values['data_source']}")
|
|
549
|
-
print(f" 2. cd {ml_dir} &&
|
|
550
|
-
print(f" 3. python prepare.py")
|
|
551
|
-
print(f" 4. /turing:train (or: python train.py > run.log 2>&1)")
|
|
547
|
+
print(f" 2. cd {ml_dir} && uv sync")
|
|
548
|
+
print(f" 3. uv run python prepare.py")
|
|
549
|
+
print(f" 4. /turing:train (or: uv run python train.py > run.log 2>&1)")
|
|
552
550
|
|
|
553
551
|
|
|
554
552
|
if __name__ == "__main__":
|
|
@@ -25,10 +25,9 @@ if [[ ! -f "$EXPERIMENT_LOG" ]]; then
|
|
|
25
25
|
exit 0
|
|
26
26
|
fi
|
|
27
27
|
|
|
28
|
-
# Activate venv and delegate to Python module
|
|
29
28
|
cd "$ML_DIR"
|
|
30
|
-
source .
|
|
29
|
+
source "${SCRIPT_DIR}/turing-run-python.sh"
|
|
31
30
|
|
|
32
|
-
|
|
31
|
+
run_python scripts/check_convergence.py \
|
|
33
32
|
--config "$CONFIG_FILE" \
|
|
34
33
|
--log "$EXPERIMENT_LOG"
|