claude-turing 4.8.0 → 4.8.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (166)
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/README.md +1 -1
  3. package/agents/ml-evaluator.md +4 -4
  4. package/agents/ml-researcher.md +2 -2
  5. package/bin/turing-init.sh +2 -2
  6. package/commands/ablate.md +3 -3
  7. package/commands/annotate.md +2 -2
  8. package/commands/archive.md +2 -2
  9. package/commands/audit.md +3 -3
  10. package/commands/baseline.md +3 -3
  11. package/commands/brief.md +5 -5
  12. package/commands/budget.md +3 -3
  13. package/commands/calibrate.md +3 -3
  14. package/commands/card.md +3 -3
  15. package/commands/changelog.md +2 -2
  16. package/commands/checkpoint.md +3 -3
  17. package/commands/cite.md +2 -2
  18. package/commands/compare.md +1 -1
  19. package/commands/counterfactual.md +2 -2
  20. package/commands/curriculum.md +3 -3
  21. package/commands/design.md +3 -3
  22. package/commands/diagnose.md +4 -4
  23. package/commands/diff.md +3 -3
  24. package/commands/distill.md +3 -3
  25. package/commands/doctor.md +2 -2
  26. package/commands/ensemble.md +3 -3
  27. package/commands/explore.md +4 -4
  28. package/commands/export.md +3 -3
  29. package/commands/feature.md +3 -3
  30. package/commands/flashback.md +2 -2
  31. package/commands/fork.md +3 -3
  32. package/commands/frontier.md +3 -3
  33. package/commands/init.md +5 -5
  34. package/commands/leak.md +3 -3
  35. package/commands/lit.md +3 -3
  36. package/commands/logbook.md +5 -5
  37. package/commands/merge.md +2 -2
  38. package/commands/mode.md +1 -1
  39. package/commands/onboard.md +2 -2
  40. package/commands/paper.md +3 -3
  41. package/commands/plan.md +2 -2
  42. package/commands/poster.md +3 -3
  43. package/commands/postmortem.md +2 -2
  44. package/commands/preflight.md +5 -5
  45. package/commands/present.md +2 -2
  46. package/commands/profile.md +3 -3
  47. package/commands/prune.md +2 -2
  48. package/commands/quantize.md +2 -2
  49. package/commands/queue.md +3 -3
  50. package/commands/registry.md +2 -2
  51. package/commands/regress.md +3 -3
  52. package/commands/replay.md +2 -2
  53. package/commands/report.md +3 -3
  54. package/commands/reproduce.md +3 -3
  55. package/commands/retry.md +3 -3
  56. package/commands/review.md +2 -2
  57. package/commands/rules/loop-protocol.md +11 -11
  58. package/commands/sanity.md +3 -3
  59. package/commands/scale.md +4 -4
  60. package/commands/search.md +2 -2
  61. package/commands/seed.md +3 -3
  62. package/commands/sensitivity.md +3 -3
  63. package/commands/share.md +2 -2
  64. package/commands/simulate.md +2 -2
  65. package/commands/status.md +1 -1
  66. package/commands/stitch.md +3 -3
  67. package/commands/suggest.md +5 -5
  68. package/commands/surgery.md +2 -2
  69. package/commands/sweep.md +8 -8
  70. package/commands/template.md +2 -2
  71. package/commands/train.md +5 -5
  72. package/commands/transfer.md +3 -3
  73. package/commands/trend.md +2 -2
  74. package/commands/try.md +4 -4
  75. package/commands/update.md +2 -2
  76. package/commands/validate.md +4 -4
  77. package/commands/warm.md +3 -3
  78. package/commands/watch.md +4 -4
  79. package/commands/whatif.md +2 -2
  80. package/commands/xray.md +3 -3
  81. package/config/commands.yaml +1 -1
  82. package/package.json +1 -1
  83. package/skills/turing/ablate/SKILL.md +3 -3
  84. package/skills/turing/annotate/SKILL.md +2 -2
  85. package/skills/turing/archive/SKILL.md +2 -2
  86. package/skills/turing/audit/SKILL.md +3 -3
  87. package/skills/turing/baseline/SKILL.md +3 -3
  88. package/skills/turing/brief/SKILL.md +5 -5
  89. package/skills/turing/budget/SKILL.md +3 -3
  90. package/skills/turing/calibrate/SKILL.md +3 -3
  91. package/skills/turing/card/SKILL.md +3 -3
  92. package/skills/turing/changelog/SKILL.md +2 -2
  93. package/skills/turing/checkpoint/SKILL.md +3 -3
  94. package/skills/turing/cite/SKILL.md +2 -2
  95. package/skills/turing/compare/SKILL.md +1 -1
  96. package/skills/turing/counterfactual/SKILL.md +2 -2
  97. package/skills/turing/curriculum/SKILL.md +3 -3
  98. package/skills/turing/design/SKILL.md +3 -3
  99. package/skills/turing/diagnose/SKILL.md +4 -4
  100. package/skills/turing/diff/SKILL.md +3 -3
  101. package/skills/turing/distill/SKILL.md +3 -3
  102. package/skills/turing/doctor/SKILL.md +2 -2
  103. package/skills/turing/ensemble/SKILL.md +3 -3
  104. package/skills/turing/explore/SKILL.md +4 -4
  105. package/skills/turing/export/SKILL.md +3 -3
  106. package/skills/turing/feature/SKILL.md +3 -3
  107. package/skills/turing/flashback/SKILL.md +2 -2
  108. package/skills/turing/fork/SKILL.md +3 -3
  109. package/skills/turing/frontier/SKILL.md +3 -3
  110. package/skills/turing/init/SKILL.md +5 -5
  111. package/skills/turing/leak/SKILL.md +3 -3
  112. package/skills/turing/lit/SKILL.md +3 -3
  113. package/skills/turing/logbook/SKILL.md +5 -5
  114. package/skills/turing/merge/SKILL.md +2 -2
  115. package/skills/turing/mode/SKILL.md +1 -1
  116. package/skills/turing/onboard/SKILL.md +2 -2
  117. package/skills/turing/paper/SKILL.md +3 -3
  118. package/skills/turing/plan/SKILL.md +2 -2
  119. package/skills/turing/poster/SKILL.md +3 -3
  120. package/skills/turing/postmortem/SKILL.md +2 -2
  121. package/skills/turing/preflight/SKILL.md +5 -5
  122. package/skills/turing/present/SKILL.md +2 -2
  123. package/skills/turing/profile/SKILL.md +3 -3
  124. package/skills/turing/prune/SKILL.md +2 -2
  125. package/skills/turing/quantize/SKILL.md +2 -2
  126. package/skills/turing/queue/SKILL.md +3 -3
  127. package/skills/turing/registry/SKILL.md +2 -2
  128. package/skills/turing/regress/SKILL.md +3 -3
  129. package/skills/turing/replay/SKILL.md +2 -2
  130. package/skills/turing/report/SKILL.md +3 -3
  131. package/skills/turing/reproduce/SKILL.md +3 -3
  132. package/skills/turing/retry/SKILL.md +3 -3
  133. package/skills/turing/review/SKILL.md +2 -2
  134. package/skills/turing/rules/loop-protocol.md +11 -11
  135. package/skills/turing/sanity/SKILL.md +3 -3
  136. package/skills/turing/scale/SKILL.md +4 -4
  137. package/skills/turing/search/SKILL.md +2 -2
  138. package/skills/turing/seed/SKILL.md +3 -3
  139. package/skills/turing/sensitivity/SKILL.md +3 -3
  140. package/skills/turing/share/SKILL.md +2 -2
  141. package/skills/turing/simulate/SKILL.md +2 -2
  142. package/skills/turing/status/SKILL.md +1 -1
  143. package/skills/turing/stitch/SKILL.md +3 -3
  144. package/skills/turing/suggest/SKILL.md +5 -5
  145. package/skills/turing/surgery/SKILL.md +2 -2
  146. package/skills/turing/sweep/SKILL.md +8 -8
  147. package/skills/turing/template/SKILL.md +2 -2
  148. package/skills/turing/train/SKILL.md +5 -5
  149. package/skills/turing/transfer/SKILL.md +3 -3
  150. package/skills/turing/trend/SKILL.md +2 -2
  151. package/skills/turing/try/SKILL.md +4 -4
  152. package/skills/turing/update/SKILL.md +2 -2
  153. package/skills/turing/validate/SKILL.md +4 -4
  154. package/skills/turing/warm/SKILL.md +3 -3
  155. package/skills/turing/watch/SKILL.md +4 -4
  156. package/skills/turing/whatif/SKILL.md +2 -2
  157. package/skills/turing/xray/SKILL.md +3 -3
  158. package/templates/README.md +5 -8
  159. package/templates/program.md +18 -18
  160. package/templates/pyproject.toml +10 -0
  161. package/templates/requirements.txt +4 -1
  162. package/templates/scripts/generate_onboarding.py +1 -1
  163. package/templates/scripts/post-train-hook.sh +7 -8
  164. package/templates/scripts/scaffold.py +24 -26
  165. package/templates/scripts/stop-hook.sh +2 -3
  166. package/templates/scripts/turing-run-python.sh +9 -0
@@ -9,9 +9,9 @@ See inside the model. When it underperforms, the fix depends on *why*.
9
9
 
10
10
  ## Steps
11
11
 
12
- 1. **Activate environment:**
12
+ 1. **Sync environment:**
13
13
  ```bash
14
- source .venv/bin/activate
14
+ uv sync
15
15
  ```
16
16
 
17
17
  2. **Parse arguments from `$ARGUMENTS`:**
@@ -22,7 +22,7 @@ See inside the model. When it underperforms, the fix depends on *why*.
22
22
 
23
23
  3. **Run model diagnostics:**
24
24
  ```bash
25
- python scripts/model_xray.py $ARGUMENTS
25
+ uv run python scripts/model_xray.py $ARGUMENTS
26
26
  ```
27
27
 
28
28
  4. **Diagnostics by model type:**
@@ -21,23 +21,21 @@ This separation is the invariant that makes experiment comparisons valid.
21
21
 
22
22
  ```bash
23
23
  # 1. Set up the environment
24
- python -m venv .venv
25
- source .venv/bin/activate
26
- pip install -r requirements.txt
24
+ uv sync
27
25
 
28
26
  # 2. Add your training data to {{DATA_SOURCE}}
29
27
 
30
28
  # 3. Create train/val/test splits
31
- python prepare.py
29
+ uv run python prepare.py
32
30
 
33
31
  # 4. Run training
34
- python train.py > run.log 2>&1
32
+ uv run python train.py > run.log 2>&1
35
33
 
36
34
  # 5. Check results
37
35
  grep -A 10 "^---" run.log
38
36
 
39
37
  # 6. View experiment history
40
- python scripts/show_metrics.py
38
+ uv run python scripts/show_metrics.py
41
39
  ```
42
40
 
43
41
  ## Using the Autoresearch Agent
@@ -88,6 +86,5 @@ For hands-off mode: `/loop 5m /turing:train`
88
86
  ## Running Tests
89
87
 
90
88
  ```bash
91
- source .venv/bin/activate
92
- python -m pytest tests/ -v
89
+ uv run pytest tests/ -v
93
90
  ```
@@ -54,11 +54,11 @@ Update it after each experiment with:
54
54
 
55
55
  For systematic hyperparameter search:
56
56
  1. Edit `sweep_config.yaml` with parameter ranges
57
- 2. Generate queue: `python scripts/sweep.py`
58
- 3. Check status: `python scripts/sweep.py --status`
59
- 4. Get next: `python scripts/sweep.py --next`
57
+ 2. Generate queue: `uv run python scripts/sweep.py`
58
+ 3. Check status: `uv run python scripts/sweep.py --status`
59
+ 4. Get next: `uv run python scripts/sweep.py --next`
60
60
  5. Apply overrides, create branch, run training
61
- 6. Mark done: `python scripts/sweep.py --mark <name> complete|failed`
61
+ 6. Mark done: `uv run python scripts/sweep.py --mark <name> complete|failed`
62
62
 
63
63
  ## THE LOOP
64
64
 
@@ -66,8 +66,8 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
66
66
 
67
67
  1. **OBSERVE** — Read recent results, check hypothesis queue, research plan, and review failed diffs:
68
68
  ```bash
69
- python scripts/show_metrics.py --last 5
70
- python scripts/manage_hypotheses.py next 2>/dev/null || echo "No queued hypotheses"
69
+ uv run python scripts/show_metrics.py --last 5
70
+ uv run python scripts/manage_hypotheses.py next 2>/dev/null || echo "No queued hypotheses"
71
71
  cat RESEARCH_PLAN.md 2>/dev/null || true
72
72
  ```
73
73
 
@@ -88,12 +88,12 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
88
88
 
89
89
  **If using a queued hypothesis:**
90
90
  ```bash
91
- python scripts/manage_hypotheses.py mark hyp-NNN in-progress
91
+ uv run python scripts/manage_hypotheses.py mark hyp-NNN in-progress
92
92
  ```
93
93
 
94
94
  **If generating your own hypothesis**, register it with structured detail:
95
95
  ```bash
96
- python scripts/manage_hypotheses.py add "your hypothesis description" \
96
+ uv run python scripts/manage_hypotheses.py add "your hypothesis description" \
97
97
  --priority medium --source agent \
98
98
  --model-type xgboost \
99
99
  --hyperparams '{"max_depth": 8, "n_estimators": 200}' \
@@ -101,7 +101,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
101
101
  --tags "depth,estimators" \
102
102
  --parent exp-NNN \
103
103
  --expected "deeper trees should capture feature interactions"
104
- python scripts/manage_hypotheses.py mark hyp-NNN in-progress
104
+ uv run python scripts/manage_hypotheses.py mark hyp-NNN in-progress
105
105
  ```
106
106
 
107
107
  This creates both an index entry in `hypotheses.yaml` and a detailed file at `hypotheses/hyp-NNN.yaml` with full architecture, hyperparameters, expected outcome, and lineage.
@@ -110,7 +110,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
110
110
 
111
111
  To read a hypothesis's full detail:
112
112
  ```bash
113
- python scripts/manage_hypotheses.py show hyp-NNN
113
+ uv run python scripts/manage_hypotheses.py show hyp-NNN
114
114
  ```
115
115
 
116
116
  3. **PREPARE** — Modify `config.yaml` for hyperparameter changes. Only modify `train.py` for structural code changes.
@@ -122,7 +122,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
122
122
 
123
123
  5. **EXECUTE** training:
124
124
  ```bash
125
- source .venv/bin/activate && python train.py > run.log 2>&1
125
+ uv run python train.py > run.log 2>&1
126
126
  ```
127
127
 
128
128
  6. **MEASURE** — Parse metrics from run.log:
@@ -144,7 +144,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
144
144
 
145
145
  8. **RECORD** — Log the experiment (kept or discarded):
146
146
  ```bash
147
- python scripts/log_experiment.py experiments/log.jsonl exp-NNN kept|discarded \
147
+ uv run python scripts/log_experiment.py experiments/log.jsonl exp-NNN kept|discarded \
148
148
  '{"{{TARGET_METRIC}}": X.XX, ...}' \
149
149
  '{"model_type": "xgboost", "hyperparams": {...}}' \
150
150
  models/model.joblib "Description of hypothesis and outcome"
@@ -152,7 +152,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
152
152
 
153
153
  Update the hypothesis status with result metrics:
154
154
  ```bash
155
- python scripts/manage_hypotheses.py mark hyp-NNN tested \
155
+ uv run python scripts/manage_hypotheses.py mark hyp-NNN tested \
156
156
  --result exp-NNN \
157
157
  --metrics '{"{{TARGET_METRIC}}": X.XX, ...}' \
158
158
  --notes "Brief explanation of what happened and why"
@@ -162,7 +162,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
162
162
 
163
163
  Then synthesize a decision packet and auto-queue follow-ups:
164
164
  ```bash
165
- python scripts/synthesize_decision.py --experiment exp-NNN --auto-queue
165
+ uv run python scripts/synthesize_decision.py --experiment exp-NNN --auto-queue
166
166
  ```
167
167
  This produces a verdict (promote/branch_followup/abandon/fix_and_retry) and automatically queues follow-up hypotheses for `branch_followup` and `fix_and_retry` outcomes.
168
168
 
@@ -172,7 +172,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
172
172
  - Report final best model and recommend next steps
173
173
  - **Before declaring final results**, run a seed study to verify robustness:
174
174
  ```bash
175
- python scripts/seed_runner.py --quick
175
+ uv run python scripts/seed_runner.py --quick
176
176
  ```
177
177
  If CV > 5%, the result is seed-sensitive — report mean ± std, not a single-seed number.
178
178
 
@@ -180,9 +180,9 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
180
180
 
181
181
  ## Execution Rules
182
182
 
183
- - **ALWAYS redirect output:** `python train.py > run.log 2>&1`
183
+ - **ALWAYS redirect output:** `uv run python train.py > run.log 2>&1`
184
184
  - **ALWAYS parse with grep:** `grep -A 10 "^---" run.log | head -10`
185
- - **ALWAYS activate venv:** `source .venv/bin/activate`
185
+ - **ALWAYS run Python through uv:** `uv run python ...`
186
186
  - **NEVER install packages** without human approval
187
187
 
188
188
  ## Strategy Escalation Protocol
@@ -219,5 +219,5 @@ Starting suggestions (ordered by expected impact):
219
219
  ## Comparing Runs
220
220
 
221
221
  ```bash
222
- python scripts/compare_runs.py exp-001 exp-002
222
+ uv run python scripts/compare_runs.py exp-001 exp-002
223
223
  ```
@@ -2,6 +2,16 @@
2
2
  name = "{{PROJECT_NAME}}-ml"
3
3
  version = "0.1.0"
4
4
  requires-python = ">=3.12"
5
+ dependencies = [
6
+ "scikit-learn>=1.6",
7
+ "xgboost>=3.2",
8
+ "lightgbm>=4.6",
9
+ "pandas>=2.2",
10
+ "numpy>=2.0",
11
+ "joblib>=1.4",
12
+ "pyyaml>=6.0",
13
+ "pytest>=8.0",
14
+ ]
5
15
 
6
16
  [tool.pytest.ini_options]
7
17
  testpaths = ["tests"]
@@ -1,3 +1,6 @@
1
+ # Compatibility export only. pyproject.toml is canonical for dependencies.
2
+ # Prefer: uv sync
3
+
1
4
  scikit-learn>=1.6
2
5
  xgboost>=3.2
3
6
  lightgbm>=4.6
@@ -8,5 +11,5 @@ pyyaml>=6.0
8
11
  pytest>=8.0
9
12
 
10
13
  # Optional: tree-search-guided hypothesis exploration
11
- # Install with: pip install "treequest[all]"
14
+ # Install with: uv add "treequest[all]"
12
15
  # treequest>=0.1
@@ -210,7 +210,7 @@ def format_onboarding_report(config, experiments, families, best, decisions,
210
210
  "5. `/turing:try \"your hypothesis\"` — inject ideas",
211
211
  "6. `/turing:train` — run next experiment",
212
212
  ], "engineer": [
213
- "1. `pip install -r requirements.txt`",
213
+ "1. `uv sync`",
214
214
  "2. Review `config.yaml` for data paths",
215
215
  "3. `/turing:status` — where things stand",
216
216
  "4. Check `train.py` for current model",
@@ -18,24 +18,23 @@ EXPERIMENT_LOG="${ML_DIR}/experiments/log.jsonl"
18
18
  if [[ -f "${ML_DIR}/run.log" ]]; then
19
19
  LOG_FILE="${ML_DIR}/run.log"
20
20
  elif [[ -f "run.log" ]]; then
21
- LOG_FILE="run.log"
21
+ LOG_FILE="$(pwd)/run.log"
22
22
  else
23
23
  echo "post-train-hook: No run.log found, skipping."
24
24
  exit 0
25
25
  fi
26
26
 
27
- # Activate venv and delegate to Python
28
27
  cd "$ML_DIR"
29
- source .venv/bin/activate 2>/dev/null || true
28
+ source "${SCRIPT_DIR}/turing-run-python.sh"
30
29
 
31
30
  # Parse metrics using the canonical parser
32
- PARSED=$(python3 scripts/parse_metrics.py "$LOG_FILE" --raw 2>/dev/null) || {
31
+ PARSED=$(run_python scripts/parse_metrics.py "$LOG_FILE" --raw 2>/dev/null) || {
33
32
  echo "post-train-hook: No metrics block found in run.log, skipping."
34
33
  exit 0
35
34
  }
36
35
 
37
36
  # Extract metrics and metadata via Python (avoids bash JSON construction)
38
- METRICS_JSON=$(python3 -c "
37
+ METRICS_JSON=$(run_python -c "
39
38
  import json, sys
40
39
  data = json.loads(sys.argv[1])
41
40
  metadata_keys = {'model_type', 'train_seconds'}
@@ -43,7 +42,7 @@ metrics = {k: v for k, v in data.items() if k not in metadata_keys}
43
42
  print(json.dumps(metrics))
44
43
  " "$PARSED")
45
44
 
46
- CONFIG_JSON=$(python3 -c "
45
+ CONFIG_JSON=$(run_python -c "
47
46
  import json, sys
48
47
  data = json.loads(sys.argv[1])
49
48
  metadata_keys = {'model_type', 'train_seconds'}
@@ -55,14 +54,14 @@ print(json.dumps(config))
55
54
  GIT_COMMIT=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")
56
55
 
57
56
  # Get next experiment ID
58
- NEXT_ID=$(python3 -c "
57
+ NEXT_ID=$(run_python -c "
59
58
  import sys; sys.path.insert(0, 'scripts')
60
59
  from log_experiment import get_next_experiment_id
61
60
  print(get_next_experiment_id('$EXPERIMENT_LOG'))
62
61
  ")
63
62
 
64
63
  # Log the experiment
65
- python3 scripts/log_experiment.py \
64
+ run_python scripts/log_experiment.py \
66
65
  "$EXPERIMENT_LOG" \
67
66
  "$NEXT_ID" \
68
67
  "kept" \
@@ -22,6 +22,7 @@ import argparse
22
22
  import json
23
23
  import os
24
24
  import re
25
+ import shlex
25
26
  import shutil
26
27
  import subprocess
27
28
  import sys
@@ -64,6 +65,7 @@ TEMPLATE_DIRS = {
64
65
  "sweep.py",
65
66
  "post-train-hook.sh",
66
67
  "stop-hook.sh",
68
+ "turing-run-python.sh",
67
69
  "check_convergence.py",
68
70
  "verify_placeholders.py",
69
71
  "manage_hypotheses.py",
@@ -220,6 +222,7 @@ DIRECTORIES_TO_CREATE = [
220
222
  SHELL_SCRIPTS = [
221
223
  "scripts/post-train-hook.sh",
222
224
  "scripts/stop-hook.sh",
225
+ "scripts/turing-run-python.sh",
223
226
  ]
224
227
 
225
228
 
@@ -289,7 +292,7 @@ def scaffold_project(
289
292
  templates_dir: Path to the templates/ directory.
290
293
  ml_dir: Target ML directory (relative to cwd).
291
294
  values: Dict mapping arg names to values for placeholder substitution.
292
- setup_venv: Whether to create and populate a Python venv.
295
+ setup_venv: Whether to create and populate the uv environment.
293
296
  setup_hooks: Whether to configure Claude Code hooks.
294
297
 
295
298
  Returns:
@@ -361,9 +364,9 @@ def scaffold_project(
361
364
  if setup_hooks:
362
365
  _setup_hooks(ml_dir)
363
366
 
364
- # Setup venv
367
+ # Setup Python environment
365
368
  if setup_venv:
366
- _setup_venv(target)
369
+ _setup_environment(target)
367
370
 
368
371
  return stats
369
372
 
@@ -390,16 +393,18 @@ def _setup_hooks(ml_dir: str) -> None:
390
393
 
391
394
  hooks = settings.get("hooks", {})
392
395
 
396
+ target = Path(ml_dir).resolve()
397
+
393
398
  # PostToolUse hook for auto-logging
394
399
  post_hooks = hooks.get("PostToolUse", [])
395
- post_hook_cmd = f"bash {ml_dir}/scripts/post-train-hook.sh"
400
+ post_hook_cmd = f"bash {shlex.quote(str(target / 'scripts' / 'post-train-hook.sh'))}"
396
401
  if not any(post_hook_cmd in str(h) for h in post_hooks):
397
402
  post_hooks.append(make_command_hook_group(post_hook_cmd, matcher="Bash"))
398
403
  hooks["PostToolUse"] = post_hooks
399
404
 
400
405
  # Stop hook for convergence
401
406
  stop_hooks = hooks.get("Stop", [])
402
- stop_hook_cmd = f"bash {ml_dir}/scripts/stop-hook.sh"
407
+ stop_hook_cmd = f"bash {shlex.quote(str(target / 'scripts' / 'stop-hook.sh'))}"
403
408
  if not any(stop_hook_cmd in str(h) for h in stop_hooks):
404
409
  stop_hooks.append(make_command_hook_group(stop_hook_cmd))
405
410
  hooks["Stop"] = stop_hooks
@@ -408,29 +413,22 @@ def _setup_hooks(ml_dir: str) -> None:
408
413
  settings_path.write_text(json.dumps(settings, indent=2))
409
414
 
410
415
 
411
- def _setup_venv(target: Path) -> None:
412
- """Create Python venv and install requirements."""
413
- venv_path = target / ".venv"
414
- if venv_path.exists():
415
- print(" Venv already exists, skipping creation.", file=sys.stderr)
416
+ def _setup_environment(target: Path) -> None:
417
+ """Create the uv-managed Python environment."""
418
+ if shutil.which("uv") is None:
419
+ print(" Warning: uv not found; run `uv sync` from the ML directory after installing uv.", file=sys.stderr)
416
420
  return
417
421
 
418
- print(" Creating virtual environment...", file=sys.stderr)
422
+ print(" Syncing uv environment...", file=sys.stderr)
419
423
  try:
420
424
  subprocess.run(
421
- [sys.executable, "-m", "venv", str(venv_path)],
422
- check=True, capture_output=True,
425
+ ["uv", "sync"],
426
+ cwd=target,
427
+ check=True,
428
+ capture_output=True,
423
429
  )
424
- pip = str(venv_path / "bin" / "pip")
425
- req = str(target / "requirements.txt")
426
- if Path(req).exists():
427
- print(" Installing requirements...", file=sys.stderr)
428
- subprocess.run(
429
- [pip, "install", "-r", req],
430
- check=True, capture_output=True,
431
- )
432
430
  except subprocess.CalledProcessError as e:
433
- print(f" Warning: venv setup failed: {e}", file=sys.stderr)
431
+ print(f" Warning: uv environment setup failed: {e}", file=sys.stderr)
434
432
 
435
433
 
436
434
  def verify_placeholders(ml_dir: str) -> list[tuple[str, int, str]]:
@@ -487,7 +485,7 @@ def main() -> None:
487
485
  parser.add_argument("--task-description", default=None)
488
486
  parser.add_argument("--ml-dir", default=None)
489
487
  parser.add_argument("--data-source", default=None)
490
- parser.add_argument("--no-venv", action="store_true", help="Skip venv creation")
488
+ parser.add_argument("--no-venv", action="store_true", help="Skip uv environment setup")
491
489
  parser.add_argument("--no-hooks", action="store_true", help="Skip hook configuration")
492
490
  parser.add_argument("--templates-dir", default=None, help="Override templates directory")
493
491
  args = parser.parse_args()
@@ -546,9 +544,9 @@ def main() -> None:
546
544
 
547
545
  print(f"\nNext steps:")
548
546
  print(f" 1. Add training data to {values['data_source']}")
549
- print(f" 2. cd {ml_dir} && source .venv/bin/activate")
550
- print(f" 3. python prepare.py")
551
- print(f" 4. /turing:train (or: python train.py > run.log 2>&1)")
547
+ print(f" 2. cd {ml_dir} && uv sync")
548
+ print(f" 3. uv run python prepare.py")
549
+ print(f" 4. /turing:train (or: uv run python train.py > run.log 2>&1)")
552
550
 
553
551
 
554
552
  if __name__ == "__main__":
@@ -25,10 +25,9 @@ if [[ ! -f "$EXPERIMENT_LOG" ]]; then
25
25
  exit 0
26
26
  fi
27
27
 
28
- # Activate venv and delegate to Python module
29
28
  cd "$ML_DIR"
30
- source .venv/bin/activate 2>/dev/null || true
29
+ source "${SCRIPT_DIR}/turing-run-python.sh"
31
30
 
32
- python3 scripts/check_convergence.py \
31
+ run_python scripts/check_convergence.py \
33
32
  --config "$CONFIG_FILE" \
34
33
  --log "$EXPERIMENT_LOG"
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env bash
2
+
3
+ run_python() {
4
+ if ! command -v uv >/dev/null 2>&1; then
5
+ echo "turing: uv is required. Install uv or run legacy environment setup manually." >&2
6
+ return 127
7
+ fi
8
+ uv run python "$@"
9
+ }