claude-turing 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/.claude-plugin/plugin.json +34 -0
  2. package/LICENSE +21 -0
  3. package/README.md +457 -0
  4. package/agents/ml-evaluator.md +43 -0
  5. package/agents/ml-researcher.md +74 -0
  6. package/bin/cli.js +46 -0
  7. package/bin/turing-init.sh +57 -0
  8. package/commands/brief.md +83 -0
  9. package/commands/compare.md +24 -0
  10. package/commands/design.md +97 -0
  11. package/commands/init.md +123 -0
  12. package/commands/logbook.md +51 -0
  13. package/commands/mode.md +43 -0
  14. package/commands/poster.md +89 -0
  15. package/commands/preflight.md +75 -0
  16. package/commands/report.md +97 -0
  17. package/commands/rules/loop-protocol.md +91 -0
  18. package/commands/status.md +24 -0
  19. package/commands/suggest.md +95 -0
  20. package/commands/sweep.md +45 -0
  21. package/commands/train.md +66 -0
  22. package/commands/try.md +63 -0
  23. package/commands/turing.md +54 -0
  24. package/commands/validate.md +34 -0
  25. package/config/defaults.yaml +45 -0
  26. package/config/experiment_archetypes.yaml +127 -0
  27. package/config/lifecycle.toml +31 -0
  28. package/config/novelty_aliases.yaml +107 -0
  29. package/config/relationships.toml +125 -0
  30. package/config/state.toml +24 -0
  31. package/config/task_taxonomy.yaml +110 -0
  32. package/config/taxonomy.toml +37 -0
  33. package/package.json +54 -0
  34. package/src/claude-md.js +55 -0
  35. package/src/install.js +107 -0
  36. package/src/paths.js +20 -0
  37. package/src/postinstall.js +22 -0
  38. package/src/verify.js +109 -0
  39. package/templates/MEMORY.md +36 -0
  40. package/templates/README.md +93 -0
  41. package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
  42. package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
  43. package/templates/config.yaml +48 -0
  44. package/templates/evaluate.py +237 -0
  45. package/templates/features/__init__.py +0 -0
  46. package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
  47. package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
  48. package/templates/features/featurizers.py +138 -0
  49. package/templates/prepare.py +171 -0
  50. package/templates/program.md +216 -0
  51. package/templates/pyproject.toml +8 -0
  52. package/templates/requirements.txt +8 -0
  53. package/templates/scripts/__init__.py +0 -0
  54. package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  55. package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
  56. package/templates/scripts/__pycache__/classify_task.cpython-314.pyc +0 -0
  57. package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
  58. package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
  59. package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
  60. package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
  61. package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
  62. package/templates/scripts/__pycache__/manage_hypotheses.cpython-314.pyc +0 -0
  63. package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
  64. package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
  65. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  66. package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
  67. package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
  68. package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
  69. package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
  70. package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
  71. package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
  72. package/templates/scripts/__pycache__/turing_io.cpython-314.pyc +0 -0
  73. package/templates/scripts/__pycache__/update_state.cpython-314.pyc +0 -0
  74. package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
  75. package/templates/scripts/check_convergence.py +230 -0
  76. package/templates/scripts/compare_runs.py +124 -0
  77. package/templates/scripts/critique_hypothesis.py +350 -0
  78. package/templates/scripts/experiment_index.py +288 -0
  79. package/templates/scripts/generate_brief.py +389 -0
  80. package/templates/scripts/generate_logbook.py +423 -0
  81. package/templates/scripts/log_experiment.py +243 -0
  82. package/templates/scripts/manage_hypotheses.py +543 -0
  83. package/templates/scripts/novelty_guard.py +343 -0
  84. package/templates/scripts/parse_metrics.py +139 -0
  85. package/templates/scripts/post-train-hook.sh +74 -0
  86. package/templates/scripts/preflight.py +549 -0
  87. package/templates/scripts/scaffold.py +409 -0
  88. package/templates/scripts/show_environment.py +92 -0
  89. package/templates/scripts/show_experiment_tree.py +144 -0
  90. package/templates/scripts/show_families.py +133 -0
  91. package/templates/scripts/show_metrics.py +157 -0
  92. package/templates/scripts/statistical_compare.py +259 -0
  93. package/templates/scripts/stop-hook.sh +34 -0
  94. package/templates/scripts/suggest_next.py +301 -0
  95. package/templates/scripts/sweep.py +276 -0
  96. package/templates/scripts/synthesize_decision.py +300 -0
  97. package/templates/scripts/turing_io.py +76 -0
  98. package/templates/scripts/update_state.py +296 -0
  99. package/templates/scripts/validate_stability.py +167 -0
  100. package/templates/scripts/verify_placeholders.py +119 -0
  101. package/templates/sweep_config.yaml +14 -0
  102. package/templates/tests/__init__.py +0 -0
  103. package/templates/tests/conftest.py +91 -0
  104. package/templates/train.py +240 -0
@@ -0,0 +1,301 @@
1
+ #!/usr/bin/env python3
2
+ """Bayesian-guided hypothesis suggestion for the autoresearch pipeline.
3
+
4
+ Reads experiment history from log.jsonl, builds a surrogate model
5
+ (Random Forest) over the hyperparameter space, and suggests the
6
+ configurations most likely to improve the primary metric.
7
+
8
+ This is the data-driven complement to human taste: the human selects
9
+ which room to search, this script suggests which coins in that room
10
+ are most likely to be biased toward heads.
11
+
12
+ Usage:
13
+ python scripts/suggest_next.py [--log experiments/log.jsonl] [--config config.yaml] [--top 3]
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import json
20
+ import sys
21
+ from pathlib import Path
22
+
23
+ import numpy as np
24
+ import yaml
25
+
26
+ from scripts.turing_io import load_experiments
27
+
28
+
29
+ def extract_features_and_targets(
30
+ experiments: list[dict],
31
+ metric_name: str,
32
+ ) -> tuple[list[dict], list[float], list[str]]:
33
+ """Extract hyperparameter features and metric targets from experiments.
34
+
35
+ Returns (feature_dicts, metric_values, experiment_ids).
36
+ Only includes experiments with valid metric values.
37
+ """
38
+ features = []
39
+ targets = []
40
+ ids = []
41
+
42
+ for exp in experiments:
43
+ metric_val = exp.get("metrics", {}).get(metric_name)
44
+ if metric_val is None or not isinstance(metric_val, (int, float)):
45
+ continue
46
+
47
+ # Extract hyperparameters as features
48
+ config = exp.get("config", {})
49
+ hyperparams = config.get("hyperparams", {})
50
+ if not hyperparams:
51
+ # Try to find hyperparams at top level of config
52
+ hyperparams = {k: v for k, v in config.items()
53
+ if k not in ("model_type",) and isinstance(v, (int, float))}
54
+
55
+ if hyperparams:
56
+ features.append(hyperparams)
57
+ targets.append(float(metric_val))
58
+ ids.append(exp.get("experiment_id", "?"))
59
+
60
+ return features, targets, ids
61
+
62
+
63
+ def features_to_matrix(feature_dicts: list[dict]) -> tuple[np.ndarray, list[str]]:
64
+ """Convert list of feature dicts to a numpy matrix.
65
+
66
+ Handles missing keys by filling with NaN.
67
+ Returns (matrix, column_names).
68
+ """
69
+ if not feature_dicts:
70
+ return np.array([]).reshape(0, 0), []
71
+
72
+ # Collect all keys
73
+ all_keys = sorted(set(k for d in feature_dicts for k in d if isinstance(d[k], (int, float))))
74
+
75
+ if not all_keys:
76
+ return np.array([]).reshape(0, 0), []
77
+
78
+ matrix = np.full((len(feature_dicts), len(all_keys)), np.nan)
79
+ for i, d in enumerate(feature_dicts):
80
+ for j, key in enumerate(all_keys):
81
+ val = d.get(key)
82
+ if isinstance(val, (int, float)):
83
+ matrix[i, j] = val
84
+
85
+ return matrix, all_keys
86
+
87
+
88
+ def suggest_configurations(
89
+ experiments: list[dict],
90
+ metric_name: str,
91
+ lower_is_better: bool = False,
92
+ n_suggestions: int = 3,
93
+ sweep_config_path: str | None = None,
94
+ ) -> list[dict]:
95
+ """Suggest promising configurations using a Random Forest surrogate.
96
+
97
+ If a sweep config exists, generates candidates from the untried region.
98
+ Otherwise, generates perturbations of the best-known configuration.
99
+
100
+ Returns list of suggestion dicts with predicted_metric and config.
101
+ """
102
+ features, targets, ids = extract_features_and_targets(experiments, metric_name)
103
+
104
+ if len(features) < 3:
105
+ return [{
106
+ "reason": "insufficient_data",
107
+ "detail": f"Need at least 3 experiments with hyperparameters, have {len(features)}",
108
+ "suggestion": "Run more experiments before requesting data-driven suggestions",
109
+ }]
110
+
111
+ X, col_names = features_to_matrix(features)
112
+ y = np.array(targets)
113
+
114
+ if X.shape[1] == 0:
115
+ return [{
116
+ "reason": "no_numeric_hyperparameters",
117
+ "detail": "No numeric hyperparameters found in experiment configs",
118
+ "suggestion": "Ensure config includes numeric hyperparams like n_estimators, max_depth, learning_rate",
119
+ }]
120
+
121
+ # Handle NaN in features
122
+ from sklearn.impute import SimpleImputer
123
+ imputer = SimpleImputer(strategy="median")
124
+ X_clean = imputer.fit_transform(X)
125
+
126
+ # Fit surrogate model
127
+ from sklearn.ensemble import RandomForestRegressor
128
+ surrogate = RandomForestRegressor(
129
+ n_estimators=100,
130
+ random_state=42,
131
+ n_jobs=-1,
132
+ )
133
+ surrogate.fit(X_clean, y)
134
+
135
+ # Generate candidates
136
+ candidates = _generate_candidates(X_clean, col_names, sweep_config_path, n_candidates=200)
137
+
138
+ if len(candidates) == 0:
139
+ return [{
140
+ "reason": "no_candidates",
141
+ "detail": "Could not generate candidate configurations",
142
+ "suggestion": "Check sweep_config.yaml or experiment hyperparameter ranges",
143
+ }]
144
+
145
+ # Predict with surrogate
146
+ preds = surrogate.predict(candidates)
147
+
148
+ # Also get uncertainty (std across trees)
149
+ tree_preds = np.array([tree.predict(candidates) for tree in surrogate.estimators_])
150
+ uncertainties = np.std(tree_preds, axis=0)
151
+
152
+ # Acquisition function: UCB (Upper Confidence Bound)
153
+ # For higher-is-better: score = predicted + kappa * uncertainty
154
+ # For lower-is-better: score = -predicted + kappa * uncertainty
155
+ kappa = 1.5 # exploration-exploitation tradeoff
156
+ if lower_is_better:
157
+ scores = -preds + kappa * uncertainties
158
+ else:
159
+ scores = preds + kappa * uncertainties
160
+
161
+ # Select top-N by acquisition score
162
+ top_indices = np.argsort(scores)[-n_suggestions:][::-1]
163
+
164
+ suggestions = []
165
+ for idx in top_indices:
166
+ config = {col_names[j]: round(float(candidates[idx, j]), 6) for j in range(len(col_names))}
167
+ suggestions.append({
168
+ "config": config,
169
+ "predicted_metric": round(float(preds[idx]), 6),
170
+ "uncertainty": round(float(uncertainties[idx]), 6),
171
+ "acquisition_score": round(float(scores[idx]), 6),
172
+ })
173
+
174
+ return suggestions
175
+
176
+
177
+ def _generate_candidates(
178
+ X: np.ndarray,
179
+ col_names: list[str],
180
+ sweep_config_path: str | None,
181
+ n_candidates: int = 200,
182
+ ) -> np.ndarray:
183
+ """Generate candidate configurations for the surrogate to evaluate.
184
+
185
+ Uses sweep config ranges if available, otherwise perturbs existing data.
186
+ """
187
+ if sweep_config_path and Path(sweep_config_path).exists():
188
+ return _candidates_from_sweep(sweep_config_path, col_names, n_candidates)
189
+
190
+ return _candidates_from_perturbation(X, n_candidates)
191
+
192
+
193
+ def _candidates_from_sweep(
194
+ sweep_config_path: str,
195
+ col_names: list[str],
196
+ n_candidates: int,
197
+ ) -> np.ndarray:
198
+ """Generate random candidates from sweep parameter ranges."""
199
+ with open(sweep_config_path) as f:
200
+ sweep_config = yaml.safe_load(f)
201
+
202
+ sweep_params = sweep_config.get("sweep", {})
203
+ if not sweep_params:
204
+ return np.array([]).reshape(0, len(col_names))
205
+
206
+ # Map sweep param names to column indices
207
+ candidates = np.random.RandomState(42).uniform(size=(n_candidates, len(col_names)))
208
+
209
+ for j, col in enumerate(col_names):
210
+ # Try to find matching sweep param
211
+ matching_key = None
212
+ for key in sweep_params:
213
+ if key.endswith(col):
214
+ matching_key = key
215
+ break
216
+
217
+ if matching_key and isinstance(sweep_params[matching_key], list):
218
+ values = [v for v in sweep_params[matching_key] if isinstance(v, (int, float))]
219
+ if values:
220
+ lo, hi = min(values), max(values)
221
+ # Expand range slightly for exploration
222
+ margin = (hi - lo) * 0.2 if hi != lo else abs(lo) * 0.5
223
+ candidates[:, j] = np.random.RandomState(42 + j).uniform(
224
+ lo - margin, hi + margin, size=n_candidates,
225
+ )
226
+
227
+ return candidates
228
+
229
+
230
+ def _candidates_from_perturbation(
231
+ X: np.ndarray,
232
+ n_candidates: int,
233
+ ) -> np.ndarray:
234
+ """Generate candidates by perturbing existing observations."""
235
+ rng = np.random.RandomState(42)
236
+
237
+ # Compute column ranges
238
+ col_min = np.nanmin(X, axis=0)
239
+ col_max = np.nanmax(X, axis=0)
240
+ col_range = col_max - col_min
241
+ col_range[col_range == 0] = np.abs(col_min[col_range == 0]) * 0.5 + 1e-6
242
+
243
+ candidates = np.zeros((n_candidates, X.shape[1]))
244
+ for i in range(n_candidates):
245
+ # Pick a random existing point and perturb it
246
+ base_idx = rng.randint(0, X.shape[0])
247
+ perturbation = rng.normal(0, 0.3, size=X.shape[1]) * col_range
248
+ candidates[i] = X[base_idx] + perturbation
249
+
250
+ return candidates
251
+
252
+
253
+ def format_suggestions(suggestions: list[dict], metric_name: str) -> str:
254
+ """Format suggestions for display."""
255
+ if not suggestions:
256
+ return "No suggestions available."
257
+
258
+ if "reason" in suggestions[0]:
259
+ return f"Cannot suggest: {suggestions[0]['detail']}\n{suggestions[0].get('suggestion', '')}"
260
+
261
+ lines = [f"Top {len(suggestions)} suggested configurations (by expected {metric_name}):", ""]
262
+
263
+ for i, s in enumerate(suggestions, 1):
264
+ config_str = ", ".join(f"{k}={v}" for k, v in s["config"].items())
265
+ lines.append(f" {i}. {config_str}")
266
+ lines.append(f" Predicted {metric_name}: {s['predicted_metric']:.4f} (uncertainty: {s['uncertainty']:.4f})")
267
+ lines.append("")
268
+
269
+ return "\n".join(lines)
270
+
271
+
272
+ def main() -> None:
273
+ """CLI entry point."""
274
+ parser = argparse.ArgumentParser(description="Suggest next experiment configuration")
275
+ parser.add_argument("--log", default="experiments/log.jsonl")
276
+ parser.add_argument("--config", default="config.yaml")
277
+ parser.add_argument("--sweep", default="sweep_config.yaml", help="Sweep config for candidate ranges")
278
+ parser.add_argument("--top", type=int, default=3, help="Number of suggestions")
279
+ args = parser.parse_args()
280
+
281
+ # Load config
282
+ config = {}
283
+ if Path(args.config).exists():
284
+ with open(args.config) as f:
285
+ config = yaml.safe_load(f) or {}
286
+
287
+ eval_cfg = config.get("evaluation", {})
288
+ metric = eval_cfg.get("primary_metric", "accuracy")
289
+ lower_is_better = eval_cfg.get("lower_is_better", False)
290
+
291
+ experiments = load_experiments(args.log)
292
+ suggestions = suggest_configurations(
293
+ experiments, metric, lower_is_better, args.top,
294
+ sweep_config_path=args.sweep if Path(args.sweep).exists() else None,
295
+ )
296
+
297
+ print(format_suggestions(suggestions, metric))
298
+
299
+
300
+ if __name__ == "__main__":
301
+ main()
@@ -0,0 +1,276 @@
1
+ #!/usr/bin/env python3
2
+ """Hyperparameter sweep tool for the autoresearch pipeline.
3
+
4
+ Systematic exploration of the hyperparameter space via cartesian product.
5
+ Generates all combinations of configured parameter ranges and writes a
6
+ persistent queue that the agent processes sequentially.
7
+
8
+ This is grid search, not random search or Bayesian optimization — deliberate
9
+ simplicity for reproducibility and interpretability. Every point in the grid
10
+ is evaluated, making the results a complete map of the explored region.
11
+
12
+ Usage:
13
+ python scripts/sweep.py [sweep_config.yaml]
14
+ python scripts/sweep.py --status # Show queue progress
15
+ python scripts/sweep.py --next # Print next pending experiment as JSON
16
+ python scripts/sweep.py --mark <name> <status> # Mark experiment complete/failed
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import argparse
22
+ import copy
23
+ import itertools
24
+ import json
25
+ import sys
26
+ from pathlib import Path
27
+
28
+ import yaml
29
+
30
+
31
+ def apply_overrides(config: dict, overrides: dict) -> dict:
32
+ """Apply dotted-path overrides to a config dict.
33
+
34
+ Takes dotted-path keys like "model.hyperparams.n_estimators" and sets
35
+ nested values. Returns a new config dict with overrides applied.
36
+
37
+ Args:
38
+ config: Base configuration dictionary.
39
+ overrides: Dict mapping dotted-path keys to values.
40
+
41
+ Returns:
42
+ New config dict with overrides applied (original is not mutated).
43
+ """
44
+ result = copy.deepcopy(config)
45
+ for dotted_key, value in overrides.items():
46
+ parts = dotted_key.split(".")
47
+ target = result
48
+ for part in parts[:-1]:
49
+ if part not in target:
50
+ target[part] = {}
51
+ target = target[part]
52
+ target[parts[-1]] = value
53
+ return result
54
+
55
+
56
+ def _make_experiment_name(overrides: dict) -> str:
57
+ """Generate a short descriptive name from parameter overrides.
58
+
59
+ Example: {"model.hyperparams.n_estimators": 100, "model.hyperparams.max_depth": 4}
60
+ becomes "n100_d4"
61
+ """
62
+ abbreviations = {
63
+ "n_estimators": "n",
64
+ "max_depth": "d",
65
+ "learning_rate": "lr",
66
+ "min_child_weight": "mcw",
67
+ "subsample": "ss",
68
+ "colsample_bytree": "cs",
69
+ "gamma": "g",
70
+ "reg_alpha": "a",
71
+ "reg_lambda": "l",
72
+ "epochs": "ep",
73
+ "batch_size": "bs",
74
+ "hidden_size": "hs",
75
+ "dropout": "do",
76
+ "weight_decay": "wd",
77
+ }
78
+
79
+ parts = []
80
+ for key, value in overrides.items():
81
+ param_name = key.split(".")[-1]
82
+ abbrev = abbreviations.get(param_name, param_name[:3])
83
+ parts.append(f"{abbrev}{value}")
84
+
85
+ return "_".join(parts)
86
+
87
+
88
+ def generate_queue(sweep_config_path: str) -> None:
89
+ """Generate cartesian product experiment queue from sweep config.
90
+
91
+ Reads sweep parameters, computes the cartesian product of all value lists,
92
+ and writes a queue YAML file with one entry per combination.
93
+
94
+ Args:
95
+ sweep_config_path: Path to sweep config YAML file.
96
+ """
97
+ config_path = Path(sweep_config_path)
98
+ if not config_path.exists():
99
+ print(f"Error: Sweep config not found: {config_path}", file=sys.stderr)
100
+ sys.exit(1)
101
+
102
+ with open(config_path) as f:
103
+ sweep_config = yaml.safe_load(f)
104
+
105
+ sweep_params = sweep_config.get("sweep", {})
106
+ output_path = sweep_config.get("output", "experiments/queue.yaml")
107
+
108
+ if not sweep_params:
109
+ print("Error: No sweep parameters defined in config", file=sys.stderr)
110
+ sys.exit(1)
111
+
112
+ # Extract parameter names and value lists
113
+ param_names = list(sweep_params.keys())
114
+ param_values = list(sweep_params.values())
115
+
116
+ # Generate cartesian product
117
+ combinations = list(itertools.product(*param_values))
118
+
119
+ # Build experiment queue
120
+ queue = []
121
+ for combo in combinations:
122
+ overrides = dict(zip(param_names, combo))
123
+ name = _make_experiment_name(overrides)
124
+ queue.append({
125
+ "experiment_name": name,
126
+ "config_overrides": overrides,
127
+ "status": "pending",
128
+ })
129
+
130
+ # Write queue
131
+ out = Path(output_path)
132
+ out.parent.mkdir(parents=True, exist_ok=True)
133
+
134
+ with open(out, "w") as f:
135
+ yaml.dump(queue, f, default_flow_style=False, sort_keys=False)
136
+
137
+ num_params = len(param_names)
138
+ num_experiments = len(queue)
139
+ print(f"Generated {num_experiments} experiments from {num_params} parameters")
140
+ print(f"Queue written to: {output_path}")
141
+
142
+
143
+ def show_status(queue_path: str) -> None:
144
+ """Show queue progress: counts of pending/running/complete/failed experiments.
145
+
146
+ Args:
147
+ queue_path: Path to experiments/queue.yaml.
148
+ """
149
+ path = Path(queue_path)
150
+ if not path.exists():
151
+ print("No queue found. Run sweep.py to generate one.", file=sys.stderr)
152
+ sys.exit(1)
153
+
154
+ with open(path) as f:
155
+ queue = yaml.safe_load(f) or []
156
+
157
+ counts: dict[str, int] = {}
158
+ for entry in queue:
159
+ status = entry.get("status", "unknown")
160
+ counts[status] = counts.get(status, 0) + 1
161
+
162
+ total = len(queue)
163
+ print(f"Queue: {total} experiments")
164
+ for status, count in sorted(counts.items()):
165
+ print(f" {status}: {count}")
166
+
167
+
168
+ def get_next(queue_path: str) -> None:
169
+ """Print the next pending experiment as JSON for agent consumption.
170
+
171
+ Args:
172
+ queue_path: Path to experiments/queue.yaml.
173
+ """
174
+ path = Path(queue_path)
175
+ if not path.exists():
176
+ print("No queue found.", file=sys.stderr)
177
+ sys.exit(1)
178
+
179
+ with open(path) as f:
180
+ queue = yaml.safe_load(f) or []
181
+
182
+ for entry in queue:
183
+ if entry.get("status") == "pending":
184
+ print(json.dumps(entry, indent=2))
185
+ return
186
+
187
+ print("No pending experiments.", file=sys.stderr)
188
+ sys.exit(1)
189
+
190
+
191
+ def mark_experiment(queue_path: str, name: str, new_status: str) -> None:
192
+ """Mark an experiment as complete or failed in the queue.
193
+
194
+ Args:
195
+ queue_path: Path to experiments/queue.yaml.
196
+ name: Experiment name to mark.
197
+ new_status: New status (complete, failed, running).
198
+ """
199
+ path = Path(queue_path)
200
+ if not path.exists():
201
+ print("No queue found.", file=sys.stderr)
202
+ sys.exit(1)
203
+
204
+ with open(path) as f:
205
+ queue = yaml.safe_load(f) or []
206
+
207
+ found = False
208
+ for entry in queue:
209
+ if entry.get("experiment_name") == name:
210
+ entry["status"] = new_status
211
+ found = True
212
+ break
213
+
214
+ if not found:
215
+ print(f"Error: Experiment '{name}' not found in queue", file=sys.stderr)
216
+ sys.exit(1)
217
+
218
+ with open(path, "w") as f:
219
+ yaml.dump(queue, f, default_flow_style=False, sort_keys=False)
220
+
221
+ print(f"Marked '{name}' as {new_status}")
222
+
223
+
224
+ def _find_queue_path(sweep_config_path: str) -> str:
225
+ """Extract the queue output path from the sweep config."""
226
+ config_path = Path(sweep_config_path)
227
+ if config_path.exists():
228
+ with open(config_path) as f:
229
+ sweep_config = yaml.safe_load(f)
230
+ return sweep_config.get("output", "experiments/queue.yaml")
231
+ return "experiments/queue.yaml"
232
+
233
+
234
+ def main() -> None:
235
+ """CLI entry point."""
236
+ parser = argparse.ArgumentParser(
237
+ description="Hyperparameter sweep tool for the autoresearch pipeline"
238
+ )
239
+ parser.add_argument(
240
+ "sweep_config",
241
+ nargs="?",
242
+ default="sweep_config.yaml",
243
+ help="Path to sweep config YAML (default: sweep_config.yaml)",
244
+ )
245
+ parser.add_argument(
246
+ "--status",
247
+ action="store_true",
248
+ help="Show queue progress",
249
+ )
250
+ parser.add_argument(
251
+ "--next",
252
+ action="store_true",
253
+ help="Print the next pending experiment as JSON",
254
+ )
255
+ parser.add_argument(
256
+ "--mark",
257
+ nargs=2,
258
+ metavar=("NAME", "STATUS"),
259
+ help="Mark an experiment as complete/failed",
260
+ )
261
+
262
+ args = parser.parse_args()
263
+ queue_path = _find_queue_path(args.sweep_config)
264
+
265
+ if args.status:
266
+ show_status(queue_path)
267
+ elif args.next:
268
+ get_next(queue_path)
269
+ elif args.mark:
270
+ mark_experiment(queue_path, args.mark[0], args.mark[1])
271
+ else:
272
+ generate_queue(args.sweep_config)
273
+
274
+
275
+ if __name__ == "__main__":
276
+ main()