claude-turing 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +34 -0
- package/LICENSE +21 -0
- package/README.md +457 -0
- package/agents/ml-evaluator.md +43 -0
- package/agents/ml-researcher.md +74 -0
- package/bin/cli.js +46 -0
- package/bin/turing-init.sh +57 -0
- package/commands/brief.md +83 -0
- package/commands/compare.md +24 -0
- package/commands/design.md +97 -0
- package/commands/init.md +123 -0
- package/commands/logbook.md +51 -0
- package/commands/mode.md +43 -0
- package/commands/poster.md +89 -0
- package/commands/preflight.md +75 -0
- package/commands/report.md +97 -0
- package/commands/rules/loop-protocol.md +91 -0
- package/commands/status.md +24 -0
- package/commands/suggest.md +95 -0
- package/commands/sweep.md +45 -0
- package/commands/train.md +66 -0
- package/commands/try.md +63 -0
- package/commands/turing.md +54 -0
- package/commands/validate.md +34 -0
- package/config/defaults.yaml +45 -0
- package/config/experiment_archetypes.yaml +127 -0
- package/config/lifecycle.toml +31 -0
- package/config/novelty_aliases.yaml +107 -0
- package/config/relationships.toml +125 -0
- package/config/state.toml +24 -0
- package/config/task_taxonomy.yaml +110 -0
- package/config/taxonomy.toml +37 -0
- package/package.json +54 -0
- package/src/claude-md.js +55 -0
- package/src/install.js +107 -0
- package/src/paths.js +20 -0
- package/src/postinstall.js +22 -0
- package/src/verify.js +109 -0
- package/templates/MEMORY.md +36 -0
- package/templates/README.md +93 -0
- package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
- package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
- package/templates/config.yaml +48 -0
- package/templates/evaluate.py +237 -0
- package/templates/features/__init__.py +0 -0
- package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
- package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
- package/templates/features/featurizers.py +138 -0
- package/templates/prepare.py +171 -0
- package/templates/program.md +216 -0
- package/templates/pyproject.toml +8 -0
- package/templates/requirements.txt +8 -0
- package/templates/scripts/__init__.py +0 -0
- package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/classify_task.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/manage_hypotheses.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/turing_io.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/update_state.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
- package/templates/scripts/check_convergence.py +230 -0
- package/templates/scripts/compare_runs.py +124 -0
- package/templates/scripts/critique_hypothesis.py +350 -0
- package/templates/scripts/experiment_index.py +288 -0
- package/templates/scripts/generate_brief.py +389 -0
- package/templates/scripts/generate_logbook.py +423 -0
- package/templates/scripts/log_experiment.py +243 -0
- package/templates/scripts/manage_hypotheses.py +543 -0
- package/templates/scripts/novelty_guard.py +343 -0
- package/templates/scripts/parse_metrics.py +139 -0
- package/templates/scripts/post-train-hook.sh +74 -0
- package/templates/scripts/preflight.py +549 -0
- package/templates/scripts/scaffold.py +409 -0
- package/templates/scripts/show_environment.py +92 -0
- package/templates/scripts/show_experiment_tree.py +144 -0
- package/templates/scripts/show_families.py +133 -0
- package/templates/scripts/show_metrics.py +157 -0
- package/templates/scripts/statistical_compare.py +259 -0
- package/templates/scripts/stop-hook.sh +34 -0
- package/templates/scripts/suggest_next.py +301 -0
- package/templates/scripts/sweep.py +276 -0
- package/templates/scripts/synthesize_decision.py +300 -0
- package/templates/scripts/turing_io.py +76 -0
- package/templates/scripts/update_state.py +296 -0
- package/templates/scripts/validate_stability.py +167 -0
- package/templates/scripts/verify_placeholders.py +119 -0
- package/templates/sweep_config.yaml +14 -0
- package/templates/tests/__init__.py +0 -0
- package/templates/tests/conftest.py +91 -0
- package/templates/train.py +240 -0
|
@@ -0,0 +1,543 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Hypothesis queue manager for the autoresearch pipeline.
|
|
3
|
+
|
|
4
|
+
Manages a structured queue of hypotheses — ideas for experiments
|
|
5
|
+
that can be injected by the human (via /turing:try) or generated
|
|
6
|
+
by the agent. Human-injected hypotheses take priority.
|
|
7
|
+
|
|
8
|
+
This is the mechanism by which research taste reaches the agent:
|
|
9
|
+
the human selects which coins to flip, the agent flips them.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
python scripts/manage_hypotheses.py add "description" [--priority high] [--parent exp-NNN]
|
|
13
|
+
python scripts/manage_hypotheses.py list [--status queued]
|
|
14
|
+
python scripts/manage_hypotheses.py next
|
|
15
|
+
python scripts/manage_hypotheses.py mark <id> <status> [--result exp-NNN]
|
|
16
|
+
python scripts/manage_hypotheses.py count
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import sys
|
|
23
|
+
from datetime import datetime, timezone
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
import yaml
|
|
27
|
+
|
|
28
|
+
DEFAULT_QUEUE_PATH = "hypotheses.yaml"
|
|
29
|
+
DETAIL_DIR = "hypotheses"
|
|
30
|
+
|
|
31
|
+
VALID_STATUSES = {"queued", "in-progress", "tested", "promising", "dead-end"}
|
|
32
|
+
VALID_PRIORITIES = {"high", "medium", "low"}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def load_queue(path: str) -> list[dict]:
    """Read the hypothesis queue index from *path*.

    Returns an empty list when the file is missing, empty, or does not
    contain a YAML list at the top level.
    """
    queue_file = Path(path)
    if not queue_file.exists() or queue_file.stat().st_size == 0:
        return []
    with open(queue_file) as handle:
        loaded = yaml.safe_load(handle)
    if isinstance(loaded, list):
        return loaded
    return []
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def save_queue(path: str, queue: list[dict]) -> None:
    """Write *queue* to *path* as YAML, creating parent dirs as needed."""
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w") as handle:
        # sort_keys=False keeps the insertion order chosen by callers.
        yaml.dump(queue, handle, default_flow_style=False, sort_keys=False)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def get_next_id(queue: list[dict]) -> str:
    """Return the next sequential hypothesis ID in ``hyp-NNN`` form.

    Entries whose ID is missing or not parseable as ``hyp-<int>`` are
    skipped; an empty queue yields ``hyp-001``.
    """
    if not queue:
        return "hyp-001"
    highest = 0
    for item in queue:
        identifier = item.get("id", "")
        if not identifier.startswith("hyp-"):
            continue
        try:
            number = int(identifier.split("-")[1])
        except (ValueError, IndexError):
            continue
        if number > highest:
            highest = number
    return f"hyp-{highest + 1:03d}"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def create_detail_file(
    hid: str,
    description: str,
    source: str = "human",
    priority: str = "high",
    parent_experiment: str | None = None,
    parent_hypothesis: str | None = None,
    family: str | None = None,
    tags: list[str] | None = None,
    architecture: dict | None = None,
    hyperparameters: dict | None = None,
    features: dict | None = None,
    expected_outcome: dict | None = None,
) -> Path:
    """Write the rich per-hypothesis file at ``hypotheses/<hid>.yaml``.

    Returns the path of the file that was written.
    """
    out_dir = Path(DETAIL_DIR)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Field order below is deliberate: dumped with sort_keys=False so the
    # on-disk YAML stays human-readable in this sequence.
    record = {
        "id": hid,
        "description": description,
        "source": source,
        "status": "queued",
        "priority": priority,
        "created_at": datetime.now(timezone.utc).isoformat(),
        "architecture": architecture or {},
        "hyperparameters": hyperparameters or {},
        "features": features or {"add": [], "remove": [], "transform": []},
        "expected_outcome": expected_outcome or {},
        # Result slots are filled in later, once an experiment has run.
        "result": {
            "experiment_id": None,
            "metrics": {},
            "verdict": None,
            "notes": None,
        },
        "parent_experiment": parent_experiment,
        "parent_hypothesis": parent_hypothesis,
        "family": family,
        "tags": tags or [],
    }

    out_path = out_dir / f"{hid}.yaml"
    with open(out_path, "w") as handle:
        yaml.dump(record, handle, default_flow_style=False, sort_keys=False)
    return out_path
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def load_detail(hid: str) -> dict | None:
    """Return the parsed detail file for *hid*, or None if absent or empty."""
    detail_file = Path(DETAIL_DIR) / f"{hid}.yaml"
    if not detail_file.exists():
        return None
    with open(detail_file) as handle:
        # An empty file parses to None; normalize falsy parses to None too.
        return yaml.safe_load(handle) or None
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def update_detail(hid: str, updates: dict) -> bool:
    """Merge *updates* into the detail file for *hid* and rewrite it.

    Dict-valued updates are shallow-merged into existing dict fields;
    any other value replaces the field outright. Returns False when no
    detail file exists for *hid*.
    """
    current = load_detail(hid)
    if current is None:
        return False

    for field, new_value in updates.items():
        existing = current.get(field)
        if isinstance(new_value, dict) and isinstance(existing, dict):
            existing.update(new_value)
        else:
            current[field] = new_value

    detail_file = Path(DETAIL_DIR) / f"{hid}.yaml"
    with open(detail_file, "w") as handle:
        yaml.dump(current, handle, default_flow_style=False, sort_keys=False)
    return True
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def load_archetypes(config_path: str | None = None) -> dict:
    """Load experiment archetypes from YAML config.

    Searches an explicit *config_path* first, then standard locations
    (CWD-relative ``config/`` and two script-relative candidates).

    Returns:
        The ``archetypes`` mapping from the first existing candidate, or
        {} when no file is found or it does not contain a mapping.
    """
    candidates: list[Path] = []
    if config_path:
        candidates.append(Path(config_path))
    candidates.extend([
        Path("config") / "experiment_archetypes.yaml",
        Path(__file__).parent.parent.parent / "config" / "experiment_archetypes.yaml",
        Path(__file__).parent.parent / "config" / "experiment_archetypes.yaml",
    ])
    for p in candidates:
        if p.exists():
            with open(p) as f:
                data = yaml.safe_load(f)
            # Guard against a non-mapping YAML root (list, scalar, None):
            # the previous `data.get(...)` would raise AttributeError on a
            # file whose top level is a list.
            if isinstance(data, dict):
                return data.get("archetypes", {})
            return {}
    return {}
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def expand_archetype(archetype_name: str, config_path: str | None = None) -> tuple[str, str | None, list[str] | None]:
    """Turn an archetype key into a structured hypothesis description.

    Args:
        archetype_name: Key from experiment_archetypes.yaml (e.g., "model_comparison").
        config_path: Optional path to the archetypes YAML.

    Returns:
        Tuple of (description, family_tag, tags); when the archetype is
        unknown, a bracketed fallback description with None for the rest.
    """
    catalog = load_archetypes(config_path)

    if archetype_name not in catalog:
        known = ", ".join(sorted(catalog.keys())) if catalog else "none loaded"
        fallback = f"[Unknown archetype: {archetype_name}. Available: {known}]"
        return fallback, None, None

    spec = catalog[archetype_name]
    title = spec.get("name", archetype_name)
    steps = spec.get("steps", [])
    context = spec.get("when_to_use", "")
    n_experiments = spec.get("expected_experiments", "?")

    # Assemble a numbered, indented description block.
    parts = [f"{title}:"]
    parts.extend(f"  {idx}. {step}" for idx, step in enumerate(steps, 1))
    if context:
        parts.append(f"  Context: {context}")
    parts.append(f"  Expected: ~{n_experiments} experiments")

    return "\n".join(parts), spec.get("family_tag"), [archetype_name, "archetype"]
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def add_hypothesis(
    queue_path: str,
    description: str,
    source: str = "human",
    priority: str = "high",
    parent_experiment: str | None = None,
    parent_hypothesis: str | None = None,
    family: str | None = None,
    tags: list[str] | None = None,
    architecture: dict | None = None,
    hyperparameters: dict | None = None,
    features: dict | None = None,
    expected_outcome: dict | None = None,
) -> str:
    """Append a hypothesis to the queue index and write its detail file.

    Returns:
        The newly assigned hypothesis ID (``hyp-NNN``).
    """
    queue = load_queue(queue_path)
    new_id = get_next_id(queue)

    # Lightweight index entry; the rich fields live in the detail file.
    queue.append({
        "id": new_id,
        "description": description,
        "source": source,
        "status": "queued",
        "priority": priority,
        "parent_experiment": parent_experiment,
        "result_experiment": None,
        "created_at": datetime.now(timezone.utc).isoformat(),
    })
    save_queue(queue_path, queue)

    # Rich companion record at hypotheses/<id>.yaml.
    create_detail_file(
        hid=new_id,
        description=description,
        source=source,
        priority=priority,
        parent_experiment=parent_experiment,
        parent_hypothesis=parent_hypothesis,
        family=family,
        tags=tags,
        architecture=architecture,
        hyperparameters=hyperparameters,
        features=features,
        expected_outcome=expected_outcome,
    )

    return new_id
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def list_hypotheses(queue_path: str, status_filter: str | None = None) -> list[dict]:
    """Return queue entries, keeping only *status_filter* entries when given."""
    entries = load_queue(queue_path)
    if not status_filter:
        return entries
    return [entry for entry in entries if entry.get("status") == status_filter]
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def get_next_hypothesis(queue_path: str) -> dict | None:
    """Pick the next queued hypothesis, or None if the queue is empty.

    Ordering: priority high > medium > low; within a priority, human-
    sourced entries come before agent-sourced; remaining ties keep queue
    (FIFO) order because the comparison is stable.
    """
    pending = [h for h in load_queue(queue_path) if h.get("status") == "queued"]
    if not pending:
        return None

    priority_rank = {"high": 0, "medium": 1, "low": 2}
    source_rank = {"human": 0, "literature": 1, "taxonomy": 2, "agent": 3}

    def rank(entry: dict) -> tuple[int, int]:
        # NOTE(review): an unrecognized source string falls back to rank 1,
        # which sorts it AHEAD of "agent" (rank 3) — confirm intended.
        return (
            priority_rank.get(entry.get("priority", "medium"), 1),
            source_rank.get(entry.get("source", "agent"), 1),
        )

    # min() with a stable key returns the first minimal element, which
    # matches sort-then-take-first on the same key.
    return min(pending, key=rank)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def mark_hypothesis(
    queue_path: str,
    hypothesis_id: str,
    new_status: str,
    result_experiment: str | None = None,
    result_metrics: dict | None = None,
    result_notes: str | None = None,
) -> bool:
    """Update a hypothesis status in both the index and detail file.

    Args:
        queue_path: Path to the hypotheses index YAML.
        hypothesis_id: ID to update (e.g. "hyp-003").
        new_status: One of VALID_STATUSES.
        result_experiment: Experiment ID that tested this hypothesis.
        result_metrics: Metrics dict from the result experiment.
        result_notes: Free-form notes about the outcome.

    Returns:
        True if the hypothesis was found and updated.
    """
    if new_status not in VALID_STATUSES:
        print(f"Invalid status: {new_status}. Valid: {', '.join(sorted(VALID_STATUSES))}", file=sys.stderr)
        return False

    # Update the lightweight index entry.
    queue = load_queue(queue_path)
    found = False
    for entry in queue:
        if entry.get("id") == hypothesis_id:
            entry["status"] = new_status
            if result_experiment:
                entry["result_experiment"] = result_experiment
            save_queue(queue_path, queue)
            found = True
            break

    if not found:
        return False

    # Mirror the change into the rich detail file.
    detail_updates: dict = {"status": new_status}
    result_update: dict = {}
    if result_experiment:
        result_update["experiment_id"] = result_experiment
    if result_metrics:
        result_update["metrics"] = result_metrics
    if result_notes:
        result_update["notes"] = result_notes
    # Terminal statuses double as the verdict. Previously the verdict was
    # recorded only when --result/--metrics/--notes accompanied the mark,
    # so a bare `mark hyp-NNN dead-end` never persisted a verdict; write
    # it unconditionally for terminal statuses.
    if new_status in ("tested", "promising", "dead-end"):
        result_update["verdict"] = new_status
    if result_update:
        detail_updates["result"] = result_update

    update_detail(hypothesis_id, detail_updates)
    return True
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def count_by_status(queue_path: str) -> dict[str, int]:
    """Tally queue entries by their status field ("unknown" when missing)."""
    tally: dict[str, int] = {}
    for entry in load_queue(queue_path):
        key = entry.get("status", "unknown")
        tally[key] = tally.get(key, 0) + 1
    return tally
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def format_table(hypotheses: list[dict]) -> str:
    """Render hypotheses as a fixed-width text table (one row per entry)."""
    if not hypotheses:
        return "No hypotheses in queue."

    rows = [
        f"{'ID':<10} {'Status':<12} {'Priority':<8} {'Source':<8} {'Description'}",
        "-" * 80,
    ]

    for entry in hypotheses:
        # Descriptions are truncated so the table stays scannable.
        summary = entry.get("description", "")[:45]
        row = (
            f"{entry.get('id', '?'):<10} "
            f"{entry.get('status', '?'):<12} "
            f"{entry.get('priority', '?'):<8} "
            f"{entry.get('source', '?'):<8} "
            f"{summary}"
        )
        linked_experiment = entry.get("result_experiment")
        if linked_experiment:
            row += f" -> {linked_experiment}"
        rows.append(row)

    return "\n".join(rows)
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def main() -> None:
    """CLI entry point.

    Subcommands: add, list, next, show, mark, count, critique. All of
    them share a --queue flag pointing at the hypotheses index YAML.
    """
    parser = argparse.ArgumentParser(description="Manage hypothesis queue")
    parser.add_argument("--queue", default=DEFAULT_QUEUE_PATH, help="Path to hypotheses.yaml")
    subparsers = parser.add_subparsers(dest="command")

    # add: description is optional because --archetype can supply one.
    add_parser = subparsers.add_parser("add", help="Add a hypothesis")
    add_parser.add_argument("description", nargs="?", default=None, help="What to try and why")
    add_parser.add_argument("--archetype", default=None, help="Expand from archetype (e.g., model_comparison)")
    add_parser.add_argument("--priority", default="high", choices=sorted(VALID_PRIORITIES))
    add_parser.add_argument("--source", default="human", choices=["human", "agent", "literature", "taxonomy"])
    add_parser.add_argument("--parent", default=None, help="Parent experiment ID")
    add_parser.add_argument("--parent-hyp", default=None, help="Parent hypothesis ID")
    add_parser.add_argument("--family", default=None, help="Experiment family (e.g., optimizer-sweep)")
    add_parser.add_argument("--tags", default=None, help="Comma-separated tags")
    add_parser.add_argument("--model-type", default=None, help="Proposed model type")
    add_parser.add_argument("--hyperparams", default=None, help="JSON string of hyperparameters")
    add_parser.add_argument("--expected", default=None, help="Expected outcome description")

    # list: optional status filter restricted to the known statuses.
    list_parser = subparsers.add_parser("list", help="List hypotheses")
    list_parser.add_argument("--status", default=None, choices=sorted(VALID_STATUSES))

    # next: no arguments beyond the shared --queue.
    subparsers.add_parser("next", help="Get next queued hypothesis")

    # show: dumps the rich detail file for one hypothesis.
    show_parser = subparsers.add_parser("show", help="Show detailed hypothesis file")
    show_parser.add_argument("id", help="Hypothesis ID")

    # mark: status transition plus optional result metadata.
    mark_parser = subparsers.add_parser("mark", help="Update hypothesis status")
    mark_parser.add_argument("id", help="Hypothesis ID")
    mark_parser.add_argument("status", choices=sorted(VALID_STATUSES))
    mark_parser.add_argument("--result", default=None, help="Result experiment ID")
    mark_parser.add_argument("--metrics", default=None, help="JSON string of result metrics")
    mark_parser.add_argument("--notes", default=None, help="Notes about the result")

    # count: status histogram of the queue.
    subparsers.add_parser("count", help="Count hypotheses by status")

    # critique: pre-execution scoring via scripts.critique_hypothesis.
    critique_parser = subparsers.add_parser("critique", help="Score a hypothesis before execution")
    critique_parser.add_argument("id", help="Hypothesis ID to critique")
    critique_parser.add_argument("--log", default="experiments/log.jsonl")
    critique_parser.add_argument("--config", default="config.yaml")
    critique_parser.add_argument("--json", action="store_true", help="Output as JSON")

    args = parser.parse_args()

    if args.command == "add":
        description = args.description
        family = args.family
        tags = [t.strip() for t in args.tags.split(",")] if args.tags else None

        # Expand archetype if specified; explicit CLI values win over the
        # archetype's description/family, while archetype tags are appended.
        if args.archetype:
            arch_desc, arch_family, arch_tags = expand_archetype(args.archetype)
            description = description or arch_desc
            family = family or arch_family
            if arch_tags:
                tags = (tags or []) + arch_tags

        if not description:
            print("Error: provide a description or --archetype", file=sys.stderr)
            sys.exit(1)

        # Parse optional structured fields
        architecture = {}
        if args.model_type:
            architecture["model_type"] = args.model_type
        hyperparameters = None
        if args.hyperparams:
            import json as _json
            hyperparameters = _json.loads(args.hyperparams)
        expected_outcome = {}
        if args.expected:
            expected_outcome["rationale"] = args.expected

        # NOTE: the `features` field is never populated from the CLI here;
        # create_detail_file falls back to its empty add/remove/transform dict.
        hid = add_hypothesis(
            args.queue, description, args.source, args.priority,
            parent_experiment=args.parent,
            parent_hypothesis=getattr(args, "parent_hyp", None),
            family=family,
            tags=tags,
            architecture=architecture or None,
            hyperparameters=hyperparameters,
            expected_outcome=expected_outcome or None,
        )
        # Only the first line of a (possibly multi-line archetype) description
        # is echoed back.
        short_desc = description.split("\n")[0][:60]
        print(f"Added {hid}: {short_desc}")
        print(f"Detail: {DETAIL_DIR}/{hid}.yaml")

    elif args.command == "show":
        detail = load_detail(args.id)
        if detail:
            print(yaml.dump(detail, default_flow_style=False, sort_keys=False))
        else:
            print(f"No detail file for {args.id}.", file=sys.stderr)
            sys.exit(1)

    elif args.command == "list":
        hypotheses = list_hypotheses(args.queue, args.status)
        print(format_table(hypotheses))

    elif args.command == "next":
        h = get_next_hypothesis(args.queue)
        if h:
            import json
            # Also print detail file path if it exists
            detail = load_detail(h["id"])
            print(json.dumps(h, indent=2))
            if detail:
                print(f"\nDetail: {DETAIL_DIR}/{h['id']}.yaml")
        else:
            print("No queued hypotheses.", file=sys.stderr)
            sys.exit(1)

    elif args.command == "mark":
        result_metrics = None
        if args.metrics:
            import json as _json
            result_metrics = _json.loads(args.metrics)
        found = mark_hypothesis(
            args.queue, args.id, args.status,
            result_experiment=args.result,
            result_metrics=result_metrics,
            result_notes=args.notes,
        )
        if found:
            print(f"Marked {args.id} as {args.status}")
        else:
            print(f"Hypothesis {args.id} not found.", file=sys.stderr)
            sys.exit(1)

    elif args.command == "count":
        counts = count_by_status(args.queue)
        total = sum(counts.values())
        print(f"Total: {total}")
        for status, count in sorted(counts.items()):
            print(f"  {status}: {count}")

    elif args.command == "critique":
        detail = load_detail(args.id)
        if not detail:
            print(f"Hypothesis {args.id} not found.", file=sys.stderr)
            sys.exit(1)

        # Imported lazily so the other subcommands work even if the
        # critique module (a sibling script) is unavailable.
        from scripts.critique_hypothesis import critique_hypothesis, format_critique
        import json as _json

        result = critique_hypothesis(detail["description"], args.log, args.config)

        # Store critique score in the detail file
        update_detail(args.id, {"critique_score": result["overall_score"],
                                "critique_verdict": result["verdict"]})

        if args.json:
            print(_json.dumps({
                "id": args.id,
                "overall_score": result["overall_score"],
                "verdict": result["verdict"],
                "novelty_score": result["novelty"]["score"],
                "feasibility_score": result["feasibility"]["score"],
                "impact_score": result["impact"]["score"],
            }, indent=2))
        else:
            print(format_critique(result))

    else:
        # No/unknown subcommand: show usage rather than erroring out.
        parser.print_help()


if __name__ == "__main__":
    main()
|