claude-turing 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/.claude-plugin/plugin.json +34 -0
  2. package/LICENSE +21 -0
  3. package/README.md +457 -0
  4. package/agents/ml-evaluator.md +43 -0
  5. package/agents/ml-researcher.md +74 -0
  6. package/bin/cli.js +46 -0
  7. package/bin/turing-init.sh +57 -0
  8. package/commands/brief.md +83 -0
  9. package/commands/compare.md +24 -0
  10. package/commands/design.md +97 -0
  11. package/commands/init.md +123 -0
  12. package/commands/logbook.md +51 -0
  13. package/commands/mode.md +43 -0
  14. package/commands/poster.md +89 -0
  15. package/commands/preflight.md +75 -0
  16. package/commands/report.md +97 -0
  17. package/commands/rules/loop-protocol.md +91 -0
  18. package/commands/status.md +24 -0
  19. package/commands/suggest.md +95 -0
  20. package/commands/sweep.md +45 -0
  21. package/commands/train.md +66 -0
  22. package/commands/try.md +63 -0
  23. package/commands/turing.md +54 -0
  24. package/commands/validate.md +34 -0
  25. package/config/defaults.yaml +45 -0
  26. package/config/experiment_archetypes.yaml +127 -0
  27. package/config/lifecycle.toml +31 -0
  28. package/config/novelty_aliases.yaml +107 -0
  29. package/config/relationships.toml +125 -0
  30. package/config/state.toml +24 -0
  31. package/config/task_taxonomy.yaml +110 -0
  32. package/config/taxonomy.toml +37 -0
  33. package/package.json +54 -0
  34. package/src/claude-md.js +55 -0
  35. package/src/install.js +107 -0
  36. package/src/paths.js +20 -0
  37. package/src/postinstall.js +22 -0
  38. package/src/verify.js +109 -0
  39. package/templates/MEMORY.md +36 -0
  40. package/templates/README.md +93 -0
  41. package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
  42. package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
  43. package/templates/config.yaml +48 -0
  44. package/templates/evaluate.py +237 -0
  45. package/templates/features/__init__.py +0 -0
  46. package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
  47. package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
  48. package/templates/features/featurizers.py +138 -0
  49. package/templates/prepare.py +171 -0
  50. package/templates/program.md +216 -0
  51. package/templates/pyproject.toml +8 -0
  52. package/templates/requirements.txt +8 -0
  53. package/templates/scripts/__init__.py +0 -0
  54. package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  55. package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
  56. package/templates/scripts/__pycache__/classify_task.cpython-314.pyc +0 -0
  57. package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
  58. package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
  59. package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
  60. package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
  61. package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
  62. package/templates/scripts/__pycache__/manage_hypotheses.cpython-314.pyc +0 -0
  63. package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
  64. package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
  65. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  66. package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
  67. package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
  68. package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
  69. package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
  70. package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
  71. package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
  72. package/templates/scripts/__pycache__/turing_io.cpython-314.pyc +0 -0
  73. package/templates/scripts/__pycache__/update_state.cpython-314.pyc +0 -0
  74. package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
  75. package/templates/scripts/check_convergence.py +230 -0
  76. package/templates/scripts/compare_runs.py +124 -0
  77. package/templates/scripts/critique_hypothesis.py +350 -0
  78. package/templates/scripts/experiment_index.py +288 -0
  79. package/templates/scripts/generate_brief.py +389 -0
  80. package/templates/scripts/generate_logbook.py +423 -0
  81. package/templates/scripts/log_experiment.py +243 -0
  82. package/templates/scripts/manage_hypotheses.py +543 -0
  83. package/templates/scripts/novelty_guard.py +343 -0
  84. package/templates/scripts/parse_metrics.py +139 -0
  85. package/templates/scripts/post-train-hook.sh +74 -0
  86. package/templates/scripts/preflight.py +549 -0
  87. package/templates/scripts/scaffold.py +409 -0
  88. package/templates/scripts/show_environment.py +92 -0
  89. package/templates/scripts/show_experiment_tree.py +144 -0
  90. package/templates/scripts/show_families.py +133 -0
  91. package/templates/scripts/show_metrics.py +157 -0
  92. package/templates/scripts/statistical_compare.py +259 -0
  93. package/templates/scripts/stop-hook.sh +34 -0
  94. package/templates/scripts/suggest_next.py +301 -0
  95. package/templates/scripts/sweep.py +276 -0
  96. package/templates/scripts/synthesize_decision.py +300 -0
  97. package/templates/scripts/turing_io.py +76 -0
  98. package/templates/scripts/update_state.py +296 -0
  99. package/templates/scripts/validate_stability.py +167 -0
  100. package/templates/scripts/verify_placeholders.py +119 -0
  101. package/templates/sweep_config.yaml +14 -0
  102. package/templates/tests/__init__.py +0 -0
  103. package/templates/tests/conftest.py +91 -0
  104. package/templates/train.py +240 -0
@@ -0,0 +1,543 @@
1
+ #!/usr/bin/env python3
2
+ """Hypothesis queue manager for the autoresearch pipeline.
3
+
4
+ Manages a structured queue of hypotheses — ideas for experiments
5
+ that can be injected by the human (via /turing:try) or generated
6
+ by the agent. Human-injected hypotheses take priority.
7
+
8
+ This is the mechanism by which research taste reaches the agent:
9
+ the human selects which coins to flip, the agent flips them.
10
+
11
+ Usage:
12
+ python scripts/manage_hypotheses.py add "description" [--priority high] [--parent exp-NNN]
13
+ python scripts/manage_hypotheses.py list [--status queued]
14
+ python scripts/manage_hypotheses.py next
15
+ python scripts/manage_hypotheses.py mark <id> <status> [--result exp-NNN]
16
+ python scripts/manage_hypotheses.py count
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import argparse
22
+ import sys
23
+ from datetime import datetime, timezone
24
+ from pathlib import Path
25
+
26
+ import yaml
27
+
28
# Index file holding the lightweight hypothesis queue (a YAML list of dicts).
DEFAULT_QUEUE_PATH = "hypotheses.yaml"
# Directory containing the rich per-hypothesis detail files (hyp-NNN.yaml).
DETAIL_DIR = "hypotheses"

# Lifecycle states a hypothesis may be in (see `mark` subcommand).
VALID_STATUSES = {"queued", "in-progress", "tested", "promising", "dead-end"}
# Accepted values for the --priority flag / "priority" field.
VALID_PRIORITIES = {"high", "medium", "low"}
33
+
34
+
35
def load_queue(path: str) -> list[dict]:
    """Read the hypothesis queue from a YAML file.

    Returns an empty list when the file is missing, zero-length, or does
    not parse to a YAML list.
    """
    queue_file = Path(path)
    if not queue_file.exists() or queue_file.stat().st_size == 0:
        return []
    with open(queue_file) as handle:
        loaded = yaml.safe_load(handle)
    if isinstance(loaded, list):
        return loaded
    return []
43
+
44
+
45
def save_queue(path: str, queue: list[dict]) -> None:
    """Write the hypothesis queue to *path* as YAML, creating parent dirs."""
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w") as handle:
        yaml.dump(queue, handle, default_flow_style=False, sort_keys=False)
51
+
52
+
53
def get_next_id(queue: list[dict]) -> str:
    """Return the next sequential hypothesis ID in ``hyp-NNN`` form.

    Scans existing ``hyp-`` IDs for the highest numeric suffix; entries
    with malformed IDs are ignored.
    """
    if not queue:
        return "hyp-001"
    highest = 0
    for item in queue:
        identifier = item.get("id", "")
        if not identifier.startswith("hyp-"):
            continue
        try:
            highest = max(highest, int(identifier.split("-")[1]))
        except (ValueError, IndexError):
            # Malformed suffix (e.g. "hyp-x") — skip it.
            pass
    return f"hyp-{highest + 1:03d}"
67
+
68
+
69
def create_detail_file(
    hid: str,
    description: str,
    source: str = "human",
    priority: str = "high",
    parent_experiment: str | None = None,
    parent_hypothesis: str | None = None,
    family: str | None = None,
    tags: list[str] | None = None,
    architecture: dict | None = None,
    hyperparameters: dict | None = None,
    features: dict | None = None,
    expected_outcome: dict | None = None,
) -> Path:
    """Write the rich per-hypothesis YAML file under hypotheses/.

    The record starts with status "queued" and an empty result stub.
    Returns the path of the created file (hypotheses/<hid>.yaml).
    """
    out_dir = Path(DETAIL_DIR)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Field order is preserved in the YAML output (sort_keys=False).
    record = {
        "id": hid,
        "description": description,
        "source": source,
        "status": "queued",
        "priority": priority,
        "created_at": datetime.now(timezone.utc).isoformat(),
        "architecture": architecture or {},
        "hyperparameters": hyperparameters or {},
        "features": features or {"add": [], "remove": [], "transform": []},
        "expected_outcome": expected_outcome or {},
        "result": {
            "experiment_id": None,
            "metrics": {},
            "verdict": None,
            "notes": None,
        },
        "parent_experiment": parent_experiment,
        "parent_hypothesis": parent_hypothesis,
        "family": family,
        "tags": tags or [],
    }

    out_path = out_dir / f"{hid}.yaml"
    with open(out_path, "w") as handle:
        yaml.dump(record, handle, default_flow_style=False, sort_keys=False)
    return out_path
114
+
115
+
116
def load_detail(hid: str) -> dict | None:
    """Read hypotheses/<hid>.yaml; return None when absent or empty."""
    detail_file = Path(DETAIL_DIR) / f"{hid}.yaml"
    if not detail_file.exists():
        return None
    with open(detail_file) as handle:
        return yaml.safe_load(handle) or None
123
+
124
+
125
def update_detail(hid: str, updates: dict) -> bool:
    """Merge *updates* into the detail file for *hid* and rewrite it.

    Dict-valued updates are shallow-merged into existing dict fields;
    all other values overwrite. Returns False when no detail file exists.
    """
    current = load_detail(hid)
    if current is None:
        return False

    for field, new_value in updates.items():
        existing = current.get(field)
        if isinstance(new_value, dict) and isinstance(existing, dict):
            existing.update(new_value)
        else:
            current[field] = new_value

    detail_file = Path(DETAIL_DIR) / f"{hid}.yaml"
    with open(detail_file, "w") as handle:
        yaml.dump(current, handle, default_flow_style=False, sort_keys=False)
    return True
141
+
142
+
143
def load_archetypes(config_path: str | None = None) -> dict:
    """Load experiment archetypes from YAML config.

    Searches for config/experiment_archetypes.yaml in standard locations;
    an explicit *config_path*, when given, is tried first. The first
    existing candidate wins. Returns {} when no config is found or the
    file does not contain a mapping with an "archetypes" mapping inside.
    """
    candidates = []
    if config_path:
        candidates.append(Path(config_path))
    candidates.extend([
        Path("config") / "experiment_archetypes.yaml",
        Path(__file__).parent.parent.parent / "config" / "experiment_archetypes.yaml",
        Path(__file__).parent.parent / "config" / "experiment_archetypes.yaml",
    ])
    for p in candidates:
        if p.exists():
            with open(p) as f:
                data = yaml.safe_load(f)
            # Bug fix: a non-mapping YAML root (list, scalar) previously
            # raised AttributeError on data.get(); treat it as no config.
            if isinstance(data, dict):
                archetypes = data.get("archetypes", {})
                return archetypes if isinstance(archetypes, dict) else {}
            return {}
    return {}
162
+
163
+
164
def expand_archetype(archetype_name: str, config_path: str | None = None) -> tuple[str, str | None, list[str] | None]:
    """Turn an archetype key into a structured hypothesis description.

    Args:
        archetype_name: Key from experiment_archetypes.yaml (e.g., "model_comparison").
        config_path: Optional path to archetypes YAML.

    Returns:
        Tuple of (description, family_tag, tags); a bracketed fallback
        description plus (None, None) when the archetype is unknown.
    """
    archetypes = load_archetypes(config_path)

    if archetype_name not in archetypes:
        if archetypes:
            available = ", ".join(sorted(archetypes.keys()))
        else:
            available = "none loaded"
        return (
            f"[Unknown archetype: {archetype_name}. Available: {available}]",
            None,
            None,
        )

    entry = archetypes[archetype_name]
    title = entry.get("name", archetype_name)
    when = entry.get("when_to_use", "")
    expected = entry.get("expected_experiments", "?")

    # Compose the multi-line description: title, numbered steps, context.
    parts = [f"{title}:"]
    parts.extend(
        f"  {idx}. {step}"
        for idx, step in enumerate(entry.get("steps", []), 1)
    )
    if when:
        parts.append(f"  Context: {when}")
    parts.append(f"  Expected: ~{expected} experiments")

    return "\n".join(parts), entry.get("family_tag"), [archetype_name, "archetype"]
204
+
205
+
206
def add_hypothesis(
    queue_path: str,
    description: str,
    source: str = "human",
    priority: str = "high",
    parent_experiment: str | None = None,
    parent_hypothesis: str | None = None,
    family: str | None = None,
    tags: list[str] | None = None,
    architecture: dict | None = None,
    hyperparameters: dict | None = None,
    features: dict | None = None,
    expected_outcome: dict | None = None,
) -> str:
    """Append an index entry to the queue and write the rich detail file.

    Returns the newly assigned hypothesis ID.
    """
    queue = load_queue(queue_path)
    hid = get_next_id(queue)

    # Lightweight entry kept in the index file.
    queue.append({
        "id": hid,
        "description": description,
        "source": source,
        "status": "queued",
        "priority": priority,
        "parent_experiment": parent_experiment,
        "result_experiment": None,
        "created_at": datetime.now(timezone.utc).isoformat(),
    })
    save_queue(queue_path, queue)

    # Rich per-hypothesis detail file.
    create_detail_file(
        hid=hid,
        description=description,
        source=source,
        priority=priority,
        parent_experiment=parent_experiment,
        parent_hypothesis=parent_hypothesis,
        family=family,
        tags=tags,
        architecture=architecture,
        hyperparameters=hyperparameters,
        features=features,
        expected_outcome=expected_outcome,
    )

    return hid
258
+
259
+
260
def list_hypotheses(queue_path: str, status_filter: str | None = None) -> list[dict]:
    """Return the queue, keeping only *status_filter* entries when given."""
    entries = load_queue(queue_path)
    if not status_filter:
        return entries
    return [entry for entry in entries if entry.get("status") == status_filter]
266
+
267
+
268
def get_next_hypothesis(queue_path: str) -> dict | None:
    """Get the next queued hypothesis, prioritizing high > medium > low.

    Within the same priority, human-sourced hypotheses come before
    agent-sourced. Within the same priority and source, earlier
    hypotheses come first (FIFO — list.sort() is stable).

    Returns None when nothing is queued.
    """
    queue = load_queue(queue_path)
    queued = [h for h in queue if h.get("status") == "queued"]
    if not queued:
        return None

    priority_order = {"high": 0, "medium": 1, "low": 2}
    source_order = {"human": 0, "literature": 1, "taxonomy": 2, "agent": 3}

    queued.sort(key=lambda h: (
        # Unknown priority strings rank as "medium".
        priority_order.get(h.get("priority", "medium"), 1),
        # Bug fix: an unrecognized source previously fell back to rank 1,
        # outranking "taxonomy" (2) and "agent" (3) — inconsistent with
        # the missing-key default of "agent". Rank unknowns last instead.
        source_order.get(h.get("source", "agent"), 3),
    ))
    return queued[0]
287
+
288
+
289
def mark_hypothesis(
    queue_path: str,
    hypothesis_id: str,
    new_status: str,
    result_experiment: str | None = None,
    result_metrics: dict | None = None,
    result_notes: str | None = None,
) -> bool:
    """Update a hypothesis status in both the index and detail file.

    Returns True when the hypothesis was found and updated; False for an
    invalid status (also reported on stderr) or an unknown ID.
    """
    if new_status not in VALID_STATUSES:
        print(f"Invalid status: {new_status}. Valid: {', '.join(sorted(VALID_STATUSES))}", file=sys.stderr)
        return False

    # Index: locate the entry and rewrite the queue file.
    queue = load_queue(queue_path)
    target = None
    for entry in queue:
        if entry.get("id") == hypothesis_id:
            target = entry
            break
    if target is None:
        return False
    target["status"] = new_status
    if result_experiment:
        target["result_experiment"] = result_experiment
    save_queue(queue_path, queue)

    # Detail file: mirror the status, plus any result fields supplied.
    detail_updates: dict = {"status": new_status}
    if result_experiment or result_metrics or result_notes:
        result_update = {}
        if result_experiment:
            result_update["experiment_id"] = result_experiment
        if result_metrics:
            result_update["metrics"] = result_metrics
        if result_notes:
            result_update["notes"] = result_notes
        # Terminal statuses double as the result verdict.
        if new_status in ("tested", "promising", "dead-end"):
            result_update["verdict"] = new_status
        detail_updates["result"] = result_update

    update_detail(hypothesis_id, detail_updates)
    return True
337
+
338
+
339
def count_by_status(queue_path: str) -> dict[str, int]:
    """Tally queue entries by status ("unknown" when the field is absent)."""
    tally: dict[str, int] = {}
    for item in load_queue(queue_path):
        key = item.get("status", "unknown")
        tally[key] = tally.get(key, 0) + 1
    return tally
347
+
348
+
349
def format_table(hypotheses: list[dict]) -> str:
    """Render hypotheses as a fixed-width text table.

    Descriptions are truncated to 45 characters; a result experiment,
    when present, is appended as "-> exp-NNN". Returns a placeholder
    message for an empty list.
    """
    if not hypotheses:
        return "No hypotheses in queue."

    rows = [
        f"{'ID':<10} {'Status':<12} {'Priority':<8} {'Source':<8} {'Description'}",
        "-" * 80,
    ]
    for entry in hypotheses:
        summary = entry.get("description", "")[:45]
        row = (
            f"{entry.get('id', '?'):<10} "
            f"{entry.get('status', '?'):<12} "
            f"{entry.get('priority', '?'):<8} "
            f"{entry.get('source', '?'):<8} "
            f"{summary}"
        )
        if entry.get("result_experiment"):
            row += f" -> {entry['result_experiment']}"
        rows.append(row)
    return "\n".join(rows)
366
+
367
+
368
def main() -> None:
    """CLI entry point.

    Subcommands: add, list, next, show, mark, count, critique. All of
    them operate on the queue file given by --queue (default
    hypotheses.yaml) and the per-hypothesis detail files in hypotheses/.
    """
    parser = argparse.ArgumentParser(description="Manage hypothesis queue")
    parser.add_argument("--queue", default=DEFAULT_QUEUE_PATH, help="Path to hypotheses.yaml")
    subparsers = parser.add_subparsers(dest="command")

    # add — description is optional because --archetype can supply one.
    add_parser = subparsers.add_parser("add", help="Add a hypothesis")
    add_parser.add_argument("description", nargs="?", default=None, help="What to try and why")
    add_parser.add_argument("--archetype", default=None, help="Expand from archetype (e.g., model_comparison)")
    add_parser.add_argument("--priority", default="high", choices=sorted(VALID_PRIORITIES))
    add_parser.add_argument("--source", default="human", choices=["human", "agent", "literature", "taxonomy"])
    add_parser.add_argument("--parent", default=None, help="Parent experiment ID")
    add_parser.add_argument("--parent-hyp", default=None, help="Parent hypothesis ID")
    add_parser.add_argument("--family", default=None, help="Experiment family (e.g., optimizer-sweep)")
    add_parser.add_argument("--tags", default=None, help="Comma-separated tags")
    add_parser.add_argument("--model-type", default=None, help="Proposed model type")
    add_parser.add_argument("--hyperparams", default=None, help="JSON string of hyperparameters")
    add_parser.add_argument("--expected", default=None, help="Expected outcome description")

    # list
    list_parser = subparsers.add_parser("list", help="List hypotheses")
    list_parser.add_argument("--status", default=None, choices=sorted(VALID_STATUSES))

    # next
    subparsers.add_parser("next", help="Get next queued hypothesis")

    # show
    show_parser = subparsers.add_parser("show", help="Show detailed hypothesis file")
    show_parser.add_argument("id", help="Hypothesis ID")

    # mark
    mark_parser = subparsers.add_parser("mark", help="Update hypothesis status")
    mark_parser.add_argument("id", help="Hypothesis ID")
    mark_parser.add_argument("status", choices=sorted(VALID_STATUSES))
    mark_parser.add_argument("--result", default=None, help="Result experiment ID")
    mark_parser.add_argument("--metrics", default=None, help="JSON string of result metrics")
    mark_parser.add_argument("--notes", default=None, help="Notes about the result")

    # count
    subparsers.add_parser("count", help="Count hypotheses by status")

    # critique
    critique_parser = subparsers.add_parser("critique", help="Score a hypothesis before execution")
    critique_parser.add_argument("id", help="Hypothesis ID to critique")
    critique_parser.add_argument("--log", default="experiments/log.jsonl")
    critique_parser.add_argument("--config", default="config.yaml")
    critique_parser.add_argument("--json", action="store_true", help="Output as JSON")

    args = parser.parse_args()

    if args.command == "add":
        description = args.description
        family = args.family
        tags = [t.strip() for t in args.tags.split(",")] if args.tags else None

        # Expand archetype if specified; explicit CLI values win over the
        # archetype-derived description/family, but archetype tags are merged.
        if args.archetype:
            arch_desc, arch_family, arch_tags = expand_archetype(args.archetype)
            description = description or arch_desc
            family = family or arch_family
            if arch_tags:
                tags = (tags or []) + arch_tags

        if not description:
            print("Error: provide a description or --archetype", file=sys.stderr)
            sys.exit(1)

        # Parse optional structured fields
        architecture = {}
        if args.model_type:
            architecture["model_type"] = args.model_type
        hyperparameters = None
        if args.hyperparams:
            import json as _json
            hyperparameters = _json.loads(args.hyperparams)
        expected_outcome = {}
        if args.expected:
            expected_outcome["rationale"] = args.expected

        # getattr: argparse stores --parent-hyp as args.parent_hyp.
        hid = add_hypothesis(
            args.queue, description, args.source, args.priority,
            parent_experiment=args.parent,
            parent_hypothesis=getattr(args, "parent_hyp", None),
            family=family,
            tags=tags,
            architecture=architecture or None,
            hyperparameters=hyperparameters,
            expected_outcome=expected_outcome or None,
        )
        # First line only, truncated, so archetype descriptions stay short.
        short_desc = description.split("\n")[0][:60]
        print(f"Added {hid}: {short_desc}")
        print(f"Detail: {DETAIL_DIR}/{hid}.yaml")

    elif args.command == "show":
        detail = load_detail(args.id)
        if detail:
            print(yaml.dump(detail, default_flow_style=False, sort_keys=False))
        else:
            print(f"No detail file for {args.id}.", file=sys.stderr)
            sys.exit(1)

    elif args.command == "list":
        hypotheses = list_hypotheses(args.queue, args.status)
        print(format_table(hypotheses))

    elif args.command == "next":
        h = get_next_hypothesis(args.queue)
        if h:
            import json
            # Also print detail file path if it exists
            detail = load_detail(h["id"])
            print(json.dumps(h, indent=2))
            if detail:
                print(f"\nDetail: {DETAIL_DIR}/{h['id']}.yaml")
        else:
            # Exit non-zero so callers can branch on "queue empty".
            print("No queued hypotheses.", file=sys.stderr)
            sys.exit(1)

    elif args.command == "mark":
        result_metrics = None
        if args.metrics:
            import json as _json
            result_metrics = _json.loads(args.metrics)
        found = mark_hypothesis(
            args.queue, args.id, args.status,
            result_experiment=args.result,
            result_metrics=result_metrics,
            result_notes=args.notes,
        )
        if found:
            print(f"Marked {args.id} as {args.status}")
        else:
            print(f"Hypothesis {args.id} not found.", file=sys.stderr)
            sys.exit(1)

    elif args.command == "count":
        counts = count_by_status(args.queue)
        total = sum(counts.values())
        print(f"Total: {total}")
        for status, count in sorted(counts.items()):
            print(f"  {status}: {count}")

    elif args.command == "critique":
        detail = load_detail(args.id)
        if not detail:
            print(f"Hypothesis {args.id} not found.", file=sys.stderr)
            sys.exit(1)

        # Deferred import: critique_hypothesis is a sibling script and only
        # needed for this subcommand.
        from scripts.critique_hypothesis import critique_hypothesis, format_critique
        import json as _json

        result = critique_hypothesis(detail["description"], args.log, args.config)

        # Store critique score in the detail file
        update_detail(args.id, {"critique_score": result["overall_score"],
                                "critique_verdict": result["verdict"]})

        if args.json:
            print(_json.dumps({
                "id": args.id,
                "overall_score": result["overall_score"],
                "verdict": result["verdict"],
                "novelty_score": result["novelty"]["score"],
                "feasibility_score": result["feasibility"]["score"],
                "impact_score": result["impact"]["score"],
            }, indent=2))
        else:
            print(format_critique(result))

    else:
        # No subcommand given.
        parser.print_help()


if __name__ == "__main__":
    main()