duet-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duet.py ADDED
@@ -0,0 +1,3303 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ duet.py — two CLI agents in conversation, with per-agent session memory.
4
+
5
+ Workflow this is built for:
6
+
7
+ 1. You start `claude` interactively, work out a plan, exit.
8
+ Claude prints (or you grab) a session id like 106c1c57-ca42-473f-b2f1-1ea764f78c46.
9
+ 2. You hand it to duet:
10
+
11
+ ./duet.py --resume-claude 106c1c57-ca42-473f-b2f1-1ea764f78c46 \
12
+ --partner codex:coder \
13
+ --cwd ~/code/myrepo \
14
+ --turns 10
15
+
16
+ 3. duet pulls Claude's latest message from that session, feeds it to Codex.
17
+ Codex replies. duet feeds Codex's reply back to Claude (with --resume so
18
+ Claude remembers the whole prior conversation). Ping-pong until both
19
+ agents propose convergence with an LGTM rationale and <<<LGTM>>> in
20
+ back-to-back turns, --turns is hit, or you Ctrl-C.
21
+
22
+ Each agent keeps its own session across turns:
23
+ - Claude: `claude -p --resume <session_id> --output-format json` — we capture
24
+ `session_id` from the JSON wrapper and reuse it.
25
+ - Codex: first turn `codex exec ...`; subsequent turns `codex exec resume <uuid>`
26
+ when we parsed a session id from Codex's stderr, or `codex exec resume --last`
27
+ in the same cwd as a fallback for builds that don't print one. Pinning the
28
+ UUID makes resume robust to parallel Codex sessions sharing the cwd, but
29
+ `--last` is still keyed on cwd — use `--worktree` to isolate duet's Codex
30
+ cwd from the host repo when no UUID is available.
31
+
32
+ Transcript is always logged to runs/<ts>/transcript.md for humans, but each
33
+ prompt sent to an agent is just the latest counterpart message — keeping
34
+ prompts small and letting each side rely on its own session memory.
35
+
36
+ Stdlib only. Python 3.9+.
37
+ """
38
+ from __future__ import annotations
39
+
40
+ import argparse
41
+ import dataclasses
42
+ import datetime as dt
43
+ import json
44
+ import os
45
+ import pathlib
46
+ import re
47
+ import shlex
48
+ import shutil
49
+ import signal
50
+ import subprocess
51
+ import sys
52
+ import textwrap
53
+ import threading
54
+ import time
55
+ from typing import Callable, Optional
56
+
57
+ # ---------- defaults ----------
58
+
59
# --- convergence / loop defaults ---
# Token an agent prints (alongside an LGTM rationale) to propose stopping.
DEFAULT_SENTINEL = "<<<LGTM>>>"
DEFAULT_TURNS = 2
# Fifteen minutes, used as the DuetConfig.per_turn_timeout default.
DEFAULT_TIMEOUT = 15 * 60
# 512 KiB worth of characters.
TASK_MAX_CHARS = 1024 * 512
CONVERGENCE_RATIONALE_MIN_CHARS = 20

# --- verify-command output handling ---
VERIFY_OUTPUT_TAIL_CHARS = 4000
VERIFY_LIVE_PREFIX = " │ [verify] "

# --- accepted configuration values ---
SUPPORTED_BACKENDS = {"codex", "claude"}
WORKTREE_FOR_CHOICES = {"partner", "lead"}

# --- labels recorded as the reason a run finished ---
FINISHED_CONVERGED = "converged"
FINISHED_CONVERGED_AFTER_FORCE = "converged_after_force"
FINISHED_FORCED_CONTINUATION = "forced_continuation"
FINISHED_MAX_TURNS = "max_turns"
FINISHED_FORCE_STOP = "force_stop"
FINISHED_TIMEOUT = "timeout"
FINISHED_AGENT_ERROR = "agent_error"
75
+
76
+
77
class AgentRunError(RuntimeError):
    """Failure of an agent/backend call that should end the run.

    Attributes:
        finished_reason: label describing why the run finished; stored
            verbatim from the constructor argument.
    """

    def __init__(self, finished_reason: str, message: str) -> None:
        # Record the reason first, then let RuntimeError carry the text.
        self.finished_reason = finished_reason
        super().__init__(message)
83
+
84
# Extra system-prompt text asking the agent to lead each reply with
# RECAP / FILES / STATUS header lines. Built line by line so the blank
# separator lines are explicit.
RECAP_ADDENDUM = "\n".join((
    "Format requirement (debug tooling reads these):",
    "",
    "Begin every reply with three header lines, then a blank line, then your full reply:",
    "",
    "RECAP: <one short sentence describing what you produced this turn>",
    'FILES: <comma-separated paths you touched or referenced, or "none">',
    "STATUS: <one of: planning | implementing | reviewing | requesting-changes | ready-for-review | converged>",
    "",
    "The headers DO NOT replace your reply — write your normal answer as usual after the blank line. Use STATUS: converged only when you would also emit the convergence sentinel with an LGTM rationale.",
))
93
+
94
# Shared convergence rules appended to every role prompt. The literal
# `{SENTINEL}` placeholder is substituted with str.replace by the caller.
CONVERGENCE_INSTRUCTION = (
    "Convergence requires pair agreement, not just the sentinel. "
    "When you believe the loop should stop, include a concise "
    "`LGTM rationale:` line or paragraph that explains why the result "
    "satisfies the task, what you checked, and any remaining low-risk "
    "follow-ups; then put {SENTINEL} on its own line. "
    "A bare sentinel without that rationale is ignored. "
    "If your partner proposed convergence and you disagree with the "
    "rationale, do not emit the sentinel; explain the gap and ask for "
    "another round."
)
103
+
104
# Built-in role prompts. Each is spelled out as its own private constant
# and then collected into ROLE_PROMPTS, keyed by role name.
_PLANNER_ROLE_PROMPT = (
    "You are the PLANNER half of a duet. Read the partner agent's latest "
    "message and propose or refine a plan. Be concrete: file names, "
    "functions, edge cases. You may also write or edit non-code "
    "deliverables yourself when the task asks for them — synthesis "
    "documents, reports, comparison matrices, configuration, README "
    "updates, dashboards, etc. What you should NOT do is write production "
    "feature code (that's the coder's job). When you believe the work is "
    "fully done and reviewed, follow the convergence instructions."
)

_CODER_ROLE_PROMPT = (
    "You are the CODER half of a duet. Read the partner agent's latest "
    "message (typically a plan or critique) and produce code. Apply edits "
    "to disk. Run quick checks where reasonable. Summarise what you "
    "changed. When you believe the work is fully done, follow the "
    "convergence instructions."
)

_REVIEWER_ROLE_PROMPT = (
    "You are the REVIEWER half of a duet. Read the partner agent's "
    "latest message and critically evaluate it: bugs, missing tests, "
    "security, simpler designs. When reviewing concrete code changes, "
    "inspect the actual files, diffs, and test output rather than relying "
    "only on the partner's summary. Be specific and brief. If the work "
    "meets the task and you have no material issues, follow the "
    "convergence instructions."
)

# Same review brief as the reviewer, plus [P0]-[P3] severity scoring that
# gates convergence on unfixed [P0]/[P1] findings.
_TRIAGE_REVIEWER_ROLE_PROMPT = (
    "You are the TRIAGE REVIEWER half of a duet. Read the partner agent's "
    "latest message and critically evaluate it: bugs, missing tests, "
    "security, simpler designs. Score every finding with [P0], [P1], "
    "[P2], or [P3]. Default to [P3]; promote only when the impact is "
    "concrete. [P0] means a correctness, security, data-loss, or shipped-"
    "check blocker. [P1] means a real bug, logic gap, or missing edge case "
    "that should block this loop. [P2] means a small bug, polish issue, "
    "or naming/readability fix that is nice to handle. [P3] means a "
    "follow-up, future refactor, or scope creep. When reviewing concrete "
    "code changes, inspect the actual files, diffs, and test output rather "
    "than relying only on the partner's summary. Be specific and brief. "
    "If the coder reasonably argues a finding is over-scored, either "
    "accept the lower score or explain why the higher score still applies. "
    "Emit convergence only when no unfixed [P0] or [P1] findings remain. "
    "When only [P2]/[P3] items remain, move them to a Follow-ups section "
    "and follow the convergence instructions."
)

ROLE_PROMPTS = {
    "planner": _PLANNER_ROLE_PROMPT,
    "coder": _CODER_ROLE_PROMPT,
    "reviewer": _REVIEWER_ROLE_PROMPT,
    "triage-reviewer": _TRIAGE_REVIEWER_ROLE_PROMPT,
}
150
+
151
# When resuming from an existing Claude session id, this short request
# asks Claude to restate its most recent message so there is something to
# hand to the partner.
EXTRACT_LATEST_PROMPT = (
    "[duet harness] I'm about to hand your most recent plan/answer to a "
    "partner coding agent. "
    "Please reproduce that plan/answer in full as your reply now. "
    "Reply with the message text only — no preamble, no framing, no "
    "commentary about this request."
)
159
+
160
# Reasoning levels users may pass via --reasoning / `reasoning:` in YAML.
# This is duet's own vocabulary; the tables below translate it per backend
# so `xhigh` works everywhere, while `minimal` (a Codex level) and `max`
# (a Claude level) are mapped to the nearest value on the other backend.
REASONING_LEVELS = ["minimal", "low", "medium", "high", "xhigh", "max"]

# Claude Code's real thinking control is the `--effort` flag. On top of
# that, high/xhigh/max get a short natural-language nudge prepended to the
# prompt (`ultrathink` is a recognized one-turn nudge in current Claude
# Code). Lower levels get no prefix.
CLAUDE_REASONING_PROMPT_PREFIX = {level: "" for level in ("minimal", "low", "medium")}
CLAUDE_REASONING_PROMPT_PREFIX.update({
    "high": (
        "think hard and reason step-by-step before answering. "
        "Cover edge cases.\n\n"
    ),
    "xhigh": (
        "think very hard and reason carefully before answering. "
        "Cover edge cases, alternatives, and risks.\n\n"
    ),
    "max": (
        "ultrathink — reason exhaustively before answering. "
        "Enumerate edge cases, alternatives, and risks. Do not skim.\n\n"
    ),
})

# Claude Code `--effort` accepts low, medium, high, xhigh, max. duet's
# `minimal` has no Claude equivalent, so it maps to Claude's lowest
# documented level.
CLAUDE_REASONING_MAP = {level: level for level in REASONING_LEVELS}
CLAUDE_REASONING_MAP["minimal"] = "low"

# Codex CLI takes `-c model_reasoning_effort=<value>` with values
# minimal, low, medium, high, xhigh (lowest to highest). Codex documents no
# separate `max`, so duet's `max` maps to `xhigh`.
CODEX_REASONING_MAP = {level: level for level in REASONING_LEVELS}
CODEX_REASONING_MAP["max"] = "xhigh"
205
+
206
+
207
def validate_reasoning(value: Optional[str], context: str) -> None:
    """Exit with an error unless `value` is None or a known reasoning level.

    Args:
        value: the reasoning level to check; None means "not set" and passes.
        context: short description of where the value came from, used only
            in the error message.

    Raises:
        SystemExit: if `value` is set but not in REASONING_LEVELS.
    """
    if value is None or value in REASONING_LEVELS:
        return
    allowed = "|".join(REASONING_LEVELS)
    raise SystemExit(f"bad reasoning value for {context}: {value!r}; expected {allowed}")
211
+
212
+
213
def effective_reasoning(agent: Agent, cfg_reasoning: Optional[str]) -> Optional[str]:
    """Return the agent's own reasoning effort, else the config-wide one.

    A falsy per-agent value (None or empty string) falls back to
    `cfg_reasoning`.
    """
    own_level = agent.reasoning_effort
    if own_level:
        return own_level
    return cfg_reasoning
215
+
216
+ # ---------- data classes ----------
217
+
218
@dataclasses.dataclass
class Agent:
    """One side of the duet: a named CLI backend with a role and session."""

    name: str
    # "claude" or "codex"
    backend: str
    # planner | coder | reviewer | triage-reviewer | custom
    role: str = "coder"
    # Explicit prompt text; when set (non-empty) it wins over ROLE_PROMPTS.
    role_prompt: Optional[str] = None
    model: Optional[str] = None
    # Tracked across turns.
    session_id: Optional[str] = None
    extra_args: list[str] = dataclasses.field(default_factory=list)
    # Set when this agent runs in a git worktree.
    cwd_override: Optional[pathlib.Path] = None
    # One of REASONING_LEVELS; overrides cfg.reasoning.
    reasoning_effort: Optional[str] = None

    def system_prompt(self, sentinel: str, recap: bool = False) -> str:
        """Assemble this agent's system prompt.

        The role prompt and the shared convergence instruction both have
        their `{SENTINEL}` placeholder filled in; the recap addendum is
        appended when `recap` is true. Sections are separated by a blank
        line.

        Raises:
            SystemExit: if there is no `role_prompt` and `role` is not a
                key of ROLE_PROMPTS.
        """
        template = self.role_prompt or ROLE_PROMPTS.get(self.role)
        if template is None:
            raise SystemExit(f"unknown role '{self.role}' for agent '{self.name}' — "
                             "supply role_prompt to override")
        # Plain str.replace on purpose: role prompts may contain literal
        # `{...}` (JSON schema, code samples, jq patterns) that str.format
        # would try to parse as fields and choke on.
        sections = [
            template.replace("{SENTINEL}", sentinel),
            CONVERGENCE_INSTRUCTION.replace("{SENTINEL}", sentinel),
        ]
        if recap:
            sections.append(RECAP_ADDENDUM)
        return "\n\n".join(sections)
244
+
245
+
246
def agent_state(a: Agent) -> dict:
    """Return a plain-dict snapshot of an agent.

    `name`, `backend`, `role` and `session_id` are always present. The
    optional fields are added only when set: `role_prompt`, `model` and
    `reasoning_effort` when not None, `extra_args` when non-empty.
    """
    snapshot = {
        "name": a.name,
        "backend": a.backend,
        "role": a.role,
        "session_id": a.session_id,
    }
    # (key, value, include?) — listed in the order the keys should appear.
    optional_fields = (
        ("role_prompt", a.role_prompt, a.role_prompt is not None),
        ("model", a.model, a.model is not None),
        ("extra_args", a.extra_args, bool(a.extra_args)),
        ("reasoning_effort", a.reasoning_effort, a.reasoning_effort is not None),
    )
    for key, value, include in optional_fields:
        if include:
            snapshot[key] = value
    return snapshot
262
+
263
+
264
@dataclasses.dataclass
class DuetConfig:
    """Settings for one duet run: agents, limits, and worktree options."""

    cwd: pathlib.Path
    # Exactly 2 for now.
    agents: list[Agent]
    # Used if there is no resume seed.
    task: Optional[str] = None
    # Explicit first message to the partner.
    kickoff: Optional[str] = None
    max_turns: int = DEFAULT_TURNS
    sentinel: str = DEFAULT_SENTINEL
    per_turn_timeout: int = DEFAULT_TIMEOUT
    runs_dir: pathlib.Path = pathlib.Path("runs")
    # Codex setting.
    sandbox: str = "workspace-write"
    # Claude setting.
    permission_mode: str = "acceptEdits"
    dry_run: bool = False
    recap: bool = False
    # Shell command that must pass before a convergence proposal can count.
    verify_cmd: Optional[str] = None
    # Run the partner in a throwaway git worktree.
    worktree: bool = False
    # "partner" (idx 1) or "lead" (idx 0).
    worktree_for: str = "partner"
    # Reuse an existing worktree (for resume).
    worktree_path: Optional[pathlib.Path] = None
    # Parent dir for new worktrees; default = <run_dir>/wt (durable,
    # gitignored).
    worktree_root: Optional[pathlib.Path] = None
    # Extra `--add-dir` paths for claude — needed when the task reads or
    # writes outside cwd (e.g. ../DECISION.md). Without these claude
    # silently refuses paths outside cwd.
    add_dirs: list[pathlib.Path] = dataclasses.field(default_factory=list)
    # Default reasoning effort for both agents.
    reasoning: Optional[str] = None
    # Codex-only "fast mode": pin reasoning to low and add
    # `model_reasoning_summary=concise` for codex coder turns this run,
    # regardless of cfg.reasoning / agent.reasoning_effort. Claude's effort
    # is untouched, so `--reasoning high --codex-fast` keeps the planner
    # deep and the coder snappy.
    codex_fast: bool = False
    # Default loop starts with the partner replying.
    start_speaker_idx: int = 1
    # Prior run dir/id when created by --continue.
    continue_from: Optional[str] = None
300
+
301
+
302
def _config_error(message: str,
                  parser: Optional[argparse.ArgumentParser] = None) -> None:
    """Abort with a configuration error; never returns normally.

    With a parser, the error goes through `parser.error(message)`. Without
    one — or if `parser.error` unexpectedly returns — `SystemExit(message)`
    is raised directly.
    """
    if parser is None:
        raise SystemExit(message)
    parser.error(message)
    # Only reached if parser.error() returned instead of exiting.
    raise SystemExit(message)
307
+
308
+
309
def validate_config(cfg: DuetConfig,
                    parser: Optional[argparse.ArgumentParser] = None) -> None:
    """Validate final topology after CLI/YAML parsing and resume normalization.

    Checks, in order: exactly two agents, `start_speaker_idx` is 0 or 1,
    `worktree_for` is a known choice, and every agent has a supported
    backend and a unique name. Each failure is reported via `_config_error`.
    """
    agent_count = len(cfg.agents)
    if agent_count != 2:
        _config_error(f"duet expects exactly 2 agents, got {agent_count}", parser)

    if cfg.start_speaker_idx not in (0, 1):
        _config_error(
            f"start_speaker_idx must be 0 or 1, got {cfg.start_speaker_idx}",
            parser,
        )

    if cfg.worktree_for not in WORKTREE_FOR_CHOICES:
        allowed_targets = "|".join(sorted(WORKTREE_FOR_CHOICES))
        _config_error(
            f"worktree_for must be one of {allowed_targets}, got {cfg.worktree_for!r}",
            parser,
        )

    names_so_far: set[str] = set()
    for candidate in cfg.agents:
        if candidate.backend not in SUPPORTED_BACKENDS:
            allowed_backends = "|".join(sorted(SUPPORTED_BACKENDS))
            _config_error(
                f"unknown backend {candidate.backend!r} for agent {candidate.name!r}; "
                f"expected {allowed_backends}",
                parser,
            )
        if candidate.name in names_so_far:
            _config_error(
                f"duplicate agent name {candidate.name!r}; agent names must be unique",
                parser,
            )
        names_so_far.add(candidate.name)
341
+
342
+
343
def effective_agent_cwd(agent: Agent, default_cwd: pathlib.Path) -> pathlib.Path:
    """Return the resolved directory the agent runs in.

    The agent's `cwd_override` is used when set; otherwise `default_cwd`.
    """
    chosen = agent.cwd_override if agent.cwd_override else default_cwd
    return chosen.resolve()
345
+
346
+
347
def shared_cwd_codex_peers(cfg: DuetConfig) -> bool:
    """Return True when both agents are Codex and resolve to the same cwd."""
    codex_only = [peer for peer in cfg.agents if peer.backend == "codex"]
    if len(codex_only) == 2:
        first, second = codex_only
        return effective_agent_cwd(first, cfg.cwd) == effective_agent_cwd(second, cfg.cwd)
    return False
355
+
356
+
357
def codex_session_is_uuid(agent: Agent) -> bool:
    """Return True when the agent's session id matches `_CODEX_UUID_RE`.

    A missing or empty session id is never a match.
    """
    session = agent.session_id
    if not session:
        return False
    return _CODEX_UUID_RE.match(session) is not None
359
+
360
+
361
def codex_shared_cwd_isolation_error(agent: Agent) -> str:
    """Build the fatal message for an unsafe codex/codex shared-cwd resume.

    Names the agent that produced no Codex session UUID and explains why
    the cwd-based `--last` fallback is unsafe in that setup.
    """
    what_failed = (
        "[duet] fatal: cannot safely continue codex/codex peering in one cwd "
        f"because {agent.name} did not produce a Codex session UUID."
    )
    why_unsafe = (
        "`codex exec resume --last` is cwd-based and could resume the other "
        "Codex peer's session."
    )
    remedy = (
        "Use --worktree/--worktree-for to isolate one peer, or use a Codex "
        "build that reliably emits `session id: <uuid>`."
    )
    return " ".join((what_failed, why_unsafe, remedy))
369
+
370
+
371
def guard_codex_shared_cwd_before_call(cfg: DuetConfig,
                                       agent: Agent,
                                       first_turn_for_agent: bool) -> None:
    """Refuse to resume a Codex peer by non-UUID session id in a shared cwd.

    Does nothing for dry runs, non-Codex agents, or when the two agents are
    not Codex peers sharing one cwd. Otherwise, on any turn after the
    agent's first, a session id that is set but not a UUID is fatal.

    Raises:
        SystemExit: with the shared-cwd isolation message.
    """
    if cfg.dry_run:
        return
    if agent.backend != "codex":
        return
    if not shared_cwd_codex_peers(cfg):
        return
    if first_turn_for_agent or not agent.session_id:
        return
    if not codex_session_is_uuid(agent):
        raise SystemExit(codex_shared_cwd_isolation_error(agent))
380
+
381
+
382
def guard_codex_shared_cwd_after_call(cfg: DuetConfig,
                                      agent: Agent,
                                      first_turn_for_agent: bool) -> None:
    """After a Codex agent's first turn, require a UUID session in a shared cwd.

    Only acts on real (non-dry) runs, for Codex agents, on the agent's
    first turn. If both agents are Codex peers in one cwd and this agent
    has no UUID session id, the run is aborted.

    Raises:
        SystemExit: with the shared-cwd isolation message.
    """
    if cfg.dry_run:
        return
    if agent.backend != "codex":
        return
    if not first_turn_for_agent:
        return
    if not shared_cwd_codex_peers(cfg):
        return
    if not codex_session_is_uuid(agent):
        raise SystemExit(codex_shared_cwd_isolation_error(agent))
389
+
390
+
391
@dataclasses.dataclass
class VerifyResult:
    """Outcome record for one run of the verify command."""

    # Whether the verification counted as passing.
    ok: bool
    # The command and the directory it was run in.
    cmd: str
    cwd: pathlib.Path
    # Optional, so None is allowed — presumably when no exit status was
    # obtained; confirm against the code that builds this record.
    exit_code: Optional[int]
    # Trailing portions of the captured output streams.
    stdout_tail: str
    stderr_tail: str
    # Where the verify log was written.
    log_path: pathlib.Path
    timed_out: bool = False
    error: Optional[str] = None
402
+
403
+
404
+ # ---------- active child process tracking ----------
405
+
406
# Registry of currently running child processes, plus the lock that
# guards every read and write of it.
_ACTIVE_PROCS_LOCK = threading.Lock()
_ACTIVE_PROCS: set[subprocess.Popen] = set()
408
+
409
+
410
def _register_proc(proc: subprocess.Popen) -> None:
    """Add `proc` to the active-process registry, under the registry lock."""
    _ACTIVE_PROCS_LOCK.acquire()
    try:
        _ACTIVE_PROCS.add(proc)
    finally:
        _ACTIVE_PROCS_LOCK.release()
413
+
414
+
415
def _unregister_proc(proc: subprocess.Popen) -> None:
    """Remove `proc` from the active-process registry, if it is there."""
    _ACTIVE_PROCS_LOCK.acquire()
    try:
        # discard, not remove: a proc that is not registered is not an error.
        _ACTIVE_PROCS.discard(proc)
    finally:
        _ACTIVE_PROCS_LOCK.release()
418
+
419
+
420
def _signal_proc_tree(proc: subprocess.Popen, sig: int) -> None:
    """Best-effort delivery of `sig` to a child and its process group.

    Does nothing if the child has already exited. Where `os.killpg`
    exists, the signal goes to the group whose id is the child's pid;
    otherwise only the child is signalled. A vanished process is ignored;
    any other failure falls back to `proc.kill()`, whose own errors are
    swallowed.
    """
    if proc.poll() is not None:
        return
    group_kill = getattr(os, "killpg", None)
    try:
        if group_kill is None:
            proc.send_signal(sig)
        else:
            group_kill(proc.pid, sig)
    except ProcessLookupError:
        # Already gone between poll() and the signal — nothing to do.
        return
    except Exception:
        try:
            proc.kill()
        except Exception:
            pass
435
+
436
+
437
def _terminate_active_processes(
    sig: int = getattr(signal, "SIGKILL", signal.SIGTERM),
) -> None:
    """Send `sig` to every child process currently in the registry.

    Args:
        sig: signal to deliver. Defaults to SIGKILL where the platform
            defines it and to SIGTERM otherwise. The default is evaluated
            once, when the function is defined, so a bare `signal.SIGKILL`
            here would make the whole module fail to import on platforms
            without that signal.
    """
    # Snapshot under the lock, then signal outside it, so slow signal
    # delivery never blocks threads that register or unregister children.
    with _ACTIVE_PROCS_LOCK:
        snapshot = list(_ACTIVE_PROCS)
    for proc in snapshot:
        _signal_proc_tree(proc, sig)
442
+
443
+
444
+ # ---------- git worktree helpers ----------
445
+
446
def is_git_repo(path: pathlib.Path) -> bool:
    """Return True when `git` reports `path` as inside a work tree.

    A missing `git` executable or a probe that takes longer than five
    seconds counts as "not a repo".
    """
    probe = ["git", "-C", str(path), "rev-parse", "--is-inside-work-tree"]
    try:
        outcome = subprocess.run(probe, capture_output=True, text=True, timeout=5)
    except (FileNotFoundError, subprocess.TimeoutExpired):
        return False
    if outcome.returncode != 0:
        return False
    return outcome.stdout.strip() == "true"
455
+
456
+
457
def setup_worktree(repo_path: pathlib.Path, branch_name: str,
                   dest: pathlib.Path) -> pathlib.Path:
    """Run `git worktree add -b <branch_name> <dest>` and return the path.

    `dest` is expanded and resolved first. It must not exist yet (a
    requirement of `git worktree add`); its parent directory is created
    when missing. Placement is the caller's decision.

    The git child is registered as an active process while it runs. It
    gets 30 seconds; on timeout it receives SIGTERM, then SIGKILL if it
    is still alive two seconds later.

    Raises:
        RuntimeError: if `dest` already exists, git is not on PATH, the
            command times out, or it exits non-zero.
    """
    target = dest.expanduser().resolve()
    target.parent.mkdir(parents=True, exist_ok=True)
    if target.exists():
        raise RuntimeError(f"worktree destination already exists: {target}")

    argv = ["git", "-C", str(repo_path), "worktree", "add",
            "-b", branch_name, str(target)]
    try:
        child = subprocess.Popen(
            argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            start_new_session=True,
        )
    except FileNotFoundError:
        raise RuntimeError("git not found on PATH")

    _register_proc(child)
    try:
        try:
            stderr_text = child.communicate(timeout=30)[1]
        except subprocess.TimeoutExpired:
            # Escalate: polite SIGTERM, short grace period, then SIGKILL.
            _signal_proc_tree(child, signal.SIGTERM)
            try:
                stderr_text = child.communicate(timeout=2)[1]
            except subprocess.TimeoutExpired:
                _signal_proc_tree(child, signal.SIGKILL)
                stderr_text = child.communicate()[1]
            raise RuntimeError(f"git worktree add timed out: {stderr_text.strip()}")
    finally:
        _unregister_proc(child)

    if child.returncode != 0:
        raise RuntimeError(f"git worktree add failed: {stderr_text.strip()}")
    return target
497
+
498
+
499
def git_diff_summary(wt_path: pathlib.Path, max_chars: int = 8000) -> str:
    """Return a markdown summary of the worktree's uncommitted changes.

    The summary holds `git status --short`, the diffstat against HEAD, the
    diff against HEAD (cut to `max_chars` with a truncation note), and —
    when there are any — the contents of untracked files. Each git call is
    limited to 10 seconds; a timeout yields a one-line notice instead.
    """
    def git_stdout(*git_args: str) -> str:
        # Exit codes are deliberately not inspected; only stdout is used.
        finished = subprocess.run(
            ["git", "-C", str(wt_path), *git_args],
            capture_output=True, text=True, timeout=10,
        )
        return finished.stdout

    try:
        status_text = git_stdout("status", "--short").rstrip()
        stat_text = git_stdout("diff", "HEAD", "--stat").rstrip()
        patch_text = git_stdout("diff", "HEAD")
        overflow = len(patch_text) - max_chars
        if overflow > 0:
            patch_text = patch_text[:max_chars] + f"\n…[truncated, {overflow} more chars]"
        untracked_text = _untracked_files_summary(wt_path, max_chars=max_chars)
    except subprocess.TimeoutExpired:
        return "[duet] git diff timed out"

    parts = [
        f"### git status\n{status_text or '(clean)'}\n\n",
        f"### diffstat\n{stat_text or '(none)'}\n\n",
        f"### diff\n{patch_text or '(none)'}",
    ]
    if untracked_text:
        parts.append(f"\n\n### untracked file contents\n{untracked_text}")
    return "".join(parts)
529
+
530
+
531
def _untracked_files_summary(wt_path: pathlib.Path, max_chars: int = 8000) -> str:
    """Return previews of the worktree's untracked files, within a budget.

    Files come from `git ls-files --others --exclude-standard`. Sections
    are added until roughly `max_chars` characters are used; the section
    that would overflow is cut with a truncation note and ends the output.
    Returns "" when git fails or nothing is untracked.
    """
    listing = subprocess.run(
        ["git", "-C", str(wt_path), "ls-files", "--others",
         "--exclude-standard", "-z"],
        capture_output=True, timeout=10,
    )
    if listing.returncode != 0:
        return ""
    # -z output is NUL-separated raw bytes; decode each name for the filesystem.
    names = [os.fsdecode(raw) for raw in listing.stdout.split(b"\0") if raw]
    if not names:
        return ""

    blocks: list[str] = []
    budget = max_chars
    for name in names:
        if budget <= 0:
            blocks.append("…[truncated]")
            break
        block_text = _untracked_file_summary(wt_path, name)
        excess = len(block_text) - budget
        if excess > 0:
            blocks.append(block_text[:budget] + f"\n…[truncated, {excess} more chars]")
            break
        blocks.append(block_text)
        # The extra 2 pays for the "\n\n" separator added by the join below.
        budget -= len(block_text) + 2
    return "\n\n".join(blocks)
557
+
558
+
559
def _untracked_file_summary(wt_path: pathlib.Path, rel_path: str) -> str:
    """Return one markdown section previewing a single untracked file.

    The heading shows `rel_path` with forward slashes. Symlinks,
    non-regular files, unreadable files and binary files get a short
    placeholder line; everything else is shown inside a fenced `text`
    block.
    """
    heading = "#### " + rel_path.replace("\\", "/")
    target = wt_path / rel_path
    # Symlink check comes first so a link to a regular file is still omitted.
    if target.is_symlink():
        return f"{heading}\n(symlink omitted)"
    if not target.is_file():
        return f"{heading}\n(non-file omitted)"
    try:
        preview = _read_file_preview(target)
    except OSError as e:
        return f"{heading}\n(unable to read: {e})"
    if preview is None:
        return f"{heading}\n(binary file omitted)"
    fence = _markdown_fence(preview)
    return f"{heading}\n{fence}text\n{preview}\n{fence}"
574
+
575
+
576
def _read_file_preview(path: pathlib.Path, max_bytes: int = 12000) -> Optional[str]:
    """Return up to `max_bytes` of a file as text, or None if it looks binary.

    Args:
        path: file to read.
        max_bytes: maximum number of bytes to include in the preview.

    Returns:
        The decoded text (invalid UTF-8 replaced with U+FFFD), followed by
        a truncation note when the file is longer than `max_bytes`; or
        None when the previewed bytes contain a NUL byte.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    import codecs  # local import: only this helper needs it

    with path.open("rb") as f:
        # One extra byte is enough to tell whether the file is longer.
        data = f.read(max_bytes + 1)
    truncated = len(data) > max_bytes
    data = data[:max_bytes]
    if b"\0" in data:
        return None
    # The byte cut can land inside a multi-byte UTF-8 character. Decoding
    # incrementally with final=False holds back such an incomplete tail
    # instead of turning it into a spurious U+FFFD at the end of the preview.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    text = decoder.decode(data, final=not truncated)
    if truncated:
        text += f"\n…[truncated, file exceeds {max_bytes} bytes]"
    return text
587
+
588
+
589
def _markdown_fence(text: str) -> str:
    """Return a backtick fence strictly longer than any backtick run in `text`.

    The fence is never shorter than three backticks.
    """
    runs = re.findall(r"`+", text)
    widest = len(max(runs, key=len)) if runs else 0
    fence_len = widest + 1 if widest >= 3 else 3
    return "`" * fence_len
592
+
593
+
594
def _worktree_handoff_block(wt_path: pathlib.Path,
                            wt_branch: Optional[str] = None) -> str:
    """Tell the receiving agent exactly where the edited tree lives.

    The wording covers clean turns too: it says "any code changes" rather
    than claiming changes exist, because the worktree agent may only have
    explored. The suggested commands are project-agnostic on purpose;
    project test commands belong in CLAUDE.md / README.

    Args:
        wt_path: worktree directory, shown verbatim and shell-quoted in
            the suggested commands.
        wt_branch: branch name; its line is left out when empty or None.
    """
    shown_path = str(wt_path)
    quoted_path = shlex.quote(shown_path)
    lines = [
        "### review target",
        "Any code changes for this turn are in the git worktree below. "
        "Your current cwd may be a clean checkout, so do not use that cwd's "
        "`git status` as evidence that these edits are absent.",
        "",
        f"- Worktree path: `{shown_path}`",
    ]
    if wt_branch:
        lines.append(f"- Branch: `{wt_branch}`")
    lines.extend([
        "",
        "Use the worktree as the source of truth when reviewing or running "
        "checks:",
        "",
        "```bash",
        f"git -C {quoted_path} status --short",
        f"git -C {quoted_path} diff HEAD",
        "```",
        "",  # gives the block its trailing newline
    ])
    return "\n".join(lines)
621
+
622
+
623
def append_worktree_diff(reply: str, wt_path: pathlib.Path,
                         wt_branch: Optional[str] = None) -> str:
    """Return `reply` with the worktree hand-off and diff summary appended.

    Any exception raised while building the appendix is caught, and a
    one-line failure note is appended instead, so the reply is never lost.
    """
    try:
        summary = git_diff_summary(wt_path)
        review_target = _worktree_handoff_block(wt_path, wt_branch)
        appendix = (
            f"#### worktree changes ({wt_path.name})\n{review_target}\n"
            f"{summary}"
        )
        return f"{reply}\n\n---\n{appendix}"
    except Exception as e:
        return f"{reply}\n\n[duet] git diff failed: {e}"
633
+
634
+
635
def write_text_atomic(path: pathlib.Path, text: str) -> None:
    """Write `text` (UTF-8) to `path` via a sibling temp file and os.replace.

    The temp file lives in the same directory and is named
    `.<name>.<pid>.tmp`. Whatever happens, a leftover temp file is removed
    on the way out; errors from that cleanup are ignored.
    """
    scratch = path.with_name(".{}.{}.tmp".format(path.name, os.getpid()))
    try:
        scratch.write_text(text, encoding="utf-8")
        os.replace(scratch, path)
    finally:
        # After a successful replace the scratch file no longer exists, so
        # this only matters when the write or the replace failed.
        try:
            still_there = scratch.exists()
            if still_there:
                scratch.unlink()
        except OSError:
            pass
647
+
648
+
649
def append_text_atomic(path: pathlib.Path, text: str) -> None:
    """Append `text` to `path` by atomically rewriting the whole file.

    The current contents are read (empty if the file does not exist), and
    the combined text is written with `write_text_atomic`.
    """
    existing = ""
    if path.exists():
        existing = path.read_text(encoding="utf-8")
    write_text_atomic(path, existing + text)
652
+
653
+
654
+ # ---------- subprocess wrappers ----------
655
+
656
# Live-output switches (module level).
#
# With LIVE_STREAM on, _run echoes the child's stderr to the user's
# terminal as it arrives. Codex prints its progress (thinking, tool calls)
# on stderr, so this is what makes long turns visible.
LIVE_STREAM = True
# Box-drawing prefix put on every streamed line.
LIVE_PREFIX = " │ "
LIVE_PREFIX_TASK = " $ "
RECAP_MODE = False
663
+
664
+
665
def _stream_reader(stream, sink: list[str], mirror_to=None, prefix: str = "",
                   tee_to=None, activity_event=None):
    """Read `stream` line by line until EOF, collecting every line in `sink`.

    Optional extras, each applied per line:
      * `activity_event` — `set()` so a heartbeat thread can notice output.
      * `mirror_to` — writable text stream (typically sys.stderr) that gets
        the line with `prefix` in front.
      * `tee_to` — open file handle that gets the raw line, for later
        inspection.

    Write or flush failures on `mirror_to` / `tee_to` are ignored so the
    pipe keeps draining. `stream` is closed on the way out.
    """
    try:
        while True:
            line = stream.readline()
            if line == "":
                # readline() returns "" only at EOF.
                break
            sink.append(line)
            if activity_event is not None:
                activity_event.set()
            if mirror_to is not None:
                shown = prefix + line if prefix else line
                try:
                    mirror_to.write(shown)
                    mirror_to.flush()
                except Exception:
                    pass
            if tee_to is not None:
                try:
                    tee_to.write(line)
                    tee_to.flush()
                except Exception:
                    pass
    finally:
        try:
            stream.close()
        except Exception:
            pass
698
+
699
+
700
def _quiet_heartbeat(proc, mirror_to, start_monotonic: float,
                     activity_event, interval: int = 20,
                     prefix: str = LIVE_PREFIX) -> None:
    """Print "[duet] still working…" when a subprocess goes quiet.

    Some children (codex, gh, npm) stream plenty of stderr; others, such as
    `claude -p` during its API call, stay silent, which can make duet look
    hung. While `proc` is alive, this waits up to `interval` seconds for
    `activity_event`. If output arrived, the event is cleared and the wait
    restarts; if not, one line with the elapsed time since
    `start_monotonic` is written to `mirror_to`.

    Returns at once when `mirror_to` is None, and stops when the process
    exits or a write to `mirror_to` fails.
    """
    if mirror_to is None:
        return
    while proc.poll() is None:
        saw_output = activity_event.wait(timeout=interval)
        if saw_output:
            activity_event.clear()
        elif proc.poll() is not None:
            # The child exited during the wait — nothing left to report.
            return
        else:
            try:
                waited = int(time.monotonic() - start_monotonic)
                mirror_to.write(f"{prefix}[duet] still working… ({waited}s; "
                                "subprocess silent — typical for `claude -p`)\n")
                mirror_to.flush()
            except Exception:
                return
727
+
728
+
729
+ def _run(cmd: list[str], *, cwd: pathlib.Path, stdin: Optional[str], timeout: int,
730
+ stderr_log_path: Optional[pathlib.Path] = None,
731
+ pid_file_path: Optional[pathlib.Path] = None,
732
+ live_prefix: Optional[str] = None,
733
+ mirror_stdout: bool = False) -> tuple[int, str, str]:
734
+ """Run a subprocess. Returns (rc, stdout, stderr).
735
+
736
+ If LIVE_STREAM is on AND stderr is a TTY, the child's stderr is mirrored
737
+ to our stderr line-by-line as it's produced. stdout is captured silently
738
+ unless `mirror_stdout` is set — duet logs agent final answers to the
739
+ transcript afterwards.
740
+
741
+ If `stderr_log_path` is set, the child's stderr is also tee'd line-by-line
742
+ to that file (append mode) — useful for post-hoc forensics on long agent
743
+ turns where the live trace is otherwise lost.
744
+
745
+ If `pid_file_path` is set, the child's PID is written there at startup
746
+ and the file is removed when the call returns. External tools can read
747
+ the file + `kill -0 <pid>` to tell apart "duet is alive, agent thinking"
748
+ vs "agent crashed silently". Critical for agents like `claude -p` that
749
+ emit no stderr during their long API call.
750
+ """
751
+ mirror = sys.stderr if (LIVE_STREAM and not RECAP_MODE and sys.stderr.isatty()) else None
752
+ prefix = live_prefix if live_prefix is not None else LIVE_PREFIX
753
+ out_chunks: list[str] = []
754
+ err_chunks: list[str] = []
755
+ stderr_file = None
756
+ if stderr_log_path is not None:
757
+ try:
758
+ stderr_log_path.parent.mkdir(parents=True, exist_ok=True)
759
+ stderr_file = open(stderr_log_path, "a", encoding="utf-8", buffering=1)
760
+ stderr_file.write(
761
+ f"\n# {dt.datetime.now().isoformat(timespec='seconds')} :: "
762
+ f"{' '.join(cmd[:3])}{' …' if len(cmd) > 3 else ''}\n"
763
+ )
764
+ except OSError as e:
765
+ print(f"[duet] warn: stderr log open failed ({stderr_log_path}): {e}",
766
+ file=sys.stderr)
767
+ stderr_file = None
768
+ try:
769
+ try:
770
+ proc = subprocess.Popen(
771
+ cmd, cwd=str(cwd),
772
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
773
+ text=True, bufsize=1, # line-buffered
774
+ start_new_session=True,
775
+ )
776
+ except FileNotFoundError:
777
+ return 127, "", f"[duet] command not found: {cmd[0]}"
778
+ _register_proc(proc)
779
+ if pid_file_path is not None:
780
+ try:
781
+ pid_file_path.parent.mkdir(parents=True, exist_ok=True)
782
+ # Write atomically so a poller never reads a half-written PID.
783
+ tmp = pid_file_path.with_name(pid_file_path.name + ".tmp")
784
+ tmp.write_text(f"{proc.pid}\n")
785
+ os.replace(tmp, pid_file_path)
786
+ except OSError as e:
787
+ print(f"[duet] warn: pid file write failed ({pid_file_path}): {e}",
788
+ file=sys.stderr)
789
+ activity_event = threading.Event()
790
+ t_out = threading.Thread(target=_stream_reader,
791
+ args=(proc.stdout, out_chunks,
792
+ mirror if mirror_stdout else None,
793
+ prefix if mirror_stdout else "",
794
+ None, activity_event),
795
+ daemon=True)
796
+ t_err = threading.Thread(target=_stream_reader,
797
+ args=(proc.stderr, err_chunks, mirror, prefix,
798
+ stderr_file, activity_event),
799
+ daemon=True)
800
+ t_out.start(); t_err.start()
801
+ # Heartbeat: print elapsed-time hint when proc goes quiet (>20s no
802
+ # stderr/stdout). Useful for `claude -p`, which stays silent on
803
+ # stderr during the API call. No-op if mirror is None (--quiet).
804
+ t_hb = threading.Thread(target=_quiet_heartbeat,
805
+ args=(proc, mirror, time.monotonic(), activity_event, 20, prefix),
806
+ daemon=True)
807
+ t_hb.start()
808
+
809
+ try:
810
+ if stdin is not None and proc.stdin is not None:
811
+ try:
812
+ proc.stdin.write(stdin)
813
+ except BrokenPipeError:
814
+ pass
815
+ if proc.stdin is not None:
816
+ proc.stdin.close()
817
+ proc.wait(timeout=timeout)
818
+ except subprocess.TimeoutExpired:
819
+ _signal_proc_tree(proc, signal.SIGTERM)
820
+ try:
821
+ proc.wait(timeout=2)
822
+ except subprocess.TimeoutExpired:
823
+ _signal_proc_tree(proc, signal.SIGKILL)
824
+ proc.wait()
825
+ t_out.join(timeout=2); t_err.join(timeout=2)
826
+ return 124, "".join(out_chunks), "".join(err_chunks) + f"\n[duet] TIMEOUT after {timeout}s"
827
+ finally:
828
+ _unregister_proc(proc)
829
+ t_out.join(timeout=5); t_err.join(timeout=5)
830
+ return proc.returncode, "".join(out_chunks), "".join(err_chunks)
831
+ finally:
832
+ if stderr_file is not None:
833
+ try:
834
+ stderr_file.close()
835
+ except Exception:
836
+ pass
837
+ if pid_file_path is not None:
838
+ try:
839
+ pid_file_path.unlink(missing_ok=True)
840
+ except OSError:
841
+ pass
842
+
843
+
844
def _agent_finished_reason(exc: Exception) -> str:
    """Translate an exception from an agent call into a finished-reason code.

    An `AgentRunError` already carries its own `finished_reason`; a raw
    `subprocess.TimeoutExpired` maps to `FINISHED_TIMEOUT`; every other
    exception is reported as `FINISHED_AGENT_ERROR`.
    """
    if not isinstance(exc, AgentRunError):
        timed_out = isinstance(exc, subprocess.TimeoutExpired)
        return FINISHED_TIMEOUT if timed_out else FINISHED_AGENT_ERROR
    return exc.finished_reason
850
+
851
+
852
def _agent_run(cmd: list[str], *, backend: str, cwd: pathlib.Path,
               stdin: Optional[str], timeout: int,
               stderr_log_path: Optional[pathlib.Path],
               pid_file_path: Optional[pathlib.Path]) -> tuple[int, str, str]:
    """Invoke `_run` for an agent backend, normalising failures.

    Returns whatever `_run` returns, i.e. `(returncode, stdout, stderr)`.
    Any exception escaping `_run` is re-raised as `AgentRunError`, chained to
    the original: `subprocess.TimeoutExpired` becomes `FINISHED_TIMEOUT`,
    everything else `FINISHED_AGENT_ERROR`. `backend` is only used to label
    the error message.
    """
    run_kwargs = dict(
        cwd=cwd,
        stdin=stdin,
        timeout=timeout,
        stderr_log_path=stderr_log_path,
        pid_file_path=pid_file_path,
    )
    try:
        outcome = _run(cmd, **run_kwargs)
    except subprocess.TimeoutExpired as e:
        detail = f"{backend} timed out after {e.timeout}s"
        raise AgentRunError(FINISHED_TIMEOUT, detail) from e
    except Exception as e:
        detail = f"{backend} invocation failed: {e}"
        raise AgentRunError(FINISHED_AGENT_ERROR, detail) from e
    return outcome
875
+
876
+
877
+ # ---------- verification gate ----------
878
+
879
def effective_verify_cwd(cfg: DuetConfig,
                         worktree_path: Optional[pathlib.Path]) -> pathlib.Path:
    """Pick the directory in which the convergence verify command runs.

    The worktree path wins when one is given; otherwise the run's configured
    `cfg.cwd` is used.
    """
    if worktree_path:
        return worktree_path
    return cfg.cwd
883
+
884
+
885
def _tail_text(text: str, max_chars: int = VERIFY_OUTPUT_TAIL_CHARS) -> str:
    """Return `text` unchanged, or only its last `max_chars` characters.

    When truncation happens, a one-line `[duet] output truncated ...` notice
    is prepended so readers know the head of the output is missing.
    """
    if len(text) <= max_chars:
        return text
    # Slice from an explicit start index: `text[-max_chars:]` would return the
    # *whole* string when max_chars == 0, because `[-0:]` is `[0:]`.
    tail = text[len(text) - max_chars:]
    return f"[duet] output truncated to last {max_chars} chars\n" + tail
892
+
893
+
894
def _display_output(text: str) -> str:
    """Return `text` without trailing whitespace, or "(empty)" if nothing is left.

    Whitespace-only output (for example a lone newline) is reported as
    "(empty)" too, so a section in the rendered block is never silently blank.
    """
    trimmed = text.rstrip() if text else ""
    return trimmed or "(empty)"
896
+
897
+
898
def _format_verify_log(turn_label: str, result: VerifyResult,
                       stdout: str, stderr: str) -> str:
    """Render the body of a verify log file.

    The output is a small header (turn, command, cwd, exit code, timeout
    flag and, when present, the error) followed by `## stdout` and
    `## stderr` sections. Empty streams are written as "(empty)".
    """
    shown_exit = "n/a" if result.exit_code is None else result.exit_code
    shown_timeout = "yes" if result.timed_out else "no"
    header = [
        "# duet verify",
        f"turn: {turn_label}",
        f"command: {result.cmd}",
        f"cwd: {result.cwd}",
        f"exit_code: {shown_exit}",
        f"timed_out: {shown_timeout}",
    ]
    if result.error:
        header.append(f"error: {result.error}")
    streams = [
        "",
        "## stdout",
        stdout or "(empty)\n",
        "",
        "## stderr",
        stderr or "(empty)\n",
    ]
    return "\n".join(header + streams)
919
+
920
+
921
def verify_result_state(result: VerifyResult) -> dict:
    """Flatten a verify result into a plain dict of simple values.

    Path-like fields (`cwd`, `log_path`) are stringified. The `error` key is
    only present when the result carries a truthy error.
    """
    state = dict(
        ok=result.ok,
        command=result.cmd,
        cwd=str(result.cwd),
        exit_code=result.exit_code,
        timed_out=result.timed_out,
        log_path=str(result.log_path),
        stdout_tail=result.stdout_tail,
        stderr_tail=result.stderr_tail,
    )
    if result.error:
        state.update(error=result.error)
    return state
935
+
936
+
937
def format_verify_success_block(result: VerifyResult) -> str:
    """Render the `[duet verify passed]` block for a successful verify run.

    The exit code line is always written as 0; the command, cwd and log path
    come from `result`.
    """
    rows = [
        "[duet verify passed]",
        f"command: {result.cmd}",
        f"cwd: {result.cwd}",
        "exit_code: 0",
        f"log: {result.log_path}",
        "[/duet verify passed]",
    ]
    return "\n".join(rows)
946
+
947
+
948
def format_verify_failure_block(result: VerifyResult) -> str:
    """Render the `[duet verify failed]` block for a failed verify run.

    Includes the command, cwd and exit code ("n/a" when there is none),
    optional `timed_out` / `error` lines, the log path, and the stdout and
    stderr tails formatted through `_display_output`.
    """
    shown_exit = "n/a" if result.exit_code is None else result.exit_code
    optional_rows = []
    if result.timed_out:
        optional_rows.append("timed_out: yes")
    if result.error:
        optional_rows.append(f"error: {result.error}")
    return "\n".join([
        "[duet verify failed]",
        f"command: {result.cmd}",
        f"cwd: {result.cwd}",
        f"exit_code: {shown_exit}",
        *optional_rows,
        f"log: {result.log_path}",
        "",
        "stdout tail:",
        _display_output(result.stdout_tail),
        "",
        "stderr tail:",
        _display_output(result.stderr_tail),
        "[/duet verify failed]",
    ])
971
+
972
+
973
def run_verify_command(cfg: DuetConfig, run_dir: pathlib.Path, turn_label: str,
                       worktree_path: Optional[pathlib.Path]) -> VerifyResult:
    """Run `cfg.verify_cmd` through `sh -c` and record the outcome.

    The command runs in `effective_verify_cwd(cfg, worktree_path)` with the
    per-turn timeout. A log is always written to
    `run_dir/turn-<turn_label>-verify.log`, including when launching the
    command raised; in that case the result has no exit code and carries the
    exception text in `error`.

    Raises:
        ValueError: if `cfg.verify_cmd` is not set.
    """
    if not cfg.verify_cmd:
        raise ValueError("run_verify_command called without cfg.verify_cmd")
    verify_cwd = effective_verify_cwd(cfg, worktree_path)
    log_path = run_dir / f"turn-{turn_label}-verify.log"
    pid_path = run_dir / f"turn-{turn_label}-verify.pid"
    # Fields that are identical for the success and the failure result.
    common = dict(cmd=cfg.verify_cmd, cwd=verify_cwd, log_path=log_path)
    started = dt.datetime.now().isoformat(timespec="seconds")
    print(f"[duet] verify turn {turn_label}: {cfg.verify_cmd} (cwd={verify_cwd})")
    try:
        rc, stdout, stderr = _run(
            ["sh", "-c", cfg.verify_cmd],
            cwd=verify_cwd,
            stdin="",
            timeout=cfg.per_turn_timeout,
            live_prefix=VERIFY_LIVE_PREFIX,
            mirror_stdout=True,
            pid_file_path=pid_path,
        )
        # 124 is the code the visible tail of `_run` returns on timeout.
        result = VerifyResult(
            ok=(rc == 0),
            exit_code=rc,
            stdout_tail=_tail_text(stdout),
            stderr_tail=_tail_text(stderr),
            timed_out=(rc == 124),
            **common,
        )
    except Exception as e:
        # Nothing trustworthy was captured; log empty streams plus the error.
        stdout = stderr = ""
        result = VerifyResult(
            ok=False,
            exit_code=None,
            stdout_tail="",
            stderr_tail="",
            error=str(e),
            **common,
        )
    finished = dt.datetime.now().isoformat(timespec="seconds")
    report = _format_verify_log(turn_label, result, stdout, stderr)
    write_text_atomic(
        log_path,
        f"started: {started}\n" f"finished: {finished}\n\n" + report,
    )
    return result
1025
+
1026
+
1027
def call_claude(agent: Agent, system_prompt: str, message: str,
                cwd: pathlib.Path, perm_mode: str, timeout: int, dry: bool,
                reasoning: Optional[str] = None,
                stderr_log_path: Optional[pathlib.Path] = None,
                pid_file_path: Optional[pathlib.Path] = None,
                add_dirs: Optional[list[pathlib.Path]] = None) -> tuple[str, Optional[str]]:
    """Run one `claude -p` turn and return (assistant_text, new_session_id).

    Args:
        agent: supplies the name, optional `cwd_override`, `session_id` to
            resume, `model` and `extra_args`.
        system_prompt: appended via `--append-system-prompt`; prefixed with
            the matching `CLAUDE_REASONING_PROMPT_PREFIX` entry when
            `reasoning` is set.
        message: the prompt passed to `-p`.
        cwd: working directory, unless `agent.cwd_override` is set.
        perm_mode: value for `--permission-mode`.
        timeout: forwarded to `_agent_run`.
        dry: when true, no process is started; a canned reply ending in
            `DEFAULT_SENTINEL` is returned instead.
        reasoning: optional effort level, mapped through
            `CLAUDE_REASONING_MAP` and passed as `--effort`.
        stderr_log_path / pid_file_path: forwarded to `_agent_run`.
        add_dirs: extra `--add-dir` roots in addition to the working directory.

    Returns:
        The reply text (trailing whitespace stripped) and the session id from
        the JSON payload, falling back to `agent.session_id`.

    Raises:
        AgentRunError: on a non-zero exit (exit code 124 is reported as a
            timeout), on unparsable JSON, or on JSON that is not an object
            with a string `result`.
    """
    eff_cwd = agent.cwd_override or cwd
    reasoning_args: list[str] = []
    if reasoning:
        system_prompt = CLAUDE_REASONING_PROMPT_PREFIX.get(reasoning, "") + system_prompt
        reasoning_args = ["--effort", CLAUDE_REASONING_MAP.get(reasoning, reasoning)]
    if dry:
        new_sid = agent.session_id or f"dry-claude-{agent.name}-{int(time.time())}"
        wt_note = f" wt={eff_cwd}" if agent.cwd_override else ""
        rn = f" reasoning={reasoning}" if reasoning else ""
        return (
            f"[dry-run claude/{agent.name}{wt_note}{rn}] received {len(message)} chars\n"
            "LGTM rationale: dry-run accepted the harness path and has no real "
            "agent output to review.\n"
            f"{DEFAULT_SENTINEL}"
        ), new_sid
    cmd = ["claude", "-p", message,
           "--output-format", "json",
           "--append-system-prompt", system_prompt,
           "--permission-mode", perm_mode,
           *reasoning_args,
           "--add-dir", str(eff_cwd)]
    # Extra read/write roots for tasks that span outside cwd (e.g. writing
    # ../DECISION_v2.md from a cwd-scoped run). Without these, claude refuses
    # paths outside its allowlist with a generic permission error.
    for d in (add_dirs or []):
        cmd += ["--add-dir", str(d)]
    if agent.session_id:
        cmd += ["--resume", agent.session_id]
    if agent.model:
        cmd += ["--model", agent.model]
    cmd += agent.extra_args
    rc, out, err = _agent_run(
        cmd,
        backend="claude",
        cwd=eff_cwd,
        stdin=None,
        timeout=timeout,
        stderr_log_path=stderr_log_path,
        pid_file_path=pid_file_path,
    )
    if rc != 0:
        reason = FINISHED_TIMEOUT if rc == 124 else FINISHED_AGENT_ERROR
        raise AgentRunError(reason, f"claude exited {rc}\nstderr:\n{err}")
    snippet = out[:500].strip()
    try:
        payload = json.loads(out)
    except json.JSONDecodeError as e:
        raise AgentRunError(
            FINISHED_AGENT_ERROR,
            f"claude returned malformed JSON output: {snippet!r}",
        ) from e
    # Valid JSON is not necessarily the expected object: a top-level list or
    # scalar, or a non-string `result`, would otherwise surface as a bare
    # AttributeError instead of an AgentRunError.
    # NOTE(review): presumably reachable when `extra_args` changes the output
    # shape; confirm against the CLI version in use.
    result = payload.get("result") if isinstance(payload, dict) else None
    if not isinstance(payload, dict) or not isinstance(result or "", str):
        raise AgentRunError(
            FINISHED_AGENT_ERROR,
            "claude returned unexpected JSON output (expected an object "
            f"with a string `result`): {snippet!r}",
        )
    return (result or "").rstrip(), payload.get("session_id") or agent.session_id
1088
+
1089
+
1090
# Codex's `codex exec` prints a line like `session id: 019e12ad-0b1b-7732-bd7b-6acbbd04ab46`
# to stderr near startup; modern builds also re-emit it on resume. We pin to that
# UUID for subsequent resumes so duet doesn't depend on `--last`'s cwd-keyed
# lookup. Anchored to "session id" to avoid false-positives on stray UUIDs in
# tracebacks or path strings; case-insensitive because the label has varied.

# Bare 8-4-4-4-12 hex UUID shape, shared by both compiled patterns below.
_CODEX_UUID_PATTERN = (
    r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
)
# Matches the label ("session id", "session_id", "session-id" or "sessionid"),
# a ":" or "=" separator, and captures the UUID as group 1.
_CODEX_SESSION_ID_RE = re.compile(
    r"session[ _-]?id\s*[:=]\s*(" + _CODEX_UUID_PATTERN + r")",
    re.IGNORECASE,
)
# Matches only when the *entire* string is a UUID.
_CODEX_UUID_RE = re.compile(r"\A" + _CODEX_UUID_PATTERN + r"\Z", re.IGNORECASE)
1103
+
1104
+
1105
def _parse_codex_session_id(stderr: str) -> Optional[str]:
    """Extract the final `session id: <uuid>` UUID from Codex's stderr.

    When several ids are printed the last one is used, so a resume that
    reports both an inherited and a rotated id ends up pinned to the rotated
    one. The UUID is returned lowercased; the result is None when stderr is
    empty or contains no match. Only stderr is inspected: stdout holds the
    assistant reply, and a UUID in that reply must never be mistaken for the
    session pin.
    """
    last_uuid = None
    for hit in _CODEX_SESSION_ID_RE.finditer(stderr or ""):
        last_uuid = hit.group(1)
    if last_uuid is None:
        return None
    return last_uuid.lower()
1118
+
1119
+
1120
def call_codex(agent: Agent, system_prompt: str, message: str,
               cwd: pathlib.Path, sandbox: str, timeout: int, dry: bool,
               first_turn: bool, reasoning: Optional[str] = None,
               fast: bool = False,
               stderr_log_path: Optional[pathlib.Path] = None,
               pid_file_path: Optional[pathlib.Path] = None) -> tuple[str, Optional[str]]:
    """Run one Codex turn and return (assistant_text, new_session_id).

    Resume strategy: when stderr from a prior turn yielded a UUID we pin to
    that with `codex exec resume <uuid>`; otherwise we fall back to
    `codex exec resume --last`, which keys on cwd. `agent.session_id` carries
    either the parsed UUID, the sentinel ``"codex-current"`` (meaning "use
    --last"), or ``None`` (no prior turn for this agent).

    Args:
        agent: supplies the name, optional `cwd_override`, `session_id`,
            `model` and `extra_args`.
        system_prompt / message: combined into a single positional prompt
            with `=== ROLE ===` / `=== MESSAGE FROM PARTNER ===` headers.
        cwd: working directory, unless `agent.cwd_override` is set.
        sandbox: value for `--sandbox` (first `exec` call only).
        timeout: forwarded to `_agent_run`.
        dry: when true, no process is started; a canned reply ending in
            `DEFAULT_SENTINEL` is returned instead.
        first_turn: forces a fresh `codex exec` rather than a resume.
        reasoning: optional effort level, mapped through `CODEX_REASONING_MAP`.
        fast: pins effort to "low" and requests concise reasoning summaries.
        stderr_log_path / pid_file_path: forwarded to `_agent_run`.

    Raises:
        AgentRunError: on a non-zero exit (exit code 124 is reported as a
            timeout). The message lists the command's leading options but
            never the prompt itself.
    """
    eff_cwd = agent.cwd_override or cwd
    # Fast mode pins this Codex turn to low reasoning regardless of caller
    # intent. Codex minimal currently rejects the default tool set, while low
    # preserves tool compatibility and still trades depth for latency.
    effective = "low" if fast else reasoning
    if dry:
        new_sid = agent.session_id or f"dry-codex-{agent.name}-{int(time.time())}"
        wt_note = f" wt={eff_cwd}" if agent.cwd_override else ""
        rn = f" reasoning={effective}" if effective else ""
        fast_note = " fast" if fast else ""
        return (
            f"[dry-run codex/{agent.name}{fast_note}{wt_note}{rn}] received {len(message)} chars\n"
            "LGTM rationale: dry-run accepted the harness path and has no real "
            "agent output to review.\n"
            f"{DEFAULT_SENTINEL}"
        ), new_sid
    full_prompt = f"=== ROLE ===\n{system_prompt}\n\n=== MESSAGE FROM PARTNER ===\n{message}"
    reasoning_args: list[str] = []
    if effective:
        codex_value = CODEX_REASONING_MAP.get(effective, effective)
        # `medium` is Codex's default; only override when we actually want a
        # different effort level.
        if codex_value != "medium":
            reasoning_args = ["-c", f"model_reasoning_effort={codex_value}"]
    if fast:
        # Concise reasoning summaries cut output volume and time-to-first-token
        # on Codex turns. Pairs with low effort above; together they're the
        # "trade depth for latency while keeping tools available" knob.
        reasoning_args += ["-c", "model_reasoning_summary=concise"]
    # Codex's `exec` parses options BEFORE the positional prompt in modern
    # builds, and some flags (e.g. --ask-for-approval) have come and gone
    # across versions. We keep the default flag set conservative.
    # `extra_args` lets users add their version's approval/auto flag (e.g.
    # `["--full-auto"]` or `["--yolo"]`) and config overrides (`-c …`).
    #
    # IMPORTANT: `codex exec resume` accepts a SUBSET of `codex exec`'s
    # flags. In particular, `--sandbox` and `--cd` are exec-only — they
    # carry over from the resumed session and codex's clap parser rejects
    # them on resume with "unexpected argument '--sandbox' found". So we
    # split: exec_only_opts are passed only on the first call.
    shared_opts = ["--skip-git-repo-check"]
    if agent.model:
        shared_opts += ["--model", agent.model]
    # All options BEFORE the positional prompt — modern codex's clap parser
    # rejects flags after the prompt.
    if first_turn or not agent.session_id:
        exec_only_opts = ["--sandbox", sandbox, "--cd", str(eff_cwd)]
        options = [*exec_only_opts, *shared_opts, *reasoning_args, *agent.extra_args]
        cmd = ["codex", "exec", *options, full_prompt]
    else:
        # cwd is set via subprocess.Popen(cwd=…) so codex inherits the right
        # directory regardless of how we resume. `--sandbox` and `--cd` are
        # exec-only; sandbox carries over from the resumed session.
        options = [*shared_opts, *reasoning_args, *agent.extra_args]
        if _CODEX_UUID_RE.match(agent.session_id):
            # Pin to the UUID we parsed from a prior turn's stderr. This is
            # robust to parallel codex sessions sharing the cwd because
            # codex looks up the session by id, not by recency.
            cmd = ["codex", "exec", "resume", agent.session_id,
                   *options, full_prompt]
        else:
            # Sentinel value (typically "codex-current") meaning "we know a
            # prior turn happened but never captured a UUID." Fall back to
            # the most recent codex session in this cwd. Caveat: don't run
            # parallel codex sessions in the same cwd while a duet is alive.
            cmd = ["codex", "exec", "resume", "--last",
                   *options, full_prompt]
    # codex exec hangs on non-TTY stdin without explicit close (issue #20919)
    rc, out, err = _agent_run(
        cmd,
        backend="codex",
        cwd=eff_cwd,
        stdin="",
        timeout=timeout,
        stderr_log_path=stderr_log_path,
        pid_file_path=pid_file_path,
    )
    if rc != 0:
        reason = FINISHED_TIMEOUT if rc == 124 else FINISHED_AGENT_ERROR
        # The prompt is always the final argv element. Drop it before taking
        # the preview slice: with a short option list, `cmd[:8]` would embed
        # the entire prompt in the error message.
        shown_cmd = " ".join(cmd[:-1][:8])
        raise AgentRunError(
            reason,
            f"codex exited {rc}\nstderr:\n{err}\ncmd: {shown_cmd}…",
        )
    # Prefer a freshly-parsed UUID from stderr; fall back to whatever id we
    # were already carrying; finally fall back to the "codex-current"
    # sentinel so the next turn at least knows a prior turn happened.
    parsed_sid = _parse_codex_session_id(err)
    return out.rstrip(), parsed_sid or agent.session_id or "codex-current"
1222
+
1223
+
1224
def call_agent(agent: Agent, message: str, cfg: DuetConfig, first_turn_for_agent: bool,
               *, run_dir: Optional[pathlib.Path] = None,
               turn_label: Optional[str] = None) -> str:
    """Send `message` to `agent` through its backend and return the reply text.

    The session id reported by the backend call is stored back on
    `agent.session_id`. When both `run_dir` and `turn_label` are given, a
    per-turn stderr log path and pid file path inside `run_dir` are passed to
    the backend.

    Raises:
        SystemExit: if `agent.backend` is neither "claude" nor "codex".
    """
    sys_prompt = agent.system_prompt(cfg.sentinel, recap=cfg.recap)
    reasoning = effective_reasoning(agent, cfg.reasoning)
    # Per-turn stderr log + pid file land in the run dir for forensics and
    # liveness checks, sortable by turn number. The pid file is the only
    # reliable "is the agent still alive?" signal when stderr goes silent
    # (claude -p emits nothing during its API call).
    log_path: Optional[pathlib.Path] = None
    pid_path: Optional[pathlib.Path] = None
    have_artifact_dir = run_dir is not None and turn_label is not None
    if have_artifact_dir:
        log_path = run_dir / f"turn-{turn_label}-{agent.name}.stderr.log"
        pid_path = run_dir / f"turn-{turn_label}-{agent.name}.pid"

    backend = agent.backend
    if backend not in ("claude", "codex"):
        raise SystemExit(f"unknown backend '{agent.backend}'")

    if backend == "claude":
        text, new_sid = call_claude(
            agent, sys_prompt, message, cfg.cwd,
            cfg.permission_mode, cfg.per_turn_timeout, cfg.dry_run,
            reasoning=reasoning,
            stderr_log_path=log_path,
            pid_file_path=pid_path,
            add_dirs=cfg.add_dirs,
        )
    else:
        # Fast mode is scoped to coder-role codex agents so it can't silently
        # downgrade a planner/reviewer when a user pairs `--reasoning max`
        # with `--codex-fast`. Config validation in main() warns when no
        # codex:coder agent exists at all.
        fast = cfg.codex_fast and agent.role == "coder"
        text, new_sid = call_codex(
            agent, sys_prompt, message, cfg.cwd,
            cfg.sandbox, cfg.per_turn_timeout, cfg.dry_run,
            first_turn=first_turn_for_agent,
            reasoning=reasoning,
            fast=fast,
            stderr_log_path=log_path,
            pid_file_path=pid_path,
        )
    agent.session_id = new_sid
    return text
1263
+
1264
+
1265
def _agent_failure_block(reason: str, exc: Exception, turn_label: str,
                         agent: Agent, run_dir: pathlib.Path) -> str:
    """Build the four-line `[duet]` report for a failed agent turn.

    The report names the failure kind (TIMEOUT or AGENT ERROR), the turn and
    agent, the finished reason, the exception text, and the path of the
    turn's stderr log inside `run_dir`.
    """
    kind = "AGENT ERROR" if reason != FINISHED_TIMEOUT else "TIMEOUT"
    log_path = run_dir / f"turn-{turn_label}-{agent.name}.stderr.log"
    headline = (
        f"[duet] {kind}: turn {turn_label} failed for "
        f"{agent.name} ({agent.backend}/{agent.role})"
    )
    rows = (
        headline,
        f"[duet] finished_reason: {reason}",
        f"[duet] error: {exc}",
        f"[duet] stderr log: {log_path}",
    )
    return "\n".join(rows)
1276
+
1277
+ # ---------- loop ----------
1278
+
1279
class StopFlag:
    """Mutable "please stop" marker shared between a signal handler and the loop.

    `requested` starts False and `reason` starts empty; `request()` flips the
    flag and records why.
    """

    def __init__(self) -> None:
        self.requested, self.reason = False, ""

    def request(self, reason: str) -> None:
        """Mark the stop as requested and remember `reason`."""
        self.requested, self.reason = True, reason
1286
+
1287
+
1288
def _install_sigint(stop: StopFlag) -> None:
    """Install a two-stage Ctrl-C handler driven by `stop`.

    The first SIGINT only records a stop request (reason "SIGINT") so the
    current turn can finish. A SIGINT that arrives while a stop is already
    requested kills the active child processes and exits immediately with
    status 130.
    """
    def _on_sigint(signum, frame):
        if not stop.requested:
            print("\n[duet] SIGINT received — finishing current turn, then stopping. "
                  "Press Ctrl-C again to abort immediately.", file=sys.stderr)
            stop.request("SIGINT")
            return
        print("\n[duet] second SIGINT — exiting hard.", file=sys.stderr)
        _terminate_active_processes(signal.SIGKILL)
        # os._exit skips normal interpreter shutdown on purpose: hard abort.
        os._exit(130)

    signal.signal(signal.SIGINT, _on_sigint)
1298
+
1299
+
1300
def _convergence_markers(text: str, sentinel: str) -> tuple[bool, bool]:
    """Return (sentinel_seen, rationale_seen), ignoring fenced code blocks.

    `sentinel_seen` is true when some line outside a fence consists of the
    sentinel alone (surrounding whitespace allowed). `rationale_seen` is true
    when the rationale text collected *before* the first sentinel line is at
    least `CONVERGENCE_RATIONALE_MIN_CHARS` characters long after whitespace
    is collapsed.
    """
    # The sentinel must be the only content on its line.
    sentinel_re = re.compile(rf"^\s*{re.escape(sentinel)}\s*$")
    # "LGTM rationale:" or "Rationale:", optionally behind a list bullet
    # and/or wrapped in ** bold markers; group 1 is the text after the colon.
    rationale_re = re.compile(
        r"^\s*(?:[-*]\s*)?(?:\*\*)?(?:LGTM\s+rationale|Rationale)"
        r"(?:\*\*)?\s*:\s*(.*)$",
        re.IGNORECASE,
    )
    in_fence = False
    fence_char = ""
    fence_len = 0
    sentinel_seen = False
    rationale_parts: list[str] = []
    collecting_rationale = False

    for line in text.splitlines():
        # A run of 3+ backticks or tildes opens or closes a fenced block.
        m = re.match(r"^\s*(`{3,}|~{3,})", line)
        if m:
            marker = m.group(1)
            if not in_fence:
                in_fence = True
                fence_char = marker[0]
                fence_len = len(marker)
            elif marker[0] == fence_char and len(marker) >= fence_len:
                # Only a marker of the same character, at least as long as
                # the opener, closes the fence.
                in_fence = False
                fence_char = ""
                fence_len = 0
            continue
        if in_fence:
            # Everything inside a fence is ignored, sentinel included.
            continue
        if sentinel_re.match(line):
            sentinel_seen = True
            collecting_rationale = False
            continue
        if sentinel_seen:
            # Lines after the sentinel never contribute to the rationale.
            continue
        rationale_match = rationale_re.match(line)
        if rationale_match:
            collecting_rationale = True
            rationale_parts.append(rationale_match.group(1).strip())
            continue
        if collecting_rationale:
            # Non-blank lines following the rationale label are treated as
            # its continuation.
            stripped = line.strip()
            if stripped:
                rationale_parts.append(stripped)

    rationale_text = " ".join(part for part in rationale_parts if part)
    rationale_text = re.sub(r"\s+", " ", rationale_text).strip()
    rationale_seen = len(rationale_text) >= CONVERGENCE_RATIONALE_MIN_CHARS
    return sentinel_seen, rationale_seen
1350
+
1351
+
1352
def convergence_proposed(text: str, sentinel: str) -> bool:
    """Return True only if `text` has both the sentinel and a rationale.

    Both markers are detected by `_convergence_markers`.
    """
    markers = _convergence_markers(text, sentinel)
    return all(markers)
1355
+
1356
+
1357
def parse_recap_headers(text: str) -> dict[str, Optional[str]]:
    """Read `RECAP:` / `FILES:` / `STATUS:` headers from the top of a reply.

    Only the first ten lines are inspected and a header must start at
    column 0. Missing or empty headers map to None, as does a STATUS whose
    value is not one of the recognised status words. If a header repeats,
    the last occurrence wins.
    """
    header_re = re.compile(r"^(RECAP|FILES|STATUS):\s*(.*)$")
    known_statuses = frozenset((
        "planning", "implementing", "reviewing", "requesting-changes",
        "ready-for-review", "converged",
    ))
    headers: dict[str, Optional[str]] = dict.fromkeys(("recap", "files", "status"))
    for candidate in text.splitlines()[:10]:
        hit = header_re.match(candidate)
        if hit is None:
            continue
        label, raw_value = hit.groups()
        field = label.lower()
        cleaned = raw_value.strip()
        if field == "status" and cleaned not in known_statuses:
            headers[field] = None
        else:
            headers[field] = cleaned or None
    return headers
1374
+
1375
+
1376
# Path-like token (word characters, ".", "/" and "-") ending in one of a
# fixed list of source/document extensions.
_FILE_PATH_RE = re.compile(
    r"\b[\w./-]+\.(?:py|md|sh|ts|tsx|js|jsx|json|yaml|yml|toml|html|css|rs|go|java|sql|txt)\b"
)


def extract_files_heuristic(text: str) -> list[str]:
    """Collect up to eight distinct plausible file paths mentioned in `text`.

    Paths inside single-line backtick spans are taken first, followed by
    paths found anywhere in the text; within each pass the order of first
    appearance is kept and duplicates are dropped.
    """
    backticked = [
        hit.group(0)
        for span in re.findall(r"`([^`\n]+)`", text)
        for hit in _FILE_PATH_RE.finditer(span)
    ]
    anywhere = [hit.group(0) for hit in _FILE_PATH_RE.finditer(text)]
    # dict.fromkeys de-duplicates while preserving first-seen order.
    unique = list(dict.fromkeys(backticked + anywhere))
    return unique[:8]
1398
+
1399
+
1400
def derive_status_heuristic(role: str, sentinel_hit: bool) -> str:
    """Guess a status word from the agent's role.

    A sentinel hit always yields "converged". Otherwise the role decides:
    planner -> "planning", coder -> "implementing", reviewer and
    triage-reviewer -> "reviewing", anything else -> "unknown".
    """
    if sentinel_hit:
        return "converged"
    status_by_role = {
        "planner": "planning",
        "coder": "implementing",
        "reviewer": "reviewing",
        "triage-reviewer": "reviewing",
    }
    return status_by_role.get(role, "unknown")
1410
+
1411
+
1412
def _derive_recap_heuristic(text: str) -> str:
    """Derive a one-line summary from the first meaningful line of `text`.

    Blank lines and `RECAP:` / `FILES:` / `STATUS:` header lines are skipped.
    Leading Markdown markers are removed from the candidate line: runs of
    `-`, `*`, `#`, `>` and numbered-list prefixes such as `1.` or `2)`, each
    of which must be followed by whitespace or the end of the line. Requiring
    that separator keeps real content intact: "3 tests fail" keeps its
    number and "**bold**" keeps its emphasis. Several stacked markers
    ("## 1. Title", "> - item") are all removed. The result is shortened to
    at most 140 characters.

    Returns "No concise summary available." when no line qualifies.
    """
    header_re = re.compile(r"^(RECAP|FILES|STATUS):")
    marker_re = re.compile(r"^(?:(?:[-*#>]+|\d+[.)])(?:\s+|$))+")
    for line in text.splitlines():
        s = line.strip()
        if not s or header_re.match(s):
            continue
        s = marker_re.sub("", s).strip()
        if s:
            return textwrap.shorten(s, width=140, placeholder="...")
    return "No concise summary available."
1421
+
1422
+
1423
def _format_byte_size(byte_size: int) -> str:
    """Format a byte count as `<n>B` below 1024, else as `<x.y>KB`."""
    if byte_size >= 1024:
        kib = byte_size / 1024
        return f"{kib:.1f}KB"
    return f"{byte_size}B"
1427
+
1428
+
1429
def _recap_field(parsed: dict[str, Optional[str]],
                 fallbacks: dict[str, str], key: str) -> str:
    """Return the parsed value for `key`, or a marked fallback.

    A missing or empty parsed value is replaced by the fallback for `key`
    ("unknown" when there is none), prefixed with "· " to show that it did
    not come from the agent's own headers.
    """
    explicit = parsed.get(key)
    if not explicit:
        return f"· {fallbacks.get(key, 'unknown')}"
    return explicit
1435
+
1436
+
1437
def format_recap_block(turn_no: int, agent_name: str, role: str,
                       elapsed_s: float, byte_size: int, line_count: int,
                       parsed: dict[str, Optional[str]],
                       fallbacks: dict[str, str],
                       sentinel_hit: bool) -> str:
    """Render the Markdown recap block for one turn.

    The heading shows the turn number, agent, role, rounded elapsed seconds,
    reply size and line count. RECAP / FILES / STATUS use the parsed headers,
    falling back to `fallbacks`. An agent-claimed "converged" status is
    discarded when the sentinel was not actually hit. The caller's `parsed`
    dict is never mutated.
    """
    fields = parsed
    if parsed.get("status") == "converged" and not sentinel_hit:
        fields = {**parsed, "status": None}
    recap, files, status = (
        _recap_field(fields, fallbacks, name)
        for name in ("recap", "files", "status")
    )
    convergence_label = "yes" if sentinel_hit else "no"
    heading = (
        f"## Turn {turn_no:02d} | {agent_name} ({role}) · "
        f"{int(round(elapsed_s))}s · {_format_byte_size(byte_size)} · "
        f"{line_count} lines\n\n"
    )
    details = (
        f"RECAP: {recap}\n"
        f"FILES: {files}\n"
        f"STATUS: {status} · convergence: {convergence_label}\n\n"
    )
    return heading + details
1457
+
1458
+
1459
def _format_live_recap_block(recap_block: str) -> str:
    """Compact a recap block for live display.

    Strips surrounding newlines, removes a leading "## " from the first line,
    drops the second line if it is blank, and returns the rest terminated by
    a single newline.
    """
    rows = recap_block.strip("\n").splitlines()
    if rows:
        heading = rows[0]
        if heading.startswith("## "):
            rows[0] = heading[3:]
    if rows[1:2] == [""]:
        rows.pop(1)
    return "\n".join(rows) + "\n"
1466
+
1467
+
1468
def _start_recap_inflight(turn_no: int, agent_name: str, role: str,
                          started_at: float) -> tuple[threading.Event, threading.Thread]:
    """Start a daemon thread that redraws a one-line "running" status.

    Roughly once per second the thread rewrites the current stdout line with
    the turn, agent, role and the mm:ss elapsed since `started_at` (a
    `time.time()` timestamp). Returns the stop event and the already-started
    thread; setting the event ends the loop.
    """
    halt = threading.Event()

    def _ticker() -> None:
        while True:
            if halt.is_set():
                break
            seconds_total = int(time.time() - started_at)
            minutes, seconds = divmod(seconds_total, 60)
            # "\r" returns to column 0; "\033[K" clears the rest of the line.
            sys.stdout.write(
                f"\rTurn {turn_no:02d} | {agent_name} ({role}) · "
                f"running [{minutes:02d}:{seconds:02d}]\033[K"
            )
            sys.stdout.flush()
            halt.wait(1)

    worker = threading.Thread(target=_ticker, daemon=True)
    worker.start()
    return halt, worker
1485
+
1486
+
1487
def _stop_recap_inflight(stop_event: threading.Event,
                         thread: threading.Thread) -> None:
    """Stop the in-flight status thread and clear its line on stdout.

    Sets the event, waits up to two seconds for the thread to finish, then
    returns the cursor to column 0 and erases the line.
    """
    stop_event.set()
    thread.join(timeout=2)
    out = sys.stdout
    out.write("\r\033[K")
    out.flush()
1493
+
1494
+
1495
def derive_seed(cfg: DuetConfig, run_dir: Optional[pathlib.Path] = None) -> str:
    """Work out the first message to send to the partner agent.

    Precedence: an explicit `cfg.kickoff`; otherwise, if the first agent has
    a session id, that agent is asked (via `EXTRACT_LATEST_PROMPT`) for its
    latest message; otherwise `cfg.task`.

    Raises:
        SystemExit: when none of the three sources is available.
    """
    if cfg.kickoff:
        return cfg.kickoff
    lead = cfg.agents[0]
    if not lead.session_id:
        if cfg.task:
            return cfg.task
        raise SystemExit("nothing to start the conversation with — supply --task, "
                         "--kickoff, or --resume-claude <session_id>")
    # The lead agent has a session: ask it to dump its latest plan/message.
    print(f"[duet] extracting latest message from {lead.backend} session "
          f"{lead.session_id[:8]}…")
    if lead.backend == "claude" and run_dir is not None:
        print(f"[duet] `claude -p` is silent on stderr during the API "
              f"call; expect 30–120s.")
        print(f"[duet] from another terminal: "
              f"duet --status {run_dir.name}")
    return call_agent(lead, EXTRACT_LATEST_PROMPT, cfg,
                      first_turn_for_agent=False,
                      run_dir=run_dir, turn_label="00-extract")
1516
+
1517
+
1518
def _setup_run_worktree(
    cfg: DuetConfig, run_id: str, run_dir: pathlib.Path,
) -> tuple[Optional[pathlib.Path], Optional[str]]:
    """Resolve this run's optional git worktree and return (path, branch).

    `--worktree-path` (reuse an existing tree) takes precedence over
    `--worktree` (create a fresh `duet/<run_id>` branch). On success the
    selected agent's `cwd_override` is pointed at the worktree as a side
    effect. Returns (None, None) when no worktree applies or a precondition
    fails, and duet then runs same-repo. A failed *create* returns
    (None, <intended branch>), matching the pre-extraction behavior.
    """
    agent_idx = {"lead": 0, "partner": 1}.get(cfg.worktree_for, 1)

    if cfg.worktree_path:
        reuse_dir = pathlib.Path(cfg.worktree_path).expanduser().resolve()
        if not reuse_dir.is_dir():
            print(f"[duet] WARNING: --worktree-path {reuse_dir} doesn't exist. "
                  f"Falling back to same-repo mode.", file=sys.stderr)
            return None, None
        # Recover the branch name for logging/state; failure is non-fatal.
        branch = None
        try:
            probe = subprocess.run(
                ["git", "-C", str(reuse_dir), "rev-parse", "--abbrev-ref", "HEAD"],
                capture_output=True, text=True, timeout=5,
            )
            if probe.returncode == 0:
                branch = probe.stdout.strip()
        except Exception:
            branch = None
        cfg.agents[agent_idx].cwd_override = reuse_dir
        print(f"[duet] reusing worktree: {reuse_dir} (branch {branch}, "
              f"agent {cfg.agents[agent_idx].name})")
        return reuse_dir, branch

    if not cfg.worktree:
        return None, None

    if not is_git_repo(cfg.cwd):
        print(f"[duet] WARNING: --worktree requested but {cfg.cwd} is not a "
              f"git repo. Falling back to same-repo mode.", file=sys.stderr)
        return None, None
    branch = f"duet/{run_id}"
    # Default lives next to the transcript/state in run_dir/wt; --worktree-root
    # overrides to e.g. ~/duet-worktrees, namespaced by run_id so parallel
    # runs never collide.
    if cfg.worktree_root:
        destination = cfg.worktree_root / run_id
    else:
        destination = run_dir / "wt"
    try:
        created = setup_worktree(cfg.cwd, branch, destination)
    except Exception as e:
        print(f"[duet] WARNING: worktree setup failed: {e}. "
              f"Continuing without.", file=sys.stderr)
        return None, branch
    cfg.agents[agent_idx].cwd_override = created
    print(f"[duet] worktree: {created} (branch {branch}, "
          f"agent {cfg.agents[agent_idx].name})")
    return created, branch
1572
+
1573
+
1574
def _build_run_state(cfg: DuetConfig, *, turns_used: int, history: list,
                     finished_reason: Optional[str],
                     transcript_path: pathlib.Path,
                     recap_path: pathlib.Path,
                     last_verify: Optional[dict] = None,
                     wt_path: Optional[pathlib.Path] = None,
                     wt_branch: Optional[str] = None) -> dict:
    """Assemble the run's state.json payload.

    This is the single builder behind every state write in `run_duet`: the
    early dry-run/force-stop/seed-failure exits, the per-turn rolling write
    and the final write. Keeping it in one place guarantees that `duet_pid`
    and the worktree/continue keys, which `--status` and `--continue` need
    to survive a mid-turn crash, are present on every payload. `recap_path`
    is only included when `cfg.recap` is enabled.
    """
    payload = dict(
        task=cfg.task,
        cwd=str(cfg.cwd),
        turns_used=turns_used,
        agents=[agent_state(a) for a in cfg.agents],
        history=history,
        finished_reason=finished_reason,
        transcript_path=str(transcript_path),
        verify_cmd=cfg.verify_cmd,
        last_verify=last_verify,
        worktree=str(wt_path) if wt_path else None,
        worktree_branch=wt_branch,
        worktree_for=cfg.worktree_for,
        continue_from=cfg.continue_from,
        duet_pid=os.getpid(),
    )
    if cfg.recap:
        payload["recap_path"] = str(recap_path)
    return payload
1608
+
1609
+
1610
@dataclasses.dataclass
class _TurnResult:
    """Outcome of one agent turn, consumed by `run_duet`'s loop control."""
    # Text that was logged for the turn: the agent's reply (or the failure
    # block when the call raised), plus any verify block / worktree diff that
    # was appended to it.
    reply: str
    # True when the reply proposed convergence and, if a verify command ran,
    # that command succeeded.
    convergence_hit: bool
    # None on success; otherwise the finished-reason derived from the
    # exception raised by the agent call.
    failure_reason: Optional[str]
    # Most recent verify state known after this turn (carried forward
    # unchanged when no verify ran during the turn).
    last_verify_state: Optional[dict]
    # Formatted recap block for the turn when recap mode is on; `_execute_turn`
    # yields None and `_run_forced_turn` yields "" when it is off.
    recap_block: Optional[str]
1618
+
1619
+
1620
def _execute_turn(cfg: DuetConfig, *, turn: int, speaker: Agent, last_msg: str,
                  run_dir: pathlib.Path, transcript_path: pathlib.Path,
                  recap_path: pathlib.Path, state_path: pathlib.Path,
                  history: list, seen_first_turn: dict,
                  wt_path: Optional[pathlib.Path], wt_branch: Optional[str],
                  last_verify_state: Optional[dict],
                  log: Callable[..., None]) -> _TurnResult:
    """Run a single agent turn: invoke, verify, recap, persist transcript+state.

    Mutates `history` (appends this turn's entry) and `seen_first_turn` in place
    and rewrites `state.json`; returns the outcome `run_duet` needs to decide
    whether to stop or rotate. Stop-flag and speaker-rotation checks stay in the
    caller — this handles only the mechanics of one turn.

    An exception raised by `call_agent` does not propagate: it is converted
    into a failure block that becomes the turn's reply, and the derived reason
    is reported through `_TurnResult.failure_reason`.
    """
    first_turn_for_agent = not seen_first_turn[speaker.name]
    guard_codex_shared_cwd_before_call(cfg, speaker, first_turn_for_agent)
    t0 = time.time()
    inflight: Optional[tuple[threading.Event, threading.Thread]] = None
    if cfg.recap:
        inflight = _start_recap_inflight(turn, speaker.name, speaker.role, t0)
    else:
        # Print BEFORE the subprocess starts so the terminal user sees something
        # happen instantly. claude -p emits nothing on stderr during its API
        # call; without this banner the user thinks duet hung.
        print(f"\n--- Turn {turn} :: {speaker.name} ({speaker.backend}/{speaker.role}) "
              f"[started {dt.datetime.now().strftime('%H:%M:%S')}] ---")
        sys.stdout.flush()
    call_succeeded = False
    failure_reason: Optional[str] = None
    failure_message: Optional[str] = None
    try:
        reply = call_agent(speaker, last_msg, cfg,
                           first_turn_for_agent=first_turn_for_agent,
                           run_dir=run_dir, turn_label=f"{turn:02d}")
        call_succeeded = True
    except Exception as e:
        failure_reason = _agent_finished_reason(e)
        failure_message = str(e)
        # Stop the in-flight recap indicator before printing the error line,
        # and clear the handle so the post-try stop below is skipped.
        if cfg.recap and inflight is not None:
            _stop_recap_inflight(*inflight)
            inflight = None
        elapsed = time.time() - t0
        print(f"Turn {turn:02d} | {speaker.name} ({speaker.role}) · "
              f"ERROR after {int(round(elapsed))}s — "
              f"see turn-{turn:02d}-{speaker.name}.stderr.log")
        # The failure block stands in for the reply so the rest of the turn
        # (transcript, history, state) is recorded the same way as a success.
        reply = _agent_failure_block(failure_reason, e, f"{turn:02d}", speaker, run_dir)
    # Success path: the indicator is still running and is stopped here.
    if cfg.recap and inflight is not None:
        _stop_recap_inflight(*inflight)
    if call_succeeded:
        guard_codex_shared_cwd_after_call(cfg, speaker, first_turn_for_agent)
    seen_first_turn[speaker.name] = True
    elapsed = time.time() - t0
    # raw_reply is the agent's own text; `reply` may grow a verify block and a
    # worktree diff below. Convergence and recap are computed from raw_reply.
    raw_reply = reply
    convergence_hit = convergence_proposed(raw_reply, cfg.sentinel)
    verify_state: Optional[dict] = None
    if convergence_hit and cfg.verify_cmd and not cfg.dry_run:
        verify_result = run_verify_command(cfg, run_dir, f"{turn:02d}", wt_path)
        verify_state = verify_result_state(verify_result)
        last_verify_state = verify_state
        if verify_result.ok:
            reply = raw_reply + "\n\n" + format_verify_success_block(verify_result)
        else:
            # A failed verify cancels this turn's convergence proposal.
            reply = raw_reply + "\n\n" + format_verify_failure_block(verify_result)
            convergence_hit = False

    recap_block: Optional[str] = None
    if cfg.recap:
        parsed = parse_recap_headers(raw_reply)
        files = extract_files_heuristic(raw_reply)
        # Heuristic values passed alongside the parsed headers.
        fallbacks = {
            "recap": _derive_recap_heuristic(raw_reply),
            "files": ", ".join(files) if files else "none",
            "status": derive_status_heuristic(speaker.role, convergence_hit),
        }
        recap_block = format_recap_block(
            turn, speaker.name, speaker.role, elapsed,
            len(raw_reply.encode("utf-8")), raw_reply.count("\n") + 1,
            parsed, fallbacks, convergence_hit,
        )
        append_text_atomic(recap_path, recap_block)

    # Only the agent whose cwd is the worktree gets the diff appended.
    if wt_path is not None and speaker.cwd_override == wt_path:
        reply = append_worktree_diff(reply, wt_path, wt_branch)

    log(speaker.name, speaker.role, reply)
    history_entry = {"turn": turn, "agent": speaker.name, "elapsed_s": elapsed,
                     "len_chars": len(reply), "session_id": speaker.session_id}
    if failure_reason is not None:
        history_entry["finished_reason"] = failure_reason
        history_entry["error"] = failure_message
        history_entry["stderr_log_path"] = str(
            run_dir / f"turn-{turn:02d}-{speaker.name}.stderr.log")
    if verify_state is not None:
        history_entry["verify"] = verify_state
    history.append(history_entry)
    # Rolling per-turn state write; finished_reason is None unless this turn
    # failed.
    turn_state = _build_run_state(
        cfg, turns_used=turn, history=history, finished_reason=failure_reason,
        transcript_path=transcript_path, recap_path=recap_path,
        last_verify=last_verify_state, wt_path=wt_path, wt_branch=wt_branch,
    )
    write_text_atomic(state_path, json.dumps(turn_state, indent=2))
    return _TurnResult(reply, convergence_hit, failure_reason,
                       last_verify_state, recap_block)
1723
+
1724
+
1725
def _allocate_run_dir(cfg: DuetConfig) -> tuple[pathlib.Path, str]:
    """Ensure the runs dir exists and carve out a fresh run dir inside it.

    If `cfg.runs_dir` cannot be created, `cfg.runs_dir` is redirected to
    `~/.duet/runs/<cwd-slug>` (a notice goes to stderr). A catch-all
    `.gitignore` is written into the runs dir when none exists yet. The run id
    is a second-resolution timestamp, suffixed `-01` .. `-99` on collision;
    running out of suffixes raises SystemExit. The new run dir is then
    registered in the home index. Returns `(run_dir, run_id)`.
    """
    try:
        cfg.runs_dir.mkdir(parents=True, exist_ok=True)
    except OSError as err:  # PermissionError is an OSError subclass
        slug = re.sub(r"[^a-zA-Z0-9._-]+", "-", str(cfg.cwd)).strip("-")[:80]
        fallback = pathlib.Path.home() / ".duet" / "runs" / slug
        print(f"[duet] cannot create runs dir {cfg.runs_dir}: {err}; "
              f"falling back to {fallback}", file=sys.stderr)
        fallback.mkdir(parents=True, exist_ok=True)
        cfg.runs_dir = fallback

    # Keep everything duet writes (transcripts, state, worktrees) out of the
    # host repo's status. Written at most once per runs dir.
    ignore_file = cfg.runs_dir / ".gitignore"
    if not ignore_file.exists():
        write_text_atomic(
            ignore_file,
            "# auto-created by duet — ignores all run artifacts\n"
            "# (transcripts, state.json, worktrees) so they don't\n"
            "# pollute the host repo. Safe to delete or edit.\n*\n")

    stamp = dt.datetime.now().strftime("%Y%m%d-%H%M%S")
    candidates = [stamp] + [f"{stamp}-{n:02d}" for n in range(1, 100)]
    run_dir = None
    for run_id in candidates:
        attempt = cfg.runs_dir / run_id
        try:
            # Non-exist_ok mkdir doubles as the uniqueness check.
            attempt.mkdir()
        except FileExistsError:
            continue
        run_dir = attempt
        break
    if run_dir is None:
        raise SystemExit(f"could not allocate a unique run dir under {cfg.runs_dir}")

    # Home-index registration so `duet --list` / `--status <bare-id>` can
    # locate this run from any cwd.
    _register_run_in_home_index(run_dir, cfg.cwd)
    return run_dir, run_id
1763
+
1764
+
1765
+ def _dry_run_recap_state(cfg: DuetConfig, transcript_path: pathlib.Path,
1766
+ recap_path: pathlib.Path, state_path: pathlib.Path,
1767
+ history: list) -> dict:
1768
+ """Write the empty-transcript dry-run state for `--dry-run --recap` and
1769
+ return it. The non-recap dry run still flows through the normal loop (whose
1770
+ agent calls are stubbed); only the recap variant short-circuits here."""
1771
+ if not (cfg.task or cfg.kickoff or cfg.agents[0].session_id):
1772
+ raise SystemExit("nothing to start the conversation with — supply --task, "
1773
+ "--kickoff, or --resume-claude <session_id>")
1774
+ write_text_atomic(transcript_path, "")
1775
+ state = _build_run_state(
1776
+ cfg, turns_used=0, history=history, finished_reason="dry_run",
1777
+ transcript_path=transcript_path, recap_path=recap_path,
1778
+ )
1779
+ write_text_atomic(state_path, json.dumps(state, indent=2))
1780
+ print("[duet] dry-run: agents not called; no recap turn blocks written.")
1781
+ print(f"[duet] done. reason=dry_run. transcript: {transcript_path}")
1782
+ print(f"[duet] recap: {recap_path}")
1783
+ return state
1784
+
1785
+
1786
def _derive_seed_or_failure(
    cfg: DuetConfig, *, run_dir: pathlib.Path, transcript_path: pathlib.Path,
    recap_path: pathlib.Path, state_path: pathlib.Path, history: list,
    wt_path: Optional[pathlib.Path], wt_branch: Optional[str],
    log: Callable[..., None],
) -> tuple[Optional[str], Optional[dict]]:
    """Obtain the opening seed message, or record why that failed.

    Success returns `(seed, None)`. When deriving the seed raises, the failure
    is fully recorded here — failure block logged to the transcript, a turn-0
    `seed_extract` entry appended to `history`, state.json written with the
    failure reason, done banner printed — and `(None, state)` is returned so
    the caller can hand that state straight back.
    """
    started = time.time()
    try:
        # The guard runs only when the lead resumes a session and no kickoff
        # text was supplied.
        if not cfg.kickoff and cfg.agents[0].session_id:
            guard_codex_shared_cwd_before_call(
                cfg, cfg.agents[0], first_turn_for_agent=False
            )
        return derive_seed(cfg, run_dir=run_dir), None
    except Exception as exc:
        reason = _agent_finished_reason(exc)
        lead = cfg.agents[0]
        block = _agent_failure_block(reason, exc, "00-extract", lead, run_dir)
        log(lead.name, lead.role, block, kind="agent_error")

        stderr_log = run_dir / f"turn-00-extract-{lead.name}.stderr.log"
        entry = {
            "turn": 0,
            "agent": lead.name,
            "kind": "seed_extract",
            "elapsed_s": time.time() - started,
            "len_chars": len(block),
            "session_id": lead.session_id,
            "finished_reason": reason,
            "error": str(exc),
            "stderr_log_path": str(stderr_log),
        }
        history.append(entry)

        failed_state = _build_run_state(
            cfg,
            turns_used=0,
            history=history,
            finished_reason=reason,
            transcript_path=transcript_path,
            recap_path=recap_path,
            wt_path=wt_path,
            wt_branch=wt_branch,
        )
        write_text_atomic(state_path, json.dumps(failed_state, indent=2))

        print(block)
        print(f"\n[duet] done. reason={reason}. transcript: {transcript_path}")
        if cfg.recap:
            print(f"[duet] recap: {recap_path}")
        return None, failed_state
1837
+
1838
+
1839
def run_duet(cfg: DuetConfig) -> dict:
    """Run one duet conversation end to end and return its final state dict.

    Validates `cfg`, allocates the run directory, optionally sets up a
    worktree, derives the seed message, then alternates the two agents for up
    to `cfg.max_turns` turns. The loop ends on an agent failure, on two
    consecutive convergence proposals, on a stop request, or when the turn
    budget is used up. Unless the run ended with a timeout or agent error,
    `ask_force` then gets the chance to extend it. The final state is written
    to state.json and a summary is printed.

    Early exits (dry-run with recap, stop requested before the first turn,
    seed failure) return the state built at that point.
    """
    global RECAP_MODE
    RECAP_MODE = cfg.recap
    validate_config(cfg)

    run_dir, run_id = _allocate_run_dir(cfg)
    transcript_path = run_dir / "transcript.md"
    recap_path = run_dir / "recap.md"
    state_path = run_dir / "state.json"

    if cfg.recap:
        # Header of recap.md; per-turn blocks are appended after it.
        append_text_atomic(
            recap_path,
            f"# duet recap — {run_dir}\n\n"
            f"run dir: {run_dir}\n"
            f"mode: recap (live)\n"
            f"transcript: {transcript_path}\n\n",
        )

    stop = StopFlag()
    _install_sigint(stop)

    # Tracks whether an agent has resume context or has actually been invoked.
    # A plain task/kickoff seed logged as agent[0] is not a CLI invocation.
    seen_first_turn = {a.name: bool(a.session_id) for a in cfg.agents}
    history: list[dict] = []
    transcript = ""

    def log(speaker: str, role: str, text: str, kind: str = "agent") -> None:
        # Accumulates the transcript in memory and rewrites the whole file on
        # every entry.
        nonlocal transcript
        head = f"\n## {speaker} ({role}) — {kind}\n\n"
        transcript += head + text + "\n"
        write_text_atomic(transcript_path, transcript)

    if cfg.recap:
        print(f"[duet] run: {run_dir}")
        print("[duet] mode: recap (live)")
        print(f"[duet] transcript: {transcript_path}")
        print(f"[duet] recap: {recap_path}")
    else:
        print(f"[duet] run dir: {run_dir}")
    if cfg.verify_cmd:
        print(f"[duet] verify cmd: {cfg.verify_cmd}")
    if cfg.agents[0].session_id:
        print(f"[duet] {cfg.agents[0].name} resumes session {cfg.agents[0].session_id}")

    # Only the recap dry run short-circuits; it returns before any worktree
    # setup or seed derivation.
    if cfg.dry_run and cfg.recap:
        return _dry_run_recap_state(
            cfg, transcript_path, recap_path, state_path, history)

    wt_path, wt_branch = _setup_run_worktree(cfg, run_id, run_dir)

    # A stop requested during setup ends the run before any agent is called.
    if stop.requested:
        state = _build_run_state(
            cfg, turns_used=0, history=history,
            finished_reason=FINISHED_FORCE_STOP,
            transcript_path=transcript_path, recap_path=recap_path,
            wt_path=wt_path, wt_branch=wt_branch,
        )
        write_text_atomic(state_path, json.dumps(state, indent=2))
        return state

    seed, seed_failure_state = _derive_seed_or_failure(
        cfg, run_dir=run_dir, transcript_path=transcript_path,
        recap_path=recap_path, state_path=state_path, history=history,
        wt_path=wt_path, wt_branch=wt_branch, log=log,
    )
    if seed_failure_state is not None:
        return seed_failure_state
    log(cfg.agents[0].name, cfg.agents[0].role, seed, kind="seed")
    last_msg = seed

    # Partner (agent[1]) normally speaks first in the loop, replying to the seed.
    # `--continue` may set this to the other agent so the next speaker matches
    # the previous run's last completed turn.
    speaker_idx = cfg.start_speaker_idx
    finished_reason = FINISHED_MAX_TURNS
    previous_convergence_proposal = False
    last_verify_state: Optional[dict] = None

    for turn in range(1, cfg.max_turns + 1):
        if stop.requested:
            finished_reason = FINISHED_FORCE_STOP
            break
        speaker = cfg.agents[speaker_idx]
        result = _execute_turn(
            cfg, turn=turn, speaker=speaker, last_msg=last_msg,
            run_dir=run_dir, transcript_path=transcript_path,
            recap_path=recap_path, state_path=state_path,
            history=history, seen_first_turn=seen_first_turn,
            wt_path=wt_path, wt_branch=wt_branch,
            last_verify_state=last_verify_state, log=log,
        )
        last_verify_state = result.last_verify_state
        if cfg.recap:
            print(_format_live_recap_block(result.recap_block), end="")
        else:
            print(result.reply)

        if result.failure_reason is not None:
            finished_reason = result.failure_reason
            break
        # Convergence needs two proposals in back-to-back turns.
        if result.convergence_hit and previous_convergence_proposal:
            finished_reason = FINISHED_CONVERGED
            break
        # Re-check after the turn: a stop requested while the agent was
        # running ends the run without rotating speakers.
        if stop.requested:
            finished_reason = FINISHED_FORCE_STOP
            break

        last_msg = result.reply
        previous_convergence_proposal = result.convergence_hit
        speaker_idx = 1 - speaker_idx
    else:
        # Loop ran to completion without a break.
        finished_reason = FINISHED_MAX_TURNS

    forced_verify_state = None
    # Timeouts and agent errors skip the interactive force prompt.
    if finished_reason not in (FINISHED_TIMEOUT, FINISHED_AGENT_ERROR):
        finished_reason, forced_verify_state = ask_force(
            cfg, history, transcript_path, state_path,
            last_msg, speaker_idx, seen_first_turn,
            finished_reason, wt_path, wt_branch
        )
    # A verify run during a forced turn supersedes the loop's last verify.
    if forced_verify_state is not None:
        last_verify_state = forced_verify_state

    state = _build_run_state(
        cfg, turns_used=len(history), history=history,
        finished_reason=finished_reason,
        transcript_path=transcript_path, recap_path=recap_path,
        last_verify=last_verify_state, wt_path=wt_path, wt_branch=wt_branch,
    )
    write_text_atomic(state_path, json.dumps(state, indent=2))
    print(f"\n[duet] done. reason={finished_reason}. transcript: {transcript_path}")
    if cfg.recap:
        print(f"[duet] recap: {recap_path}")
    print(f"[duet] resumable session ids — "
          + ", ".join(f"{a.name}={a.session_id}" for a in cfg.agents if a.session_id))
    if wt_path:
        # The worktree is not cleaned up; print the commands to merge, review,
        # or drop it.
        print(f"[duet] worktree left intact at {wt_path} (branch {wt_branch}).\n"
              f"       merge:  git -C {cfg.cwd} merge {wt_branch}\n"
              f"       review: git -C {wt_path} diff HEAD\n"
              f"       drop:   git -C {cfg.cwd} worktree remove {wt_path} && "
              f"git -C {cfg.cwd} branch -D {wt_branch}")
    return state
1983
+
1984
+
1985
def _run_forced_turn(cfg: DuetConfig, *, forced_turn: int, next_speaker: Agent,
                     forced_msg: str, first_turn_for_agent: bool,
                     transcript_path: pathlib.Path,
                     wt_path: Optional[pathlib.Path], wt_branch: Optional[str],
                     history: list, seen_first_turn: dict,
                     last_verify_state: Optional[dict]) -> _TurnResult:
    """Run one human-forced continuation turn (the body of `ask_force`'s loop).

    Mirrors `_execute_turn` but with the `-forced` turn labels, a "forced"
    history flag, and the recap.md path derived from the transcript dir.
    Mutates `history`/`seen_first_turn`, prints the reply/recap, and returns the
    outcome `ask_force` needs to decide whether to keep prompting.

    An exception from `call_agent` is converted into a failure block (used as
    the reply) and reported via `_TurnResult.failure_reason`. The history
    entry carries `elapsed_s`, matching the entries `_execute_turn` writes.
    This function does not write state.json; the caller persists state.
    """
    run_dir = transcript_path.parent
    label = f"{forced_turn:02d}-forced"
    t0 = time.time()
    inflight: Optional[tuple[threading.Event, threading.Thread]] = None
    if cfg.recap:
        inflight = _start_recap_inflight(forced_turn, next_speaker.name,
                                         next_speaker.role, t0)
    call_succeeded = False
    failure_reason: Optional[str] = None
    failure_message: Optional[str] = None
    try:
        reply = call_agent(next_speaker, forced_msg, cfg,
                           first_turn_for_agent=first_turn_for_agent,
                           run_dir=run_dir, turn_label=label)
        call_succeeded = True
    except Exception as e:
        failure_reason = _agent_finished_reason(e)
        failure_message = str(e)
        # Stop the in-flight recap indicator before printing the error line,
        # and clear the handle so the post-try stop below is skipped.
        if cfg.recap and inflight is not None:
            _stop_recap_inflight(*inflight)
            inflight = None
        elapsed = time.time() - t0
        print(f"Turn {forced_turn:02d} | {next_speaker.name} "
              f"({next_speaker.role}) · ERROR after "
              f"{int(round(elapsed))}s — see "
              f"turn-{label}-{next_speaker.name}.stderr.log")
        reply = _agent_failure_block(failure_reason, e, label, next_speaker, run_dir)
    if cfg.recap and inflight is not None:
        _stop_recap_inflight(*inflight)
    if call_succeeded:
        guard_codex_shared_cwd_after_call(cfg, next_speaker, first_turn_for_agent)
    elapsed = time.time() - t0
    seen_first_turn[next_speaker.name] = True
    # raw_reply is the agent's own text; `reply` may grow a verify block and a
    # worktree diff below.
    raw_reply = reply
    convergence_hit = convergence_proposed(reply, cfg.sentinel)
    verify_state: Optional[dict] = None
    if convergence_hit and cfg.verify_cmd and not cfg.dry_run:
        verify_result = run_verify_command(cfg, run_dir, label, wt_path)
        verify_state = verify_result_state(verify_result)
        last_verify_state = verify_state
        if verify_result.ok:
            reply = raw_reply + "\n\n" + format_verify_success_block(verify_result)
        else:
            # A failed verify cancels this turn's convergence proposal.
            reply = raw_reply + "\n\n" + format_verify_failure_block(verify_result)
            convergence_hit = False
    if wt_path is not None and next_speaker.cwd_override == wt_path:
        reply = append_worktree_diff(reply, wt_path, wt_branch)
    recap_block = ""
    if cfg.recap:
        # Recap describes the agent's own reply, so parse raw_reply — before any
        # verify block / worktree diff was appended to `reply` (matches
        # _execute_turn; otherwise FILES and byte/line counts pick up the diff).
        parsed = parse_recap_headers(raw_reply)
        files = extract_files_heuristic(raw_reply)
        fallbacks = {
            "recap": _derive_recap_heuristic(raw_reply),
            "files": ", ".join(files) if files else "none",
            "status": derive_status_heuristic(next_speaker.role, convergence_hit),
        }
        recap_block = format_recap_block(
            forced_turn, next_speaker.name, next_speaker.role, elapsed,
            len(raw_reply.encode("utf-8")), raw_reply.count("\n") + 1,
            parsed, fallbacks, convergence_hit,
        )
        append_text_atomic(run_dir / "recap.md", recap_block)
    append_text_atomic(
        transcript_path,
        f"\n## {next_speaker.name} ({next_speaker.role}) — forced\n\n{reply}\n",
    )
    # `elapsed_s` is recorded here too so forced entries expose the same timing
    # key as the entries written by `_execute_turn`.
    history_entry = {"turn": forced_turn, "agent": next_speaker.name,
                     "forced": True, "elapsed_s": elapsed,
                     "len_chars": len(reply),
                     "session_id": next_speaker.session_id,
                     **({"verify": verify_state} if verify_state is not None else {})}
    if failure_reason is not None:
        history_entry["finished_reason"] = failure_reason
        history_entry["error"] = failure_message
        history_entry["stderr_log_path"] = str(
            run_dir / f"turn-{label}-{next_speaker.name}.stderr.log")
    history.append(history_entry)
    if cfg.recap:
        print(_format_live_recap_block(recap_block), end="")
    else:
        print(reply)
    return _TurnResult(reply, convergence_hit, failure_reason,
                       last_verify_state, recap_block)
2083
+
2084
+
2085
def ask_force(cfg: DuetConfig, history: list, transcript_path: pathlib.Path,
              state_path: pathlib.Path, last_msg: str, speaker_idx: int,
              seen_first_turn: dict, reason: str,
              wt_path: Optional[pathlib.Path] = None,
              wt_branch: Optional[str] = None) -> tuple[str, Optional[dict]]:
    """Post-loop interactive prompt: human can push another turn or accept.

    Returns `(finished_reason, last_verify_state)`. When stdin is not a TTY the
    incoming `reason` is returned untouched with None. Otherwise the user is
    prompted repeatedly: an empty line or EOF accepts the current reason; any
    other text is recorded in the transcript as human feedback and sent, appended
    to the last message, to the next agent as a forced turn. The loop ends
    when a forced turn fails (its failure reason is returned) or proposes
    convergence (`FINISHED_CONVERGED_AFTER_FORCE`).

    `last_verify_state` in the result reflects only verify runs that happened
    during forced turns; it is None when none ran.
    """
    if not sys.stdin.isatty():
        return reason, None
    # NOTE(review): starts at None rather than the main loop's last verify
    # state, so the per-forced-turn state writes below record `last_verify` as
    # None until a forced turn runs verify — confirm this is intended.
    last_verify_state: Optional[dict] = None
    while True:
        print(f"\n[duet] loop ended (reason={reason}). "
              f"Press Enter to finish, or type feedback to force another turn "
              f"(your text is appended as a human-feedback message and sent "
              f"to the next agent):")
        try:
            line = input("force> ").strip()
        except EOFError:
            return reason, last_verify_state
        if not line:
            return reason, last_verify_state
        next_speaker = cfg.agents[speaker_idx]
        first_turn_for_agent = not seen_first_turn[next_speaker.name]
        guard_codex_shared_cwd_before_call(cfg, next_speaker, first_turn_for_agent)
        head = f"\n## human — force-feedback (next: {next_speaker.name})\n\n"
        append_text_atomic(transcript_path, head + line + "\n")
        # The agent receives the previous message followed by the feedback.
        forced_msg = (
            f"{last_msg}\n\n---\n"
            "#### human force-feedback\n"
            f"{line}\n"
        )
        result = _run_forced_turn(
            cfg, forced_turn=len(history) + 1, next_speaker=next_speaker,
            forced_msg=forced_msg, first_turn_for_agent=first_turn_for_agent,
            transcript_path=transcript_path, wt_path=wt_path, wt_branch=wt_branch,
            history=history, seen_first_turn=seen_first_turn,
            last_verify_state=last_verify_state,
        )
        last_verify_state = result.last_verify_state
        # Persist each forced turn so a crash at the next force> prompt doesn't
        # lose it (the --status/--continue durability contract). finished_reason
        # stays None mid-loop (duet is alive at the prompt) unless the forced
        # turn itself failed; run_duet writes the final state with the real
        # reason once ask_force returns.
        write_text_atomic(state_path, json.dumps(_build_run_state(
            cfg, turns_used=len(history), history=history,
            finished_reason=result.failure_reason,
            transcript_path=transcript_path,
            recap_path=transcript_path.parent / "recap.md",
            last_verify=last_verify_state, wt_path=wt_path, wt_branch=wt_branch,
        ), indent=2))
        if result.failure_reason is not None:
            return result.failure_reason, last_verify_state
        last_msg = result.reply
        speaker_idx = 1 - speaker_idx
        # From here on, accepting at the prompt reports a forced continuation.
        reason = FINISHED_FORCED_CONTINUATION
        if result.convergence_hit:
            return FINISHED_CONVERGED_AFTER_FORCE, last_verify_state
2141
+
2142
+ # ---------- config / cli parsing ----------
2143
+
2144
def parse_partner(spec: str, default_role: str = "coder") -> Agent:
    """Turn a `backend[:role]` spec into an Agent named `<backend>-<role>`.

    `'codex:coder'` yields backend `codex` with role `coder`; a missing or
    empty role falls back to `default_role`. An empty backend raises
    SystemExit.
    """
    backend, _sep, requested_role = spec.partition(":")
    if backend == "":
        raise SystemExit(f"bad partner spec '{spec}', expected backend or backend:role")
    role = requested_role if requested_role else default_role
    agent_name = f"{backend}-{role}"
    return Agent(name=agent_name, backend=backend, role=role)
2151
+
2152
+
2153
def normalize_verify_cmd(value, parser: argparse.ArgumentParser) -> Optional[str]:
    """Validate a verify command and return it with surrounding whitespace
    removed.

    None passes through as None. A non-string value or a string that is
    empty after stripping is reported through `parser.error`.
    """
    if value is None:
        return None
    if not isinstance(value, str):
        parser.error("verify_cmd must be a string")
    stripped = value.strip()
    if stripped == "":
        parser.error("verify_cmd must not be empty")
    return stripped
2162
+
2163
+
2164
+ def _slot_name(backend: str, idx: int) -> str:
2165
+ slot = "lead" if idx == 0 else "partner"
2166
+ return f"{backend}-{slot}"
2167
+
2168
+
2169
+ def _slot_agent(agent: Agent, idx: int, *, rename: bool) -> Agent:
2170
+ if not rename:
2171
+ return dataclasses.replace(agent)
2172
+ return dataclasses.replace(agent, name=_slot_name(agent.backend, idx))
2173
+
2174
+
2175
def _default_slot_agent(backend: str, idx: int, *, rename: bool) -> Agent:
    """Create a fresh agent for slot `idx` with that slot's default role.

    Slot 0 gets role `planner`, any other slot `coder`. The name is the slot
    name when `rename` is set, otherwise `<backend>-<role>`.
    """
    is_lead = idx == 0
    role = "planner" if is_lead else "coder"
    if rename:
        name = _slot_name(backend, idx)
    else:
        name = f"{backend}-{role}"
    return Agent(name=name, backend=backend, role=role)
2179
+
2180
+
2181
+ def _slot_default_role(idx: int) -> str:
2182
+ return "planner" if idx == 0 else "coder"
2183
+
2184
+
2185
+ def _find_backend_idx(agents: list[Agent], backend: str,
2186
+ preferred_idx: int) -> Optional[int]:
2187
+ if len(agents) > preferred_idx and agents[preferred_idx].backend == backend:
2188
+ return preferred_idx
2189
+ for i, agent in enumerate(agents):
2190
+ if agent.backend == backend:
2191
+ return i
2192
+ return None
2193
+
2194
+
2195
def _force_resume_slot(
    agents: list[Agent],
    *,
    backend: str,
    slot_idx: int,
    session_id: str,
    rename_slots: bool,
) -> list[Agent]:
    """Place the resumed `backend` agent into slot `slot_idx` and attach
    `session_id` to it.

    Three cases, depending on where the backend currently sits:

    * already in `slot_idx` — it stays, keeping its role;
    * in the other slot — the two agents swap places and both are reset to
      the default role of the slot they land in (so `--resume-codex --lead
      codex:planner --partner claude:coder` ends up as `claude/planner +
      codex/coder`);
    * in neither slot — a default agent for the backend is created for
      `slot_idx`; the other slot's agent is kept with its role.

    Returns a new two-element list; the agents passed in are not modified.
    """
    found_at = _find_backend_idx(agents, backend, slot_idx)
    other_idx = 1 - slot_idx

    # Build the agent that ends up in the requested slot.
    if found_at is None:
        target = _default_slot_agent(backend, slot_idx, rename=rename_slots)
    elif found_at == slot_idx:
        target = dataclasses.replace(agents[found_at], role=agents[found_at].role)
    else:
        target = dataclasses.replace(
            agents[found_at], role=_slot_default_role(slot_idx))
    target = dataclasses.replace(
        _slot_agent(target, slot_idx, rename=rename_slots),
        session_id=session_id,
    )

    # Build the agent for the remaining slot. After a swap it is the agent
    # that used to occupy `slot_idx`, reset to its new slot's default role.
    swapped = found_at == other_idx
    displaced = agents[slot_idx] if swapped else agents[other_idx]
    other_role = _slot_default_role(other_idx) if swapped else displaced.role
    other = _slot_agent(
        dataclasses.replace(displaced, role=other_role),
        other_idx,
        rename=rename_slots,
    )

    ordered = [agents[0], agents[1]]
    ordered[slot_idx] = target
    ordered[other_idx] = other
    return ordered
2246
+
2247
+
2248
def apply_resume_overrides(
    agents: list[Agent],
    *,
    resume_claude: Optional[str] = None,
    resume_codex: Optional[str] = None,
    rename_slots: bool = False,
) -> list[Agent]:
    """Attach `--resume-*` session ids to the matching backends.

    A resumed Claude agent is normalized into the lead slot (index 0), since
    Claude resume is the "lead supplies the seed" path. A resumed Codex agent
    is normalized into the partner slot (index 1), the "Codex implements with
    its prior plan in context" path. Claude is handled first, then Codex.
    Roles survive only for a backend that was already in its conventional
    slot.

    Always returns copies of the agents. When the list does not hold exactly
    two agents, the copies are returned without applying any resume id.
    """
    slots = [_slot_agent(agent, pos, rename=rename_slots)
             for pos, agent in enumerate(agents)]
    if len(slots) != 2:
        return slots

    conventional = (
        (resume_claude, "claude", 0),
        (resume_codex, "codex", 1),
    )
    for resume_id, backend, slot_idx in conventional:
        if resume_id:
            slots = _force_resume_slot(
                slots,
                backend=backend,
                slot_idx=slot_idx,
                session_id=resume_id,
                rename_slots=rename_slots,
            )

    if rename_slots:
        # Final pass so names reflect the slots agents ended up in.
        slots = [_slot_agent(agent, pos, rename=True)
                 for pos, agent in enumerate(slots)]
    return slots
2290
+
2291
+
2292
def load_yaml_or_json(path: pathlib.Path) -> dict:
    """Load a config file, choosing the parser from the file suffix.

    `.yaml` / `.yml` files are parsed with PyYAML's `safe_load`; everything
    else is parsed as JSON. The file is always decoded as UTF-8 rather than
    the locale's default encoding, so non-ASCII text loads the same on every
    platform.

    Raises SystemExit when a YAML file is given but PyYAML is not installed.
    Decoding and parse errors from the underlying libraries propagate.
    """
    text = path.read_text(encoding="utf-8")
    if path.suffix in {".yaml", ".yml"}:
        try:
            # Imported lazily so PyYAML stays optional for JSON-only users.
            import yaml  # type: ignore
        except ImportError:
            # `from None`: the exit message is the whole story; don't chain
            # the ImportError traceback onto it.
            raise SystemExit(
                "PyYAML not installed; convert to JSON or `pip install pyyaml`."
            ) from None
        return yaml.safe_load(text)
    return json.loads(text)
2301
+
2302
+
2303
def _check_task_size(text: str, parser: argparse.ArgumentParser) -> str:
    """Return `text` unchanged, reporting through `parser.error` when it is
    longer than `TASK_MAX_CHARS` characters."""
    size = len(text)
    if size > TASK_MAX_CHARS:
        parser.error(
            f"task too large ({size} chars > {TASK_MAX_CHARS}); "
            "pipe a shorter summary"
        )
    return text
2308
+
2309
+
2310
def resolve_at_text(value: Optional[str], option_name: str,
                    parser: argparse.ArgumentParser,
                    stdin_cache: dict[str, str]) -> Optional[str]:
    """Expand a literal / `@file` / `@-` option value into its text.

    * None stays None.
    * A value not starting with `@` is used literally.
    * `@-` reads all of stdin; the text is stored in `stdin_cache` so a
      second `@-` option reuses it instead of reading again.
    * `@path` reads the file (with `~` expanded) as UTF-8.

    A missing, unreadable, or non-UTF-8 file is reported through
    `parser.error`, prefixed with `option_name`. Resolved text is
    size-checked before being returned.
    """
    if value is None:
        return None
    if not value.startswith("@"):
        return _check_task_size(value, parser)

    if value == "@-":
        try:
            piped = stdin_cache["stdin"]
        except KeyError:
            piped = stdin_cache["stdin"] = sys.stdin.read()
        return _check_task_size(piped, parser)

    raw_path = value[1:]
    if raw_path == "":
        parser.error(f"{option_name}: file not found: {raw_path}")
    path = pathlib.Path(raw_path).expanduser()
    if not path.is_file():
        parser.error(f"{option_name}: file not found: {path}")
    try:
        text = path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        parser.error(f"{option_name}: file not UTF-8 text: {path}")
    except OSError as e:
        parser.error(f"{option_name}: unable to read file: {path}: {e}")
    return _check_task_size(text, parser)
2336
+
2337
+
2338
def resolve_task_from_cmd(cmd_str: str, cwd: pathlib.Path, timeout: int,
                          parser: argparse.ArgumentParser) -> str:
    """Produce the task seed by running `cmd_str` through `sh -c` in `cwd`.

    While the command runs, the module-level `LIVE_PREFIX` is switched to
    `LIVE_PREFIX_TASK`; the previous value is restored afterwards even if the
    run raises. A non-zero exit status or empty stdout is reported through
    `parser.error` (stderr included). The stdout text is size-checked and
    returned.
    """
    global LIVE_PREFIX
    saved_prefix = LIVE_PREFIX
    LIVE_PREFIX = LIVE_PREFIX_TASK
    try:
        result = _run(["sh", "-c", cmd_str], cwd=cwd, stdin=None, timeout=timeout)
    finally:
        LIVE_PREFIX = saved_prefix

    rc, out, err = result
    if rc != 0:
        parser.error(f"--task-from-cmd exited {rc}\nstderr:\n{err}")
    if out == "":
        parser.error(f"--task-from-cmd produced empty stdout\nstderr:\n{err}")
    return _check_task_size(out, parser)
2353
+
2354
+
2355
def resolve_seed_inputs(*, task: Optional[str], kickoff: Optional[str],
                        task_from_cmd: Optional[str], cwd: pathlib.Path,
                        timeout: int, parser: argparse.ArgumentParser,
                        stdin_cache: dict[str, str]) -> tuple[Optional[str], Optional[str]]:
    """Resolve the raw seed options into `(task_text, kickoff_text)`.

    `--task` and `--task-from-cmd` may not both be given; that is reported
    through `parser.error`. The kickoff is resolved first, then the task —
    from the command's stdout when `task_from_cmd` is set, otherwise from the
    literal / `@file` / `@-` task value.
    """
    if not (task is None or task_from_cmd is None):
        parser.error("--task and --task-from-cmd are mutually exclusive")

    resolved_kickoff = resolve_at_text(kickoff, "--kickoff", parser, stdin_cache)
    if task_from_cmd is None:
        resolved_task = resolve_at_text(task, "--task", parser, stdin_cache)
    else:
        resolved_task = resolve_task_from_cmd(task_from_cmd, cwd, timeout, parser)
    return resolved_task, resolved_kickoff
2367
+
2368
+
2369
def choose_runs_dir(raw_runs_dir: Optional[str], cwd_resolved: pathlib.Path) -> pathlib.Path:
    """Pick the directory that will hold run artifacts.

    Args:
        raw_runs_dir: Explicit ``--runs-dir`` value, or None when not given.
        cwd_resolved: The agents' working directory, already resolved.

    Returns:
        ``raw_runs_dir`` as a path when it was given; otherwise
        ``<cwd>/.duet/runs`` when ``cwd_resolved`` differs from the
        invocation directory (a notice is printed to stderr); otherwise
        the relative path ``runs``.
    """
    invocation_pwd = pathlib.Path.cwd().resolve()

    explicit = raw_runs_dir is not None
    if explicit:
        return pathlib.Path(raw_runs_dir)

    if cwd_resolved == invocation_pwd:
        return pathlib.Path("runs")

    # Agents work somewhere else: keep the artifacts next to that project.
    project_runs = cwd_resolved / ".duet" / "runs"
    notice = (
        "[duet] --cwd points outside the invocation directory; "
        f"defaulting run artifacts to {project_runs}. "
        "Pass --runs-dir runs to use the legacy invocation-relative path."
    )
    print(notice, file=sys.stderr)
    return project_runs
2381
+
2382
+
2383
def _cwd_slug(cwd_resolved: pathlib.Path) -> str:
    """Turn a cwd into a directory name usable under ``~/.duet/runs/``.

    Every run of characters outside ``[a-zA-Z0-9._-]`` becomes a single
    ``-``, leading and trailing dashes are removed, and the result is cut
    to at most 80 characters. The docstring this replaces states that the
    unwritable-cwd fallback in ``run_duet`` uses the same scheme on purpose,
    so both land under the same slug.
    """
    dashed = re.sub(r"[^a-zA-Z0-9._-]+", "-", str(cwd_resolved))
    trimmed = dashed.strip("-")
    return trimmed[:80]
2388
+
2389
+
2390
def _register_run_in_home_index(run_dir: pathlib.Path,
                                cwd_resolved: pathlib.Path) -> None:
    """Drop a symlink at `~/.duet/runs/<cwd-slug>/<run_id>` -> `run_dir`.

    `_default_list_paths()` already scans `~/.duet/runs/<slug>/<run_id>/`
    (originally for the unwritable-cwd fallback in `run_duet`). Mirroring
    every newly-created run dir into that tree gives `duet --list` and
    `duet --status <bare-id>` a single home-rooted index of every run
    started under this user, regardless of which project's
    `<cwd>/.duet/runs/` it actually lives in.

    Best-effort: no failure here is allowed to fail the run. That includes
    an unresolvable home directory (`Path.home()` raises `RuntimeError`)
    and symlink loops hit while resolving, neither of which used to be
    caught.

    Args:
        run_dir: The run directory to register.
        cwd_resolved: The run's working directory; determines the slug.
    """
    try:
        home_runs = (pathlib.Path.home() / ".duet" / "runs").resolve()
    except (OSError, RuntimeError) as exc:
        print(f"[duet] note: home-index symlink skipped "
              f"(cannot resolve ~/.duet/runs): {exc}",
              file=sys.stderr)
        return
    try:
        run_resolved = run_dir.resolve()
    except (OSError, RuntimeError):
        return
    # Skip when run_dir already lives under ~/.duet/runs/<slug>/ (the
    # unwritable-cwd fallback already landed there) — registering would
    # be a circular self-reference.
    if home_runs in run_resolved.parents:
        return
    slug = _cwd_slug(cwd_resolved)
    if not slug:
        return  # paranoia: empty slug
    link = home_runs / slug / run_dir.name
    try:
        link.parent.mkdir(parents=True, exist_ok=True)
        if link.is_symlink():
            # Either it already points at this run (idempotent) or it
            # points elsewhere and is deliberately left alone; in both
            # cases there is nothing to do.
            return
        if link.exists():
            return  # not a symlink; refuse to clobber
        link.symlink_to(run_resolved)
    except (OSError, NotImplementedError) as exc:
        print(f"[duet] note: home-index symlink failed "
              f"(~/.duet/runs/{slug}/{run_dir.name}): {exc}",
              file=sys.stderr)
2435
+
2436
+
2437
+ # ---------- run-status (`duet --status <run_dir>`) ----------
2438
+
2439
def _pid_alive(pid: int) -> bool:
    """True if the OS process still exists. Uses signal 0 (no-op probe).

    Args:
        pid: Process id to probe.

    Returns:
        False for non-positive pids, for pids too large for the platform,
        and for pids with no matching process; True otherwise, including
        when the process exists but belongs to another user.
    """
    # kill() treats pid 0 and negative pids as process *groups* (or "every
    # process"), so a corrupt pid file holding 0 or -1 would otherwise be
    # reported as alive.
    if pid <= 0:
        return False
    try:
        os.kill(pid, 0)
    except ProcessLookupError:
        return False
    except PermissionError:
        # PID exists but is owned by someone else — still "alive" for us.
        return True
    except OverflowError:
        # Value does not fit the platform's pid type, so it cannot name a
        # real process.
        return False
    return True
2449
+
2450
+
2451
def _proc_cmdline(pid: int) -> Optional[str]:
    """Return the full command line of ``pid``, or None if unavailable.

    On Linux the NUL-separated ``/proc/<pid>/cmdline`` is read and the
    separators are turned into spaces. Elsewhere ``ps -o command= -p <pid>``
    is run with a two-second timeout. Callers use the result to check that
    a recorded ``duet_pid`` still belongs to a duet process, because a
    liveness probe alone cannot tell a recycled PID from the original one.
    """
    if sys.platform.startswith("linux"):
        proc_entry = pathlib.Path(f"/proc/{pid}/cmdline")
        try:
            raw = proc_entry.read_bytes()
        except OSError:
            return None
        return raw.replace(b"\x00", b" ").decode(errors="replace")

    # macOS / BSD: ask ps.
    ps_argv = ["ps", "-o", "command=", "-p", str(pid)]
    try:
        completed = subprocess.run(ps_argv, capture_output=True,
                                   text=True, timeout=2)
    except (subprocess.TimeoutExpired, OSError):
        return None
    listing = completed.stdout.strip()
    if completed.returncode == 0 and listing:
        return listing
    return None
2473
+
2474
+
2475
def _is_duet_process(pid: int) -> bool:
    """True if `pid` is alive AND looks like a duet process.

    Guards against stale-PID false positives: the process must exist and
    its command line must mention duet in one of these forms:

    * ``duet.py`` anywhere in the command line;
    * an executable whose final path segment is ``duet``;
    * a script argument whose final path segment is ``duet``. An installed
      ``duet`` entry point is started through its shebang, so the command
      line reads ``python3 /path/to/duet ...`` and the executable token is
      the interpreter, not the script. The same rule covers
      ``python -m duet``.

    Args:
        pid: Process id recorded for the run.

    Returns:
        True when the process exists and matches one of the forms above.
    """
    if not _pid_alive(pid):
        return False
    cmdline = _proc_cmdline(pid) or ""
    if "duet.py" in cmdline:
        return True
    tokens = cmdline.split()
    if not tokens:
        return False
    # First non-option argument after the executable, i.e. the script path
    # when the executable is an interpreter.
    script = next((tok for tok in tokens[1:] if not tok.startswith("-")), None)
    for candidate in (tokens[0], script):
        if candidate and pathlib.Path(candidate).name == "duet":
            return True
    return False
2489
+
2490
+
2491
def print_run_status(arg: str) -> int:
    """Print a one-shot health summary for a duet run. Returns shell exit code:
    0 = run finished cleanly, 1 = still running, 2 = stuck/crashed, 3 = error.

    `arg` may be a path (absolute or relative) OR a bare run id like
    `20260507-082801` — bare ids get resolved against the same default
    search paths as `--list` (./runs/, ./.duet/runs/, ~/.duet/runs/*/).

    A `state.json` that cannot be read, is not valid JSON/UTF-8, or whose
    top-level value is not an object is reported on stderr and yields exit
    code 3 instead of a traceback. A `duet_pid` that is not an integer is
    treated like a pid that is no longer running.
    """
    run_dir = _resolve_run_dir(arg)
    if run_dir is None:
        print(f"[duet] no such run dir: {arg}", file=sys.stderr)
        if "/" not in arg and "\\" not in arg and _RUN_ID_RE.match(arg):
            print("[duet] tried bare-id resolution under default paths "
                  "(./runs/, ./.duet/runs/, ~/.duet/runs/*/). "
                  "Use `duet --list` to see what's available.",
                  file=sys.stderr)
        return 3
    state_path = run_dir / "state.json"
    state: dict = {}
    if state_path.exists():
        try:
            loaded = json.loads(state_path.read_text())
        except OSError as e:
            print(f"[duet] state.json unreadable: {e}", file=sys.stderr)
            return 3
        except ValueError as e:
            # Covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"[duet] state.json malformed: {e}", file=sys.stderr)
            return 3
        if not isinstance(loaded, dict):
            print("[duet] state.json malformed: top-level value is not an object",
                  file=sys.stderr)
            return 3
        state = loaded
    finished = state.get("finished_reason")
    transcript_display = state.get("transcript_path", run_dir / "transcript.md")
    recap_display = state.get("recap_path")
    if recap_display is None and (run_dir / "recap.md").exists():
        recap_display = run_dir / "recap.md"
    print(f"[duet] {run_dir}")
    print(f" turns_used: {state.get('turns_used', '?')}")
    print(f" finished_reason: {finished!r}")
    if recap_display is not None:
        print(f" recap: {recap_display}")

    # A turn-*.pid file exists only while that turn's subprocess is alive.
    pid_files = sorted(run_dir.glob("turn-*.pid"))
    if pid_files:
        pid_file = pid_files[-1]
        try:
            pid = int(pid_file.read_text().strip())
        except (OSError, ValueError):
            pid = None
        # Filename: turn-<label>-<agent>.pid
        stem = pid_file.stem  # turn-02-claude-planner
        started_at = dt.datetime.fromtimestamp(pid_file.stat().st_mtime)
        elapsed = (dt.datetime.now() - started_at).total_seconds()
        alive = _pid_alive(pid) if pid is not None else False
        print(f" in-flight turn: {stem}")
        print(f" pid: {pid} (alive: {alive})")
        print(f" started: {started_at.isoformat(timespec='seconds')} "
              f"({int(elapsed)}s ago)")
        # Heartbeat from the matching stderr log
        log = run_dir / f"{stem}.stderr.log"
        if log.exists():
            log_age = (dt.datetime.now()
                       - dt.datetime.fromtimestamp(log.stat().st_mtime)).total_seconds()
            print(f" last stderr: {int(log_age)}s ago "
                  f"({log.stat().st_size} bytes)")
        if not alive:
            print(" ⚠ pid file present but process is gone — turn likely "
                  "crashed or was killed without cleanup")
            return 2
        return 1
    # No pid files. Either run hasn't started, has finished, or is between
    # turns (in particular, sitting at the post-loop `force>` prompt).
    if finished:
        print(f" done. transcript: {transcript_display}")
        return 0

    # Disambiguate "between turns" from "actually crashed" using the
    # duet_pid recorded in state.json.
    duet_pid = state.get("duet_pid")
    if duet_pid is not None:
        try:
            duet_pid_num: Optional[int] = int(duet_pid)
        except (TypeError, ValueError):
            duet_pid_num = None  # corrupt value: cannot belong to a live duet
        if duet_pid_num is not None and _is_duet_process(duet_pid_num):
            print(" state: between turns / awaiting force> prompt")
            print(f" duet pid: {duet_pid} (alive)")
            history = state.get("history") or []
            last = history[-1] if isinstance(history, list) and history else None
            if isinstance(last, dict):
                # `or 0` keeps the numeric format specs working when a
                # field is present but null.
                print(f" last completed: turn {last.get('turn')} "
                      f"({last.get('agent')}) in {(last.get('elapsed_s') or 0):.1f}s, "
                      f"{last.get('len_chars') or 0} chars")
            return 1
        print(f" ⚠ duet pid {duet_pid} no longer running (or recycled by an "
              "unrelated process); no finished_reason recorded — run died "
              "between turns")
        return 2

    # state.json predates the duet_pid field — keep the old message and the
    # old conservative "looks stuck" exit code so callers don't regress.
    print(" no in-flight turn AND no finished_reason — run may have died "
          "between turns, or hasn't started yet")
    print(" (state.json predates the duet_pid field; can't auto-distinguish "
          "alive-between-turns from crashed)")
    return 2
2588
+
2589
+
2590
+ # ---------- run-list (`duet --list [PATH]`) ----------
2591
+
2592
+ # Status glyphs — same vocabulary as print_run_status, packed for table cols.
2593
# Maps a recorded finished_reason to the (glyph, short label) pair shown in
# the `--list` table. Both convergence reasons share one display pair.
_LIST_STATUS_FINISHED = {
    **dict.fromkeys(
        (FINISHED_CONVERGED, FINISHED_CONVERGED_AFTER_FORCE),
        ("✅", "converged"),
    ),
    FINISHED_MAX_TURNS: ("⏰", "max_turns"),
    FINISHED_FORCE_STOP: ("🔴", "force_stop"),
    FINISHED_TIMEOUT: ("⏱", "timeout"),
    FINISHED_FORCED_CONTINUATION: ("🟡", "forced"),
    FINISHED_AGENT_ERROR: ("⚠", "agent_error"),
}


# Run directory names: YYYYMMDD-HHMMSS with an optional numeric suffix,
# e.g. 20260507-082801 or 20260507-082801-2.
_RUN_ID_RE = re.compile(r"^\d{8}-\d{6}(?:-\d+)?$")
2605
+
2606
+
2607
def _default_list_paths() -> list[pathlib.Path]:
    """Roots searched by `duet --list` when no PATH is given.

    Returns:
        Existing directories in display order: ``./runs``, then
        ``./.duet/runs``, then every subdirectory of ``~/.duet/runs``
        in sorted order.
    """
    here = pathlib.Path.cwd()
    local_roots = (here / "runs", here / ".duet" / "runs")
    found: list[pathlib.Path] = [root for root in local_roots if root.is_dir()]

    home_index = pathlib.Path.home() / ".duet" / "runs"
    if home_index.is_dir():
        # Each subdir under ~/.duet/runs/ is a slug like "Users-volkan-…".
        found.extend(entry for entry in sorted(home_index.iterdir())
                     if entry.is_dir())
    return found
2621
+
2622
+
2623
def _resolve_run_dir(arg: str) -> Optional[pathlib.Path]:
    """Map a `--status` / `--continue` argument to a real run directory.

    Accepted forms:
      - a path (absolute, relative, or ``~``-prefixed) naming an existing
        directory;
      - a bare run id such as `20260507-082801`, looked up under each of the
        default list paths.

    Returns:
        The resolved directory, or None when nothing matches. If a bare id
        exists under several genuinely different directories, the most
        recently modified one is returned and a note is printed to stderr.
    """
    direct = pathlib.Path(arg).expanduser()
    if direct.is_dir():
        return direct.resolve()

    # Only separator-free arguments shaped like a run id are searched for.
    if "/" in arg or "\\" in arg or not _RUN_ID_RE.match(arg):
        return None

    # Deduplicate by real path so a home-index symlink and the directory it
    # points at count as a single hit.
    real_paths: set[pathlib.Path] = set()
    hits: list[pathlib.Path] = []
    for root in _default_list_paths():
        candidate = root / arg
        if not candidate.is_dir():
            continue
        try:
            real = candidate.resolve()
        except OSError:
            continue
        if real not in real_paths:
            real_paths.add(real)
            hits.append(candidate)

    if not hits:
        return None
    if len(hits) > 1:
        # Rare, since ids are second-precise timestamps: prefer the newest
        # and tell the user about the ambiguity.
        hits.sort(key=lambda c: c.stat().st_mtime, reverse=True)
        print(f"[duet] note: run id {arg!r} found under multiple roots; "
              f"using most recent: {hits[0]}",
              file=sys.stderr)
    return hits[0].resolve()
2667
+
2668
+
2669
def _load_run_state(run_dir: pathlib.Path,
                    parser: argparse.ArgumentParser,
                    option_name: str) -> dict:
    """Load and validate ``<run_dir>/state.json``.

    Every failure is reported through ``parser.error``, prefixed with
    ``option_name``: a missing file, an unreadable file, content that is
    not valid JSON or UTF-8, and a top-level value that is not an object.
    The last case used to be returned as-is and crashed later on ``.get``.

    Args:
        run_dir: Run directory expected to contain ``state.json``.
        parser: Parser used for error reporting.
        option_name: Option name used as the error-message prefix.

    Returns:
        The parsed state object as a dict.
    """
    state_path = run_dir / "state.json"
    if not state_path.is_file():
        parser.error(f"{option_name}: missing state.json in {run_dir}")
    try:
        state = json.loads(state_path.read_text())
    except OSError as e:
        parser.error(f"{option_name}: unable to read state.json: {e}")
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        parser.error(f"{option_name}: state.json malformed: {e}")
    else:
        if isinstance(state, dict):
            return state
        parser.error(f"{option_name}: state.json malformed: "
                     "top-level value is not an object")
    raise AssertionError("parser.error should have exited")
2682
+
2683
+
2684
def _agents_from_state(state: dict,
                       parser: argparse.ArgumentParser,
                       option_name: str) -> list[Agent]:
    """Rebuild the two ``Agent`` objects recorded in a run's state.

    ``state["agents"]`` must be a list of exactly two objects, each with a
    non-empty ``name`` and ``backend``; ``extra_args``, when present, must
    be a list. Violations are reported through ``parser.error`` prefixed
    with ``option_name``. A missing or empty ``role`` defaults to
    ``"coder"``; the other optional fields become None when absent or
    empty.

    Returns:
        The agents in the order they appear in the state.
    """
    entries = state.get("agents")
    if not (isinstance(entries, list) and len(entries) == 2):
        parser.error(f"{option_name}: state.json must contain exactly two agents")

    def _optional_str(record: dict, key: str) -> Optional[str]:
        # Empty or missing values collapse to None; anything else is
        # stringified.
        return str(record[key]) if record.get(key) else None

    restored: list[Agent] = []
    for position, record in enumerate(entries):
        if not isinstance(record, dict):
            parser.error(f"{option_name}: agents[{position}] is not an object")
        name = record.get("name")
        backend = record.get("backend")
        if not (name and backend):
            parser.error(f"{option_name}: agents[{position}] missing name/backend")
        extras = record.get("extra_args") or []
        if not isinstance(extras, list):
            parser.error(f"{option_name}: agents[{position}].extra_args is not a list")
        restored.append(Agent(
            name=str(name),
            backend=str(backend),
            role=str(record.get("role") or "coder"),
            role_prompt=_optional_str(record, "role_prompt"),
            model=_optional_str(record, "model"),
            session_id=_optional_str(record, "session_id"),
            extra_args=[str(item) for item in extras],
            reasoning_effort=_optional_str(record, "reasoning_effort"),
        ))
    return restored
2713
+
2714
+
2715
def _next_speaker_idx_from_state(agents: list[Agent], state: dict) -> int:
    """Work out which agent (index 0 or 1) should speak next.

    The history is walked from newest to oldest; the first entry whose
    ``agent`` matches one of ``agents`` by name decides, and the *other*
    agent goes next. If no entry matches, the parity of ``turns_used`` is
    used instead (unparseable values count as 0).
    """
    history = state.get("history") or []
    if isinstance(history, list):
        for entry in reversed(history):
            if not isinstance(entry, dict):
                continue
            speaker = entry.get("agent")
            spoke_last = next(
                (idx for idx, agent in enumerate(agents) if agent.name == speaker),
                None,
            )
            if spoke_last is not None:
                return 1 - spoke_last

    try:
        turns_used = int(state.get("turns_used") or 0)
    except (TypeError, ValueError):
        turns_used = 0
    # Normal runs start with agent[1], so even turns mean agent[1] is next.
    return 0 if turns_used % 2 else 1
2731
+
2732
+
2733
def _continue_note_from_args(args: argparse.Namespace,
                             cwd: pathlib.Path,
                             timeout: int,
                             parser: argparse.ArgumentParser,
                             stdin_cache: dict[str, str]) -> Optional[str]:
    """Resolve the optional extra instruction for a ``--continue`` run.

    At most one of ``--task``, ``--kickoff`` and ``--task-from-cmd`` may be
    given; more than one is reported through ``parser.error``.

    Returns:
        The resolved text of whichever option was supplied, or None when
        none was.
    """
    supplied = [
        value
        for value in (args.task, args.kickoff, args.task_from_cmd)
        if value is not None
    ]
    if len(supplied) > 1:
        parser.error("--continue accepts only one extra instruction via "
                     "--task, --kickoff, or --task-from-cmd")

    if args.task_from_cmd is not None:
        return resolve_task_from_cmd(args.task_from_cmd, cwd, timeout, parser)
    for option_name, raw_value in (("--kickoff", args.kickoff),
                                   ("--task", args.task)):
        if raw_value is not None:
            return resolve_at_text(raw_value, option_name, parser, stdin_cache)
    return None
2753
+
2754
+
2755
def _default_continue_kickoff(run_dir: pathlib.Path,
                              state: dict,
                              next_agent: Agent,
                              user_note: Optional[str],
                              worktree_path: Optional[pathlib.Path]) -> str:
    """Compose the kickoff message sent to the next agent of a continued run.

    The message names the previous run, its finish reason and turn count,
    the next speaker, and the transcript path. Recap path, worktree path and
    the last completed turn are added when known. A closing paragraph
    depends on whether the previous run recorded a ``finished_reason``, and
    ``user_note`` is appended last when given.

    Returns:
        The message as one newline-joined string.
    """
    history = state.get("history") or []
    history_is_list = isinstance(history, list)
    last_entry = history[-1] if history_is_list and history else {}
    transcript = state.get("transcript_path") or str(run_dir / "transcript.md")
    recap = state.get("recap_path")
    finished = state.get("finished_reason")
    turns_display = state.get(
        "turns_used",
        len(history) if history_is_list else "?",
    )

    message = ["Continue the previous duet run without restarting from scratch."]
    message.append(f"Previous run: {run_dir}")
    message.append(f"Previous finished_reason: {finished!r}")
    message.append(f"Previous turns_used: {turns_display}")
    message.append(
        f"Next speaker: {next_agent.name} ({next_agent.backend}/{next_agent.role})"
    )
    message.append(f"Transcript: {transcript}")

    if recap:
        message.append(f"Recap: {recap}")
    if worktree_path is not None:
        message.append(f"Worktree: {worktree_path}")
    if isinstance(last_entry, dict) and last_entry:
        message.append(
            f"Last completed turn: {last_entry.get('turn')} by {last_entry.get('agent')}"
        )

    if finished is not None:
        closing = ("Use the saved session context and artifacts above, then continue "
                   "with the next concrete step.")
    else:
        closing = ("The previous run appears interrupted or crashed. Inspect the "
                   "transcript, stderr logs, and any worktree changes before editing; "
                   "keep useful partial work.")
    message.append(closing)

    if user_note:
        message.extend(["", "Human continuation instruction:", user_note])
    return "\n".join(message)
2799
+
2800
+
2801
def build_continue_config(run_arg: str,
                          args: argparse.Namespace,
                          parser: argparse.ArgumentParser,
                          stdin_cache: dict[str, str]) -> DuetConfig:
    """Build the ``DuetConfig`` for ``--continue <run>``.

    Agents, task and cwd come from the previous run's ``state.json``.
    Turn limit, sentinel, timeout, sandbox, permission mode and the other
    per-invocation knobs come from ``args``. The worktree is taken from
    ``--worktree-path``, else the state's ``worktree``, else a ``wt``
    directory inside the run dir when one exists. The kickoff is generated
    by ``_default_continue_kickoff``.

    Args:
        run_arg: Run directory or bare run id given to ``--continue``.
        args: Parsed command-line namespace.
        parser: Parser used for error reporting.
        stdin_cache: Cache shared by options that may read stdin.

    Returns:
        A config whose ``start_speaker_idx`` points at the agent due to
        speak next and whose ``continue_from`` names the previous run dir.
    """
    run_dir = _resolve_run_dir(run_arg)
    if run_dir is None:
        parser.error(f"--continue: no such run dir or id: {run_arg}")
    state = _load_run_state(run_dir, parser, "--continue")
    agents = _agents_from_state(state, parser, "--continue")

    # Older runs (or runs that crashed before the first state.json roll) may
    # have Codex agents that already spoke but have no saved session_id.
    # Without a marker, run_duet would treat the next turn as a fresh
    # `codex exec` and lose the prior session. Plant the legacy
    # "codex-current" sentinel so call_codex resumes via `--last` keyed on
    # cwd.
    history = state.get("history") or []
    if isinstance(history, list):
        past_speakers = {entry.get("agent") for entry in history
                         if isinstance(entry, dict)}
        for agent in agents:
            lost_session = (agent.backend == "codex"
                            and not agent.session_id
                            and agent.name in past_speakers)
            if lost_session:
                agent.session_id = "codex-current"

    cwd = pathlib.Path(state.get("cwd") or ".").expanduser().resolve()
    timeout = args.timeout
    user_note = _continue_note_from_args(args, cwd, timeout, parser, stdin_cache)
    next_idx = _next_speaker_idx_from_state(agents, state)

    # Worktree precedence: CLI flag, then saved state, then the legacy
    # <run_dir>/wt location.
    raw_worktree = args.worktree_path or state.get("worktree")
    if not raw_worktree:
        legacy_wt = run_dir / "wt"
        if legacy_wt.is_dir():
            raw_worktree = str(legacy_wt)
    if raw_worktree:
        worktree_path = pathlib.Path(raw_worktree).expanduser().resolve()
    else:
        worktree_path = None
    worktree_for = str(args.worktree_for or state.get("worktree_for") or "partner")

    kickoff = _default_continue_kickoff(
        run_dir, state, agents[next_idx], user_note, worktree_path
    )
    runs_dir = choose_runs_dir(args.runs_dir, cwd)

    if args.verify_cmd is not None:
        raw_verify_cmd = args.verify_cmd
    else:
        raw_verify_cmd = state.get("verify_cmd")
    verify_cmd = normalize_verify_cmd(raw_verify_cmd, parser)
    add_dirs = [pathlib.Path(d).expanduser().resolve() for d in args.add_dirs]

    return DuetConfig(
        cwd=cwd,
        agents=agents,
        task=state.get("task"),
        kickoff=kickoff,
        max_turns=args.turns,
        sentinel=args.sentinel,
        per_turn_timeout=timeout,
        runs_dir=runs_dir,
        sandbox=args.sandbox,
        permission_mode=args.permission_mode,
        dry_run=args.dry_run,
        recap=args.recap or bool(state.get("recap_path")),
        verify_cmd=verify_cmd,
        worktree=False,
        worktree_for=worktree_for,
        worktree_path=worktree_path,
        add_dirs=add_dirs,
        reasoning=args.reasoning,
        codex_fast=bool(args.codex_fast),
        start_speaker_idx=next_idx,
        continue_from=str(run_dir),
    )
2867
+
2868
+
2869
def _humanize_age(seconds: int) -> str:
    """Render an age in seconds as a short "<n><unit> ago" string.

    Units are seconds (under a minute), minutes (under an hour), hours
    (under a day) and days (everything else). Negative ages, which happen
    when a file's mtime lies in the future through clock skew, are shown
    as "0s ago" rather than as a negative number.

    Args:
        seconds: Age in whole seconds.

    Returns:
        For example "42s ago", "3m ago", "5h ago" or "12d ago".
    """
    seconds = max(seconds, 0)
    if seconds < 60:
        return f"{seconds}s ago"
    if seconds < 3600:
        return f"{seconds // 60}m ago"
    if seconds < 86400:
        return f"{seconds // 3600}h ago"
    return f"{seconds // 86400}d ago"
2875
+
2876
+
2877
def _last_activity_mtime(run_dir: pathlib.Path) -> Optional[float]:
    """Newest mtime among a run's ``state.json``, ``turn-*.pid`` and
    ``turn-*.stderr.log`` files.

    Files that cannot be stat'ed are ignored.

    Returns:
        The largest mtime found, or None when none of the files could be
        stat'ed.
    """
    watched = [run_dir / "state.json"]
    watched.extend(run_dir.glob("turn-*.pid"))
    watched.extend(run_dir.glob("turn-*.stderr.log"))

    newest: Optional[float] = None
    for path in watched:
        try:
            stamp = path.stat().st_mtime
        except OSError:
            continue
        if newest is None or stamp > newest:
            newest = stamp
    return newest
2888
+
2889
+
2890
def _classify_run(run_dir: pathlib.Path) -> tuple[str, str, dict]:
    """Classify a run for the `--list` table.

    Mirrors print_run_status's logic, but never raises on a damaged
    ``state.json``: an unreadable file, invalid JSON/UTF-8 and a non-object
    top-level value are all reported as a "❓" row, so one bad run cannot
    abort the whole listing. A ``duet_pid`` that is not an integer is
    treated as if no pid had been recorded.

    Returns:
        ``(emoji, label, state_dict)``; ``state_dict`` is empty when the
        state could not be loaded.
    """
    state_path = run_dir / "state.json"
    if not state_path.is_file():
        return ("❓", "no state.json", {})
    try:
        state = json.loads(state_path.read_text())
    except OSError:
        return ("❓", "unreadable state", {})
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        return ("❓", "malformed state", {})
    if not isinstance(state, dict):
        return ("❓", "malformed state", {})

    finished = state.get("finished_reason")
    if finished:
        emoji, label = _LIST_STATUS_FINISHED.get(finished, ("✅", finished))
        return (emoji, label, state)

    # No finished_reason — running, between turns, or crashed.
    if any(run_dir.glob("turn-*.pid")):
        return ("🟢", "in-flight", state)

    raw_pid = state.get("duet_pid")
    try:
        pid = int(raw_pid) if raw_pid is not None else None
    except (TypeError, ValueError):
        pid = None
    if pid is None:
        return ("⚠", "stuck (no pid)", state)
    if _is_duet_process(pid):
        return ("🟢", "between turns", state)
    return ("⚠", "duet died", state)
2912
+
2913
+
2914
def print_runs_list(explicit_path: Optional[pathlib.Path]) -> int:
    """`duet --list [PATH]` — print a table with one row per run directory.

    Args:
        explicit_path: Directory to scan, or None to scan the default
            search paths.

    Returns:
        Always 0, including when no roots or no runs are found (a message
        goes to stderr in those cases).
    """
    if explicit_path is None:
        roots = _default_list_paths()
        if not roots:
            print("[duet] no run dirs found.\n"
                  " Searched ./runs/, ./.duet/runs/, and ~/.duet/runs/*/. "
                  "Pass an explicit path: duet --list <DIR>", file=sys.stderr)
            return 0
    else:
        roots = [explicit_path.expanduser().resolve()]

    table: list[dict] = []
    # Runs are deduplicated by real path, so a run reachable both directly
    # and through a home-index symlink is listed once. The default roots
    # put cwd-relative directories first, so the direct (usually more
    # readable) path is the one displayed.
    real_paths: set[pathlib.Path] = set()
    now = time.time()
    for root in roots:
        if not root.is_dir():
            print(f"[duet] {root}: not a directory", file=sys.stderr)
            continue
        for entry in sorted(root.iterdir(), reverse=True):
            if not (entry.is_dir() and _RUN_ID_RE.match(entry.name)):
                continue
            try:
                real = entry.resolve()
            except OSError:
                continue
            if real in real_paths:
                continue
            real_paths.add(real)

            emoji, label, state = _classify_run(entry)
            # Self-heal: backfill the home index for runs that predate it
            # or that live outside the default tree. The slug is derived
            # from the cwd recorded in state.json.
            recorded_cwd = state.get("cwd") if state else None
            if recorded_cwd:
                _register_run_in_home_index(entry, pathlib.Path(recorded_cwd))

            mtime = _last_activity_mtime(entry)
            if mtime:
                age = _humanize_age(int(now - mtime))
            else:
                age = "—"
            history = state.get("history") or []
            table.append({
                "emoji": emoji,
                "label": label,
                "id": entry.name,
                "turns": state.get("turns_used", len(history)),
                "age": age,
                "dir": str(entry),
            })

    if not table:
        print(f"[duet] no runs found under: {', '.join(str(r) for r in roots)}",
              file=sys.stderr)
        return 0

    table.sort(key=lambda row: row["id"], reverse=True)

    def _width(header: str, cells: list) -> int:
        # A column is as wide as its header or its widest cell.
        return max(len(header), max(len(cell) for cell in cells))

    w_id = _width("run id", [row["id"] for row in table])
    w_label = _width("status", [row["label"] for row in table])
    w_turns = _width("turns", [str(row["turns"]) for row in table])
    w_age = _width("activity", [row["age"] for row in table])

    print(f" {'':2} {'run id':<{w_id}} {'status':<{w_label}} "
          f"{'turns':<{w_turns}} {'activity':<{w_age}} dir")
    print(f" {'':2} {'-'*w_id} {'-'*w_label} {'-'*w_turns} "
          f"{'-'*w_age} ---")
    for row in table:
        print(f" {row['emoji']:2} {row['id']:<{w_id}} {row['label']:<{w_label}} "
              f"{str(row['turns']):<{w_turns}} {row['age']:<{w_age}} {row['dir']}")
    print(f"\n {len(table)} run(s). Per-run health: duet --status <run-id>")
    return 0
2988
+
2989
+
2990
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for duet.

    Options are declared in the original order, so ``--help`` output is
    unchanged; the comments below only group them by purpose.
    """
    parser = argparse.ArgumentParser(
        description="duet — two CLI agents in conversation, with per-agent session memory.")

    # --- resuming sessions / continuing a previous run ---
    parser.add_argument("--resume-claude", metavar="SESSION_ID",
                        help="resume an existing Claude session id; harness will pull "
                             "its latest message and feed it to the partner agent.")
    parser.add_argument("--resume-codex", metavar="SESSION_ID",
                        help="(advanced) seed codex with an existing session id.")
    parser.add_argument("--continue", metavar="RUN_DIR_OR_ID", dest="continue_run",
                        help="start a new run from an existing run's state.json: "
                             "restore agents/session ids, reuse its worktree when "
                             "available, and send the next agent a continuation kickoff. "
                             "--task/--kickoff/--task-from-cmd may add optional guidance.")

    # --- seed inputs ---
    parser.add_argument("--task", help="task description, @file, or @- stdin "
                                       "(used if no --resume-* and no --kickoff)")
    parser.add_argument("--kickoff", help="explicit first message, @file, or @- stdin "
                                          "to send to the partner agent")
    parser.add_argument("--task-from-cmd", metavar="CMD",
                        help="run shell command with cwd=--cwd and use stdout as the task")

    # --- agents and conversation limits ---
    parser.add_argument("--partner", default="codex:coder",
                        help="partner agent spec, e.g. codex:coder, claude:reviewer (default codex:coder)")
    parser.add_argument("--lead", default="claude:planner",
                        help="lead agent spec, e.g. claude:planner (default; ignored if --resume-claude given)")
    parser.add_argument("--cwd", default=".", help="working dir for both agents")
    parser.add_argument("--turns", type=int, default=DEFAULT_TURNS,
                        help=f"max turns (default {DEFAULT_TURNS})")
    parser.add_argument("--sentinel", default=DEFAULT_SENTINEL,
                        help="convergence sentinel; requires an LGTM rationale and "
                             "back-to-back proposals from both agents")
    parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT,
                        help="per-turn timeout seconds")
    parser.add_argument("--verify-cmd", metavar="CMD", default=None,
                        help="shell command that must exit 0 before a convergence "
                             "proposal can count. Runs only for valid LGTM+rationale "
                             "proposals; YAML key: `verify_cmd:`.")

    # --- artifacts, permissions, config file ---
    parser.add_argument("--runs-dir", default=None, help="where to save transcripts")
    parser.add_argument("--sandbox", default="workspace-write",
                        help="codex --sandbox: read-only|workspace-write|danger-full-access")
    parser.add_argument("--permission-mode", default="acceptEdits",
                        help="claude --permission-mode: default|acceptEdits|plan|bypassPermissions")
    parser.add_argument("--config",
                        help="optional YAML/JSON config (overrides flags except --resume-*)")

    # --- worktree handling ---
    parser.add_argument("--worktree", action="store_true",
                        help="run the partner agent in a throwaway git worktree on a fresh branch; "
                             "the worktree is left intact at the end so you can review/merge/drop it.")
    parser.add_argument("--worktree-for", choices=["partner", "lead"], default=None,
                        help="which agent runs in the worktree (default: partner)")
    parser.add_argument("--worktree-path", metavar="PATH", default=None,
                        help="reuse an EXISTING worktree (e.g. from a previous cancelled run). "
                             "Codex resumes via the saved session UUID (or `--last` for "
                             "older runs); cwd is preserved either way. Skips git "
                             "worktree creation. Mutually exclusive with --worktree.")
    parser.add_argument("--worktree-root", metavar="PATH", default=None,
                        help="parent directory for newly-created worktrees (used with --worktree). "
                             "Each run lands at <PATH>/<run_id>/. Default: <runs_dir>/<run_id>/wt/, "
                             "which is durable across reboots and OS temp-dir cleaners. "
                             "Pass /tmp or $TMPDIR to mimic the pre-fix throwaway behavior.")
    parser.add_argument("--add-dir", action="append", metavar="PATH", default=[],
                        dest="add_dirs",
                        help="extra path claude is allowed to read/write outside cwd. "
                             "Repeatable. Without this, tasks that touch ../foo or "
                             "absolute paths outside --cwd silently fail with a "
                             "permission error. YAML key: `add_dirs:` (list).")

    # --- reasoning effort ---
    parser.add_argument("--reasoning", choices=REASONING_LEVELS, default=None,
                        help="reasoning effort for both agents. Codex: passes "
                             "`-c model_reasoning_effort=<v>` except for medium "
                             "(max → xhigh). Claude: passes `--effort <v>` "
                             "(minimal → low) and adds high/xhigh/max prompt nudges.")
    parser.add_argument("--codex-fast", action="store_true", dest="codex_fast",
                        help="Codex-only fast mode: pin codex coder turns to "
                             "`model_reasoning_effort=low` and "
                             "`model_reasoning_summary=concise`, regardless of "
                             "--reasoning / per-agent reasoning_effort. Trades "
                             "depth for latency on codex coder turns; claude is "
                             "unaffected, so `--reasoning high --codex-fast` is "
                             "a real and useful combo. YAML key: `codex_fast: true`.")

    # --- inspection modes (no duet is run) ---
    parser.add_argument("--status", metavar="RUN_DIR_OR_ID", default=None,
                        help="don't run a duet — instead print a one-shot health "
                             "summary of an existing run and exit. Accepts a path "
                             "(absolute or relative) OR a bare run id like "
                             "`20260507-082801` (resolved against the same "
                             "default paths as `--list`: ./runs/, ./.duet/runs/, "
                             "~/.duet/runs/*/). Exit codes: 0=done, 1=running, "
                             "2=stuck/crashed, 3=error.")
    parser.add_argument("--list", metavar="PATH", nargs="?", const="__defaults__",
                        default=None, dest="list_runs",
                        help="don't run a duet — instead list runs found under "
                             "PATH (or under the default search paths if PATH is "
                             "omitted: ./runs/, ./.duet/runs/, ~/.duet/runs/*/). "
                             "Each row shows status, turns_used, last-activity "
                             "age, and dir. Pair with `--status <run-id>` to drill "
                             "into a specific run.")

    # --- output verbosity and dry run ---
    parser.add_argument("--quiet", action="store_true",
                        help="don't mirror subprocess stderr to your terminal in real-time. "
                             "By default, duet prints Codex's live progress as it works.")
    parser.add_argument("--recap", action="store_true",
                        help="compact per-turn debug view; suppresses live stderr mirror "
                             "and writes recap.md next to transcript.md")
    parser.add_argument("--dry-run", action="store_true", help="don't actually call CLIs")
    return parser
3087
+
3088
+
3089
def _resolve_opt_path(*candidates: object) -> Optional[pathlib.Path]:
    """Turn the first truthy candidate into an absolute, user-expanded path.

    Candidates are checked left to right, so callers list the higher-priority
    source first (e.g. a CLI flag ahead of the config-file value). Returns
    None when every candidate is falsy (None, empty string, ...).
    """
    chosen = next((item for item in candidates if item), None)
    if chosen is None:
        return None
    return pathlib.Path(str(chosen)).expanduser().resolve()
3096
+
3097
+
3098
def _build_cfg_from_yaml(args: argparse.Namespace, ap: argparse.ArgumentParser,
                         stdin_cache: dict) -> DuetConfig:
    """Build a DuetConfig from a --config YAML/JSON file.

    Precedence between the file and the CLI flags, per field:

    - Seed inputs (task / kickoff / task_from_cmd): the file's values are
      used; the CLI values fill in only when the file sets none of the three.
    - runs_dir, verify_cmd: the CLI value when it is not None, else the file.
    - recap, worktree, codex_fast: OR of the file value and the CLI flag.
    - worktree_path, worktree_root, add_dirs, reasoning: the CLI value when
      truthy, else the file value.
    - worktree_for: the file value, then the CLI value, then "partner".
    - Everything else (cwd, max_turns, sentinel, per_turn_timeout, sandbox,
      permission_mode, dry_run, agents) comes from the file only.

    --resume-claude / --resume-codex are applied to the agents last.

    Keys that are present but null (e.g. a bare `add_dirs:` line) are treated
    as unset for cwd, per_turn_timeout, max_turns, agents and add_dirs;
    previously such a value crashed with a TypeError before validation ran.
    """
    raw = load_yaml_or_json(pathlib.Path(args.config))

    def _or_default(key, default):
        # `raw.get(key, default)` only falls back when the key is absent; a
        # key written with an empty value loads as None and must fall back too.
        value = raw.get(key)
        return default if value is None else value

    cfg_cwd = pathlib.Path(_or_default("cwd", ".")).expanduser().resolve()
    cfg_timeout = int(_or_default("per_turn_timeout", DEFAULT_TIMEOUT))

    raw_task = raw.get("task")
    raw_kickoff = raw.get("kickoff")
    raw_task_from_cmd = raw.get("task_from_cmd")
    if raw_task is None and raw_kickoff is None and raw_task_from_cmd is None:
        # The file specifies no seed at all, so take all three from the CLI.
        raw_task = args.task
        raw_kickoff = args.kickoff
        raw_task_from_cmd = args.task_from_cmd
    task, kickoff = resolve_seed_inputs(
        task=raw_task,
        kickoff=raw_kickoff,
        task_from_cmd=raw_task_from_cmd,
        cwd=cfg_cwd,
        timeout=cfg_timeout,
        parser=ap,
        stdin_cache=stdin_cache,
    )

    raw_runs_dir = args.runs_dir if args.runs_dir is not None else raw.get("runs_dir")
    # Build agents before verify_cmd so a bad agent field surfaces first (parity
    # with the pre-refactor order) when a config is invalid in several ways.
    agents = [Agent(**a) for a in _or_default("agents", [])]
    verify_cmd = normalize_verify_cmd(
        args.verify_cmd if args.verify_cmd is not None else raw.get("verify_cmd"),
        ap,
    )

    cfg = DuetConfig(
        cwd=cfg_cwd,
        agents=agents,
        task=task,
        kickoff=kickoff,
        max_turns=int(_or_default("max_turns", DEFAULT_TURNS)),
        sentinel=raw.get("sentinel", DEFAULT_SENTINEL),
        per_turn_timeout=cfg_timeout,
        runs_dir=choose_runs_dir(raw_runs_dir, cfg_cwd),
        sandbox=raw.get("sandbox", "workspace-write"),
        permission_mode=raw.get("permission_mode", "acceptEdits"),
        dry_run=bool(raw.get("dry_run", False)),
        recap=bool(raw.get("recap", False)) or args.recap,
        verify_cmd=verify_cmd,
        worktree=bool(raw.get("worktree", False)) or args.worktree,
        # NOTE(review): unlike worktree_path / worktree_root below, the file
        # value wins over --worktree-for here — confirm this is intended.
        worktree_for=raw.get("worktree_for") or args.worktree_for or "partner",
        worktree_path=_resolve_opt_path(args.worktree_path, raw.get("worktree_path")),
        worktree_root=_resolve_opt_path(args.worktree_root, raw.get("worktree_root")),
        add_dirs=[
            pathlib.Path(d).expanduser().resolve()
            for d in (args.add_dirs or _or_default("add_dirs", []))
        ],
        reasoning=args.reasoning or raw.get("reasoning"),
        codex_fast=bool(args.codex_fast or raw.get("codex_fast", False)),
    )
    cfg.agents = apply_resume_overrides(
        cfg.agents,
        resume_claude=args.resume_claude,
        resume_codex=args.resume_codex,
    )
    return cfg
3162
+
3163
+
3164
def _build_cfg_from_cli(args: argparse.Namespace, ap: argparse.ArgumentParser,
                        stdin_cache: dict) -> DuetConfig:
    """Assemble a DuetConfig purely from --lead/--partner and the CLI flags.

    The two agent specs are parsed first (lead defaults to the planner role,
    partner to the coder role); --resume-* are then attached with
    rename_slots=True so a resumed session id follows its backend even when
    the explicit topology places that agent in the "wrong" slot.
    """
    workdir = pathlib.Path(args.cwd).expanduser().resolve()

    seed_task, seed_kickoff = resolve_seed_inputs(
        task=args.task,
        kickoff=args.kickoff,
        task_from_cmd=args.task_from_cmd,
        cwd=workdir,
        timeout=args.timeout,
        parser=ap,
        stdin_cache=stdin_cache,
    )

    lead = parse_partner(args.lead, default_role="planner")
    partner = parse_partner(args.partner, default_role="coder")
    roster = apply_resume_overrides(
        [lead, partner],
        resume_claude=args.resume_claude,
        resume_codex=args.resume_codex,
        rename_slots=True,
    )

    # Derived values, computed in the same order the config fields are listed.
    runs_dir = choose_runs_dir(args.runs_dir, workdir)
    verify_cmd = normalize_verify_cmd(args.verify_cmd, ap)
    worktree_path = _resolve_opt_path(args.worktree_path)
    worktree_root = _resolve_opt_path(args.worktree_root)
    extra_dirs = []
    for entry in args.add_dirs:
        extra_dirs.append(pathlib.Path(entry).expanduser().resolve())

    return DuetConfig(
        cwd=workdir,
        agents=roster,
        task=seed_task,
        kickoff=seed_kickoff,
        max_turns=args.turns,
        sentinel=args.sentinel,
        per_turn_timeout=args.timeout,
        runs_dir=runs_dir,
        sandbox=args.sandbox,
        permission_mode=args.permission_mode,
        dry_run=args.dry_run,
        recap=args.recap,
        verify_cmd=verify_cmd,
        worktree=args.worktree,
        worktree_for=args.worktree_for or "partner",
        worktree_path=worktree_path,
        worktree_root=worktree_root,
        add_dirs=extra_dirs,
        reasoning=args.reasoning,
        codex_fast=bool(args.codex_fast),
    )
3211
+
3212
+
3213
def _warn_codex_fast_scope(cfg: DuetConfig) -> None:
    """Report on stderr how far --codex-fast actually reaches in this duet.

    Only codex agents with role "coder" are considered in scope. When there
    is none, a WARNING is printed and cfg.codex_fast is switched off. When
    there is one but other codex agents hold different roles, a note lists
    those agents. Nothing happens if cfg.codex_fast is already falsy.
    """
    if not cfg.codex_fast:
        return

    # Single pass over the codex agents: is there a coder, and who is not one?
    has_codex_coder = False
    out_of_scope = []
    for agent in cfg.agents:
        if agent.backend != "codex":
            continue
        if agent.role == "coder":
            has_codex_coder = True
        else:
            out_of_scope.append(agent)

    if not has_codex_coder:
        print(
            "[duet] WARNING: --codex-fast had no effect — "
            "no codex agent has role=coder in this duet. "
            "Fast mode applies only to codex:coder; set per-agent "
            "`reasoning_effort: low` if you really want fast on a "
            "non-coder role.",
            file=sys.stderr,
        )
        cfg.codex_fast = False
        return

    if not out_of_scope:
        return

    roles = ", ".join(f"{agent.name}({agent.role})" for agent in out_of_scope)
    print(
        f"[duet] note: --codex-fast applies only to codex:coder; "
        f"non-coder codex agents [{roles}] keep their normal "
        f"reasoning effort.",
        file=sys.stderr,
    )
3243
+
3244
+
3245
def main() -> int:
    """CLI entry point: parse flags, build and validate a config, run the duet.

    The read-only modes (--status, --list) return the status of their printer
    function and skip everything else. Otherwise conflicting flags are
    rejected through ``ap.error``, the config is built from --continue,
    --config or the plain CLI flags (in that priority), validated, and handed
    to ``run_duet``; the function then returns 0.
    """
    global LIVE_STREAM

    ap = _build_arg_parser()
    args = ap.parse_args()

    # Read-only mode: print the health of one run and stop.
    if args.status:
        return print_run_status(args.status)

    # Read-only mode: print the table of runs and stop.
    if args.list_runs is not None:
        if args.list_runs == "__defaults__":
            explicit = None
        else:
            explicit = pathlib.Path(args.list_runs)
        return print_runs_list(explicit)

    # Flag combinations that cannot be honoured together, checked in order.
    resuming = args.resume_claude or args.resume_codex
    flag_conflicts = (
        (args.worktree and args.worktree_path,
         "--worktree and --worktree-path are mutually exclusive"),
        (args.continue_run and args.config,
         "--continue and --config are mutually exclusive"),
        (args.continue_run and resuming,
         "--continue restores session ids from state.json; do not also pass --resume-*"),
        (args.continue_run and args.worktree,
         "--continue reuses the saved worktree; use --worktree-path to override it"),
    )
    for clashing, complaint in flag_conflicts:
        if clashing:
            ap.error(complaint)

    # Subprocess stderr is mirrored live unless --quiet was given.
    LIVE_STREAM = not args.quiet

    stdin_cache: dict[str, str] = {}
    if args.continue_run:
        cfg = build_continue_config(args.continue_run, args, ap, stdin_cache)
        next_speaker = cfg.agents[cfg.start_speaker_idx].name
        print(f"[duet] continuing run {args.continue_run} (next: {next_speaker})")
    else:
        builder = _build_cfg_from_yaml if args.config else _build_cfg_from_cli
        cfg = builder(args, ap, stdin_cache)

    validate_config(cfg, ap)
    validate_reasoning(cfg.reasoning, "config reasoning")
    for agent in cfg.agents:
        validate_reasoning(agent.reasoning_effort, f"agent {agent.name} reasoning_effort")
    # Re-checked on the final config because either setting may come from the file.
    if cfg.worktree and cfg.worktree_path:
        raise SystemExit("--worktree and --worktree-path/worktree_path are mutually exclusive")

    _warn_codex_fast_scope(cfg)

    # Sanity check: warn about backend CLIs missing from PATH.
    if not cfg.dry_run:
        for backend in {member.backend for member in cfg.agents}:
            if shutil.which(backend) is not None:
                continue
            print(f"[duet] WARNING: '{backend}' not on PATH — this run will fail. "
                  f"Install it or use --dry-run.", file=sys.stderr)

    run_duet(cfg)
    return 0
3300
+
3301
+
3302
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())