duet-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duet.py
ADDED
|
@@ -0,0 +1,3303 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
duet.py — two CLI agents in conversation, with per-agent session memory.
|
|
4
|
+
|
|
5
|
+
Workflow this is built for:
|
|
6
|
+
|
|
7
|
+
1. You start `claude` interactively, work out a plan, exit.
|
|
8
|
+
Claude prints (or you grab) a session id like 106c1c57-ca42-473f-b2f1-1ea764f78c46.
|
|
9
|
+
2. You hand it to duet:
|
|
10
|
+
|
|
11
|
+
./duet.py --resume-claude 106c1c57-ca42-473f-b2f1-1ea764f78c46 \
|
|
12
|
+
--partner codex:coder \
|
|
13
|
+
--cwd ~/code/myrepo \
|
|
14
|
+
--turns 10
|
|
15
|
+
|
|
16
|
+
3. duet pulls Claude's latest message from that session, feeds it to Codex.
|
|
17
|
+
Codex replies. duet feeds Codex's reply back to Claude (with --resume so
|
|
18
|
+
Claude remembers the whole prior conversation). Ping-pong until both
|
|
19
|
+
agents propose convergence with an LGTM rationale and <<<LGTM>>> in
|
|
20
|
+
back-to-back turns, --turns is hit, or you Ctrl-C.
|
|
21
|
+
|
|
22
|
+
Each agent keeps its own session across turns:
|
|
23
|
+
- Claude: `claude -p --resume <session_id> --output-format json` — we capture
|
|
24
|
+
`session_id` from the JSON wrapper and reuse it.
|
|
25
|
+
- Codex: first turn `codex exec ...`; subsequent turns `codex exec resume <uuid>`
|
|
26
|
+
when we parsed a session id from Codex's stderr, or `codex exec resume --last`
|
|
27
|
+
in the same cwd as a fallback for builds that don't print one. Pinning the
|
|
28
|
+
UUID makes resume robust to parallel Codex sessions sharing the cwd, but
|
|
29
|
+
`--last` is still keyed on cwd — use `--worktree` to isolate duet's Codex
|
|
30
|
+
cwd from the host repo when no UUID is available.
|
|
31
|
+
|
|
32
|
+
Transcript is always logged to runs/<ts>/transcript.md for humans, but each
|
|
33
|
+
prompt sent to an agent is just the latest counterpart message — keeping
|
|
34
|
+
prompts small and letting each side rely on its own session memory.
|
|
35
|
+
|
|
36
|
+
Stdlib only. Python 3.9+.
|
|
37
|
+
"""
|
|
38
|
+
from __future__ import annotations
|
|
39
|
+
|
|
40
|
+
import argparse
|
|
41
|
+
import dataclasses
|
|
42
|
+
import datetime as dt
|
|
43
|
+
import json
|
|
44
|
+
import os
|
|
45
|
+
import pathlib
|
|
46
|
+
import re
|
|
47
|
+
import shlex
|
|
48
|
+
import shutil
|
|
49
|
+
import signal
|
|
50
|
+
import subprocess
|
|
51
|
+
import sys
|
|
52
|
+
import textwrap
|
|
53
|
+
import threading
|
|
54
|
+
import time
|
|
55
|
+
from typing import Callable, Optional
|
|
56
|
+
|
|
57
|
+
# ---------- defaults ----------
|
|
58
|
+
|
|
59
|
+
DEFAULT_SENTINEL = "<<<LGTM>>>"
|
|
60
|
+
DEFAULT_TURNS = 2
|
|
61
|
+
DEFAULT_TIMEOUT = 60 * 15
|
|
62
|
+
TASK_MAX_CHARS = 512 * 1024
|
|
63
|
+
CONVERGENCE_RATIONALE_MIN_CHARS = 20
|
|
64
|
+
VERIFY_OUTPUT_TAIL_CHARS = 4000
|
|
65
|
+
VERIFY_LIVE_PREFIX = " │ [verify] "
|
|
66
|
+
SUPPORTED_BACKENDS = {"claude", "codex"}
|
|
67
|
+
WORKTREE_FOR_CHOICES = {"lead", "partner"}
|
|
68
|
+
FINISHED_CONVERGED = "converged"
|
|
69
|
+
FINISHED_CONVERGED_AFTER_FORCE = "converged_after_force"
|
|
70
|
+
FINISHED_FORCED_CONTINUATION = "forced_continuation"
|
|
71
|
+
FINISHED_MAX_TURNS = "max_turns"
|
|
72
|
+
FINISHED_FORCE_STOP = "force_stop"
|
|
73
|
+
FINISHED_TIMEOUT = "timeout"
|
|
74
|
+
FINISHED_AGENT_ERROR = "agent_error"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class AgentRunError(RuntimeError):
    """Agent/backend failure that ends the run with a specific finish reason.

    Attributes:
        finished_reason: reason string stored verbatim from the constructor
            argument (presumably one of the module's ``FINISHED_*`` values —
            confirm against callers).
    """

    def __init__(self, finished_reason: str, message: str) -> None:
        # Only `message` becomes the exception text; the reason is kept as an
        # attribute for whoever handles the error.
        super().__init__(message)
        self.finished_reason = finished_reason
|
|
83
|
+
|
|
84
|
+
RECAP_ADDENDUM = """Format requirement (debug tooling reads these):
|
|
85
|
+
|
|
86
|
+
Begin every reply with three header lines, then a blank line, then your full reply:
|
|
87
|
+
|
|
88
|
+
RECAP: <one short sentence describing what you produced this turn>
|
|
89
|
+
FILES: <comma-separated paths you touched or referenced, or "none">
|
|
90
|
+
STATUS: <one of: planning | implementing | reviewing | requesting-changes | ready-for-review | converged>
|
|
91
|
+
|
|
92
|
+
The headers DO NOT replace your reply — write your normal answer as usual after the blank line. Use STATUS: converged only when you would also emit the convergence sentinel with an LGTM rationale."""
|
|
93
|
+
|
|
94
|
+
CONVERGENCE_INSTRUCTION = (
|
|
95
|
+
"Convergence requires pair agreement, not just the sentinel. When you "
|
|
96
|
+
"believe the loop should stop, include a concise `LGTM rationale:` line or "
|
|
97
|
+
"paragraph that explains why the result satisfies the task, what you "
|
|
98
|
+
"checked, and any remaining low-risk follow-ups; then put {SENTINEL} on "
|
|
99
|
+
"its own line. A bare sentinel without that rationale is ignored. If your "
|
|
100
|
+
"partner proposed convergence and you disagree with the rationale, do not "
|
|
101
|
+
"emit the sentinel; explain the gap and ask for another round."
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
ROLE_PROMPTS = {
|
|
105
|
+
"planner": (
|
|
106
|
+
"You are the PLANNER half of a duet. Read the partner agent's latest "
|
|
107
|
+
"message and propose or refine a plan. Be concrete: file names, "
|
|
108
|
+
"functions, edge cases. You may also write or edit non-code "
|
|
109
|
+
"deliverables yourself when the task asks for them — synthesis "
|
|
110
|
+
"documents, reports, comparison matrices, configuration, README "
|
|
111
|
+
"updates, dashboards, etc. What you should NOT do is write production "
|
|
112
|
+
"feature code (that's the coder's job). When you believe the work is "
|
|
113
|
+
"fully done and reviewed, follow the convergence instructions."
|
|
114
|
+
),
|
|
115
|
+
"coder": (
|
|
116
|
+
"You are the CODER half of a duet. Read the partner agent's latest "
|
|
117
|
+
"message (typically a plan or critique) and produce code. Apply edits "
|
|
118
|
+
"to disk. Run quick checks where reasonable. Summarise what you "
|
|
119
|
+
"changed. When you believe the work is fully done, follow the "
|
|
120
|
+
"convergence instructions."
|
|
121
|
+
),
|
|
122
|
+
"reviewer": (
|
|
123
|
+
"You are the REVIEWER half of a duet. Read the partner agent's "
|
|
124
|
+
"latest message and critically evaluate it: bugs, missing tests, "
|
|
125
|
+
"security, simpler designs. When reviewing concrete code changes, "
|
|
126
|
+
"inspect the actual files, diffs, and test output rather than relying "
|
|
127
|
+
"only on the partner's summary. Be specific and brief. If the work "
|
|
128
|
+
"meets the task and you have no material issues, follow the "
|
|
129
|
+
"convergence instructions."
|
|
130
|
+
),
|
|
131
|
+
"triage-reviewer": (
|
|
132
|
+
"You are the TRIAGE REVIEWER half of a duet. Read the partner agent's "
|
|
133
|
+
"latest message and critically evaluate it: bugs, missing tests, "
|
|
134
|
+
"security, simpler designs. Score every finding with [P0], [P1], "
|
|
135
|
+
"[P2], or [P3]. Default to [P3]; promote only when the impact is "
|
|
136
|
+
"concrete. [P0] means a correctness, security, data-loss, or shipped-"
|
|
137
|
+
"check blocker. [P1] means a real bug, logic gap, or missing edge case "
|
|
138
|
+
"that should block this loop. [P2] means a small bug, polish issue, "
|
|
139
|
+
"or naming/readability fix that is nice to handle. [P3] means a "
|
|
140
|
+
"follow-up, future refactor, or scope creep. When reviewing concrete "
|
|
141
|
+
"code changes, inspect the actual files, diffs, and test output rather "
|
|
142
|
+
"than relying only on the partner's summary. Be specific and brief. "
|
|
143
|
+
"If the coder reasonably argues a finding is over-scored, either "
|
|
144
|
+
"accept the lower score or explain why the higher score still applies. "
|
|
145
|
+
"Emit convergence only when no unfixed [P0] or [P1] findings remain. "
|
|
146
|
+
"When only [P2]/[P3] items remain, move them to a Follow-ups section "
|
|
147
|
+
"and follow the convergence instructions."
|
|
148
|
+
),
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
# Tiny request used to extract Claude's most recent message when we resume
|
|
152
|
+
# from an existing session id, so we have something to hand to the partner.
|
|
153
|
+
EXTRACT_LATEST_PROMPT = (
|
|
154
|
+
"[duet harness] I'm about to hand your most recent plan/answer to a "
|
|
155
|
+
"partner coding agent. Please reproduce that plan/answer in full as "
|
|
156
|
+
"your reply now. Reply with the message text only — no preamble, no "
|
|
157
|
+
"framing, no commentary about this request."
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
# User-facing reasoning levels accepted by --reasoning / `reasoning:` in YAML.
|
|
161
|
+
# These are the *duet abstraction*; per-backend translation happens below so
|
|
162
|
+
# users can choose the common `xhigh` level directly while still getting useful
|
|
163
|
+
# aliases for backend-specific gaps (`minimal` for Codex, `max` for Claude).
|
|
164
|
+
REASONING_LEVELS = ["minimal", "low", "medium", "high", "xhigh", "max"]

# Claude Code's thinking control is the `--effort` CLI flag, which stays the
# authoritative setting. On top of it, the three highest levels get a short
# natural-language nudge prepended to the prompt; `ultrathink` is a recognised
# one-turn in-context nudge in current Claude Code. Lower levels add nothing.
CLAUDE_REASONING_PROMPT_PREFIX = {
    **dict.fromkeys(("minimal", "low", "medium"), ""),
    "high": "think hard and reason step-by-step before answering. Cover edge cases.\n\n",
    "xhigh": (
        "think very hard and reason carefully before answering. Cover "
        "edge cases, alternatives, and risks.\n\n"
    ),
    "max": (
        "ultrathink — reason exhaustively before answering. Enumerate edge "
        "cases, alternatives, and risks. Do not skim.\n\n"
    ),
}

# Claude Code `--effort` accepts low, medium, high, xhigh, max. Every duet
# level maps to itself except `minimal` (a Codex-only value), which is sent as
# Claude's lowest documented level.
CLAUDE_REASONING_MAP = {level: level for level in REASONING_LEVELS}
CLAUDE_REASONING_MAP["minimal"] = "low"

# Codex CLI takes a config override `-c model_reasoning_effort=<value>` whose
# accepted values, lowest→highest, are minimal, low, medium, high, xhigh.
# Codex documents no separate `max`, so duet's `max` alias is sent as `xhigh`.
CODEX_REASONING_MAP = {level: level for level in REASONING_LEVELS}
CODEX_REASONING_MAP["max"] = "xhigh"
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def validate_reasoning(value: Optional[str], context: str) -> None:
    """Exit with a usage message when `value` is not a known reasoning level.

    Args:
        value: candidate level; ``None`` means "not specified" and is accepted.
        context: label naming where the value came from, used in the message.

    Raises:
        SystemExit: if `value` is set and not in ``REASONING_LEVELS``.
    """
    if value is None or value in REASONING_LEVELS:
        return
    choices = "|".join(REASONING_LEVELS)
    raise SystemExit(f"bad reasoning value for {context}: {value!r}; expected {choices}")
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def effective_reasoning(agent: Agent, cfg_reasoning: Optional[str]) -> Optional[str]:
    """Return the agent's own reasoning effort, else the run-wide default.

    An unset or empty ``agent.reasoning_effort`` falls back to `cfg_reasoning`.
    """
    own_level = agent.reasoning_effort
    return own_level if own_level else cfg_reasoning
|
|
215
|
+
|
|
216
|
+
# ---------- data classes ----------
|
|
217
|
+
|
|
218
|
+
@dataclasses.dataclass
class Agent:
    """One participant in the duet: a CLI backend plus its role and session state."""

    name: str
    backend: str                                  # "claude" or "codex"
    role: str = "coder"                           # planner | coder | reviewer | triage-reviewer | custom
    role_prompt: Optional[str] = None             # when non-empty, used instead of ROLE_PROMPTS[role]
    model: Optional[str] = None
    session_id: Optional[str] = None              # tracked across turns
    extra_args: list[str] = dataclasses.field(default_factory=list)
    cwd_override: Optional[pathlib.Path] = None   # set when this agent runs in a git worktree
    reasoning_effort: Optional[str] = None        # one of REASONING_LEVELS; overrides cfg.reasoning

    def system_prompt(self, sentinel: str, recap: bool = False) -> str:
        """Build this agent's system prompt.

        The result is the role template, then the convergence instruction,
        then (only when `recap` is true) the recap addendum, separated by
        blank lines. Every ``{SENTINEL}`` placeholder in the first two parts
        is replaced by `sentinel`.

        Raises:
            SystemExit: if no `role_prompt` is set and `role` has no entry in
                ``ROLE_PROMPTS``.
        """
        template = self.role_prompt or ROLE_PROMPTS.get(self.role)
        if template is None:
            raise SystemExit(f"unknown role '{self.role}' for agent '{self.name}' — "
                             "supply role_prompt to override")
        # Plain text substitution on purpose: role prompts often carry literal
        # braces (JSON schema, code samples, jq patterns) that str.format()
        # would try to parse as fields and reject.
        placeholder = "{SENTINEL}"
        sections = [
            template.replace(placeholder, sentinel),
            CONVERGENCE_INSTRUCTION.replace(placeholder, sentinel),
        ]
        if recap:
            sections.append(RECAP_ADDENDUM)
        return "\n\n".join(sections)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def agent_state(a: Agent) -> dict:
    """Return a plain-dict snapshot of an agent.

    ``name``, ``backend``, ``role`` and ``session_id`` are always present.
    ``role_prompt``, ``model`` and ``reasoning_effort`` appear only when not
    ``None``; ``extra_args`` appears only when the list is non-empty.
    """
    snapshot = {
        "name": a.name,
        "backend": a.backend,
        "role": a.role,
        "session_id": a.session_id,
    }
    # (key, value, include?) — kept in this order so the optional keys are
    # inserted in a stable sequence.
    optional_entries = (
        ("role_prompt", a.role_prompt, a.role_prompt is not None),
        ("model", a.model, a.model is not None),
        ("extra_args", a.extra_args, bool(a.extra_args)),
        ("reasoning_effort", a.reasoning_effort, a.reasoning_effort is not None),
    )
    for key, value, include in optional_entries:
        if include:
            snapshot[key] = value
    return snapshot
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
@dataclasses.dataclass
class DuetConfig:
    """Settings for one duet run, assembled from CLI/YAML input."""

    cwd: pathlib.Path
    agents: list[Agent]                            # exactly 2 for now
    task: Optional[str] = None                     # used if no resume seed
    kickoff: Optional[str] = None                  # explicit first message to partner
    max_turns: int = DEFAULT_TURNS
    sentinel: str = DEFAULT_SENTINEL
    per_turn_timeout: int = DEFAULT_TIMEOUT
    runs_dir: pathlib.Path = pathlib.Path("runs")
    sandbox: str = "workspace-write"               # codex
    permission_mode: str = "acceptEdits"           # claude
    dry_run: bool = False
    recap: bool = False
    # Shell command that must pass before a convergence proposal can count.
    verify_cmd: Optional[str] = None
    worktree: bool = False                         # run partner in a throwaway git worktree
    worktree_for: str = "partner"                  # "partner" (idx 1) or "lead" (idx 0)
    worktree_path: Optional[pathlib.Path] = None   # reuse an existing worktree (for resume)
    # Parent dir for new worktrees; default = <run_dir>/wt (durable, gitignored).
    worktree_root: Optional[pathlib.Path] = None
    # Extra `--add-dir` paths for claude — needed when the task reads/writes
    # outside cwd (e.g. ../DECISION.md). Without these claude silently refuses
    # paths outside cwd.
    add_dirs: list[pathlib.Path] = dataclasses.field(default_factory=list)
    reasoning: Optional[str] = None                # default reasoning effort for both agents
    # Codex-only "fast mode": pin reasoning to low and add
    # `model_reasoning_summary=concise` for codex coder turns this run,
    # regardless of cfg.reasoning / agent.reasoning_effort. Claude's effort is
    # untouched, so `--reasoning high --codex-fast` keeps the planner deep and
    # the coder snappy.
    codex_fast: bool = False
    start_speaker_idx: int = 1                     # default loop starts with partner replying
    continue_from: Optional[str] = None            # prior run dir/id when created by --continue
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _config_error(message: str,
|
|
303
|
+
parser: Optional[argparse.ArgumentParser] = None) -> None:
|
|
304
|
+
if parser is not None:
|
|
305
|
+
parser.error(message)
|
|
306
|
+
raise SystemExit(message)
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def validate_config(cfg: DuetConfig,
                    parser: Optional[argparse.ArgumentParser] = None) -> None:
    """Validate final topology after CLI/YAML parsing and resume normalization.

    Checks, in order: exactly two agents; ``start_speaker_idx`` is 0 or 1;
    ``worktree_for`` is a known choice; each agent uses a supported backend
    and has a unique name. The first failing check is reported through
    ``_config_error`` (via `parser` when given).
    """
    def fail(message: str) -> None:
        _config_error(message, parser)

    agent_count = len(cfg.agents)
    if agent_count != 2:
        fail(f"duet expects exactly 2 agents, got {agent_count}")

    if cfg.start_speaker_idx not in (0, 1):
        fail(f"start_speaker_idx must be 0 or 1, got {cfg.start_speaker_idx}")

    if cfg.worktree_for not in WORKTREE_FOR_CHOICES:
        allowed = "|".join(sorted(WORKTREE_FOR_CHOICES))
        fail(f"worktree_for must be one of {allowed}, got {cfg.worktree_for!r}")

    names_so_far: set[str] = set()
    for agent in cfg.agents:
        if agent.backend not in SUPPORTED_BACKENDS:
            allowed = "|".join(sorted(SUPPORTED_BACKENDS))
            fail(
                f"unknown backend {agent.backend!r} for agent {agent.name!r}; "
                f"expected {allowed}"
            )
        if agent.name in names_so_far:
            fail(f"duplicate agent name {agent.name!r}; agent names must be unique")
        names_so_far.add(agent.name)
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def effective_agent_cwd(agent: Agent, default_cwd: pathlib.Path) -> pathlib.Path:
    """Return the resolved directory the agent runs in.

    Uses ``agent.cwd_override`` when set, otherwise `default_cwd`.
    """
    chosen = agent.cwd_override if agent.cwd_override else default_cwd
    return chosen.resolve()
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def shared_cwd_codex_peers(cfg: DuetConfig) -> bool:
    """Return True when exactly two codex agents resolve to the same cwd."""
    codex_only = tuple(a for a in cfg.agents if a.backend == "codex")
    if len(codex_only) == 2:
        first, second = codex_only
        return effective_agent_cwd(first, cfg.cwd) == effective_agent_cwd(second, cfg.cwd)
    return False
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def codex_session_is_uuid(agent: Agent) -> bool:
    """Return True when the agent has a session id that ``_CODEX_UUID_RE`` matches."""
    session = agent.session_id
    if not session:
        return False
    return bool(_CODEX_UUID_RE.match(session))
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def codex_shared_cwd_isolation_error(agent: Agent) -> str:
    """Return the fatal message for a codex/codex pair sharing a cwd without a session UUID."""
    sentences = (
        "[duet] fatal: cannot safely continue codex/codex peering in one cwd "
        f"because {agent.name} did not produce a Codex session UUID.",
        "`codex exec resume --last` is cwd-based and could resume the other "
        "Codex peer's session.",
        "Use --worktree/--worktree-for to isolate one "
        "peer, or use a Codex build that reliably emits `session id: <uuid>`.",
    )
    return " ".join(sentences)
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def guard_codex_shared_cwd_before_call(cfg: DuetConfig,
                                       agent: Agent,
                                       first_turn_for_agent: bool) -> None:
    """Refuse to resume a codex agent by cwd when a codex peer shares that cwd.

    Does nothing for dry runs, non-codex agents, or when the two agents are
    not codex peers in one cwd. Otherwise, on any turn after the agent's
    first, a session id that is set but not a UUID is fatal.

    Raises:
        SystemExit: with the shared-cwd isolation message.
    """
    if cfg.dry_run:
        return
    if agent.backend != "codex":
        return
    if not shared_cwd_codex_peers(cfg):
        return
    if first_turn_for_agent or not agent.session_id:
        return
    if codex_session_is_uuid(agent):
        return
    raise SystemExit(codex_shared_cwd_isolation_error(agent))
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def guard_codex_shared_cwd_after_call(cfg: DuetConfig,
                                      agent: Agent,
                                      first_turn_for_agent: bool) -> None:
    """After a codex agent's first turn, require a session UUID if a codex peer shares its cwd.

    Only applies to real (non-dry) runs, codex agents, and the agent's first
    turn.

    Raises:
        SystemExit: with the shared-cwd isolation message when the two codex
            agents share a cwd and this agent's session id is not a UUID.
    """
    applies = (not cfg.dry_run) and agent.backend == "codex" and first_turn_for_agent
    if not applies:
        return
    if not shared_cwd_codex_peers(cfg):
        return
    if codex_session_is_uuid(agent):
        return
    raise SystemExit(codex_shared_cwd_isolation_error(agent))
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
@dataclasses.dataclass
class VerifyResult:
    """Outcome record for one run of the verify command."""

    ok: bool
    cmd: str
    cwd: pathlib.Path
    exit_code: Optional[int]          # may be None (presumably when no exit status was obtained — confirm)
    stdout_tail: str
    stderr_tail: str
    log_path: pathlib.Path
    timed_out: bool = False
    error: Optional[str] = None
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
# ---------- active child process tracking ----------
|
|
405
|
+
|
|
406
|
+
_ACTIVE_PROCS: set[subprocess.Popen] = set()
|
|
407
|
+
_ACTIVE_PROCS_LOCK = threading.Lock()
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _register_proc(proc: subprocess.Popen) -> None:
    """Add `proc` to the module's set of active children, under the lock."""
    with _ACTIVE_PROCS_LOCK:
        _ACTIVE_PROCS.add(proc)
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def _unregister_proc(proc: subprocess.Popen) -> None:
    """Remove `proc` from the active set; a no-op if it was never registered."""
    with _ACTIVE_PROCS_LOCK:
        _ACTIVE_PROCS.discard(proc)
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def _signal_proc_tree(proc: subprocess.Popen, sig: int) -> None:
|
|
421
|
+
if proc.poll() is not None:
|
|
422
|
+
return
|
|
423
|
+
try:
|
|
424
|
+
if hasattr(os, "killpg"):
|
|
425
|
+
os.killpg(proc.pid, sig)
|
|
426
|
+
else:
|
|
427
|
+
proc.send_signal(sig)
|
|
428
|
+
except ProcessLookupError:
|
|
429
|
+
pass
|
|
430
|
+
except Exception:
|
|
431
|
+
try:
|
|
432
|
+
proc.kill()
|
|
433
|
+
except Exception:
|
|
434
|
+
pass
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def _terminate_active_processes(sig: Optional[int] = None) -> None:
    """Send `sig` to every registered child process (and its group).

    Args:
        sig: signal number to deliver. When omitted, ``signal.SIGKILL`` is
            used where the platform defines it, otherwise ``signal.SIGTERM``.
    """
    if sig is None:
        # signal.SIGKILL is Unix-only. Resolving it here rather than in the
        # signature keeps the module importable on platforms without it.
        sig = getattr(signal, "SIGKILL", signal.SIGTERM)
    # Snapshot under the lock so signalling happens without holding it.
    with _ACTIVE_PROCS_LOCK:
        targets = list(_ACTIVE_PROCS)
    for proc in targets:
        _signal_proc_tree(proc, sig)
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
# ---------- git worktree helpers ----------
|
|
445
|
+
|
|
446
|
+
def is_git_repo(path: pathlib.Path) -> bool:
    """Return True when git reports `path` as inside a work tree.

    Returns False if git is not installed, the probe takes longer than five
    seconds, git exits non-zero, or its output is anything but ``true``.
    """
    probe = ["git", "-C", str(path), "rev-parse", "--is-inside-work-tree"]
    try:
        outcome = subprocess.run(probe, capture_output=True, text=True, timeout=5)
    except (FileNotFoundError, subprocess.TimeoutExpired):
        return False
    return outcome.returncode == 0 and outcome.stdout.strip() == "true"
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def setup_worktree(repo_path: pathlib.Path, branch_name: str,
                   dest: pathlib.Path) -> pathlib.Path:
    """Create a git worktree at `dest` on a new branch and return the resolved path.

    `dest` must not exist yet (a requirement of ``git worktree add``); its
    parent directory is created when missing. The caller decides placement —
    see ``cfg.worktree_root`` or the default ``<run_dir>/wt``.

    Raises:
        RuntimeError: if `dest` already exists, git is not on PATH, the
            command exceeds 30 seconds, or git exits non-zero.
    """
    target = dest.expanduser().resolve()
    target.parent.mkdir(parents=True, exist_ok=True)
    if target.exists():
        raise RuntimeError(f"worktree destination already exists: {target}")

    argv = ["git", "-C", str(repo_path), "worktree", "add", "-b", branch_name, str(target)]
    try:
        child = subprocess.Popen(
            argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            start_new_session=True,
        )
    except FileNotFoundError:
        raise RuntimeError("git not found on PATH")

    # Registered while running so the module's active-process helpers can see it.
    _register_proc(child)
    try:
        try:
            stderr_text = child.communicate(timeout=30)[1]
        except subprocess.TimeoutExpired:
            # Escalate: SIGTERM, two seconds of grace, then SIGKILL.
            _signal_proc_tree(child, signal.SIGTERM)
            try:
                stderr_text = child.communicate(timeout=2)[1]
            except subprocess.TimeoutExpired:
                _signal_proc_tree(child, signal.SIGKILL)
                stderr_text = child.communicate()[1]
            raise RuntimeError(f"git worktree add timed out: {stderr_text.strip()}")
    finally:
        _unregister_proc(child)

    if child.returncode != 0:
        raise RuntimeError(f"git worktree add failed: {stderr_text.strip()}")
    return target
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def git_diff_summary(wt_path: pathlib.Path, max_chars: int = 8000) -> str:
    """Return a markdown summary of the worktree's state relative to HEAD.

    Sections: ``git status --short``, the diffstat, the full diff cut to
    `max_chars` characters, and the contents of untracked files when there
    are any. If any git call times out, a one-line notice is returned instead.
    """
    def git_stdout(*args: str) -> str:
        # Each git call gets its own 10-second budget.
        return subprocess.run(
            ["git", "-C", str(wt_path), *args],
            capture_output=True, text=True, timeout=10,
        ).stdout

    try:
        status_text = git_stdout("status", "--short").rstrip()
        stat_text = git_stdout("diff", "HEAD", "--stat").rstrip()
        patch = git_stdout("diff", "HEAD")
        overflow = len(patch) - max_chars
        if overflow > 0:
            patch = patch[:max_chars] + f"\n…[truncated, {overflow} more chars]"
        extras = _untracked_files_summary(wt_path, max_chars=max_chars)
        extras_section = ""
        if extras:
            extras_section = f"\n\n### untracked file contents\n{extras}"
        return "".join([
            f"### git status\n{status_text or '(clean)'}\n\n",
            f"### diffstat\n{stat_text or '(none)'}\n\n",
            f"### diff\n{patch or '(none)'}",
            extras_section,
        ])
    except subprocess.TimeoutExpired:
        return "[duet] git diff timed out"
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
def _untracked_files_summary(wt_path: pathlib.Path, max_chars: int = 8000) -> str:
    """Return previews of untracked, non-ignored files, within a character budget.

    Files come from ``git ls-files --others --exclude-standard -z``. Each
    file's section is appended until `max_chars` is used up. The section that
    crosses the budget is cut with a truncation note, and later files are
    dropped. Returns ``""`` when git fails or lists nothing.
    """
    listing = subprocess.run(
        ["git", "-C", str(wt_path), "ls-files", "--others",
         "--exclude-standard", "-z"],
        capture_output=True, timeout=10,
    )
    if listing.returncode != 0:
        return ""
    # -z output is NUL-separated raw bytes; decode with the filesystem encoding.
    names = [os.fsdecode(raw) for raw in listing.stdout.split(b"\0") if raw]
    if not names:
        return ""

    chunks: list[str] = []
    budget = max_chars
    for name in names:
        if budget <= 0:
            chunks.append("…[truncated]")
            break
        chunk = _untracked_file_summary(wt_path, name)
        overflow = len(chunk) - budget
        if overflow > 0:
            chunks.append(chunk[:budget] + f"\n…[truncated, {overflow} more chars]")
            break
        chunks.append(chunk)
        # The extra 2 accounts for the blank-line separator added by the join.
        budget -= len(chunk) + 2
    return "\n\n".join(chunks)
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
def _untracked_file_summary(wt_path: pathlib.Path, rel_path: str) -> str:
|
|
560
|
+
display_path = rel_path.replace("\\", "/")
|
|
561
|
+
file_path = wt_path / rel_path
|
|
562
|
+
if file_path.is_symlink():
|
|
563
|
+
return f"#### {display_path}\n(symlink omitted)"
|
|
564
|
+
if not file_path.is_file():
|
|
565
|
+
return f"#### {display_path}\n(non-file omitted)"
|
|
566
|
+
try:
|
|
567
|
+
data = _read_file_preview(file_path)
|
|
568
|
+
except OSError as e:
|
|
569
|
+
return f"#### {display_path}\n(unable to read: {e})"
|
|
570
|
+
if data is None:
|
|
571
|
+
return f"#### {display_path}\n(binary file omitted)"
|
|
572
|
+
fence = _markdown_fence(data)
|
|
573
|
+
return f"#### {display_path}\n{fence}text\n{data}\n{fence}"
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
def _read_file_preview(path: pathlib.Path, max_bytes: int = 12000) -> Optional[str]:
|
|
577
|
+
with path.open("rb") as f:
|
|
578
|
+
data = f.read(max_bytes + 1)
|
|
579
|
+
truncated = len(data) > max_bytes
|
|
580
|
+
data = data[:max_bytes]
|
|
581
|
+
if b"\0" in data:
|
|
582
|
+
return None
|
|
583
|
+
text = data.decode("utf-8", errors="replace")
|
|
584
|
+
if truncated:
|
|
585
|
+
text += f"\n…[truncated, file exceeds {max_bytes} bytes]"
|
|
586
|
+
return text
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
def _markdown_fence(text: str) -> str:
|
|
590
|
+
longest = max((len(m.group(0)) for m in re.finditer(r"`+", text)), default=0)
|
|
591
|
+
return "`" * max(3, longest + 1)
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
def _worktree_handoff_block(wt_path: pathlib.Path,
|
|
595
|
+
wt_branch: Optional[str] = None) -> str:
|
|
596
|
+
"""Tell the receiving agent exactly where the edited tree lives.
|
|
597
|
+
|
|
598
|
+
Worded for clean turns too — the worktree-agent may have only explored
|
|
599
|
+
this turn, so we say "any code changes" rather than asserting changes
|
|
600
|
+
exist. Suggested commands are intentionally project-agnostic; project
|
|
601
|
+
test commands belong in CLAUDE.md / README, not in this generic block.
|
|
602
|
+
"""
|
|
603
|
+
wt_display = str(wt_path)
|
|
604
|
+
wt_arg = shlex.quote(wt_display)
|
|
605
|
+
branch_line = f"- Branch: `{wt_branch}`\n" if wt_branch else ""
|
|
606
|
+
return (
|
|
607
|
+
"### review target\n"
|
|
608
|
+
"Any code changes for this turn are in the git worktree below. "
|
|
609
|
+
"Your current cwd may be a clean checkout, so do not use that cwd's "
|
|
610
|
+
"`git status` as evidence that these edits are absent.\n\n"
|
|
611
|
+
f"- Worktree path: `{wt_display}`\n"
|
|
612
|
+
f"{branch_line}"
|
|
613
|
+
"\n"
|
|
614
|
+
"Use the worktree as the source of truth when reviewing or running "
|
|
615
|
+
"checks:\n\n"
|
|
616
|
+
"```bash\n"
|
|
617
|
+
f"git -C {wt_arg} status --short\n"
|
|
618
|
+
f"git -C {wt_arg} diff HEAD\n"
|
|
619
|
+
"```\n"
|
|
620
|
+
)
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
def append_worktree_diff(reply: str, wt_path: pathlib.Path,
                         wt_branch: Optional[str] = None) -> str:
    """Return `reply` followed by the worktree handoff block and diff summary.

    Never raises: if building the summary fails, `reply` is returned with a
    one-line failure note appended instead.
    """
    try:
        summary = git_diff_summary(wt_path)
        review_target = _worktree_handoff_block(wt_path, wt_branch)
        return "\n".join([
            f"{reply}\n\n---",
            f"#### worktree changes ({wt_path.name})",
            review_target,
            summary,
        ])
    except Exception as e:
        return f"{reply}\n\n[duet] git diff failed: {e}"
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def write_text_atomic(path: pathlib.Path, text: str) -> None:
    """Write `text` to a temp file beside `path`, then replace `path` with it.

    The temp file is named ``.<name>.<pid>.tmp`` in the same directory and is
    removed afterwards if it is still there (for example after a failed
    write). Cleanup errors are ignored.
    """
    scratch = path.with_name(f".{path.name}.{os.getpid()}.tmp")
    try:
        scratch.write_text(text, encoding="utf-8")
        os.replace(scratch, path)
    finally:
        # After a successful replace the scratch name no longer exists, so
        # this only removes leftovers from a failed attempt.
        try:
            leftover = scratch.exists()
            if leftover:
                scratch.unlink()
        except OSError:
            pass
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
def append_text_atomic(path: pathlib.Path, text: str) -> None:
    """Append `text` to `path` by rewriting the whole file through `write_text_atomic`.

    A missing file is treated as empty.
    """
    existing = ""
    if path.exists():
        existing = path.read_text(encoding="utf-8")
    write_text_atomic(path, existing + text)
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
# ---------- subprocess wrappers ----------
|
|
655
|
+
|
|
656
|
+
# Module-level: when True, _run forwards subprocess stderr to the user's
|
|
657
|
+
# terminal in real-time. Codex prints its progress (thinking, tool calls)
|
|
658
|
+
# to stderr, so this gives live visibility during long turns.
|
|
659
|
+
LIVE_STREAM = True
|
|
660
|
+
LIVE_PREFIX = " │ " # box-drawing prefix on every streamed line
|
|
661
|
+
LIVE_PREFIX_TASK = " $ "
|
|
662
|
+
RECAP_MODE = False
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def _stream_reader(stream, sink: list[str], mirror_to=None, prefix: str = "",
                   tee_to=None, activity_event=None):
    """Drain `stream` line by line into `sink`, with optional mirror and tee.

    Each line read is appended to `sink`. If `activity_event` is given it is
    `set()` for every line, so a watcher can tell the pipe is still
    producing output. If `mirror_to` is given, the line is written there
    (with `prefix` prepended when `prefix` is non-empty) and flushed. If
    `tee_to` is given, the raw line is written there and flushed. Write
    failures on either target are ignored so the pipe keeps draining. The
    stream is closed when reading ends, whether normally or by exception.
    """

    def _forward(target, payload: str) -> None:
        # Best-effort delivery: a broken mirror/tee must not stop the drain.
        if target is None:
            return
        try:
            target.write(payload)
            target.flush()
        except Exception:
            pass

    try:
        while True:
            line = stream.readline()
            if line == "":
                break  # EOF
            sink.append(line)
            if activity_event is not None:
                activity_event.set()
            _forward(mirror_to, (prefix + line) if prefix else line)
            _forward(tee_to, line)
    finally:
        try:
            stream.close()
        except Exception:
            pass
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
def _quiet_heartbeat(proc, mirror_to, start_monotonic: float,
                     activity_event, interval: int = 20,
                     prefix: str = LIVE_PREFIX) -> None:
    """Emit a "[duet] still working…" line whenever the subprocess goes quiet.

    Loops while `proc.poll()` reports the process is running. Each pass
    waits up to `interval` seconds on `activity_event`: if it fires, the
    event is cleared and the wait restarts; if it times out and the process
    is still alive, one line with the elapsed seconds since
    `start_monotonic` is written to `mirror_to`. Does nothing when
    `mirror_to` is None, and stops quietly if writing fails.
    """
    if mirror_to is None:
        return

    def _alive() -> bool:
        return proc.poll() is None

    while _alive():
        if activity_event.wait(timeout=interval):
            # Output arrived within the window; reset and wait again.
            activity_event.clear()
        elif not _alive():
            # The process ended while we were waiting.
            break
        else:
            try:
                elapsed = int(time.monotonic() - start_monotonic)
                mirror_to.write(f"{prefix}[duet] still working… ({elapsed}s; "
                                "subprocess silent — typical for `claude -p`)\n")
                mirror_to.flush()
            except Exception:
                break
|
|
727
|
+
|
|
728
|
+
|
|
729
|
+
def _run(cmd: list[str], *, cwd: pathlib.Path, stdin: Optional[str], timeout: int,
         stderr_log_path: Optional[pathlib.Path] = None,
         pid_file_path: Optional[pathlib.Path] = None,
         live_prefix: Optional[str] = None,
         mirror_stdout: bool = False) -> tuple[int, str, str]:
    """Run a subprocess. Returns (rc, stdout, stderr).

    If LIVE_STREAM is on AND stderr is a TTY, the child's stderr is mirrored
    to our stderr line-by-line as it's produced. stdout is captured silently
    unless `mirror_stdout` is set — duet logs agent final answers to the
    transcript afterwards.

    If `stderr_log_path` is set, the child's stderr is also tee'd line-by-line
    to that file (append mode) — useful for post-hoc forensics on long agent
    turns where the live trace is otherwise lost.

    If `pid_file_path` is set, the child's PID is written there at startup
    and the file is removed when the call returns. External tools can read
    the file + `kill -0 <pid>` to tell apart "duet is alive, agent thinking"
    vs "agent crashed silently". Critical for agents like `claude -p` that
    emit no stderr during their long API call.

    Special return codes produced by this wrapper itself:
      * 127 — `subprocess.Popen` raised FileNotFoundError.
      * 124 — the child exceeded `timeout` seconds; its process tree was
        sent SIGTERM, then SIGKILL if still alive 2 seconds later.

    `stdin` is written to the child (when not None) and the pipe is then
    always closed so the child sees EOF. A child that exits before reading
    its stdin is tolerated on both the write and the close.
    """
    mirror = sys.stderr if (LIVE_STREAM and not RECAP_MODE and sys.stderr.isatty()) else None
    prefix = live_prefix if live_prefix is not None else LIVE_PREFIX
    out_chunks: list[str] = []
    err_chunks: list[str] = []
    stderr_file = None
    if stderr_log_path is not None:
        try:
            stderr_log_path.parent.mkdir(parents=True, exist_ok=True)
            stderr_file = open(stderr_log_path, "a", encoding="utf-8", buffering=1)
            # Header line so appended runs are distinguishable in the log.
            stderr_file.write(
                f"\n# {dt.datetime.now().isoformat(timespec='seconds')} :: "
                f"{' '.join(cmd[:3])}{' …' if len(cmd) > 3 else ''}\n"
            )
        except OSError as e:
            print(f"[duet] warn: stderr log open failed ({stderr_log_path}): {e}",
                  file=sys.stderr)
            stderr_file = None
    try:
        try:
            proc = subprocess.Popen(
                cmd, cwd=str(cwd),
                stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                text=True, bufsize=1,  # line-buffered
                start_new_session=True,
            )
        except FileNotFoundError:
            return 127, "", f"[duet] command not found: {cmd[0]}"
        _register_proc(proc)
        if pid_file_path is not None:
            try:
                pid_file_path.parent.mkdir(parents=True, exist_ok=True)
                # Write atomically so a poller never reads a half-written PID.
                tmp = pid_file_path.with_name(pid_file_path.name + ".tmp")
                tmp.write_text(f"{proc.pid}\n")
                os.replace(tmp, pid_file_path)
            except OSError as e:
                print(f"[duet] warn: pid file write failed ({pid_file_path}): {e}",
                      file=sys.stderr)
        activity_event = threading.Event()
        t_out = threading.Thread(target=_stream_reader,
                                 args=(proc.stdout, out_chunks,
                                       mirror if mirror_stdout else None,
                                       prefix if mirror_stdout else "",
                                       None, activity_event),
                                 daemon=True)
        t_err = threading.Thread(target=_stream_reader,
                                 args=(proc.stderr, err_chunks, mirror, prefix,
                                       stderr_file, activity_event),
                                 daemon=True)
        t_out.start()
        t_err.start()
        # Heartbeat: print elapsed-time hint when proc goes quiet (>20s no
        # stderr/stdout). Useful for `claude -p`, which stays silent on
        # stderr during the API call. No-op if mirror is None (--quiet).
        t_hb = threading.Thread(target=_quiet_heartbeat,
                                args=(proc, mirror, time.monotonic(), activity_event, 20, prefix),
                                daemon=True)
        t_hb.start()

        try:
            if proc.stdin is not None:
                if stdin is not None:
                    try:
                        proc.stdin.write(stdin)
                    except BrokenPipeError:
                        pass
                # close() flushes any text still buffered from the write
                # above, so it can raise BrokenPipeError too when the child
                # has already exited. Popen.communicate() guards both calls
                # the same way.
                try:
                    proc.stdin.close()
                except BrokenPipeError:
                    pass
            proc.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            # Escalate: polite SIGTERM first, SIGKILL if it lingers.
            _signal_proc_tree(proc, signal.SIGTERM)
            try:
                proc.wait(timeout=2)
            except subprocess.TimeoutExpired:
                _signal_proc_tree(proc, signal.SIGKILL)
                proc.wait()
            t_out.join(timeout=2)
            t_err.join(timeout=2)
            return 124, "".join(out_chunks), "".join(err_chunks) + f"\n[duet] TIMEOUT after {timeout}s"
        finally:
            _unregister_proc(proc)
        # Give the reader threads a moment to drain what is left in the pipes.
        t_out.join(timeout=5)
        t_err.join(timeout=5)
        return proc.returncode, "".join(out_chunks), "".join(err_chunks)
    finally:
        if stderr_file is not None:
            try:
                stderr_file.close()
            except Exception:
                pass
        if pid_file_path is not None:
            try:
                pid_file_path.unlink(missing_ok=True)
            except OSError:
                pass
|
|
842
|
+
|
|
843
|
+
|
|
844
|
+
def _agent_finished_reason(exc: Exception) -> str:
    """Map an exception from an agent call to a finished-reason constant.

    An `AgentRunError` carries its own reason; a `subprocess.TimeoutExpired`
    maps to `FINISHED_TIMEOUT`; anything else maps to `FINISHED_AGENT_ERROR`.
    """
    if isinstance(exc, AgentRunError):
        return exc.finished_reason
    timed_out = isinstance(exc, subprocess.TimeoutExpired)
    return FINISHED_TIMEOUT if timed_out else FINISHED_AGENT_ERROR
|
|
850
|
+
|
|
851
|
+
|
|
852
|
+
def _agent_run(cmd: list[str], *, backend: str, cwd: pathlib.Path,
               stdin: Optional[str], timeout: int,
               stderr_log_path: Optional[pathlib.Path],
               pid_file_path: Optional[pathlib.Path]) -> tuple[int, str, str]:
    """Call `_run` for an agent backend, converting failures to AgentRunError.

    Returns `_run`'s `(rc, stdout, stderr)` unchanged on success. A
    `subprocess.TimeoutExpired` becomes an `AgentRunError` with
    `FINISHED_TIMEOUT`; any other exception becomes one with
    `FINISHED_AGENT_ERROR`. `backend` is used only in the error message.
    """
    run_kwargs = {
        "cwd": cwd,
        "stdin": stdin,
        "timeout": timeout,
        "stderr_log_path": stderr_log_path,
        "pid_file_path": pid_file_path,
    }
    try:
        outcome = _run(cmd, **run_kwargs)
    except subprocess.TimeoutExpired as timeout_err:
        raise AgentRunError(
            FINISHED_TIMEOUT,
            f"{backend} timed out after {timeout_err.timeout}s",
        ) from timeout_err
    except Exception as err:
        raise AgentRunError(
            FINISHED_AGENT_ERROR,
            f"{backend} invocation failed: {err}",
        ) from err
    return outcome
|
|
875
|
+
|
|
876
|
+
|
|
877
|
+
# ---------- verification gate ----------
|
|
878
|
+
|
|
879
|
+
def effective_verify_cwd(cfg: DuetConfig,
                         worktree_path: Optional[pathlib.Path]) -> pathlib.Path:
    """Pick the directory the convergence verify command runs in.

    Returns `worktree_path` when one is given, otherwise `cfg.cwd`.
    """
    if worktree_path:
        return worktree_path
    return cfg.cwd
|
|
883
|
+
|
|
884
|
+
|
|
885
|
+
def _tail_text(text: str, max_chars: int = VERIFY_OUTPUT_TAIL_CHARS) -> str:
    """Return `text` unchanged if short enough, else its last `max_chars` chars.

    When truncation happens, the result starts with a one-line
    ``[duet] output truncated`` notice. A zero or negative `max_chars` keeps
    no characters of the tail.
    """
    limit = max(max_chars, 0)
    if len(text) <= limit:
        return text
    # `text[-0:]` is the whole string, so a zero limit needs an explicit
    # empty tail instead of a negative-index slice.
    tail = text[-limit:] if limit else ""
    return f"[duet] output truncated to last {limit} chars\n" + tail
|
|
892
|
+
|
|
893
|
+
|
|
894
|
+
def _display_output(text: str) -> str:
    """Return `text` without trailing whitespace, or "(empty)" if it is falsy."""
    if not text:
        return "(empty)"
    return text.rstrip()
|
|
896
|
+
|
|
897
|
+
|
|
898
|
+
def _format_verify_log(turn_label: str, result: VerifyResult,
                       stdout: str, stderr: str) -> str:
    """Render the body of a verify log file.

    The output is a metadata header (turn, command, cwd, exit code, timeout
    flag, and the error when `result.error` is set) followed by a
    ``## stdout`` and a ``## stderr`` section. An empty stream is rendered
    as ``(empty)``.
    """
    exit_code = result.exit_code if result.exit_code is not None else "n/a"
    timed_out = "yes" if result.timed_out else "no"
    header = [
        "# duet verify",
        f"turn: {turn_label}",
        f"command: {result.cmd}",
        f"cwd: {result.cwd}",
        f"exit_code: {exit_code}",
        f"timed_out: {timed_out}",
    ]
    if result.error:
        header.append(f"error: {result.error}")
    sections = [
        "",
        "## stdout",
        stdout or "(empty)\n",
        "",
        "## stderr",
        stderr or "(empty)\n",
    ]
    return "\n".join(header + sections)
|
|
919
|
+
|
|
920
|
+
|
|
921
|
+
def verify_result_state(result: VerifyResult) -> dict:
    """Convert a verify result into a plain dict.

    Paths (`cwd`, `log_path`) are stringified. The ``error`` key is present
    only when `result.error` is truthy.
    """
    state = dict(
        ok=result.ok,
        command=result.cmd,
        cwd=str(result.cwd),
        exit_code=result.exit_code,
        timed_out=result.timed_out,
        log_path=str(result.log_path),
        stdout_tail=result.stdout_tail,
        stderr_tail=result.stderr_tail,
    )
    if result.error:
        state["error"] = result.error
    return state
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
def format_verify_success_block(result: VerifyResult) -> str:
    """Render the ``[duet verify passed]`` block for a successful verify run.

    The exit code line is always ``exit_code: 0``.
    """
    rows = [
        "[duet verify passed]",
        f"command: {result.cmd}",
        f"cwd: {result.cwd}",
        "exit_code: 0",
        f"log: {result.log_path}",
        "[/duet verify passed]",
    ]
    return "\n".join(rows)
|
|
946
|
+
|
|
947
|
+
|
|
948
|
+
def format_verify_failure_block(result: VerifyResult) -> str:
    """Render the ``[duet verify failed]`` block for a failed verify run.

    Contains the command, cwd and exit code (``n/a`` when there is none),
    optional ``timed_out`` and ``error`` lines, the log path, and the
    stdout/stderr tails formatted by `_display_output`.
    """
    shown_code = "n/a" if result.exit_code is None else result.exit_code
    head = [
        "[duet verify failed]",
        f"command: {result.cmd}",
        f"cwd: {result.cwd}",
        f"exit_code: {shown_code}",
    ]
    flags = []
    if result.timed_out:
        flags.append("timed_out: yes")
    if result.error:
        flags.append(f"error: {result.error}")
    tail = [
        f"log: {result.log_path}",
        "",
        "stdout tail:",
        _display_output(result.stdout_tail),
        "",
        "stderr tail:",
        _display_output(result.stderr_tail),
        "[/duet verify failed]",
    ]
    return "\n".join([*head, *flags, *tail])
|
|
971
|
+
|
|
972
|
+
|
|
973
|
+
def run_verify_command(cfg: DuetConfig, run_dir: pathlib.Path, turn_label: str,
                       worktree_path: Optional[pathlib.Path]) -> VerifyResult:
    """Run the configured verification command for a convergence proposal.

    The command is run as ``sh -c <cfg.verify_cmd>`` in the directory
    chosen by `effective_verify_cwd`, with `cfg.per_turn_timeout` as the
    limit and stdout mirrored live. A log file
    ``turn-<label>-verify.log`` is always written to `run_dir`, including
    when running the command raised; in that case the result has
    ``ok=False``, no exit code, and the exception text in ``error``.

    Raises:
        ValueError: if `cfg.verify_cmd` is empty.
    """
    if not cfg.verify_cmd:
        raise ValueError("run_verify_command called without cfg.verify_cmd")
    cwd = effective_verify_cwd(cfg, worktree_path)
    log_path = run_dir / f"turn-{turn_label}-verify.log"
    pid_path = run_dir / f"turn-{turn_label}-verify.pid"
    started = dt.datetime.now().isoformat(timespec="seconds")
    print(f"[duet] verify turn {turn_label}: {cfg.verify_cmd} (cwd={cwd})")

    # Fields shared by the success and failure results.
    common = {"cmd": cfg.verify_cmd, "cwd": cwd, "log_path": log_path}
    try:
        rc, stdout, stderr = _run(
            ["sh", "-c", cfg.verify_cmd],
            cwd=cwd,
            stdin="",
            timeout=cfg.per_turn_timeout,
            live_prefix=VERIFY_LIVE_PREFIX,
            mirror_stdout=True,
            pid_file_path=pid_path,
        )
        result = VerifyResult(
            ok=(rc == 0),
            exit_code=rc,
            stdout_tail=_tail_text(stdout),
            stderr_tail=_tail_text(stderr),
            timed_out=(rc == 124),  # _run reports a timeout as rc 124
            **common,
        )
    except Exception as err:
        stdout, stderr = "", ""
        result = VerifyResult(
            ok=False,
            exit_code=None,
            stdout_tail="",
            stderr_tail="",
            error=str(err),
            **common,
        )
    finished = dt.datetime.now().isoformat(timespec="seconds")
    preamble = f"started: {started}\nfinished: {finished}\n\n"
    write_text_atomic(
        log_path,
        preamble + _format_verify_log(turn_label, result, stdout, stderr),
    )
    return result
|
|
1025
|
+
|
|
1026
|
+
|
|
1027
|
+
def call_claude(agent: Agent, system_prompt: str, message: str,
                cwd: pathlib.Path, perm_mode: str, timeout: int, dry: bool,
                reasoning: Optional[str] = None,
                stderr_log_path: Optional[pathlib.Path] = None,
                pid_file_path: Optional[pathlib.Path] = None,
                add_dirs: Optional[list[pathlib.Path]] = None) -> tuple[str, Optional[str]]:
    """Run one Claude turn. Returns (assistant_text, new_session_id).

    Invokes ``claude -p <message> --output-format json`` in
    `agent.cwd_override` (or `cwd`), resuming `agent.session_id` when set.
    The reply text is the ``result`` field of the JSON payload; the session
    id is the payload's ``session_id``, falling back to the one already on
    `agent`. With `dry` set no subprocess is started and a canned reply
    ending in the default sentinel is returned.

    Raises:
        AgentRunError: on a non-zero exit (FINISHED_TIMEOUT for rc 124,
            otherwise FINISHED_AGENT_ERROR), or when stdout is not a JSON
            object.
    """
    eff_cwd = agent.cwd_override or cwd
    reasoning_args: list[str] = []
    if reasoning:
        system_prompt = CLAUDE_REASONING_PROMPT_PREFIX.get(reasoning, "") + system_prompt
        claude_value = CLAUDE_REASONING_MAP.get(reasoning, reasoning)
        reasoning_args = ["--effort", claude_value]
    if dry:
        new_sid = agent.session_id or f"dry-claude-{agent.name}-{int(time.time())}"
        wt_note = f" wt={eff_cwd}" if agent.cwd_override else ""
        rn = f" reasoning={reasoning}" if reasoning else ""
        return (
            f"[dry-run claude/{agent.name}{wt_note}{rn}] received {len(message)} chars\n"
            "LGTM rationale: dry-run accepted the harness path and has no real "
            "agent output to review.\n"
            f"{DEFAULT_SENTINEL}"
        ), new_sid
    cmd = ["claude", "-p", message,
           "--output-format", "json",
           "--append-system-prompt", system_prompt,
           "--permission-mode", perm_mode,
           *reasoning_args,
           "--add-dir", str(eff_cwd)]
    # Extra read/write roots for tasks that span outside cwd (e.g. writing
    # ../DECISION_v2.md from a cwd-scoped run). Without these, claude refuses
    # paths outside its allowlist with a generic permission error.
    for d in (add_dirs or []):
        cmd += ["--add-dir", str(d)]
    if agent.session_id:
        cmd += ["--resume", agent.session_id]
    if agent.model:
        cmd += ["--model", agent.model]
    cmd += agent.extra_args
    rc, out, err = _agent_run(
        cmd,
        backend="claude",
        cwd=eff_cwd,
        stdin=None,
        timeout=timeout,
        stderr_log_path=stderr_log_path,
        pid_file_path=pid_file_path,
    )
    if rc != 0:
        reason = FINISHED_TIMEOUT if rc == 124 else FINISHED_AGENT_ERROR
        raise AgentRunError(reason, f"claude exited {rc}\nstderr:\n{err}")
    try:
        payload = json.loads(out)
    except json.JSONDecodeError as e:
        snippet = out[:500].strip()
        raise AgentRunError(
            FINISHED_AGENT_ERROR,
            f"claude returned malformed JSON output: {snippet!r}",
        ) from e
    if not isinstance(payload, dict):
        # Valid JSON that is not an object (null, list, bare string) has no
        # `.get`; report it as an agent error rather than an AttributeError.
        snippet = out[:500].strip()
        raise AgentRunError(
            FINISHED_AGENT_ERROR,
            f"claude returned malformed JSON output: {snippet!r}",
        )
    return (payload.get("result") or "").rstrip(), payload.get("session_id") or agent.session_id
|
|
1088
|
+
|
|
1089
|
+
|
|
1090
|
+
# Codex's `codex exec` prints a line like `session id: 019e12ad-0b1b-7732-bd7b-6acbbd04ab46`
# to stderr near startup; modern builds also re-emit it on resume. We pin to that
# UUID for subsequent resumes so duet doesn't depend on `--last`'s cwd-keyed
# lookup. Anchored to "session id" to avoid false-positives on stray UUIDs in
# tracebacks or path strings; case-insensitive because the label has varied.
_CODEX_UUID_PATTERN = (
    r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
)
_CODEX_SESSION_ID_RE = re.compile(
    r"session[ _-]?id\s*[:=]\s*(" + _CODEX_UUID_PATTERN + r")",
    re.IGNORECASE,
)
# Matches a string that is exactly one UUID and nothing else.
_CODEX_UUID_RE = re.compile(r"\A" + _CODEX_UUID_PATTERN + r"\Z", re.IGNORECASE)


def _parse_codex_session_id(stderr: str) -> Optional[str]:
    """Extract the most recent labelled session UUID from Codex's stderr.

    Only UUIDs introduced by a ``session id`` label are considered. When
    several are present the last one wins, so a resume that prints both an
    inherited and a rotated id pins to the rotated one. The UUID is
    returned lowercased; None is returned for empty input or no match.
    Codex's stdout is never parsed here — it holds the assistant reply, and
    a UUID inside a reply must not be mistaken for the session pin.
    """
    if not stderr:
        return None
    latest: Optional[str] = None
    for hit in _CODEX_SESSION_ID_RE.finditer(stderr):
        latest = hit.group(1)
    if latest is None:
        return None
    return latest.lower()
|
|
1118
|
+
|
|
1119
|
+
|
|
1120
|
+
def call_codex(agent: Agent, system_prompt: str, message: str,
               cwd: pathlib.Path, sandbox: str, timeout: int, dry: bool,
               first_turn: bool, reasoning: Optional[str] = None,
               fast: bool = False,
               stderr_log_path: Optional[pathlib.Path] = None,
               pid_file_path: Optional[pathlib.Path] = None) -> tuple[str, Optional[str]]:
    """Run one Codex turn. Returns (assistant_text, new_session_id).

    Resume strategy: `agent.session_id` is either a UUID parsed from a
    prior turn's stderr (resumed with `codex exec resume <uuid>`), some
    other non-empty value such as the ``"codex-current"`` sentinel (resumed
    with `codex exec resume --last`, which keys on cwd), or ``None`` (a
    fresh `codex exec`). A fresh `codex exec` is also used whenever
    `first_turn` is true.

    Raises:
        AgentRunError: on a non-zero exit (FINISHED_TIMEOUT for rc 124,
            otherwise FINISHED_AGENT_ERROR).
    """
    workdir = agent.cwd_override or cwd
    # Fast mode pins this Codex turn to low reasoning regardless of caller
    # intent. Codex minimal currently rejects the default tool set, while low
    # preserves tool compatibility and still trades depth for latency.
    effort = "low" if fast else reasoning

    if dry:
        dry_sid = agent.session_id or f"dry-codex-{agent.name}-{int(time.time())}"
        fast_note = " fast" if fast else ""
        wt_note = f" wt={workdir}" if agent.cwd_override else ""
        rn = f" reasoning={effort}" if effort else ""
        dry_reply = (
            f"[dry-run codex/{agent.name}{fast_note}{wt_note}{rn}] received {len(message)} chars\n"
            "LGTM rationale: dry-run accepted the harness path and has no real "
            "agent output to review.\n"
            f"{DEFAULT_SENTINEL}"
        )
        return dry_reply, dry_sid

    full_prompt = f"=== ROLE ===\n{system_prompt}\n\n=== MESSAGE FROM PARTNER ===\n{message}"

    effort_flags: list[str] = []
    if effort:
        codex_value = CODEX_REASONING_MAP.get(effort, effort)
        # `medium` is Codex's default; only override when we actually want a
        # different effort level.
        if codex_value != "medium":
            effort_flags = ["-c", f"model_reasoning_effort={codex_value}"]
    if fast:
        # Concise reasoning summaries cut output volume and time-to-first-token
        # on Codex turns. Pairs with low effort above; together they're the
        # "trade depth for latency while keeping tools available" knob.
        effort_flags += ["-c", "model_reasoning_summary=concise"]

    # Codex's `exec` parses options BEFORE the positional prompt in modern
    # builds, and some flags (e.g. --ask-for-approval) have come and gone
    # across versions, so the default flag set stays conservative.
    # `extra_args` lets users add their version's approval/auto flag (e.g.
    # `["--full-auto"]` or `["--yolo"]`) and config overrides (`-c …`).
    #
    # IMPORTANT: `codex exec resume` accepts a SUBSET of `codex exec`'s
    # flags. `--sandbox` and `--cd` are exec-only — they carry over from the
    # resumed session and codex's clap parser rejects them on resume with
    # "unexpected argument '--sandbox' found". They are therefore passed
    # only on a fresh exec.
    common_opts = ["--skip-git-repo-check"]
    if agent.model:
        common_opts += ["--model", agent.model]
    trailing = [*common_opts, *effort_flags, *agent.extra_args]

    if first_turn or not agent.session_id:
        cmd = ["codex", "exec",
               "--sandbox", sandbox, "--cd", str(workdir),
               *trailing, full_prompt]
    else:
        # cwd is set via subprocess.Popen(cwd=…) so codex inherits the right
        # directory regardless of how we resume.
        if _CODEX_UUID_RE.match(agent.session_id):
            # Pinned UUID: robust to parallel codex sessions sharing the cwd
            # because codex looks the session up by id, not by recency.
            resume_target = agent.session_id
        else:
            # Sentinel value (typically "codex-current"): a prior turn
            # happened but no UUID was captured. Fall back to the most
            # recent codex session in this cwd. Caveat: don't run parallel
            # codex sessions in the same cwd while a duet is alive.
            resume_target = "--last"
        cmd = ["codex", "exec", "resume", resume_target, *trailing, full_prompt]

    # codex exec hangs on non-TTY stdin without explicit close (issue #20919)
    rc, out, err = _agent_run(
        cmd,
        backend="codex",
        cwd=workdir,
        stdin="",
        timeout=timeout,
        stderr_log_path=stderr_log_path,
        pid_file_path=pid_file_path,
    )
    if rc != 0:
        reason = FINISHED_TIMEOUT if rc == 124 else FINISHED_AGENT_ERROR
        raise AgentRunError(
            reason,
            f"codex exited {rc}\nstderr:\n{err}\ncmd: {' '.join(cmd[:8])}…",
        )
    # Prefer a freshly-parsed UUID from stderr; fall back to whatever id we
    # were already carrying; finally fall back to the "codex-current"
    # sentinel so the next turn at least knows a prior turn happened.
    fresh_sid = _parse_codex_session_id(err)
    return out.rstrip(), fresh_sid or agent.session_id or "codex-current"
|
|
1222
|
+
|
|
1223
|
+
|
|
1224
|
+
def call_agent(agent: Agent, message: str, cfg: DuetConfig, first_turn_for_agent: bool,
               *, run_dir: Optional[pathlib.Path] = None,
               turn_label: Optional[str] = None) -> str:
    """Send `message` to `agent` via its backend and return the reply text.

    `agent.session_id` is updated with the session id the backend call
    returns. When both `run_dir` and `turn_label` are given, a per-turn
    stderr log and pid file path inside `run_dir` are passed to the backend
    call.

    Raises:
        SystemExit: if `agent.backend` is neither "claude" nor "codex".
    """
    sys_prompt = agent.system_prompt(cfg.sentinel, recap=cfg.recap)
    reasoning = effective_reasoning(agent, cfg.reasoning)

    # Per-turn stderr log + pid file land in the run dir for forensics +
    # liveness checks, sortable by turn number. The pid file is the only
    # reliable signal for "is the agent still alive?" when stderr goes
    # silent (claude -p emits nothing during its API call).
    log_path: Optional[pathlib.Path] = None
    pid_path: Optional[pathlib.Path] = None
    if run_dir is not None and turn_label is not None:
        stem = f"turn-{turn_label}-{agent.name}"
        log_path = run_dir / f"{stem}.stderr.log"
        pid_path = run_dir / f"{stem}.pid"

    if agent.backend == "claude":
        text, new_sid = call_claude(
            agent, sys_prompt, message, cfg.cwd,
            cfg.permission_mode, cfg.per_turn_timeout, cfg.dry_run,
            reasoning=reasoning,
            stderr_log_path=log_path,
            pid_file_path=pid_path,
            add_dirs=cfg.add_dirs,
        )
    elif agent.backend == "codex":
        # Fast mode is scoped to coder-role codex agents so it can't silently
        # downgrade a planner/reviewer when a user pairs `--reasoning max`
        # with `--codex-fast`.
        fast = cfg.codex_fast and agent.role == "coder"
        text, new_sid = call_codex(
            agent, sys_prompt, message, cfg.cwd,
            cfg.sandbox, cfg.per_turn_timeout, cfg.dry_run,
            first_turn=first_turn_for_agent,
            reasoning=reasoning,
            fast=fast,
            stderr_log_path=log_path,
            pid_file_path=pid_path,
        )
    else:
        raise SystemExit(f"unknown backend '{agent.backend}'")

    agent.session_id = new_sid
    return text
|
|
1263
|
+
|
|
1264
|
+
|
|
1265
|
+
def _agent_failure_block(reason: str, exc: Exception, turn_label: str,
                         agent: Agent, run_dir: pathlib.Path) -> str:
    """Build the four-line ``[duet]`` failure notice for a failed agent turn.

    The first line says TIMEOUT when `reason` is `FINISHED_TIMEOUT` and
    AGENT ERROR otherwise; the last line points at the turn's stderr log
    inside `run_dir`.
    """
    if reason == FINISHED_TIMEOUT:
        kind = "TIMEOUT"
    else:
        kind = "AGENT ERROR"
    stderr_log = run_dir / f"turn-{turn_label}-{agent.name}.stderr.log"
    headline = (
        f"[duet] {kind}: turn {turn_label} failed for "
        f"{agent.name} ({agent.backend}/{agent.role})"
    )
    return (
        f"{headline}\n"
        f"[duet] finished_reason: {reason}\n"
        f"[duet] error: {exc}\n"
        f"[duet] stderr log: {stderr_log}"
    )
|
|
1276
|
+
|
|
1277
|
+
# ---------- loop ----------
|
|
1278
|
+
|
|
1279
|
+
class StopFlag:
    """Mutable flag recording that a stop was requested, and why."""

    def __init__(self) -> None:
        self.requested: bool = False
        self.reason: str = ""

    def request(self, reason: str) -> None:
        """Mark the flag as requested and store `reason`."""
        self.requested, self.reason = True, reason
|
|
1286
|
+
|
|
1287
|
+
|
|
1288
|
+
def _install_sigint(stop: StopFlag) -> None:
    """Install a two-stage SIGINT handler driven by `stop`.

    The first Ctrl-C records a stop request on `stop`. A second one, seen
    while the request is already set, kills the active processes and exits
    the interpreter with status 130.
    """
    hard_exit_msg = "\n[duet] second SIGINT — exiting hard."
    soft_stop_msg = ("\n[duet] SIGINT received — finishing current turn, then stopping. "
                     "Press Ctrl-C again to abort immediately.")

    def _on_sigint(signum, frame):
        if stop.requested:
            print(hard_exit_msg, file=sys.stderr)
            _terminate_active_processes(signal.SIGKILL)
            os._exit(130)
        print(soft_stop_msg, file=sys.stderr)
        stop.request("SIGINT")

    signal.signal(signal.SIGINT, _on_sigint)
|
|
1298
|
+
|
|
1299
|
+
|
|
1300
|
+
def _convergence_markers(text: str, sentinel: str) -> tuple[bool, bool]:
    """Return (sentinel_seen, rationale_seen), ignoring fenced code blocks.

    `sentinel_seen` is true when a line outside any fence consists of the
    sentinel alone (surrounding whitespace allowed). `rationale_seen` is
    true when the rationale text found before the sentinel is at least
    `CONVERGENCE_RATIONALE_MIN_CHARS` long after whitespace is collapsed.
    The rationale is what follows an "LGTM rationale:" / "Rationale:" label
    (optionally bulleted or bold) plus the non-blank unfenced lines after it.
    """
    sentinel_re = re.compile(rf"^\s*{re.escape(sentinel)}\s*$")
    rationale_re = re.compile(
        r"^\s*(?:[-*]\s*)?(?:\*\*)?(?:LGTM\s+rationale|Rationale)"
        r"(?:\*\*)?\s*:\s*(.*)$",
        re.IGNORECASE,
    )
    fence_re = re.compile(r"^\s*(`{3,}|~{3,})")

    open_fence: Optional[tuple[str, int]] = None  # (fence char, fence length)
    found_sentinel = False
    pieces: list[str] = []
    gathering = False

    for line in text.splitlines():
        fence = fence_re.match(line)
        if fence is not None:
            marker = fence.group(1)
            if open_fence is None:
                open_fence = (marker[0], len(marker))
            elif marker[0] == open_fence[0] and len(marker) >= open_fence[1]:
                # Same fence character, at least as long: this closes it.
                open_fence = None
            # Fence-looking lines never count as content.
            continue
        if open_fence is not None:
            continue
        if sentinel_re.match(line):
            found_sentinel = True
            # Nothing after the first unfenced sentinel affects either
            # result, so scanning can stop here.
            break
        label = rationale_re.match(line)
        if label is not None:
            gathering = True
            pieces.append(label.group(1).strip())
        elif gathering:
            continuation = line.strip()
            if continuation:
                pieces.append(continuation)

    rationale_text = " ".join(piece for piece in pieces if piece)
    rationale_text = re.sub(r"\s+", " ", rationale_text).strip()
    return found_sentinel, len(rationale_text) >= CONVERGENCE_RATIONALE_MIN_CHARS
|
|
1350
|
+
|
|
1351
|
+
|
|
1352
|
+
def convergence_proposed(text: str, sentinel: str) -> bool:
    """Return True when `text` has both the sentinel and a sufficient rationale."""
    markers = _convergence_markers(text, sentinel)
    has_sentinel, has_rationale = markers
    return has_sentinel and has_rationale
|
|
1355
|
+
|
|
1356
|
+
|
|
1357
|
+
def parse_recap_headers(text: str) -> dict[str, Optional[str]]:
    """Read ``RECAP:`` / ``FILES:`` / ``STATUS:`` headers from a reply.

    Only the first ten lines are inspected, and a header must start at the
    beginning of its line in upper case. A repeated header overwrites the
    earlier value. Blank values are stored as ``None``, and so is any STATUS
    value outside the recognised set.
    """
    allowed_statuses = frozenset((
        "planning", "implementing", "reviewing", "requesting-changes",
        "ready-for-review", "converged",
    ))
    header_re = re.compile(r"^(RECAP|FILES|STATUS):\s*(.*)$")
    headers: dict[str, Optional[str]] = dict.fromkeys(("recap", "files", "status"))
    for candidate in text.splitlines()[:10]:
        hit = header_re.match(candidate)
        if hit is None:
            continue
        name = hit.group(1).lower()
        payload = hit.group(2).strip()
        if name == "status" and payload not in allowed_statuses:
            # Unknown status words are treated as if no status was given.
            headers[name] = None
        else:
            headers[name] = payload if payload else None
    return headers
|
|
1374
|
+
|
|
1375
|
+
|
|
1376
|
+
_FILE_PATH_RE = re.compile(
    r"\b[\w./-]+\.(?:py|md|sh|ts|tsx|js|jsx|json|yaml|yml|toml|html|css|rs|go|java|sql|txt)\b"
)


def extract_files_heuristic(text: str) -> list[str]:
    """Collect up to eight distinct path-like tokens from a reply.

    Matches inside single-line backtick spans are recorded first, then matches
    over the whole text. Each path appears once, at its first sighting.
    """
    cap = 8
    backticked = re.findall(r"`([^`\n]+)`", text)
    # An insertion-ordered dict serves as both the "seen" set and the result.
    ordered: dict[str, None] = {}
    for haystack in [*backticked, text]:
        for hit in _FILE_PATH_RE.finditer(haystack):
            candidate = hit.group(0)
            if len(ordered) < cap and candidate not in ordered:
                ordered[candidate] = None
    return list(ordered)
|
|
1398
|
+
|
|
1399
|
+
|
|
1400
|
+
def derive_status_heuristic(role: str, sentinel_hit: bool) -> str:
    """Pick a fallback STATUS word from the agent's role.

    A sentinel hit always yields "converged"; otherwise the role is looked up
    in a fixed table, and unrecognised roles yield "unknown".
    """
    if sentinel_hit:
        return "converged"
    status_by_role = {
        "planner": "planning",
        "coder": "implementing",
        "reviewer": "reviewing",
        "triage-reviewer": "reviewing",
    }
    return status_by_role.get(role, "unknown")
|
|
1410
|
+
|
|
1411
|
+
|
|
1412
|
+
def _derive_recap_heuristic(text: str) -> str:
    """Derive a one-line fallback recap from the first meaningful line of a reply.

    Blank lines and ``RECAP:`` / ``FILES:`` / ``STATUS:`` header lines are
    skipped. A leading markdown marker is removed from the first remaining
    line, which is then shortened to at most 140 characters.

    Returns:
        The shortened line, or "No concise summary available." when no line
        has any content left.
    """
    # A marker is either a run of bullet/heading/quote characters, or list
    # numbering ("1." / "2)") followed by whitespace or end of line. Demanding
    # the punctuation keeps leading numbers that are real content
    # ("3 tests fail", "2024-01-01 release", "1.5x faster") intact; a bare
    # character class containing digits and dots would strip them.
    marker_re = re.compile(r"^\s*(?:[-*#>]+|\d+[.)](?=\s|$))\s*")
    for line in text.splitlines():
        s = line.strip()
        if not s or re.match(r"^(RECAP|FILES|STATUS):", s):
            continue
        s = marker_re.sub("", s, count=1).strip()
        if s:
            return textwrap.shorten(s, width=140, placeholder="...")
    return "No concise summary available."
|
|
1421
|
+
|
|
1422
|
+
|
|
1423
|
+
def _format_byte_size(byte_size: int) -> str:
    """Render a byte count as ``<n>B`` below 1024, else as one-decimal ``KB``.

    There is no larger unit: sizes of a megabyte or more are still shown in KB.
    """
    unit = 1024
    return f"{byte_size}B" if byte_size < unit else f"{byte_size / unit:.1f}KB"
|
|
1427
|
+
|
|
1428
|
+
|
|
1429
|
+
def _recap_field(parsed: dict[str, Optional[str]],
                 fallbacks: dict[str, str], key: str) -> str:
    """Return the parsed header value for *key*, or a marked fallback.

    A missing or falsy parsed value is replaced by the fallback for *key*
    (or "unknown" when none exists), prefixed with "· ".
    """
    explicit = parsed.get(key)
    if not explicit:
        return f"· {fallbacks.get(key, 'unknown')}"
    return explicit
|
|
1435
|
+
|
|
1436
|
+
|
|
1437
|
+
def format_recap_block(turn_no: int, agent_name: str, role: str,
                       elapsed_s: float, byte_size: int, line_count: int,
                       parsed: dict[str, Optional[str]],
                       fallbacks: dict[str, str],
                       sentinel_hit: bool) -> str:
    """Render one turn's recap block as markdown text.

    The block is a ``## Turn NN | …`` heading with timing, size and line count,
    followed by RECAP / FILES / STATUS lines. Each field uses the parsed header
    when present and the fallback otherwise. A parsed "converged" status is
    discarded when *sentinel_hit* is false, so the fallback status is shown.
    The caller's *parsed* dict is never mutated.
    """
    claims_converged = parsed.get("status") == "converged"
    if claims_converged and not sentinel_hit:
        # Work on a copy so the caller's dict stays untouched.
        parsed = {**parsed, "status": None}
    recap, files, status = [
        _recap_field(parsed, fallbacks, field)
        for field in ("recap", "files", "status")
    ]
    heading = (
        f"## Turn {turn_no:02d} | {agent_name} ({role}) · "
        f"{int(round(elapsed_s))}s · {_format_byte_size(byte_size)} · "
        f"{line_count} lines\n\n"
    )
    convergence_label = "no" if not sentinel_hit else "yes"
    details = (
        f"RECAP: {recap}\n"
        f"FILES: {files}\n"
        f"STATUS: {status} · convergence: {convergence_label}\n\n"
    )
    return heading + details
|
|
1457
|
+
|
|
1458
|
+
|
|
1459
|
+
def _format_live_recap_block(recap_block: str) -> str:
    """Compact a recap block for terminal output.

    Surrounding newlines are trimmed, a leading "## " is dropped from the first
    line, an empty second line is removed, and the result ends with exactly one
    newline.
    """
    rows = recap_block.strip("\n").splitlines()
    if rows:
        if rows[0].startswith("## "):
            rows[0] = rows[0][len("## "):]
        if rows[1:2] == [""]:
            rows.pop(1)
    return "\n".join(rows) + "\n"
|
|
1466
|
+
|
|
1467
|
+
|
|
1468
|
+
def _start_recap_inflight(turn_no: int, agent_name: str, role: str,
                          started_at: float) -> tuple[threading.Event, threading.Thread]:
    """Start a daemon thread that redraws a one-line "running [MM:SS]" status.

    The line is rewritten in place on stdout about once a second until the
    returned event is set. Elapsed time is measured from *started_at* against
    ``time.time()``.

    Returns:
        ``(stop_event, thread)``; set the event to end the redraw loop.
    """
    halt = threading.Event()
    prefix = f"\rTurn {turn_no:02d} | {agent_name} ({role}) · "

    def _tick() -> None:
        while True:
            if halt.is_set():
                break
            minutes, seconds = divmod(int(time.time() - started_at), 60)
            sys.stdout.write(
                f"{prefix}running [{minutes:02d}:{seconds:02d}]\033[K"
            )
            sys.stdout.flush()
            # Sleep up to a second, waking immediately once stop is requested.
            halt.wait(1)

    worker = threading.Thread(target=_tick, daemon=True)
    worker.start()
    return halt, worker
|
|
1485
|
+
|
|
1486
|
+
|
|
1487
|
+
def _stop_recap_inflight(stop_event: threading.Event,
                         thread: threading.Thread) -> None:
    """Signal the status thread to stop, wait briefly, and clear its line.

    The join is bounded to two seconds; the terminal line is erased either way.
    """
    stop_event.set()
    thread.join(timeout=2)
    out = sys.stdout
    out.write("\r\033[K")
    out.flush()
|
|
1493
|
+
|
|
1494
|
+
|
|
1495
|
+
def derive_seed(cfg: DuetConfig, run_dir: Optional[pathlib.Path] = None) -> str:
    """Choose the opening message of the conversation.

    Sources are tried in this order: ``cfg.kickoff``; the lead agent's existing
    session (asked via ``call_agent`` with ``EXTRACT_LATEST_PROMPT``);
    ``cfg.task``.

    Raises:
        SystemExit: when none of the three sources is available.
    """
    opener = cfg.kickoff
    if opener:
        return opener

    lead = cfg.agents[0]
    if not lead.session_id:
        if cfg.task:
            return cfg.task
        raise SystemExit("nothing to start the conversation with — supply --task, "
                         "--kickoff, or --resume-claude <session_id>")

    # The lead has a session: ask it to restate its latest plan/message.
    print(f"[duet] extracting latest message from {lead.backend} session "
          f"{lead.session_id[:8]}…")
    if lead.backend == "claude" and run_dir is not None:
        print(f"[duet] `claude -p` is silent on stderr during the API "
              f"call; expect 30–120s.")
        print(f"[duet] from another terminal: "
              f"duet --status {run_dir.name}")
    return call_agent(lead, EXTRACT_LATEST_PROMPT, cfg,
                      first_turn_for_agent=False,
                      run_dir=run_dir, turn_label="00-extract")
|
|
1516
|
+
|
|
1517
|
+
|
|
1518
|
+
def _setup_run_worktree(
    cfg: DuetConfig, run_id: str, run_dir: pathlib.Path,
) -> tuple[Optional[pathlib.Path], Optional[str]]:
    """Resolve the run's optional git worktree as ``(path, branch)``.

    ``cfg.worktree_path`` (reuse an existing directory) takes precedence over
    ``cfg.worktree`` (create a ``duet/<run_id>`` branch). On success the chosen
    agent's ``cwd_override`` is pointed at the worktree. The path is ``None``
    when no worktree applies or setup fails. A failed create still returns the
    intended branch name alongside the ``None`` path.
    """
    # Any value other than "lead" selects the partner (index 1).
    agent_idx = 0 if cfg.worktree_for == "lead" else 1

    if cfg.worktree_path:
        reused = pathlib.Path(cfg.worktree_path).expanduser().resolve()
        if not reused.is_dir():
            print(f"[duet] WARNING: --worktree-path {reused} doesn't exist. "
                  f"Falling back to same-repo mode.", file=sys.stderr)
            return None, None
        # The branch name is informational only, so any failure yields None.
        branch: Optional[str] = None
        try:
            probe = subprocess.run(
                ["git", "-C", str(reused), "rev-parse", "--abbrev-ref", "HEAD"],
                capture_output=True, text=True, timeout=5,
            )
            if probe.returncode == 0:
                branch = probe.stdout.strip()
        except Exception:
            branch = None
        cfg.agents[agent_idx].cwd_override = reused
        print(f"[duet] reusing worktree: {reused} (branch {branch}, "
              f"agent {cfg.agents[agent_idx].name})")
        return reused, branch

    if not cfg.worktree:
        return None, None

    if not is_git_repo(cfg.cwd):
        print(f"[duet] WARNING: --worktree requested but {cfg.cwd} is not a "
              f"git repo. Falling back to same-repo mode.", file=sys.stderr)
        return None, None

    branch = f"duet/{run_id}"
    # Default location is <run_dir>/wt; a configured worktree root is
    # namespaced by run_id instead.
    if cfg.worktree_root:
        destination = cfg.worktree_root / run_id
    else:
        destination = run_dir / "wt"
    try:
        created = setup_worktree(cfg.cwd, branch, destination)
    except Exception as e:
        print(f"[duet] WARNING: worktree setup failed: {e}. "
              f"Continuing without.", file=sys.stderr)
        return None, branch
    cfg.agents[agent_idx].cwd_override = created
    print(f"[duet] worktree: {created} (branch {branch}, "
          f"agent {cfg.agents[agent_idx].name})")
    return created, branch
|
|
1572
|
+
|
|
1573
|
+
|
|
1574
|
+
def _build_run_state(cfg: DuetConfig, *, turns_used: int, history: list,
                     finished_reason: Optional[str],
                     transcript_path: pathlib.Path,
                     recap_path: pathlib.Path,
                     last_verify: Optional[dict] = None,
                     wt_path: Optional[pathlib.Path] = None,
                     wt_branch: Optional[str] = None) -> dict:
    """Build the dict that is serialised to the run's ``state.json``.

    Every payload carries the task, cwd, agent snapshots, history, finish
    reason, transcript path, verify info, worktree info, ``continue_from`` and
    the current process id. ``recap_path`` is added only when ``cfg.recap`` is
    set. The *history* list is stored by reference, not copied.
    """
    worktree_str = None if not wt_path else str(wt_path)
    payload = dict(
        task=cfg.task,
        cwd=str(cfg.cwd),
        turns_used=turns_used,
        agents=list(map(agent_state, cfg.agents)),
        history=history,
        finished_reason=finished_reason,
        transcript_path=str(transcript_path),
        verify_cmd=cfg.verify_cmd,
        last_verify=last_verify,
        worktree=worktree_str,
        worktree_branch=wt_branch,
        worktree_for=cfg.worktree_for,
        continue_from=cfg.continue_from,
        duet_pid=os.getpid(),
    )
    if cfg.recap:
        payload["recap_path"] = str(recap_path)
    return payload
|
|
1608
|
+
|
|
1609
|
+
|
|
1610
|
+
@dataclasses.dataclass
class _TurnResult:
    """What a single agent turn produced, as read by the turn loop."""

    # Text of the turn as logged (may include appended verify/diff sections).
    reply: str
    # True when the turn counts as a convergence proposal.
    convergence_hit: bool
    # Finish reason when the agent call failed, otherwise None.
    failure_reason: Optional[str]
    # Most recent verify state to carry into the next turn, if any.
    last_verify_state: Optional[dict]
    # Rendered recap block when recap mode is on, otherwise None.
    recap_block: Optional[str]
|
|
1618
|
+
|
|
1619
|
+
|
|
1620
|
+
def _execute_turn(cfg: DuetConfig, *, turn: int, speaker: Agent, last_msg: str,
                  run_dir: pathlib.Path, transcript_path: pathlib.Path,
                  recap_path: pathlib.Path, state_path: pathlib.Path,
                  history: list, seen_first_turn: dict,
                  wt_path: Optional[pathlib.Path], wt_branch: Optional[str],
                  last_verify_state: Optional[dict],
                  log: Callable[..., None]) -> _TurnResult:
    """Run one agent turn and persist its transcript entry and state.

    Steps, in order: call the agent (turning any ``Exception`` into a failure
    block), run the verify command on a convergence proposal, write the recap
    block when recap mode is on, append the worktree diff when the speaker
    works in the run's worktree, log the reply, append to *history*, and
    rewrite ``state.json``.

    *history* and *seen_first_turn* are mutated in place. Stop-flag checks and
    speaker rotation are left to the caller.
    """
    is_first_call = not seen_first_turn[speaker.name]
    guard_codex_shared_cwd_before_call(cfg, speaker, is_first_call)
    started = time.time()
    turn_tag = f"{turn:02d}"

    ticker: Optional[tuple[threading.Event, threading.Thread]] = None
    if not cfg.recap:
        # Print the banner before the subprocess starts so the user sees
        # immediate activity while the agent is silent.
        print(f"\n--- Turn {turn} :: {speaker.name} ({speaker.backend}/{speaker.role}) "
              f"[started {dt.datetime.now().strftime('%H:%M:%S')}] ---")
        sys.stdout.flush()
    else:
        ticker = _start_recap_inflight(turn, speaker.name, speaker.role, started)

    failure_reason: Optional[str] = None
    failure_message: Optional[str] = None
    succeeded = True
    try:
        reply = call_agent(speaker, last_msg, cfg,
                           first_turn_for_agent=is_first_call,
                           run_dir=run_dir, turn_label=turn_tag)
    except Exception as exc:
        succeeded = False
        failure_reason = _agent_finished_reason(exc)
        failure_message = str(exc)
        if cfg.recap and ticker is not None:
            # Clear the live status line before printing the error summary.
            _stop_recap_inflight(*ticker)
            ticker = None
            elapsed = time.time() - started
            print(f"Turn {turn:02d} | {speaker.name} ({speaker.role}) · "
                  f"ERROR after {int(round(elapsed))}s — "
                  f"see turn-{turn:02d}-{speaker.name}.stderr.log")
        reply = _agent_failure_block(failure_reason, exc, turn_tag, speaker, run_dir)

    if cfg.recap and ticker is not None:
        _stop_recap_inflight(*ticker)
    if succeeded:
        guard_codex_shared_cwd_after_call(cfg, speaker, is_first_call)
        seen_first_turn[speaker.name] = True
    elapsed = time.time() - started

    # Convergence and recap parsing look at the agent's own text only, not at
    # the verify/diff sections appended to `reply` below.
    raw_reply = reply
    convergence_hit = convergence_proposed(raw_reply, cfg.sentinel)
    verify_state: Optional[dict] = None
    if convergence_hit and cfg.verify_cmd and not cfg.dry_run:
        outcome = run_verify_command(cfg, run_dir, turn_tag, wt_path)
        verify_state = verify_result_state(outcome)
        last_verify_state = verify_state
        if not outcome.ok:
            # A failed verify withdraws this turn's convergence proposal.
            convergence_hit = False
            reply = raw_reply + "\n\n" + format_verify_failure_block(outcome)
        else:
            reply = raw_reply + "\n\n" + format_verify_success_block(outcome)

    recap_block: Optional[str] = None
    if cfg.recap:
        headers = parse_recap_headers(raw_reply)
        mentioned = extract_files_heuristic(raw_reply)
        fallbacks = {
            "recap": _derive_recap_heuristic(raw_reply),
            "files": ", ".join(mentioned) if mentioned else "none",
            "status": derive_status_heuristic(speaker.role, convergence_hit),
        }
        recap_block = format_recap_block(
            turn, speaker.name, speaker.role, elapsed,
            len(raw_reply.encode("utf-8")), raw_reply.count("\n") + 1,
            headers, fallbacks, convergence_hit,
        )
        append_text_atomic(recap_path, recap_block)

    if wt_path is not None and speaker.cwd_override == wt_path:
        reply = append_worktree_diff(reply, wt_path, wt_branch)

    log(speaker.name, speaker.role, reply)

    entry = {"turn": turn, "agent": speaker.name, "elapsed_s": elapsed,
             "len_chars": len(reply), "session_id": speaker.session_id}
    if failure_reason is not None:
        entry["finished_reason"] = failure_reason
        entry["error"] = failure_message
        entry["stderr_log_path"] = str(
            run_dir / f"turn-{turn:02d}-{speaker.name}.stderr.log")
    if verify_state is not None:
        entry["verify"] = verify_state
    history.append(entry)

    snapshot = _build_run_state(
        cfg, turns_used=turn, history=history, finished_reason=failure_reason,
        transcript_path=transcript_path, recap_path=recap_path,
        last_verify=last_verify_state, wt_path=wt_path, wt_branch=wt_branch,
    )
    write_text_atomic(state_path, json.dumps(snapshot, indent=2))
    return _TurnResult(reply, convergence_hit, failure_reason,
                       last_verify_state, recap_block)
|
|
1723
|
+
|
|
1724
|
+
|
|
1725
|
+
def _allocate_run_dir(cfg: DuetConfig) -> tuple[pathlib.Path, str]:
    """Create the runs directory and a fresh timestamped run directory in it.

    If ``cfg.runs_dir`` cannot be created, a fallback under
    ``~/.duet/runs/<cwd-slug>`` is created and ``cfg.runs_dir`` is repointed to
    it. A catch-all ``.gitignore`` is written when the runs directory has none.
    The run id is a ``YYYYmmdd-HHMMSS`` stamp, with ``-NN`` appended on
    collision, and the run is registered in the home index.

    Returns:
        ``(run_dir, run_id)``.

    Raises:
        SystemExit: when 100 candidate ids are all already taken.
    """
    try:
        cfg.runs_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:  # PermissionError is a subclass of OSError
        slug = re.sub(r"[^a-zA-Z0-9._-]+", "-", str(cfg.cwd)).strip("-")[:80]
        fallback = pathlib.Path.home() / ".duet" / "runs" / slug
        print(f"[duet] cannot create runs dir {cfg.runs_dir}: {e}; "
              f"falling back to {fallback}", file=sys.stderr)
        fallback.mkdir(parents=True, exist_ok=True)
        cfg.runs_dir = fallback

    # Keep run artifacts out of the host repo; only written when absent.
    ignore_file = cfg.runs_dir / ".gitignore"
    if not ignore_file.exists():
        write_text_atomic(ignore_file, "# auto-created by duet — ignores all run artifacts\n"
                                       "# (transcripts, state.json, worktrees) so they don't\n"
                                       "# pollute the host repo. Safe to delete or edit.\n*\n")

    stamp = dt.datetime.now().strftime("%Y%m%d-%H%M%S")
    for attempt in range(100):
        run_id = f"{stamp}-{attempt:02d}" if attempt else stamp
        run_dir = cfg.runs_dir / run_id
        try:
            # mkdir without exist_ok is the uniqueness test: a taken name raises.
            run_dir.mkdir()
        except FileExistsError:
            continue
        break
    else:
        raise SystemExit(f"could not allocate a unique run dir under {cfg.runs_dir}")

    _register_run_in_home_index(run_dir, cfg.cwd)
    return run_dir, run_id
|
|
1763
|
+
|
|
1764
|
+
|
|
1765
|
+
def _dry_run_recap_state(cfg: DuetConfig, transcript_path: pathlib.Path,
                         recap_path: pathlib.Path, state_path: pathlib.Path,
                         history: list) -> dict:
    """Write and return the state for a dry run in recap mode.

    No agent is called: an empty transcript and a state with
    ``finished_reason="dry_run"`` are written, and the done banner is printed.

    Raises:
        SystemExit: when there is no task, kickoff, or lead session id.
    """
    has_opening = cfg.task or cfg.kickoff or cfg.agents[0].session_id
    if not has_opening:
        raise SystemExit("nothing to start the conversation with — supply --task, "
                         "--kickoff, or --resume-claude <session_id>")

    write_text_atomic(transcript_path, "")
    snapshot = _build_run_state(
        cfg, turns_used=0, history=history, finished_reason="dry_run",
        transcript_path=transcript_path, recap_path=recap_path,
    )
    write_text_atomic(state_path, json.dumps(snapshot, indent=2))

    for banner in (
        "[duet] dry-run: agents not called; no recap turn blocks written.",
        f"[duet] done. reason=dry_run. transcript: {transcript_path}",
        f"[duet] recap: {recap_path}",
    ):
        print(banner)
    return snapshot
|
|
1784
|
+
|
|
1785
|
+
|
|
1786
|
+
def _derive_seed_or_failure(
    cfg: DuetConfig, *, run_dir: pathlib.Path, transcript_path: pathlib.Path,
    recap_path: pathlib.Path, state_path: pathlib.Path, history: list,
    wt_path: Optional[pathlib.Path], wt_branch: Optional[str],
    log: Callable[..., None],
) -> tuple[Optional[str], Optional[dict]]:
    """Obtain the opening seed, or record the failure to obtain it.

    Returns ``(seed, None)`` on success. If deriving the seed raises an
    ``Exception``, the failure block is logged with kind ``agent_error``, a
    turn-0 ``seed_extract`` history entry is appended, the state file is
    written, the done banner is printed, and ``(None, state)`` is returned.
    """
    began = time.time()
    try:
        resumes_lead = not cfg.kickoff and cfg.agents[0].session_id
        if resumes_lead:
            guard_codex_shared_cwd_before_call(
                cfg, cfg.agents[0], first_turn_for_agent=False
            )
        seed = derive_seed(cfg, run_dir=run_dir)
    except Exception as exc:
        failure_reason = _agent_finished_reason(exc)
        lead = cfg.agents[0]
        block_text = _agent_failure_block(
            failure_reason, exc, "00-extract", lead, run_dir
        )
        log(lead.name, lead.role, block_text, kind="agent_error")
        stderr_log = run_dir / f"turn-00-extract-{lead.name}.stderr.log"
        history.append({
            "turn": 0,
            "agent": lead.name,
            "kind": "seed_extract",
            "elapsed_s": time.time() - began,
            "len_chars": len(block_text),
            "session_id": lead.session_id,
            "finished_reason": failure_reason,
            "error": str(exc),
            "stderr_log_path": str(stderr_log),
        })
        snapshot = _build_run_state(
            cfg, turns_used=0, history=history, finished_reason=failure_reason,
            transcript_path=transcript_path, recap_path=recap_path,
            wt_path=wt_path, wt_branch=wt_branch,
        )
        write_text_atomic(state_path, json.dumps(snapshot, indent=2))
        print(block_text)
        print(f"\n[duet] done. reason={failure_reason}. transcript: {transcript_path}")
        if cfg.recap:
            print(f"[duet] recap: {recap_path}")
        return None, snapshot
    return seed, None
|
|
1837
|
+
|
|
1838
|
+
|
|
1839
|
+
def run_duet(cfg: DuetConfig) -> dict:
    """Run a full duet session and return the final state payload.

    Allocates the run directory, optionally sets up a worktree, derives the
    seed message, then alternates speakers for up to ``cfg.max_turns`` turns.
    The loop ends early on an agent failure, on two consecutive convergence
    proposals, or on a stop request. Unless the run ended in a timeout or agent
    error, ``ask_force`` is consulted before the final state is written.

    Sets the module-level ``RECAP_MODE`` from ``cfg.recap`` as a side effect.
    """
    global RECAP_MODE
    RECAP_MODE = cfg.recap
    validate_config(cfg)

    run_dir, run_id = _allocate_run_dir(cfg)
    transcript_path = run_dir / "transcript.md"
    recap_path = run_dir / "recap.md"
    state_path = run_dir / "state.json"

    if cfg.recap:
        recap_header = (
            f"# duet recap — {run_dir}\n\n"
            f"run dir: {run_dir}\n"
            f"mode: recap (live)\n"
            f"transcript: {transcript_path}\n\n"
        )
        append_text_atomic(recap_path, recap_header)

    stop = StopFlag()
    _install_sigint(stop)

    # An agent counts as "seen" when it already has a session to resume or has
    # been invoked; a plain task/kickoff seed logged for agent[0] is not an
    # invocation.
    seen_first_turn = {agent.name: bool(agent.session_id) for agent in cfg.agents}
    history: list[dict] = []
    transcript_chunks: list[str] = []

    def log(speaker: str, role: str, text: str, kind: str = "agent") -> None:
        """Append one entry to the transcript and rewrite the transcript file."""
        head = f"\n## {speaker} ({role}) — {kind}\n\n"
        transcript_chunks.append(head + text + "\n")
        write_text_atomic(transcript_path, "".join(transcript_chunks))

    if not cfg.recap:
        print(f"[duet] run dir: {run_dir}")
    else:
        print(f"[duet] run: {run_dir}")
        print("[duet] mode: recap (live)")
        print(f"[duet] transcript: {transcript_path}")
        print(f"[duet] recap: {recap_path}")
    if cfg.verify_cmd:
        print(f"[duet] verify cmd: {cfg.verify_cmd}")
    lead = cfg.agents[0]
    if lead.session_id:
        print(f"[duet] {lead.name} resumes session {lead.session_id}")

    if cfg.dry_run and cfg.recap:
        return _dry_run_recap_state(
            cfg, transcript_path, recap_path, state_path, history)

    wt_path, wt_branch = _setup_run_worktree(cfg, run_id, run_dir)

    if stop.requested:
        # Interrupted before any agent was called.
        early_state = _build_run_state(
            cfg, turns_used=0, history=history,
            finished_reason=FINISHED_FORCE_STOP,
            transcript_path=transcript_path, recap_path=recap_path,
            wt_path=wt_path, wt_branch=wt_branch,
        )
        write_text_atomic(state_path, json.dumps(early_state, indent=2))
        return early_state

    seed, seed_failure_state = _derive_seed_or_failure(
        cfg, run_dir=run_dir, transcript_path=transcript_path,
        recap_path=recap_path, state_path=state_path, history=history,
        wt_path=wt_path, wt_branch=wt_branch, log=log,
    )
    if seed_failure_state is not None:
        return seed_failure_state
    log(cfg.agents[0].name, cfg.agents[0].role, seed, kind="seed")
    last_msg = seed

    # The configured start index decides who answers the seed; the two agents
    # then alternate.
    speaker_idx = cfg.start_speaker_idx
    # Stays at max-turns unless one of the breaks below overrides it.
    finished_reason = FINISHED_MAX_TURNS
    previous_convergence_proposal = False
    last_verify_state: Optional[dict] = None

    for turn in range(1, cfg.max_turns + 1):
        if stop.requested:
            finished_reason = FINISHED_FORCE_STOP
            break
        outcome = _execute_turn(
            cfg, turn=turn, speaker=cfg.agents[speaker_idx], last_msg=last_msg,
            run_dir=run_dir, transcript_path=transcript_path,
            recap_path=recap_path, state_path=state_path,
            history=history, seen_first_turn=seen_first_turn,
            wt_path=wt_path, wt_branch=wt_branch,
            last_verify_state=last_verify_state, log=log,
        )
        last_verify_state = outcome.last_verify_state
        if not cfg.recap:
            print(outcome.reply)
        else:
            print(_format_live_recap_block(outcome.recap_block), end="")

        if outcome.failure_reason is not None:
            finished_reason = outcome.failure_reason
            break
        # Convergence needs proposals in two back-to-back turns.
        if outcome.convergence_hit and previous_convergence_proposal:
            finished_reason = FINISHED_CONVERGED
            break
        if stop.requested:
            finished_reason = FINISHED_FORCE_STOP
            break

        last_msg = outcome.reply
        previous_convergence_proposal = outcome.convergence_hit
        speaker_idx = 1 - speaker_idx

    forced_verify_state = None
    if finished_reason not in (FINISHED_TIMEOUT, FINISHED_AGENT_ERROR):
        finished_reason, forced_verify_state = ask_force(
            cfg, history, transcript_path, state_path,
            last_msg, speaker_idx, seen_first_turn,
            finished_reason, wt_path, wt_branch
        )
    if forced_verify_state is not None:
        last_verify_state = forced_verify_state

    final_state = _build_run_state(
        cfg, turns_used=len(history), history=history,
        finished_reason=finished_reason,
        transcript_path=transcript_path, recap_path=recap_path,
        last_verify=last_verify_state, wt_path=wt_path, wt_branch=wt_branch,
    )
    write_text_atomic(state_path, json.dumps(final_state, indent=2))

    print(f"\n[duet] done. reason={finished_reason}. transcript: {transcript_path}")
    if cfg.recap:
        print(f"[duet] recap: {recap_path}")
    resumable = ", ".join(
        f"{agent.name}={agent.session_id}" for agent in cfg.agents if agent.session_id
    )
    print(f"[duet] resumable session ids — " + resumable)
    if wt_path:
        print(f"[duet] worktree left intact at {wt_path} (branch {wt_branch}).\n"
              f" merge: git -C {cfg.cwd} merge {wt_branch}\n"
              f" review: git -C {wt_path} diff HEAD\n"
              f" drop: git -C {cfg.cwd} worktree remove {wt_path} && "
              f"git -C {cfg.cwd} branch -D {wt_branch}")
    return final_state
|
|
1983
|
+
|
|
1984
|
+
|
|
1985
|
+
def _run_forced_turn(cfg: DuetConfig, *, forced_turn: int, next_speaker: Agent,
                     forced_msg: str, first_turn_for_agent: bool,
                     transcript_path: pathlib.Path,
                     wt_path: Optional[pathlib.Path], wt_branch: Optional[str],
                     history: list, seen_first_turn: dict,
                     last_verify_state: Optional[dict]) -> _TurnResult:
    """Run one human-forced continuation turn (the body of `ask_force`'s loop).

    Mirrors `_execute_turn` but with the `-forced` turn labels, a "forced"
    history flag, and the recap.md path derived from the transcript dir.
    Mutates `history`/`seen_first_turn`, prints the reply/recap, and returns the
    outcome `ask_force` needs to decide whether to keep prompting.

    Args:
        cfg: Run configuration; this function reads `recap`, `sentinel`,
            `verify_cmd` and `dry_run` from it.
        forced_turn: Turn number used for the label, recap block and history.
        next_speaker: Agent that receives `forced_msg`.
        forced_msg: Full prompt text handed to the agent.
        first_turn_for_agent: Forwarded to `call_agent` and the Codex cwd guard.
        transcript_path: Transcript file; its parent is used as the run dir.
        wt_path: Worktree path, or None when no worktree is in use.
        wt_branch: Worktree branch name, passed to `append_worktree_diff`.
        history: Run history list; one entry is appended per call.
        seen_first_turn: Per-agent flag dict; the speaker's entry is set True.
        last_verify_state: Most recent verify state; replaced only when the
            verify command runs during this turn.

    Returns:
        `_TurnResult(reply, convergence_hit, failure_reason, last_verify_state,
        recap_block)`. `failure_reason` is None when the agent call succeeded.
    """
    run_dir = transcript_path.parent
    label = f"{forced_turn:02d}-forced"
    t0 = time.time()
    # Handle pair for the in-flight recap indicator; only started in recap mode.
    inflight: Optional[tuple[threading.Event, threading.Thread]] = None
    if cfg.recap:
        inflight = _start_recap_inflight(forced_turn, next_speaker.name,
                                         next_speaker.role, t0)
    call_succeeded = False
    failure_reason: Optional[str] = None
    failure_message: Optional[str] = None
    try:
        reply = call_agent(next_speaker, forced_msg, cfg,
                           first_turn_for_agent=first_turn_for_agent,
                           run_dir=run_dir, turn_label=label)
        call_succeeded = True
    except Exception as e:
        # A failed call is recorded rather than re-raised: the failure block
        # becomes the "reply" so it still lands in the transcript and history.
        failure_reason = _agent_finished_reason(e)
        failure_message = str(e)
        if cfg.recap and inflight is not None:
            _stop_recap_inflight(*inflight)
            # Cleared so the common stop below does not run a second time.
            inflight = None
        elapsed = time.time() - t0
        print(f"Turn {forced_turn:02d} | {next_speaker.name} "
              f"({next_speaker.role}) · ERROR after "
              f"{int(round(elapsed))}s — see "
              f"turn-{label}-{next_speaker.name}.stderr.log")
        reply = _agent_failure_block(failure_reason, e, label, next_speaker, run_dir)
    if cfg.recap and inflight is not None:
        _stop_recap_inflight(*inflight)
    if call_succeeded:
        guard_codex_shared_cwd_after_call(cfg, next_speaker, first_turn_for_agent)
    elapsed = time.time() - t0
    seen_first_turn[next_speaker.name] = True
    # Keep the untouched agent text; `reply` may gain verify/diff blocks below.
    raw_reply = reply
    convergence_hit = convergence_proposed(reply, cfg.sentinel)
    verify_state: Optional[dict] = None
    if convergence_hit and cfg.verify_cmd and not cfg.dry_run:
        verify_result = run_verify_command(cfg, run_dir, label, wt_path)
        verify_state = verify_result_state(verify_result)
        last_verify_state = verify_state
        if verify_result.ok:
            reply = raw_reply + "\n\n" + format_verify_success_block(verify_result)
        else:
            reply = raw_reply + "\n\n" + format_verify_failure_block(verify_result)
            # A failed verify cancels the convergence proposal.
            convergence_hit = False
    if wt_path is not None and next_speaker.cwd_override == wt_path:
        reply = append_worktree_diff(reply, wt_path, wt_branch)
    recap_block = ""
    if cfg.recap:
        # Recap describes the agent's own reply, so parse raw_reply — before any
        # verify block / worktree diff was appended to `reply` (matches
        # _execute_turn; otherwise FILES and byte/line counts pick up the diff).
        parsed = parse_recap_headers(raw_reply)
        files = extract_files_heuristic(raw_reply)
        fallbacks = {
            "recap": _derive_recap_heuristic(raw_reply),
            "files": ", ".join(files) if files else "none",
            "status": derive_status_heuristic(next_speaker.role, convergence_hit),
        }
        recap_block = format_recap_block(
            forced_turn, next_speaker.name, next_speaker.role, elapsed,
            len(raw_reply.encode("utf-8")), raw_reply.count("\n") + 1,
            parsed, fallbacks, convergence_hit,
        )
        append_text_atomic(run_dir / "recap.md", recap_block)
    append_text_atomic(
        transcript_path,
        f"\n## {next_speaker.name} ({next_speaker.role}) — forced\n\n{reply}\n",
    )
    # The "verify" key is only present when the verify command ran this turn.
    history_entry = {"turn": forced_turn, "agent": next_speaker.name,
                     "forced": True, "len_chars": len(reply),
                     "session_id": next_speaker.session_id,
                     **({"verify": verify_state} if verify_state is not None else {})}
    if failure_reason is not None:
        history_entry["finished_reason"] = failure_reason
        history_entry["error"] = failure_message
        history_entry["stderr_log_path"] = str(
            run_dir / f"turn-{label}-{next_speaker.name}.stderr.log")
    history.append(history_entry)
    # Recap mode shows the compact recap block; otherwise the full reply.
    if cfg.recap:
        print(_format_live_recap_block(recap_block), end="")
    else:
        print(reply)
    return _TurnResult(reply, convergence_hit, failure_reason,
                       last_verify_state, recap_block)
|
|
2083
|
+
|
|
2084
|
+
|
|
2085
|
+
def ask_force(cfg: DuetConfig, history: list, transcript_path: pathlib.Path,
              state_path: pathlib.Path, last_msg: str, speaker_idx: int,
              seen_first_turn: dict, reason: str,
              wt_path: Optional[pathlib.Path] = None,
              wt_branch: Optional[str] = None) -> tuple[str, Optional[dict]]:
    """Post-loop interactive prompt: human can push another turn or accept.

    Does nothing (returns `reason` unchanged) when stdin is not a TTY. Otherwise
    loops: an empty line or EOF accepts the current outcome; any other text is
    appended to the transcript as human feedback and sent, together with
    `last_msg`, to the agent at `speaker_idx` via `_run_forced_turn`. After each
    forced turn the run state is rewritten to `state_path` and the speaker
    alternates.

    Args:
        cfg: Run configuration; `cfg.agents` supplies the two speakers.
        history: Run history list, extended by each forced turn.
        transcript_path: Transcript file the feedback and replies are appended to.
        state_path: state.json path rewritten after every forced turn.
        last_msg: Most recent agent message, prefixed to the feedback.
        speaker_idx: Index (0 or 1) of the agent that speaks next.
        seen_first_turn: Per-agent "has spoken" flags, keyed by agent name.
        reason: Finished reason of the main loop, shown in the prompt.
        wt_path: Worktree path, or None.
        wt_branch: Worktree branch name, or None.

    Returns:
        `(finished_reason, last_verify_state)`. The verify state is None unless
        a forced turn ran the verify command.
    """
    if not sys.stdin.isatty():
        return reason, None
    last_verify_state: Optional[dict] = None
    while True:
        print(f"\n[duet] loop ended (reason={reason}). "
              f"Press Enter to finish, or type feedback to force another turn "
              f"(your text is appended as a human-feedback message and sent "
              f"to the next agent):")
        try:
            line = input("force> ").strip()
        except EOFError:
            # Closed stdin is treated the same as pressing Enter.
            return reason, last_verify_state
        if not line:
            return reason, last_verify_state
        next_speaker = cfg.agents[speaker_idx]
        first_turn_for_agent = not seen_first_turn[next_speaker.name]
        guard_codex_shared_cwd_before_call(cfg, next_speaker, first_turn_for_agent)
        head = f"\n## human — force-feedback (next: {next_speaker.name})\n\n"
        append_text_atomic(transcript_path, head + line + "\n")
        # The agent sees the previous message followed by the human feedback.
        forced_msg = (
            f"{last_msg}\n\n---\n"
            "#### human force-feedback\n"
            f"{line}\n"
        )
        result = _run_forced_turn(
            cfg, forced_turn=len(history) + 1, next_speaker=next_speaker,
            forced_msg=forced_msg, first_turn_for_agent=first_turn_for_agent,
            transcript_path=transcript_path, wt_path=wt_path, wt_branch=wt_branch,
            history=history, seen_first_turn=seen_first_turn,
            last_verify_state=last_verify_state,
        )
        last_verify_state = result.last_verify_state
        # Persist each forced turn so a crash at the next force> prompt doesn't
        # lose it (the --status/--continue durability contract). finished_reason
        # stays None mid-loop (duet is alive at the prompt); run_duet writes the
        # final state with the real reason once ask_force returns.
        # On an agent failure the failure reason is written instead, and the
        # function returns immediately below.
        write_text_atomic(state_path, json.dumps(_build_run_state(
            cfg, turns_used=len(history), history=history,
            finished_reason=result.failure_reason,
            transcript_path=transcript_path,
            recap_path=transcript_path.parent / "recap.md",
            last_verify=last_verify_state, wt_path=wt_path, wt_branch=wt_branch,
        ), indent=2))
        if result.failure_reason is not None:
            return result.failure_reason, last_verify_state
        last_msg = result.reply
        # Two agents only: flip between index 0 and 1.
        speaker_idx = 1 - speaker_idx
        reason = FINISHED_FORCED_CONTINUATION
        if result.convergence_hit:
            return FINISHED_CONVERGED_AFTER_FORCE, last_verify_state
|
|
2141
|
+
|
|
2142
|
+
# ---------- config / cli parsing ----------
|
|
2143
|
+
|
|
2144
|
+
def parse_partner(spec: str, default_role: str = "coder") -> Agent:
    """Build an Agent from a spec such as 'codex' or 'codex:coder'.

    Everything before the first ':' is the backend; everything after it is the
    role, falling back to `default_role` when missing or empty. The agent is
    named '<backend>-<role>'.

    Raises:
        SystemExit: when the backend part of `spec` is empty.
    """
    pieces = spec.split(":", 1)
    backend = pieces[0]
    if backend == "":
        raise SystemExit(f"bad partner spec '{spec}', expected backend or backend:role")
    # Only the text after the *first* colon counts as the role.
    chosen_role = pieces[1] if len(pieces) == 2 and pieces[1] else default_role
    return Agent(name=f"{backend}-{chosen_role}", backend=backend, role=chosen_role)
|
|
2151
|
+
|
|
2152
|
+
|
|
2153
|
+
def normalize_verify_cmd(value, parser: argparse.ArgumentParser) -> Optional[str]:
    """Validate a verify command and return it with surrounding whitespace removed.

    `None` passes through unchanged. Any non-string value, or a string that is
    empty after stripping, is reported through `parser.error` (which exits).
    """
    if value is None:
        return None
    if not isinstance(value, str):
        parser.error("verify_cmd must be a string")
    trimmed = value.strip()
    if trimmed == "":
        parser.error("verify_cmd must not be empty")
    return trimmed
|
|
2162
|
+
|
|
2163
|
+
|
|
2164
|
+
def _slot_name(backend: str, idx: int) -> str:
    """Return '<backend>-lead' for slot 0 and '<backend>-partner' for any other slot."""
    if idx == 0:
        return f"{backend}-lead"
    return f"{backend}-partner"
|
|
2167
|
+
|
|
2168
|
+
|
|
2169
|
+
def _slot_agent(agent: Agent, idx: int, *, rename: bool) -> Agent:
    """Return a copy of `agent`, renamed to its slot name when `rename` is true.

    A new object is always returned (via `dataclasses.replace`), so callers can
    modify the result without touching the input agent.
    """
    overrides = {"name": _slot_name(agent.backend, idx)} if rename else {}
    return dataclasses.replace(agent, **overrides)
|
|
2173
|
+
|
|
2174
|
+
|
|
2175
|
+
def _default_slot_agent(backend: str, idx: int, *, rename: bool) -> Agent:
    """Create a fresh agent for a slot that has no user-supplied agent.

    Slot 0 gets the "planner" role, every other slot "coder". The name is the
    slot name when `rename` is true, otherwise '<backend>-<role>'.
    """
    if idx == 0:
        role = "planner"
    else:
        role = "coder"
    if rename:
        name = _slot_name(backend, idx)
    else:
        name = f"{backend}-{role}"
    return Agent(name=name, backend=backend, role=role)
|
|
2179
|
+
|
|
2180
|
+
|
|
2181
|
+
def _slot_default_role(idx: int) -> str:
    """Return the conventional role for a slot: "planner" for 0, "coder" otherwise."""
    if idx == 0:
        return "planner"
    return "coder"
|
|
2183
|
+
|
|
2184
|
+
|
|
2185
|
+
def _find_backend_idx(agents: list[Agent], backend: str,
                      preferred_idx: int) -> Optional[int]:
    """Locate the agent that uses `backend`.

    The agent at `preferred_idx` wins when it exists and matches; otherwise the
    first matching agent in list order is used. Returns None when no agent
    uses the backend.
    """
    if preferred_idx < len(agents) and agents[preferred_idx].backend == backend:
        return preferred_idx
    return next(
        (pos for pos, candidate in enumerate(agents) if candidate.backend == backend),
        None,
    )
|
|
2193
|
+
|
|
2194
|
+
|
|
2195
|
+
def _force_resume_slot(
    agents: list[Agent],
    *,
    backend: str,
    slot_idx: int,
    session_id: str,
    rename_slots: bool,
) -> list[Agent]:
    """Move/create a resumed backend into its conventional slot.

    If the user already put the backend in that slot, preserve their role. If
    we have to move it from the other slot, reset moved agents to the slot
    default roles so `--resume-codex --lead codex:planner --partner
    claude:coder` becomes the useful `claude/planner + codex/coder` topology.

    Args:
        agents: The two configured agents; indexed as [0] and [1] here.
        backend: Backend name whose agent must end up in `slot_idx`.
        slot_idx: Target slot (0 or 1); the other slot is `1 - slot_idx`.
        session_id: Session id attached to the agent placed in `slot_idx`.
        rename_slots: Forwarded to `_slot_agent` / `_default_slot_agent`.

    Returns:
        A new two-element list; the input list is not modified.
    """
    idx = _find_backend_idx(agents, backend, slot_idx)
    other_idx = 1 - slot_idx

    if idx is None:
        # Backend not configured at all: synthesize a default agent for the slot.
        target = _default_slot_agent(backend, slot_idx, rename=rename_slots)
    else:
        moved = idx != slot_idx
        target = dataclasses.replace(
            agents[idx],
            role=(_slot_default_role(slot_idx) if moved else agents[idx].role),
        )
    target = dataclasses.replace(
        _slot_agent(target, slot_idx, rename=rename_slots),
        session_id=session_id,
    )

    if idx == other_idx:
        # The backend came from the other slot, so the agent that used to sit
        # in the target slot swaps across and takes that slot's default role.
        candidate = agents[slot_idx]
        moved_other = True
    else:
        # The other slot keeps its occupant. When `idx` is None this means the
        # previous occupant of `slot_idx` is replaced by the new default agent.
        candidate = agents[other_idx]
        moved_other = False

    other = dataclasses.replace(
        candidate,
        role=(
            _slot_default_role(other_idx)
            if moved_other else candidate.role
        ),
    )
    other = _slot_agent(other, other_idx, rename=rename_slots)

    out = [agents[0], agents[1]]
    out[slot_idx] = target
    out[other_idx] = other
    return out
|
|
2246
|
+
|
|
2247
|
+
|
|
2248
|
+
def apply_resume_overrides(
    agents: list[Agent],
    *,
    resume_claude: Optional[str] = None,
    resume_codex: Optional[str] = None,
    rename_slots: bool = False,
) -> list[Agent]:
    """Attach CLI resume ids to the matching backends.

    A resumed Claude agent is normalized into the lead slot (index 0) and a
    resumed Codex agent into the partner slot (index 1), in that order. An
    existing role survives only when the backend already sat in its
    conventional slot (see `_force_resume_slot`). When the input does not hold
    exactly two agents, only the per-slot copy/rename pass is applied.

    Returns:
        A new list of agents; the input list is left untouched.
    """
    slotted = [_slot_agent(agent, pos, rename=rename_slots)
               for pos, agent in enumerate(agents)]
    if len(slotted) != 2:
        return slotted

    # (backend, conventional slot, session id) — Claude is applied before Codex.
    resume_plan = (
        ("claude", 0, resume_claude),
        ("codex", 1, resume_codex),
    )
    for backend, slot_idx, session_id in resume_plan:
        if not session_id:
            continue
        slotted = _force_resume_slot(
            slotted,
            backend=backend,
            slot_idx=slot_idx,
            session_id=session_id,
            rename_slots=rename_slots,
        )

    if not rename_slots:
        return slotted
    # Final pass so names reflect the slots the agents ended up in.
    return [_slot_agent(agent, pos, rename=True)
            for pos, agent in enumerate(slotted)]
|
|
2290
|
+
|
|
2291
|
+
|
|
2292
|
+
def load_yaml_or_json(path: pathlib.Path) -> dict:
    """Load a config file as YAML (`.yaml` / `.yml`) or JSON (anything else).

    The file is always decoded as UTF-8 rather than with the platform's locale
    encoding, so the same config parses identically everywhere (and matches how
    `@file` task text is read). An empty YAML document yields `{}` instead of
    `None`, keeping the declared `dict` return type usable by callers.

    Args:
        path: Config file to read.

    Returns:
        The parsed document.

    Raises:
        SystemExit: when a YAML file is given but PyYAML is not installed.
        OSError, UnicodeDecodeError: when the file cannot be read as UTF-8.
        json.JSONDecodeError: when a non-YAML file is not valid JSON.
    """
    text = path.read_text(encoding="utf-8")
    if path.suffix in {".yaml", ".yml"}:
        try:
            # Optional dependency: imported lazily so JSON users don't need it.
            import yaml  # type: ignore
        except ImportError:
            raise SystemExit("PyYAML not installed; convert to JSON or `pip install pyyaml`.")
        loaded = yaml.safe_load(text)
        # safe_load returns None for an empty document.
        return {} if loaded is None else loaded
    return json.loads(text)
|
|
2301
|
+
|
|
2302
|
+
|
|
2303
|
+
def _check_task_size(text: str, parser: argparse.ArgumentParser) -> str:
    """Return `text` unchanged, or report it via `parser.error` when it exceeds TASK_MAX_CHARS."""
    size = len(text)
    if size > TASK_MAX_CHARS:
        parser.error(f"task too large ({size} chars > {TASK_MAX_CHARS}); "
                     "pipe a shorter summary")
    return text
|
|
2308
|
+
|
|
2309
|
+
|
|
2310
|
+
def resolve_at_text(value: Optional[str], option_name: str,
                    parser: argparse.ArgumentParser,
                    stdin_cache: dict[str, str]) -> Optional[str]:
    """Turn a literal / `@file` / `@-` option value into its text.

    - `None` stays `None`.
    - A value not starting with '@' is used literally.
    - `@-` reads stdin; the text is stored in `stdin_cache["stdin"]` so several
      options can share one read.
    - `@<path>` reads that file (with `~` expanded) as UTF-8.

    Every returned text passes through `_check_task_size`. Problems are
    reported through `parser.error`, prefixed with `option_name`.
    """
    if value is None:
        return None
    if value[:1] != "@":
        return _check_task_size(value, parser)

    if value == "@-":
        if "stdin" not in stdin_cache:
            stdin_cache["stdin"] = sys.stdin.read()
        return _check_task_size(stdin_cache["stdin"], parser)

    file_arg = value[1:]
    if file_arg == "":
        parser.error(f"{option_name}: file not found: {file_arg}")
    source = pathlib.Path(file_arg).expanduser()
    if not source.is_file():
        parser.error(f"{option_name}: file not found: {source}")
    try:
        contents = source.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        parser.error(f"{option_name}: file not UTF-8 text: {source}")
    except OSError as e:
        parser.error(f"{option_name}: unable to read file: {source}: {e}")
    return _check_task_size(contents, parser)
|
|
2336
|
+
|
|
2337
|
+
|
|
2338
|
+
def resolve_task_from_cmd(cmd_str: str, cwd: pathlib.Path, timeout: int,
                          parser: argparse.ArgumentParser) -> str:
    """Run `cmd_str` through `sh -c` in `cwd` and return its stdout as the task seed.

    The module-level LIVE_PREFIX is switched to LIVE_PREFIX_TASK for the
    duration of the command and always restored afterwards. A non-zero exit
    status or empty stdout is reported via `parser.error`, including the
    command's stderr. The output is size-checked by `_check_task_size`.
    """
    global LIVE_PREFIX
    saved_prefix = LIVE_PREFIX
    LIVE_PREFIX = LIVE_PREFIX_TASK
    try:
        exit_code, stdout_text, stderr_text = _run(
            ["sh", "-c", cmd_str], cwd=cwd, stdin=None, timeout=timeout)
    finally:
        # Restore even if the command runner raised.
        LIVE_PREFIX = saved_prefix
    if exit_code != 0:
        parser.error(f"--task-from-cmd exited {exit_code}\nstderr:\n{stderr_text}")
    if stdout_text == "":
        parser.error(f"--task-from-cmd produced empty stdout\nstderr:\n{stderr_text}")
    return _check_task_size(stdout_text, parser)
|
|
2353
|
+
|
|
2354
|
+
|
|
2355
|
+
def resolve_seed_inputs(*, task: Optional[str], kickoff: Optional[str],
                        task_from_cmd: Optional[str], cwd: pathlib.Path,
                        timeout: int, parser: argparse.ArgumentParser,
                        stdin_cache: dict[str, str]) -> tuple[Optional[str], Optional[str]]:
    """Resolve the task and kickoff seed texts from their CLI forms.

    `task` and `task_from_cmd` may not both be given. The kickoff is resolved
    first; the task then comes either from running `task_from_cmd` or from the
    literal / `@file` / `@-` value in `task`.

    Returns:
        `(resolved_task, resolved_kickoff)`; either may be None.
    """
    if task is not None and task_from_cmd is not None:
        parser.error("--task and --task-from-cmd are mutually exclusive")
    kickoff_text = resolve_at_text(kickoff, "--kickoff", parser, stdin_cache)
    if task_from_cmd is None:
        task_text = resolve_at_text(task, "--task", parser, stdin_cache)
    else:
        task_text = resolve_task_from_cmd(task_from_cmd, cwd, timeout, parser)
    return task_text, kickoff_text
|
|
2367
|
+
|
|
2368
|
+
|
|
2369
|
+
def choose_runs_dir(raw_runs_dir: Optional[str], cwd_resolved: pathlib.Path) -> pathlib.Path:
    """Pick the directory that run artifacts are written under.

    An explicit `raw_runs_dir` always wins. Otherwise the invocation-relative
    `runs` is used when `cwd_resolved` is the invocation directory, and
    `<cwd_resolved>/.duet/runs` (announced on stderr) when it is not.
    """
    here = pathlib.Path.cwd().resolve()
    if raw_runs_dir is not None:
        return pathlib.Path(raw_runs_dir)
    if cwd_resolved == here:
        return pathlib.Path("runs")
    project_runs = cwd_resolved / ".duet" / "runs"
    print("[duet] --cwd points outside the invocation directory; "
          f"defaulting run artifacts to {project_runs}. "
          "Pass --runs-dir runs to use the legacy invocation-relative path.",
          file=sys.stderr)
    return project_runs
|
|
2381
|
+
|
|
2382
|
+
|
|
2383
|
+
def _cwd_slug(cwd_resolved: pathlib.Path) -> str:
    """Turn a cwd into a `~/.duet/runs/` subdirectory name.

    Each run of characters outside `[a-zA-Z0-9._-]` becomes a single '-',
    leading/trailing dashes are dropped, and the result is cut to 80
    characters. Intentionally the same scheme as the unwritable-cwd fallback
    inside `run_duet`, so a fallback dir and a registered symlink for the same
    cwd land under the same slug.
    """
    dashed = re.sub(r"[^a-zA-Z0-9._-]+", "-", str(cwd_resolved))
    return dashed.strip("-")[:80]
|
|
2388
|
+
|
|
2389
|
+
|
|
2390
|
+
def _register_run_in_home_index(run_dir: pathlib.Path,
                                cwd_resolved: pathlib.Path) -> None:
    """Drop a symlink at `~/.duet/runs/<cwd-slug>/<run_id>` -> `run_dir`.

    `_default_list_paths()` already scans `~/.duet/runs/<slug>/<run_id>/`
    (originally for the unwritable-cwd fallback in `run_duet`). Mirroring
    every newly-created run dir into that tree gives `duet --list` and
    `duet --status <bare-id>` a single home-rooted index of every run
    started under this user, regardless of which project's
    `<cwd>/.duet/runs/` it actually lives in. Best-effort: failures
    (filesystem read-only, symlinks not supported, target slug dir
    occupied by something weird) emit a one-line stderr notice but never
    fail the run.

    Args:
        run_dir: The run directory to register; its final path component is
            used as the link name.
        cwd_resolved: The run's working directory, slugified via `_cwd_slug`.
    """
    # NOTE(review): this lookup sits outside the try blocks below, so an error
    # raised while determining the home directory would propagate — confirm
    # that is acceptable for a "best-effort" helper.
    home_runs = (pathlib.Path.home() / ".duet" / "runs").resolve()
    try:
        run_resolved = run_dir.resolve()
    except OSError:
        return
    # Skip when run_dir already lives under ~/.duet/runs/<slug>/ (the
    # unwritable-cwd fallback already landed there) — registering would
    # be a circular self-reference.
    if home_runs in run_resolved.parents:
        return
    slug = _cwd_slug(cwd_resolved)
    if not slug:
        return  # paranoia: empty slug
    link = home_runs / slug / run_dir.name
    try:
        link.parent.mkdir(parents=True, exist_ok=True)
        if link.is_symlink():
            # An existing symlink is never replaced: both the "already
            # correct" case and the "points elsewhere" case return untouched.
            try:
                target = pathlib.Path(os.readlink(link))
                if target.is_absolute() and target.resolve() == run_resolved:
                    return  # idempotent: already correct
            except OSError:
                pass
            return  # symlink points elsewhere; leave as-is
        if link.exists():
            return  # not a symlink; refuse to clobber
        link.symlink_to(run_resolved)
    except (OSError, NotImplementedError) as exc:
        print(f"[duet] note: home-index symlink failed "
              f"(~/.duet/runs/{slug}/{run_dir.name}): {exc}",
              file=sys.stderr)
|
|
2435
|
+
|
|
2436
|
+
|
|
2437
|
+
# ---------- run-status (`duet --status <run_dir>`) ----------
|
|
2438
|
+
|
|
2439
|
+
def _pid_alive(pid: int) -> bool:
    """Report whether a process with this PID currently exists.

    Probes with signal 0, which delivers nothing but still performs the
    existence and permission checks.
    """
    try:
        os.kill(pid, 0)
    except ProcessLookupError:
        return False
    except PermissionError:
        # The process exists but belongs to another user — still "alive" for us.
        return True
    return True
|
|
2449
|
+
|
|
2450
|
+
|
|
2451
|
+
def _proc_cmdline(pid: int) -> Optional[str]:
    """Best-effort read of a PID's full command line; None on any failure.

    Used to check that a recorded `duet_pid` still belongs to a duet process
    (PIDs get recycled, so an alive-check alone could point at an unrelated
    program). On Linux the text comes from `/proc/<pid>/cmdline` with NUL
    separators turned into spaces; elsewhere it comes from `ps`.
    """
    if sys.platform.startswith("linux"):
        try:
            raw = pathlib.Path(f"/proc/{pid}/cmdline").read_bytes()
        except OSError:
            return None
        return raw.replace(b"\x00", b" ").decode(errors="replace")

    # macOS / BSD: shell out to ps, bounded by a short timeout.
    try:
        completed = subprocess.run(["ps", "-o", "command=", "-p", str(pid)],
                                   capture_output=True, text=True, timeout=2)
    except (subprocess.TimeoutExpired, OSError):
        return None
    listing = completed.stdout.strip()
    if completed.returncode == 0 and listing:
        return listing
    return None
|
|
2473
|
+
|
|
2474
|
+
|
|
2475
|
+
def _is_duet_process(pid: int) -> bool:
    """True if `pid` is alive AND looks like a duet process.

    Guards against stale-PID false positives: a recorded PID may have been
    recycled by an unrelated program. A command line counts as duet when any
    of the following holds:

    - it contains "duet.py" anywhere;
    - its first word is a path whose last segment is "duet";
    - its first word is a Python interpreter and the first following word that
      is not an option is a path whose last segment is "duet".

    The last rule covers the installed-launcher case. When a script with a
    `#!` line is executed, the process's argv starts with the interpreter and
    the script path comes after it, so checking only the first word never
    matches an installed `duet` (symlink or console script without ".py").

    Args:
        pid: Process id to inspect.

    Returns:
        False when the process is gone, its command line cannot be read, or
        none of the rules above match.
    """
    if not _pid_alive(pid):
        return False
    cmdline = _proc_cmdline(pid) or ""
    if "duet.py" in cmdline:
        return True
    # Whitespace splitting is lossy for paths containing spaces, but the
    # command line is only available as flat text here.
    argv = cmdline.split()
    if not argv:
        return False
    launcher = pathlib.Path(argv[0]).name
    if launcher == "duet":
        return True
    # Only look past argv[0] when it is a Python interpreter; otherwise an
    # unrelated command with a "duet" argument (e.g. an editor) would match.
    if launcher.lower().startswith(("python", "pypy")):
        for word in argv[1:]:
            if word.startswith("-"):
                continue  # interpreter option such as -u or -m
            return pathlib.Path(word).name == "duet"
    return False
|
|
2489
|
+
|
|
2490
|
+
|
|
2491
|
+
def print_run_status(arg: str) -> int:
    """Print a one-shot health summary for a duet run. Returns shell exit code:
    0 = run finished cleanly, 1 = still running, 2 = stuck/crashed, 3 = error.

    `arg` may be a path (absolute or relative) OR a bare run id like
    `20260507-082801` — bare ids get resolved against the same default
    search paths as `--list` (./runs/, ./.duet/runs/, ~/.duet/runs/*/).

    Decision order: an in-flight `turn-*.pid` file is checked first (1 when
    its process is alive, 2 when it is gone); then a recorded
    `finished_reason` (0); then the `duet_pid` stored in state.json (1 when
    it is a live duet process, 2 otherwise); with none of these the result
    is 2. Exit code 3 covers an unresolvable run dir and malformed state.json.
    """
    run_dir = _resolve_run_dir(arg)
    if run_dir is None:
        print(f"[duet] no such run dir: {arg}", file=sys.stderr)
        # Extra hint only when the argument looked like a bare run id.
        if "/" not in arg and "\\" not in arg and _RUN_ID_RE.match(arg):
            print(f"[duet] tried bare-id resolution under default paths "
                  "(./runs/, ./.duet/runs/, ~/.duet/runs/*/). "
                  "Use `duet --list` to see what's available.",
                  file=sys.stderr)
        return 3
    state_path = run_dir / "state.json"
    # A missing state.json is tolerated: every lookup below falls back to a default.
    state: dict = {}
    if state_path.exists():
        try:
            state = json.loads(state_path.read_text())
        except json.JSONDecodeError as e:
            print(f"[duet] state.json malformed: {e}", file=sys.stderr)
            return 3
    finished = state.get("finished_reason")
    transcript_display = state.get("transcript_path", run_dir / "transcript.md")
    recap_display = state.get("recap_path")
    if recap_display is None and (run_dir / "recap.md").exists():
        recap_display = run_dir / "recap.md"
    print(f"[duet] {run_dir}")
    print(f" turns_used: {state.get('turns_used', '?')}")
    print(f" finished_reason: {finished!r}")
    if recap_display is not None:
        print(f" recap: {recap_display}")

    # A turn-*.pid file exists only while that turn's subprocess is alive.
    pid_files = sorted(run_dir.glob("turn-*.pid"))
    if pid_files:
        # Only the last file in sorted order is inspected.
        pid_file = pid_files[-1]
        try:
            pid = int(pid_file.read_text().strip())
        except (OSError, ValueError):
            # Unreadable or non-numeric pid file: reported as not alive below.
            pid = None
        # Filename: turn-<label>-<agent>.pid
        stem = pid_file.stem  # turn-02-claude-planner
        # The pid file's mtime is used as the turn's start time.
        started_at = dt.datetime.fromtimestamp(pid_file.stat().st_mtime)
        elapsed = (dt.datetime.now() - started_at).total_seconds()
        alive = _pid_alive(pid) if pid is not None else False
        print(f" in-flight turn: {stem}")
        print(f" pid: {pid} (alive: {alive})")
        print(f" started: {started_at.isoformat(timespec='seconds')} "
              f"({int(elapsed)}s ago)")
        # Heartbeat from the matching stderr log
        log = run_dir / f"{stem}.stderr.log"
        if log.exists():
            log_age = (dt.datetime.now()
                       - dt.datetime.fromtimestamp(log.stat().st_mtime)).total_seconds()
            print(f" last stderr: {int(log_age)}s ago "
                  f"({log.stat().st_size} bytes)")
        if not alive:
            print(" ⚠ pid file present but process is gone — turn likely "
                  "crashed or was killed without cleanup")
            return 2
        return 1
    # No pid files. Either run hasn't started, has finished, or is between
    # turns (in particular, sitting at the post-loop `force>` prompt).
    if finished:
        print(f" done. transcript: {transcript_display}")
        return 0

    # Disambiguate "between turns" from "actually crashed" using the
    # duet_pid recorded in state.json.
    duet_pid = state.get("duet_pid")
    if duet_pid is not None:
        if _is_duet_process(int(duet_pid)):
            print(f" state: between turns / awaiting force> prompt")
            print(f" duet pid: {duet_pid} (alive)")
            history = state.get("history") or []
            if history:
                last = history[-1]
                print(f" last completed: turn {last.get('turn')} "
                      f"({last.get('agent')}) in {last.get('elapsed_s', 0):.1f}s, "
                      f"{last.get('len_chars', 0)} chars")
            return 1
        print(f" ⚠ duet pid {duet_pid} no longer running (or recycled by an "
              "unrelated process); no finished_reason recorded — run died "
              "between turns")
        return 2

    # state.json predates the duet_pid field — keep the old message and the
    # old conservative "looks stuck" exit code so callers don't regress.
    print(" no in-flight turn AND no finished_reason — run may have died "
          "between turns, or hasn't started yet")
    print(" (state.json predates the duet_pid field; can't auto-distinguish "
          "alive-between-turns from crashed)")
    return 2
|
|
2588
|
+
|
|
2589
|
+
|
|
2590
|
+
# ---------- run-list (`duet --list [PATH]`) ----------
|
|
2591
|
+
|
|
2592
|
+
# Status glyphs — same vocabulary as print_run_status, packed for table cols.
# Maps each finished-reason constant to a (glyph, short label) pair. Both
# "converged" reasons map to the same pair.
_LIST_STATUS_FINISHED = {
    FINISHED_CONVERGED: ("✅", "converged"),
    FINISHED_CONVERGED_AFTER_FORCE: ("✅", "converged"),
    FINISHED_MAX_TURNS: ("⏰", "max_turns"),
    FINISHED_FORCE_STOP: ("🔴", "force_stop"),
    FINISHED_TIMEOUT: ("⏱", "timeout"),
    FINISHED_FORCED_CONTINUATION: ("🟡", "forced"),
    FINISHED_AGENT_ERROR: ("⚠", "agent_error"),
}


# Shape of a bare run id: 8 digits, '-', 6 digits, then an optional
# '-<digits>' suffix (e.g. `20260507-082801`). Anchored at both ends.
_RUN_ID_RE = re.compile(r"^\d{8}-\d{6}(?:-\d+)?$")
|
|
2605
|
+
|
|
2606
|
+
|
|
2607
|
+
def _default_list_paths() -> list[pathlib.Path]:
    """Return the directories `duet --list` searches when no PATH is given.

    Order is display order: `./runs`, then `./.duet/runs`, then every
    subdirectory of `~/.duet/runs/` in sorted order. Only existing
    directories are included.
    """
    found: list[pathlib.Path] = [
        candidate
        for candidate in (pathlib.Path.cwd() / "runs",
                          pathlib.Path.cwd() / ".duet" / "runs")
        if candidate.is_dir()
    ]
    home_index = pathlib.Path.home() / ".duet" / "runs"
    if home_index.is_dir():
        # Each subdir under ~/.duet/runs/ is a slug like "Users-volkan-…".
        found.extend(entry for entry in sorted(home_index.iterdir())
                     if entry.is_dir())
    return found
|
|
2621
|
+
|
|
2622
|
+
|
|
2623
|
+
def _resolve_run_dir(arg: str) -> Optional[pathlib.Path]:
    """Map a `--status` argument to a real run directory.

    Accepted forms:
      - an absolute or relative path to an existing directory (`~` expanded);
      - a bare run id like `20260507-082801`, looked up under each of the
        default list paths so users need not remember `runs/` vs `.duet/runs/`.

    When a bare id exists under several genuinely different directories, the
    most recently modified one is used and a note is printed to stderr.

    Returns:
        The resolved path, or None when nothing matches.
    """
    direct = pathlib.Path(arg).expanduser()
    if direct.is_dir():
        return direct.resolve()

    # Bare run id: no path separators and matches the timestamp pattern.
    looks_like_bare_id = ("/" not in arg and "\\" not in arg
                          and _RUN_ID_RE.match(arg))
    if not looks_like_bare_id:
        return None

    # Keyed by resolved real path so a home-index symlink and the real dir it
    # points at collapse into one entry; the first spelling seen is kept.
    by_real_path: dict[pathlib.Path, pathlib.Path] = {}
    for root in _default_list_paths():
        candidate = root / arg
        if not candidate.is_dir():
            continue
        try:
            real = candidate.resolve()
        except OSError:
            continue
        by_real_path.setdefault(real, candidate)

    matches = list(by_real_path.values())
    if not matches:
        return None
    if len(matches) == 1:
        return matches[0].resolve()

    # Same id under distinct dirs is rare (ids are seconds-precise) but
    # possible: prefer the newest and say so.
    newest = max(matches, key=lambda c: c.stat().st_mtime)
    print(f"[duet] note: run id {arg!r} found under multiple roots; "
          f"using most recent: {newest}",
          file=sys.stderr)
    return newest.resolve()
|
|
2667
|
+
|
|
2668
|
+
|
|
2669
|
+
def _load_run_state(run_dir: pathlib.Path,
                    parser: argparse.ArgumentParser,
                    option_name: str) -> dict:
    """Read and parse ``<run_dir>/state.json`` into a dict.

    Every failure is reported through ``parser.error`` with *option_name* as
    the message prefix: a missing file, an unreadable file, undecodable or
    malformed JSON, and a top-level JSON value that is not an object.

    Returns the parsed state dict.
    """
    state_path = run_dir / "state.json"
    if not state_path.is_file():
        parser.error(f"{option_name}: missing state.json in {run_dir}")
    try:
        state = json.loads(state_path.read_text())
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        parser.error(f"{option_name}: state.json malformed: {e}")
    except OSError as e:
        parser.error(f"{option_name}: unable to read state.json: {e}")
    else:
        if isinstance(state, dict):
            return state
        # Callers use `.get` on the result, so a list/str/number top level
        # must be rejected here rather than crash later.
        parser.error(f"{option_name}: state.json malformed: expected a JSON "
                     f"object, got {type(state).__name__}")
    # parser.error exits; this only guards against a parser whose error()
    # unexpectedly returns.
    raise AssertionError("parser.error should have exited")
|
|
2682
|
+
|
|
2683
|
+
|
|
2684
|
+
def _agents_from_state(state: dict,
                       parser: argparse.ArgumentParser,
                       option_name: str) -> list[Agent]:
    """Rebuild the two Agent records stored under ``state["agents"]``.

    Reports through ``parser.error`` (prefixed with *option_name*) when the
    entry is not a two-element list, when an element is not an object, when
    an element lacks a name or backend, or when its ``extra_args`` is not a
    list. Optional string fields that are missing or falsy become None;
    a missing or falsy role becomes "coder".
    """
    entries = state.get("agents")
    if not (isinstance(entries, list) and len(entries) == 2):
        parser.error(f"{option_name}: state.json must contain exactly two agents")

    def _opt_str(record, key):
        # Falsy or absent values map to None; anything else is stringified.
        value = record.get(key)
        return str(value) if value else None

    restored: list[Agent] = []
    for i, entry in enumerate(entries):
        if not isinstance(entry, dict):
            parser.error(f"{option_name}: agents[{i}] is not an object")
        name = entry.get("name")
        backend = entry.get("backend")
        if not (name and backend):
            parser.error(f"{option_name}: agents[{i}] missing name/backend")
        extra = entry.get("extra_args") or []
        if not isinstance(extra, list):
            parser.error(f"{option_name}: agents[{i}].extra_args is not a list")
        restored.append(Agent(
            name=str(name),
            backend=str(backend),
            role=str(entry.get("role") or "coder"),
            role_prompt=_opt_str(entry, "role_prompt"),
            model=_opt_str(entry, "model"),
            session_id=_opt_str(entry, "session_id"),
            extra_args=[str(item) for item in extra],
            reasoning_effort=_opt_str(entry, "reasoning_effort"),
        ))
    return restored
|
|
2713
|
+
|
|
2714
|
+
|
|
2715
|
+
def _next_speaker_idx_from_state(agents: list[Agent], state: dict) -> int:
    """Return the index (0 or 1) of the agent that should speak next.

    The saved history is walked newest-first; the first dict entry whose
    ``agent`` equals one of the agents' names identifies the last speaker,
    and the other index is returned. When no entry matches, the parity of
    ``turns_used`` decides (an unparsable value counts as 0).
    """
    history = state.get("history") or []
    if isinstance(history, list):
        for entry in reversed(history):
            if not isinstance(entry, dict):
                continue
            speaker = entry.get("agent")
            match = next(
                (pos for pos, agent in enumerate(agents) if agent.name == speaker),
                None,
            )
            if match is not None:
                return 1 - match

    try:
        turns_used = int(state.get("turns_used") or 0)
    except (TypeError, ValueError):
        turns_used = 0
    # Normal runs start with agent[1], so even turns mean agent[1] is next.
    return 0 if turns_used % 2 else 1
|
|
2731
|
+
|
|
2732
|
+
|
|
2733
|
+
def _continue_note_from_args(args: argparse.Namespace,
                             cwd: pathlib.Path,
                             timeout: int,
                             parser: argparse.ArgumentParser,
                             stdin_cache: dict[str, str]) -> Optional[str]:
    """Resolve the optional human instruction passed alongside --continue.

    At most one of --task, --kickoff, --task-from-cmd may be given; more
    than one is reported via ``parser.error``. Returns the resolved text,
    or None when no instruction was supplied.
    """
    given = [value is not None
             for value in (args.task, args.kickoff, args.task_from_cmd)]
    if given.count(True) > 1:
        parser.error("--continue accepts only one extra instruction via "
                     "--task, --kickoff, or --task-from-cmd")

    if args.task_from_cmd is not None:
        return resolve_task_from_cmd(args.task_from_cmd, cwd, timeout, parser)
    # The remaining two sources share the same @file/@- resolver.
    for text, flag in ((args.kickoff, "--kickoff"), (args.task, "--task")):
        if text is not None:
            return resolve_at_text(text, flag, parser, stdin_cache)
    return None
|
|
2753
|
+
|
|
2754
|
+
|
|
2755
|
+
def _default_continue_kickoff(run_dir: pathlib.Path,
                              state: dict,
                              next_agent: Agent,
                              user_note: Optional[str],
                              worktree_path: Optional[pathlib.Path]) -> str:
    """Compose the kickoff message handed to the next speaker of a continued run.

    The message lists the previous run dir, its finished_reason and turn
    count, the next speaker, and the transcript path. It adds the recap
    path, worktree path and last completed turn when known, then a guidance
    paragraph that differs for interrupted runs (finished_reason is None)
    versus finished ones. *user_note*, when truthy, is appended under a
    "Human continuation instruction:" heading.
    """
    history = state.get("history") or []
    history_is_list = isinstance(history, list)
    last = history[-1] if history_is_list and history else {}
    transcript = state.get("transcript_path") or str(run_dir / "transcript.md")
    recap = state.get("recap_path")
    finished = state.get("finished_reason")
    turns_display = state.get("turns_used",
                              len(history) if history_is_list else "?")

    parts = [
        "Continue the previous duet run without restarting from scratch.",
        f"Previous run: {run_dir}",
        f"Previous finished_reason: {finished!r}",
        f"Previous turns_used: {turns_display}",
        f"Next speaker: {next_agent.name} ({next_agent.backend}/{next_agent.role})",
        f"Transcript: {transcript}",
    ]
    if recap:
        parts.append(f"Recap: {recap}")
    if worktree_path is not None:
        parts.append(f"Worktree: {worktree_path}")
    if isinstance(last, dict) and last:
        parts.append(
            f"Last completed turn: {last.get('turn')} by {last.get('agent')}"
        )

    if finished is None:
        # No finished_reason was saved, so the run did not end normally.
        guidance = (
            "The previous run appears interrupted or crashed. Inspect the "
            "transcript, stderr logs, and any worktree changes before editing; "
            "keep useful partial work."
        )
    else:
        guidance = (
            "Use the saved session context and artifacts above, then continue "
            "with the next concrete step."
        )
    parts.append(guidance)

    if user_note:
        parts.extend(["", "Human continuation instruction:", user_note])
    return "\n".join(parts)
|
|
2799
|
+
|
|
2800
|
+
|
|
2801
|
+
def build_continue_config(run_arg: str,
                          args: argparse.Namespace,
                          parser: argparse.ArgumentParser,
                          stdin_cache: dict[str, str]) -> DuetConfig:
    """Build the DuetConfig for ``--continue RUN_DIR_OR_ID``.

    Agents, cwd, task and (by default) the worktree come from the previous
    run's state.json. Turn limit, sentinel, timeout, sandbox, permission
    mode and the other flags come from *args*. The kickoff is a generated
    continuation message, optionally extended with a human note taken from
    --task/--kickoff/--task-from-cmd. Invalid input is reported via
    ``parser.error``.
    """
    run_dir = _resolve_run_dir(run_arg)
    if run_dir is None:
        parser.error(f"--continue: no such run dir or id: {run_arg}")
    state = _load_run_state(run_dir, parser, "--continue")
    agents = _agents_from_state(state, parser, "--continue")

    # Older runs (or runs that crashed before the first state.json roll) may
    # have Codex agents that already spoke but have no saved session_id.
    # Without a marker, run_duet would treat the next turn as a fresh
    # `codex exec` and lose the prior session. Plant the legacy
    # "codex-current" sentinel so call_codex resumes via `--last` keyed on cwd.
    history = state.get("history") or []
    if isinstance(history, list):
        spoke = {entry.get("agent") for entry in history
                 if isinstance(entry, dict)}
        for agent in agents:
            needs_marker = (agent.backend == "codex"
                            and not agent.session_id
                            and agent.name in spoke)
            if needs_marker:
                agent.session_id = "codex-current"

    cwd = pathlib.Path(state.get("cwd") or ".").expanduser().resolve()
    timeout = args.timeout
    user_note = _continue_note_from_args(args, cwd, timeout, parser, stdin_cache)
    next_idx = _next_speaker_idx_from_state(agents, state)

    # Worktree precedence: explicit --worktree-path, then the path saved in
    # state.json, then a `wt` directory inside the run dir if one exists.
    raw_worktree = args.worktree_path or state.get("worktree")
    if not raw_worktree:
        legacy_wt = run_dir / "wt"
        if legacy_wt.is_dir():
            raw_worktree = str(legacy_wt)
    worktree_path = None
    if raw_worktree:
        worktree_path = pathlib.Path(raw_worktree).expanduser().resolve()
    worktree_for = str(args.worktree_for or state.get("worktree_for") or "partner")

    kickoff = _default_continue_kickoff(
        run_dir, state, agents[next_idx], user_note, worktree_path
    )
    runs_dir = choose_runs_dir(args.runs_dir, cwd)

    raw_verify = (args.verify_cmd if args.verify_cmd is not None
                  else state.get("verify_cmd"))
    options = dict(
        cwd=cwd,
        agents=agents,
        task=state.get("task"),
        kickoff=kickoff,
        max_turns=args.turns,
        sentinel=args.sentinel,
        per_turn_timeout=timeout,
        runs_dir=runs_dir,
        sandbox=args.sandbox,
        permission_mode=args.permission_mode,
        dry_run=args.dry_run,
        recap=args.recap or bool(state.get("recap_path")),
        verify_cmd=normalize_verify_cmd(raw_verify, parser),
        # The worktree already exists (or there is none); never create one here.
        worktree=False,
        worktree_for=worktree_for,
        worktree_path=worktree_path,
        add_dirs=[pathlib.Path(d).expanduser().resolve() for d in args.add_dirs],
        reasoning=args.reasoning,
        codex_fast=bool(args.codex_fast),
        start_speaker_idx=next_idx,
        continue_from=str(run_dir),
    )
    return DuetConfig(**options)
|
|
2867
|
+
|
|
2868
|
+
|
|
2869
|
+
def _humanize_age(seconds: int) -> str:
    """Render an age in seconds as a compact ``<n><unit> ago`` string.

    Units are seconds (under a minute), minutes (under an hour), hours
    (under a day) and days for anything longer. Negative inputs — e.g. a
    file touched after the caller sampled "now" — are clamped to 0 so the
    result never reads like "-3s ago".
    """
    seconds = max(0, seconds)
    if seconds < 60:
        return f"{seconds}s ago"
    if seconds < 3600:
        return f"{seconds // 60}m ago"
    if seconds < 86400:
        return f"{seconds // 3600}h ago"
    return f"{seconds // 86400}d ago"
|
|
2875
|
+
|
|
2876
|
+
|
|
2877
|
+
def _last_activity_mtime(run_dir: pathlib.Path) -> Optional[float]:
    """Return the newest mtime among state.json and the per-turn files.

    Considers ``state.json``, ``turn-*.pid`` and ``turn-*.stderr.log``
    directly inside *run_dir*. Files that cannot be stat'ed are ignored;
    None is returned when none of them can be.
    """
    paths = [run_dir / "state.json"]
    paths.extend(run_dir.glob("turn-*.pid"))
    paths.extend(run_dir.glob("turn-*.stderr.log"))

    latest = None
    for path in paths:
        try:
            stamp = path.stat().st_mtime
        except OSError:
            continue
        if latest is None or stamp > latest:
            latest = stamp
    return latest
|
|
2888
|
+
|
|
2889
|
+
|
|
2890
|
+
def _classify_run(run_dir: pathlib.Path) -> tuple[str, str, dict]:
    """Classify a run dir for the `--list` table.

    Returns ``(emoji, label, state_dict)``. The state dict is empty when
    state.json is missing, unreadable, or does not hold a JSON object.
    Intended to mirror print_run_status's logic.

    Never raises for a bad state.json: one corrupt run must not abort the
    listing of all the others.
    """
    state_path = run_dir / "state.json"
    if not state_path.is_file():
        return ("❓", "no state.json", {})
    try:
        state = json.loads(state_path.read_text())
    except OSError:
        return ("❓", "unreadable state", {})
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both subclass ValueError.
        return ("❓", "malformed state", {})
    if not isinstance(state, dict):
        # Valid JSON but not an object; `.get` below would fail on it.
        return ("❓", "malformed state", {})

    finished = state.get("finished_reason")
    if finished:
        # Keep the label a string: callers measure it with len().
        reason = finished if isinstance(finished, str) else str(finished)
        emoji, label = _LIST_STATUS_FINISHED.get(reason, ("✅", reason))
        return (emoji, label, state)

    # No finished_reason — running, between turns, or crashed.
    if list(run_dir.glob("turn-*.pid")):
        return ("🟢", "in-flight", state)
    pid = state.get("duet_pid")
    if pid is None:
        return ("⚠", "stuck (no pid)", state)
    try:
        pid_int = int(pid)
    except (TypeError, ValueError):
        # A pid that is not an integer cannot name a live duet process.
        return ("⚠", "duet died", state)
    if _is_duet_process(pid_int):
        return ("🟢", "between turns", state)
    return ("⚠", "duet died", state)
|
|
2912
|
+
|
|
2913
|
+
|
|
2914
|
+
def print_runs_list(explicit_path: Optional[pathlib.Path]) -> int:
    """`duet --list [PATH]` — print one table row per run dir found.

    Searches *explicit_path* when given, otherwise the default list paths.
    Diagnostics ("not a directory", "no runs found") go to stderr and the
    table goes to stdout. Always returns 0.
    """
    if explicit_path is None:
        roots = _default_list_paths()
        if not roots:
            print("[duet] no run dirs found.\n"
                  " Searched ./runs/, ./.duet/runs/, and ~/.duet/runs/*/. "
                  "Pass an explicit path: duet --list <DIR>", file=sys.stderr)
            return 0
    else:
        roots = [explicit_path.expanduser().resolve()]

    table: list[dict] = []
    # Dedupe by resolved real path so a run discovered via both a
    # cwd-relative root and a home-index symlink only shows once. Iteration
    # order in `_default_list_paths()` puts cwd-relative roots first, so the
    # displayed `dir` column prefers the (usually more readable) direct path
    # over the symlink path.
    seen_real: set[pathlib.Path] = set()
    now = time.time()
    for root in roots:
        if not root.is_dir():
            print(f"[duet] {root}: not a directory", file=sys.stderr)
            continue
        for entry in sorted(root.iterdir(), reverse=True):
            if not (entry.is_dir() and _RUN_ID_RE.match(entry.name)):
                continue
            try:
                real = entry.resolve()
            except OSError:
                continue
            if real in seen_real:
                continue
            seen_real.add(real)

            emoji, label, state = _classify_run(entry)
            # Self-heal: backfill the home index for runs created before
            # `_register_run_in_home_index` shipped, or for runs whose
            # `--runs-dir` placed them outside the default tree. The cwd is
            # recorded in state.json (resolved-absolute by main()), so we
            # can compute the same slug used at creation time. Idempotent;
            # the helper swallows its own errors.
            state_cwd = state.get("cwd") if state else None
            if state_cwd:
                _register_run_in_home_index(entry, pathlib.Path(state_cwd))

            mtime = _last_activity_mtime(entry)
            if mtime:
                age = _humanize_age(int(now - mtime))
            else:
                age = "—"
            history = state.get("history") or []
            table.append({
                "emoji": emoji,
                "label": label,
                "id": entry.name,
                "turns": state.get("turns_used", len(history)),
                "age": age,
                "dir": str(entry),
            })

    if not table:
        print(f"[duet] no runs found under: {', '.join(str(r) for r in roots)}",
              file=sys.stderr)
        return 0

    # Newest run id first.
    table.sort(key=lambda r: r["id"], reverse=True)
    # Column widths: the wider of the header and the longest cell.
    w_id = max(len("run id"), *(len(r["id"]) for r in table))
    w_label = max(len("status"), *(len(r["label"]) for r in table))
    w_turns = max(len("turns"), *(len(str(r["turns"])) for r in table))
    w_age = max(len("activity"), *(len(r["age"]) for r in table))

    print(f" {'':2} {'run id':<{w_id}} {'status':<{w_label}} "
          f"{'turns':<{w_turns}} {'activity':<{w_age}} dir")
    print(f" {'':2} {'-'*w_id} {'-'*w_label} {'-'*w_turns} "
          f"{'-'*w_age} ---")
    for r in table:
        print(f" {r['emoji']:2} {r['id']:<{w_id}} {r['label']:<{w_label}} "
              f"{str(r['turns']):<{w_turns}} {r['age']:<{w_age}} {r['dir']}")
    print(f"\n {len(table)} run(s). Per-run health: duet --status <run-id>")
    return 0
|
|
2988
|
+
|
|
2989
|
+
|
|
2990
|
+
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the duet command-line parser with every supported flag.

    Only declares arguments; cross-flag validation (mutually exclusive
    combinations) is not done here.
    """
    ap = argparse.ArgumentParser(
        description="duet — two CLI agents in conversation, with per-agent session memory.")

    # --- Session seeding: resume an agent session or continue a prior run ---
    ap.add_argument("--resume-claude", metavar="SESSION_ID",
                    help="resume an existing Claude session id; harness will pull "
                         "its latest message and feed it to the partner agent.")
    ap.add_argument("--resume-codex", metavar="SESSION_ID",
                    help="(advanced) seed codex with an existing session id.")
    # Stored as `continue_run` because `continue` is a Python keyword.
    ap.add_argument("--continue", metavar="RUN_DIR_OR_ID", dest="continue_run",
                    help="start a new run from an existing run's state.json: "
                         "restore agents/session ids, reuse its worktree when "
                         "available, and send the next agent a continuation kickoff. "
                         "--task/--kickoff/--task-from-cmd may add optional guidance.")

    # --- Seed inputs: what the first message is built from ---
    ap.add_argument("--task", help="task description, @file, or @- stdin "
                                   "(used if no --resume-* and no --kickoff)")
    ap.add_argument("--kickoff", help="explicit first message, @file, or @- stdin "
                                      "to send to the partner agent")
    ap.add_argument("--task-from-cmd", metavar="CMD",
                    help="run shell command with cwd=--cwd and use stdout as the task")

    # --- Topology and run limits ---
    ap.add_argument("--partner", default="codex:coder",
                    help="partner agent spec, e.g. codex:coder, claude:reviewer (default codex:coder)")
    ap.add_argument("--lead", default="claude:planner",
                    help="lead agent spec, e.g. claude:planner (default; ignored if --resume-claude given)")
    ap.add_argument("--cwd", default=".", help="working dir for both agents")
    ap.add_argument("--turns", type=int, default=DEFAULT_TURNS, help=f"max turns (default {DEFAULT_TURNS})")
    ap.add_argument("--sentinel", default=DEFAULT_SENTINEL,
                    help="convergence sentinel; requires an LGTM rationale and "
                         "back-to-back proposals from both agents")
    ap.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT, help="per-turn timeout seconds")
    # default=None lets callers tell "flag not given" apart from a value.
    ap.add_argument("--verify-cmd", metavar="CMD", default=None,
                    help="shell command that must exit 0 before a convergence "
                         "proposal can count. Runs only for valid LGTM+rationale "
                         "proposals; YAML key: `verify_cmd:`.")
    ap.add_argument("--runs-dir", default=None, help="where to save transcripts")

    # --- Backend permissions ---
    ap.add_argument("--sandbox", default="workspace-write",
                    help="codex --sandbox: read-only|workspace-write|danger-full-access")
    ap.add_argument("--permission-mode", default="acceptEdits",
                    help="claude --permission-mode: default|acceptEdits|plan|bypassPermissions")
    ap.add_argument("--config", help="optional YAML/JSON config (overrides flags except --resume-*)")

    # --- Worktree handling ---
    ap.add_argument("--worktree", action="store_true",
                    help="run the partner agent in a throwaway git worktree on a fresh branch; "
                         "the worktree is left intact at the end so you can review/merge/drop it.")
    # default=None (not "partner") so a config file or saved state can supply
    # the value when the flag is absent.
    ap.add_argument("--worktree-for", choices=["partner", "lead"], default=None,
                    help="which agent runs in the worktree (default: partner)")
    ap.add_argument("--worktree-path", metavar="PATH", default=None,
                    help="reuse an EXISTING worktree (e.g. from a previous cancelled run). "
                         "Codex resumes via the saved session UUID (or `--last` for "
                         "older runs); cwd is preserved either way. Skips git "
                         "worktree creation. Mutually exclusive with --worktree.")
    ap.add_argument("--worktree-root", metavar="PATH", default=None,
                    help="parent directory for newly-created worktrees (used with --worktree). "
                         "Each run lands at <PATH>/<run_id>/. Default: <runs_dir>/<run_id>/wt/, "
                         "which is durable across reboots and OS temp-dir cleaners. "
                         "Pass /tmp or $TMPDIR to mimic the pre-fix throwaway behavior.")
    # Repeatable flag collected into the `add_dirs` list.
    ap.add_argument("--add-dir", action="append", metavar="PATH", default=[],
                    dest="add_dirs",
                    help="extra path claude is allowed to read/write outside cwd. "
                         "Repeatable. Without this, tasks that touch ../foo or "
                         "absolute paths outside --cwd silently fail with a "
                         "permission error. YAML key: `add_dirs:` (list).")

    # --- Reasoning effort ---
    ap.add_argument("--reasoning", choices=REASONING_LEVELS, default=None,
                    help="reasoning effort for both agents. Codex: passes "
                         "`-c model_reasoning_effort=<v>` except for medium "
                         "(max → xhigh). Claude: passes `--effort <v>` "
                         "(minimal → low) and adds high/xhigh/max prompt nudges.")
    ap.add_argument("--codex-fast", action="store_true", dest="codex_fast",
                    help="Codex-only fast mode: pin codex coder turns to "
                         "`model_reasoning_effort=low` and "
                         "`model_reasoning_summary=concise`, regardless of "
                         "--reasoning / per-agent reasoning_effort. Trades "
                         "depth for latency on codex coder turns; claude is "
                         "unaffected, so `--reasoning high --codex-fast` is "
                         "a real and useful combo. YAML key: `codex_fast: true`.")

    # --- Read-only inspection modes (no duet is run) ---
    ap.add_argument("--status", metavar="RUN_DIR_OR_ID", default=None,
                    help="don't run a duet — instead print a one-shot health "
                         "summary of an existing run and exit. Accepts a path "
                         "(absolute or relative) OR a bare run id like "
                         "`20260507-082801` (resolved against the same "
                         "default paths as `--list`: ./runs/, ./.duet/runs/, "
                         "~/.duet/runs/*/). Exit codes: 0=done, 1=running, "
                         "2=stuck/crashed, 3=error.")
    # nargs="?" with a const: a bare `--list` yields "__defaults__", while an
    # omitted flag yields None, so the two cases stay distinguishable.
    ap.add_argument("--list", metavar="PATH", nargs="?", const="__defaults__",
                    default=None, dest="list_runs",
                    help="don't run a duet — instead list runs found under "
                         "PATH (or under the default search paths if PATH is "
                         "omitted: ./runs/, ./.duet/runs/, ~/.duet/runs/*/). "
                         "Each row shows status, turns_used, last-activity "
                         "age, and dir. Pair with `--status <run-id>` to drill "
                         "into a specific run.")

    # --- Output and debugging ---
    ap.add_argument("--quiet", action="store_true",
                    help="don't mirror subprocess stderr to your terminal in real-time. "
                         "By default, duet prints Codex's live progress as it works.")
    ap.add_argument("--recap", action="store_true",
                    help="compact per-turn debug view; suppresses live stderr mirror "
                         "and writes recap.md next to transcript.md")
    ap.add_argument("--dry-run", action="store_true", help="don't actually call CLIs")
    return ap
|
|
3087
|
+
|
|
3088
|
+
|
|
3089
|
+
def _resolve_opt_path(*candidates: object) -> Optional[pathlib.Path]:
    """Return the first truthy candidate as an expanded, resolved path.

    Candidates are checked in argument order, so passing the CLI value
    before the config-file value gives the CLI flag precedence. Returns
    None when every candidate is empty/falsy.
    """
    chosen = next((c for c in candidates if c), None)
    if chosen is None:
        return None
    return pathlib.Path(str(chosen)).expanduser().resolve()
|
|
3096
|
+
|
|
3097
|
+
|
|
3098
|
+
def _build_cfg_from_yaml(args: argparse.Namespace, ap: argparse.ArgumentParser,
                         stdin_cache: dict) -> DuetConfig:
    """Build a DuetConfig from a --config YAML/JSON file.

    CLI flags only fill seed inputs (task/kickoff/task-from-cmd) when the
    file specifies none of them. A handful of flags (runs_dir, verify_cmd,
    worktree*, add_dirs, reasoning, recap, codex_fast) override or OR with
    their file values; --resume-* still apply on top. Everything else
    (cwd, turns, sentinel, timeout, sandbox, permission mode, dry_run) is
    taken from the file, falling back to the built-in defaults.

    Keys that are present but null (e.g. a bare ``add_dirs:`` in YAML) are
    treated like absent keys for ``cwd``, ``agents`` and ``add_dirs``.
    """
    raw = load_yaml_or_json(pathlib.Path(args.config))
    cfg_cwd = pathlib.Path(raw.get("cwd") or ".").expanduser().resolve()
    cfg_timeout = int(raw.get("per_turn_timeout", DEFAULT_TIMEOUT))

    raw_task = raw.get("task")
    raw_kickoff = raw.get("kickoff")
    raw_task_from_cmd = raw.get("task_from_cmd")
    if raw_task is None and raw_kickoff is None and raw_task_from_cmd is None:
        # The file names no seed at all, so the CLI seeds apply as a group.
        raw_task = args.task
        raw_kickoff = args.kickoff
        raw_task_from_cmd = args.task_from_cmd
    task, kickoff = resolve_seed_inputs(
        task=raw_task,
        kickoff=raw_kickoff,
        task_from_cmd=raw_task_from_cmd,
        cwd=cfg_cwd,
        timeout=cfg_timeout,
        parser=ap,
        stdin_cache=stdin_cache,
    )
    raw_runs_dir = args.runs_dir if args.runs_dir is not None else raw.get("runs_dir")
    # Build agents before verify_cmd so a bad agent field surfaces first (parity
    # with the pre-refactor order) when a config is invalid in several ways.
    agents = [Agent(**a) for a in (raw.get("agents") or [])]
    verify_cmd = normalize_verify_cmd(
        args.verify_cmd if args.verify_cmd is not None else raw.get("verify_cmd"),
        ap,
    )
    cfg = DuetConfig(
        cwd=cfg_cwd,
        agents=agents,
        task=task,
        kickoff=kickoff,
        max_turns=int(raw.get("max_turns", DEFAULT_TURNS)),
        sentinel=raw.get("sentinel", DEFAULT_SENTINEL),
        per_turn_timeout=cfg_timeout,
        runs_dir=choose_runs_dir(raw_runs_dir, cfg_cwd),
        sandbox=raw.get("sandbox", "workspace-write"),
        permission_mode=raw.get("permission_mode", "acceptEdits"),
        dry_run=bool(raw.get("dry_run", False)),
        recap=bool(raw.get("recap", False)) or args.recap,
        verify_cmd=verify_cmd,
        worktree=bool(raw.get("worktree", False)) or args.worktree,
        # CLI first, like worktree_path/worktree_root below: --worktree-for
        # defaults to None precisely so an explicit flag can win over the file.
        worktree_for=args.worktree_for or raw.get("worktree_for") or "partner",
        worktree_path=_resolve_opt_path(args.worktree_path, raw.get("worktree_path")),
        worktree_root=_resolve_opt_path(args.worktree_root, raw.get("worktree_root")),
        add_dirs=[
            pathlib.Path(d).expanduser().resolve()
            for d in (args.add_dirs or raw.get("add_dirs") or [])
        ],
        reasoning=args.reasoning or raw.get("reasoning"),
        codex_fast=bool(args.codex_fast or raw.get("codex_fast", False)),
    )
    cfg.agents = apply_resume_overrides(
        cfg.agents,
        resume_claude=args.resume_claude,
        resume_codex=args.resume_codex,
    )
    return cfg
|
|
3162
|
+
|
|
3163
|
+
|
|
3164
|
+
def _build_cfg_from_cli(args: argparse.Namespace, ap: argparse.ArgumentParser,
                        stdin_cache: dict) -> DuetConfig:
    """Build a DuetConfig from --lead/--partner and the plain CLI flags.

    The two agents are parsed from their specs (lead defaults to the
    planner role, partner to coder). Then --resume-* are attached to the
    matching backend (rename_slots=True) so an explicit topology that puts
    a resumed agent in the "wrong" slot still routes its session id
    correctly.
    """
    workdir = pathlib.Path(args.cwd).expanduser().resolve()
    seed_task, seed_kickoff = resolve_seed_inputs(
        task=args.task,
        kickoff=args.kickoff,
        task_from_cmd=args.task_from_cmd,
        cwd=workdir,
        timeout=args.timeout,
        parser=ap,
        stdin_cache=stdin_cache,
    )

    lead = parse_partner(args.lead, default_role="planner")
    partner = parse_partner(args.partner, default_role="coder")
    duo = apply_resume_overrides(
        [lead, partner],
        resume_claude=args.resume_claude,
        resume_codex=args.resume_codex,
        rename_slots=True,
    )

    options = dict(
        cwd=workdir,
        agents=duo,
        task=seed_task,
        kickoff=seed_kickoff,
        max_turns=args.turns,
        sentinel=args.sentinel,
        per_turn_timeout=args.timeout,
        runs_dir=choose_runs_dir(args.runs_dir, workdir),
        sandbox=args.sandbox,
        permission_mode=args.permission_mode,
        dry_run=args.dry_run,
        recap=args.recap,
        verify_cmd=normalize_verify_cmd(args.verify_cmd, ap),
        worktree=args.worktree,
        worktree_for=args.worktree_for or "partner",
        worktree_path=_resolve_opt_path(args.worktree_path),
        worktree_root=_resolve_opt_path(args.worktree_root),
        add_dirs=[pathlib.Path(d).expanduser().resolve() for d in args.add_dirs],
        reasoning=args.reasoning,
        codex_fast=bool(args.codex_fast),
    )
    return DuetConfig(**options)
|
|
3211
|
+
|
|
3212
|
+
|
|
3213
|
+
def _warn_codex_fast_scope(cfg: DuetConfig) -> None:
    """Report on stderr how far --codex-fast reaches for this configuration.

    Does nothing when ``cfg.codex_fast`` is off. With no codex agent in the
    coder role it prints a WARNING and switches ``cfg.codex_fast`` off.
    With codex coders plus other codex roles it prints a note naming the
    agents that keep their normal reasoning effort. With codex coders only
    it stays silent.
    """
    if not cfg.codex_fast:
        return

    # Partition the codex agents by role in one pass; other backends are ignored.
    coders = []
    others = []
    for agent in cfg.agents:
        if agent.backend != "codex":
            continue
        (coders if agent.role == "coder" else others).append(agent)

    if not coders:
        print(
            "[duet] WARNING: --codex-fast had no effect — "
            "no codex agent has role=coder in this duet. "
            "Fast mode applies only to codex:coder; set per-agent "
            "`reasoning_effort: low` if you really want fast on a "
            "non-coder role.",
            file=sys.stderr,
        )
        cfg.codex_fast = False
        return

    if others:
        roles = ", ".join(f"{a.name}({a.role})" for a in others)
        print(
            f"[duet] note: --codex-fast applies only to codex:coder; "
            f"non-coder codex agents [{roles}] keep their normal "
            f"reasoning effort.",
            file=sys.stderr,
        )
|
|
3243
|
+
|
|
3244
|
+
|
|
3245
|
+
def main() -> int:
    """Command-line entry point: parse flags, build the config, run the duet.

    Flow:
      1. ``--status`` and ``--list`` are read-only; each returns its
         printer's result immediately.
      2. Conflicting flag combinations are rejected via the parser's
         ``error()``.
      3. The module-level ``LIVE_STREAM`` switch is set from ``--quiet``.
      4. The config is built from a continued run, a config file, or the
         plain CLI flags, in that order of precedence, then validated.
      5. Unless this is a dry run, a warning is printed for every backend
         executable that is not on PATH (the run is still attempted).
      6. ``run_duet`` is invoked.

    Returns:
        The read-only printer's return value for ``--status`` / ``--list``,
        otherwise 0 once ``run_duet`` returns.

    Raises:
        SystemExit: if the built config has both ``worktree`` and
            ``worktree_path`` set.
    """
    global LIVE_STREAM

    parser = _build_arg_parser()
    args = parser.parse_args()

    # `--status` is read-only: print run health and exit. Skip everything below.
    if args.status:
        return print_run_status(args.status)

    # `--list` is read-only: print the run-dir table and exit.
    if args.list_runs is not None:
        if args.list_runs == "__defaults__":
            runs_dir = None
        else:
            runs_dir = pathlib.Path(args.list_runs)
        return print_runs_list(runs_dir)

    # Flag combinations that cannot be honoured together, checked in order.
    flag_conflicts = (
        (
            args.worktree and args.worktree_path,
            "--worktree and --worktree-path are mutually exclusive",
        ),
        (
            args.continue_run and args.config,
            "--continue and --config are mutually exclusive",
        ),
        (
            args.continue_run and (args.resume_claude or args.resume_codex),
            "--continue restores session ids from state.json; do not also pass --resume-*",
        ),
        (
            args.continue_run and args.worktree,
            "--continue reuses the saved worktree; use --worktree-path to override it",
        ),
    )
    for clashes, message in flag_conflicts:
        if clashes:
            parser.error(message)

    # Live-stream subprocess stderr unless --quiet
    LIVE_STREAM = not args.quiet

    stdin_cache: dict[str, str] = {}
    if args.continue_run:
        cfg = build_continue_config(args.continue_run, args, parser, stdin_cache)
        next_speaker = cfg.agents[cfg.start_speaker_idx].name
        print(f"[duet] continuing run {args.continue_run} "
              f"(next: {next_speaker})")
    elif args.config:
        cfg = _build_cfg_from_yaml(args, parser, stdin_cache)
    else:
        cfg = _build_cfg_from_cli(args, parser, stdin_cache)

    validate_config(cfg, parser)
    validate_reasoning(cfg.reasoning, "config reasoning")
    for agent in cfg.agents:
        validate_reasoning(agent.reasoning_effort, f"agent {agent.name} reasoning_effort")
    # Re-check on the built config: the values may come from somewhere other
    # than the CLI flags tested above.
    if cfg.worktree and cfg.worktree_path:
        raise SystemExit("--worktree and --worktree-path/worktree_path are mutually exclusive")

    _warn_codex_fast_scope(cfg)

    # Sanity: are CLIs on PATH?
    if not cfg.dry_run:
        backends = {agent.backend for agent in cfg.agents}
        for backend in backends:
            if shutil.which(backend) is not None:
                continue
            print(f"[duet] WARNING: '{backend}' not on PATH — this run will fail. "
                  f"Install it or use --dry-run.", file=sys.stderr)

    run_duet(cfg)
    return 0
|
|
3300
|
+
|
|
3301
|
+
|
|
3302
|
+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|