PyPI - handoff-cli - Versions diffs - 0.3.0__py3-none-any.whl - Mend

handoff-cli 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

cli/__init__.py +3 -0
cli/backend.py +224 -0
cli/backend_types.yaml +91 -0
cli/commands/__init__.py +0 -0
cli/commands/env.py +30 -0
cli/commands/init.py +129 -0
cli/commands/list.py +81 -0
cli/commands/resume.py +179 -0
cli/commands/run.py +211 -0
cli/commands/tail.py +48 -0
cli/config.py +351 -0
cli/core.py +302 -0
cli/jsonl_parser.py +182 -0
cli/jsonl_viewer.py +440 -0
cli/main.py +98 -0
cli/skills/handoff-codex/SKILL.md +77 -0
cli/skills/handoff-ds/SKILL.md +77 -0
cli/skills/handoff-ds.toml +52 -0
cli/skills/handoff-opus/SKILL.md +77 -0
cli/stream.py +286 -0
cli/tui.py +317 -0
cli/user_config_template.yaml +31 -0
handoff_cli-0.3.0.dist-info/METADATA +7 -0
handoff_cli-0.3.0.dist-info/RECORD +26 -0
handoff_cli-0.3.0.dist-info/WHEEL +4 -0
handoff_cli-0.3.0.dist-info/entry_points.txt +2 -0

cli/main.py ADDED Viewed

@@ -0,0 +1,98 @@
+"""handoff main dispatch — import this from the entry point."""
+import os
+import sys
+from . import __version__
+def usage(config=None):
+    """Print the command-line usage summary to stdout.
+    `config` is accepted for call compatibility; this function does not read it.
+    """
+    help_text = """usage:
+  handoff --help
+  handoff env
+  handoff init      [-y|--yes]
+  handoff list      [--uuid] [--cwd]
+  handoff run       [--backend <name>] [--cwd <dir>] [--pro] (<input-file|-> | --text <prompt...>)
+  handoff resume    [<run-id|seq>] [--pro] [--cwd <dir>] [(<input-file|-> | --text <prompt...>)]
+  handoff tail [<run-id|seq>]
+  handoff env              — print config / data paths (works even with broken config)
+  handoff list             — browse and inspect your past sessions
+  handoff run --text hi    — quick smoke-test / debug your config.yaml
+  handoff resume <seq>     — reopen a past conversation (interactive)
+  handoff resume <seq> -   — dispatch a follow-up task to that conversation (heredoc/--text)
+  handoff tail             — live-tail a run's stream
+Run ids: hd-<MMDD>-<SEQ_CODE>  (seq_code: daily counter, 01..99, A0..ZZ)
+--cwd defaults to the current directory of the calling process.
+--backend picks a backend (default: first entry in config.yaml backends).
+--pro uses the backend's pro_model. A resume stays on its original backend."""
+    print(help_text)
+def main():
+    """Entry point: dispatch `sys.argv[1]` to the matching handoff subcommand.
+    Command modules are imported lazily inside each branch, so `init` and
+    `env` are handled (and return) before `Config` is imported or constructed.
+    With no arguments, runs the init flow when ~/.handoff/config.yaml is
+    missing; otherwise prints usage and exits with status 2. An unknown
+    subcommand also prints usage and exits with status 2.
+    """
+    # Run legacy migration early — before any config check — so that an
+    # existing legacy dir is renamed to ~/.handoff before we look for config.
+    from .core import _migrate_legacy_state
+    _migrate_legacy_state()
+    if len(sys.argv) < 2:
+        # bare `handoff`: first-run users get the init flow instead of an error
+        config_path = os.path.join(os.path.expanduser("~"), ".handoff", "config.yaml")
+        if not os.path.isfile(config_path):
+            from .commands.init import run_init
+            run_init()
+            return
+        usage()
+        sys.exit(2)
+    subcmd = sys.argv[1]
+    rest = sys.argv[2:]
+    if subcmd in ("-h", "--help"):
+        usage()
+        return
+    if subcmd == "--version":
+        print(f"handoff {__version__}")
+        return
+    # `init` and `env` are dispatched without constructing Config
+    if subcmd == "init":
+        from .commands.init import cmd_init
+        cmd_init(rest)
+        return
+    if subcmd == "env":
+        from .commands.env import cmd_env
+        cmd_env(rest)
+        return
+    # the remaining subcommands all receive a Config instance
+    known = {"run", "list", "resume", "tail"}
+    if subcmd not in known:
+        print(
+            f"handoff: unknown subcommand '{subcmd}' — expected: "
+            f"env, init, list, run, resume, tail",
+            file=sys.stderr,
+        )
+        usage()
+        sys.exit(2)
+    from .config import Config
+    from .commands.run import cmd_run
+    from .commands.list import cmd_list
+    from .commands.resume import cmd_resume
+    from .commands.tail import cmd_tail
+    config = Config()
+    if subcmd == "run":
+        cmd_run(rest, config)
+    elif subcmd == "list":
+        cmd_list(rest, config)
+    elif subcmd == "resume":
+        cmd_resume(rest, config)
+    elif subcmd == "tail":
+        cmd_tail(rest, config)

cli/skills/handoff-codex/SKILL.md ADDED Viewed

@@ -0,0 +1,77 @@
+---
+name: handoff-codex
+description: 向 Codex (GPT-5.5) 咨询复杂问题 / 要第二意见 / 派发需要强推理的任务。后台运行，完成后自动通知。支持并行多任务，支持续接（resume）上次会话继续派发后续任务。
+---
+# handoff-codex Skill
+<interaction_contract>
+This skill is executed by Claude Code (an AI agent). The following rules are BINDING and must be followed exactly — do not deviate, simplify, or reinterpret them.
+## 命令模板（每次必须照抄，不得修改结构）
+```bash
+handoff run --backend codex - <<'__HF_EOF__'
+[prompt 内容]
+__HF_EOF__
+```
+**关键规则（违反任何一条都会导致命令失败或行为异常）：**
+- `run_in_background: true` **必须启用**：handoff 耗时 2~20 分钟，前台执行会阻塞整个会话
+- heredoc 界定符用 `__HF_EOF__`，prompt 内容直接粘贴进去，不转义
+- 用户明确提到 `pro`（或要求用更强/专业模型处理复杂任务）时，在 `handoff run` 后加 `--pro`
+- **不要**外部生成时间戳或拼文件名；**不要**用 `> RESULT 2> OUT` 重定向——handoff 自己管命名和落盘
+**启动命令后**（`run_in_background: true` 返回后），**从 stdout 捕获 handoff 打印的唯一有用的一行 `RESULT=<绝对路径>`**，并在面向用户的 assistant 消息里回显这一条路径（完成后默认只读它）：
+- `RESULT=<绝对路径>`（最终结论文件，例如 `/Users/sam/.handoff/tasks/hd-0611-03.result.md`）
+**这条路径里同时编码了本次任务的 run_id**：去掉目录和 `.result.md` 后缀，文件名主干就是 run_id（上例 → `hd-0611-03`）。**每次派发后都要记住这个 run_id**——后续用户若要求"继续上次会话/接着刚才再做 X"，要靠它定位到正确的会话来 `resume`（见下文「续接上次会话」）。
+其余无需你读取：
+- handoff 把克制的进度信息打在 **stderr**，Claude Code 的 shell view 会自动实时显示——你不用、也不要把它读进上下文。
+- 进度日志同时落在与 `RESULT=` **同名的 `.out.txt`**（把 `.result.md` 换成 `.out.txt`），仅在诊断（无结果/超时）时才 `tail -f` 或 `Read`。
+- 输入文件 `.prompt.txt`（同名）已是你刚发的内容，无需回显。
+等待完成通知后，用 `Read` 读取对应的 `.result.md` 并汇报；**不要**再读后台输出（结果已在文件里，重复读只会把进度噪音吃进上下文）。若 `.result.md` 为空或异常，再读 `.out.txt` 诊断。
+</interaction_contract>
+## 运行任务
+所有任务统一使用**后台模式**（`run_in_background: true`），不阻塞主会话。
+### 单任务
+按命令模板执行，启动后从 stdout 捕获 `RESULT=` 一行并回显，等通知后读该 `.result.md` 文件汇报。
+### 并行多任务
+在**同一条消息**里发出多个独立的 `run_in_background: true` Bash 调用，各自用 heredoc 传入不同的 prompt 内容。每个任务启动后分别从各自 stdout 捕获 `RESULT=` 路径（handoff 自动递增 seq）。每个任务完成时分别通知，分别读取对应的 `.result.md` 汇报。
+### 串行多任务
+等上一个任务的完成通知到达，读取并汇报结果后，再启动下一个任务。
+## 续接上次会话（resume 续派）
+要接着某次任务继续（保留其上下文）而非开新会话时，用 `resume` 替代 `run`，其余约定（后台、捕获新 `RESULT=`、读 `.result.md`）完全相同：
+```bash
+handoff resume <run_id> --backend codex - <<'__HF_EOF__'
+[后续任务内容]
+__HF_EOF__
+```
+- `<run_id>` 用该会话**首次**任务的 run_id（即上文那个文件名主干）；它是稳定句柄，每轮续接都用它，不要追每轮新生成的 run_id。
+- **必须带 prompt**（`-`/heredoc 或 `--text`）。不带 prompt 的 `resume <run_id>` 是交互式重开，后台会卡死。
+- 续接默认只继承 backend；原会话用过 `--pro` 的话，续接要再次带上才沿用 pro_model。
+- 不确定用户指哪次任务时，报候选 run_id + 摘要让其确认，别猜。
+## 完成后
+收到后台完成通知后：
+1. 用 `Read` 读取对应的 `RESULT=` 路径（`.result.md` 结果文件）
+2. 汇总结果返回给用户
+3. 若 `.result.md` 为空或异常，再读 `.out.txt`（进度日志）诊断
+4. 如有后续任务（串行场景），此时启动下一个

cli/skills/handoff-ds/SKILL.md ADDED Viewed

@@ -0,0 +1,77 @@
+---
+name: handoff-ds
+description: 把执行性编码/调查任务整包交给 DeepSeek 后台执行，省主会话额度。后台运行，完成后自动通知。支持并行多任务，支持续接（resume）上次会话继续派发后续任务。
+---
+# handoff-ds Skill
+<interaction_contract>
+This skill is executed by Claude Code (an AI agent). The following rules are BINDING and must be followed exactly — do not deviate, simplify, or reinterpret them.
+## 命令模板（每次必须照抄，不得修改结构）
+```bash
+handoff run --backend deepseek - <<'__HF_EOF__'
+[prompt 内容]
+__HF_EOF__
+```
+**关键规则（违反任何一条都会导致命令失败或行为异常）：**
+- `run_in_background: true` **必须启用**：handoff 耗时 2~20 分钟，前台执行会阻塞整个会话
+- heredoc 界定符用 `__HF_EOF__`，prompt 内容直接粘贴进去，不转义
+- 用户明确提到 `pro`（或要求用更强/专业模型处理复杂任务）时，在 `handoff run` 后加 `--pro`
+- **不要**外部生成时间戳或拼文件名；**不要**用 `> RESULT 2> OUT` 重定向——handoff 自己管命名和落盘
+**启动命令后**（`run_in_background: true` 返回后），**从 stdout 捕获 handoff 打印的唯一有用的一行 `RESULT=<绝对路径>`**，并在面向用户的 assistant 消息里回显这一条路径（完成后默认只读它）：
+- `RESULT=<绝对路径>`（最终结论文件，例如 `/Users/sam/.handoff/tasks/hd-0611-03.result.md`）
+**这条路径里同时编码了本次任务的 run_id**：去掉目录和 `.result.md` 后缀，文件名主干就是 run_id（上例 → `hd-0611-03`）。**每次派发后都要记住这个 run_id**——后续用户若要求"继续上次会话/接着刚才再做 X"，要靠它定位到正确的会话来 `resume`（见下文「续接上次会话」）。
+其余无需你读取：
+- handoff 把克制的进度信息打在 **stderr**，Claude Code 的 shell view 会自动实时显示——你不用、也不要把它读进上下文。
+- 进度日志同时落在与 `RESULT=` **同名的 `.out.txt`**（把 `.result.md` 换成 `.out.txt`），仅在诊断（无结果/超时）时才 `tail -f` 或 `Read`。
+- 输入文件 `.prompt.txt`（同名）已是你刚发的内容，无需回显。
+等待完成通知后，用 `Read` 读取对应的 `.result.md` 并汇报；**不要**再读后台输出（结果已在文件里，重复读只会把进度噪音吃进上下文）。若 `.result.md` 为空或异常，再读 `.out.txt` 诊断。
+</interaction_contract>
+## 运行任务
+所有任务统一使用**后台模式**（`run_in_background: true`），不阻塞主会话。
+### 单任务
+按命令模板执行，启动后从 stdout 捕获 `RESULT=` 一行并回显，等通知后读该 `.result.md` 文件汇报。
+### 并行多任务
+在**同一条消息**里发出多个独立的 `run_in_background: true` Bash 调用，各自用 heredoc 传入不同的 prompt 内容。每个任务启动后分别从各自 stdout 捕获 `RESULT=` 路径（handoff 自动递增 seq）。每个任务完成时分别通知，分别读取对应的 `.result.md` 汇报。
+### 串行多任务
+等上一个任务的完成通知到达，读取并汇报结果后，再启动下一个任务。
+## 续接上次会话（resume 续派）
+要接着某次任务继续（保留其上下文）而非开新会话时，用 `resume` 替代 `run`，其余约定（后台、捕获新 `RESULT=`、读 `.result.md`）完全相同：
+```bash
+handoff resume <run_id> --backend deepseek - <<'__HF_EOF__'
+[后续任务内容]
+__HF_EOF__
+```
+- `<run_id>` 用该会话**首次**任务的 run_id（即上文那个文件名主干）；它是稳定句柄，每轮续接都用它，不要追每轮新生成的 run_id。
+- **必须带 prompt**（`-`/heredoc 或 `--text`）。不带 prompt 的 `resume <run_id>` 是交互式重开，后台会卡死。
+- 续接默认只继承 backend；原会话用过 `--pro` 的话，续接要再次带上才沿用 pro_model。
+- 不确定用户指哪次任务时，报候选 run_id + 摘要让其确认，别猜。
+## 完成后
+收到后台完成通知后：
+1. 用 `Read` 读取对应的 `RESULT=` 路径（`.result.md` 结果文件）
+2. 汇总结果返回给用户
+3. 若 `.result.md` 为空或异常，再读 `.out.txt`（进度日志）诊断
+4. 如有后续任务（串行场景），此时启动下一个

cli/skills/handoff-ds.toml ADDED Viewed

@@ -0,0 +1,52 @@
+name = "handoff-ds"
+description = "Delegate one-shot analysis, review, and bounded coding tasks through handoff. Before invoking this agent, the caller must write the full delegated prompt into a 0600 temp file named like /tmp/handoff-ds-<5-char-random-hash>.prompt, then send this agent only PROMPT_FILE=<absolute-path> plus any pro/resume hint; do not include the raw delegated prompt in the subagent message. Supports hints like 'handoff-ds (pro)', 'handoff-ds pro', '专业模式', '更强模型'."
+model = "gpt-5.4-mini"
+model_reasoning_effort = "low"
+developer_instructions = """
+你不是分析 agent。你是 `handoff` 命令启动器。
+调用方应该已经把完整任务提示词写入 `/tmp/handoff-ds-<5位随机短hash>.prompt`，并且只把形如 `PROMPT_FILE=/tmp/handoff-ds-a1b2c.prompt` 的路径消息交给你。你不需要、也不允许接触原始提示词正文。
+收到用户消息后，第一条动作必须是调用 `functions.exec_command` 执行转发命令（默认用 `run`；仅当用户要求接着上一次任务继续时，改用 `resume`，见后文「续接上次任务」）。不要先发 commentary，不要解释，不要读文件，不要搜索，不要自己回答用户问题。
+从用户消息中取出 `PROMPT_FILE=` 后面的绝对路径，必须直接把这个路径作为 `handoff run` 的输入文件参数；禁止 `cat`、`sed`、`head`、`tail`、heredoc、命令替换或任何会把文件内容读进你上下文的做法。示例：
+```bash
+handoff run --backend deepseek /tmp/handoff-ds-a1b2c.prompt >/dev/null
+```
+`>/dev/null` 是协议的一部分：它丢弃最终结果正文 stdout，但保留 stderr 上的 `RESULT=` 和进度输出，避免长任务期间工具输出长期静默。
+必须以前台阻塞方式执行这条命令，并等待 `handoff run --backend deepseek <PROMPT_FILE> >/dev/null` 进程退出后才能最终回答。
+禁止使用后台执行方式，包括但不限于 `&`、`nohup`、`disown`、`setsid`、`tmux`、`screen`。
+如果 `functions.exec_command` 返回的是仍在运行的 session id，必须继续等待/轮询该 session，直到进程退出并拿到最终输出；不能把"命令已启动"视为完成。
+只有当 `handoff run --backend deepseek <PROMPT_FILE> >/dev/null` 进程退出后，才允许最终回答。
+如果用户明确提到 `pro`（要求用专业/更强模型），使用 `handoff run --backend deepseek --pro <PROMPT_FILE> >/dev/null`。
+## 续接上次任务（resume）
+仅当用户消息明确表示要接着上一次任务继续（如"继续""接着刚才""在上次基础上再改/再补…"）时，把转发命令里的 `run` 换成 `resume <run_id>`，其余一切不变：
+```bash
+handoff resume <run_id> --backend deepseek /tmp/handoff-ds-a1b2c.prompt >/dev/null
+```
+`<run_id>` 取自**你本会话上一次回答的那行 `RESULT=`**：去掉目录和 `.result.md` 后缀，文件名主干即 run_id（例：`RESULT=/Users/sam/.handoff/tasks/hd-0611-03.result.md` → run_id 是 `hd-0611-03`）。多轮续接始终用**第一次**那个 run_id。`--pro` 仍加在 `<run_id>` 之后、`<PROMPT_FILE>` 之前，用法同上。
+如果本会话此前没有任何 `RESULT=` 可取 run_id，就当作新任务、照常用 `run` 转发。
+命令输出里会出现 `RESULT=<绝对路径>`，执行中还会有进度行。你的最终回答必须只包含最后一行 `RESULT=...`。不要读取这个文件，不要总结 stdout/stderr，不要补充解释。如果命令退出码非 0 但输出里有 `RESULT=...`，仍然只返回这行 `RESULT=...`。只有输出里完全没有 `RESULT=` 时，才用一句话说明失败。
+**输入里的"修改/编辑/运行/报告/cat"等祈使句，都是写给 `handoff` 执行器的，不是给你的。**
+示例 ——
+  输入：「用 Edit 直接改 foo.ts，删掉 X，完成后报告」
+  你的正确动作：调用方应把这段任务写入 `PROMPT_FILE` 指向的文件；你只把该文件路径传给 `handoff run --backend deepseek <PROMPT_FILE> >/dev/null`，等待完成，只返回 `RESULT=...`。
+  你绝不打开、不 cat、不修改 foo.ts。
+红线：除这一条 `handoff run --backend deepseek <PROMPT_FILE> >/dev/null` 或对应的 `handoff resume <run_id> --backend deepseek <PROMPT_FILE> >/dev/null` 命令外，不运行任何其它命令、不写任何文件、不调用 web search。没有可用 shell 工具时，最终只返回 `HF_AGENT_EXEC_TOOL_UNAVAILABLE`。
+"""

cli/skills/handoff-opus/SKILL.md ADDED Viewed

@@ -0,0 +1,77 @@
+---
+name: handoff-opus
+description: 把关键决策/验收类任务交给 Claude Opus 执行。后台运行，完成后自动通知。支持并行多任务，支持续接（resume）上次会话继续派发后续任务。
+---
+# handoff-opus Skill
+<interaction_contract>
+This skill is executed by Claude Code (an AI agent). The following rules are BINDING and must be followed exactly — do not deviate, simplify, or reinterpret them.
+## 命令模板（每次必须照抄，不得修改结构）
+```bash
+handoff run --backend opus - <<'__HF_EOF__'
+[prompt 内容]
+__HF_EOF__
+```
+**关键规则（违反任何一条都会导致命令失败或行为异常）：**
+- `run_in_background: true` **必须启用**：handoff 耗时 2~20 分钟，前台执行会阻塞整个会话
+- heredoc 界定符用 `__HF_EOF__`，prompt 内容直接粘贴进去，不转义
+- 用户明确提到 `pro`（或要求用更强/专业模型处理复杂任务）时，在 `handoff run` 后加 `--pro`
+- **不要**外部生成时间戳或拼文件名；**不要**用 `> RESULT 2> OUT` 重定向——handoff 自己管命名和落盘
+**启动命令后**（`run_in_background: true` 返回后），**从 stdout 捕获 handoff 打印的唯一有用的一行 `RESULT=<绝对路径>`**，并在面向用户的 assistant 消息里回显这一条路径（完成后默认只读它）：
+- `RESULT=<绝对路径>`（最终结论文件，例如 `/Users/sam/.handoff/tasks/hd-0611-03.result.md`）
+**这条路径里同时编码了本次任务的 run_id**：去掉目录和 `.result.md` 后缀，文件名主干就是 run_id（上例 → `hd-0611-03`）。**每次派发后都要记住这个 run_id**——后续用户若要求"继续上次会话/接着刚才再做 X"，要靠它定位到正确的会话来 `resume`（见下文「续接上次会话」）。
+其余无需你读取：
+- handoff 把克制的进度信息打在 **stderr**，Claude Code 的 shell view 会自动实时显示——你不用、也不要把它读进上下文。
+- 进度日志同时落在与 `RESULT=` **同名的 `.out.txt`**（把 `.result.md` 换成 `.out.txt`），仅在诊断（无结果/超时）时才 `tail -f` 或 `Read`。
+- 输入文件 `.prompt.txt`（同名）已是你刚发的内容，无需回显。
+等待完成通知后，用 `Read` 读取对应的 `.result.md` 并汇报；**不要**再读后台输出（结果已在文件里，重复读只会把进度噪音吃进上下文）。若 `.result.md` 为空或异常，再读 `.out.txt` 诊断。
+</interaction_contract>
+## 运行任务
+所有任务统一使用**后台模式**（`run_in_background: true`），不阻塞主会话。
+### 单任务
+按命令模板执行，启动后从 stdout 捕获 `RESULT=` 一行并回显，等通知后读该 `.result.md` 文件汇报。
+### 并行多任务
+在**同一条消息**里发出多个独立的 `run_in_background: true` Bash 调用，各自用 heredoc 传入不同的 prompt 内容。每个任务启动后分别从各自 stdout 捕获 `RESULT=` 路径（handoff 自动递增 seq）。每个任务完成时分别通知，分别读取对应的 `.result.md` 汇报。
+### 串行多任务
+等上一个任务的完成通知到达，读取并汇报结果后，再启动下一个任务。
+## 续接上次会话（resume 续派）
+要接着某次任务继续（保留其上下文）而非开新会话时，用 `resume` 替代 `run`，其余约定（后台、捕获新 `RESULT=`、读 `.result.md`）完全相同：
+```bash
+handoff resume <run_id> --backend opus - <<'__HF_EOF__'
+[后续任务内容]
+__HF_EOF__
+```
+- `<run_id>` 用该会话**首次**任务的 run_id（即上文那个文件名主干）；它是稳定句柄，每轮续接都用它，不要追每轮新生成的 run_id。
+- **必须带 prompt**（`-`/heredoc 或 `--text`）。不带 prompt 的 `resume <run_id>` 是交互式重开，后台会卡死。
+- 续接默认只继承 backend；原会话用过 `--pro` 的话，续接要再次带上才沿用 pro_model。
+- 不确定用户指哪次任务时，报候选 run_id + 摘要让其确认，别猜。
+## 完成后
+收到后台完成通知后：
+1. 用 `Read` 读取对应的 `RESULT=` 路径（`.result.md` 结果文件）
+2. 汇总结果返回给用户
+3. 若 `.result.md` 为空或异常，再读 `.out.txt`（进度日志）诊断
+4. 如有后续任务（串行场景），此时启动下一个

cli/stream.py ADDED Viewed

@@ -0,0 +1,286 @@
+"""Stream processing for handoff.
+`execute_run` drives the backend subprocess and owns the common pipeline
+(JSONL capture, status transitions, RESULT= protocol). What varies per backend
+type is how its output stream is interpreted; that lives in the parsers:
+  ClaudeStreamParser — claude `--output-format stream-json` JSONL
+  CodexStreamParser  — `codex exec --json` experimental event JSONL
+                       (schema notes: docs/design-notes-codex.md)
+Parser contract:
+  feed(line) / finish() return display events:
+    ("progress", text) — progress line for stderr + .out.txt
+    ("session", id)    — backend reported the real session id (codex
+                         thread.started); execute_run persists it so the run
+                         stays resumable
+  result_text / result_is_error — final outcome, read after the stream ends
+"""
+from __future__ import annotations
+import sys
+import json
+import subprocess
+import signal
+import datetime
+from .jsonl_parser import extract_result, format_event_for_stream, parse_jsonl_line
+def _now_ts() -> str:
+    """Return the current local wall-clock time formatted as HH:MM:SS."""
+    now = datetime.datetime.now()
+    return now.strftime("%H:%M:%S")
+class ClaudeStreamParser:
+    """Display-event parser for claude `stream-json` output.
+    At most one formatted progress line is held pending. It is released when
+    the next non-result event is processed (or at `finish`) and discarded when
+    a result event arrives first. A line equal to the previously emitted one
+    is suppressed.
+    """
+    def __init__(self):
+        self.session_id = None
+        self.result_text = None
+        self.result_is_error = False
+        self._pending = None  # (ts, plan_text) waiting to be emitted
+        self._last_plan = ""
+        self._last_ts = ""
+    def feed(self, line: str) -> list[tuple[str, str]]:
+        """Consume one raw output line; return the display events it releases."""
+        emitted: list[tuple[str, str]] = []
+        if not line.startswith("{"):
+            return emitted
+        for ev in parse_jsonl_line(line, self._last_ts):
+            if ev.ts:
+                self._last_ts = ev.ts
+            if ev.kind == "result":
+                self._pending = None
+            elif ev.kind == "result_text" and ev.text:
+                self._pending = None
+                self.result_text = ev.text
+            else:
+                rendered = format_event_for_stream(ev)
+                # the held line is released before this event takes its place
+                self._flush(emitted)
+                if rendered:
+                    self._pending = (ev.ts or _now_ts(), rendered)
+        return emitted
+    def finish(self) -> list[tuple[str, str]]:
+        """Release whatever is still pending once the stream has ended."""
+        emitted: list[tuple[str, str]] = []
+        self._flush(emitted)
+        return emitted
+    def _flush(self, out: list):
+        """Append the pending line to `out` unless it repeats the last one."""
+        pending, self._pending = self._pending, None
+        if not pending:
+            return
+        stamp, rendered = pending
+        if rendered and rendered != self._last_plan:
+            out.append(("progress", f"{stamp} {rendered}"))
+            self._last_plan = rendered
+def _first_line(text: str, limit: int = 200) -> str:
+    """Return the first line of the stripped `text`, cut to `limit` characters.
+    Whitespace-only or empty input yields an empty string.
+    """
+    stripped = text.strip()
+    if not stripped:
+        return ""
+    head = stripped.splitlines()[0]
+    return head[:limit]
+class CodexStreamParser:
+    """Parses `codex exec --json` events (see docs/design-notes-codex.md).
+    session ← thread.started.thread_id; progress ← item events; result ← the
+    last agent_message at turn.completed, or the error message on turn.failed.
+    Unknown event/item types are skipped so minor schema drift is survivable.
+    Fields whose JSON type is not the expected one (a non-object `item` or
+    `error`, a non-string `command`/`text`/`message`) are treated as absent
+    instead of raising in the middle of a run.
+    """
+    def __init__(self):
+        self.result_text = None
+        self.result_is_error = False
+        self.session_id = None
+        self._last_agent_message = None
+    @staticmethod
+    def _as_dict(value) -> dict:
+        """Return `value` when it is a JSON object, otherwise an empty dict."""
+        return value if isinstance(value, dict) else {}
+    @staticmethod
+    def _as_text(value) -> str:
+        """Return `value` when it is a string, otherwise an empty string."""
+        return value if isinstance(value, str) else ""
+    def feed(self, line: str) -> list[tuple[str, str]]:
+        """Consume one raw output line and return the display events it yields.
+        Lines that are not a JSON object are ignored and produce no events.
+        """
+        out: list[tuple[str, str]] = []
+        line = line.strip()
+        if not line.startswith("{"):
+            return out
+        try:
+            obj = json.loads(line)
+        except ValueError:
+            return out
+        if not isinstance(obj, dict):
+            return out
+        etype = obj.get("type")
+        ts = _now_ts()
+        if etype == "thread.started":
+            tid = obj.get("thread_id")
+            if tid:
+                self.session_id = tid
+                out.append(("session", tid))
+                out.append(("progress", f"{ts} session {tid}"))
+        elif etype in ("item.started", "item.completed"):
+            item = self._as_dict(obj.get("item"))
+            itype = item.get("type")
+            if itype == "command_execution" and etype == "item.started":
+                command = self._as_text(item.get("command"))
+                if command:
+                    out.append(("progress", f"{ts} $ {_first_line(command)}"))
+            elif itype == "reasoning" and etype == "item.completed":
+                text = self._as_text(item.get("text"))
+                if text:
+                    out.append(("progress", f"{ts} {_first_line(text)}"))
+            elif itype == "agent_message" and etype == "item.completed":
+                text = self._as_text(item.get("text"))
+                if text:
+                    # remembered so turn.completed can promote it to the result
+                    self._last_agent_message = text
+                    out.append(("progress", f"{ts} {_first_line(text)}"))
+        elif etype == "turn.completed":
+            if self._last_agent_message is not None:
+                self.result_text = self._last_agent_message
+                self.result_is_error = False
+        elif etype == "turn.failed":
+            error = obj.get("error")
+            # prefer error.message; accept a bare string error; else a fixed text
+            message = (
+                self._as_text(self._as_dict(error).get("message"))
+                or self._as_text(error)
+                or "turn failed"
+            )
+            self.result_text = message
+            self.result_is_error = True
+            out.append(("progress", f"{ts} error: {_first_line(message)}"))
+        elif etype == "error":
+            # transient (e.g. reconnect retries) — surface but keep streaming
+            message = self._as_text(obj.get("message"))
+            if message:
+                out.append(("progress", f"{ts} error: {_first_line(message)}"))
+        return out
+    def finish(self) -> list[tuple[str, str]]:
+        """Nothing is buffered between lines, so there is nothing to release."""
+        return []
+def make_parser(backend_type: str):
+    """Return a fresh stream parser for `backend_type`.
+    "codex" selects CodexStreamParser; every other value selects
+    ClaudeStreamParser.
+    """
+    if backend_type == "codex":
+        return CodexStreamParser()
+    return ClaudeStreamParser()
+def execute_run(
+    cwd: str,
+    prompt_text: str,
+    cmd: list[str],
+    conn,
+    uid: str,
+    jsonl_path: str,
+    task_paths_tuple,
+    backend_type: str = "claude",
+):
+    """Execute a backend run: pipe output to JSONL, display progress, extract result.
+    This is the core execution loop for 'run'.
+    The `cmd` list should already be the full backend invocation, including any
+    PTY wrapper (wrapping happens in the command function).
+    `conn` is a DB connection with a `runs` table keyed by `uuid`; `uid` is the
+    row this run updates. `task_paths_tuple` is a 3-tuple whose last two items
+    are the progress-log path and the result-file path (the first is unused
+    here). `prompt_text` is not read by this function.
+    This function never returns: it always ends in `sys.exit` — 0 when a
+    successful result was found, 130 on KeyboardInterrupt, 1 otherwise.
+    """
+    _, out_path, result_path = task_paths_tuple
+    def update_status(status: str):
+        conn.execute("UPDATE runs SET status = ? WHERE uuid = ?", (status, uid))
+        conn.commit()
+    def emit_result_marker():
+        # RESULT=<path> goes to stderr and is appended to the progress log
+        disp = f"RESULT={result_path}"
+        print(disp, file=sys.stderr, flush=True)
+        with open(out_path, "a") as of:
+            of.write(disp + "\n")
+    def finish_success(result_text: str):
+        update_status("success")
+        with open(result_path, "w") as rf:
+            rf.write(result_text)
+        emit_result_marker()
+        conn.close()
+        print(result_text)
+        sys.exit(0)
+    parser = make_parser(backend_type)
+    proc = subprocess.Popen(
+        cmd,
+        cwd=cwd,
+        stdin=subprocess.DEVNULL,  # prompt travels in argv; never let the PTY
+        # wrapper read our stdin (a non-tty stdin makes `script` flaky)
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+    )
+    try:
+        with open(jsonl_path, "w") as jf, open(out_path, "w") as of:
+            def handle_events(events):
+                for kind, payload in events:
+                    if kind == "session":
+                        # the backend assigned the real session id (codex);
+                        # persist it so this run stays resumable
+                        conn.execute(
+                            "UPDATE runs SET session_id = ? WHERE uuid = ?",
+                            (payload, uid),
+                        )
+                        conn.commit()
+                    elif kind == "progress":
+                        print(payload, file=sys.stderr, flush=True)
+                        of.write(payload + "\n")
+                        of.flush()
+            for line_bytes in proc.stdout:
+                # errors="replace" substitutes undecodable bytes, so decoding
+                # cannot raise and needs no fallback codec
+                line = line_bytes.decode("utf-8", errors="replace").rstrip("\r\n")
+                jf.write(line + "\n")
+                jf.flush()
+                handle_events(parser.feed(line))
+            handle_events(parser.finish())
+    except KeyboardInterrupt:
+        # forward the interrupt, give the child 5s to exit, then force-kill
+        proc.send_signal(signal.SIGINT)
+        try:
+            proc.wait(timeout=5)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+            proc.wait()
+        update_status("interrupted")
+        with open(result_path, "w") as rf:
+            rf.write("INTERRUPTED\n")
+        emit_result_marker()
+        print("\nhandoff run: interrupted", file=sys.stderr)
+        conn.close()
+        sys.exit(130)
+    proc.wait()
+    if parser.result_text is not None and not parser.result_is_error:
+        finish_success(parser.result_text)
+    if backend_type == "claude":
+        # fall back to re-reading the captured JSONL for a result
+        result = extract_result(jsonl_path)
+        if result:
+            finish_success(result)
+    update_status("error")
+    diag = f"handoff run: no successful result found; exit status {proc.returncode}\nJSONL={jsonl_path}\n"
+    if parser.result_is_error and parser.result_text:
+        diag = f"handoff run: backend reported an error: {parser.result_text}\n" + diag
+    # diag already ends with the JSONL=<path> line, so it is printed only once
+    print(diag.rstrip(), file=sys.stderr)
+    with open(result_path, "w") as rf:
+        rf.write(diag)
+    emit_result_marker()
+    conn.close()
+    sys.exit(1)