npm - oh-my-codex - Versions diffs - 0.14.2 → 0.14.3 - Mend

oh-my-codex 0.14.2 → 0.14.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (116)

package/Cargo.lock +5 -5
package/Cargo.toml +1 -1
package/dist/cli/__tests__/cleanup.test.js +27 -0
package/dist/cli/__tests__/cleanup.test.js.map +1 -1
package/dist/cli/__tests__/explore.test.js +58 -1
package/dist/cli/__tests__/explore.test.js.map +1 -1
package/dist/cli/__tests__/index.test.js +63 -0
package/dist/cli/__tests__/index.test.js.map +1 -1
package/dist/cli/__tests__/question.test.js +146 -1
package/dist/cli/__tests__/question.test.js.map +1 -1
package/dist/cli/cleanup.d.ts.map +1 -1
package/dist/cli/cleanup.js +18 -2
package/dist/cli/cleanup.js.map +1 -1
package/dist/cli/codex-home.d.ts +8 -0
package/dist/cli/codex-home.d.ts.map +1 -0
package/dist/cli/codex-home.js +54 -0
package/dist/cli/codex-home.js.map +1 -0
package/dist/cli/explore.d.ts +1 -0
package/dist/cli/explore.d.ts.map +1 -1
package/dist/cli/explore.js +15 -6
package/dist/cli/explore.js.map +1 -1
package/dist/cli/index.d.ts +1 -6
package/dist/cli/index.d.ts.map +1 -1
package/dist/cli/index.js +2 -49
package/dist/cli/index.js.map +1 -1
package/dist/config/__tests__/generator-idempotent.test.js +44 -0
package/dist/config/__tests__/generator-idempotent.test.js.map +1 -1
package/dist/config/generator.d.ts.map +1 -1
package/dist/config/generator.js +59 -21
package/dist/config/generator.js.map +1 -1
package/dist/hooks/__tests__/clawhip-event-contract.test.js +7 -0
package/dist/hooks/__tests__/clawhip-event-contract.test.js.map +1 -1
package/dist/hooks/__tests__/deep-interview-contract.test.js +17 -0
package/dist/hooks/__tests__/deep-interview-contract.test.js.map +1 -1
package/dist/hooks/__tests__/keyword-detector.test.js +87 -0
package/dist/hooks/__tests__/keyword-detector.test.js.map +1 -1
package/dist/hooks/__tests__/skill-guidance-contract.test.js +8 -1
package/dist/hooks/__tests__/skill-guidance-contract.test.js.map +1 -1
package/dist/hooks/extensibility/__tests__/events.test.js +6 -0
package/dist/hooks/extensibility/__tests__/events.test.js.map +1 -1
package/dist/hooks/extensibility/types.d.ts +1 -1
package/dist/hooks/extensibility/types.d.ts.map +1 -1
package/dist/hooks/keyword-detector.d.ts +3 -0
package/dist/hooks/keyword-detector.d.ts.map +1 -1
package/dist/hooks/keyword-detector.js +11 -4
package/dist/hooks/keyword-detector.js.map +1 -1
package/dist/hooks/prompt-guidance-contract.d.ts.map +1 -1
package/dist/hooks/prompt-guidance-contract.js +31 -15
package/dist/hooks/prompt-guidance-contract.js.map +1 -1
package/dist/hud/__tests__/reconcile.test.js +22 -0
package/dist/hud/__tests__/reconcile.test.js.map +1 -1
package/dist/hud/reconcile.d.ts +2 -1
package/dist/hud/reconcile.d.ts.map +1 -1
package/dist/hud/reconcile.js +3 -2
package/dist/hud/reconcile.js.map +1 -1
package/dist/hud/tmux.d.ts +3 -2
package/dist/hud/tmux.d.ts.map +1 -1
package/dist/hud/tmux.js +11 -4
package/dist/hud/tmux.js.map +1 -1
package/dist/question/__tests__/deep-interview.test.js +58 -1
package/dist/question/__tests__/deep-interview.test.js.map +1 -1
package/dist/question/__tests__/renderer.test.js +282 -24
package/dist/question/__tests__/renderer.test.js.map +1 -1
package/dist/question/__tests__/state.test.js +27 -0
package/dist/question/__tests__/state.test.js.map +1 -1
package/dist/question/__tests__/ui.test.js +129 -0
package/dist/question/__tests__/ui.test.js.map +1 -1
package/dist/question/deep-interview.d.ts +1 -0
package/dist/question/deep-interview.d.ts.map +1 -1
package/dist/question/deep-interview.js +80 -2
package/dist/question/deep-interview.js.map +1 -1
package/dist/question/renderer.d.ts +4 -1
package/dist/question/renderer.d.ts.map +1 -1
package/dist/question/renderer.js +101 -4
package/dist/question/renderer.js.map +1 -1
package/dist/question/state.js +1 -1
package/dist/question/state.js.map +1 -1
package/dist/question/ui.d.ts +3 -1
package/dist/question/ui.d.ts.map +1 -1
package/dist/question/ui.js +14 -6
package/dist/question/ui.js.map +1 -1
package/dist/scripts/__tests__/codex-native-hook.test.js +265 -3
package/dist/scripts/__tests__/codex-native-hook.test.js.map +1 -1
package/dist/scripts/codex-native-hook.d.ts.map +1 -1
package/dist/scripts/codex-native-hook.js +47 -9
package/dist/scripts/codex-native-hook.js.map +1 -1
package/dist/scripts/codex-native-pre-post.d.ts.map +1 -1
package/dist/scripts/codex-native-pre-post.js +47 -0
package/dist/scripts/codex-native-pre-post.js.map +1 -1
package/dist/scripts/notify-hook/__tests__/operational-events.test.d.ts +2 -0
package/dist/scripts/notify-hook/__tests__/operational-events.test.d.ts.map +1 -0
package/dist/scripts/notify-hook/__tests__/operational-events.test.js +24 -0
package/dist/scripts/notify-hook/__tests__/operational-events.test.js.map +1 -0
package/dist/scripts/notify-hook/team-dispatch.d.ts.map +1 -1
package/dist/scripts/notify-hook/team-dispatch.js +9 -0
package/dist/scripts/notify-hook/team-dispatch.js.map +1 -1
package/dist/team/__tests__/events.test.js +25 -0
package/dist/team/__tests__/events.test.js.map +1 -1
package/dist/team/__tests__/runtime.test.js +8 -1
package/dist/team/__tests__/runtime.test.js.map +1 -1
package/dist/team/__tests__/tmux-session.test.js +293 -2
package/dist/team/__tests__/tmux-session.test.js.map +1 -1
package/dist/team/runtime.d.ts.map +1 -1
package/dist/team/runtime.js +30 -0
package/dist/team/runtime.js.map +1 -1
package/dist/team/tmux-session.d.ts.map +1 -1
package/dist/team/tmux-session.js +60 -3
package/dist/team/tmux-session.js.map +1 -1
package/package.json +2 -2
package/skills/deep-interview/SKILL.md +13 -2
package/skills/ultrawork/SKILL.md +91 -59
package/src/scripts/__tests__/codex-native-hook.test.ts +318 -3
package/src/scripts/codex-native-hook.ts +49 -6
package/src/scripts/codex-native-pre-post.ts +45 -0
package/src/scripts/notify-hook/__tests__/operational-events.test.ts +24 -0
package/src/scripts/notify-hook/team-dispatch.ts +9 -0

package/skills/ultrawork/SKILL.md CHANGED

@@ -4,58 +4,80 @@ description: Parallel execution engine for high-throughput task completion
 ---
 <Purpose>
-Ultrawork is a parallel execution engine that runs multiple agents simultaneously for independent tasks. It is a component, not a standalone persistence mode -- it provides parallelism and smart model routing but not persistence, verification loops, or state management.
+Ultrawork is a parallel execution engine for high-throughput task completion. It is a component, not a standalone persistence mode: it provides parallelism, context discipline, and smart delegation guidance, but not Ralph's persistence loop, architect sign-off, or long-running completion guarantees.
 </Purpose>
 <Use_When>
 - Multiple independent tasks can run simultaneously
-- User says "ulw", "ultrawork", or wants parallel execution
-- You need to delegate work to multiple agents at once
-- Task benefits from concurrent execution but the user will manage completion themselves
+- User says "ulw", "ultrawork", or explicitly wants parallel execution
+- Task benefits from concurrent execution plus lightweight evidence before wrap-up
+- You need a direct-tool lane plus optional background evidence lanes without entering Ralph
 </Use_When>
 <Do_Not_Use_When>
-- Task requires guaranteed completion with verification -- use `ralph` instead (ralph includes ultrawork)
-- Task requires a full autonomous pipeline -- use `autopilot` instead (autopilot includes ralph which includes ultrawork)
-- There is only one sequential task with no parallelism opportunity -- delegate directly to an executor agent
-- User needs session persistence for resume -- use `ralph` which adds persistence on top of ultrawork
+- Task requires guaranteed completion with persistence, architect verification, or deslop/reverification -- use `ralph` instead (Ralph includes ultrawork)
+- Task requires a full autonomous pipeline -- use `autopilot` instead (autopilot includes Ralph which includes ultrawork)
+- There is only one sequential task with no parallelism opportunity -- execute directly or delegate to a single `executor`
+- The request is still in plan-consensus mode -- keep planning artifacts in `ralplan` until execution is explicitly authorized
+- User needs session persistence for resume -- use `ralph`, which adds persistence on top of ultrawork
 </Do_Not_Use_When>
 <Why_This_Exists>
-Sequential task execution wastes time when tasks are independent. Ultrawork enables firing multiple agents simultaneously and routing each to the right model tier, reducing total execution time while controlling token costs. It is designed as a composable component that ralph and autopilot layer on top of.
+Sequential task execution wastes time when tasks are independent. Ultrawork keeps the execution branch fast while tightening the protocol: gather enough context first, define pass/fail acceptance criteria before editing, decide deliberately between local execution and delegation, and finish with evidence rather than vibes.
 </Why_This_Exists>
 <Execution_Policy>
-- Fire all independent agent calls simultaneously -- never serialize independent work
-- Always pass the `model` parameter explicitly when delegating
-- Read `docs/shared/agent-tiers.md` before first delegation for agent selection guidance
-- Auto-delegate `researcher` when official docs, version-aware framework guidance, best practices, or external dependency behavior materially affect task correctness; treat it as an evidence lane, not a replacement primary workflow
-- Use `run_in_background: true` for operations over ~30 seconds (installs, builds, tests)
-- Run quick commands (git status, file reads, simple checks) in the foreground
+- Gather enough context before implementation. Start with the task intent, desired outcome, constraints, likely touchpoints, and any uncertainty that would change the execution path.
+- If uncertainty is still material after a quick repo read, do a focused evidence pass first instead of immediately editing.
+- Define pass/fail acceptance criteria before launching execution lanes. Include the command, artifact, or manual check that will prove success.
+- Prefer direct tool work when the task is small, coupled, or blocked on immediate local context. Delegate only when the work is independent enough to benefit from parallel execution.
+- When useful, run a direct-tool lane and one or more background evidence lanes at the same time. Evidence lanes can cover docs, tests, regression mapping, or bounded repo analysis.
+- Fire independent agent calls simultaneously -- never serialize independent work.
+- Always pass the `model` parameter explicitly when delegating.
+- Read `docs/shared/agent-tiers.md` before first delegation for agent selection guidance.
+- Auto-delegate `researcher` when official docs, version-aware framework guidance, best practices, or external dependency behavior materially affect task correctness; treat it as an evidence lane, not a replacement primary workflow.
+- Use `run_in_background: true` for operations over ~30 seconds (installs, builds, tests).
+- Run quick commands (git status, file reads, simple checks) in the foreground.
+- Default to concise, evidence-dense progress and completion reporting. If a lane is speculative or blocked, say so explicitly.
+- Treat newer user task updates as local overrides for the active workflow branch while preserving earlier non-conflicting constraints.
+- If the user says `continue` after ultrawork already has a clear next step, continue the current execution branch instead of restarting planning or asking for reconfirmation.
 </Execution_Policy>
 <Steps>
-1. **Read agent reference**: Load `docs/shared/agent-tiers.md` for tier selection
-2. **Classify tasks by independence**: Identify which tasks can run in parallel vs which have dependencies
-3. **Route to correct tiers**:
-   - Simple lookups/definitions: LOW tier
-   - Standard implementation: STANDARD tier
-   - Complex analysis/refactoring: THOROUGH tier
-4. **Fire independent tasks simultaneously**: Launch all parallel-safe tasks at once
-5. **Run dependent tasks sequentially**: Wait for prerequisites before launching dependent work
-6. **Background long operations**: Builds, installs, and test suites use `run_in_background: true`
-7. **Verify when all tasks complete** (lightweight):
-   - Build/typecheck passes
-   - Affected tests pass
-   - No new errors introduced
+1. **Read agent reference**: Load `docs/shared/agent-tiers.md` for tier selection.
+2. **Context + certainty check**:
+   - State the task intent in one sentence.
+   - List the constraints and unknowns that could invalidate a quick fix.
+   - If confidence is low, explore first and narrow the task before editing.
+3. **Define acceptance criteria before execution**:
+   - What must be true at the end?
+   - Which command or artifact proves it?
+   - Which manual QA check is required, if any?
+4. **Classify the work by dependency shape**:
+   - Independent tasks -> parallel lanes.
+   - Shared-file or prerequisite-heavy tasks -> local execution or staged lanes.
+5. **Choose self vs delegate deliberately**:
+   - Work locally when the next step depends on immediate repo context, shared files, or tight iteration.
+   - Delegate when the task slice is bounded, independent, and materially improves throughput.
+6. **Run execution lanes**:
+   - Direct-tool lane for immediate implementation or verification work.
+   - Background evidence lanes for tests, docs, repo analysis, or regression checks.
+7. **Run dependent tasks sequentially**: Wait for prerequisites before launching dependent work.
+8. **Close with lightweight evidence**:
+   - Build/typecheck passes when relevant.
+   - Affected tests pass.
+   - Manual QA notes are recorded when the task needs a human-visible or behavior-level check.
+   - No new errors introduced.
 </Steps>
 <Tool_Usage>
-- Use LOW-tier delegation for simple changes
-- Use STANDARD-tier delegation for standard work
-- Use THOROUGH-tier delegation for complex work
-- Use `run_in_background: true` for package installs, builds, and test suites
-- Use foreground execution for quick status checks and file operations
+- Use LOW-tier delegation for simple lookups and bounded evidence gathering.
+- Use STANDARD-tier delegation for standard implementation and regression work.
+- Use THOROUGH-tier delegation for complex analysis, architectural review, or risky multi-file changes.
+- Prefer a direct-tool lane when the immediate next step is blocked on local context.
+- Prefer background evidence lanes when you can learn something useful in parallel with implementation.
+- Use `run_in_background: true` for package installs, builds, and test suites.
+- Use foreground execution for quick status checks and file operations.
 </Tool_Usage>
 ## State Management
@@ -73,64 +95,74 @@ Use `omx_state` MCP tools for ultrawork lifecycle state.
 <Examples>
 <Good>
-Three independent tasks fired simultaneously:
+Two-track execution with acceptance criteria up front:
 ```
-delegate(role="executor", tier="LOW", task="Add missing type export for Config interface")
-delegate(role="executor", tier="STANDARD", task="Implement the /api/users endpoint with validation")
-delegate(role="test-engineer", tier="STANDARD", task="Add integration tests for the auth middleware")
+Acceptance criteria:
+- `npm run build` passes
+- `node --test dist/scripts/__tests__/codex-native-hook.test.js` passes
+- Manual QA: verify `$ultrawork` activation message still points to the session state file
+Direct-tool lane:
+- update `skills/ultrawork/SKILL.md`
+Background evidence lane:
+- delegate(role="test-engineer", tier="STANDARD", task="Map which hook tests cover ultrawork activation messaging", model="...")
 ```
-Why good: Independent tasks at appropriate tiers, all fired at once.
+Why good: Context is grounded first, acceptance criteria are explicit, and the direct-tool lane runs alongside a bounded evidence lane.
 </Good>
 <Good>
-Correct use of background execution:
+Correct use of self-vs-delegate judgment:
 ```
-delegate(role="executor", tier="STANDARD", task="npm install && npm run build", run_in_background=true)
-delegate(role="writer", tier="LOW", task="Update the README with new API endpoints")
+Shared-file edit in progress across `src/scripts/codex-native-hook.ts` and its test -> keep implementation local.
+Independent regression mapping for keyword-detector coverage -> delegate to a test-engineer lane.
 ```
-Why good: Long build runs in background while short task runs in foreground.
+Why good: Shared-file work stays local; independent evidence work fans out.
 </Good>
 <Bad>
-Sequential execution of independent work:
+Parallelizing before the task is grounded:
 ```
-result1 = delegate(executor, LOW, "Add type export")  # wait...
-result2 = delegate(executor, STANDARD, "Implement endpoint")     # wait...
-result3 = delegate(test-engineer, STANDARD, "Add tests")              # wait...
+delegate(role="executor", tier="STANDARD", task="Implement whatever seems necessary", model="...")
+delegate(role="test-engineer", tier="STANDARD", task="Figure out how to test it later", model="...")
 ```
-Why bad: These tasks are independent. Running them sequentially wastes time.
+Why bad: No context snapshot, no pass/fail target, and delegation starts before the work is shaped.
 </Bad>
 <Bad>
-Wrong tier selection:
+Claiming success without evidence or manual QA:
 ```
-delegate(role="executor", tier="THOROUGH", task="Add a missing semicolon")
+Made the changes. Ultrawork should be updated now.
 ```
-Why bad: THOROUGH tier is expensive overkill for a trivial fix. Use LOW-tier execution instead.
+Why bad: No verification output, no acceptance evidence, and no manual QA note when the behavior is user-visible.
 </Bad>
 </Examples>
 <Escalation_And_Stop_Conditions>
-- When ultrawork is invoked directly (not via ralph), apply lightweight verification only -- build passes, tests pass, no new errors
-- For full persistence and comprehensive architect verification, recommend switching to `ralph` mode
-- If a task fails repeatedly across retries, report the issue rather than retrying indefinitely
-- Escalate to the user when tasks have unclear dependencies or conflicting requirements
+- When ultrawork is invoked directly (not via Ralph), apply lightweight verification only -- build/typecheck passes when relevant, affected tests pass, and manual QA notes are captured when needed.
+- Ralph owns persistence, architect verification, deslop, and the full verified-completion promise. Do not claim those guarantees from direct ultrawork alone.
+- If a task fails repeatedly across retries, report the issue rather than retrying indefinitely.
+- Escalate to the user when tasks have unclear dependencies, conflicting requirements, or a materially branching acceptance target.
 </Escalation_And_Stop_Conditions>
 <Final_Checklist>
-- [ ] All parallel tasks completed
-- [ ] Build/typecheck passes
+- [ ] Task intent and constraints were grounded before editing
+- [ ] Pass/fail acceptance criteria were stated before execution
+- [ ] Parallel lanes were used only for independent work
+- [ ] Build/typecheck passes when relevant
 - [ ] Affected tests pass
+- [ ] Manual QA notes recorded when behavior is user-visible
 - [ ] No new errors introduced
+- [ ] Completion claim stays inside ultrawork's lightweight-verification boundary
 </Final_Checklist>
 <Advanced>
 ## Relationship to Other Modes
 ```
-ralph (persistence wrapper)
+ralph (persistence + verified completion wrapper)
  \-- includes: ultrawork (this skill)
-     \-- provides: parallel execution only
+     \-- provides: high-throughput execution + lightweight evidence
 autopilot (autonomous execution)
  \-- includes: ralph
@@ -140,5 +172,5 @@ ecomode (token efficiency)
  \-- modifies: ultrawork's model selection
 ```
-Ultrawork is the parallelism layer. Ralph adds persistence and verification. Autopilot adds the full lifecycle pipeline. Ecomode adjusts ultrawork's model routing to favor cheaper models.
+Ultrawork is the parallelism and execution-discipline layer. Ralph adds persistence, architect verification, deslop, and retry-until-done behavior. Autopilot adds the broader autonomous lifecycle pipeline. Ecomode adjusts ultrawork's model routing to favor cheaper models.
 </Advanced>

package/src/scripts/__tests__/codex-native-hook.test.ts CHANGED

@@ -101,6 +101,7 @@ const TEAM_ENV_KEYS = [
   "OMX_TEAM_STATE_ROOT",
   "OMX_TEAM_LEADER_CWD",
   "OMX_SESSION_ID",
+  "TMUX_PANE",
 ] as const;
 const priorTeamEnv = new Map<(typeof TEAM_ENV_KEYS)[number], string | undefined>();
@@ -635,6 +636,37 @@ describe("codex native hook dispatch", () => {
     }
   });
+  it("adds ultrawork-specific activation guidance only for true ultrawork workflow activation", async () => {
+    const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-ultrawork-routing-"));
+    try {
+      await mkdir(join(cwd, ".omx", "state"), { recursive: true });
+      const result = await dispatchCodexNativeHook(
+        {
+          hook_event_name: "UserPromptSubmit",
+          cwd,
+          session_id: "sess-ultrawork-msg",
+          thread_id: "thread-ultrawork-msg",
+          turn_id: "turn-ultrawork-msg",
+          prompt: "$ultrawork fan out the regression checks",
+        },
+        { cwd },
+      );
+      assert.equal(result.omxEventName, "keyword-detector");
+      assert.equal(result.skillState?.skill, "ultrawork");
+      const message = String(
+        (result.outputJson as { hookSpecificOutput?: { additionalContext?: string } })?.hookSpecificOutput?.additionalContext || "",
+      );
+      assert.match(message, /\$ultrawork" -> ultrawork/);
+      assert.match(message, /ground the task before editing/i);
+      assert.match(message, /define pass\/fail acceptance criteria/i);
+      assert.match(message, /direct-tool plus background evidence lanes/i);
+      assert.match(message, /Ralph owns persistence and the full verified-completion promise/i);
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
+  });
   it("does not activate Ralph workflow state from a plain conversational mention", async () => {
     const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-ralph-plain-text-"));
     try {
@@ -840,13 +872,56 @@ describe("codex native hook dispatch", () => {
       assert.match(message, /do not fall back to `request_user_input` or plain-text questioning/i);
       assert.match(message, /After starting `omx question` in a background terminal, wait for that terminal to finish and read the JSON answer before continuing the interview\./);
       assert.match(message, /If bare `omx question` is unavailable in this reused session, use the current-session CLI bridge command:/);
-      assert.match(message, /`'.+' '.+dist\/cli\/omx\.js' question`/);
+      assert.match(message, /'.+' '.+dist\/cli\/omx\.js' question/);
+      assert.doesNotMatch(message, /OMX_QUESTION_RETURN_PANE=/);
+      assert.doesNotMatch(message, /preserve the leader pane/i);
       assert.match(message, /Stop remains blocked while a deep-interview question obligation is pending\./);
     } finally {
       await rm(cwd, { recursive: true, force: true });
     }
   });
+  it("includes leader-pane preservation guidance when a pane hint is available", async () => {
+    const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-deep-interview-pane-hint-"));
+    try {
+      const sessionId = "sess-deep-interview-pane-hint";
+      const sessionDir = join(cwd, ".omx", "state", "sessions", sessionId);
+      await mkdir(sessionDir, { recursive: true });
+      await writeJson(join(sessionDir, "deep-interview-state.json"), {
+        active: true,
+        mode: "deep-interview",
+        current_phase: "intent-first",
+        started_at: "2026-04-21T10:00:00.000Z",
+        updated_at: "2026-04-21T10:00:00.000Z",
+        tmux_pane_id: "%77",
+      });
+      const result = await dispatchCodexNativeHook(
+        {
+          hook_event_name: "UserPromptSubmit",
+          cwd,
+          session_id: sessionId,
+          thread_id: "thread-deep-interview-pane-hint",
+          turn_id: "turn-deep-interview-pane-hint",
+          prompt: "$deep-interview gather requirements",
+        },
+        { cwd },
+      );
+      assert.equal(result.omxEventName, "keyword-detector");
+      assert.equal(result.skillState?.skill, "deep-interview");
+      const message = String(
+        (result.outputJson as { hookSpecificOutput?: { additionalContext?: string } })?.hookSpecificOutput?.additionalContext || "",
+      );
+      assert.match(message, /OMX_QUESTION_RETURN_PANE='%77'/);
+      assert.match(message, /preserve the leader pane/i);
+      assert.match(message, /OMX_QUESTION_RETURN_PANE=%77/);
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
+  });
   it("keeps bare keep-going continuation on the active ralph skill without resetting through generic keep-going routing", async () => {
     const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-ralph-bare-continuation-"));
     try {
@@ -1219,8 +1294,8 @@ esac
       assert.equal(result.omxEventName, "keyword-detector");
       const tmuxCalls = await readFile(tmuxLog, "utf-8");
-      assert.match(tmuxCalls, /list-panes/);
-      assert.match(tmuxCalls, /split-window/);
+      assert.match(tmuxCalls, /list-panes -t %1 -F/);
+      assert.match(tmuxCalls, /split-window -v -l 3 -d -t %1 -c/);
       assert.match(tmuxCalls, /resize-pane -t %9 -y 3/);
       assert.match(tmuxCalls, /dist\/cli\/omx\.js' hud --watch --preset=focused/);
       assert.doesNotMatch(tmuxCalls, /\/tmp\/codex-host-binary' hud --watch/);
@@ -1241,6 +1316,91 @@ esac
     }
   });
+  it("blocks Bash omx question when no leader-pane return hint is preserved", async () => {
+    const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-question-enforce-"));
+    try {
+      const result = await dispatchCodexNativeHook(
+        {
+          hook_event_name: "PreToolUse",
+          cwd,
+          tool_name: "Bash",
+          tool_use_id: "tool-question-block",
+          tool_input: { command: `omx question --json --input '{"question":"Q?","options":["A"],"allow_other":true}'` },
+        },
+        { cwd },
+      );
+      assert.equal(result.omxEventName, "pre-tool-use");
+      assert.equal((result.outputJson as { decision?: string } | null)?.decision, "block");
+      assert.match(String((result.outputJson as { systemMessage?: string } | null)?.systemMessage || ""), /OMX_QUESTION_RETURN_PANE=\$TMUX_PANE/);
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
+  });
+  it("allows Bash omx question when the command preserves the leader-pane return hint", async () => {
+    const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-question-allow-"));
+    try {
+      const result = await dispatchCodexNativeHook(
+        {
+          hook_event_name: "PreToolUse",
+          cwd,
+          tool_name: "Bash",
+          tool_use_id: "tool-question-allow",
+          tool_input: { command: `OMX_QUESTION_RETURN_PANE=$TMUX_PANE omx question --json --input '{"question":"Q?","options":["A"],"allow_other":true}'` },
+        },
+        { cwd },
+      );
+      assert.equal(result.omxEventName, "pre-tool-use");
+      assert.equal(result.outputJson, null);
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
+  });
+  it("allows the quoted pane env assignment emitted by the deep-interview bridge command", async () => {
+    const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-question-quoted-allow-"));
+    try {
+      const result = await dispatchCodexNativeHook(
+        {
+          hook_event_name: "PreToolUse",
+          cwd,
+          tool_name: "Bash",
+          tool_use_id: "tool-question-quoted-allow",
+          tool_input: { command: `OMX_QUESTION_RETURN_PANE='%42' node ./dist/cli/omx.js question --json --input '{"question":"Q?","options":["A"],"allow_other":true}'` },
+        },
+        { cwd },
+      );
+      assert.equal(result.omxEventName, "pre-tool-use");
+      assert.equal(result.outputJson, null);
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
+  });
+  it("blocks Bash node omx.js question when the command does not preserve the leader-pane return hint", async () => {
+    const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-question-node-block-"));
+    try {
+      const result = await dispatchCodexNativeHook(
+        {
+          hook_event_name: "PreToolUse",
+          cwd,
+          tool_name: "Bash",
+          tool_use_id: "tool-question-node-block",
+          tool_input: { command: `node ./dist/cli/omx.js question --json --input '{"question":"Q?","options":["A"],"allow_other":true}'` },
+        },
+        { cwd },
+      );
+      assert.equal(result.omxEventName, "pre-tool-use");
+      assert.equal((result.outputJson as { decision?: string } | null)?.decision, "block");
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
+  });
   it("returns a destructive-command caution on PreToolUse for rm -rf dist", async () => {
     const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-danger-"));
     try {
@@ -2322,6 +2482,76 @@ esac
     }
   });
+  it("suppresses duplicate Autopilot planning Stop replays so stale planning state cannot loop indefinitely", async () => {
+    const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-autopilot-planning-replay-"));
+    try {
+      const stateDir = join(cwd, ".omx", "state");
+      await mkdir(stateDir, { recursive: true });
+      await writeJson(join(stateDir, "autopilot-state.json"), {
+        active: true,
+        current_phase: "planning",
+      });
+      const payload = {
+        hook_event_name: "Stop",
+        cwd,
+        session_id: "sess-stop-autopilot-planning-replay",
+        thread_id: "thread-stop-autopilot-planning-replay",
+        turn_id: "turn-stop-autopilot-planning-replay",
+        last_assistant_message: "Autopilot planning is still active.",
+      };
+      const first = await dispatchCodexNativeHook(payload, { cwd });
+      const replay = await dispatchCodexNativeHook(
+        {
+          ...payload,
+          stop_hook_active: true,
+        },
+        { cwd },
+      );
+      assert.equal(first.omxEventName, "stop");
+      assert.deepEqual(first.outputJson, {
+        decision: "block",
+        reason:
+          "OMX autopilot is still active (phase: planning); continue the task and gather fresh verification evidence before stopping.",
+        stopReason: "autopilot_planning",
+        systemMessage: "OMX autopilot is still active (phase: planning).",
+      });
+      assert.equal(replay.omxEventName, "stop");
+      assert.equal(replay.outputJson, null);
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
+  });
+  it("does not block Stop from stale root Autopilot planning state when the explicit session has no scoped state", async () => {
+    const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-stale-root-autopilot-planning-"));
+    try {
+      const stateDir = join(cwd, ".omx", "state");
+      await mkdir(join(stateDir, "sessions", "sess-current"), { recursive: true });
+      await writeJson(join(stateDir, "session.json"), { session_id: "sess-current", cwd });
+      await writeJson(join(stateDir, "autopilot-state.json"), {
+        active: true,
+        mode: "autopilot",
+        current_phase: "planning",
+      });
+      const result = await dispatchCodexNativeHook(
+        {
+          hook_event_name: "Stop",
+          cwd,
+          session_id: "sess-current",
+        },
+        { cwd },
+      );
+      assert.equal(result.omxEventName, "stop");
+      assert.equal(result.outputJson, null);
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
+  });
   it("does not block Stop when an explicit blocked_on_user run_outcome is present on a mode state", async () => {
     const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-autopilot-blocked-outcome-"));
     try {
@@ -3350,6 +3580,91 @@ esac
     }
   });
+  it("does not re-block Stop after a same-session deep-interview question record is already answered", async () => {
+    const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-deep-interview-question-answered-"));
+    try { // Scenario: deep-interview state still lists a pending question obligation, but an answered question record exists for the same session.
+      const sessionId = "sess-stop-deep-interview-question-answered";
+      const stateDir = join(cwd, ".omx", "state");
+      const sessionDir = join(stateDir, "sessions", sessionId);
+      await mkdir(join(sessionDir, "questions"), { recursive: true }); // recursive mkdir also creates sessionDir itself
+      await writeJson(join(stateDir, "session.json"), { session_id: sessionId });
+      await writeJson(join(sessionDir, "skill-active-state.json"), { // session-scoped record marking the deep-interview skill as active
+        version: 1,
+        active: true,
+        skill: "deep-interview",
+        phase: "planning",
+        session_id: sessionId,
+        thread_id: "thread-stop-deep-interview-question-answered",
+      });
+      await writeJson(join(sessionDir, "deep-interview-state.json"), { // mode state: inactive, blocked on the user, obligation still pending
+        active: false,
+        mode: "deep-interview",
+        current_phase: "intent-first",
+        lifecycle_outcome: "askuserQuestion",
+        run_outcome: "blocked_on_user",
+        completed_at: "2026-04-19T03:20:30.000Z",
+        session_id: sessionId,
+        thread_id: "thread-stop-deep-interview-question-answered",
+        question_enforcement: {
+          obligation_id: "obligation-answered",
+          source: "omx-question",
+          status: "pending", // expected to read "satisfied" after the hook runs (asserted below)
+          lifecycle_outcome: "askuserQuestion",
+          requested_at: "2026-04-19T03:20:00.000Z",
+        },
+      });
+      await writeJson(join(sessionDir, "questions", "question-answered.json"), { // question record stored under the same session directory
+        kind: "omx.question/v1",
+        question_id: "question-answered",
+        session_id: sessionId,
+        created_at: "2026-04-19T03:20:05.000Z", // later than the obligation's requested_at above
+        updated_at: "2026-04-19T03:20:10.000Z",
+        status: "answered",
+        question: "What should happen next?",
+        options: [{ label: "Continue", value: "continue" }],
+        allow_other: false,
+        other_label: "Other",
+        multi_select: false,
+        type: "single-answerable",
+        source: "deep-interview",
+        answer: {
+          kind: "option",
+          value: "continue",
+          selected_labels: ["Continue"],
+          selected_values: ["continue"],
+        },
+      });
+      const result = await dispatchCodexNativeHook(
+        {
+          hook_event_name: "Stop",
+          cwd,
+          session_id: sessionId,
+          thread_id: "thread-stop-deep-interview-question-answered", // same thread id as both state files above
+        },
+        { cwd },
+      );
+      assert.equal(result.omxEventName, "stop");
+      assert.equal(result.outputJson, null); // no hook output expected, i.e. Stop is not blocked again
+      const state = JSON.parse( // re-read the state file from disk to check what the hook wrote back
+        await readFile(join(sessionDir, "deep-interview-state.json"), "utf-8"),
+      ) as {
+        lifecycle_outcome?: string;
+        question_enforcement?: { status?: string; question_id?: string; satisfied_at?: string };
+        run_outcome?: string;
+      };
+      assert.equal(state.question_enforcement?.status, "satisfied");
+      assert.equal(state.question_enforcement?.question_id, "question-answered"); // the answered record's id is expected on the obligation
+      assert.ok(state.question_enforcement?.satisfied_at); // only checked for truthiness; the exact timestamp is not pinned
+      assert.equal(state.lifecycle_outcome, undefined); // top-level outcome fields written by the fixture are expected to be gone
+      assert.equal(state.run_outcome, undefined);
+    } finally {
+      await rm(cwd, { recursive: true, force: true }); // remove the temp workspace whether or not the assertions passed
+    }
+  });
   it("keeps blocking pending deep-interview question Stop replays until the obligation changes", async () => {
     const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-deep-interview-question-replay-"));
     try {