oh-my-codex 0.14.2 → 0.14.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/Cargo.lock +5 -5
  2. package/Cargo.toml +1 -1
  3. package/dist/cli/__tests__/cleanup.test.js +27 -0
  4. package/dist/cli/__tests__/cleanup.test.js.map +1 -1
  5. package/dist/cli/__tests__/explore.test.js +58 -1
  6. package/dist/cli/__tests__/explore.test.js.map +1 -1
  7. package/dist/cli/__tests__/index.test.js +63 -0
  8. package/dist/cli/__tests__/index.test.js.map +1 -1
  9. package/dist/cli/__tests__/question.test.js +146 -1
  10. package/dist/cli/__tests__/question.test.js.map +1 -1
  11. package/dist/cli/cleanup.d.ts.map +1 -1
  12. package/dist/cli/cleanup.js +18 -2
  13. package/dist/cli/cleanup.js.map +1 -1
  14. package/dist/cli/codex-home.d.ts +8 -0
  15. package/dist/cli/codex-home.d.ts.map +1 -0
  16. package/dist/cli/codex-home.js +54 -0
  17. package/dist/cli/codex-home.js.map +1 -0
  18. package/dist/cli/explore.d.ts +1 -0
  19. package/dist/cli/explore.d.ts.map +1 -1
  20. package/dist/cli/explore.js +15 -6
  21. package/dist/cli/explore.js.map +1 -1
  22. package/dist/cli/index.d.ts +1 -6
  23. package/dist/cli/index.d.ts.map +1 -1
  24. package/dist/cli/index.js +2 -49
  25. package/dist/cli/index.js.map +1 -1
  26. package/dist/config/__tests__/generator-idempotent.test.js +44 -0
  27. package/dist/config/__tests__/generator-idempotent.test.js.map +1 -1
  28. package/dist/config/generator.d.ts.map +1 -1
  29. package/dist/config/generator.js +59 -21
  30. package/dist/config/generator.js.map +1 -1
  31. package/dist/hooks/__tests__/clawhip-event-contract.test.js +7 -0
  32. package/dist/hooks/__tests__/clawhip-event-contract.test.js.map +1 -1
  33. package/dist/hooks/__tests__/deep-interview-contract.test.js +17 -0
  34. package/dist/hooks/__tests__/deep-interview-contract.test.js.map +1 -1
  35. package/dist/hooks/__tests__/keyword-detector.test.js +87 -0
  36. package/dist/hooks/__tests__/keyword-detector.test.js.map +1 -1
  37. package/dist/hooks/__tests__/skill-guidance-contract.test.js +8 -1
  38. package/dist/hooks/__tests__/skill-guidance-contract.test.js.map +1 -1
  39. package/dist/hooks/extensibility/__tests__/events.test.js +6 -0
  40. package/dist/hooks/extensibility/__tests__/events.test.js.map +1 -1
  41. package/dist/hooks/extensibility/types.d.ts +1 -1
  42. package/dist/hooks/extensibility/types.d.ts.map +1 -1
  43. package/dist/hooks/keyword-detector.d.ts +3 -0
  44. package/dist/hooks/keyword-detector.d.ts.map +1 -1
  45. package/dist/hooks/keyword-detector.js +11 -4
  46. package/dist/hooks/keyword-detector.js.map +1 -1
  47. package/dist/hooks/prompt-guidance-contract.d.ts.map +1 -1
  48. package/dist/hooks/prompt-guidance-contract.js +31 -15
  49. package/dist/hooks/prompt-guidance-contract.js.map +1 -1
  50. package/dist/hud/__tests__/reconcile.test.js +22 -0
  51. package/dist/hud/__tests__/reconcile.test.js.map +1 -1
  52. package/dist/hud/reconcile.d.ts +2 -1
  53. package/dist/hud/reconcile.d.ts.map +1 -1
  54. package/dist/hud/reconcile.js +3 -2
  55. package/dist/hud/reconcile.js.map +1 -1
  56. package/dist/hud/tmux.d.ts +3 -2
  57. package/dist/hud/tmux.d.ts.map +1 -1
  58. package/dist/hud/tmux.js +11 -4
  59. package/dist/hud/tmux.js.map +1 -1
  60. package/dist/question/__tests__/deep-interview.test.js +58 -1
  61. package/dist/question/__tests__/deep-interview.test.js.map +1 -1
  62. package/dist/question/__tests__/renderer.test.js +282 -24
  63. package/dist/question/__tests__/renderer.test.js.map +1 -1
  64. package/dist/question/__tests__/state.test.js +27 -0
  65. package/dist/question/__tests__/state.test.js.map +1 -1
  66. package/dist/question/__tests__/ui.test.js +129 -0
  67. package/dist/question/__tests__/ui.test.js.map +1 -1
  68. package/dist/question/deep-interview.d.ts +1 -0
  69. package/dist/question/deep-interview.d.ts.map +1 -1
  70. package/dist/question/deep-interview.js +80 -2
  71. package/dist/question/deep-interview.js.map +1 -1
  72. package/dist/question/renderer.d.ts +4 -1
  73. package/dist/question/renderer.d.ts.map +1 -1
  74. package/dist/question/renderer.js +101 -4
  75. package/dist/question/renderer.js.map +1 -1
  76. package/dist/question/state.js +1 -1
  77. package/dist/question/state.js.map +1 -1
  78. package/dist/question/ui.d.ts +3 -1
  79. package/dist/question/ui.d.ts.map +1 -1
  80. package/dist/question/ui.js +14 -6
  81. package/dist/question/ui.js.map +1 -1
  82. package/dist/scripts/__tests__/codex-native-hook.test.js +265 -3
  83. package/dist/scripts/__tests__/codex-native-hook.test.js.map +1 -1
  84. package/dist/scripts/codex-native-hook.d.ts.map +1 -1
  85. package/dist/scripts/codex-native-hook.js +47 -9
  86. package/dist/scripts/codex-native-hook.js.map +1 -1
  87. package/dist/scripts/codex-native-pre-post.d.ts.map +1 -1
  88. package/dist/scripts/codex-native-pre-post.js +47 -0
  89. package/dist/scripts/codex-native-pre-post.js.map +1 -1
  90. package/dist/scripts/notify-hook/__tests__/operational-events.test.d.ts +2 -0
  91. package/dist/scripts/notify-hook/__tests__/operational-events.test.d.ts.map +1 -0
  92. package/dist/scripts/notify-hook/__tests__/operational-events.test.js +24 -0
  93. package/dist/scripts/notify-hook/__tests__/operational-events.test.js.map +1 -0
  94. package/dist/scripts/notify-hook/team-dispatch.d.ts.map +1 -1
  95. package/dist/scripts/notify-hook/team-dispatch.js +9 -0
  96. package/dist/scripts/notify-hook/team-dispatch.js.map +1 -1
  97. package/dist/team/__tests__/events.test.js +25 -0
  98. package/dist/team/__tests__/events.test.js.map +1 -1
  99. package/dist/team/__tests__/runtime.test.js +8 -1
  100. package/dist/team/__tests__/runtime.test.js.map +1 -1
  101. package/dist/team/__tests__/tmux-session.test.js +293 -2
  102. package/dist/team/__tests__/tmux-session.test.js.map +1 -1
  103. package/dist/team/runtime.d.ts.map +1 -1
  104. package/dist/team/runtime.js +30 -0
  105. package/dist/team/runtime.js.map +1 -1
  106. package/dist/team/tmux-session.d.ts.map +1 -1
  107. package/dist/team/tmux-session.js +60 -3
  108. package/dist/team/tmux-session.js.map +1 -1
  109. package/package.json +2 -2
  110. package/skills/deep-interview/SKILL.md +13 -2
  111. package/skills/ultrawork/SKILL.md +91 -59
  112. package/src/scripts/__tests__/codex-native-hook.test.ts +318 -3
  113. package/src/scripts/codex-native-hook.ts +49 -6
  114. package/src/scripts/codex-native-pre-post.ts +45 -0
  115. package/src/scripts/notify-hook/__tests__/operational-events.test.ts +24 -0
  116. package/src/scripts/notify-hook/team-dispatch.ts +9 -0
@@ -4,58 +4,80 @@ description: Parallel execution engine for high-throughput task completion
4
4
  ---
5
5
 
6
6
  <Purpose>
7
- Ultrawork is a parallel execution engine that runs multiple agents simultaneously for independent tasks. It is a component, not a standalone persistence mode -- it provides parallelism and smart model routing but not persistence, verification loops, or state management.
7
+ Ultrawork is a parallel execution engine for high-throughput task completion. It is a component, not a standalone persistence mode: it provides parallelism, context discipline, and smart delegation guidance, but not Ralph's persistence loop, architect sign-off, or long-running completion guarantees.
8
8
  </Purpose>
9
9
 
10
10
  <Use_When>
11
11
  - Multiple independent tasks can run simultaneously
12
- - User says "ulw", "ultrawork", or wants parallel execution
13
- - You need to delegate work to multiple agents at once
14
- - Task benefits from concurrent execution but the user will manage completion themselves
12
+ - User says "ulw", "ultrawork", or explicitly wants parallel execution
13
+ - Task benefits from concurrent execution plus lightweight evidence before wrap-up
14
+ - You need a direct-tool lane plus optional background evidence lanes without entering Ralph
15
15
  </Use_When>
16
16
 
17
17
  <Do_Not_Use_When>
18
- - Task requires guaranteed completion with verification -- use `ralph` instead (ralph includes ultrawork)
19
- - Task requires a full autonomous pipeline -- use `autopilot` instead (autopilot includes ralph which includes ultrawork)
20
- - There is only one sequential task with no parallelism opportunity -- delegate directly to an executor agent
21
- - User needs session persistence for resume -- use `ralph` which adds persistence on top of ultrawork
18
+ - Task requires guaranteed completion with persistence, architect verification, or deslop/reverification -- use `ralph` instead (Ralph includes ultrawork)
19
+ - Task requires a full autonomous pipeline -- use `autopilot` instead (autopilot includes Ralph which includes ultrawork)
20
+ - There is only one sequential task with no parallelism opportunity -- execute directly or delegate to a single `executor`
21
+ - The request is still in plan-consensus mode -- keep planning artifacts in `ralplan` until execution is explicitly authorized
22
+ - User needs session persistence for resume -- use `ralph`, which adds persistence on top of ultrawork
22
23
  </Do_Not_Use_When>
23
24
 
24
25
  <Why_This_Exists>
25
- Sequential task execution wastes time when tasks are independent. Ultrawork enables firing multiple agents simultaneously and routing each to the right model tier, reducing total execution time while controlling token costs. It is designed as a composable component that ralph and autopilot layer on top of.
26
+ Sequential task execution wastes time when tasks are independent. Ultrawork keeps the execution branch fast while tightening the protocol: gather enough context first, define pass/fail acceptance criteria before editing, decide deliberately between local execution and delegation, and finish with evidence rather than vibes.
26
27
  </Why_This_Exists>
27
28
 
28
29
  <Execution_Policy>
29
- - Fire all independent agent calls simultaneously -- never serialize independent work
30
- - Always pass the `model` parameter explicitly when delegating
31
- - Read `docs/shared/agent-tiers.md` before first delegation for agent selection guidance
32
- - Auto-delegate `researcher` when official docs, version-aware framework guidance, best practices, or external dependency behavior materially affect task correctness; treat it as an evidence lane, not a replacement primary workflow
33
- - Use `run_in_background: true` for operations over ~30 seconds (installs, builds, tests)
34
- - Run quick commands (git status, file reads, simple checks) in the foreground
30
+ - Gather enough context before implementation. Start with the task intent, desired outcome, constraints, likely touchpoints, and any uncertainty that would change the execution path.
31
+ - If uncertainty is still material after a quick repo read, do a focused evidence pass first instead of immediately editing.
32
+ - Define pass/fail acceptance criteria before launching execution lanes. Include the command, artifact, or manual check that will prove success.
33
+ - Prefer direct tool work when the task is small, coupled, or blocked on immediate local context. Delegate only when the work is independent enough to benefit from parallel execution.
34
+ - When useful, run a direct-tool lane and one or more background evidence lanes at the same time. Evidence lanes can cover docs, tests, regression mapping, or bounded repo analysis.
35
+ - Fire independent agent calls simultaneously -- never serialize independent work.
36
+ - Always pass the `model` parameter explicitly when delegating.
37
+ - Read `docs/shared/agent-tiers.md` before first delegation for agent selection guidance.
38
+ - Auto-delegate `researcher` when official docs, version-aware framework guidance, best practices, or external dependency behavior materially affect task correctness; treat it as an evidence lane, not a replacement primary workflow.
39
+ - Use `run_in_background: true` for operations over ~30 seconds (installs, builds, tests).
40
+ - Run quick commands (git status, file reads, simple checks) in the foreground.
41
+ - Default to concise, evidence-dense progress and completion reporting. If a lane is speculative or blocked, say so explicitly.
42
+ - Treat newer user task updates as local overrides for the active workflow branch while preserving earlier non-conflicting constraints.
43
+ - If the user says `continue` after ultrawork already has a clear next step, continue the current execution branch instead of restarting planning or asking for reconfirmation.
35
44
  </Execution_Policy>
36
45
 
37
46
  <Steps>
38
- 1. **Read agent reference**: Load `docs/shared/agent-tiers.md` for tier selection
39
- 2. **Classify tasks by independence**: Identify which tasks can run in parallel vs which have dependencies
40
- 3. **Route to correct tiers**:
41
- - Simple lookups/definitions: LOW tier
42
- - Standard implementation: STANDARD tier
43
- - Complex analysis/refactoring: THOROUGH tier
44
- 4. **Fire independent tasks simultaneously**: Launch all parallel-safe tasks at once
45
- 5. **Run dependent tasks sequentially**: Wait for prerequisites before launching dependent work
46
- 6. **Background long operations**: Builds, installs, and test suites use `run_in_background: true`
47
- 7. **Verify when all tasks complete** (lightweight):
48
- - Build/typecheck passes
49
- - Affected tests pass
50
- - No new errors introduced
47
+ 1. **Read agent reference**: Load `docs/shared/agent-tiers.md` for tier selection.
48
+ 2. **Context + certainty check**:
49
+ - State the task intent in one sentence.
50
+ - List the constraints and unknowns that could invalidate a quick fix.
51
+ - If confidence is low, explore first and narrow the task before editing.
52
+ 3. **Define acceptance criteria before execution**:
53
+ - What must be true at the end?
54
+ - Which command or artifact proves it?
55
+ - Which manual QA check is required, if any?
56
+ 4. **Classify the work by dependency shape**:
57
+ - Independent tasks -> parallel lanes.
58
+ - Shared-file or prerequisite-heavy tasks -> local execution or staged lanes.
59
+ 5. **Choose self vs delegate deliberately**:
60
+ - Work locally when the next step depends on immediate repo context, shared files, or tight iteration.
61
+ - Delegate when the task slice is bounded, independent, and materially improves throughput.
62
+ 6. **Run execution lanes**:
63
+ - Direct-tool lane for immediate implementation or verification work.
64
+ - Background evidence lanes for tests, docs, repo analysis, or regression checks.
65
+ 7. **Run dependent tasks sequentially**: Wait for prerequisites before launching dependent work.
66
+ 8. **Close with lightweight evidence**:
67
+ - Build/typecheck passes when relevant.
68
+ - Affected tests pass.
69
+ - Manual QA notes are recorded when the task needs a human-visible or behavior-level check.
70
+ - No new errors introduced.
51
71
  </Steps>
52
72
 
53
73
  <Tool_Usage>
54
- - Use LOW-tier delegation for simple changes
55
- - Use STANDARD-tier delegation for standard work
56
- - Use THOROUGH-tier delegation for complex work
57
- - Use `run_in_background: true` for package installs, builds, and test suites
58
- - Use foreground execution for quick status checks and file operations
74
+ - Use LOW-tier delegation for simple lookups and bounded evidence gathering.
75
+ - Use STANDARD-tier delegation for standard implementation and regression work.
76
+ - Use THOROUGH-tier delegation for complex analysis, architectural review, or risky multi-file changes.
77
+ - Prefer a direct-tool lane when the immediate next step is blocked on local context.
78
+ - Prefer background evidence lanes when you can learn something useful in parallel with implementation.
79
+ - Use `run_in_background: true` for package installs, builds, and test suites.
80
+ - Use foreground execution for quick status checks and file operations.
59
81
  </Tool_Usage>
60
82
 
61
83
  ## State Management
@@ -73,64 +95,74 @@ Use `omx_state` MCP tools for ultrawork lifecycle state.
73
95
 
74
96
  <Examples>
75
97
  <Good>
76
- Three independent tasks fired simultaneously:
98
+ Two-track execution with acceptance criteria up front:
77
99
  ```
78
- delegate(role="executor", tier="LOW", task="Add missing type export for Config interface")
79
- delegate(role="executor", tier="STANDARD", task="Implement the /api/users endpoint with validation")
80
- delegate(role="test-engineer", tier="STANDARD", task="Add integration tests for the auth middleware")
100
+ Acceptance criteria:
101
+ - `npm run build` passes
102
+ - `node --test dist/scripts/__tests__/codex-native-hook.test.js` passes
103
+ - Manual QA: verify `$ultrawork` activation message still points to the session state file
104
+
105
+ Direct-tool lane:
106
+ - update `skills/ultrawork/SKILL.md`
107
+
108
+ Background evidence lane:
109
+ - delegate(role="test-engineer", tier="STANDARD", task="Map which hook tests cover ultrawork activation messaging", model="...")
81
110
  ```
82
- Why good: Independent tasks at appropriate tiers, all fired at once.
111
+ Why good: Context is grounded first, acceptance criteria are explicit, and the direct-tool lane runs alongside a bounded evidence lane.
83
112
  </Good>
84
113
 
85
114
  <Good>
86
- Correct use of background execution:
115
+ Correct use of self-vs-delegate judgment:
87
116
  ```
88
- delegate(role="executor", tier="STANDARD", task="npm install && npm run build", run_in_background=true)
89
- delegate(role="writer", tier="LOW", task="Update the README with new API endpoints")
117
+ Shared-file edit in progress across `src/scripts/codex-native-hook.ts` and its test -> keep implementation local.
118
+ Independent regression mapping for keyword-detector coverage -> delegate to a test-engineer lane.
90
119
  ```
91
- Why good: Long build runs in background while short task runs in foreground.
120
+ Why good: Shared-file work stays local; independent evidence work fans out.
92
121
  </Good>
93
122
 
94
123
  <Bad>
95
- Sequential execution of independent work:
124
+ Parallelizing before the task is grounded:
96
125
  ```
97
- result1 = delegate(executor, LOW, "Add type export") # wait...
98
- result2 = delegate(executor, STANDARD, "Implement endpoint") # wait...
99
- result3 = delegate(test-engineer, STANDARD, "Add tests") # wait...
126
+ delegate(role="executor", tier="STANDARD", task="Implement whatever seems necessary", model="...")
127
+ delegate(role="test-engineer", tier="STANDARD", task="Figure out how to test it later", model="...")
100
128
  ```
101
- Why bad: These tasks are independent. Running them sequentially wastes time.
129
+ Why bad: No context snapshot, no pass/fail target, and delegation starts before the work is shaped.
102
130
  </Bad>
103
131
 
104
132
  <Bad>
105
- Wrong tier selection:
133
+ Claiming success without evidence or manual QA:
106
134
  ```
107
- delegate(role="executor", tier="THOROUGH", task="Add a missing semicolon")
135
+ Made the changes. Ultrawork should be updated now.
108
136
  ```
109
- Why bad: THOROUGH tier is expensive overkill for a trivial fix. Use LOW-tier execution instead.
137
+ Why bad: No verification output, no acceptance evidence, and no manual QA note when the behavior is user-visible.
110
138
  </Bad>
111
139
  </Examples>
112
140
 
113
141
  <Escalation_And_Stop_Conditions>
114
- - When ultrawork is invoked directly (not via ralph), apply lightweight verification only -- build passes, tests pass, no new errors
115
- - For full persistence and comprehensive architect verification, recommend switching to `ralph` mode
116
- - If a task fails repeatedly across retries, report the issue rather than retrying indefinitely
117
- - Escalate to the user when tasks have unclear dependencies or conflicting requirements
142
+ - When ultrawork is invoked directly (not via Ralph), apply lightweight verification only -- build/typecheck passes when relevant, affected tests pass, and manual QA notes are captured when needed.
143
+ - Ralph owns persistence, architect verification, deslop, and the full verified-completion promise. Do not claim those guarantees from direct ultrawork alone.
144
+ - If a task fails repeatedly across retries, report the issue rather than retrying indefinitely.
145
+ - Escalate to the user when tasks have unclear dependencies, conflicting requirements, or a materially branching acceptance target.
118
146
  </Escalation_And_Stop_Conditions>
119
147
 
120
148
  <Final_Checklist>
121
- - [ ] All parallel tasks completed
122
- - [ ] Build/typecheck passes
149
+ - [ ] Task intent and constraints were grounded before editing
150
+ - [ ] Pass/fail acceptance criteria were stated before execution
151
+ - [ ] Parallel lanes were used only for independent work
152
+ - [ ] Build/typecheck passes when relevant
123
153
  - [ ] Affected tests pass
154
+ - [ ] Manual QA notes recorded when behavior is user-visible
124
155
  - [ ] No new errors introduced
156
+ - [ ] Completion claim stays inside ultrawork's lightweight-verification boundary
125
157
  </Final_Checklist>
126
158
 
127
159
  <Advanced>
128
160
  ## Relationship to Other Modes
129
161
 
130
162
  ```
131
- ralph (persistence wrapper)
163
+ ralph (persistence + verified completion wrapper)
132
164
  \-- includes: ultrawork (this skill)
133
- \-- provides: parallel execution only
165
+ \-- provides: high-throughput execution + lightweight evidence
134
166
 
135
167
  autopilot (autonomous execution)
136
168
  \-- includes: ralph
@@ -140,5 +172,5 @@ ecomode (token efficiency)
140
172
  \-- modifies: ultrawork's model selection
141
173
  ```
142
174
 
143
- Ultrawork is the parallelism layer. Ralph adds persistence and verification. Autopilot adds the full lifecycle pipeline. Ecomode adjusts ultrawork's model routing to favor cheaper models.
175
+ Ultrawork is the parallelism and execution-discipline layer. Ralph adds persistence, architect verification, deslop, and retry-until-done behavior. Autopilot adds the broader autonomous lifecycle pipeline. Ecomode adjusts ultrawork's model routing to favor cheaper models.
144
176
  </Advanced>
@@ -101,6 +101,7 @@ const TEAM_ENV_KEYS = [
101
101
  "OMX_TEAM_STATE_ROOT",
102
102
  "OMX_TEAM_LEADER_CWD",
103
103
  "OMX_SESSION_ID",
104
+ "TMUX_PANE",
104
105
  ] as const;
105
106
 
106
107
  const priorTeamEnv = new Map<(typeof TEAM_ENV_KEYS)[number], string | undefined>();
@@ -635,6 +636,37 @@ describe("codex native hook dispatch", () => {
635
636
  }
636
637
  });
637
638
 
639
+ it("adds ultrawork-specific activation guidance only for true ultrawork workflow activation", async () => {
640
+ const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-ultrawork-routing-"));
641
+ try {
642
+ await mkdir(join(cwd, ".omx", "state"), { recursive: true });
643
+ const result = await dispatchCodexNativeHook(
644
+ {
645
+ hook_event_name: "UserPromptSubmit",
646
+ cwd,
647
+ session_id: "sess-ultrawork-msg",
648
+ thread_id: "thread-ultrawork-msg",
649
+ turn_id: "turn-ultrawork-msg",
650
+ prompt: "$ultrawork fan out the regression checks",
651
+ },
652
+ { cwd },
653
+ );
654
+
655
+ assert.equal(result.omxEventName, "keyword-detector");
656
+ assert.equal(result.skillState?.skill, "ultrawork");
657
+ const message = String(
658
+ (result.outputJson as { hookSpecificOutput?: { additionalContext?: string } })?.hookSpecificOutput?.additionalContext || "",
659
+ );
660
+ assert.match(message, /\$ultrawork" -> ultrawork/);
661
+ assert.match(message, /ground the task before editing/i);
662
+ assert.match(message, /define pass\/fail acceptance criteria/i);
663
+ assert.match(message, /direct-tool plus background evidence lanes/i);
664
+ assert.match(message, /Ralph owns persistence and the full verified-completion promise/i);
665
+ } finally {
666
+ await rm(cwd, { recursive: true, force: true });
667
+ }
668
+ });
669
+
638
670
  it("does not activate Ralph workflow state from a plain conversational mention", async () => {
639
671
  const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-ralph-plain-text-"));
640
672
  try {
@@ -840,13 +872,56 @@ describe("codex native hook dispatch", () => {
840
872
  assert.match(message, /do not fall back to `request_user_input` or plain-text questioning/i);
841
873
  assert.match(message, /After starting `omx question` in a background terminal, wait for that terminal to finish and read the JSON answer before continuing the interview\./);
842
874
  assert.match(message, /If bare `omx question` is unavailable in this reused session, use the current-session CLI bridge command:/);
843
- assert.match(message, /`'.+' '.+dist\/cli\/omx\.js' question`/);
875
+ assert.match(message, /'.+' '.+dist\/cli\/omx\.js' question/);
876
+ assert.doesNotMatch(message, /OMX_QUESTION_RETURN_PANE=/);
877
+ assert.doesNotMatch(message, /preserve the leader pane/i);
844
878
  assert.match(message, /Stop remains blocked while a deep-interview question obligation is pending\./);
845
879
  } finally {
846
880
  await rm(cwd, { recursive: true, force: true });
847
881
  }
848
882
  });
849
883
 
884
+
885
+ it("includes leader-pane preservation guidance when a pane hint is available", async () => {
886
+ const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-deep-interview-pane-hint-"));
887
+ try {
888
+ const sessionId = "sess-deep-interview-pane-hint";
889
+ const sessionDir = join(cwd, ".omx", "state", "sessions", sessionId);
890
+ await mkdir(sessionDir, { recursive: true });
891
+ await writeJson(join(sessionDir, "deep-interview-state.json"), {
892
+ active: true,
893
+ mode: "deep-interview",
894
+ current_phase: "intent-first",
895
+ started_at: "2026-04-21T10:00:00.000Z",
896
+ updated_at: "2026-04-21T10:00:00.000Z",
897
+ tmux_pane_id: "%77",
898
+ });
899
+
900
+ const result = await dispatchCodexNativeHook(
901
+ {
902
+ hook_event_name: "UserPromptSubmit",
903
+ cwd,
904
+ session_id: sessionId,
905
+ thread_id: "thread-deep-interview-pane-hint",
906
+ turn_id: "turn-deep-interview-pane-hint",
907
+ prompt: "$deep-interview gather requirements",
908
+ },
909
+ { cwd },
910
+ );
911
+
912
+ assert.equal(result.omxEventName, "keyword-detector");
913
+ assert.equal(result.skillState?.skill, "deep-interview");
914
+ const message = String(
915
+ (result.outputJson as { hookSpecificOutput?: { additionalContext?: string } })?.hookSpecificOutput?.additionalContext || "",
916
+ );
917
+ assert.match(message, /OMX_QUESTION_RETURN_PANE='%77'/);
918
+ assert.match(message, /preserve the leader pane/i);
919
+ assert.match(message, /OMX_QUESTION_RETURN_PANE=%77/);
920
+ } finally {
921
+ await rm(cwd, { recursive: true, force: true });
922
+ }
923
+ });
924
+
850
925
  it("keeps bare keep-going continuation on the active ralph skill without resetting through generic keep-going routing", async () => {
851
926
  const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-ralph-bare-continuation-"));
852
927
  try {
@@ -1219,8 +1294,8 @@ esac
1219
1294
 
1220
1295
  assert.equal(result.omxEventName, "keyword-detector");
1221
1296
  const tmuxCalls = await readFile(tmuxLog, "utf-8");
1222
- assert.match(tmuxCalls, /list-panes/);
1223
- assert.match(tmuxCalls, /split-window/);
1297
+ assert.match(tmuxCalls, /list-panes -t %1 -F/);
1298
+ assert.match(tmuxCalls, /split-window -v -l 3 -d -t %1 -c/);
1224
1299
  assert.match(tmuxCalls, /resize-pane -t %9 -y 3/);
1225
1300
  assert.match(tmuxCalls, /dist\/cli\/omx\.js' hud --watch --preset=focused/);
1226
1301
  assert.doesNotMatch(tmuxCalls, /\/tmp\/codex-host-binary' hud --watch/);
@@ -1241,6 +1316,91 @@ esac
1241
1316
  }
1242
1317
  });
1243
1318
 
1319
+ it("blocks Bash omx question when no leader-pane return hint is preserved", async () => {
1320
+ const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-question-enforce-"));
1321
+ try {
1322
+ const result = await dispatchCodexNativeHook(
1323
+ {
1324
+ hook_event_name: "PreToolUse",
1325
+ cwd,
1326
+ tool_name: "Bash",
1327
+ tool_use_id: "tool-question-block",
1328
+ tool_input: { command: `omx question --json --input '{"question":"Q?","options":["A"],"allow_other":true}'` },
1329
+ },
1330
+ { cwd },
1331
+ );
1332
+
1333
+ assert.equal(result.omxEventName, "pre-tool-use");
1334
+ assert.equal((result.outputJson as { decision?: string } | null)?.decision, "block");
1335
+ assert.match(String((result.outputJson as { systemMessage?: string } | null)?.systemMessage || ""), /OMX_QUESTION_RETURN_PANE=\$TMUX_PANE/);
1336
+ } finally {
1337
+ await rm(cwd, { recursive: true, force: true });
1338
+ }
1339
+ });
1340
+
1341
+ it("allows Bash omx question when the command preserves the leader-pane return hint", async () => {
1342
+ const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-question-allow-"));
1343
+ try {
1344
+ const result = await dispatchCodexNativeHook(
1345
+ {
1346
+ hook_event_name: "PreToolUse",
1347
+ cwd,
1348
+ tool_name: "Bash",
1349
+ tool_use_id: "tool-question-allow",
1350
+ tool_input: { command: `OMX_QUESTION_RETURN_PANE=$TMUX_PANE omx question --json --input '{"question":"Q?","options":["A"],"allow_other":true}'` },
1351
+ },
1352
+ { cwd },
1353
+ );
1354
+
1355
+ assert.equal(result.omxEventName, "pre-tool-use");
1356
+ assert.equal(result.outputJson, null);
1357
+ } finally {
1358
+ await rm(cwd, { recursive: true, force: true });
1359
+ }
1360
+ });
1361
+
1362
+ it("allows the quoted pane env assignment emitted by the deep-interview bridge command", async () => {
1363
+ const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-question-quoted-allow-"));
1364
+ try {
1365
+ const result = await dispatchCodexNativeHook(
1366
+ {
1367
+ hook_event_name: "PreToolUse",
1368
+ cwd,
1369
+ tool_name: "Bash",
1370
+ tool_use_id: "tool-question-quoted-allow",
1371
+ tool_input: { command: `OMX_QUESTION_RETURN_PANE='%42' node ./dist/cli/omx.js question --json --input '{"question":"Q?","options":["A"],"allow_other":true}'` },
1372
+ },
1373
+ { cwd },
1374
+ );
1375
+
1376
+ assert.equal(result.omxEventName, "pre-tool-use");
1377
+ assert.equal(result.outputJson, null);
1378
+ } finally {
1379
+ await rm(cwd, { recursive: true, force: true });
1380
+ }
1381
+ });
1382
+
1383
+ it("blocks Bash node omx.js question when the command does not preserve the leader-pane return hint", async () => {
1384
+ const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-question-node-block-"));
1385
+ try {
1386
+ const result = await dispatchCodexNativeHook(
1387
+ {
1388
+ hook_event_name: "PreToolUse",
1389
+ cwd,
1390
+ tool_name: "Bash",
1391
+ tool_use_id: "tool-question-node-block",
1392
+ tool_input: { command: `node ./dist/cli/omx.js question --json --input '{"question":"Q?","options":["A"],"allow_other":true}'` },
1393
+ },
1394
+ { cwd },
1395
+ );
1396
+
1397
+ assert.equal(result.omxEventName, "pre-tool-use");
1398
+ assert.equal((result.outputJson as { decision?: string } | null)?.decision, "block");
1399
+ } finally {
1400
+ await rm(cwd, { recursive: true, force: true });
1401
+ }
1402
+ });
1403
+
1244
1404
  it("returns a destructive-command caution on PreToolUse for rm -rf dist", async () => {
1245
1405
  const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-danger-"));
1246
1406
  try {
@@ -2322,6 +2482,76 @@ esac
2322
2482
  }
2323
2483
  });
2324
2484
 
2485
+ it("suppresses duplicate Autopilot planning Stop replays so stale planning state cannot loop indefinitely", async () => {
2486
+ const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-autopilot-planning-replay-"));
2487
+ try {
2488
+ const stateDir = join(cwd, ".omx", "state");
2489
+ await mkdir(stateDir, { recursive: true });
2490
+ await writeJson(join(stateDir, "autopilot-state.json"), {
2491
+ active: true,
2492
+ current_phase: "planning",
2493
+ });
2494
+ const payload = {
2495
+ hook_event_name: "Stop",
2496
+ cwd,
2497
+ session_id: "sess-stop-autopilot-planning-replay",
2498
+ thread_id: "thread-stop-autopilot-planning-replay",
2499
+ turn_id: "turn-stop-autopilot-planning-replay",
2500
+ last_assistant_message: "Autopilot planning is still active.",
2501
+ };
2502
+
2503
+ const first = await dispatchCodexNativeHook(payload, { cwd });
2504
+ const replay = await dispatchCodexNativeHook(
2505
+ {
2506
+ ...payload,
2507
+ stop_hook_active: true,
2508
+ },
2509
+ { cwd },
2510
+ );
2511
+
2512
+ assert.equal(first.omxEventName, "stop");
2513
+ assert.deepEqual(first.outputJson, {
2514
+ decision: "block",
2515
+ reason:
2516
+ "OMX autopilot is still active (phase: planning); continue the task and gather fresh verification evidence before stopping.",
2517
+ stopReason: "autopilot_planning",
2518
+ systemMessage: "OMX autopilot is still active (phase: planning).",
2519
+ });
2520
+ assert.equal(replay.omxEventName, "stop");
2521
+ assert.equal(replay.outputJson, null);
2522
+ } finally {
2523
+ await rm(cwd, { recursive: true, force: true });
2524
+ }
2525
+ });
2526
+
2527
+ it("does not block Stop from stale root Autopilot planning state when the explicit session has no scoped state", async () => {
2528
+ const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-stale-root-autopilot-planning-"));
2529
+ try {
2530
+ const stateDir = join(cwd, ".omx", "state");
2531
+ await mkdir(join(stateDir, "sessions", "sess-current"), { recursive: true });
2532
+ await writeJson(join(stateDir, "session.json"), { session_id: "sess-current", cwd });
2533
+ await writeJson(join(stateDir, "autopilot-state.json"), {
2534
+ active: true,
2535
+ mode: "autopilot",
2536
+ current_phase: "planning",
2537
+ });
2538
+
2539
+ const result = await dispatchCodexNativeHook(
2540
+ {
2541
+ hook_event_name: "Stop",
2542
+ cwd,
2543
+ session_id: "sess-current",
2544
+ },
2545
+ { cwd },
2546
+ );
2547
+
2548
+ assert.equal(result.omxEventName, "stop");
2549
+ assert.equal(result.outputJson, null);
2550
+ } finally {
2551
+ await rm(cwd, { recursive: true, force: true });
2552
+ }
2553
+ });
2554
+
2325
2555
  it("does not block Stop when an explicit blocked_on_user run_outcome is present on a mode state", async () => {
2326
2556
  const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-autopilot-blocked-outcome-"));
2327
2557
  try {
@@ -3350,6 +3580,91 @@ esac
3350
3580
  }
3351
3581
  });
3352
3582
 
3583
+ it("does not re-block Stop after a same-session deep-interview question record is already answered", async () => {
3584
+ const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-deep-interview-question-answered-"));
3585
+ try {
3586
+ const sessionId = "sess-stop-deep-interview-question-answered";
3587
+ const stateDir = join(cwd, ".omx", "state");
3588
+ const sessionDir = join(stateDir, "sessions", sessionId);
3589
+ await mkdir(join(sessionDir, "questions"), { recursive: true });
3590
+ await writeJson(join(stateDir, "session.json"), { session_id: sessionId });
3591
+ await writeJson(join(sessionDir, "skill-active-state.json"), {
3592
+ version: 1,
3593
+ active: true,
3594
+ skill: "deep-interview",
3595
+ phase: "planning",
3596
+ session_id: sessionId,
3597
+ thread_id: "thread-stop-deep-interview-question-answered",
3598
+ });
3599
+ await writeJson(join(sessionDir, "deep-interview-state.json"), {
3600
+ active: false,
3601
+ mode: "deep-interview",
3602
+ current_phase: "intent-first",
3603
+ lifecycle_outcome: "askuserQuestion",
3604
+ run_outcome: "blocked_on_user",
3605
+ completed_at: "2026-04-19T03:20:30.000Z",
3606
+ session_id: sessionId,
3607
+ thread_id: "thread-stop-deep-interview-question-answered",
3608
+ question_enforcement: {
3609
+ obligation_id: "obligation-answered",
3610
+ source: "omx-question",
3611
+ status: "pending",
3612
+ lifecycle_outcome: "askuserQuestion",
3613
+ requested_at: "2026-04-19T03:20:00.000Z",
3614
+ },
3615
+ });
3616
+ await writeJson(join(sessionDir, "questions", "question-answered.json"), {
3617
+ kind: "omx.question/v1",
3618
+ question_id: "question-answered",
3619
+ session_id: sessionId,
3620
+ created_at: "2026-04-19T03:20:05.000Z",
3621
+ updated_at: "2026-04-19T03:20:10.000Z",
3622
+ status: "answered",
3623
+ question: "What should happen next?",
3624
+ options: [{ label: "Continue", value: "continue" }],
3625
+ allow_other: false,
3626
+ other_label: "Other",
3627
+ multi_select: false,
3628
+ type: "single-answerable",
3629
+ source: "deep-interview",
3630
+ answer: {
3631
+ kind: "option",
3632
+ value: "continue",
3633
+ selected_labels: ["Continue"],
3634
+ selected_values: ["continue"],
3635
+ },
3636
+ });
3637
+
3638
+ const result = await dispatchCodexNativeHook(
3639
+ {
3640
+ hook_event_name: "Stop",
3641
+ cwd,
3642
+ session_id: sessionId,
3643
+ thread_id: "thread-stop-deep-interview-question-answered",
3644
+ },
3645
+ { cwd },
3646
+ );
3647
+
3648
+ assert.equal(result.omxEventName, "stop");
3649
+ assert.equal(result.outputJson, null);
3650
+
3651
+ const state = JSON.parse(
3652
+ await readFile(join(sessionDir, "deep-interview-state.json"), "utf-8"),
3653
+ ) as {
3654
+ lifecycle_outcome?: string;
3655
+ question_enforcement?: { status?: string; question_id?: string; satisfied_at?: string };
3656
+ run_outcome?: string;
3657
+ };
3658
+ assert.equal(state.question_enforcement?.status, "satisfied");
3659
+ assert.equal(state.question_enforcement?.question_id, "question-answered");
3660
+ assert.ok(state.question_enforcement?.satisfied_at);
3661
+ assert.equal(state.lifecycle_outcome, undefined);
3662
+ assert.equal(state.run_outcome, undefined);
3663
+ } finally {
3664
+ await rm(cwd, { recursive: true, force: true });
3665
+ }
3666
+ });
3667
+
3353
3668
  it("keeps blocking pending deep-interview question Stop replays until the obligation changes", async () => {
3354
3669
  const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-deep-interview-question-replay-"));
3355
3670
  try {