oh-my-codex 0.18.9 → 0.18.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/Cargo.lock +6 -6
  2. package/Cargo.toml +1 -1
  3. package/README.md +4 -0
  4. package/dist/autopilot/__tests__/deep-interview-gate.test.d.ts +2 -0
  5. package/dist/autopilot/__tests__/deep-interview-gate.test.d.ts.map +1 -0
  6. package/dist/autopilot/__tests__/deep-interview-gate.test.js +215 -0
  7. package/dist/autopilot/__tests__/deep-interview-gate.test.js.map +1 -0
  8. package/dist/autopilot/__tests__/ralplan-gate.test.js +148 -0
  9. package/dist/autopilot/__tests__/ralplan-gate.test.js.map +1 -1
  10. package/dist/autopilot/deep-interview-gate.d.ts.map +1 -1
  11. package/dist/autopilot/deep-interview-gate.js +140 -0
  12. package/dist/autopilot/deep-interview-gate.js.map +1 -1
  13. package/dist/cli/__tests__/auth.test.js +36 -3
  14. package/dist/cli/__tests__/auth.test.js.map +1 -1
  15. package/dist/cli/__tests__/codex-feature-probe.test.d.ts +2 -0
  16. package/dist/cli/__tests__/codex-feature-probe.test.d.ts.map +1 -0
  17. package/dist/cli/__tests__/codex-feature-probe.test.js +46 -0
  18. package/dist/cli/__tests__/codex-feature-probe.test.js.map +1 -0
  19. package/dist/cli/__tests__/doctor-warning-copy.test.js +2 -0
  20. package/dist/cli/__tests__/doctor-warning-copy.test.js.map +1 -1
  21. package/dist/cli/__tests__/index.test.js +251 -5
  22. package/dist/cli/__tests__/index.test.js.map +1 -1
  23. package/dist/cli/__tests__/launch-fallback.test.js +19 -5
  24. package/dist/cli/__tests__/launch-fallback.test.js.map +1 -1
  25. package/dist/cli/__tests__/package-bin-contract.test.js +19 -6
  26. package/dist/cli/__tests__/package-bin-contract.test.js.map +1 -1
  27. package/dist/cli/__tests__/setup-refresh.test.js +6 -2
  28. package/dist/cli/__tests__/setup-refresh.test.js.map +1 -1
  29. package/dist/cli/__tests__/sparkshell-packaging.test.js +45 -2
  30. package/dist/cli/__tests__/sparkshell-packaging.test.js.map +1 -1
  31. package/dist/cli/__tests__/team-decompose.test.js +10 -5
  32. package/dist/cli/__tests__/team-decompose.test.js.map +1 -1
  33. package/dist/cli/__tests__/team.test.js +45 -1
  34. package/dist/cli/__tests__/team.test.js.map +1 -1
  35. package/dist/cli/__tests__/ultragoal.test.js +75 -0
  36. package/dist/cli/__tests__/ultragoal.test.js.map +1 -1
  37. package/dist/cli/auth.d.ts.map +1 -1
  38. package/dist/cli/auth.js +25 -1
  39. package/dist/cli/auth.js.map +1 -1
  40. package/dist/cli/codex-feature-probe.d.ts +5 -2
  41. package/dist/cli/codex-feature-probe.d.ts.map +1 -1
  42. package/dist/cli/codex-feature-probe.js +25 -9
  43. package/dist/cli/codex-feature-probe.js.map +1 -1
  44. package/dist/cli/index.d.ts +28 -2
  45. package/dist/cli/index.d.ts.map +1 -1
  46. package/dist/cli/index.js +149 -88
  47. package/dist/cli/index.js.map +1 -1
  48. package/dist/cli/setup.d.ts.map +1 -1
  49. package/dist/cli/setup.js +9 -1
  50. package/dist/cli/setup.js.map +1 -1
  51. package/dist/cli/team.d.ts +4 -0
  52. package/dist/cli/team.d.ts.map +1 -1
  53. package/dist/cli/team.js +43 -4
  54. package/dist/cli/team.js.map +1 -1
  55. package/dist/cli/ultragoal.d.ts.map +1 -1
  56. package/dist/cli/ultragoal.js +29 -0
  57. package/dist/cli/ultragoal.js.map +1 -1
  58. package/dist/hooks/__tests__/agents-overlay.test.js +1 -0
  59. package/dist/hooks/__tests__/agents-overlay.test.js.map +1 -1
  60. package/dist/hooks/__tests__/autopilot-skill-contract.test.js +15 -0
  61. package/dist/hooks/__tests__/autopilot-skill-contract.test.js.map +1 -1
  62. package/dist/hooks/__tests__/deep-interview-contract.test.js +16 -0
  63. package/dist/hooks/__tests__/deep-interview-contract.test.js.map +1 -1
  64. package/dist/hooks/__tests__/skill-guidance-contract.test.js +14 -5
  65. package/dist/hooks/__tests__/skill-guidance-contract.test.js.map +1 -1
  66. package/dist/hooks/agents-overlay.d.ts.map +1 -1
  67. package/dist/hooks/agents-overlay.js +2 -1
  68. package/dist/hooks/agents-overlay.js.map +1 -1
  69. package/dist/hooks/extensibility/__tests__/plugin-runner.test.js +112 -1
  70. package/dist/hooks/extensibility/__tests__/plugin-runner.test.js.map +1 -1
  71. package/dist/hooks/extensibility/plugin-runner-stdin.d.ts +2 -0
  72. package/dist/hooks/extensibility/plugin-runner-stdin.d.ts.map +1 -0
  73. package/dist/hooks/extensibility/plugin-runner-stdin.js +16 -0
  74. package/dist/hooks/extensibility/plugin-runner-stdin.js.map +1 -0
  75. package/dist/hooks/extensibility/plugin-runner.js +2 -4
  76. package/dist/hooks/extensibility/plugin-runner.js.map +1 -1
  77. package/dist/hud/__tests__/index.test.js +23 -2
  78. package/dist/hud/__tests__/index.test.js.map +1 -1
  79. package/dist/hud/__tests__/reconcile.test.js +266 -0
  80. package/dist/hud/__tests__/reconcile.test.js.map +1 -1
  81. package/dist/hud/__tests__/tmux.test.js +118 -7
  82. package/dist/hud/__tests__/tmux.test.js.map +1 -1
  83. package/dist/hud/index.d.ts +6 -1
  84. package/dist/hud/index.d.ts.map +1 -1
  85. package/dist/hud/index.js +12 -3
  86. package/dist/hud/index.js.map +1 -1
  87. package/dist/hud/reconcile.d.ts +6 -2
  88. package/dist/hud/reconcile.d.ts.map +1 -1
  89. package/dist/hud/reconcile.js +58 -28
  90. package/dist/hud/reconcile.js.map +1 -1
  91. package/dist/hud/tmux.d.ts +14 -1
  92. package/dist/hud/tmux.d.ts.map +1 -1
  93. package/dist/hud/tmux.js +129 -15
  94. package/dist/hud/tmux.js.map +1 -1
  95. package/dist/ralplan/consensus-gate.js +9 -1
  96. package/dist/ralplan/consensus-gate.js.map +1 -1
  97. package/dist/scripts/__tests__/codex-native-hook.test.js +168 -15
  98. package/dist/scripts/__tests__/codex-native-hook.test.js.map +1 -1
  99. package/dist/scripts/__tests__/run-test-files.test.js +115 -1
  100. package/dist/scripts/__tests__/run-test-files.test.js.map +1 -1
  101. package/dist/scripts/codex-native-hook.d.ts.map +1 -1
  102. package/dist/scripts/codex-native-hook.js +74 -11
  103. package/dist/scripts/codex-native-hook.js.map +1 -1
  104. package/dist/scripts/notify-hook/team-worker-stop.d.ts.map +1 -1
  105. package/dist/scripts/notify-hook/team-worker-stop.js +54 -21
  106. package/dist/scripts/notify-hook/team-worker-stop.js.map +1 -1
  107. package/dist/scripts/run-test-files.js +218 -160
  108. package/dist/scripts/run-test-files.js.map +1 -1
  109. package/dist/state/__tests__/operations.test.js +463 -0
  110. package/dist/state/__tests__/operations.test.js.map +1 -1
  111. package/dist/team/__tests__/delivery-log.test.js +18 -0
  112. package/dist/team/__tests__/delivery-log.test.js.map +1 -1
  113. package/dist/team/__tests__/runtime.test.js +48 -0
  114. package/dist/team/__tests__/runtime.test.js.map +1 -1
  115. package/dist/team/__tests__/tmux-session.test.js +107 -0
  116. package/dist/team/__tests__/tmux-session.test.js.map +1 -1
  117. package/dist/team/__tests__/tmux-test-fixture.d.ts.map +1 -1
  118. package/dist/team/__tests__/tmux-test-fixture.js +14 -2
  119. package/dist/team/__tests__/tmux-test-fixture.js.map +1 -1
  120. package/dist/team/__tests__/tmux-test-fixture.test.js +1 -0
  121. package/dist/team/__tests__/tmux-test-fixture.test.js.map +1 -1
  122. package/dist/team/__tests__/worker-bootstrap.test.js +54 -1
  123. package/dist/team/__tests__/worker-bootstrap.test.js.map +1 -1
  124. package/dist/team/delivery-log.d.ts +1 -1
  125. package/dist/team/delivery-log.d.ts.map +1 -1
  126. package/dist/team/delivery-log.js.map +1 -1
  127. package/dist/team/repo-aware-decomposition.d.ts +4 -0
  128. package/dist/team/repo-aware-decomposition.d.ts.map +1 -1
  129. package/dist/team/repo-aware-decomposition.js.map +1 -1
  130. package/dist/team/runtime.d.ts.map +1 -1
  131. package/dist/team/runtime.js +78 -9
  132. package/dist/team/runtime.js.map +1 -1
  133. package/dist/team/tmux-session.d.ts +1 -0
  134. package/dist/team/tmux-session.d.ts.map +1 -1
  135. package/dist/team/tmux-session.js +16 -5
  136. package/dist/team/tmux-session.js.map +1 -1
  137. package/dist/team/ultragoal-context.d.ts +12 -0
  138. package/dist/team/ultragoal-context.d.ts.map +1 -1
  139. package/dist/team/ultragoal-context.js +32 -8
  140. package/dist/team/ultragoal-context.js.map +1 -1
  141. package/dist/utils/__tests__/paths.test.js +23 -0
  142. package/dist/utils/__tests__/paths.test.js.map +1 -1
  143. package/dist/utils/paths.d.ts.map +1 -1
  144. package/dist/utils/paths.js +4 -2
  145. package/dist/utils/paths.js.map +1 -1
  146. package/dist/utils/toml.d.ts +4 -0
  147. package/dist/utils/toml.d.ts.map +1 -0
  148. package/dist/utils/toml.js +75 -0
  149. package/dist/utils/toml.js.map +1 -0
  150. package/package.json +1 -1
  151. package/plugins/oh-my-codex/.codex-plugin/plugin.json +1 -1
  152. package/plugins/oh-my-codex/skills/autopilot/SKILL.md +3 -0
  153. package/plugins/oh-my-codex/skills/deep-interview/SKILL.md +34 -0
  154. package/plugins/oh-my-codex/skills/ultrawork/SKILL.md +32 -17
  155. package/skills/autopilot/SKILL.md +3 -0
  156. package/skills/deep-interview/SKILL.md +34 -0
  157. package/skills/ultrawork/SKILL.md +32 -17
  158. package/src/scripts/__tests__/codex-native-hook.test.ts +216 -26
  159. package/src/scripts/__tests__/run-test-files.test.ts +138 -2
  160. package/src/scripts/codex-native-hook.ts +80 -10
  161. package/src/scripts/notify-hook/team-worker-stop.ts +58 -18
  162. package/src/scripts/run-test-files.ts +229 -150
  163. package/templates/AGENTS.md +40 -199
@@ -4,22 +4,23 @@ description: Parallel execution engine for high-throughput task completion
4
4
  ---
5
5
 
6
6
  <Purpose>
7
- Ultrawork is a parallel execution engine for high-throughput task completion. It is a component, not a standalone persistence mode: it provides parallelism, context discipline, and smart delegation guidance, but not Ralph's persistence loop, architect sign-off, or long-running completion guarantees.
7
+ Ultrawork is a parallel execution engine for high-throughput task completion. It is a component, not a standalone persistence or verification mode: it provides parallelism, context discipline, and smart delegation guidance, but not durable goal tracking, Team's tmux worker lifecycle, Ralph's legacy persistence loop, architect sign-off, or long-running completion guarantees.
8
8
  </Purpose>
9
9
 
10
10
  <Use_When>
11
11
  - Multiple independent tasks can run simultaneously
12
12
  - User says "ulw", "ultrawork", or explicitly wants parallel execution
13
13
  - Task benefits from concurrent execution plus lightweight evidence before wrap-up
14
- - You need a direct-tool lane plus optional background evidence lanes without entering Ralph
14
+ - You need a direct-tool lane plus optional background evidence lanes without entering Team or a durable goal workflow
15
15
  </Use_When>
16
16
 
17
17
  <Do_Not_Use_When>
18
- - Task requires guaranteed completion with persistence, architect verification, or deslop/reverification -- use `ralph` instead (Ralph includes ultrawork)
19
- - Task requires a full autonomous pipeline -- use `autopilot` instead (autopilot defaults to Ultragoal, with Team/parallel execution used only when needed)
20
- - There is only one sequential task with no parallelism opportunity -- execute directly or delegate to a single `executor`
18
+ - Task needs durable goal tracking, ledger checkpoints, or resume across stories -- use `ultragoal` instead
19
+ - Task needs coordinated tmux workers, shared task state, mailbox/dispatch coordination, or long-running parallel execution -- use `team` instead
20
+ - Task requires a full autonomous pipeline -- use `autopilot` instead (default loop: `deep-interview -> ralplan -> ultragoal`, with `team` only when needed)
21
+ - Task intentionally requires the legacy persistent single-owner completion/verification loop -- use `ralph` explicitly; do not present it as the default durable path
22
+ - There is only one sequential task with no parallelism opportunity -- execute directly, use `ultragoal` for durable tracking, or delegate to a single `executor`
21
23
  - The request is still in plan-consensus mode -- keep planning artifacts in `ralplan` until execution is explicitly authorized
22
- - User needs session persistence for resume -- use `ralph`, which adds persistence on top of ultrawork
23
24
  </Do_Not_Use_When>
24
25
 
25
26
  <Why_This_Exists>
@@ -138,8 +139,12 @@ Why bad: No verification output, no acceptance evidence, and no manual QA note w
138
139
  </Examples>
139
140
 
140
141
  <Escalation_And_Stop_Conditions>
141
- - When ultrawork is invoked directly (not via Ralph), apply lightweight verification only -- build/typecheck passes when relevant, affected tests pass, and manual QA notes are captured when needed.
142
- - Ralph owns persistence, architect verification, deslop, and the full verified-completion promise. Do not claim those guarantees from direct ultrawork alone.
142
+ - When ultrawork is invoked directly, apply lightweight verification only -- build/typecheck passes when relevant, affected tests pass, and manual QA notes are captured when needed.
143
+ - Ultrawork does not own persistence, durable ledgers, architect verification, deslop, full QA, or the full verified-completion promise. Do not claim those guarantees from direct ultrawork alone.
144
+ - Escalate to `ultragoal` when the work needs durable goal state, story checkpoints, or resume across implementation steps.
145
+ - Escalate to `team` when the work needs coordinated tmux workers, shared task state, or durable multi-worker lifecycle control.
146
+ - Escalate to explicitly requested `ralph` only for the supported legacy single-owner persistence/verification fallback.
147
+ - Ralph owns persistence, architect verification, deslop, and the full verified-completion promise only when explicitly selected as the supported legacy fallback; direct ultrawork does not own those guarantees.
143
148
  - If a task fails repeatedly across retries, report the issue rather than retrying indefinitely.
144
149
  - Escalate to the user when tasks have unclear dependencies, conflicting requirements, or a materially branching acceptance target.
145
150
  </Escalation_And_Stop_Conditions>
@@ -159,17 +164,27 @@ Why bad: No verification output, no acceptance evidence, and no manual QA note w
159
164
  ## Relationship to Other Modes
160
165
 
161
166
  ```
162
- ralph (persistence + verified completion wrapper)
163
- \-- includes: ultrawork (this skill)
164
- \-- provides: high-throughput execution + lightweight evidence
167
+ ultrawork (this skill)
168
+ \-- provides: in-session parallel execution discipline + lightweight evidence
165
169
 
166
- autopilot (autonomous execution)
167
- \-- includes: ralph
168
- \-- includes: ultrawork (this skill)
170
+ ultragoal (durable goal execution)
171
+ \-- owns: goal ledger, checkpoints, resume across stories, final gate discipline
172
+ \-- may use: team for parallel lanes when a story benefits from coordinated workers
169
173
 
170
- ecomode (token efficiency)
171
- \-- modifies: ultrawork's model selection
174
+ team (tmux coordinated execution)
175
+ \-- owns: worker panes, shared task state, mailbox/dispatch, lifecycle control
176
+ \-- can return: checkpoint-ready evidence to an Ultragoal leader
177
+
178
+ autopilot (strict autonomous delivery loop)
179
+ \-- default flow: deep-interview -> ralplan -> ultragoal -> code-review -> ultraqa
180
+ \-- may use: team only when an Ultragoal story needs parallel execution
181
+
182
+ ralph (supported legacy explicit fallback)
183
+ \-- owns: single-owner persistence loop + architect verification when intentionally selected
184
+
185
+ ecomode (deprecated compatibility-only)
186
+ \-- do not route users there from ultrawork; it is not the current model-selection path
172
187
  ```
173
188
 
174
- Ultrawork is the parallelism and execution-discipline layer. Ralph adds persistence, architect verification, deslop, and retry-until-done behavior. Autopilot adds the broader autonomous lifecycle pipeline. Ecomode adjusts ultrawork's model routing to favor cheaper models.
189
+ Ultrawork is the parallelism and execution-discipline layer. Ultragoal is the current default durable goal/ledger follow-up. Team is the coordinated tmux parallel runtime, often nested under an Ultragoal story when durable work needs multiple lanes. Autopilot orchestrates the full default lifecycle through deep-interview, ralplan, ultragoal, code-review, and ultraqa. Ralph remains active as an explicit legacy fallback for persistent single-owner verification, but it is not the recommended default durable path. Ecomode is deprecated compatibility-only and should not be advertised as the ultrawork model-selection route.
175
190
  </Advanced>
@@ -441,23 +441,77 @@ describe("codex native hook dispatch", () => {
441
441
  );
442
442
  });
443
443
 
444
- it("emits schema-safe JSON stdout when CLI stdin is malformed", () => {
445
- const stdout = runNativeHookCli("{");
444
+ it("emits Stop-schema-safe block JSON when unidentifiable malformed stdin has native Stop runtime surface", async () => {
445
+ const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-cli-malformed-stop-surface-"));
446
+ try {
447
+ await mkdir(join(cwd, ".omx"), { recursive: true });
448
+ const result = spawnSync(process.execPath, [nativeHookScriptPath()], {
449
+ cwd,
450
+ input: "{",
451
+ encoding: "utf-8",
452
+ stdio: ["pipe", "pipe", "pipe"],
453
+ });
446
454
 
447
- const output = parseSingleJsonStdout(stdout) as {
448
- continue?: boolean;
449
- stopReason?: string;
450
- systemMessage?: string;
451
- hookSpecificOutput?: unknown;
452
- };
455
+ assert.equal(result.status, 0, result.stderr || result.stdout);
456
+ assert.equal(result.stderr, "");
457
+ const output = parseSingleJsonStdout(result.stdout) as {
458
+ decision?: string;
459
+ continue?: boolean;
460
+ reason?: string;
461
+ stopReason?: string;
462
+ systemMessage?: string;
463
+ hookSpecificOutput?: unknown;
464
+ };
453
465
 
454
- assert.equal(output.continue, false);
455
- assert.equal(output.stopReason, "native_hook_stdin_parse_error");
456
- assert.equal(output.hookSpecificOutput, undefined);
457
- assert.match(
458
- String(output.systemMessage ?? ""),
459
- /stdin JSON parsing failed inside codex-native-hook:/,
460
- );
466
+ assert.equal(output.decision, "block");
467
+ assert.equal(output.continue, undefined);
468
+ assert.equal(
469
+ output.reason,
470
+ "OMX native hook received malformed JSON input. Preserve runtime state, inspect the emitting hook payload yourself, and retry with valid JSON.",
471
+ );
472
+ assert.equal(output.stopReason, "native_hook_stdin_parse_error");
473
+ assert.equal(output.hookSpecificOutput, undefined);
474
+ assert.match(
475
+ String(output.systemMessage ?? ""),
476
+ /stdin JSON parsing failed inside codex-native-hook:/,
477
+ );
478
+ } finally {
479
+ await rm(cwd, { recursive: true, force: true });
480
+ }
481
+ });
482
+
483
+ it("preserves non-Stop fail-closed JSON when malformed stdin identifies a non-Stop hook", async () => {
484
+ const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-cli-malformed-nonstop-"));
485
+ try {
486
+ await mkdir(join(cwd, ".omx"), { recursive: true });
487
+ const result = spawnSync(process.execPath, [nativeHookScriptPath()], {
488
+ cwd,
489
+ input: '{hook_event_name:"PreToolUse",',
490
+ encoding: "utf-8",
491
+ stdio: ["pipe", "pipe", "pipe"],
492
+ });
493
+
494
+ assert.equal(result.status, 0, result.stderr || result.stdout);
495
+ assert.equal(result.stderr, "");
496
+ const output = parseSingleJsonStdout(result.stdout) as {
497
+ continue?: boolean;
498
+ decision?: string;
499
+ stopReason?: string;
500
+ systemMessage?: string;
501
+ hookSpecificOutput?: unknown;
502
+ };
503
+
504
+ assert.equal(output.continue, false);
505
+ assert.equal(output.decision, undefined);
506
+ assert.equal(output.stopReason, "native_hook_stdin_parse_error");
507
+ assert.equal(output.hookSpecificOutput, undefined);
508
+ assert.match(
509
+ String(output.systemMessage ?? ""),
510
+ /stdin JSON parsing failed inside codex-native-hook:/,
511
+ );
512
+ } finally {
513
+ await rm(cwd, { recursive: true, force: true });
514
+ }
461
515
  });
462
516
 
463
517
  it("redacts unterminated prompt-like malformed stdin fields", async () => {
@@ -5849,6 +5903,76 @@ exit 0
5849
5903
  }
5850
5904
  });
5851
5905
 
5906
+ it("allows null-device fd redirects while deep-interview blocks real Bash writes", async () => {
5907
+ const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-deep-interview-null-redirect-"));
5908
+ try {
5909
+ const stateDir = join(cwd, ".omx", "state");
5910
+ const sessionDir = join(stateDir, "sessions", "sess-di-null-redirect");
5911
+ await mkdir(sessionDir, { recursive: true });
5912
+ await writeJson(join(stateDir, "session.json"), { session_id: "sess-di-null-redirect", cwd });
5913
+ await writeJson(join(sessionDir, "skill-active-state.json"), {
5914
+ version: 1,
5915
+ active: true,
5916
+ skill: "deep-interview",
5917
+ phase: "planning",
5918
+ session_id: "sess-di-null-redirect",
5919
+ active_skills: [{ skill: "deep-interview", phase: "planning", active: true, session_id: "sess-di-null-redirect" }],
5920
+ });
5921
+ await writeJson(join(sessionDir, "deep-interview-state.json"), {
5922
+ active: true,
5923
+ mode: "deep-interview",
5924
+ current_phase: "intent-first",
5925
+ session_id: "sess-di-null-redirect",
5926
+ });
5927
+
5928
+ const allowedCommands = [
5929
+ "find application -type d -name 'bug-tracking*' 2>/dev/null | head -20",
5930
+ "find application -type d -name 'bug-tracking*' 2> /dev/null | head -20",
5931
+ "find application -type d -name 'bug-tracking*' 2>NUL | head -20",
5932
+ "find application -type d -name 'bug-tracking*' 1>/dev/null",
5933
+ "find application -type d -name 'bug-tracking*' &>/dev/null",
5934
+ ];
5935
+
5936
+ for (const [index, command] of allowedCommands.entries()) {
5937
+ const result = await dispatchCodexNativeHook(
5938
+ {
5939
+ hook_event_name: "PreToolUse",
5940
+ cwd,
5941
+ session_id: "sess-di-null-redirect",
5942
+ tool_name: "Bash",
5943
+ tool_use_id: `tool-di-null-redirect-${index}`,
5944
+ tool_input: { command },
5945
+ },
5946
+ { cwd },
5947
+ );
5948
+ assert.equal(result.outputJson, null, command);
5949
+ }
5950
+
5951
+ const blockedCommands = [
5952
+ "find application -type d -name 'bug-tracking*' 2>errors.log | head -20",
5953
+ "find application -type d -name 'bug-tracking*' > /tmp/bug-tracking.txt",
5954
+ "find application -type d -name 'bug-tracking*' | tee /dev/null",
5955
+ ];
5956
+
5957
+ for (const [index, command] of blockedCommands.entries()) {
5958
+ const result = await dispatchCodexNativeHook(
5959
+ {
5960
+ hook_event_name: "PreToolUse",
5961
+ cwd,
5962
+ session_id: "sess-di-null-redirect",
5963
+ tool_name: "Bash",
5964
+ tool_use_id: `tool-di-real-redirect-${index}`,
5965
+ tool_input: { command },
5966
+ },
5967
+ { cwd },
5968
+ );
5969
+ assert.equal((result.outputJson as { decision?: string } | null)?.decision, "block", command);
5970
+ }
5971
+ } finally {
5972
+ await rm(cwd, { recursive: true, force: true });
5973
+ }
5974
+ });
5975
+
5852
5976
  it("allows implementation tools after an explicit deep-interview handoff deactivates the mode", async () => {
5853
5977
  const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-deep-interview-handoff-"));
5854
5978
  try {
@@ -8341,6 +8465,52 @@ exit 0
8341
8465
  }
8342
8466
  });
8343
8467
 
8468
+ it("suppresses parent Autopilot Stop continuation in side conversations", async () => {
8469
+ const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-autopilot-side-conversation-"));
8470
+ try {
8471
+ const stateDir = join(cwd, ".omx", "state");
8472
+ const sessionId = "sess-stop-autopilot-side-conversation";
8473
+ const transcriptPath = join(cwd, "side-conversation-rollout.jsonl");
8474
+ await mkdir(join(stateDir, "sessions", sessionId), { recursive: true });
8475
+ await writeJson(join(stateDir, "sessions", sessionId, "autopilot-state.json"), {
8476
+ active: true,
8477
+ mode: "autopilot",
8478
+ current_phase: "deep-interview",
8479
+ });
8480
+ await writeFile(
8481
+ transcriptPath,
8482
+ `${JSON.stringify({
8483
+ type: "message",
8484
+ role: "user",
8485
+ content: [
8486
+ "Side conversation boundary.",
8487
+ "Everything before this boundary is inherited history from the parent thread. It is reference context only. It is not your current task.",
8488
+ "Only messages submitted after this boundary are active user instructions for this side conversation.",
8489
+ "You are a side-conversation assistant, separate from the main thread.",
8490
+ ].join("\n\n"),
8491
+ })}\n`,
8492
+ "utf-8",
8493
+ );
8494
+
8495
+ const result = await dispatchCodexNativeHook(
8496
+ {
8497
+ hook_event_name: "Stop",
8498
+ cwd,
8499
+ session_id: sessionId,
8500
+ thread_id: "thread-stop-autopilot-side-conversation",
8501
+ transcript_path: transcriptPath,
8502
+ last_assistant_message: "Waiting for a new side-conversation question.",
8503
+ },
8504
+ { cwd },
8505
+ );
8506
+
8507
+ assert.equal(result.omxEventName, "stop");
8508
+ assert.equal(result.outputJson, null);
8509
+ } finally {
8510
+ await rm(cwd, { recursive: true, force: true });
8511
+ }
8512
+ });
8513
+
8344
8514
  it("requires Autopilot code review after a compact-boundary Stop exemption", async () => {
8345
8515
  const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-autopilot-review-compact-"));
8346
8516
  try {
@@ -9082,7 +9252,7 @@ exit 0
9082
9252
  }
9083
9253
  });
9084
9254
 
9085
- it("queues worker Stop leader nudge with Tab and submit when leader pane is busy", async () => {
9255
+ it("steers worker Stop leader nudge directly when leader pane is busy", async () => {
9086
9256
  const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-team-worker-busy-leader-"));
9087
9257
  const prevTeamWorker = process.env.OMX_TEAM_WORKER;
9088
9258
  const prevTeamStateRoot = process.env.OMX_TEAM_STATE_ROOT;
@@ -9155,14 +9325,11 @@ exit 0
9155
9325
  assert.equal(result.outputJson, null);
9156
9326
  const tmuxLog = await readFile(tmuxLogPath, "utf-8");
9157
9327
  assert.match(tmuxLog, /send-keys -t %42 -l \[OMX\] worker-1 native Stop allowed/);
9158
- assert.match(tmuxLog, /send-keys -t %42 Tab/);
9159
- assert.match(tmuxLog, /send-keys -t %42 C-m/);
9160
- assert.ok(
9161
- tmuxLog.indexOf("send-keys -t %42 Tab") < tmuxLog.indexOf("send-keys -t %42 C-m"),
9162
- "busy worker-stop nudge should press Tab before C-m",
9163
- );
9328
+ assert.doesNotMatch(tmuxLog, /send-keys -t %42 Tab/);
9329
+ const submits = tmuxLog.match(/send-keys -t %42 C-m/g) || [];
9330
+ assert.equal(submits.length, 2, "busy worker-stop nudge should submit directly as steering, not queue via Tab");
9164
9331
  const nudgeState = JSON.parse(await readFile(join(workerDir, "worker-stop-nudge.json"), "utf-8"));
9165
- assert.equal(nudgeState.delivery, "queued");
9332
+ assert.equal(nudgeState.delivery, "steered");
9166
9333
  } finally {
9167
9334
  if (typeof prevTeamWorker === "string") process.env.OMX_TEAM_WORKER = prevTeamWorker;
9168
9335
  else delete process.env.OMX_TEAM_WORKER;
@@ -9298,6 +9465,14 @@ exit 0
9298
9465
  });
9299
9466
  assert.equal(shutdownResult.result, "team_state_gone_or_shutdown");
9300
9467
  assert.equal(existsSync(join(stateDir, "team", "shutdown-team", "worker-stop-nudge.json")), false);
9468
+ const deliveryLogPath = join(logsDir, `team-delivery-${new Date().toISOString().split("T")[0]}.jsonl`);
9469
+ const deliveryEvents = (await readFile(deliveryLogPath, "utf-8"))
9470
+ .trim()
9471
+ .split("\n")
9472
+ .map((line) => JSON.parse(line));
9473
+ const suppressedEvents = deliveryEvents.filter((event) => event.reason === "team_state_gone_or_shutdown");
9474
+ assert.equal(suppressedEvents.length, 2, "late closed-team Stop nudges should be diagnostics, not queued prompts");
9475
+ assert.equal(suppressedEvents.every((event) => event.result === "suppressed" && event.transport === "none"), true);
9301
9476
  } finally {
9302
9477
  await rm(cwd, { recursive: true, force: true });
9303
9478
  }
@@ -9338,13 +9513,13 @@ exit 0
9338
9513
  workerContext: { teamName, workerName: "worker-2" },
9339
9514
  });
9340
9515
 
9341
- assert.equal(result.result, "queued");
9516
+ assert.equal(result.result, "steered");
9342
9517
  const tmuxLog = await readFile(tmuxLogPath, "utf-8");
9343
9518
  assert.match(tmuxLog, /send-keys -t %42 -l \[OMX\] worker-2 native Stop allowed/);
9344
- assert.match(tmuxLog, /send-keys -t %42 Tab/);
9519
+ assert.doesNotMatch(tmuxLog, /send-keys -t %42 Tab/);
9345
9520
  const teamNudgeState = JSON.parse(await readFile(join(teamDir, "worker-stop-nudge.json"), "utf-8"));
9346
9521
  assert.equal(teamNudgeState.worker, "worker-2");
9347
- assert.equal(teamNudgeState.delivery, "queued");
9522
+ assert.equal(teamNudgeState.delivery, "steered");
9348
9523
  } finally {
9349
9524
  if (typeof prevPath === "string") process.env.PATH = prevPath;
9350
9525
  else delete process.env.PATH;
@@ -9477,6 +9652,21 @@ exit 0
9477
9652
  assert.equal(existsSync(teamDir), false, "deferred worker Stop recording must not recreate removed team state");
9478
9653
  const tmuxLog = await readFile(tmuxLogPath, "utf-8");
9479
9654
  assert.doesNotMatch(tmuxLog, /send-keys -t %42 -l \[OMX\] worker-1 native Stop allowed/);
9655
+ const deliveryLogPath = join(logsDir, `team-delivery-${new Date().toISOString().split("T")[0]}.jsonl`);
9656
+ const deliveryEvents = (await readFile(deliveryLogPath, "utf-8"))
9657
+ .trim()
9658
+ .split("\n")
9659
+ .map((line) => JSON.parse(line));
9660
+ assert.equal(
9661
+ deliveryEvents.some((event) =>
9662
+ event.team === teamName
9663
+ && event.result === "suppressed"
9664
+ && event.transport === "none"
9665
+ && event.reason === "team_state_gone_or_shutdown"
9666
+ ),
9667
+ true,
9668
+ "teardown-race worker Stop nudges should be diagnostic suppression events, not queued prompts",
9669
+ );
9480
9670
  } finally {
9481
9671
  if (typeof prevPath === "string") process.env.PATH = prevPath;
9482
9672
  else delete process.env.PATH;
@@ -5,7 +5,7 @@ import { join } from 'node:path';
5
5
  import { describe, it } from 'node:test';
6
6
  import assert from 'node:assert/strict';
7
7
 
8
- function runCompiledRunner(root: string, envOverrides: Record<string, string> = {}, timeoutMs = 5_000) {
8
+ function runCompiledRunner(root: string, envOverrides: Record<string, string> = {}, timeoutMs = 15_000) {
9
9
  return spawnSync(process.execPath, ['dist/scripts/run-test-files.js', root], {
10
10
  cwd: process.cwd(),
11
11
  encoding: 'utf-8',
@@ -79,7 +79,6 @@ describe('run-test-files diagnostics', () => {
79
79
  }
80
80
  });
81
81
 
82
-
83
82
  it('script-level force exit terminates a completed test child that blocks process exit', () => {
84
83
  const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
85
84
  try {
@@ -181,6 +180,33 @@ describe('run-test-files diagnostics', () => {
181
180
  }
182
181
  });
183
182
 
183
+ it('applies the runner timeout per test file instead of skipping later files after cumulative runtime', () => {
184
+ const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
185
+ try {
186
+ const testsDir = join(wd, '__tests__');
187
+ mkdirSync(testsDir, { recursive: true });
188
+ for (const name of ['a-slow-pass.test.js', 'b-slow-pass.test.js']) {
189
+ writeFileSync(
190
+ join(testsDir, name),
191
+ [
192
+ "import { test } from 'node:test';",
193
+ "test('passes after a short delay', async () => {",
194
+ " await new Promise((resolve) => setTimeout(resolve, 450));",
195
+ "});",
196
+ '',
197
+ ].join('\n'),
198
+ );
199
+ }
200
+
201
+ const result = runCompiledRunner(wd, { OMX_NODE_TEST_RUNNER_TIMEOUT_MS: '750' }, 3_000);
202
+
203
+ assert.equal(result.status, 0, result.stderr || result.stdout);
204
+ assert.doesNotMatch(result.stderr, /timeout before/);
205
+ } finally {
206
+ rmSync(wd, { recursive: true, force: true });
207
+ }
208
+ });
209
+
184
210
  it('logs that per-test timeout is disabled by default', () => {
185
211
  const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
186
212
  try {
@@ -204,6 +230,29 @@ describe('run-test-files diagnostics', () => {
204
230
  }
205
231
  });
206
232
 
233
+ it('serializes local test files by default to avoid runaway full-suite fan-out', () => {
234
+ const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
235
+ try {
236
+ const testsDir = join(wd, '__tests__');
237
+ mkdirSync(testsDir, { recursive: true });
238
+ writeFileSync(
239
+ join(testsDir, 'pass.test.js'),
240
+ [
241
+ "import { test } from 'node:test';",
242
+ "test('passes', () => {});",
243
+ '',
244
+ ].join('\n'),
245
+ );
246
+
247
+ const result = runCompiledRunner(wd, { CI: 'false', GITHUB_ACTIONS: 'false' });
248
+
249
+ assert.equal(result.status, 0, result.stderr || result.stdout);
250
+ assert.match(result.stderr, /test concurrency 1/);
251
+ } finally {
252
+ rmSync(wd, { recursive: true, force: true });
253
+ }
254
+ });
255
+
207
256
  it('serializes test files by default in CI to avoid cross-file child-process leaks', () => {
208
257
  const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
209
258
  try {
@@ -250,6 +299,40 @@ describe('run-test-files diagnostics', () => {
250
299
  }
251
300
  });
252
301
 
302
+ it('isolates process env mutations between test files', () => {
303
+ const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
304
+ try {
305
+ const testsDir = join(wd, '__tests__');
306
+ mkdirSync(testsDir, { recursive: true });
307
+ writeFileSync(
308
+ join(testsDir, 'a-mutate-env.test.js'),
309
+ [
310
+ "import { test } from 'node:test';",
311
+ "test('mutates process env', () => { process.env.OMX_TEST_FILE_LEAK = 'leaked'; });",
312
+ '',
313
+ ].join('\n'),
314
+ );
315
+ writeFileSync(
316
+ join(testsDir, 'b-observe-env.test.js'),
317
+ [
318
+ "import { test } from 'node:test';",
319
+ "import assert from 'node:assert/strict';",
320
+ "test('does not inherit prior file env mutation', () => {",
321
+ " assert.equal(process.env.OMX_TEST_FILE_LEAK, undefined);",
322
+ "});",
323
+ '',
324
+ ].join('\n'),
325
+ );
326
+
327
+ const result = runCompiledRunner(wd);
328
+
329
+ assert.equal(result.status, 0, result.stderr || result.stdout);
330
+ assert.match(result.stderr, /per-file process isolation/);
331
+ } finally {
332
+ rmSync(wd, { recursive: true, force: true });
333
+ }
334
+ });
335
+
253
336
  it('sanitizes live OMX runtime state env from child test processes by default', () => {
254
337
  const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
255
338
  try {
@@ -265,8 +348,16 @@ describe('run-test-files diagnostics', () => {
265
348
  " assert.equal(process.env.OMX_STATE_ROOT, undefined);",
266
349
  " assert.equal(process.env.OMX_TEAM_STATE_ROOT, undefined);",
267
350
  " assert.equal(process.env.OMX_SESSION_ID, undefined);",
351
+ " assert.equal(process.env.OMX_RUNS_DIR, undefined);",
352
+ " assert.equal(process.env.OMXBOX_ACTIVE, undefined);",
353
+ " assert.equal(process.env.OMX_MADMAX_DETACHED_CONTEXT, undefined);",
354
+ " assert.equal(process.env.OMX_DEFAULT_STANDARD_MODEL, undefined);",
355
+ " assert.equal(process.env.USE_OMX_EXPLORE_CMD, undefined);",
268
356
  " assert.equal(process.env.CODEX_SESSION_ID, undefined);",
357
+ " assert.equal(process.env.CODEX_HOME, undefined);",
269
358
  " assert.equal(process.env.SESSION_ID, undefined);",
359
+ " assert.equal(process.env.TMUX, undefined);",
360
+ " assert.equal(process.env.TMUX_PANE, undefined);",
270
361
  "});",
271
362
  '',
272
363
  ].join('\n'),
@@ -277,8 +368,49 @@ describe('run-test-files diagnostics', () => {
277
368
  OMX_STATE_ROOT: '/tmp/live-omx-state-root',
278
369
  OMX_TEAM_STATE_ROOT: '/tmp/live-team-state-root',
279
370
  OMX_SESSION_ID: 'live-omx-session',
371
+ OMX_RUNS_DIR: '/tmp/live-omx-runs',
372
+ OMXBOX_ACTIVE: '1',
373
+ OMX_MADMAX_DETACHED_CONTEXT: 'live-context',
374
+ OMX_DEFAULT_STANDARD_MODEL: 'ambient-model',
375
+ USE_OMX_EXPLORE_CMD: '1',
280
376
  CODEX_SESSION_ID: 'live-codex-session',
377
+ CODEX_HOME: '/tmp/live-codex-home',
281
378
  SESSION_ID: 'live-shell-session',
379
+ TMUX: '/tmp/live-tmux,1,2',
380
+ TMUX_PANE: '%live',
381
+ });
382
+
383
+ assert.equal(result.status, 0, result.stderr || result.stdout);
384
+ } finally {
385
+ rmSync(wd, { recursive: true, force: true });
386
+ }
387
+ });
388
+
389
+ it('preserves explicit test-runner controls and explore harness override while scrubbing live runtime env', () => {
390
+ const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
391
+ try {
392
+ const testsDir = join(wd, '__tests__');
393
+ mkdirSync(testsDir, { recursive: true });
394
+ writeFileSync(
395
+ join(testsDir, 'env-allowlist.test.js'),
396
+ [
397
+ "import { test } from 'node:test';",
398
+ "import assert from 'node:assert/strict';",
399
+ "test('runner env allowlist is narrow', () => {",
400
+ " assert.equal(process.env.OMX_EXPLORE_BIN, '/tmp/fake-explore');",
401
+ " assert.equal(process.env.OMX_NODE_TEST_CONCURRENCY, '1');",
402
+ " assert.equal(process.env.OMX_ROOT, undefined);",
403
+ " assert.equal(process.env.CODEX_HOME, undefined);",
404
+ "});",
405
+ '',
406
+ ].join('\n'),
407
+ );
408
+
409
+ const result = runCompiledRunner(wd, {
410
+ OMX_EXPLORE_BIN: '/tmp/fake-explore',
411
+ OMX_NODE_TEST_CONCURRENCY: '1',
412
+ OMX_ROOT: '/tmp/live-omx-root',
413
+ CODEX_HOME: '/tmp/live-codex-home',
282
414
  });
283
415
 
284
416
  assert.equal(result.status, 0, result.stderr || result.stdout);
@@ -300,6 +432,8 @@ describe('run-test-files diagnostics', () => {
300
432
  "test('runtime env is preserved', () => {",
301
433
  " assert.equal(process.env.OMX_ROOT, '/tmp/live-omx-root');",
302
434
  " assert.equal(process.env.OMX_SESSION_ID, 'live-omx-session');",
435
+ " assert.equal(process.env.USE_OMX_EXPLORE_CMD, '1');",
436
+ " assert.equal(process.env.CODEX_HOME, '/tmp/live-codex-home');",
303
437
  "});",
304
438
  '',
305
439
  ].join('\n'),
@@ -309,6 +443,8 @@ describe('run-test-files diagnostics', () => {
309
443
  OMX_NODE_TEST_PRESERVE_RUNTIME_ENV: '1',
310
444
  OMX_ROOT: '/tmp/live-omx-root',
311
445
  OMX_SESSION_ID: 'live-omx-session',
446
+ USE_OMX_EXPLORE_CMD: '1',
447
+ CODEX_HOME: '/tmp/live-codex-home',
312
448
  });
313
449
 
314
450
  assert.equal(result.status, 0, result.stderr || result.stdout);