npm - oh-my-codex - Versions diffs - 0.18.9 → 0.18.10 - Mend

oh-my-codex 0.18.9 → 0.18.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (163)

package/Cargo.lock +6 -6
package/Cargo.toml +1 -1
package/README.md +4 -0
package/dist/autopilot/__tests__/deep-interview-gate.test.d.ts +2 -0
package/dist/autopilot/__tests__/deep-interview-gate.test.d.ts.map +1 -0
package/dist/autopilot/__tests__/deep-interview-gate.test.js +215 -0
package/dist/autopilot/__tests__/deep-interview-gate.test.js.map +1 -0
package/dist/autopilot/__tests__/ralplan-gate.test.js +148 -0
package/dist/autopilot/__tests__/ralplan-gate.test.js.map +1 -1
package/dist/autopilot/deep-interview-gate.d.ts.map +1 -1
package/dist/autopilot/deep-interview-gate.js +140 -0
package/dist/autopilot/deep-interview-gate.js.map +1 -1
package/dist/cli/__tests__/auth.test.js +36 -3
package/dist/cli/__tests__/auth.test.js.map +1 -1
package/dist/cli/__tests__/codex-feature-probe.test.d.ts +2 -0
package/dist/cli/__tests__/codex-feature-probe.test.d.ts.map +1 -0
package/dist/cli/__tests__/codex-feature-probe.test.js +46 -0
package/dist/cli/__tests__/codex-feature-probe.test.js.map +1 -0
package/dist/cli/__tests__/doctor-warning-copy.test.js +2 -0
package/dist/cli/__tests__/doctor-warning-copy.test.js.map +1 -1
package/dist/cli/__tests__/index.test.js +251 -5
package/dist/cli/__tests__/index.test.js.map +1 -1
package/dist/cli/__tests__/launch-fallback.test.js +19 -5
package/dist/cli/__tests__/launch-fallback.test.js.map +1 -1
package/dist/cli/__tests__/package-bin-contract.test.js +19 -6
package/dist/cli/__tests__/package-bin-contract.test.js.map +1 -1
package/dist/cli/__tests__/setup-refresh.test.js +6 -2
package/dist/cli/__tests__/setup-refresh.test.js.map +1 -1
package/dist/cli/__tests__/sparkshell-packaging.test.js +45 -2
package/dist/cli/__tests__/sparkshell-packaging.test.js.map +1 -1
package/dist/cli/__tests__/team-decompose.test.js +10 -5
package/dist/cli/__tests__/team-decompose.test.js.map +1 -1
package/dist/cli/__tests__/team.test.js +45 -1
package/dist/cli/__tests__/team.test.js.map +1 -1
package/dist/cli/__tests__/ultragoal.test.js +75 -0
package/dist/cli/__tests__/ultragoal.test.js.map +1 -1
package/dist/cli/auth.d.ts.map +1 -1
package/dist/cli/auth.js +25 -1
package/dist/cli/auth.js.map +1 -1
package/dist/cli/codex-feature-probe.d.ts +5 -2
package/dist/cli/codex-feature-probe.d.ts.map +1 -1
package/dist/cli/codex-feature-probe.js +25 -9
package/dist/cli/codex-feature-probe.js.map +1 -1
package/dist/cli/index.d.ts +28 -2
package/dist/cli/index.d.ts.map +1 -1
package/dist/cli/index.js +149 -88
package/dist/cli/index.js.map +1 -1
package/dist/cli/setup.d.ts.map +1 -1
package/dist/cli/setup.js +9 -1
package/dist/cli/setup.js.map +1 -1
package/dist/cli/team.d.ts +4 -0
package/dist/cli/team.d.ts.map +1 -1
package/dist/cli/team.js +43 -4
package/dist/cli/team.js.map +1 -1
package/dist/cli/ultragoal.d.ts.map +1 -1
package/dist/cli/ultragoal.js +29 -0
package/dist/cli/ultragoal.js.map +1 -1
package/dist/hooks/__tests__/agents-overlay.test.js +1 -0
package/dist/hooks/__tests__/agents-overlay.test.js.map +1 -1
package/dist/hooks/__tests__/autopilot-skill-contract.test.js +15 -0
package/dist/hooks/__tests__/autopilot-skill-contract.test.js.map +1 -1
package/dist/hooks/__tests__/deep-interview-contract.test.js +16 -0
package/dist/hooks/__tests__/deep-interview-contract.test.js.map +1 -1
package/dist/hooks/__tests__/skill-guidance-contract.test.js +14 -5
package/dist/hooks/__tests__/skill-guidance-contract.test.js.map +1 -1
package/dist/hooks/agents-overlay.d.ts.map +1 -1
package/dist/hooks/agents-overlay.js +2 -1
package/dist/hooks/agents-overlay.js.map +1 -1
package/dist/hooks/extensibility/__tests__/plugin-runner.test.js +112 -1
package/dist/hooks/extensibility/__tests__/plugin-runner.test.js.map +1 -1
package/dist/hooks/extensibility/plugin-runner-stdin.d.ts +2 -0
package/dist/hooks/extensibility/plugin-runner-stdin.d.ts.map +1 -0
package/dist/hooks/extensibility/plugin-runner-stdin.js +16 -0
package/dist/hooks/extensibility/plugin-runner-stdin.js.map +1 -0
package/dist/hooks/extensibility/plugin-runner.js +2 -4
package/dist/hooks/extensibility/plugin-runner.js.map +1 -1
package/dist/hud/__tests__/index.test.js +23 -2
package/dist/hud/__tests__/index.test.js.map +1 -1
package/dist/hud/__tests__/reconcile.test.js +266 -0
package/dist/hud/__tests__/reconcile.test.js.map +1 -1
package/dist/hud/__tests__/tmux.test.js +118 -7
package/dist/hud/__tests__/tmux.test.js.map +1 -1
package/dist/hud/index.d.ts +6 -1
package/dist/hud/index.d.ts.map +1 -1
package/dist/hud/index.js +12 -3
package/dist/hud/index.js.map +1 -1
package/dist/hud/reconcile.d.ts +6 -2
package/dist/hud/reconcile.d.ts.map +1 -1
package/dist/hud/reconcile.js +58 -28
package/dist/hud/reconcile.js.map +1 -1
package/dist/hud/tmux.d.ts +14 -1
package/dist/hud/tmux.d.ts.map +1 -1
package/dist/hud/tmux.js +129 -15
package/dist/hud/tmux.js.map +1 -1
package/dist/ralplan/consensus-gate.js +9 -1
package/dist/ralplan/consensus-gate.js.map +1 -1
package/dist/scripts/__tests__/codex-native-hook.test.js +168 -15
package/dist/scripts/__tests__/codex-native-hook.test.js.map +1 -1
package/dist/scripts/__tests__/run-test-files.test.js +115 -1
package/dist/scripts/__tests__/run-test-files.test.js.map +1 -1
package/dist/scripts/codex-native-hook.d.ts.map +1 -1
package/dist/scripts/codex-native-hook.js +74 -11
package/dist/scripts/codex-native-hook.js.map +1 -1
package/dist/scripts/notify-hook/team-worker-stop.d.ts.map +1 -1
package/dist/scripts/notify-hook/team-worker-stop.js +54 -21
package/dist/scripts/notify-hook/team-worker-stop.js.map +1 -1
package/dist/scripts/run-test-files.js +218 -160
package/dist/scripts/run-test-files.js.map +1 -1
package/dist/state/__tests__/operations.test.js +463 -0
package/dist/state/__tests__/operations.test.js.map +1 -1
package/dist/team/__tests__/delivery-log.test.js +18 -0
package/dist/team/__tests__/delivery-log.test.js.map +1 -1
package/dist/team/__tests__/runtime.test.js +48 -0
package/dist/team/__tests__/runtime.test.js.map +1 -1
package/dist/team/__tests__/tmux-session.test.js +107 -0
package/dist/team/__tests__/tmux-session.test.js.map +1 -1
package/dist/team/__tests__/tmux-test-fixture.d.ts.map +1 -1
package/dist/team/__tests__/tmux-test-fixture.js +14 -2
package/dist/team/__tests__/tmux-test-fixture.js.map +1 -1
package/dist/team/__tests__/tmux-test-fixture.test.js +1 -0
package/dist/team/__tests__/tmux-test-fixture.test.js.map +1 -1
package/dist/team/__tests__/worker-bootstrap.test.js +54 -1
package/dist/team/__tests__/worker-bootstrap.test.js.map +1 -1
package/dist/team/delivery-log.d.ts +1 -1
package/dist/team/delivery-log.d.ts.map +1 -1
package/dist/team/delivery-log.js.map +1 -1
package/dist/team/repo-aware-decomposition.d.ts +4 -0
package/dist/team/repo-aware-decomposition.d.ts.map +1 -1
package/dist/team/repo-aware-decomposition.js.map +1 -1
package/dist/team/runtime.d.ts.map +1 -1
package/dist/team/runtime.js +78 -9
package/dist/team/runtime.js.map +1 -1
package/dist/team/tmux-session.d.ts +1 -0
package/dist/team/tmux-session.d.ts.map +1 -1
package/dist/team/tmux-session.js +16 -5
package/dist/team/tmux-session.js.map +1 -1
package/dist/team/ultragoal-context.d.ts +12 -0
package/dist/team/ultragoal-context.d.ts.map +1 -1
package/dist/team/ultragoal-context.js +32 -8
package/dist/team/ultragoal-context.js.map +1 -1
package/dist/utils/__tests__/paths.test.js +23 -0
package/dist/utils/__tests__/paths.test.js.map +1 -1
package/dist/utils/paths.d.ts.map +1 -1
package/dist/utils/paths.js +4 -2
package/dist/utils/paths.js.map +1 -1
package/dist/utils/toml.d.ts +4 -0
package/dist/utils/toml.d.ts.map +1 -0
package/dist/utils/toml.js +75 -0
package/dist/utils/toml.js.map +1 -0
package/package.json +1 -1
package/plugins/oh-my-codex/.codex-plugin/plugin.json +1 -1
package/plugins/oh-my-codex/skills/autopilot/SKILL.md +3 -0
package/plugins/oh-my-codex/skills/deep-interview/SKILL.md +34 -0
package/plugins/oh-my-codex/skills/ultrawork/SKILL.md +32 -17
package/skills/autopilot/SKILL.md +3 -0
package/skills/deep-interview/SKILL.md +34 -0
package/skills/ultrawork/SKILL.md +32 -17
package/src/scripts/__tests__/codex-native-hook.test.ts +216 -26
package/src/scripts/__tests__/run-test-files.test.ts +138 -2
package/src/scripts/codex-native-hook.ts +80 -10
package/src/scripts/notify-hook/team-worker-stop.ts +58 -18
package/src/scripts/run-test-files.ts +229 -150
package/templates/AGENTS.md +40 -199

package/skills/ultrawork/SKILL.md CHANGED

@@ -4,22 +4,23 @@ description: Parallel execution engine for high-throughput task completion
 ---
 <Purpose>
-Ultrawork is a parallel execution engine for high-throughput task completion. It is a component, not a standalone persistence mode: it provides parallelism, context discipline, and smart delegation guidance, but not Ralph's persistence loop, architect sign-off, or long-running completion guarantees.
+Ultrawork is a parallel execution engine for high-throughput task completion. It is a component, not a standalone persistence or verification mode: it provides parallelism, context discipline, and smart delegation guidance, but not durable goal tracking, Team's tmux worker lifecycle, Ralph's legacy persistence loop, architect sign-off, or long-running completion guarantees.
 </Purpose>
 <Use_When>
 - Multiple independent tasks can run simultaneously
 - User says "ulw", "ultrawork", or explicitly wants parallel execution
 - Task benefits from concurrent execution plus lightweight evidence before wrap-up
-- You need a direct-tool lane plus optional background evidence lanes without entering Ralph
+- You need a direct-tool lane plus optional background evidence lanes without entering Team or a durable goal workflow
 </Use_When>
 <Do_Not_Use_When>
-- Task requires guaranteed completion with persistence, architect verification, or deslop/reverification -- use `ralph` instead (Ralph includes ultrawork)
-- Task requires a full autonomous pipeline -- use `autopilot` instead (autopilot defaults to Ultragoal, with Team/parallel execution used only when needed)
-- There is only one sequential task with no parallelism opportunity -- execute directly or delegate to a single `executor`
+- Task needs durable goal tracking, ledger checkpoints, or resume across stories -- use `ultragoal` instead
+- Task needs coordinated tmux workers, shared task state, mailbox/dispatch coordination, or long-running parallel execution -- use `team` instead
+- Task requires a full autonomous pipeline -- use `autopilot` instead (default loop: `deep-interview -> ralplan -> ultragoal`, with `team` only when needed)
+- Task intentionally requires the legacy persistent single-owner completion/verification loop -- use `ralph` explicitly; do not present it as the default durable path
+- There is only one sequential task with no parallelism opportunity -- execute directly, use `ultragoal` for durable tracking, or delegate to a single `executor`
 - The request is still in plan-consensus mode -- keep planning artifacts in `ralplan` until execution is explicitly authorized
-- User needs session persistence for resume -- use `ralph`, which adds persistence on top of ultrawork
 </Do_Not_Use_When>
 <Why_This_Exists>
@@ -138,8 +139,12 @@ Why bad: No verification output, no acceptance evidence, and no manual QA note w
 </Examples>
 <Escalation_And_Stop_Conditions>
-- When ultrawork is invoked directly (not via Ralph), apply lightweight verification only -- build/typecheck passes when relevant, affected tests pass, and manual QA notes are captured when needed.
-- Ralph owns persistence, architect verification, deslop, and the full verified-completion promise. Do not claim those guarantees from direct ultrawork alone.
+- When ultrawork is invoked directly, apply lightweight verification only -- build/typecheck passes when relevant, affected tests pass, and manual QA notes are captured when needed.
+- Ultrawork does not own persistence, durable ledgers, architect verification, deslop, full QA, or the full verified-completion promise. Do not claim those guarantees from direct ultrawork alone.
+- Escalate to `ultragoal` when the work needs durable goal state, story checkpoints, or resume across implementation steps.
+- Escalate to `team` when the work needs coordinated tmux workers, shared task state, or durable multi-worker lifecycle control.
+- Escalate to explicitly requested `ralph` only for the supported legacy single-owner persistence/verification fallback.
+- Ralph owns persistence, architect verification, deslop, and the full verified-completion promise only when explicitly selected as the supported legacy fallback; direct ultrawork does not own those guarantees.
 - If a task fails repeatedly across retries, report the issue rather than retrying indefinitely.
 - Escalate to the user when tasks have unclear dependencies, conflicting requirements, or a materially branching acceptance target.
 </Escalation_And_Stop_Conditions>
@@ -159,17 +164,27 @@ Why bad: No verification output, no acceptance evidence, and no manual QA note w
 ## Relationship to Other Modes
 ```
-ralph (persistence + verified completion wrapper)
- \-- includes: ultrawork (this skill)
-     \-- provides: high-throughput execution + lightweight evidence
+ultrawork (this skill)
+ \-- provides: in-session parallel execution discipline + lightweight evidence
-autopilot (autonomous execution)
- \-- includes: ralph
-     \-- includes: ultrawork (this skill)
+ultragoal (durable goal execution)
+ \-- owns: goal ledger, checkpoints, resume across stories, final gate discipline
+ \-- may use: team for parallel lanes when a story benefits from coordinated workers
-ecomode (token efficiency)
- \-- modifies: ultrawork's model selection
+team (tmux coordinated execution)
+ \-- owns: worker panes, shared task state, mailbox/dispatch, lifecycle control
+ \-- can return: checkpoint-ready evidence to an Ultragoal leader
+autopilot (strict autonomous delivery loop)
+ \-- default flow: deep-interview -> ralplan -> ultragoal -> code-review -> ultraqa
+ \-- may use: team only when an Ultragoal story needs parallel execution
+ralph (supported legacy explicit fallback)
+ \-- owns: single-owner persistence loop + architect verification when intentionally selected
+ecomode (deprecated compatibility-only)
+ \-- do not route users there from ultrawork; it is not the current model-selection path
 ```
-Ultrawork is the parallelism and execution-discipline layer. Ralph adds persistence, architect verification, deslop, and retry-until-done behavior. Autopilot adds the broader autonomous lifecycle pipeline. Ecomode adjusts ultrawork's model routing to favor cheaper models.
+Ultrawork is the parallelism and execution-discipline layer. Ultragoal is the current default durable goal/ledger follow-up. Team is the coordinated tmux parallel runtime, often nested under an Ultragoal story when durable work needs multiple lanes. Autopilot orchestrates the full default lifecycle through deep-interview, ralplan, ultragoal, code-review, and ultraqa. Ralph remains active as an explicit legacy fallback for persistent single-owner verification, but it is not the recommended default durable path. Ecomode is deprecated compatibility-only and should not be advertised as the ultrawork model-selection route.
 </Advanced>

package/src/scripts/__tests__/codex-native-hook.test.ts CHANGED

@@ -441,23 +441,77 @@ describe("codex native hook dispatch", () => {
     );
   });
-  it("emits schema-safe JSON stdout when CLI stdin is malformed", () => {
-    const stdout = runNativeHookCli("{");
+  it("emits Stop-schema-safe block JSON when unidentifiable malformed stdin has native Stop runtime surface", async () => {
+    const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-cli-malformed-stop-surface-"));
+    try {
+      await mkdir(join(cwd, ".omx"), { recursive: true });
+      const result = spawnSync(process.execPath, [nativeHookScriptPath()], {
+        cwd,
+        input: "{",
+        encoding: "utf-8",
+        stdio: ["pipe", "pipe", "pipe"],
+      });
-    const output = parseSingleJsonStdout(stdout) as {
-      continue?: boolean;
-      stopReason?: string;
-      systemMessage?: string;
-      hookSpecificOutput?: unknown;
-    };
+      assert.equal(result.status, 0, result.stderr || result.stdout);
+      assert.equal(result.stderr, "");
+      const output = parseSingleJsonStdout(result.stdout) as {
+        decision?: string;
+        continue?: boolean;
+        reason?: string;
+        stopReason?: string;
+        systemMessage?: string;
+        hookSpecificOutput?: unknown;
+      };
-    assert.equal(output.continue, false);
-    assert.equal(output.stopReason, "native_hook_stdin_parse_error");
-    assert.equal(output.hookSpecificOutput, undefined);
-    assert.match(
-      String(output.systemMessage ?? ""),
-      /stdin JSON parsing failed inside codex-native-hook:/,
-    );
+      assert.equal(output.decision, "block");
+      assert.equal(output.continue, undefined);
+      assert.equal(
+        output.reason,
+        "OMX native hook received malformed JSON input. Preserve runtime state, inspect the emitting hook payload yourself, and retry with valid JSON.",
+      );
+      assert.equal(output.stopReason, "native_hook_stdin_parse_error");
+      assert.equal(output.hookSpecificOutput, undefined);
+      assert.match(
+        String(output.systemMessage ?? ""),
+        /stdin JSON parsing failed inside codex-native-hook:/,
+      );
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
+  });
+  it("preserves non-Stop fail-closed JSON when malformed stdin identifies a non-Stop hook", async () => {
+    const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-cli-malformed-nonstop-"));
+    try {
+      await mkdir(join(cwd, ".omx"), { recursive: true });
+      const result = spawnSync(process.execPath, [nativeHookScriptPath()], {
+        cwd,
+        input: '{hook_event_name:"PreToolUse",',
+        encoding: "utf-8",
+        stdio: ["pipe", "pipe", "pipe"],
+      });
+      assert.equal(result.status, 0, result.stderr || result.stdout);
+      assert.equal(result.stderr, "");
+      const output = parseSingleJsonStdout(result.stdout) as {
+        continue?: boolean;
+        decision?: string;
+        stopReason?: string;
+        systemMessage?: string;
+        hookSpecificOutput?: unknown;
+      };
+      assert.equal(output.continue, false);
+      assert.equal(output.decision, undefined);
+      assert.equal(output.stopReason, "native_hook_stdin_parse_error");
+      assert.equal(output.hookSpecificOutput, undefined);
+      assert.match(
+        String(output.systemMessage ?? ""),
+        /stdin JSON parsing failed inside codex-native-hook:/,
+      );
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
   });
   it("redacts unterminated prompt-like malformed stdin fields", async () => {
@@ -5849,6 +5903,76 @@ exit 0
     }
   });
+  it("allows null-device fd redirects while deep-interview blocks real Bash writes", async () => {
+    const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-deep-interview-null-redirect-"));
+    try {
+      const stateDir = join(cwd, ".omx", "state");
+      const sessionDir = join(stateDir, "sessions", "sess-di-null-redirect");
+      await mkdir(sessionDir, { recursive: true });
+      await writeJson(join(stateDir, "session.json"), { session_id: "sess-di-null-redirect", cwd });
+      await writeJson(join(sessionDir, "skill-active-state.json"), {
+        version: 1,
+        active: true,
+        skill: "deep-interview",
+        phase: "planning",
+        session_id: "sess-di-null-redirect",
+        active_skills: [{ skill: "deep-interview", phase: "planning", active: true, session_id: "sess-di-null-redirect" }],
+      });
+      await writeJson(join(sessionDir, "deep-interview-state.json"), {
+        active: true,
+        mode: "deep-interview",
+        current_phase: "intent-first",
+        session_id: "sess-di-null-redirect",
+      });
+      const allowedCommands = [
+        "find application -type d -name 'bug-tracking*' 2>/dev/null | head -20",
+        "find application -type d -name 'bug-tracking*' 2> /dev/null | head -20",
+        "find application -type d -name 'bug-tracking*' 2>NUL | head -20",
+        "find application -type d -name 'bug-tracking*' 1>/dev/null",
+        "find application -type d -name 'bug-tracking*' &>/dev/null",
+      ];
+      for (const [index, command] of allowedCommands.entries()) {
+        const result = await dispatchCodexNativeHook(
+          {
+            hook_event_name: "PreToolUse",
+            cwd,
+            session_id: "sess-di-null-redirect",
+            tool_name: "Bash",
+            tool_use_id: `tool-di-null-redirect-${index}`,
+            tool_input: { command },
+          },
+          { cwd },
+        );
+        assert.equal(result.outputJson, null, command);
+      }
+      const blockedCommands = [
+        "find application -type d -name 'bug-tracking*' 2>errors.log | head -20",
+        "find application -type d -name 'bug-tracking*' > /tmp/bug-tracking.txt",
+        "find application -type d -name 'bug-tracking*' | tee /dev/null",
+      ];
+      for (const [index, command] of blockedCommands.entries()) {
+        const result = await dispatchCodexNativeHook(
+          {
+            hook_event_name: "PreToolUse",
+            cwd,
+            session_id: "sess-di-null-redirect",
+            tool_name: "Bash",
+            tool_use_id: `tool-di-real-redirect-${index}`,
+            tool_input: { command },
+          },
+          { cwd },
+        );
+        assert.equal((result.outputJson as { decision?: string } | null)?.decision, "block", command);
+      }
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
+  });
   it("allows implementation tools after an explicit deep-interview handoff deactivates the mode", async () => {
     const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-pretool-deep-interview-handoff-"));
     try {
@@ -8341,6 +8465,52 @@ exit 0
     }
   });
+  it("suppresses parent Autopilot Stop continuation in side conversations", async () => {
+    const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-autopilot-side-conversation-"));
+    try {
+      const stateDir = join(cwd, ".omx", "state");
+      const sessionId = "sess-stop-autopilot-side-conversation";
+      const transcriptPath = join(cwd, "side-conversation-rollout.jsonl");
+      await mkdir(join(stateDir, "sessions", sessionId), { recursive: true });
+      await writeJson(join(stateDir, "sessions", sessionId, "autopilot-state.json"), {
+        active: true,
+        mode: "autopilot",
+        current_phase: "deep-interview",
+      });
+      await writeFile(
+        transcriptPath,
+        `${JSON.stringify({
+          type: "message",
+          role: "user",
+          content: [
+            "Side conversation boundary.",
+            "Everything before this boundary is inherited history from the parent thread. It is reference context only. It is not your current task.",
+            "Only messages submitted after this boundary are active user instructions for this side conversation.",
+            "You are a side-conversation assistant, separate from the main thread.",
+          ].join("\n\n"),
+        })}\n`,
+        "utf-8",
+      );
+      const result = await dispatchCodexNativeHook(
+        {
+          hook_event_name: "Stop",
+          cwd,
+          session_id: sessionId,
+          thread_id: "thread-stop-autopilot-side-conversation",
+          transcript_path: transcriptPath,
+          last_assistant_message: "Waiting for a new side-conversation question.",
+        },
+        { cwd },
+      );
+      assert.equal(result.omxEventName, "stop");
+      assert.equal(result.outputJson, null);
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
+  });
   it("requires Autopilot code review after a compact-boundary Stop exemption", async () => {
     const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-autopilot-review-compact-"));
     try {
@@ -9082,7 +9252,7 @@ exit 0
     }
   });
-  it("queues worker Stop leader nudge with Tab and submit when leader pane is busy", async () => {
+  it("steers worker Stop leader nudge directly when leader pane is busy", async () => {
     const cwd = await mkdtemp(join(tmpdir(), "omx-native-hook-stop-team-worker-busy-leader-"));
     const prevTeamWorker = process.env.OMX_TEAM_WORKER;
     const prevTeamStateRoot = process.env.OMX_TEAM_STATE_ROOT;
@@ -9155,14 +9325,11 @@ exit 0
       assert.equal(result.outputJson, null);
       const tmuxLog = await readFile(tmuxLogPath, "utf-8");
       assert.match(tmuxLog, /send-keys -t %42 -l \[OMX\] worker-1 native Stop allowed/);
-      assert.match(tmuxLog, /send-keys -t %42 Tab/);
-      assert.match(tmuxLog, /send-keys -t %42 C-m/);
-      assert.ok(
-        tmuxLog.indexOf("send-keys -t %42 Tab") < tmuxLog.indexOf("send-keys -t %42 C-m"),
-        "busy worker-stop nudge should press Tab before C-m",
-      );
+      assert.doesNotMatch(tmuxLog, /send-keys -t %42 Tab/);
+      const submits = tmuxLog.match(/send-keys -t %42 C-m/g) || [];
+      assert.equal(submits.length, 2, "busy worker-stop nudge should submit directly as steering, not queue via Tab");
       const nudgeState = JSON.parse(await readFile(join(workerDir, "worker-stop-nudge.json"), "utf-8"));
-      assert.equal(nudgeState.delivery, "queued");
+      assert.equal(nudgeState.delivery, "steered");
     } finally {
       if (typeof prevTeamWorker === "string") process.env.OMX_TEAM_WORKER = prevTeamWorker;
       else delete process.env.OMX_TEAM_WORKER;
@@ -9298,6 +9465,14 @@ exit 0
       });
       assert.equal(shutdownResult.result, "team_state_gone_or_shutdown");
       assert.equal(existsSync(join(stateDir, "team", "shutdown-team", "worker-stop-nudge.json")), false);
+      const deliveryLogPath = join(logsDir, `team-delivery-${new Date().toISOString().split("T")[0]}.jsonl`);
+      const deliveryEvents = (await readFile(deliveryLogPath, "utf-8"))
+        .trim()
+        .split("\n")
+        .map((line) => JSON.parse(line));
+      const suppressedEvents = deliveryEvents.filter((event) => event.reason === "team_state_gone_or_shutdown");
+      assert.equal(suppressedEvents.length, 2, "late closed-team Stop nudges should be diagnostics, not queued prompts");
+      assert.equal(suppressedEvents.every((event) => event.result === "suppressed" && event.transport === "none"), true);
     } finally {
       await rm(cwd, { recursive: true, force: true });
     }
@@ -9338,13 +9513,13 @@ exit 0
         workerContext: { teamName, workerName: "worker-2" },
       });
-      assert.equal(result.result, "queued");
+      assert.equal(result.result, "steered");
       const tmuxLog = await readFile(tmuxLogPath, "utf-8");
       assert.match(tmuxLog, /send-keys -t %42 -l \[OMX\] worker-2 native Stop allowed/);
-      assert.match(tmuxLog, /send-keys -t %42 Tab/);
+      assert.doesNotMatch(tmuxLog, /send-keys -t %42 Tab/);
       const teamNudgeState = JSON.parse(await readFile(join(teamDir, "worker-stop-nudge.json"), "utf-8"));
       assert.equal(teamNudgeState.worker, "worker-2");
-      assert.equal(teamNudgeState.delivery, "queued");
+      assert.equal(teamNudgeState.delivery, "steered");
     } finally {
       if (typeof prevPath === "string") process.env.PATH = prevPath;
       else delete process.env.PATH;
@@ -9477,6 +9652,21 @@ exit 0
       assert.equal(existsSync(teamDir), false, "deferred worker Stop recording must not recreate removed team state");
       const tmuxLog = await readFile(tmuxLogPath, "utf-8");
       assert.doesNotMatch(tmuxLog, /send-keys -t %42 -l \[OMX\] worker-1 native Stop allowed/);
+      const deliveryLogPath = join(logsDir, `team-delivery-${new Date().toISOString().split("T")[0]}.jsonl`);
+      const deliveryEvents = (await readFile(deliveryLogPath, "utf-8"))
+        .trim()
+        .split("\n")
+        .map((line) => JSON.parse(line));
+      assert.equal(
+        deliveryEvents.some((event) =>
+          event.team === teamName
+          && event.result === "suppressed"
+          && event.transport === "none"
+          && event.reason === "team_state_gone_or_shutdown"
+        ),
+        true,
+        "teardown-race worker Stop nudges should be diagnostic suppression events, not queued prompts",
+      );
     } finally {
       if (typeof prevPath === "string") process.env.PATH = prevPath;
       else delete process.env.PATH;

package/src/scripts/__tests__/run-test-files.test.ts CHANGED

@@ -5,7 +5,7 @@ import { join } from 'node:path';
 import { describe, it } from 'node:test';
 import assert from 'node:assert/strict';
-function runCompiledRunner(root: string, envOverrides: Record<string, string> = {}, timeoutMs = 5_000) {
+function runCompiledRunner(root: string, envOverrides: Record<string, string> = {}, timeoutMs = 15_000) {
   return spawnSync(process.execPath, ['dist/scripts/run-test-files.js', root], {
     cwd: process.cwd(),
     encoding: 'utf-8',
@@ -79,7 +79,6 @@ describe('run-test-files diagnostics', () => {
     }
   });
   it('script-level force exit terminates a completed test child that blocks process exit', () => {
     const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
     try {
@@ -181,6 +180,33 @@ describe('run-test-files diagnostics', () => {
     }
   });
+  it('applies the runner timeout per test file instead of skipping later files after cumulative runtime', () => {
+    const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
+    try {
+      const testsDir = join(wd, '__tests__');
+      mkdirSync(testsDir, { recursive: true });
+      for (const name of ['a-slow-pass.test.js', 'b-slow-pass.test.js']) {
+        writeFileSync(
+          join(testsDir, name),
+          [
+            "import { test } from 'node:test';",
+            "test('passes after a short delay', async () => {",
+            "  await new Promise((resolve) => setTimeout(resolve, 450));",
+            "});",
+            '',
+          ].join('\n'),
+        );
+      }
+      const result = runCompiledRunner(wd, { OMX_NODE_TEST_RUNNER_TIMEOUT_MS: '750' }, 3_000);
+      assert.equal(result.status, 0, result.stderr || result.stdout);
+      assert.doesNotMatch(result.stderr, /timeout before/);
+    } finally {
+      rmSync(wd, { recursive: true, force: true });
+    }
+  });
   it('logs that per-test timeout is disabled by default', () => {
     const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
     try {
@@ -204,6 +230,29 @@ describe('run-test-files diagnostics', () => {
     }
   });
+  it('serializes local test files by default to avoid runaway full-suite fan-out', () => {
+    const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
+    try {
+      const testsDir = join(wd, '__tests__');
+      mkdirSync(testsDir, { recursive: true });
+      writeFileSync(
+        join(testsDir, 'pass.test.js'),
+        [
+          "import { test } from 'node:test';",
+          "test('passes', () => {});",
+          '',
+        ].join('\n'),
+      );
+      const result = runCompiledRunner(wd, { CI: 'false', GITHUB_ACTIONS: 'false' });
+      assert.equal(result.status, 0, result.stderr || result.stdout);
+      assert.match(result.stderr, /test concurrency 1/);
+    } finally {
+      rmSync(wd, { recursive: true, force: true });
+    }
+  });
   it('serializes test files by default in CI to avoid cross-file child-process leaks', () => {
     const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
     try {
@@ -250,6 +299,40 @@ describe('run-test-files diagnostics', () => {
     }
   });
+  it('isolates process env mutations between test files', () => {
+    const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
+    try {
+      const testsDir = join(wd, '__tests__');
+      mkdirSync(testsDir, { recursive: true });
+      writeFileSync(
+        join(testsDir, 'a-mutate-env.test.js'),
+        [
+          "import { test } from 'node:test';",
+          "test('mutates process env', () => { process.env.OMX_TEST_FILE_LEAK = 'leaked'; });",
+          '',
+        ].join('\n'),
+      );
+      writeFileSync(
+        join(testsDir, 'b-observe-env.test.js'),
+        [
+          "import { test } from 'node:test';",
+          "import assert from 'node:assert/strict';",
+          "test('does not inherit prior file env mutation', () => {",
+          "  assert.equal(process.env.OMX_TEST_FILE_LEAK, undefined);",
+          "});",
+          '',
+        ].join('\n'),
+      );
+      const result = runCompiledRunner(wd);
+      assert.equal(result.status, 0, result.stderr || result.stdout);
+      assert.match(result.stderr, /per-file process isolation/);
+    } finally {
+      rmSync(wd, { recursive: true, force: true });
+    }
+  });
   it('sanitizes live OMX runtime state env from child test processes by default', () => {
     const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
     try {
@@ -265,8 +348,16 @@ describe('run-test-files diagnostics', () => {
           "  assert.equal(process.env.OMX_STATE_ROOT, undefined);",
           "  assert.equal(process.env.OMX_TEAM_STATE_ROOT, undefined);",
           "  assert.equal(process.env.OMX_SESSION_ID, undefined);",
+          "  assert.equal(process.env.OMX_RUNS_DIR, undefined);",
+          "  assert.equal(process.env.OMXBOX_ACTIVE, undefined);",
+          "  assert.equal(process.env.OMX_MADMAX_DETACHED_CONTEXT, undefined);",
+          "  assert.equal(process.env.OMX_DEFAULT_STANDARD_MODEL, undefined);",
+          "  assert.equal(process.env.USE_OMX_EXPLORE_CMD, undefined);",
           "  assert.equal(process.env.CODEX_SESSION_ID, undefined);",
+          "  assert.equal(process.env.CODEX_HOME, undefined);",
           "  assert.equal(process.env.SESSION_ID, undefined);",
+          "  assert.equal(process.env.TMUX, undefined);",
+          "  assert.equal(process.env.TMUX_PANE, undefined);",
           "});",
           '',
         ].join('\n'),
@@ -277,8 +368,49 @@ describe('run-test-files diagnostics', () => {
         OMX_STATE_ROOT: '/tmp/live-omx-state-root',
         OMX_TEAM_STATE_ROOT: '/tmp/live-team-state-root',
         OMX_SESSION_ID: 'live-omx-session',
+        OMX_RUNS_DIR: '/tmp/live-omx-runs',
+        OMXBOX_ACTIVE: '1',
+        OMX_MADMAX_DETACHED_CONTEXT: 'live-context',
+        OMX_DEFAULT_STANDARD_MODEL: 'ambient-model',
+        USE_OMX_EXPLORE_CMD: '1',
         CODEX_SESSION_ID: 'live-codex-session',
+        CODEX_HOME: '/tmp/live-codex-home',
         SESSION_ID: 'live-shell-session',
+        TMUX: '/tmp/live-tmux,1,2',
+        TMUX_PANE: '%live',
+      });
+      assert.equal(result.status, 0, result.stderr || result.stdout);
+    } finally {
+      rmSync(wd, { recursive: true, force: true });
+    }
+  });
+  it('preserves explicit test-runner controls and explore harness override while scrubbing live runtime env', () => {
+    const wd = mkdtempSync(join(tmpdir(), 'omx-run-test-files-'));
+    try {
+      const testsDir = join(wd, '__tests__');
+      mkdirSync(testsDir, { recursive: true });
+      writeFileSync(
+        join(testsDir, 'env-allowlist.test.js'),
+        [
+          "import { test } from 'node:test';",
+          "import assert from 'node:assert/strict';",
+          "test('runner env allowlist is narrow', () => {",
+          "  assert.equal(process.env.OMX_EXPLORE_BIN, '/tmp/fake-explore');",
+          "  assert.equal(process.env.OMX_NODE_TEST_CONCURRENCY, '1');",
+          "  assert.equal(process.env.OMX_ROOT, undefined);",
+          "  assert.equal(process.env.CODEX_HOME, undefined);",
+          "});",
+          '',
+        ].join('\n'),
+      );
+      const result = runCompiledRunner(wd, {
+        OMX_EXPLORE_BIN: '/tmp/fake-explore',
+        OMX_NODE_TEST_CONCURRENCY: '1',
+        OMX_ROOT: '/tmp/live-omx-root',
+        CODEX_HOME: '/tmp/live-codex-home',
       });
       assert.equal(result.status, 0, result.stderr || result.stdout);
@@ -300,6 +432,8 @@ describe('run-test-files diagnostics', () => {
           "test('runtime env is preserved', () => {",
           "  assert.equal(process.env.OMX_ROOT, '/tmp/live-omx-root');",
           "  assert.equal(process.env.OMX_SESSION_ID, 'live-omx-session');",
+          "  assert.equal(process.env.USE_OMX_EXPLORE_CMD, '1');",
+          "  assert.equal(process.env.CODEX_HOME, '/tmp/live-codex-home');",
           "});",
           '',
         ].join('\n'),
@@ -309,6 +443,8 @@ describe('run-test-files diagnostics', () => {
         OMX_NODE_TEST_PRESERVE_RUNTIME_ENV: '1',
         OMX_ROOT: '/tmp/live-omx-root',
         OMX_SESSION_ID: 'live-omx-session',
+        USE_OMX_EXPLORE_CMD: '1',
+        CODEX_HOME: '/tmp/live-codex-home',
       });
       assert.equal(result.status, 0, result.stderr || result.stdout);