npm - @pushpalsdev/cli - Versions diffs - 1.0.61 → 1.0.63 - Mend

@pushpalsdev/cli 1.0.61 → 1.0.63

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pushpalsdev/cli",
-  "version": "1.0.61",
+  "version": "1.0.63",
   "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
   "license": "MIT",
   "repository": {

package/runtime/prompts/workerpals/openai_codex_command_router_policy.md ADDED Viewed

@@ -0,0 +1,16 @@
+## Base Guidance
+Command-router policy: shell commands are allowed, but invoke the actual command directly instead of wrapping it with `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, or `pwsh -Command`. If a wrapper command is rejected, rerun its inner command directly through the command tool.
+## Recovery Guidance
+Command-router recovery: the previous attempt retried disallowed shell wrappers.
+Retry once using shell commands normally, but invoke the inner command directly instead of wrapping it in `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, or `pwsh -Command`.
+You are not limited to a fixed allowlist of commands. The constraint is only that command execution must target the actual program/argv directly rather than a wrapper shell.
+## Hard Recovery Guidance
+Command-router escalation: the previous retry still attempted disallowed shell wrappers.
+Do not invoke `bash`, `/bin/bash`, `sh`, `cmd`, `powershell`, `powershell.exe`, `pwsh`, or `pwsh.exe` as the command itself on this attempt.
+Your first command invocation on this retry must be one of the direct replacements listed below, with no wrapper shell around it.
+After you re-establish repo context, continue using ordinary shell commands directly without wrapper shells.
+## Rejection Detail
+Codex repeatedly attempted disallowed shell-wrapper commands that the command router rejected. Shell commands are allowed, but wrapper shells are not; invoke the inner command directly and avoid wrapper retries.

package/runtime/sandbox/.pushpals-remotebuddy-fallback.js CHANGED Viewed

@@ -4018,6 +4018,7 @@ var BREADTH_ORDER = {
 var IDEATION_SYSTEM_PROMPT = loadPromptTemplate("remotebuddy/autonomy_ideation_system_prompt.md").trim();
 var SCORING_SYSTEM_PROMPT = loadPromptTemplate("remotebuddy/autonomy_scoring_system_prompt.md").trim();
 var PLANNING_SYSTEM_PROMPT = loadPromptTemplate("remotebuddy/autonomy_planning_system_prompt.md").trim();
+var IDEATION_TIMEOUT_RECOVERY_INSTRUCTION = "Previous ideation timed out before you returned JSON. For this round only, stay within the time budget: prioritize the top 1-3 highest-confidence candidates, keep reasoning brief, avoid exhaustive exploration, and return valid JSON as soon as possible.";
 var VISION_DOC_FNAME = "vision.md";
 var MAX_VISION_SECTION_CHARS = 1200;
 var DOCS_MIN_IMPACT_SIGNAL_FOR_NO_PENALTY = 0.45;
@@ -5779,6 +5780,7 @@ class RemoteBuddyAutonomousEngine {
   baseBranch;
   llm;
   comm;
+  llmCfg;
   cfg;
   runtimeEnabled = true;
   timer = null;
@@ -5792,6 +5794,7 @@ class RemoteBuddyAutonomousEngine {
   lastOutcome = "none";
   lastDetail = "not_started";
   lastCompletedAtMs = 0;
+  pendingIdeationTimeoutRecovery = null;
   constructor(opts) {
     this.server = opts.server;
     this.sessionId = opts.sessionId;
@@ -5805,6 +5808,7 @@ class RemoteBuddyAutonomousEngine {
     this.baseBranch = String(opts.config.sourceControlManager.baseBranch || "main").trim() || "main";
     this.llm = opts.llm;
     this.comm = opts.comm;
+    this.llmCfg = opts.config.remotebuddy.llm;
     this.cfg = opts.config.remotebuddy.autonomy;
     this.runtimeEnabled = this.cfg.enabled;
   }
@@ -5859,10 +5863,30 @@ class RemoteBuddyAutonomousEngine {
     return headers;
   }
   lockTtlMs() {
-    return Math.max(this.cfg.tickIntervalMs * 3, this.cfg.ideationBudgetMs * 2 + this.cfg.llmTimeoutMs * 6, 30000);
+    const maxPhaseTimeoutMs = Math.max(this.phaseTimeoutMs("ideation"), this.phaseTimeoutMs("scoring"), this.phaseTimeoutMs("planning"));
+    return Math.max(this.cfg.tickIntervalMs * 3, this.cfg.ideationBudgetMs * 2 + maxPhaseTimeoutMs * 6, 30000);
   }
   cycleBudgetMs() {
-    return Math.max(this.cfg.ideationBudgetMs + this.cfg.llmTimeoutMs * 3, this.cfg.llmTimeoutMs * 4, 20000);
+    const ideationTimeoutMs = this.phaseTimeoutMs("ideation");
+    const scoringTimeoutMs = this.phaseTimeoutMs("scoring");
+    const planningTimeoutMs = this.phaseTimeoutMs("planning");
+    const maxPhaseTimeoutMs = Math.max(ideationTimeoutMs, scoringTimeoutMs, planningTimeoutMs);
+    return Math.max(this.cfg.ideationBudgetMs + ideationTimeoutMs + scoringTimeoutMs + planningTimeoutMs, maxPhaseTimeoutMs * 4, 20000);
+  }
+  phaseTimeoutMs(phase) {
+    const configuredTimeoutMs = Math.max(1000, this.cfg.llmTimeoutMs);
+    if (phase !== "ideation")
+      return configuredTimeoutMs;
+    if (String(this.llmCfg.backend || "").trim().toLowerCase() !== "openai_codex") {
+      return configuredTimeoutMs;
+    }
+    const codexTimeoutMs2 = Math.max(configuredTimeoutMs, this.llmCfg.codexTimeoutMs || 0);
+    return Math.min(codexTimeoutMs2, Math.max(configuredTimeoutMs, 90000));
+  }
+  consumeIdeationTimeoutRecovery() {
+    const recovery = this.pendingIdeationTimeoutRecovery;
+    this.pendingIdeationTimeoutRecovery = null;
+    return recovery;
   }
   loadVisionContext(runId) {
     const maxVisionContextChars = this.cfg.visionContextMaxChars;
@@ -6180,6 +6204,7 @@ class RemoteBuddyAutonomousEngine {
     }).catch(() => {});
   }
   async llmPhase(phase, runId, snapshotId, input, objectiveId) {
+    const timeoutMs = this.phaseTimeoutMs(phase);
     const requestPayload = {
       phase,
       system: input.system,
@@ -6188,10 +6213,30 @@ class RemoteBuddyAutonomousEngine {
       maxTokens: input.maxTokens ?? null,
       temperature: input.temperature ?? null
     };
+    const systemChars = input.system.length;
+    const messageChars = (input.messages ?? []).reduce((sum, message) => sum + (message.content?.length ?? 0), 0);
+    const requestBytes = Buffer.byteLength(JSON.stringify(requestPayload), "utf8");
     const startedAt = Date.now();
-    const output = await withTimeout(this.llm.generate(input), this.cfg.llmTimeoutMs, `autonomy ${phase} phase timeout`);
+    console.log(`[RemoteBuddyAutonomousEngine] ${phase} phase start: timeout_ms=${timeoutMs} system_chars=${systemChars} message_chars=${messageChars} request_bytes=${requestBytes} max_tokens=${input.maxTokens ?? "default"} temperature=${input.temperature ?? "default"}`);
+    let output;
+    try {
+      output = await withTimeout(this.llm.generate(input), timeoutMs, `autonomy ${phase} phase timeout`);
+    } catch (error) {
+      const elapsedMs = Date.now() - startedAt;
+      if (phase === "ideation" && error instanceof Error && error.message === "autonomy ideation phase timeout") {
+        this.pendingIdeationTimeoutRecovery = {
+          previousRunId: runId,
+          timedOutAt: new Date().toISOString(),
+          timeoutMs
+        };
+      }
+      console.warn(`[RemoteBuddyAutonomousEngine] ${phase} phase failed: elapsed_ms=${elapsedMs} timeout_ms=${timeoutMs} system_chars=${systemChars} message_chars=${messageChars} request_bytes=${requestBytes} error=${error instanceof Error ? error.message : String(error)}`);
+      throw error;
+    }
     const responseJson = parseJsonObject(output.text);
     const tokenUsage = output.usage ?? null;
+    const latencyMs = Date.now() - startedAt;
+    console.log(`[RemoteBuddyAutonomousEngine] ${phase} phase completed: elapsed_ms=${latencyMs} timeout_ms=${timeoutMs} response_chars=${output.text.length} prompt_tokens=${tokenUsage?.promptTokens ?? "unknown"} completion_tokens=${tokenUsage?.completionTokens ?? "unknown"}`);
     return {
       json: responseJson,
       llmCall: {
@@ -6211,11 +6256,11 @@ ${JSON.stringify(input.messages ?? [])}`),
         },
         modelId: "configured",
         temperature: input.temperature ?? null,
-        timeoutMs: this.cfg.llmTimeoutMs,
+        timeoutMs,
         response: responseJson,
         responseHash: sha256(output.text),
         tokenUsage,
-        latencyMs: Date.now() - startedAt
+        latencyMs
       }
     };
   }
@@ -6550,26 +6595,43 @@ ${JSON.stringify(input.messages ?? [])}`),
         return;
       }
       this.setPhase("ideation");
+      const ideationRecovery = this.consumeIdeationTimeoutRecovery();
+      if (ideationRecovery) {
+        console.warn(`[RemoteBuddyAutonomousEngine] tick ${runId}: applying one-shot ideation timeout recovery from ${ideationRecovery.previousRunId} after ${ideationRecovery.timeoutMs}ms timeout.`);
+      }
+      const ideationTopSignals = snapshot.top_signals.slice(0, ideationRecovery ? 10 : 16);
+      const ideationStateTraits = snapshot.state_traits.slice(0, ideationRecovery ? 14 : 24);
+      const ideationFeedbackPriors = snapshot.feedback_priors.slice(0, ideationRecovery ? 12 : 20);
+      const ideationEngineIdeaPriors = (snapshot.engine_idea_priors ?? []).slice(0, ideationRecovery ? 12 : 20);
+      const ideationOpenObjectives = snapshot.open_objectives.slice(0, ideationRecovery ? 12 : 20);
+      const ideationActiveCooldowns = snapshot.active_cooldowns.slice(0, ideationRecovery ? 12 : 20);
+      const ideationRepoTargets = repoTargets.slice(0, ideationRecovery ? 8 : repoTargets.length);
       const ideationPhase = await this.llmPhase("ideation", runId, snapshot.snapshot_id, {
         system: IDEATION_SYSTEM_PROMPT,
         json: true,
-        maxTokens: 2800,
+        maxTokens: ideationRecovery ? 1400 : 2800,
         temperature: 0.2,
         messages: [
+          ...ideationRecovery ? [
+            {
+              role: "user",
+              content: `${IDEATION_TIMEOUT_RECOVERY_INSTRUCTION} Previous timed-out run: ${ideationRecovery.previousRunId}. Timeout budget for this round: ${this.phaseTimeoutMs("ideation")}ms.`
+            }
+          ] : [],
           {
             role: "user",
             content: JSON.stringify({
               snapshot: {
                 snapshot_id: snapshot.snapshot_id,
-                top_signals: snapshot.top_signals.slice(0, 16),
-                state_traits: snapshot.state_traits.slice(0, 24),
-                feedback_priors: snapshot.feedback_priors.slice(0, 20),
-                engine_idea_priors: (snapshot.engine_idea_priors ?? []).slice(0, 20),
-                open_objectives: snapshot.open_objectives.slice(0, 20),
-                active_cooldowns: snapshot.active_cooldowns.slice(0, 20)
+                top_signals: ideationTopSignals,
+                state_traits: ideationStateTraits,
+                feedback_priors: ideationFeedbackPriors,
+                engine_idea_priors: ideationEngineIdeaPriors,
+                open_objectives: ideationOpenObjectives,
+                active_cooldowns: ideationActiveCooldowns
               },
               vision: visionContext,
-              repo_targets: repoTargets.map((target) => ({
+              repo_targets: ideationRepoTargets.map((target) => ({
                 component_area: target.component_area,
                 target_paths: target.target_paths,
                 write_globs: target.write_globs,

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py CHANGED Viewed

@@ -55,6 +55,7 @@ _DEFAULT_TASK_SYSTEM_PROMPT_PATH = "workerpals/openai_codex_default_system_promp
 _MANDATORY_RUNTIME_POLICY_APPENDIX_PATH = "workerpals/openai_codex_runtime_policy_appendix.md"
 _INSTRUCTION_WRAPPER_PROMPT_PATH = "workerpals/openai_codex_instruction_wrapper.md"
 _SUPPLEMENTAL_GUIDANCE_SECTION_PATH = "workerpals/openai_codex_supplemental_guidance_section.md"
+_COMMAND_ROUTER_POLICY_PATH = "workerpals/openai_codex_command_router_policy.md"
 _CODEX_WORKAROUND_PATTERNS = (
     re.compile(
         r"\bcodex cli\b.{0,120}\b(isn't|is not|not)\b.{0,120}\bavailable\b.{0,120}\b(so|therefore|instead|fallback|workaround|without|using)\b",
@@ -94,12 +95,7 @@ _VALID_SANDBOX_POLICIES = {"read-only", "workspace-write", "danger-full-access"}
 _VALID_COLORS = {"always", "never", "auto"}
 _VALID_AUTH_MODES = {"auto", "api_key", "chatgpt"}
 _VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}
-_DIRECT_COMMAND_POLICY_GUIDANCE = (
-    "Command-router policy: use direct commands only. Do not invoke `/bin/bash -lc`, `bash -c`, "
-    "`sh -lc`, `cmd /c`, `powershell -Command`, or `pwsh -Command`. Run the direct command "
-    "instead, such as `pwd`, `git status --porcelain`, `git diff -- path`, `ls dir`, "
-    "`cat file`, `sed -n '1,160p' file`, or `bun test <path>`."
-)
+_MAX_WRAPPER_RECOVERY_ATTEMPTS = 2
 def _model_supports_xhigh_reasoning(model: str) -> bool:
@@ -254,6 +250,79 @@ def _load_prompt_template(
     return _PROMPT_TOKEN_REGEX.sub(_replace, template)
+def _load_markdown_h2_section(relative_path: str, heading: str) -> str:
+    document = _load_prompt_template(relative_path)
+    if not document:
+        return ""
+    lines = document.splitlines()
+    needle = f"## {heading}".strip().lower()
+    start: Optional[int] = None
+    for idx, line in enumerate(lines):
+        if line.strip().lower() == needle:
+            start = idx + 1
+            break
+    if start is None:
+        return ""
+    collected: List[str] = []
+    for line in lines[start:]:
+        if line.startswith("## "):
+            break
+        collected.append(line)
+    return "\n".join(collected).strip()
+def _command_router_policy_guidance() -> str:
+    guidance = _load_markdown_h2_section(_COMMAND_ROUTER_POLICY_PATH, "Base Guidance")
+    if guidance:
+        return guidance
+    return (
+        "Command-router policy: shell commands are allowed, but invoke the actual command directly "
+        "instead of wrapping it with `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, "
+        "`powershell -Command`, or `pwsh -Command`. If a wrapper command is rejected, rerun its "
+        "inner command directly through the command tool."
+    )
+def _command_router_recovery_guidance() -> str:
+    guidance = _load_markdown_h2_section(_COMMAND_ROUTER_POLICY_PATH, "Recovery Guidance")
+    if guidance:
+        return guidance
+    return (
+        "Command-router recovery: the previous attempt retried disallowed shell wrappers.\n"
+        "Retry once using shell commands normally, but invoke the inner command directly instead of "
+        "wrapping it in `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, or "
+        "`pwsh -Command`.\n"
+        "You are not limited to a fixed allowlist of commands. The constraint is only that command "
+        "execution must target the actual program/argv directly rather than a wrapper shell."
+    )
+def _command_router_hard_recovery_guidance() -> str:
+    guidance = _load_markdown_h2_section(_COMMAND_ROUTER_POLICY_PATH, "Hard Recovery Guidance")
+    if guidance:
+        return guidance
+    return (
+        "Command-router escalation: the previous retry still attempted disallowed shell wrappers.\n"
+        "Do not invoke `bash`, `/bin/bash`, `sh`, `cmd`, `powershell`, `powershell.exe`, `pwsh`, "
+        "or `pwsh.exe` as the command itself on this attempt.\n"
+        "Your first command invocation on this retry must be one of the direct replacements listed "
+        "below, with no wrapper shell around it.\n"
+        "After you re-establish repo context, continue using ordinary shell commands directly "
+        "without wrapper shells."
+    )
+def _command_router_rejection_detail_intro() -> str:
+    guidance = _load_markdown_h2_section(_COMMAND_ROUTER_POLICY_PATH, "Rejection Detail")
+    if guidance:
+        return guidance
+    return (
+        "Codex repeatedly attempted disallowed shell-wrapper commands that the command router "
+        "rejected. Shell commands are allowed, but wrapper shells are not; invoke the inner "
+        "command directly and avoid wrapper retries."
+    )
 def _to_positive_int(raw: str) -> Optional[int]:
     try:
         parsed = int(raw)
@@ -1013,7 +1082,7 @@ def _unwrap_shell_wrapper_command(command: str) -> str:
     return ""
-def _build_wrapper_recovery_guidance(rejected_commands: List[str]) -> str:
+def _build_wrapper_direct_replacements(rejected_commands: List[str]) -> List[str]:
     direct_equivalents: List[str] = []
     seen: set[str] = set()
     for command in rejected_commands:
@@ -1023,11 +1092,16 @@ def _build_wrapper_recovery_guidance(rejected_commands: List[str]) -> str:
             continue
         seen.add(lowered)
         direct_equivalents.append(f"- `{command}` -> `{direct}`")
+    return direct_equivalents
+def _build_wrapper_recovery_guidance(rejected_commands: List[str], *, hard: bool = False) -> str:
     guidance_lines = [
-        "Command-router recovery: the previous attempt retried disallowed shell wrappers.",
-        "Retry once using direct commands only. Do not use `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, `pwsh -Command`, pipelines, or chained shell snippets.",
-        "If you need to inspect files or git state, run the direct command itself (for example `git diff --name-only`, `git status --porcelain`, `ls path`, `cat file`, or `sed -n '1,120p' file`).",
+        _command_router_hard_recovery_guidance()
+        if hard
+        else _command_router_recovery_guidance()
     ]
+    direct_equivalents = _build_wrapper_direct_replacements(rejected_commands)
     if direct_equivalents:
         guidance_lines.append("Use these direct replacements for the rejected commands:")
         guidance_lines.extend(direct_equivalents[:6])
@@ -1064,7 +1138,7 @@ def _augment_supplemental_guidance(supplemental_guidance: List[str]) -> List[str
     joined = "\n".join(normalized).lower()
     if "direct commands only" in joined or "shell-wrapper" in joined or "/bin/bash -lc" in joined:
         return normalized
-    return [_DIRECT_COMMAND_POLICY_GUIDANCE, *normalized]
+    return [_command_router_policy_guidance(), *normalized]
 def _read_text_if_exists(path: Path) -> str:
@@ -1466,11 +1540,20 @@ def _run_codex_task(
         log_git_status(repo, log)
         if command_policy_rejection_loop:
-            if wrapper_recovery_attempt < 1:
-                recovery_guidance = _build_wrapper_recovery_guidance(rejected_shell_wrappers)
+            if wrapper_recovery_attempt < _MAX_WRAPPER_RECOVERY_ATTEMPTS:
+                hard_recovery = wrapper_recovery_attempt >= 1
+                recovery_guidance = _build_wrapper_recovery_guidance(
+                    rejected_shell_wrappers,
+                    hard=hard_recovery,
+                )
                 if recovery_guidance:
                     log.warning(
-                        "Codex hit a shell-wrapper rejection loop; retrying once with direct-command recovery guidance."
+                        "Codex hit a shell-wrapper rejection loop; retrying once with "
+                        + (
+                            "strict no-wrapper recovery guidance."
+                            if hard_recovery
+                            else "direct-command recovery guidance."
+                        )
                     )
                     retry_result = _run_codex_task(
                         repo,
@@ -1480,19 +1563,19 @@ def _run_codex_task(
                         baseline_changes=baseline_snapshot,
                     )
                     retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
-                    if retry_result.get("ok"):
+                    if wrapper_recovery_attempt == 0 and retry_result.get("ok"):
                         recovered_stdout = str(retry_result.get("stdout") or "").strip()
                         retry_result["stdout"] = _truncate(
                             (
-                                "Recovered after the first Codex attempt hit command-router shell-wrapper rejections.\n\n"
+                                "Recovered after Codex attempts hit command-router shell-wrapper rejections.\n\n"
                                 f"{recovered_stdout}"
                             ).strip()
                         )
-                    else:
+                    elif wrapper_recovery_attempt == 0:
                         retry_stderr = str(retry_result.get("stderr") or "").strip()
                         retry_result["stderr"] = _truncate(
                             (
-                                "The first Codex attempt hit command-router shell-wrapper rejections and was retried once with direct-command recovery guidance.\n\n"
+                                "Earlier Codex attempts hit command-router shell-wrapper rejections and were retried with stricter recovery guidance.\n\n"
                                 f"{retry_stderr}"
                             ).strip()
                         )
@@ -1503,8 +1586,7 @@ def _run_codex_task(
                 else "- (no command details captured)"
             )
             detail = (
-                "Codex repeatedly attempted disallowed shell-wrapper commands that the command "
-                "router rejected. Switch to direct commands only and avoid wrapper retries.\n"
+                f"{_command_router_rejection_detail_intro()}\n"
                 f"Rejected commands:\n{command_lines}"
             )
             if last_message:

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py CHANGED Viewed

@@ -1,8 +1,11 @@
 import os
 import re
+import json
+import subprocess
 import sys
 import unittest
 import tempfile
+from unittest import mock
 from pathlib import Path
 _HERE = Path(__file__).resolve().parent
@@ -21,6 +24,8 @@ from executor_base import (
 from openai_codex_executor import (
     OpenAICodexRuntimeConfig,
     _augment_supplemental_guidance,
+    _build_wrapper_recovery_guidance,
+    _run_codex_task,
     _resolve_reasoning_effort,
     _build_instruction,
     _collect_disallowed_shell_wrapper_rejections,
@@ -266,10 +271,116 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
     def test_augments_guidance_with_direct_command_policy_once(self) -> None:
         guidance = _augment_supplemental_guidance(["Run bun test tests/example.test.ts"])
         self.assertGreaterEqual(len(guidance), 2)
-        self.assertIn("direct commands only", guidance[0].lower())
+        self.assertIn("shell commands are allowed", guidance[0].lower())
         guidance_again = _augment_supplemental_guidance(guidance)
         self.assertEqual(guidance_again, guidance)
+    def test_wrapper_recovery_guidance_allows_arbitrary_shell_commands_without_wrappers(self) -> None:
+        guidance = _build_wrapper_recovery_guidance(
+            ["/bin/bash -lc 'git status --porcelain'", "/bin/bash -lc pwd"]
+        )
+        lowered = guidance.lower()
+        self.assertIn("shell commands normally", lowered)
+        self.assertIn("not limited to a fixed allowlist", lowered)
+        self.assertIn("`/bin/bash -lc 'git status --porcelain'` -> `git status --porcelain`", guidance)
+    def test_wrapper_hard_recovery_guidance_requires_direct_replacements_first(self) -> None:
+        guidance = _build_wrapper_recovery_guidance(
+            ["/bin/bash -lc 'git status --porcelain'", "/bin/bash -lc pwd"],
+            hard=True,
+        )
+        lowered = guidance.lower()
+        self.assertIn("previous retry still attempted disallowed shell wrappers", lowered)
+        self.assertIn("do not invoke `bash`", lowered)
+        self.assertIn("first command invocation on this retry must be one of the direct replacements", lowered)
+        self.assertIn("`/bin/bash -lc 'git status --porcelain'` -> `git status --porcelain`", guidance)
+    def test_run_codex_task_escalates_wrapper_recovery_and_recovers(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-recovery-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            (repo / "README.md").write_text("# wrapper recovery test\n", encoding="utf-8")
+            subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "config", "user.name", "PushPals Test"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(
+                ["git", "config", "user.email", "pushpals-tests@example.com"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "commit", "-m", "chore: seed wrapper recovery repo"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            stub_path = Path(temp_dir) / "fake_codex_wrapper_recovery.py"
+            stub_path.write_text(
+                "\n".join(
+                    [
+                        "from pathlib import Path",
+                        "import sys",
+                        "import time",
+                        "",
+                        "argv = sys.argv[1:]",
+                        "last_message_path = None",
+                        "for index, arg in enumerate(argv):",
+                        "    if arg == '--output-last-message' and index + 1 < len(argv):",
+                        "        last_message_path = argv[index + 1]",
+                        "        break",
+                        "",
+                        "prompt = sys.stdin.read()",
+                        "hard_marker = 'Your first command invocation on this retry must be one of the direct replacements listed below'",
+                        "if hard_marker in prompt:",
+                        "    if last_message_path:",
+                        "        Path(last_message_path).write_text(",
+                        "            'Recovered by switching to direct commands after strict wrapper recovery.',",
+                        "            encoding='utf-8',",
+                        "        )",
+                        "    print('item.completed | Used direct commands after strict recovery guidance.', flush=True)",
+                        "    sys.exit(0)",
+                        "",
+                        "for line in (",
+                        "    'error=exec_command failed for `/bin/bash -lc pwd`: CreateProcess { message: \"Rejected\" }',",
+                        "    'error=exec_command failed for `/bin/bash -lc \\'git branch --show-current\\'`: CreateProcess { message: \"Rejected\" }',",
+                        "    'error=exec_command failed for `/bin/bash -lc ls`: CreateProcess { message: \"Rejected\" }',",
+                        "    'error=exec_command failed for `/bin/bash -lc \\'git status --porcelain\\'`: CreateProcess { message: \"Rejected\" }',",
+                        "):",
+                        "    print(line, file=sys.stderr, flush=True)",
+                        "time.sleep(10)",
+                    ]
+                ),
+                encoding="utf-8",
+            )
+            env_overrides = {
+                "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
+                "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
+                "OPENAI_API_KEY": "pushpals-wrapper-recovery-test-key",
+                "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "10",
+                "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
+            }
+            with mock.patch.dict(os.environ, env_overrides, clear=False):
+                result = _run_codex_task(
+                    str(repo),
+                    "Inspect the repo and report the current branch.",
+                    [],
+                )
+        self.assertTrue(result.get("ok"), result)
+        self.assertIn("Recovered after Codex attempts hit command-router shell-wrapper rejections.", str(result.get("stdout") or ""))
+        self.assertIn("strict wrapper recovery", str(result.get("stdout") or "").lower())
     def test_usage_falls_back_to_estimate_when_trace_has_no_usage(self) -> None:
         usage = _usage_from_trace_or_estimate({}, "abc" * 30, "done", model="gpt-5.4")
         self.assertTrue(usage["estimated"])

package/runtime/sandbox/apps/workerpals/src/docker_executor.ts CHANGED Viewed

@@ -757,11 +757,17 @@ export class DockerExecutor {
     if (backend !== "openai_codex") return [];
     const hostCodexHomeRaw = (process.env.PUSHPALS_OPENAI_CODEX_HOST_CODEX_HOME || "").trim();
+    if (hostCodexHomeRaw && !isAbsolute(hostCodexHomeRaw)) {
+      console.warn(
+        `[DockerExecutor] Ignoring relative PUSHPALS_OPENAI_CODEX_HOST_CODEX_HOME=${hostCodexHomeRaw}; using ${resolve(
+          homedir(),
+          ".codex",
+        )} so Codex state stays outside the repo worktree.`,
+      );
+    }
     const hostCodexHome = (
-      hostCodexHomeRaw
-        ? isAbsolute(hostCodexHomeRaw)
-          ? hostCodexHomeRaw
-          : resolve(this.options.repo, hostCodexHomeRaw)
+      hostCodexHomeRaw && isAbsolute(hostCodexHomeRaw)
+        ? hostCodexHomeRaw
         : resolve(homedir(), ".codex")
     ).trim();
     if (!hostCodexHome) return [];

package/runtime/sandbox/apps/workerpals/src/execute_job.ts CHANGED Viewed

@@ -3,7 +3,7 @@
  * Used by both the host Worker (direct mode) and the Docker job runner.
  */
-import { existsSync, readFileSync, unlinkSync } from "fs";
+import { existsSync, readFileSync, rmSync, unlinkSync } from "fs";
 import { resolve } from "path";
 import {
   deriveAutonomyComponentArea,
@@ -2606,6 +2606,41 @@ export async function syncHiddenRefWithRemoteBranchByRebase(
   publicBranchName: string,
   jobId: string,
 ): Promise<{ ok: true; sha: string } | { ok: false; error: string }> {
+  const scrubKnownPreSyncArtifacts = async (): Promise<
+    { ok: true } | { ok: false; error: string }
+  > => {
+    const codexPath = resolve(repo, ".codex");
+    if (!existsSync(codexPath)) return { ok: true };
+    const trackedCodex = await git(repo, ["ls-files", "--error-unmatch", "--", ".codex"]);
+    if (trackedCodex.ok) {
+      return {
+        ok: false,
+        error:
+          "Tracked .codex path blocks branch sync. Move Codex state outside the repo worktree before retrying.",
+      };
+    }
+    try {
+      rmSync(codexPath, { recursive: true, force: true });
+    } catch (error) {
+      return {
+        ok: false,
+        error: `Failed to scrub transient .codex artifact before branch sync: ${String(error)}`,
+      };
+    }
+    if (existsSync(codexPath)) {
+      return {
+        ok: false,
+        error: "Failed to scrub transient .codex artifact before branch sync: path still exists.",
+      };
+    }
+    console.warn("[WorkerPals] Removed transient .codex artifact before branch sync.");
+    return { ok: true };
+  };
   const pullRebaseNonInteractive = () =>
     git(repo, [
       "-c",
@@ -2652,6 +2687,10 @@ export async function syncHiddenRefWithRemoteBranchByRebase(
     const maxPullRebaseAttempts = 5;
     let syncedWithRemote = false;
     for (let attempt = 1; attempt <= maxPullRebaseAttempts; attempt++) {
+      const preSyncGuard = await scrubKnownPreSyncArtifacts();
+      if (!preSyncGuard.ok) {
+        return { ok: false, error: preSyncGuard.error };
+      }
       let pullRebase = await pullRebaseNonInteractive();
       if (!pullRebase.ok && isPullRebaseDirtyWorkingTreeOutput(combinedGitOutput(pullRebase))) {
         // Recover from dirty index/worktree left by previous attempts and retry non-interactively.

package/runtime/sandbox/prompts/workerpals/openai_codex_command_router_policy.md ADDED Viewed

@@ -0,0 +1,16 @@
+## Base Guidance
+Command-router policy: shell commands are allowed, but invoke the actual command directly instead of wrapping it with `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, or `pwsh -Command`. If a wrapper command is rejected, rerun its inner command directly through the command tool.
+## Recovery Guidance
+Command-router recovery: the previous attempt retried disallowed shell wrappers.
+Retry once using shell commands normally, but invoke the inner command directly instead of wrapping it in `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, or `pwsh -Command`.
+You are not limited to a fixed allowlist of commands. The constraint is only that command execution must target the actual program/argv directly rather than a wrapper shell.
+## Hard Recovery Guidance
+Command-router escalation: the previous retry still attempted disallowed shell wrappers.
+Do not invoke `bash`, `/bin/bash`, `sh`, `cmd`, `powershell`, `powershell.exe`, `pwsh`, or `pwsh.exe` as the command itself on this attempt.
+Your first command invocation on this retry must be one of the direct replacements listed below, with no wrapper shell around it.
+After you re-establish repo context, continue using ordinary shell commands directly without wrapper shells.
+## Rejection Detail
+Codex repeatedly attempted disallowed shell-wrapper commands that the command router rejected. Shell commands are allowed, but wrapper shells are not; invoke the inner command directly and avoid wrapper retries.