@pushpalsdev/cli 1.0.62 → 1.0.63
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/prompts/workerpals/openai_codex_command_router_policy.md +6 -0
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +75 -13
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +43 -9
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +101 -0
- package/runtime/sandbox/prompts/workerpals/openai_codex_command_router_policy.md +6 -0
package/package.json
CHANGED
|
@@ -6,5 +6,11 @@ Command-router recovery: the previous attempt retried disallowed shell wrappers.
|
|
|
6
6
|
Retry once using shell commands normally, but invoke the inner command directly instead of wrapping it in `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, or `pwsh -Command`.
|
|
7
7
|
You are not limited to a fixed allowlist of commands. The constraint is only that command execution must target the actual program/argv directly rather than a wrapper shell.
|
|
8
8
|
|
|
9
|
+
## Hard Recovery Guidance
|
|
10
|
+
Command-router escalation: the previous retry still attempted disallowed shell wrappers.
|
|
11
|
+
Do not invoke `bash`, `/bin/bash`, `sh`, `cmd`, `powershell`, `powershell.exe`, `pwsh`, or `pwsh.exe` as the command itself on this attempt.
|
|
12
|
+
Your first command invocation on this retry must be one of the direct replacements listed below, with no wrapper shell around it.
|
|
13
|
+
After you re-establish repo context, continue using ordinary shell commands directly without wrapper shells.
|
|
14
|
+
|
|
9
15
|
## Rejection Detail
|
|
10
16
|
Codex repeatedly attempted disallowed shell-wrapper commands that the command router rejected. Shell commands are allowed, but wrapper shells are not; invoke the inner command directly and avoid wrapper retries.
|
|
@@ -4018,6 +4018,7 @@ var BREADTH_ORDER = {
|
|
|
4018
4018
|
var IDEATION_SYSTEM_PROMPT = loadPromptTemplate("remotebuddy/autonomy_ideation_system_prompt.md").trim();
|
|
4019
4019
|
var SCORING_SYSTEM_PROMPT = loadPromptTemplate("remotebuddy/autonomy_scoring_system_prompt.md").trim();
|
|
4020
4020
|
var PLANNING_SYSTEM_PROMPT = loadPromptTemplate("remotebuddy/autonomy_planning_system_prompt.md").trim();
|
|
4021
|
+
var IDEATION_TIMEOUT_RECOVERY_INSTRUCTION = "Previous ideation timed out before you returned JSON. For this round only, stay within the time budget: prioritize the top 1-3 highest-confidence candidates, keep reasoning brief, avoid exhaustive exploration, and return valid JSON as soon as possible.";
|
|
4021
4022
|
var VISION_DOC_FNAME = "vision.md";
|
|
4022
4023
|
var MAX_VISION_SECTION_CHARS = 1200;
|
|
4023
4024
|
var DOCS_MIN_IMPACT_SIGNAL_FOR_NO_PENALTY = 0.45;
|
|
@@ -5779,6 +5780,7 @@ class RemoteBuddyAutonomousEngine {
|
|
|
5779
5780
|
baseBranch;
|
|
5780
5781
|
llm;
|
|
5781
5782
|
comm;
|
|
5783
|
+
llmCfg;
|
|
5782
5784
|
cfg;
|
|
5783
5785
|
runtimeEnabled = true;
|
|
5784
5786
|
timer = null;
|
|
@@ -5792,6 +5794,7 @@ class RemoteBuddyAutonomousEngine {
|
|
|
5792
5794
|
lastOutcome = "none";
|
|
5793
5795
|
lastDetail = "not_started";
|
|
5794
5796
|
lastCompletedAtMs = 0;
|
|
5797
|
+
pendingIdeationTimeoutRecovery = null;
|
|
5795
5798
|
constructor(opts) {
|
|
5796
5799
|
this.server = opts.server;
|
|
5797
5800
|
this.sessionId = opts.sessionId;
|
|
@@ -5805,6 +5808,7 @@ class RemoteBuddyAutonomousEngine {
|
|
|
5805
5808
|
this.baseBranch = String(opts.config.sourceControlManager.baseBranch || "main").trim() || "main";
|
|
5806
5809
|
this.llm = opts.llm;
|
|
5807
5810
|
this.comm = opts.comm;
|
|
5811
|
+
this.llmCfg = opts.config.remotebuddy.llm;
|
|
5808
5812
|
this.cfg = opts.config.remotebuddy.autonomy;
|
|
5809
5813
|
this.runtimeEnabled = this.cfg.enabled;
|
|
5810
5814
|
}
|
|
@@ -5859,10 +5863,30 @@ class RemoteBuddyAutonomousEngine {
|
|
|
5859
5863
|
return headers;
|
|
5860
5864
|
}
|
|
5861
5865
|
lockTtlMs() {
|
|
5862
|
-
|
|
5866
|
+
const maxPhaseTimeoutMs = Math.max(this.phaseTimeoutMs("ideation"), this.phaseTimeoutMs("scoring"), this.phaseTimeoutMs("planning"));
|
|
5867
|
+
return Math.max(this.cfg.tickIntervalMs * 3, this.cfg.ideationBudgetMs * 2 + maxPhaseTimeoutMs * 6, 30000);
|
|
5863
5868
|
}
|
|
5864
5869
|
cycleBudgetMs() {
|
|
5865
|
-
|
|
5870
|
+
const ideationTimeoutMs = this.phaseTimeoutMs("ideation");
|
|
5871
|
+
const scoringTimeoutMs = this.phaseTimeoutMs("scoring");
|
|
5872
|
+
const planningTimeoutMs = this.phaseTimeoutMs("planning");
|
|
5873
|
+
const maxPhaseTimeoutMs = Math.max(ideationTimeoutMs, scoringTimeoutMs, planningTimeoutMs);
|
|
5874
|
+
return Math.max(this.cfg.ideationBudgetMs + ideationTimeoutMs + scoringTimeoutMs + planningTimeoutMs, maxPhaseTimeoutMs * 4, 20000);
|
|
5875
|
+
}
|
|
5876
|
+
phaseTimeoutMs(phase) {
|
|
5877
|
+
const configuredTimeoutMs = Math.max(1000, this.cfg.llmTimeoutMs);
|
|
5878
|
+
if (phase !== "ideation")
|
|
5879
|
+
return configuredTimeoutMs;
|
|
5880
|
+
if (String(this.llmCfg.backend || "").trim().toLowerCase() !== "openai_codex") {
|
|
5881
|
+
return configuredTimeoutMs;
|
|
5882
|
+
}
|
|
5883
|
+
const codexTimeoutMs2 = Math.max(configuredTimeoutMs, this.llmCfg.codexTimeoutMs || 0);
|
|
5884
|
+
return Math.min(codexTimeoutMs2, Math.max(configuredTimeoutMs, 90000));
|
|
5885
|
+
}
|
|
5886
|
+
consumeIdeationTimeoutRecovery() {
|
|
5887
|
+
const recovery = this.pendingIdeationTimeoutRecovery;
|
|
5888
|
+
this.pendingIdeationTimeoutRecovery = null;
|
|
5889
|
+
return recovery;
|
|
5866
5890
|
}
|
|
5867
5891
|
loadVisionContext(runId) {
|
|
5868
5892
|
const maxVisionContextChars = this.cfg.visionContextMaxChars;
|
|
@@ -6180,6 +6204,7 @@ class RemoteBuddyAutonomousEngine {
|
|
|
6180
6204
|
}).catch(() => {});
|
|
6181
6205
|
}
|
|
6182
6206
|
async llmPhase(phase, runId, snapshotId, input, objectiveId) {
|
|
6207
|
+
const timeoutMs = this.phaseTimeoutMs(phase);
|
|
6183
6208
|
const requestPayload = {
|
|
6184
6209
|
phase,
|
|
6185
6210
|
system: input.system,
|
|
@@ -6188,10 +6213,30 @@ class RemoteBuddyAutonomousEngine {
|
|
|
6188
6213
|
maxTokens: input.maxTokens ?? null,
|
|
6189
6214
|
temperature: input.temperature ?? null
|
|
6190
6215
|
};
|
|
6216
|
+
const systemChars = input.system.length;
|
|
6217
|
+
const messageChars = (input.messages ?? []).reduce((sum, message) => sum + (message.content?.length ?? 0), 0);
|
|
6218
|
+
const requestBytes = Buffer.byteLength(JSON.stringify(requestPayload), "utf8");
|
|
6191
6219
|
const startedAt = Date.now();
|
|
6192
|
-
|
|
6220
|
+
console.log(`[RemoteBuddyAutonomousEngine] ${phase} phase start: timeout_ms=${timeoutMs} system_chars=${systemChars} message_chars=${messageChars} request_bytes=${requestBytes} max_tokens=${input.maxTokens ?? "default"} temperature=${input.temperature ?? "default"}`);
|
|
6221
|
+
let output;
|
|
6222
|
+
try {
|
|
6223
|
+
output = await withTimeout(this.llm.generate(input), timeoutMs, `autonomy ${phase} phase timeout`);
|
|
6224
|
+
} catch (error) {
|
|
6225
|
+
const elapsedMs = Date.now() - startedAt;
|
|
6226
|
+
if (phase === "ideation" && error instanceof Error && error.message === "autonomy ideation phase timeout") {
|
|
6227
|
+
this.pendingIdeationTimeoutRecovery = {
|
|
6228
|
+
previousRunId: runId,
|
|
6229
|
+
timedOutAt: new Date().toISOString(),
|
|
6230
|
+
timeoutMs
|
|
6231
|
+
};
|
|
6232
|
+
}
|
|
6233
|
+
console.warn(`[RemoteBuddyAutonomousEngine] ${phase} phase failed: elapsed_ms=${elapsedMs} timeout_ms=${timeoutMs} system_chars=${systemChars} message_chars=${messageChars} request_bytes=${requestBytes} error=${error instanceof Error ? error.message : String(error)}`);
|
|
6234
|
+
throw error;
|
|
6235
|
+
}
|
|
6193
6236
|
const responseJson = parseJsonObject(output.text);
|
|
6194
6237
|
const tokenUsage = output.usage ?? null;
|
|
6238
|
+
const latencyMs = Date.now() - startedAt;
|
|
6239
|
+
console.log(`[RemoteBuddyAutonomousEngine] ${phase} phase completed: elapsed_ms=${latencyMs} timeout_ms=${timeoutMs} response_chars=${output.text.length} prompt_tokens=${tokenUsage?.promptTokens ?? "unknown"} completion_tokens=${tokenUsage?.completionTokens ?? "unknown"}`);
|
|
6195
6240
|
return {
|
|
6196
6241
|
json: responseJson,
|
|
6197
6242
|
llmCall: {
|
|
@@ -6211,11 +6256,11 @@ ${JSON.stringify(input.messages ?? [])}`),
|
|
|
6211
6256
|
},
|
|
6212
6257
|
modelId: "configured",
|
|
6213
6258
|
temperature: input.temperature ?? null,
|
|
6214
|
-
timeoutMs
|
|
6259
|
+
timeoutMs,
|
|
6215
6260
|
response: responseJson,
|
|
6216
6261
|
responseHash: sha256(output.text),
|
|
6217
6262
|
tokenUsage,
|
|
6218
|
-
latencyMs
|
|
6263
|
+
latencyMs
|
|
6219
6264
|
}
|
|
6220
6265
|
};
|
|
6221
6266
|
}
|
|
@@ -6550,26 +6595,43 @@ ${JSON.stringify(input.messages ?? [])}`),
|
|
|
6550
6595
|
return;
|
|
6551
6596
|
}
|
|
6552
6597
|
this.setPhase("ideation");
|
|
6598
|
+
const ideationRecovery = this.consumeIdeationTimeoutRecovery();
|
|
6599
|
+
if (ideationRecovery) {
|
|
6600
|
+
console.warn(`[RemoteBuddyAutonomousEngine] tick ${runId}: applying one-shot ideation timeout recovery from ${ideationRecovery.previousRunId} after ${ideationRecovery.timeoutMs}ms timeout.`);
|
|
6601
|
+
}
|
|
6602
|
+
const ideationTopSignals = snapshot.top_signals.slice(0, ideationRecovery ? 10 : 16);
|
|
6603
|
+
const ideationStateTraits = snapshot.state_traits.slice(0, ideationRecovery ? 14 : 24);
|
|
6604
|
+
const ideationFeedbackPriors = snapshot.feedback_priors.slice(0, ideationRecovery ? 12 : 20);
|
|
6605
|
+
const ideationEngineIdeaPriors = (snapshot.engine_idea_priors ?? []).slice(0, ideationRecovery ? 12 : 20);
|
|
6606
|
+
const ideationOpenObjectives = snapshot.open_objectives.slice(0, ideationRecovery ? 12 : 20);
|
|
6607
|
+
const ideationActiveCooldowns = snapshot.active_cooldowns.slice(0, ideationRecovery ? 12 : 20);
|
|
6608
|
+
const ideationRepoTargets = repoTargets.slice(0, ideationRecovery ? 8 : repoTargets.length);
|
|
6553
6609
|
const ideationPhase = await this.llmPhase("ideation", runId, snapshot.snapshot_id, {
|
|
6554
6610
|
system: IDEATION_SYSTEM_PROMPT,
|
|
6555
6611
|
json: true,
|
|
6556
|
-
maxTokens: 2800,
|
|
6612
|
+
maxTokens: ideationRecovery ? 1400 : 2800,
|
|
6557
6613
|
temperature: 0.2,
|
|
6558
6614
|
messages: [
|
|
6615
|
+
...ideationRecovery ? [
|
|
6616
|
+
{
|
|
6617
|
+
role: "user",
|
|
6618
|
+
content: `${IDEATION_TIMEOUT_RECOVERY_INSTRUCTION} Previous timed-out run: ${ideationRecovery.previousRunId}. Timeout budget for this round: ${this.phaseTimeoutMs("ideation")}ms.`
|
|
6619
|
+
}
|
|
6620
|
+
] : [],
|
|
6559
6621
|
{
|
|
6560
6622
|
role: "user",
|
|
6561
6623
|
content: JSON.stringify({
|
|
6562
6624
|
snapshot: {
|
|
6563
6625
|
snapshot_id: snapshot.snapshot_id,
|
|
6564
|
-
top_signals:
|
|
6565
|
-
state_traits:
|
|
6566
|
-
feedback_priors:
|
|
6567
|
-
engine_idea_priors:
|
|
6568
|
-
open_objectives:
|
|
6569
|
-
active_cooldowns:
|
|
6626
|
+
top_signals: ideationTopSignals,
|
|
6627
|
+
state_traits: ideationStateTraits,
|
|
6628
|
+
feedback_priors: ideationFeedbackPriors,
|
|
6629
|
+
engine_idea_priors: ideationEngineIdeaPriors,
|
|
6630
|
+
open_objectives: ideationOpenObjectives,
|
|
6631
|
+
active_cooldowns: ideationActiveCooldowns
|
|
6570
6632
|
},
|
|
6571
6633
|
vision: visionContext,
|
|
6572
|
-
repo_targets:
|
|
6634
|
+
repo_targets: ideationRepoTargets.map((target) => ({
|
|
6573
6635
|
component_area: target.component_area,
|
|
6574
6636
|
target_paths: target.target_paths,
|
|
6575
6637
|
write_globs: target.write_globs,
|
|
@@ -95,6 +95,7 @@ _VALID_SANDBOX_POLICIES = {"read-only", "workspace-write", "danger-full-access"}
|
|
|
95
95
|
_VALID_COLORS = {"always", "never", "auto"}
|
|
96
96
|
_VALID_AUTH_MODES = {"auto", "api_key", "chatgpt"}
|
|
97
97
|
_VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}
|
|
98
|
+
_MAX_WRAPPER_RECOVERY_ATTEMPTS = 2
|
|
98
99
|
|
|
99
100
|
|
|
100
101
|
def _model_supports_xhigh_reasoning(model: str) -> bool:
|
|
@@ -296,6 +297,21 @@ def _command_router_recovery_guidance() -> str:
|
|
|
296
297
|
)
|
|
297
298
|
|
|
298
299
|
|
|
300
|
+
def _command_router_hard_recovery_guidance() -> str:
|
|
301
|
+
guidance = _load_markdown_h2_section(_COMMAND_ROUTER_POLICY_PATH, "Hard Recovery Guidance")
|
|
302
|
+
if guidance:
|
|
303
|
+
return guidance
|
|
304
|
+
return (
|
|
305
|
+
"Command-router escalation: the previous retry still attempted disallowed shell wrappers.\n"
|
|
306
|
+
"Do not invoke `bash`, `/bin/bash`, `sh`, `cmd`, `powershell`, `powershell.exe`, `pwsh`, "
|
|
307
|
+
"or `pwsh.exe` as the command itself on this attempt.\n"
|
|
308
|
+
"Your first command invocation on this retry must be one of the direct replacements listed "
|
|
309
|
+
"below, with no wrapper shell around it.\n"
|
|
310
|
+
"After you re-establish repo context, continue using ordinary shell commands directly "
|
|
311
|
+
"without wrapper shells."
|
|
312
|
+
)
|
|
313
|
+
|
|
314
|
+
|
|
299
315
|
def _command_router_rejection_detail_intro() -> str:
|
|
300
316
|
guidance = _load_markdown_h2_section(_COMMAND_ROUTER_POLICY_PATH, "Rejection Detail")
|
|
301
317
|
if guidance:
|
|
@@ -1066,7 +1082,7 @@ def _unwrap_shell_wrapper_command(command: str) -> str:
|
|
|
1066
1082
|
return ""
|
|
1067
1083
|
|
|
1068
1084
|
|
|
1069
|
-
def
|
|
1085
|
+
def _build_wrapper_direct_replacements(rejected_commands: List[str]) -> List[str]:
|
|
1070
1086
|
direct_equivalents: List[str] = []
|
|
1071
1087
|
seen: set[str] = set()
|
|
1072
1088
|
for command in rejected_commands:
|
|
@@ -1076,7 +1092,16 @@ def _build_wrapper_recovery_guidance(rejected_commands: List[str]) -> str:
|
|
|
1076
1092
|
continue
|
|
1077
1093
|
seen.add(lowered)
|
|
1078
1094
|
direct_equivalents.append(f"- `{command}` -> `{direct}`")
|
|
1079
|
-
|
|
1095
|
+
return direct_equivalents
|
|
1096
|
+
|
|
1097
|
+
|
|
1098
|
+
def _build_wrapper_recovery_guidance(rejected_commands: List[str], *, hard: bool = False) -> str:
|
|
1099
|
+
guidance_lines = [
|
|
1100
|
+
_command_router_hard_recovery_guidance()
|
|
1101
|
+
if hard
|
|
1102
|
+
else _command_router_recovery_guidance()
|
|
1103
|
+
]
|
|
1104
|
+
direct_equivalents = _build_wrapper_direct_replacements(rejected_commands)
|
|
1080
1105
|
if direct_equivalents:
|
|
1081
1106
|
guidance_lines.append("Use these direct replacements for the rejected commands:")
|
|
1082
1107
|
guidance_lines.extend(direct_equivalents[:6])
|
|
@@ -1515,11 +1540,20 @@ def _run_codex_task(
|
|
|
1515
1540
|
log_git_status(repo, log)
|
|
1516
1541
|
|
|
1517
1542
|
if command_policy_rejection_loop:
|
|
1518
|
-
if wrapper_recovery_attempt <
|
|
1519
|
-
|
|
1543
|
+
if wrapper_recovery_attempt < _MAX_WRAPPER_RECOVERY_ATTEMPTS:
|
|
1544
|
+
hard_recovery = wrapper_recovery_attempt >= 1
|
|
1545
|
+
recovery_guidance = _build_wrapper_recovery_guidance(
|
|
1546
|
+
rejected_shell_wrappers,
|
|
1547
|
+
hard=hard_recovery,
|
|
1548
|
+
)
|
|
1520
1549
|
if recovery_guidance:
|
|
1521
1550
|
log.warning(
|
|
1522
|
-
"Codex hit a shell-wrapper rejection loop; retrying once with
|
|
1551
|
+
"Codex hit a shell-wrapper rejection loop; retrying once with "
|
|
1552
|
+
+ (
|
|
1553
|
+
"strict no-wrapper recovery guidance."
|
|
1554
|
+
if hard_recovery
|
|
1555
|
+
else "direct-command recovery guidance."
|
|
1556
|
+
)
|
|
1523
1557
|
)
|
|
1524
1558
|
retry_result = _run_codex_task(
|
|
1525
1559
|
repo,
|
|
@@ -1529,19 +1563,19 @@ def _run_codex_task(
|
|
|
1529
1563
|
baseline_changes=baseline_snapshot,
|
|
1530
1564
|
)
|
|
1531
1565
|
retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
|
|
1532
|
-
if retry_result.get("ok"):
|
|
1566
|
+
if wrapper_recovery_attempt == 0 and retry_result.get("ok"):
|
|
1533
1567
|
recovered_stdout = str(retry_result.get("stdout") or "").strip()
|
|
1534
1568
|
retry_result["stdout"] = _truncate(
|
|
1535
1569
|
(
|
|
1536
|
-
"Recovered after
|
|
1570
|
+
"Recovered after Codex attempts hit command-router shell-wrapper rejections.\n\n"
|
|
1537
1571
|
f"{recovered_stdout}"
|
|
1538
1572
|
).strip()
|
|
1539
1573
|
)
|
|
1540
|
-
|
|
1574
|
+
elif wrapper_recovery_attempt == 0:
|
|
1541
1575
|
retry_stderr = str(retry_result.get("stderr") or "").strip()
|
|
1542
1576
|
retry_result["stderr"] = _truncate(
|
|
1543
1577
|
(
|
|
1544
|
-
"
|
|
1578
|
+
"Earlier Codex attempts hit command-router shell-wrapper rejections and were retried with stricter recovery guidance.\n\n"
|
|
1545
1579
|
f"{retry_stderr}"
|
|
1546
1580
|
).strip()
|
|
1547
1581
|
)
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import re
|
|
3
|
+
import json
|
|
4
|
+
import subprocess
|
|
3
5
|
import sys
|
|
4
6
|
import unittest
|
|
5
7
|
import tempfile
|
|
8
|
+
from unittest import mock
|
|
6
9
|
from pathlib import Path
|
|
7
10
|
|
|
8
11
|
_HERE = Path(__file__).resolve().parent
|
|
@@ -22,6 +25,7 @@ from openai_codex_executor import (
|
|
|
22
25
|
OpenAICodexRuntimeConfig,
|
|
23
26
|
_augment_supplemental_guidance,
|
|
24
27
|
_build_wrapper_recovery_guidance,
|
|
28
|
+
_run_codex_task,
|
|
25
29
|
_resolve_reasoning_effort,
|
|
26
30
|
_build_instruction,
|
|
27
31
|
_collect_disallowed_shell_wrapper_rejections,
|
|
@@ -280,6 +284,103 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
280
284
|
self.assertIn("not limited to a fixed allowlist", lowered)
|
|
281
285
|
self.assertIn("`/bin/bash -lc 'git status --porcelain'` -> `git status --porcelain`", guidance)
|
|
282
286
|
|
|
287
|
+
def test_wrapper_hard_recovery_guidance_requires_direct_replacements_first(self) -> None:
|
|
288
|
+
guidance = _build_wrapper_recovery_guidance(
|
|
289
|
+
["/bin/bash -lc 'git status --porcelain'", "/bin/bash -lc pwd"],
|
|
290
|
+
hard=True,
|
|
291
|
+
)
|
|
292
|
+
lowered = guidance.lower()
|
|
293
|
+
self.assertIn("previous retry still attempted disallowed shell wrappers", lowered)
|
|
294
|
+
self.assertIn("do not invoke `bash`", lowered)
|
|
295
|
+
self.assertIn("first command invocation on this retry must be one of the direct replacements", lowered)
|
|
296
|
+
self.assertIn("`/bin/bash -lc 'git status --porcelain'` -> `git status --porcelain`", guidance)
|
|
297
|
+
|
|
298
|
+
def test_run_codex_task_escalates_wrapper_recovery_and_recovers(self) -> None:
|
|
299
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-recovery-") as temp_dir:
|
|
300
|
+
repo = Path(temp_dir) / "repo"
|
|
301
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
302
|
+
(repo / "README.md").write_text("# wrapper recovery test\n", encoding="utf-8")
|
|
303
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
304
|
+
subprocess.run(
|
|
305
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
306
|
+
cwd=repo,
|
|
307
|
+
check=True,
|
|
308
|
+
capture_output=True,
|
|
309
|
+
text=True,
|
|
310
|
+
)
|
|
311
|
+
subprocess.run(
|
|
312
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
313
|
+
cwd=repo,
|
|
314
|
+
check=True,
|
|
315
|
+
capture_output=True,
|
|
316
|
+
text=True,
|
|
317
|
+
)
|
|
318
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
319
|
+
subprocess.run(
|
|
320
|
+
["git", "commit", "-m", "chore: seed wrapper recovery repo"],
|
|
321
|
+
cwd=repo,
|
|
322
|
+
check=True,
|
|
323
|
+
capture_output=True,
|
|
324
|
+
text=True,
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
stub_path = Path(temp_dir) / "fake_codex_wrapper_recovery.py"
|
|
328
|
+
stub_path.write_text(
|
|
329
|
+
"\n".join(
|
|
330
|
+
[
|
|
331
|
+
"from pathlib import Path",
|
|
332
|
+
"import sys",
|
|
333
|
+
"import time",
|
|
334
|
+
"",
|
|
335
|
+
"argv = sys.argv[1:]",
|
|
336
|
+
"last_message_path = None",
|
|
337
|
+
"for index, arg in enumerate(argv):",
|
|
338
|
+
" if arg == '--output-last-message' and index + 1 < len(argv):",
|
|
339
|
+
" last_message_path = argv[index + 1]",
|
|
340
|
+
" break",
|
|
341
|
+
"",
|
|
342
|
+
"prompt = sys.stdin.read()",
|
|
343
|
+
"hard_marker = 'Your first command invocation on this retry must be one of the direct replacements listed below'",
|
|
344
|
+
"if hard_marker in prompt:",
|
|
345
|
+
" if last_message_path:",
|
|
346
|
+
" Path(last_message_path).write_text(",
|
|
347
|
+
" 'Recovered by switching to direct commands after strict wrapper recovery.',",
|
|
348
|
+
" encoding='utf-8',",
|
|
349
|
+
" )",
|
|
350
|
+
" print('item.completed | Used direct commands after strict recovery guidance.', flush=True)",
|
|
351
|
+
" sys.exit(0)",
|
|
352
|
+
"",
|
|
353
|
+
"for line in (",
|
|
354
|
+
" 'error=exec_command failed for `/bin/bash -lc pwd`: CreateProcess { message: \"Rejected\" }',",
|
|
355
|
+
" 'error=exec_command failed for `/bin/bash -lc \\'git branch --show-current\\'`: CreateProcess { message: \"Rejected\" }',",
|
|
356
|
+
" 'error=exec_command failed for `/bin/bash -lc ls`: CreateProcess { message: \"Rejected\" }',",
|
|
357
|
+
" 'error=exec_command failed for `/bin/bash -lc \\'git status --porcelain\\'`: CreateProcess { message: \"Rejected\" }',",
|
|
358
|
+
"):",
|
|
359
|
+
" print(line, file=sys.stderr, flush=True)",
|
|
360
|
+
"time.sleep(10)",
|
|
361
|
+
]
|
|
362
|
+
),
|
|
363
|
+
encoding="utf-8",
|
|
364
|
+
)
|
|
365
|
+
|
|
366
|
+
env_overrides = {
|
|
367
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
368
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
369
|
+
"OPENAI_API_KEY": "pushpals-wrapper-recovery-test-key",
|
|
370
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "10",
|
|
371
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
372
|
+
}
|
|
373
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
374
|
+
result = _run_codex_task(
|
|
375
|
+
str(repo),
|
|
376
|
+
"Inspect the repo and report the current branch.",
|
|
377
|
+
[],
|
|
378
|
+
)
|
|
379
|
+
|
|
380
|
+
self.assertTrue(result.get("ok"), result)
|
|
381
|
+
self.assertIn("Recovered after Codex attempts hit command-router shell-wrapper rejections.", str(result.get("stdout") or ""))
|
|
382
|
+
self.assertIn("strict wrapper recovery", str(result.get("stdout") or "").lower())
|
|
383
|
+
|
|
283
384
|
def test_usage_falls_back_to_estimate_when_trace_has_no_usage(self) -> None:
|
|
284
385
|
usage = _usage_from_trace_or_estimate({}, "abc" * 30, "done", model="gpt-5.4")
|
|
285
386
|
self.assertTrue(usage["estimated"])
|
|
@@ -6,5 +6,11 @@ Command-router recovery: the previous attempt retried disallowed shell wrappers.
|
|
|
6
6
|
Retry once using shell commands normally, but invoke the inner command directly instead of wrapping it in `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, or `pwsh -Command`.
|
|
7
7
|
You are not limited to a fixed allowlist of commands. The constraint is only that command execution must target the actual program/argv directly rather than a wrapper shell.
|
|
8
8
|
|
|
9
|
+
## Hard Recovery Guidance
|
|
10
|
+
Command-router escalation: the previous retry still attempted disallowed shell wrappers.
|
|
11
|
+
Do not invoke `bash`, `/bin/bash`, `sh`, `cmd`, `powershell`, `powershell.exe`, `pwsh`, or `pwsh.exe` as the command itself on this attempt.
|
|
12
|
+
Your first command invocation on this retry must be one of the direct replacements listed below, with no wrapper shell around it.
|
|
13
|
+
After you re-establish repo context, continue using ordinary shell commands directly without wrapper shells.
|
|
14
|
+
|
|
9
15
|
## Rejection Detail
|
|
10
16
|
Codex repeatedly attempted disallowed shell-wrapper commands that the command router rejected. Shell commands are allowed, but wrapper shells are not; invoke the inner command directly and avoid wrapper retries.
|