@pushpalsdev/cli 1.0.61 → 1.0.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/prompts/workerpals/openai_codex_command_router_policy.md +16 -0
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +75 -13
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +102 -20
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +112 -1
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +10 -4
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +40 -1
- package/runtime/sandbox/prompts/workerpals/openai_codex_command_router_policy.md +16 -0
package/package.json
CHANGED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
## Base Guidance
|
|
2
|
+
Command-router policy: shell commands are allowed, but invoke the actual command directly instead of wrapping it with `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, or `pwsh -Command`. If a wrapper command is rejected, rerun its inner command directly through the command tool.
|
|
3
|
+
|
|
4
|
+
## Recovery Guidance
|
|
5
|
+
Command-router recovery: the previous attempt retried disallowed shell wrappers.
|
|
6
|
+
Retry once using shell commands normally, but invoke the inner command directly instead of wrapping it in `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, or `pwsh -Command`.
|
|
7
|
+
You are not limited to a fixed allowlist of commands. The constraint is only that command execution must target the actual program/argv directly rather than a wrapper shell.
|
|
8
|
+
|
|
9
|
+
## Hard Recovery Guidance
|
|
10
|
+
Command-router escalation: the previous retry still attempted disallowed shell wrappers.
|
|
11
|
+
Do not invoke `bash`, `/bin/bash`, `sh`, `cmd`, `powershell`, `powershell.exe`, `pwsh`, or `pwsh.exe` as the command itself on this attempt.
|
|
12
|
+
Your first command invocation on this retry must be one of the direct replacements listed below, with no wrapper shell around it.
|
|
13
|
+
After you re-establish repo context, continue using ordinary shell commands directly without wrapper shells.
|
|
14
|
+
|
|
15
|
+
## Rejection Detail
|
|
16
|
+
Codex repeatedly attempted disallowed shell-wrapper commands that the command router rejected. Shell commands are allowed, but wrapper shells are not; invoke the inner command directly and avoid wrapper retries.
|
|
@@ -4018,6 +4018,7 @@ var BREADTH_ORDER = {
|
|
|
4018
4018
|
var IDEATION_SYSTEM_PROMPT = loadPromptTemplate("remotebuddy/autonomy_ideation_system_prompt.md").trim();
|
|
4019
4019
|
var SCORING_SYSTEM_PROMPT = loadPromptTemplate("remotebuddy/autonomy_scoring_system_prompt.md").trim();
|
|
4020
4020
|
var PLANNING_SYSTEM_PROMPT = loadPromptTemplate("remotebuddy/autonomy_planning_system_prompt.md").trim();
|
|
4021
|
+
var IDEATION_TIMEOUT_RECOVERY_INSTRUCTION = "Previous ideation timed out before you returned JSON. For this round only, stay within the time budget: prioritize the top 1-3 highest-confidence candidates, keep reasoning brief, avoid exhaustive exploration, and return valid JSON as soon as possible.";
|
|
4021
4022
|
var VISION_DOC_FNAME = "vision.md";
|
|
4022
4023
|
var MAX_VISION_SECTION_CHARS = 1200;
|
|
4023
4024
|
var DOCS_MIN_IMPACT_SIGNAL_FOR_NO_PENALTY = 0.45;
|
|
@@ -5779,6 +5780,7 @@ class RemoteBuddyAutonomousEngine {
|
|
|
5779
5780
|
baseBranch;
|
|
5780
5781
|
llm;
|
|
5781
5782
|
comm;
|
|
5783
|
+
llmCfg;
|
|
5782
5784
|
cfg;
|
|
5783
5785
|
runtimeEnabled = true;
|
|
5784
5786
|
timer = null;
|
|
@@ -5792,6 +5794,7 @@ class RemoteBuddyAutonomousEngine {
|
|
|
5792
5794
|
lastOutcome = "none";
|
|
5793
5795
|
lastDetail = "not_started";
|
|
5794
5796
|
lastCompletedAtMs = 0;
|
|
5797
|
+
pendingIdeationTimeoutRecovery = null;
|
|
5795
5798
|
constructor(opts) {
|
|
5796
5799
|
this.server = opts.server;
|
|
5797
5800
|
this.sessionId = opts.sessionId;
|
|
@@ -5805,6 +5808,7 @@ class RemoteBuddyAutonomousEngine {
|
|
|
5805
5808
|
this.baseBranch = String(opts.config.sourceControlManager.baseBranch || "main").trim() || "main";
|
|
5806
5809
|
this.llm = opts.llm;
|
|
5807
5810
|
this.comm = opts.comm;
|
|
5811
|
+
this.llmCfg = opts.config.remotebuddy.llm;
|
|
5808
5812
|
this.cfg = opts.config.remotebuddy.autonomy;
|
|
5809
5813
|
this.runtimeEnabled = this.cfg.enabled;
|
|
5810
5814
|
}
|
|
@@ -5859,10 +5863,30 @@ class RemoteBuddyAutonomousEngine {
|
|
|
5859
5863
|
return headers;
|
|
5860
5864
|
}
|
|
5861
5865
|
lockTtlMs() {
|
|
5862
|
-
|
|
5866
|
+
const maxPhaseTimeoutMs = Math.max(this.phaseTimeoutMs("ideation"), this.phaseTimeoutMs("scoring"), this.phaseTimeoutMs("planning"));
|
|
5867
|
+
return Math.max(this.cfg.tickIntervalMs * 3, this.cfg.ideationBudgetMs * 2 + maxPhaseTimeoutMs * 6, 30000);
|
|
5863
5868
|
}
|
|
5864
5869
|
cycleBudgetMs() {
|
|
5865
|
-
|
|
5870
|
+
const ideationTimeoutMs = this.phaseTimeoutMs("ideation");
|
|
5871
|
+
const scoringTimeoutMs = this.phaseTimeoutMs("scoring");
|
|
5872
|
+
const planningTimeoutMs = this.phaseTimeoutMs("planning");
|
|
5873
|
+
const maxPhaseTimeoutMs = Math.max(ideationTimeoutMs, scoringTimeoutMs, planningTimeoutMs);
|
|
5874
|
+
return Math.max(this.cfg.ideationBudgetMs + ideationTimeoutMs + scoringTimeoutMs + planningTimeoutMs, maxPhaseTimeoutMs * 4, 20000);
|
|
5875
|
+
}
|
|
5876
|
+
phaseTimeoutMs(phase) {
|
|
5877
|
+
const configuredTimeoutMs = Math.max(1000, this.cfg.llmTimeoutMs);
|
|
5878
|
+
if (phase !== "ideation")
|
|
5879
|
+
return configuredTimeoutMs;
|
|
5880
|
+
if (String(this.llmCfg.backend || "").trim().toLowerCase() !== "openai_codex") {
|
|
5881
|
+
return configuredTimeoutMs;
|
|
5882
|
+
}
|
|
5883
|
+
const codexTimeoutMs2 = Math.max(configuredTimeoutMs, this.llmCfg.codexTimeoutMs || 0);
|
|
5884
|
+
return Math.min(codexTimeoutMs2, Math.max(configuredTimeoutMs, 90000));
|
|
5885
|
+
}
|
|
5886
|
+
consumeIdeationTimeoutRecovery() {
|
|
5887
|
+
const recovery = this.pendingIdeationTimeoutRecovery;
|
|
5888
|
+
this.pendingIdeationTimeoutRecovery = null;
|
|
5889
|
+
return recovery;
|
|
5866
5890
|
}
|
|
5867
5891
|
loadVisionContext(runId) {
|
|
5868
5892
|
const maxVisionContextChars = this.cfg.visionContextMaxChars;
|
|
@@ -6180,6 +6204,7 @@ class RemoteBuddyAutonomousEngine {
|
|
|
6180
6204
|
}).catch(() => {});
|
|
6181
6205
|
}
|
|
6182
6206
|
async llmPhase(phase, runId, snapshotId, input, objectiveId) {
|
|
6207
|
+
const timeoutMs = this.phaseTimeoutMs(phase);
|
|
6183
6208
|
const requestPayload = {
|
|
6184
6209
|
phase,
|
|
6185
6210
|
system: input.system,
|
|
@@ -6188,10 +6213,30 @@ class RemoteBuddyAutonomousEngine {
|
|
|
6188
6213
|
maxTokens: input.maxTokens ?? null,
|
|
6189
6214
|
temperature: input.temperature ?? null
|
|
6190
6215
|
};
|
|
6216
|
+
const systemChars = input.system.length;
|
|
6217
|
+
const messageChars = (input.messages ?? []).reduce((sum, message) => sum + (message.content?.length ?? 0), 0);
|
|
6218
|
+
const requestBytes = Buffer.byteLength(JSON.stringify(requestPayload), "utf8");
|
|
6191
6219
|
const startedAt = Date.now();
|
|
6192
|
-
|
|
6220
|
+
console.log(`[RemoteBuddyAutonomousEngine] ${phase} phase start: timeout_ms=${timeoutMs} system_chars=${systemChars} message_chars=${messageChars} request_bytes=${requestBytes} max_tokens=${input.maxTokens ?? "default"} temperature=${input.temperature ?? "default"}`);
|
|
6221
|
+
let output;
|
|
6222
|
+
try {
|
|
6223
|
+
output = await withTimeout(this.llm.generate(input), timeoutMs, `autonomy ${phase} phase timeout`);
|
|
6224
|
+
} catch (error) {
|
|
6225
|
+
const elapsedMs = Date.now() - startedAt;
|
|
6226
|
+
if (phase === "ideation" && error instanceof Error && error.message === "autonomy ideation phase timeout") {
|
|
6227
|
+
this.pendingIdeationTimeoutRecovery = {
|
|
6228
|
+
previousRunId: runId,
|
|
6229
|
+
timedOutAt: new Date().toISOString(),
|
|
6230
|
+
timeoutMs
|
|
6231
|
+
};
|
|
6232
|
+
}
|
|
6233
|
+
console.warn(`[RemoteBuddyAutonomousEngine] ${phase} phase failed: elapsed_ms=${elapsedMs} timeout_ms=${timeoutMs} system_chars=${systemChars} message_chars=${messageChars} request_bytes=${requestBytes} error=${error instanceof Error ? error.message : String(error)}`);
|
|
6234
|
+
throw error;
|
|
6235
|
+
}
|
|
6193
6236
|
const responseJson = parseJsonObject(output.text);
|
|
6194
6237
|
const tokenUsage = output.usage ?? null;
|
|
6238
|
+
const latencyMs = Date.now() - startedAt;
|
|
6239
|
+
console.log(`[RemoteBuddyAutonomousEngine] ${phase} phase completed: elapsed_ms=${latencyMs} timeout_ms=${timeoutMs} response_chars=${output.text.length} prompt_tokens=${tokenUsage?.promptTokens ?? "unknown"} completion_tokens=${tokenUsage?.completionTokens ?? "unknown"}`);
|
|
6195
6240
|
return {
|
|
6196
6241
|
json: responseJson,
|
|
6197
6242
|
llmCall: {
|
|
@@ -6211,11 +6256,11 @@ ${JSON.stringify(input.messages ?? [])}`),
|
|
|
6211
6256
|
},
|
|
6212
6257
|
modelId: "configured",
|
|
6213
6258
|
temperature: input.temperature ?? null,
|
|
6214
|
-
timeoutMs
|
|
6259
|
+
timeoutMs,
|
|
6215
6260
|
response: responseJson,
|
|
6216
6261
|
responseHash: sha256(output.text),
|
|
6217
6262
|
tokenUsage,
|
|
6218
|
-
latencyMs
|
|
6263
|
+
latencyMs
|
|
6219
6264
|
}
|
|
6220
6265
|
};
|
|
6221
6266
|
}
|
|
@@ -6550,26 +6595,43 @@ ${JSON.stringify(input.messages ?? [])}`),
|
|
|
6550
6595
|
return;
|
|
6551
6596
|
}
|
|
6552
6597
|
this.setPhase("ideation");
|
|
6598
|
+
const ideationRecovery = this.consumeIdeationTimeoutRecovery();
|
|
6599
|
+
if (ideationRecovery) {
|
|
6600
|
+
console.warn(`[RemoteBuddyAutonomousEngine] tick ${runId}: applying one-shot ideation timeout recovery from ${ideationRecovery.previousRunId} after ${ideationRecovery.timeoutMs}ms timeout.`);
|
|
6601
|
+
}
|
|
6602
|
+
const ideationTopSignals = snapshot.top_signals.slice(0, ideationRecovery ? 10 : 16);
|
|
6603
|
+
const ideationStateTraits = snapshot.state_traits.slice(0, ideationRecovery ? 14 : 24);
|
|
6604
|
+
const ideationFeedbackPriors = snapshot.feedback_priors.slice(0, ideationRecovery ? 12 : 20);
|
|
6605
|
+
const ideationEngineIdeaPriors = (snapshot.engine_idea_priors ?? []).slice(0, ideationRecovery ? 12 : 20);
|
|
6606
|
+
const ideationOpenObjectives = snapshot.open_objectives.slice(0, ideationRecovery ? 12 : 20);
|
|
6607
|
+
const ideationActiveCooldowns = snapshot.active_cooldowns.slice(0, ideationRecovery ? 12 : 20);
|
|
6608
|
+
const ideationRepoTargets = repoTargets.slice(0, ideationRecovery ? 8 : repoTargets.length);
|
|
6553
6609
|
const ideationPhase = await this.llmPhase("ideation", runId, snapshot.snapshot_id, {
|
|
6554
6610
|
system: IDEATION_SYSTEM_PROMPT,
|
|
6555
6611
|
json: true,
|
|
6556
|
-
maxTokens: 2800,
|
|
6612
|
+
maxTokens: ideationRecovery ? 1400 : 2800,
|
|
6557
6613
|
temperature: 0.2,
|
|
6558
6614
|
messages: [
|
|
6615
|
+
...ideationRecovery ? [
|
|
6616
|
+
{
|
|
6617
|
+
role: "user",
|
|
6618
|
+
content: `${IDEATION_TIMEOUT_RECOVERY_INSTRUCTION} Previous timed-out run: ${ideationRecovery.previousRunId}. Timeout budget for this round: ${this.phaseTimeoutMs("ideation")}ms.`
|
|
6619
|
+
}
|
|
6620
|
+
] : [],
|
|
6559
6621
|
{
|
|
6560
6622
|
role: "user",
|
|
6561
6623
|
content: JSON.stringify({
|
|
6562
6624
|
snapshot: {
|
|
6563
6625
|
snapshot_id: snapshot.snapshot_id,
|
|
6564
|
-
top_signals:
|
|
6565
|
-
state_traits:
|
|
6566
|
-
feedback_priors:
|
|
6567
|
-
engine_idea_priors:
|
|
6568
|
-
open_objectives:
|
|
6569
|
-
active_cooldowns:
|
|
6626
|
+
top_signals: ideationTopSignals,
|
|
6627
|
+
state_traits: ideationStateTraits,
|
|
6628
|
+
feedback_priors: ideationFeedbackPriors,
|
|
6629
|
+
engine_idea_priors: ideationEngineIdeaPriors,
|
|
6630
|
+
open_objectives: ideationOpenObjectives,
|
|
6631
|
+
active_cooldowns: ideationActiveCooldowns
|
|
6570
6632
|
},
|
|
6571
6633
|
vision: visionContext,
|
|
6572
|
-
repo_targets:
|
|
6634
|
+
repo_targets: ideationRepoTargets.map((target) => ({
|
|
6573
6635
|
component_area: target.component_area,
|
|
6574
6636
|
target_paths: target.target_paths,
|
|
6575
6637
|
write_globs: target.write_globs,
|
|
@@ -55,6 +55,7 @@ _DEFAULT_TASK_SYSTEM_PROMPT_PATH = "workerpals/openai_codex_default_system_promp
|
|
|
55
55
|
_MANDATORY_RUNTIME_POLICY_APPENDIX_PATH = "workerpals/openai_codex_runtime_policy_appendix.md"
|
|
56
56
|
_INSTRUCTION_WRAPPER_PROMPT_PATH = "workerpals/openai_codex_instruction_wrapper.md"
|
|
57
57
|
_SUPPLEMENTAL_GUIDANCE_SECTION_PATH = "workerpals/openai_codex_supplemental_guidance_section.md"
|
|
58
|
+
_COMMAND_ROUTER_POLICY_PATH = "workerpals/openai_codex_command_router_policy.md"
|
|
58
59
|
_CODEX_WORKAROUND_PATTERNS = (
|
|
59
60
|
re.compile(
|
|
60
61
|
r"\bcodex cli\b.{0,120}\b(isn't|is not|not)\b.{0,120}\bavailable\b.{0,120}\b(so|therefore|instead|fallback|workaround|without|using)\b",
|
|
@@ -94,12 +95,7 @@ _VALID_SANDBOX_POLICIES = {"read-only", "workspace-write", "danger-full-access"}
|
|
|
94
95
|
_VALID_COLORS = {"always", "never", "auto"}
|
|
95
96
|
_VALID_AUTH_MODES = {"auto", "api_key", "chatgpt"}
|
|
96
97
|
_VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}
|
|
97
|
-
|
|
98
|
-
"Command-router policy: use direct commands only. Do not invoke `/bin/bash -lc`, `bash -c`, "
|
|
99
|
-
"`sh -lc`, `cmd /c`, `powershell -Command`, or `pwsh -Command`. Run the direct command "
|
|
100
|
-
"instead, such as `pwd`, `git status --porcelain`, `git diff -- path`, `ls dir`, "
|
|
101
|
-
"`cat file`, `sed -n '1,160p' file`, or `bun test <path>`."
|
|
102
|
-
)
|
|
98
|
+
_MAX_WRAPPER_RECOVERY_ATTEMPTS = 2
|
|
103
99
|
|
|
104
100
|
|
|
105
101
|
def _model_supports_xhigh_reasoning(model: str) -> bool:
|
|
@@ -254,6 +250,79 @@ def _load_prompt_template(
|
|
|
254
250
|
return _PROMPT_TOKEN_REGEX.sub(_replace, template)
|
|
255
251
|
|
|
256
252
|
|
|
253
|
+
def _load_markdown_h2_section(relative_path: str, heading: str) -> str:
    """Return the body of one `## <heading>` section of a prompt template.

    The template is loaded through `_load_prompt_template`. The heading line is
    matched case-insensitively after stripping surrounding whitespace; the
    section ends at the next line that starts with "## " (checked on the raw
    line) or at the end of the document. The collected text is stripped.

    Returns an empty string when the template loads empty or the heading is
    not present.
    """
    text = _load_prompt_template(relative_path)
    if not text:
        return ""
    wanted = f"## {heading}".strip().lower()
    remaining = iter(text.splitlines())
    # Advance the shared iterator just past the heading line; the for/else
    # branch handles the "heading never found" case.
    for candidate in remaining:
        if candidate.strip().lower() == wanted:
            break
    else:
        return ""
    body: List[str] = []
    for row in remaining:
        if row.startswith("## "):
            break
        body.append(row)
    return "\n".join(body).strip()
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _command_router_policy_guidance() -> str:
    """Return the base command-router policy text.

    Uses the "Base Guidance" section of the command-router policy prompt when
    it loads non-empty; otherwise falls back to the built-in wording below.
    """
    fallback = (
        "Command-router policy: shell commands are allowed, but invoke the actual command directly "
        "instead of wrapping it with `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, "
        "`powershell -Command`, or `pwsh -Command`. If a wrapper command is rejected, rerun its "
        "inner command directly through the command tool."
    )
    return _load_markdown_h2_section(_COMMAND_ROUTER_POLICY_PATH, "Base Guidance") or fallback
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def _command_router_recovery_guidance() -> str:
    """Return the first-level (soft) wrapper-recovery guidance text.

    Uses the "Recovery Guidance" section of the command-router policy prompt
    when it loads non-empty; otherwise falls back to the built-in wording below.
    """
    fallback = (
        "Command-router recovery: the previous attempt retried disallowed shell wrappers.\n"
        "Retry once using shell commands normally, but invoke the inner command directly instead of "
        "wrapping it in `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, or "
        "`pwsh -Command`.\n"
        "You are not limited to a fixed allowlist of commands. The constraint is only that command "
        "execution must target the actual program/argv directly rather than a wrapper shell."
    )
    return _load_markdown_h2_section(_COMMAND_ROUTER_POLICY_PATH, "Recovery Guidance") or fallback
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def _command_router_hard_recovery_guidance() -> str:
    """Return the escalated (hard) wrapper-recovery guidance text.

    Uses the "Hard Recovery Guidance" section of the command-router policy
    prompt when it loads non-empty; otherwise falls back to the built-in
    wording below.
    """
    fallback = (
        "Command-router escalation: the previous retry still attempted disallowed shell wrappers.\n"
        "Do not invoke `bash`, `/bin/bash`, `sh`, `cmd`, `powershell`, `powershell.exe`, `pwsh`, "
        "or `pwsh.exe` as the command itself on this attempt.\n"
        "Your first command invocation on this retry must be one of the direct replacements listed "
        "below, with no wrapper shell around it.\n"
        "After you re-establish repo context, continue using ordinary shell commands directly "
        "without wrapper shells."
    )
    return (
        _load_markdown_h2_section(_COMMAND_ROUTER_POLICY_PATH, "Hard Recovery Guidance")
        or fallback
    )
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _command_router_rejection_detail_intro() -> str:
    """Return the introductory sentence used in wrapper-rejection failure details.

    Uses the "Rejection Detail" section of the command-router policy prompt
    when it loads non-empty; otherwise falls back to the built-in wording below.
    """
    fallback = (
        "Codex repeatedly attempted disallowed shell-wrapper commands that the command router "
        "rejected. Shell commands are allowed, but wrapper shells are not; invoke the inner "
        "command directly and avoid wrapper retries."
    )
    return _load_markdown_h2_section(_COMMAND_ROUTER_POLICY_PATH, "Rejection Detail") or fallback
|
|
324
|
+
|
|
325
|
+
|
|
257
326
|
def _to_positive_int(raw: str) -> Optional[int]:
|
|
258
327
|
try:
|
|
259
328
|
parsed = int(raw)
|
|
@@ -1013,7 +1082,7 @@ def _unwrap_shell_wrapper_command(command: str) -> str:
|
|
|
1013
1082
|
return ""
|
|
1014
1083
|
|
|
1015
1084
|
|
|
1016
|
-
def
|
|
1085
|
+
def _build_wrapper_direct_replacements(rejected_commands: List[str]) -> List[str]:
|
|
1017
1086
|
direct_equivalents: List[str] = []
|
|
1018
1087
|
seen: set[str] = set()
|
|
1019
1088
|
for command in rejected_commands:
|
|
@@ -1023,11 +1092,16 @@ def _build_wrapper_recovery_guidance(rejected_commands: List[str]) -> str:
|
|
|
1023
1092
|
continue
|
|
1024
1093
|
seen.add(lowered)
|
|
1025
1094
|
direct_equivalents.append(f"- `{command}` -> `{direct}`")
|
|
1095
|
+
return direct_equivalents
|
|
1096
|
+
|
|
1097
|
+
|
|
1098
|
+
def _build_wrapper_recovery_guidance(rejected_commands: List[str], *, hard: bool = False) -> str:
|
|
1026
1099
|
guidance_lines = [
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1100
|
+
_command_router_hard_recovery_guidance()
|
|
1101
|
+
if hard
|
|
1102
|
+
else _command_router_recovery_guidance()
|
|
1030
1103
|
]
|
|
1104
|
+
direct_equivalents = _build_wrapper_direct_replacements(rejected_commands)
|
|
1031
1105
|
if direct_equivalents:
|
|
1032
1106
|
guidance_lines.append("Use these direct replacements for the rejected commands:")
|
|
1033
1107
|
guidance_lines.extend(direct_equivalents[:6])
|
|
@@ -1064,7 +1138,7 @@ def _augment_supplemental_guidance(supplemental_guidance: List[str]) -> List[str
|
|
|
1064
1138
|
joined = "\n".join(normalized).lower()
|
|
1065
1139
|
if "direct commands only" in joined or "shell-wrapper" in joined or "/bin/bash -lc" in joined:
|
|
1066
1140
|
return normalized
|
|
1067
|
-
return [
|
|
1141
|
+
return [_command_router_policy_guidance(), *normalized]
|
|
1068
1142
|
|
|
1069
1143
|
|
|
1070
1144
|
def _read_text_if_exists(path: Path) -> str:
|
|
@@ -1466,11 +1540,20 @@ def _run_codex_task(
|
|
|
1466
1540
|
log_git_status(repo, log)
|
|
1467
1541
|
|
|
1468
1542
|
if command_policy_rejection_loop:
|
|
1469
|
-
if wrapper_recovery_attempt <
|
|
1470
|
-
|
|
1543
|
+
if wrapper_recovery_attempt < _MAX_WRAPPER_RECOVERY_ATTEMPTS:
|
|
1544
|
+
hard_recovery = wrapper_recovery_attempt >= 1
|
|
1545
|
+
recovery_guidance = _build_wrapper_recovery_guidance(
|
|
1546
|
+
rejected_shell_wrappers,
|
|
1547
|
+
hard=hard_recovery,
|
|
1548
|
+
)
|
|
1471
1549
|
if recovery_guidance:
|
|
1472
1550
|
log.warning(
|
|
1473
|
-
"Codex hit a shell-wrapper rejection loop; retrying once with
|
|
1551
|
+
"Codex hit a shell-wrapper rejection loop; retrying once with "
|
|
1552
|
+
+ (
|
|
1553
|
+
"strict no-wrapper recovery guidance."
|
|
1554
|
+
if hard_recovery
|
|
1555
|
+
else "direct-command recovery guidance."
|
|
1556
|
+
)
|
|
1474
1557
|
)
|
|
1475
1558
|
retry_result = _run_codex_task(
|
|
1476
1559
|
repo,
|
|
@@ -1480,19 +1563,19 @@ def _run_codex_task(
|
|
|
1480
1563
|
baseline_changes=baseline_snapshot,
|
|
1481
1564
|
)
|
|
1482
1565
|
retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
|
|
1483
|
-
if retry_result.get("ok"):
|
|
1566
|
+
if wrapper_recovery_attempt == 0 and retry_result.get("ok"):
|
|
1484
1567
|
recovered_stdout = str(retry_result.get("stdout") or "").strip()
|
|
1485
1568
|
retry_result["stdout"] = _truncate(
|
|
1486
1569
|
(
|
|
1487
|
-
"Recovered after
|
|
1570
|
+
"Recovered after Codex attempts hit command-router shell-wrapper rejections.\n\n"
|
|
1488
1571
|
f"{recovered_stdout}"
|
|
1489
1572
|
).strip()
|
|
1490
1573
|
)
|
|
1491
|
-
|
|
1574
|
+
elif wrapper_recovery_attempt == 0:
|
|
1492
1575
|
retry_stderr = str(retry_result.get("stderr") or "").strip()
|
|
1493
1576
|
retry_result["stderr"] = _truncate(
|
|
1494
1577
|
(
|
|
1495
|
-
"
|
|
1578
|
+
"Earlier Codex attempts hit command-router shell-wrapper rejections and were retried with stricter recovery guidance.\n\n"
|
|
1496
1579
|
f"{retry_stderr}"
|
|
1497
1580
|
).strip()
|
|
1498
1581
|
)
|
|
@@ -1503,8 +1586,7 @@ def _run_codex_task(
|
|
|
1503
1586
|
else "- (no command details captured)"
|
|
1504
1587
|
)
|
|
1505
1588
|
detail = (
|
|
1506
|
-
"
|
|
1507
|
-
"router rejected. Switch to direct commands only and avoid wrapper retries.\n"
|
|
1589
|
+
f"{_command_router_rejection_detail_intro()}\n"
|
|
1508
1590
|
f"Rejected commands:\n{command_lines}"
|
|
1509
1591
|
)
|
|
1510
1592
|
if last_message:
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import re
|
|
3
|
+
import json
|
|
4
|
+
import subprocess
|
|
3
5
|
import sys
|
|
4
6
|
import unittest
|
|
5
7
|
import tempfile
|
|
8
|
+
from unittest import mock
|
|
6
9
|
from pathlib import Path
|
|
7
10
|
|
|
8
11
|
_HERE = Path(__file__).resolve().parent
|
|
@@ -21,6 +24,8 @@ from executor_base import (
|
|
|
21
24
|
from openai_codex_executor import (
|
|
22
25
|
OpenAICodexRuntimeConfig,
|
|
23
26
|
_augment_supplemental_guidance,
|
|
27
|
+
_build_wrapper_recovery_guidance,
|
|
28
|
+
_run_codex_task,
|
|
24
29
|
_resolve_reasoning_effort,
|
|
25
30
|
_build_instruction,
|
|
26
31
|
_collect_disallowed_shell_wrapper_rejections,
|
|
@@ -266,10 +271,116 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
266
271
|
def test_augments_guidance_with_direct_command_policy_once(self) -> None:
    """Augmenting prepends the policy once; augmenting again changes nothing."""
    first_pass = _augment_supplemental_guidance(["Run bun test tests/example.test.ts"])
    self.assertGreaterEqual(len(first_pass), 2)
    # The policy entry is expected at the front of the list.
    self.assertIn("shell commands are allowed", first_pass[0].lower())
    second_pass = _augment_supplemental_guidance(first_pass)
    self.assertEqual(second_pass, first_pass)
|
|
272
277
|
|
|
278
|
+
def test_wrapper_recovery_guidance_allows_arbitrary_shell_commands_without_wrappers(self) -> None:
    """Soft recovery guidance keeps shell commands open and lists direct replacements."""
    rejected = ["/bin/bash -lc 'git status --porcelain'", "/bin/bash -lc pwd"]
    guidance = _build_wrapper_recovery_guidance(rejected)
    lowered = guidance.lower()
    for phrase in ("shell commands normally", "not limited to a fixed allowlist"):
        self.assertIn(phrase, lowered)
    # The replacement mapping is checked case-sensitively against the raw text.
    self.assertIn("`/bin/bash -lc 'git status --porcelain'` -> `git status --porcelain`", guidance)
|
|
286
|
+
|
|
287
|
+
def test_wrapper_hard_recovery_guidance_requires_direct_replacements_first(self) -> None:
    """Hard recovery guidance forbids shell binaries and demands a direct replacement first."""
    rejected = ["/bin/bash -lc 'git status --porcelain'", "/bin/bash -lc pwd"]
    guidance = _build_wrapper_recovery_guidance(rejected, hard=True)
    lowered = guidance.lower()
    expected_phrases = (
        "previous retry still attempted disallowed shell wrappers",
        "do not invoke `bash`",
        "first command invocation on this retry must be one of the direct replacements",
    )
    for phrase in expected_phrases:
        self.assertIn(phrase, lowered)
    # The replacement mapping is checked case-sensitively against the raw text.
    self.assertIn("`/bin/bash -lc 'git status --porcelain'` -> `git status --porcelain`", guidance)
|
|
297
|
+
|
|
298
|
+
def test_run_codex_task_escalates_wrapper_recovery_and_recovers(self) -> None:
|
|
299
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-recovery-") as temp_dir:
|
|
300
|
+
repo = Path(temp_dir) / "repo"
|
|
301
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
302
|
+
(repo / "README.md").write_text("# wrapper recovery test\n", encoding="utf-8")
|
|
303
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
304
|
+
subprocess.run(
|
|
305
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
306
|
+
cwd=repo,
|
|
307
|
+
check=True,
|
|
308
|
+
capture_output=True,
|
|
309
|
+
text=True,
|
|
310
|
+
)
|
|
311
|
+
subprocess.run(
|
|
312
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
313
|
+
cwd=repo,
|
|
314
|
+
check=True,
|
|
315
|
+
capture_output=True,
|
|
316
|
+
text=True,
|
|
317
|
+
)
|
|
318
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
319
|
+
subprocess.run(
|
|
320
|
+
["git", "commit", "-m", "chore: seed wrapper recovery repo"],
|
|
321
|
+
cwd=repo,
|
|
322
|
+
check=True,
|
|
323
|
+
capture_output=True,
|
|
324
|
+
text=True,
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
stub_path = Path(temp_dir) / "fake_codex_wrapper_recovery.py"
|
|
328
|
+
stub_path.write_text(
|
|
329
|
+
"\n".join(
|
|
330
|
+
[
|
|
331
|
+
"from pathlib import Path",
|
|
332
|
+
"import sys",
|
|
333
|
+
"import time",
|
|
334
|
+
"",
|
|
335
|
+
"argv = sys.argv[1:]",
|
|
336
|
+
"last_message_path = None",
|
|
337
|
+
"for index, arg in enumerate(argv):",
|
|
338
|
+
" if arg == '--output-last-message' and index + 1 < len(argv):",
|
|
339
|
+
" last_message_path = argv[index + 1]",
|
|
340
|
+
" break",
|
|
341
|
+
"",
|
|
342
|
+
"prompt = sys.stdin.read()",
|
|
343
|
+
"hard_marker = 'Your first command invocation on this retry must be one of the direct replacements listed below'",
|
|
344
|
+
"if hard_marker in prompt:",
|
|
345
|
+
" if last_message_path:",
|
|
346
|
+
" Path(last_message_path).write_text(",
|
|
347
|
+
" 'Recovered by switching to direct commands after strict wrapper recovery.',",
|
|
348
|
+
" encoding='utf-8',",
|
|
349
|
+
" )",
|
|
350
|
+
" print('item.completed | Used direct commands after strict recovery guidance.', flush=True)",
|
|
351
|
+
" sys.exit(0)",
|
|
352
|
+
"",
|
|
353
|
+
"for line in (",
|
|
354
|
+
" 'error=exec_command failed for `/bin/bash -lc pwd`: CreateProcess { message: \"Rejected\" }',",
|
|
355
|
+
" 'error=exec_command failed for `/bin/bash -lc \\'git branch --show-current\\'`: CreateProcess { message: \"Rejected\" }',",
|
|
356
|
+
" 'error=exec_command failed for `/bin/bash -lc ls`: CreateProcess { message: \"Rejected\" }',",
|
|
357
|
+
" 'error=exec_command failed for `/bin/bash -lc \\'git status --porcelain\\'`: CreateProcess { message: \"Rejected\" }',",
|
|
358
|
+
"):",
|
|
359
|
+
" print(line, file=sys.stderr, flush=True)",
|
|
360
|
+
"time.sleep(10)",
|
|
361
|
+
]
|
|
362
|
+
),
|
|
363
|
+
encoding="utf-8",
|
|
364
|
+
)
|
|
365
|
+
|
|
366
|
+
env_overrides = {
|
|
367
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
368
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
369
|
+
"OPENAI_API_KEY": "pushpals-wrapper-recovery-test-key",
|
|
370
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "10",
|
|
371
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
372
|
+
}
|
|
373
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
374
|
+
result = _run_codex_task(
|
|
375
|
+
str(repo),
|
|
376
|
+
"Inspect the repo and report the current branch.",
|
|
377
|
+
[],
|
|
378
|
+
)
|
|
379
|
+
|
|
380
|
+
self.assertTrue(result.get("ok"), result)
|
|
381
|
+
self.assertIn("Recovered after Codex attempts hit command-router shell-wrapper rejections.", str(result.get("stdout") or ""))
|
|
382
|
+
self.assertIn("strict wrapper recovery", str(result.get("stdout") or "").lower())
|
|
383
|
+
|
|
273
384
|
def test_usage_falls_back_to_estimate_when_trace_has_no_usage(self) -> None:
|
|
274
385
|
usage = _usage_from_trace_or_estimate({}, "abc" * 30, "done", model="gpt-5.4")
|
|
275
386
|
self.assertTrue(usage["estimated"])
|
|
@@ -757,11 +757,17 @@ export class DockerExecutor {
|
|
|
757
757
|
if (backend !== "openai_codex") return [];
|
|
758
758
|
|
|
759
759
|
const hostCodexHomeRaw = (process.env.PUSHPALS_OPENAI_CODEX_HOST_CODEX_HOME || "").trim();
|
|
760
|
+
if (hostCodexHomeRaw && !isAbsolute(hostCodexHomeRaw)) {
|
|
761
|
+
console.warn(
|
|
762
|
+
`[DockerExecutor] Ignoring relative PUSHPALS_OPENAI_CODEX_HOST_CODEX_HOME=${hostCodexHomeRaw}; using ${resolve(
|
|
763
|
+
homedir(),
|
|
764
|
+
".codex",
|
|
765
|
+
)} so Codex state stays outside the repo worktree.`,
|
|
766
|
+
);
|
|
767
|
+
}
|
|
760
768
|
const hostCodexHome = (
|
|
761
|
-
hostCodexHomeRaw
|
|
762
|
-
?
|
|
763
|
-
? hostCodexHomeRaw
|
|
764
|
-
: resolve(this.options.repo, hostCodexHomeRaw)
|
|
769
|
+
hostCodexHomeRaw && isAbsolute(hostCodexHomeRaw)
|
|
770
|
+
? hostCodexHomeRaw
|
|
765
771
|
: resolve(homedir(), ".codex")
|
|
766
772
|
).trim();
|
|
767
773
|
if (!hostCodexHome) return [];
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* Used by both the host Worker (direct mode) and the Docker job runner.
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
-
import { existsSync, readFileSync, unlinkSync } from "fs";
|
|
6
|
+
import { existsSync, readFileSync, rmSync, unlinkSync } from "fs";
|
|
7
7
|
import { resolve } from "path";
|
|
8
8
|
import {
|
|
9
9
|
deriveAutonomyComponentArea,
|
|
@@ -2606,6 +2606,41 @@ export async function syncHiddenRefWithRemoteBranchByRebase(
|
|
|
2606
2606
|
publicBranchName: string,
|
|
2607
2607
|
jobId: string,
|
|
2608
2608
|
): Promise<{ ok: true; sha: string } | { ok: false; error: string }> {
|
|
2609
|
+
// Pre-sync guard for a `.codex` path inside the repo worktree.
//   - path absent          -> ok
//   - path tracked by git  -> error; the tracked path is never deleted here
//   - otherwise            -> removed recursively; error if removal throws
//                             or the path still exists afterwards
const scrubKnownPreSyncArtifacts = async (): Promise<
  { ok: true } | { ok: false; error: string }
> => {
  const failure = (error: string): { ok: false; error: string } => ({ ok: false, error });
  const transientCodexPath = resolve(repo, ".codex");

  if (!existsSync(transientCodexPath)) {
    return { ok: true };
  }

  // `--error-unmatch` makes ls-files fail when nothing under .codex is tracked.
  const lsFilesResult = await git(repo, ["ls-files", "--error-unmatch", "--", ".codex"]);
  if (lsFilesResult.ok) {
    return failure(
      "Tracked .codex path blocks branch sync. Move Codex state outside the repo worktree before retrying.",
    );
  }

  try {
    rmSync(transientCodexPath, { recursive: true, force: true });
  } catch (removalError) {
    return failure(
      `Failed to scrub transient .codex artifact before branch sync: ${String(removalError)}`,
    );
  }

  // `force: true` suppresses some errors, so confirm the path is really gone.
  if (existsSync(transientCodexPath)) {
    return failure(
      "Failed to scrub transient .codex artifact before branch sync: path still exists.",
    );
  }

  console.warn("[WorkerPals] Removed transient .codex artifact before branch sync.");
  return { ok: true };
};
|
|
2643
|
+
|
|
2609
2644
|
const pullRebaseNonInteractive = () =>
|
|
2610
2645
|
git(repo, [
|
|
2611
2646
|
"-c",
|
|
@@ -2652,6 +2687,10 @@ export async function syncHiddenRefWithRemoteBranchByRebase(
|
|
|
2652
2687
|
const maxPullRebaseAttempts = 5;
|
|
2653
2688
|
let syncedWithRemote = false;
|
|
2654
2689
|
for (let attempt = 1; attempt <= maxPullRebaseAttempts; attempt++) {
|
|
2690
|
+
const preSyncGuard = await scrubKnownPreSyncArtifacts();
|
|
2691
|
+
if (!preSyncGuard.ok) {
|
|
2692
|
+
return { ok: false, error: preSyncGuard.error };
|
|
2693
|
+
}
|
|
2655
2694
|
let pullRebase = await pullRebaseNonInteractive();
|
|
2656
2695
|
if (!pullRebase.ok && isPullRebaseDirtyWorkingTreeOutput(combinedGitOutput(pullRebase))) {
|
|
2657
2696
|
// Recover from dirty index/worktree left by previous attempts and retry non-interactively.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
## Base Guidance
|
|
2
|
+
Command-router policy: shell commands are allowed, but invoke the actual command directly instead of wrapping it with `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, or `pwsh -Command`. If a wrapper command is rejected, rerun its inner command directly through the command tool.
|
|
3
|
+
|
|
4
|
+
## Recovery Guidance
|
|
5
|
+
Command-router recovery: the previous attempt retried disallowed shell wrappers.
|
|
6
|
+
Retry once using shell commands normally, but invoke the inner command directly instead of wrapping it in `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, or `pwsh -Command`.
|
|
7
|
+
You are not limited to a fixed allowlist of commands. The constraint is only that command execution must target the actual program/argv directly rather than a wrapper shell.
|
|
8
|
+
|
|
9
|
+
## Hard Recovery Guidance
|
|
10
|
+
Command-router escalation: the previous retry still attempted disallowed shell wrappers.
|
|
11
|
+
Do not invoke `bash`, `/bin/bash`, `sh`, `cmd`, `powershell`, `powershell.exe`, `pwsh`, or `pwsh.exe` as the command itself on this attempt.
|
|
12
|
+
Your first command invocation on this retry must be one of the direct replacements listed below, with no wrapper shell around it.
|
|
13
|
+
After you re-establish repo context, continue using ordinary shell commands directly without wrapper shells.
|
|
14
|
+
|
|
15
|
+
## Rejection Detail
|
|
16
|
+
Codex repeatedly attempted disallowed shell-wrapper commands that the command router rejected. Shell commands are allowed, but wrapper shells are not; invoke the inner command directly and avoid wrapper retries.
|