@pushpalsdev/cli 1.1.37 → 1.1.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +107 -3
- package/package.json +1 -1
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +30 -3
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +297 -11
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +291 -2
- package/runtime/sandbox/apps/workerpals/src/common/generic_python_executor.ts +4 -4
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +10 -1
package/dist/pushpals-cli.js
CHANGED
|
@@ -2169,6 +2169,97 @@ function withWindowsGitSchannelEnv(env, platform = process.platform) {
|
|
|
2169
2169
|
return env;
|
|
2170
2170
|
return appendGitConfigEnv(env, "http.sslBackend", "schannel");
|
|
2171
2171
|
}
|
|
2172
|
+
// Name of the env var that, when it parses as a true boolean flag, turns off the
// automatic Windows root-CA bundle export.
var WINDOWS_NODE_EXTRA_CA_CERTS_DISABLE_ENV = "PUSHPALS_DISABLE_WINDOWS_NODE_EXTRA_CA_CERTS";
// Location of the exported PEM bundle, expressed as path segments relative to the runtime root.
var WINDOWS_NODE_EXTRA_CA_CERTS_BUNDLE_RELATIVE_PATH = ["certs", "windows-root-ca.pem"];
/**
 * Builds the path of the Windows root-CA PEM bundle underneath `runtimeRoot`.
 *
 * @param runtimeRoot Directory that the bundle's relative path is joined onto.
 * @returns The joined bundle path.
 */
function resolveWindowsNodeExtraCaCertsBundlePath(runtimeRoot) {
  const bundleSegments = [runtimeRoot, ...WINDOWS_NODE_EXTRA_CA_CERTS_BUNDLE_RELATIVE_PATH];
  return join2(...bundleSegments);
}
|
|
2177
|
+
/**
 * Reports whether the file at `pathValue` contains at least one complete PEM
 * certificate block.
 *
 * A BEGIN marker alone is not enough: a bundle that was truncated mid-write
 * would otherwise be accepted and reused on every later run, so a matching
 * END marker is required after the BEGIN marker.
 *
 * @param pathValue Path of the candidate PEM file.
 * @returns `true` when a full BEGIN…END certificate block is present; `false`
 *   when the file is missing, unreadable, or holds no complete block.
 */
function hasUsablePemCertificate(pathValue) {
  try {
    const pemText = readFileSync4(pathValue, "utf8");
    return /-----BEGIN CERTIFICATE-----[\s\S]+?-----END CERTIFICATE-----/.test(pemText);
  } catch {
    // Unreadable or missing file: treat as "no usable bundle" so the caller regenerates it.
    return false;
  }
}
|
|
2184
|
+
/**
 * Makes sure a PEM bundle of the Windows root certificate stores exists at
 * `outPath`, exporting it with PowerShell when it is not already usable.
 *
 * The export reads `Cert:\CurrentUser\Root` and `Cert:\LocalMachine\Root`,
 * skips expired and duplicate (by thumbprint) certificates, and writes the
 * result as ASCII PEM. The bundle is written to a sibling temp file and then
 * moved over `outPath`, so an interrupted export never leaves a partial
 * bundle at the final location.
 *
 * @param outPath Destination path of the PEM bundle.
 * @param env Environment used as the base for the PowerShell child process.
 * @returns `outPath` when a usable bundle exists afterwards, otherwise `""`.
 */
function ensureWindowsNodeExtraCaCertsBundle(outPath, env) {
  if (hasUsablePemCertificate(outPath))
    return outPath;
  const outDir = dirname(outPath);
  try {
    mkdirSync(outDir, { recursive: true });
  } catch {
    return "";
  }
  // String.raw keeps the backslashes in the Cert:\ paths and the '\r?\n' regex intact.
  const script = String.raw`
$ErrorActionPreference = "Stop"
$outPath = $env:PUSHPALS_WINDOWS_NODE_EXTRA_CA_CERTS_OUT
if (-not $outPath) { throw "PUSHPALS_WINDOWS_NODE_EXTRA_CA_CERTS_OUT is required" }
$outDir = Split-Path -Parent $outPath
if ($outDir) { [System.IO.Directory]::CreateDirectory($outDir) | Out-Null }
$stores = @("Cert:\CurrentUser\Root", "Cert:\LocalMachine\Root")
$seen = @{}
$lines = New-Object System.Collections.Generic.List[string]
foreach ($store in $stores) {
  if (-not (Test-Path $store)) { continue }
  foreach ($cert in Get-ChildItem $store) {
    if (-not $cert.RawData) { continue }
    if ($cert.NotAfter -lt (Get-Date)) { continue }
    $thumbprint = [string]$cert.Thumbprint
    if ($seen.ContainsKey($thumbprint)) { continue }
    $seen[$thumbprint] = $true
    $lines.Add("-----BEGIN CERTIFICATE-----")
    $encoded = [Convert]::ToBase64String($cert.RawData, [Base64FormattingOptions]::InsertLineBreaks)
    foreach ($line in [regex]::Split($encoded, '\r?\n')) {
      if ($line) { $lines.Add($line) }
    }
    $lines.Add("-----END CERTIFICATE-----")
  }
}
if ($lines.Count -eq 0) { throw "No Windows root certificates found" }
$tmpPath = $outPath + ".tmp-" + $PID
try {
  [System.IO.File]::WriteAllLines($tmpPath, $lines, [System.Text.Encoding]::ASCII)
  Move-Item -LiteralPath $tmpPath -Destination $outPath -Force
} finally {
  if (Test-Path -LiteralPath $tmpPath) { Remove-Item -LiteralPath $tmpPath -Force -ErrorAction SilentlyContinue }
}
`;
  // -EncodedCommand expects base64 of the UTF-16LE script text.
  const encodedScript = Buffer.from(script, "utf16le").toString("base64");
  const childEnv = normalizeChildProcessEnv({
    ...env,
    PUSHPALS_WINDOWS_NODE_EXTRA_CA_CERTS_OUT: outPath
  });
  const result = Bun.spawnSync([
    "powershell.exe",
    "-NoProfile",
    "-NonInteractive",
    "-ExecutionPolicy",
    "Bypass",
    "-EncodedCommand",
    encodedScript
  ], {
    cwd: process.cwd(),
    env: childEnv,
    stdout: "pipe",
    stderr: "pipe"
  });
  if (result.exitCode !== 0)
    return "";
  // Re-validate instead of trusting the exit code alone.
  return hasUsablePemCertificate(outPath) ? outPath : "";
}
|
|
2244
|
+
/**
 * Returns `env` extended with `NODE_EXTRA_CA_CERTS` pointing at an exported
 * Windows root-CA bundle, when that is applicable.
 *
 * `env` is returned unchanged (same object) when the platform is not win32,
 * when the disable flag parses as true, when `NODE_EXTRA_CA_CERTS` is already
 * a non-blank string, when `runtimeRoot` is blank or does not exist, or when
 * the bundle could not be produced.
 *
 * @param env Base environment; never mutated.
 * @param opts Optional settings: `platform` (defaults to `process.platform`)
 *   and `runtimeRoot` (directory that hosts the bundle). May be omitted.
 * @returns Either `env` itself or a shallow copy with `NODE_EXTRA_CA_CERTS` set.
 */
function withWindowsNodeExtraCaCertsEnv(env, opts) {
  // Tolerate a missing options object so callers of this exported helper
  // get the "unchanged env" result instead of a TypeError.
  const platform = opts?.platform ?? process.platform;
  if (platform !== "win32")
    return env;
  if (parseBooleanFlag(env[WINDOWS_NODE_EXTRA_CA_CERTS_DISABLE_ENV]) === true)
    return env;
  // Respect an explicit user-provided bundle.
  if (typeof env.NODE_EXTRA_CA_CERTS === "string" && env.NODE_EXTRA_CA_CERTS.trim())
    return env;
  const runtimeRoot = String(opts?.runtimeRoot ?? "").trim();
  if (!runtimeRoot || !existsSync5(runtimeRoot))
    return env;
  const bundlePath = ensureWindowsNodeExtraCaCertsBundle(resolveWindowsNodeExtraCaCertsBundlePath(runtimeRoot), env);
  if (!bundlePath)
    return env;
  return {
    ...env,
    NODE_EXTRA_CA_CERTS: bundlePath
  };
}
|
|
2172
2263
|
/**
 * Runs `git` with the given arguments through `runCommandWithEnv`, after
 * passing the environment through `withWindowsGitSchannelEnv`.
 *
 * @param args Arguments placed after the `git` executable name.
 * @param cwd Working directory forwarded to `runCommandWithEnv`.
 * @param env Base environment for the child process.
 * @param timeoutMs Timeout forwarded to `runCommandWithEnv`.
 * @returns Whatever `runCommandWithEnv` resolves to.
 */
async function runGitWithEnv(args, cwd, env, timeoutMs) {
  const gitCommand = ["git", ...args];
  const gitEnv = withWindowsGitSchannelEnv(env);
  return await runCommandWithEnv(gitCommand, cwd, gitEnv, timeoutMs);
}
|
|
@@ -2844,7 +2935,11 @@ function buildEmbeddedRuntimeEnv(baseEnv, opts) {
|
|
|
2844
2935
|
...typeof env.PUSHPALS_DOCKER_BIN === "string" && env.PUSHPALS_DOCKER_BIN.trim() ? { PUSHPALS_DOCKER_BIN: env.PUSHPALS_DOCKER_BIN.trim() } : {},
|
|
2845
2936
|
...typeof env.PUSHPALS_DOCKER_BIN_ABSOLUTE === "string" && env.PUSHPALS_DOCKER_BIN_ABSOLUTE.trim() ? { PUSHPALS_DOCKER_BIN_ABSOLUTE: env.PUSHPALS_DOCKER_BIN_ABSOLUTE.trim() } : {}
|
|
2846
2937
|
};
|
|
2847
|
-
|
|
2938
|
+
const runtimeEnvWithWindowsCa = withWindowsNodeExtraCaCertsEnv(runtimeEnv, {
|
|
2939
|
+
platform,
|
|
2940
|
+
runtimeRoot: opts.runtimeRoot
|
|
2941
|
+
});
|
|
2942
|
+
return withWindowsGitSchannelEnv(runtimeEnvWithWindowsCa, platform);
|
|
2848
2943
|
}
|
|
2849
2944
|
function parseBooleanFlag(raw) {
|
|
2850
2945
|
const normalized = String(raw ?? "").trim().toLowerCase();
|
|
@@ -6034,10 +6129,13 @@ ${line}
|
|
|
6034
6129
|
console.log("[pushpals] Runtime-only mode is active. Send `exit` on stdin or terminate the process to stop.");
|
|
6035
6130
|
await new Promise((resolveStop) => {
|
|
6036
6131
|
let resolved = false;
|
|
6132
|
+
let exitRequestedFromInput = false;
|
|
6133
|
+
const keepAlive = setInterval(() => {}, 60000);
|
|
6037
6134
|
const finish = () => {
|
|
6038
6135
|
if (resolved)
|
|
6039
6136
|
return;
|
|
6040
6137
|
resolved = true;
|
|
6138
|
+
clearInterval(keepAlive);
|
|
6041
6139
|
resolveStop();
|
|
6042
6140
|
};
|
|
6043
6141
|
process.once("SIGINT", finish);
|
|
@@ -6050,13 +6148,17 @@ ${line}
|
|
|
6050
6148
|
runtimeOnlyInput.on("line", (line) => {
|
|
6051
6149
|
if (!isCliExitCommand(line))
|
|
6052
6150
|
return;
|
|
6151
|
+
exitRequestedFromInput = true;
|
|
6053
6152
|
requestStop();
|
|
6054
6153
|
runtimeOnlyInput.close();
|
|
6055
6154
|
finish();
|
|
6056
6155
|
});
|
|
6057
6156
|
runtimeOnlyInput.on("close", () => {
|
|
6058
|
-
|
|
6059
|
-
|
|
6157
|
+
if (exitRequestedFromInput || resolved) {
|
|
6158
|
+
finish();
|
|
6159
|
+
return;
|
|
6160
|
+
}
|
|
6161
|
+
console.log("[pushpals] Runtime-only stdin closed; continuing until terminated.");
|
|
6060
6162
|
});
|
|
6061
6163
|
});
|
|
6062
6164
|
await requestStop();
|
|
@@ -6138,6 +6240,7 @@ if (import.meta.main) {
|
|
|
6138
6240
|
});
|
|
6139
6241
|
}
|
|
6140
6242
|
export {
|
|
6243
|
+
withWindowsNodeExtraCaCertsEnv,
|
|
6141
6244
|
waitForWorkerpalCapacity,
|
|
6142
6245
|
waitForRemoteBuddySessionConsumer,
|
|
6143
6246
|
startEmbeddedMonitoringHub,
|
|
@@ -6154,6 +6257,7 @@ export {
|
|
|
6154
6257
|
resolveWorkerExecutionReadiness,
|
|
6155
6258
|
resolveWindowsWhereExecutableCandidatesForEnv,
|
|
6156
6259
|
resolveWindowsShellExecutableCandidatesForEnv,
|
|
6260
|
+
resolveWindowsNodeExtraCaCertsBundlePath,
|
|
6157
6261
|
resolveWindowsFreshRuntimeWorkerpalPrewarmDelayMs,
|
|
6158
6262
|
resolveRuntimeGitExecutableCandidates,
|
|
6159
6263
|
resolveRuntimeDockerExecutableCandidates,
|
package/package.json
CHANGED
|
@@ -6545,6 +6545,33 @@ function sanitizeForGitRef(value) {
|
|
|
6545
6545
|
const text = value.trim().replace(/[^A-Za-z0-9._-]/g, "-");
|
|
6546
6546
|
return text || "default";
|
|
6547
6547
|
}
|
|
6548
|
+
/**
 * Checks whether `value` is a branch name that is safe to hand to git.
 *
 * The value is stringified and trimmed first. It is rejected when it is
 * empty, longer than 200 characters, starts with `-` (could be read as an
 * option), ends with `.`, contains `..` or `@{`, is exactly `@`, or contains
 * a backslash, whitespace, `~ ^ : ? * [ ]`, or a control character.
 * Each `/`-separated component must also be non-empty, must not start with
 * `.`, and must not end with `.lock` — the per-component rules of
 * `git check-ref-format`.
 *
 * @param value Candidate branch name (any type; nullish counts as empty).
 * @returns `true` when the name passes every check.
 */
function isSafeGitBranchName(value) {
  const text = String(value ?? "").trim();
  if (!text || text.length > 200)
    return false;
  if (text === "@")
    return false;
  if (text.startsWith("-") || text.endsWith("."))
    return false;
  if (text.includes("..") || text.includes("@{"))
    return false;
  if (/[\\\s~^:?*\[\]\x00-\x1F\x7F]/.test(text))
    return false;
  // An empty component covers a leading "/", a trailing "/", and "//".
  return text.split("/").every((component) => component !== "" && !component.startsWith(".") && !component.endsWith(".lock"));
}
|
|
6560
|
+
/**
 * Returns a configured branch name if it is safe, otherwise a safe fallback.
 *
 * @param value Configured branch name; stringified and trimmed before checking.
 * @param fallback Branch used when `value` is unsafe. It is stringified and
 *   trimmed too, so the returned fallback is exactly the text that was
 *   validated; if it is itself unsafe, `"main"` is used.
 * @param label Human-readable role of the branch, used only in the warning.
 * @returns The trimmed `value` when safe, else the safe fallback. A warning
 *   is logged whenever the fallback is used.
 */
function normalizeConfiguredGitBranchName(value, fallback, label = "branch") {
  const candidate = String(value ?? "").trim();
  if (isSafeGitBranchName(candidate))
    return candidate;
  // Validate and return the same normalized text; the check trims internally,
  // so returning the raw fallback could leak surrounding whitespace or a non-string.
  const fallbackName = String(fallback ?? "").trim();
  const safeFallback = isSafeGitBranchName(fallbackName) ? fallbackName : "main";
  console.warn(`[RemoteBuddyAutonomousEngine] Ignoring unsafe ${label} ref ${JSON.stringify(candidate)}; using ${safeFallback}.`);
  return safeFallback;
}
|
|
6568
|
+
/**
 * Returns a configured git remote name if it is safe, otherwise `fallback`.
 *
 * A safe name consists only of `A-Z a-z 0-9 . _ -`, does not start with `-`
 * (could be read as an option), does not start or end with `.`, does not
 * contain `..`, and does not end with `.lock`. The dot rules keep out `.`
 * and `..`, which git would treat as local repository paths rather than
 * remote names, and names that are not valid ref components.
 *
 * @param value Configured remote name; stringified and trimmed before checking.
 * @param fallback Remote used when `value` is unsafe (returned as given).
 * @returns The trimmed `value` when safe, else `fallback`. A warning is
 *   logged whenever the fallback is used.
 */
function normalizeConfiguredGitRemoteName(value, fallback = "origin") {
  const candidate = String(value ?? "").trim();
  const isSafeRemoteName = /^[A-Za-z0-9._-]+$/.test(candidate)
    && !candidate.startsWith("-")
    && !candidate.startsWith(".")
    && !candidate.endsWith(".")
    && !candidate.endsWith(".lock")
    && !candidate.includes("..");
  if (isSafeRemoteName)
    return candidate;
  console.warn(`[RemoteBuddyAutonomousEngine] Ignoring unsafe git remote ${JSON.stringify(candidate)}; using ${fallback}.`);
  return fallback;
}
|
|
6548
6575
|
async function repoPreflight(repo) {
|
|
6549
6576
|
const porcelain = await gitOutput(repo, ["status", "--porcelain"]);
|
|
6550
6577
|
const mergeHead = await gitOutput(repo, ["rev-parse", "-q", "--verify", "MERGE_HEAD"]);
|
|
@@ -6594,9 +6621,9 @@ class RemoteBuddyAutonomousEngine {
|
|
|
6594
6621
|
const safeSession = sanitizeForGitRef(this.sessionId).slice(0, 40);
|
|
6595
6622
|
this.autonomyRepo = resolve4(this.repoRoot, ".worktrees", `remotebuddy-autonomy-${safeSession}`);
|
|
6596
6623
|
this.autonomyBranch = `_remotebuddy/autonomy-${safeSession}`;
|
|
6597
|
-
this.gitRemote = String(opts.config.sourceControlManager.remote || "origin")
|
|
6598
|
-
this.integrationBranch = String(opts.config.sourceControlManager.mainBranch || "main_agents")
|
|
6599
|
-
this.baseBranch = String(opts.config.sourceControlManager.baseBranch || "main")
|
|
6624
|
+
this.gitRemote = normalizeConfiguredGitRemoteName(String(opts.config.sourceControlManager.remote || "origin"), "origin");
|
|
6625
|
+
this.integrationBranch = normalizeConfiguredGitBranchName(String(opts.config.sourceControlManager.mainBranch || "main_agents"), "main_agents", "integration branch");
|
|
6626
|
+
this.baseBranch = normalizeConfiguredGitBranchName(String(opts.config.sourceControlManager.baseBranch || "main"), "main", "base branch");
|
|
6600
6627
|
this.llm = opts.llm;
|
|
6601
6628
|
this.comm = opts.comm;
|
|
6602
6629
|
this.llmCfg = opts.config.remotebuddy.llm;
|
|
@@ -116,7 +116,10 @@ _WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
|
|
|
116
116
|
_BACKGROUND_NO_EDIT_WATCHDOG_S = 120
|
|
117
117
|
_NO_EDIT_RECOVERY_WATCHDOG_S = 90
|
|
118
118
|
_DEFAULT_NO_EDIT_RECHECK_S = 120
|
|
119
|
+
_NO_EDIT_RECOVERY_RECHECK_S = 30
|
|
119
120
|
_DEFAULT_NO_EDIT_COMMAND_GRACE_S = 240
|
|
121
|
+
_DEFAULT_NO_EDIT_COMMAND_PROGRESS_CAP_S = 360
|
|
122
|
+
_NO_EDIT_RECOVERY_COMMAND_PROGRESS_CAP_S = 120
|
|
120
123
|
_DEFAULT_STARTUP_STALL_WATCHDOG_S = 210
|
|
121
124
|
_RECOVERY_STARTUP_STALL_WATCHDOG_S = 150
|
|
122
125
|
_DEFAULT_ROLLOUT_WATCHDOG_S = 300
|
|
@@ -124,6 +127,8 @@ _SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
|
|
|
124
127
|
_NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S = 150
|
|
125
128
|
_WEB_REVIEW_ROLLOUT_WATCHDOG_S = 180
|
|
126
129
|
_BACKGROUND_ROLLOUT_WATCHDOG_S = 90
|
|
130
|
+
_MIN_AUTO_WATCHDOG_TIMEOUT_S = 180
|
|
131
|
+
_MIN_CODEX_RECOVERY_ATTEMPT_S = 120
|
|
127
132
|
_NO_PUBLISHABLE_FAILURE_COOLDOWN_MS = 10 * 60 * 1000
|
|
128
133
|
_CODEX_STARTUP_ONLY_EVENT_TYPES = {"thread.started", "turn.started"}
|
|
129
134
|
|
|
@@ -609,11 +614,19 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
|
|
|
609
614
|
"contract-level tests",
|
|
610
615
|
"contract around",
|
|
611
616
|
"contract coverage",
|
|
617
|
+
"focused contract coverage",
|
|
612
618
|
"ranking contract",
|
|
613
619
|
"regression coverage",
|
|
620
|
+
"focused coverage",
|
|
614
621
|
"focused regression",
|
|
615
622
|
"focused scenario",
|
|
616
623
|
"targeted test",
|
|
624
|
+
"small deterministic",
|
|
625
|
+
"review-fix",
|
|
626
|
+
"review fix",
|
|
627
|
+
"rejected pr",
|
|
628
|
+
"must-fix",
|
|
629
|
+
"cleanup harness",
|
|
617
630
|
"one-file",
|
|
618
631
|
"one file",
|
|
619
632
|
"single-file",
|
|
@@ -648,13 +661,19 @@ def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
|
|
|
648
661
|
"contract-level tests",
|
|
649
662
|
"contract around",
|
|
650
663
|
"contract coverage",
|
|
664
|
+
"focused contract coverage",
|
|
651
665
|
"ranking contract",
|
|
652
666
|
"regression coverage",
|
|
667
|
+
"focused coverage",
|
|
668
|
+
"focused test",
|
|
669
|
+
"focused tests",
|
|
670
|
+
"focused testing",
|
|
653
671
|
"focused regression",
|
|
654
672
|
"test-only",
|
|
655
673
|
"test only",
|
|
656
674
|
"targeted test",
|
|
657
675
|
"focused scenario",
|
|
676
|
+
"cleanup harness",
|
|
658
677
|
)
|
|
659
678
|
if not any(marker in text for marker in narrow_markers):
|
|
660
679
|
return False
|
|
@@ -668,6 +687,13 @@ def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
|
|
|
668
687
|
return not any(marker in text for marker in broad_markers)
|
|
669
688
|
|
|
670
689
|
|
|
690
|
+
def _minimum_recovery_attempt_seconds(requested_timeout_s: Optional[int]) -> int:
    """Return the smallest remaining budget (seconds) worth spending on a recovery retry.

    With no positive requested timeout the module-level minimum is returned
    as-is. Otherwise the result is a quarter of the requested timeout
    (truncated), capped at the module-level minimum and never below 1.
    """
    has_positive_timeout = bool(requested_timeout_s) and requested_timeout_s > 0
    if not has_positive_timeout:
        return _MIN_CODEX_RECOVERY_ATTEMPT_S
    quarter_s = int(requested_timeout_s * 0.25)
    scaled_s = quarter_s if quarter_s > 1 else 1
    capped_s = scaled_s if scaled_s < _MIN_CODEX_RECOVERY_ATTEMPT_S else _MIN_CODEX_RECOVERY_ATTEMPT_S
    return capped_s if capped_s > 1 else 1
|
|
695
|
+
|
|
696
|
+
|
|
671
697
|
def _resolve_task_reasoning_effort(
|
|
672
698
|
configured_effort: str,
|
|
673
699
|
prompt: str,
|
|
@@ -720,7 +746,7 @@ def _resolve_no_edit_watchdog_seconds(
|
|
|
720
746
|
else:
|
|
721
747
|
return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
|
|
722
748
|
|
|
723
|
-
if communicate_timeout_s <
|
|
749
|
+
if communicate_timeout_s < _MIN_AUTO_WATCHDOG_TIMEOUT_S:
|
|
724
750
|
return None
|
|
725
751
|
|
|
726
752
|
prompt_text = str(prompt or "").lower()
|
|
@@ -743,7 +769,10 @@ def _resolve_no_edit_watchdog_seconds(
|
|
|
743
769
|
return max(floor_s, min(default_s, max(floor_s, communicate_timeout_s - 60)))
|
|
744
770
|
|
|
745
771
|
|
|
746
|
-
def _resolve_no_edit_recheck_seconds(
|
|
772
|
+
def _resolve_no_edit_recheck_seconds(
|
|
773
|
+
communicate_timeout_s: Optional[int],
|
|
774
|
+
recovery_attempt: int = 0,
|
|
775
|
+
) -> int:
|
|
747
776
|
raw = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S", "").strip()
|
|
748
777
|
if raw:
|
|
749
778
|
parsed = _to_positive_int(raw)
|
|
@@ -754,8 +783,13 @@ def _resolve_no_edit_recheck_seconds(communicate_timeout_s: Optional[int]) -> in
|
|
|
754
783
|
else:
|
|
755
784
|
upper = max(1, (communicate_timeout_s or parsed + 1) - 1)
|
|
756
785
|
return max(1, min(parsed, upper))
|
|
757
|
-
|
|
758
|
-
|
|
786
|
+
default_s = (
|
|
787
|
+
_NO_EDIT_RECOVERY_RECHECK_S
|
|
788
|
+
if recovery_attempt > 0
|
|
789
|
+
else _DEFAULT_NO_EDIT_RECHECK_S
|
|
790
|
+
)
|
|
791
|
+
upper = max(1, (communicate_timeout_s or default_s + 1) - 1)
|
|
792
|
+
return max(1, min(default_s, upper))
|
|
759
793
|
|
|
760
794
|
|
|
761
795
|
def _resolve_no_edit_command_grace_seconds(communicate_timeout_s: Optional[int]) -> Optional[int]:
|
|
@@ -779,6 +813,36 @@ def _resolve_no_edit_command_grace_seconds(communicate_timeout_s: Optional[int])
|
|
|
779
813
|
return max(1, min(_DEFAULT_NO_EDIT_COMMAND_GRACE_S, upper))
|
|
780
814
|
|
|
781
815
|
|
|
816
|
+
def _resolve_no_edit_command_progress_cap_seconds(
    communicate_timeout_s: Optional[int],
    no_edit_command_grace_s: Optional[int],
    recovery_attempt: int = 0,
) -> Optional[int]:
    """Resolve the cap (seconds) on tool-command progress tolerated without edits.

    Returns None when there is no communicate timeout, when the command grace
    is None, or when WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_PROGRESS_CAP_S is
    exactly "0". A valid positive override from that variable wins; an invalid
    one is logged and ignored. Otherwise the recovery default is used when
    ``recovery_attempt`` is positive and the regular default when it is not.
    Every returned value is clamped to the range 1 .. communicate_timeout_s - 1.
    """
    if no_edit_command_grace_s is None or not communicate_timeout_s:
        return None

    # Upper bound shared by the override path and the default path.
    ceiling_s = max(1, communicate_timeout_s - 1)

    override = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_PROGRESS_CAP_S", "").strip()
    if override == "0":
        return None
    if override:
        parsed = _to_positive_int(override)
        if parsed is not None:
            return max(1, min(parsed, ceiling_s))
        log.info(
            "Invalid WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_PROGRESS_CAP_S="
            f"{override!r}; using default command-progress cap."
        )

    if recovery_attempt > 0:
        default_s = _NO_EDIT_RECOVERY_COMMAND_PROGRESS_CAP_S
    else:
        default_s = _DEFAULT_NO_EDIT_COMMAND_PROGRESS_CAP_S
    return max(1, min(default_s, ceiling_s))
|
|
844
|
+
|
|
845
|
+
|
|
782
846
|
def _resolve_startup_stall_watchdog_seconds(
|
|
783
847
|
communicate_timeout_s: Optional[int],
|
|
784
848
|
recovery_attempt: int = 0,
|
|
@@ -827,7 +891,7 @@ def _resolve_rollout_watchdog_seconds(
|
|
|
827
891
|
communicate_timeout_s: Optional[int],
|
|
828
892
|
no_edit_watchdog_s: Optional[int],
|
|
829
893
|
) -> Optional[int]:
|
|
830
|
-
if not communicate_timeout_s or communicate_timeout_s <
|
|
894
|
+
if not communicate_timeout_s or communicate_timeout_s < _MIN_AUTO_WATCHDOG_TIMEOUT_S:
|
|
831
895
|
return None
|
|
832
896
|
|
|
833
897
|
raw = os.environ.get("WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S", "").strip()
|
|
@@ -2091,6 +2155,118 @@ def _codex_changed_paths(repo: str, baseline_snapshot: Any) -> Tuple[List[str],
|
|
|
2091
2155
|
return changed_paths, delta, effective
|
|
2092
2156
|
|
|
2093
2157
|
|
|
2158
|
+
def _safe_repo_relative_path(repo: str, path: str) -> Optional[Path]:
|
|
2159
|
+
raw = str(path or "").replace("\\", "/").strip()
|
|
2160
|
+
if not raw or raw.startswith("/") or re.match(r"^[A-Za-z]:", raw):
|
|
2161
|
+
return None
|
|
2162
|
+
parts = [part for part in raw.split("/") if part]
|
|
2163
|
+
if not parts or any(part in ("..", ".") for part in parts):
|
|
2164
|
+
return None
|
|
2165
|
+
try:
|
|
2166
|
+
repo_path = Path(repo).resolve()
|
|
2167
|
+
candidate = (repo_path / Path(*parts)).resolve()
|
|
2168
|
+
candidate.relative_to(repo_path)
|
|
2169
|
+
return candidate
|
|
2170
|
+
except Exception:
|
|
2171
|
+
return None
|
|
2172
|
+
|
|
2173
|
+
|
|
2174
|
+
def _git_status_entries(repo: str) -> List[Tuple[str, str]]:
|
|
2175
|
+
try:
|
|
2176
|
+
proc = subprocess.run(
|
|
2177
|
+
["git", "status", "--porcelain"],
|
|
2178
|
+
cwd=repo,
|
|
2179
|
+
capture_output=True,
|
|
2180
|
+
text=True,
|
|
2181
|
+
timeout=20,
|
|
2182
|
+
check=False,
|
|
2183
|
+
)
|
|
2184
|
+
except Exception:
|
|
2185
|
+
return []
|
|
2186
|
+
if proc.returncode != 0:
|
|
2187
|
+
return []
|
|
2188
|
+
entries: List[Tuple[str, str]] = []
|
|
2189
|
+
for raw_line in proc.stdout.splitlines():
|
|
2190
|
+
line = str(raw_line or "").rstrip("\r\n")
|
|
2191
|
+
if len(line) < 4:
|
|
2192
|
+
continue
|
|
2193
|
+
status = line[:2]
|
|
2194
|
+
path = line[3:].strip()
|
|
2195
|
+
if " -> " in path:
|
|
2196
|
+
path = path.split(" -> ", 1)[1].strip()
|
|
2197
|
+
if path:
|
|
2198
|
+
entries.append((status, path))
|
|
2199
|
+
return entries
|
|
2200
|
+
|
|
2201
|
+
|
|
2202
|
+
def _restore_retry_baseline(repo: str, baseline_snapshot: Any, reason: str = "") -> bool:
    """Undo changes made since ``baseline_snapshot`` so a recovery retry starts clean.

    Returns True when nothing changed since the baseline or when, after the
    cleanup, no delta paths remain. Returns False without touching anything
    when a changed path is not a safe repo-relative path or was already dirty
    at baseline, and False when the cleanup fails or leaves changes behind.

    Tracked paths are reset with ``git restore --staged --worktree``;
    untracked paths that belong to the delta are deleted from disk.
    ``reason`` is only used in the log message.
    """
    _changed_paths, delta_paths, _effective_paths = _codex_changed_paths(repo, baseline_snapshot)
    if not delta_paths:
        return True
    baseline_paths = set(_baseline_snapshot_paths(baseline_snapshot))
    unsafe_delta = [path for path in delta_paths if _safe_repo_relative_path(repo, path) is None]
    if unsafe_delta:
        log.info(
            "Rollout recovery cannot safely restore worker sandbox baseline; unsafe changed paths: "
            f"{_describe_publishable_paths(unsafe_delta)}"
        )
        return False
    mutated_baseline_paths = [path for path in delta_paths if path in baseline_paths]
    if mutated_baseline_paths:
        log.info(
            "Rollout recovery will not reset paths that were already dirty at baseline: "
            f"{_describe_publishable_paths(mutated_baseline_paths)}"
        )
        return False

    log.info(
        "Restoring worker sandbox baseline before rollout recovery retry"
        f"{f' ({reason})' if reason else ''}: {_describe_publishable_paths(delta_paths)}"
    )

    # git restore rejects the whole command when any pathspec is unknown to
    # git, so untracked delta paths must not be passed to it; they are removed
    # from disk below instead.
    untracked_before = [
        entry for status, entry in _git_status_entries(repo) if status == "??"
    ]

    def _is_untracked(path: str) -> bool:
        # Matches the entry itself, or a path inside an untracked directory entry ("dir/").
        return any(
            path.rstrip("/") == entry.rstrip("/")
            or (entry.endswith("/") and path.startswith(entry))
            for entry in untracked_before
        )

    tracked_delta = [path for path in delta_paths if not _is_untracked(path)]
    if tracked_delta:
        try:
            restore_proc = subprocess.run(
                ["git", "restore", "--staged", "--worktree", "--", *tracked_delta],
                cwd=repo,
                capture_output=True,
                text=True,
                timeout=30,
                check=False,
            )
        except Exception as exc:
            log.info(f"Failed to run git restore for rollout recovery baseline: {exc}")
            return False
        if restore_proc.returncode != 0:
            # Not fatal by itself: the final delta check below decides the
            # outcome. Logged so a failed restore is diagnosable.
            log.info(
                "git restore for rollout recovery baseline exited with code "
                f"{restore_proc.returncode}: {str(restore_proc.stderr or '').strip()}"
            )

    delta_set = set(delta_paths)
    for status, path in _git_status_entries(repo):
        if status != "??":
            continue
        # Only delete untracked entries that are a delta path or live under one.
        if path not in delta_set and not any(path.startswith(f"{delta.rstrip('/')}/") for delta in delta_set):
            continue
        candidate = _safe_repo_relative_path(repo, path)
        if candidate is None:
            return False
        try:
            if candidate.is_dir():
                rmtree(candidate)
            elif candidate.exists():
                candidate.unlink()
        except Exception as exc:
            log.info(f"Failed to remove untracked rollout recovery path {path}: {exc}")
            return False

    # Re-measure against the baseline; any leftover delta means the sandbox is not clean.
    _remaining_changed, remaining_delta, remaining_effective = _codex_changed_paths(
        repo,
        baseline_snapshot,
    )
    if remaining_delta:
        log.info(
            "Rollout recovery baseline restore left changed paths after cleanup: "
            f"{_describe_publishable_paths(remaining_effective or remaining_delta)}"
        )
        return False
    return True
|
|
2268
|
+
|
|
2269
|
+
|
|
2094
2270
|
def _changed_path_top_level(path: str) -> str:
|
|
2095
2271
|
raw = str(path or "").replace("\\", "/").strip()
|
|
2096
2272
|
is_top_level_directory = raw.endswith("/")
|
|
@@ -2183,6 +2359,7 @@ def _run_codex_task(
|
|
|
2183
2359
|
rollout_recovery_attempt: int = 0,
|
|
2184
2360
|
model_override: Optional[str] = None,
|
|
2185
2361
|
baseline_changes: Optional[List[str]] = None,
|
|
2362
|
+
execution_deadline_monotonic: Optional[float] = None,
|
|
2186
2363
|
) -> Dict[str, Any]:
|
|
2187
2364
|
global _ACTIVE_CHILD, _INTERRUPTED_SIGNAL
|
|
2188
2365
|
_INTERRUPTED_SIGNAL = None
|
|
@@ -2242,7 +2419,39 @@ def _run_codex_task(
|
|
|
2242
2419
|
)
|
|
2243
2420
|
# JSON event output is noisy by default; prefer plain text + output-last-message.
|
|
2244
2421
|
use_json = runtime_config.json_output
|
|
2245
|
-
|
|
2422
|
+
requested_communicate_timeout_s = _resolve_communicate_timeout_seconds(runtime_config)
|
|
2423
|
+
recovery_depth = (
|
|
2424
|
+
wrapper_recovery_attempt
|
|
2425
|
+
+ model_compatibility_recovery_attempt
|
|
2426
|
+
+ startup_stall_recovery_attempt
|
|
2427
|
+
+ no_edit_recovery_attempt
|
|
2428
|
+
+ rollout_recovery_attempt
|
|
2429
|
+
)
|
|
2430
|
+
communicate_timeout_s = requested_communicate_timeout_s
|
|
2431
|
+
overall_deadline = execution_deadline_monotonic
|
|
2432
|
+
if requested_communicate_timeout_s and requested_communicate_timeout_s > 0:
|
|
2433
|
+
if overall_deadline is None:
|
|
2434
|
+
overall_deadline = time.monotonic() + float(requested_communicate_timeout_s)
|
|
2435
|
+
else:
|
|
2436
|
+
remaining_s = int(max(0.0, overall_deadline - time.monotonic()))
|
|
2437
|
+
min_attempt_s = (
|
|
2438
|
+
_minimum_recovery_attempt_seconds(requested_communicate_timeout_s)
|
|
2439
|
+
if recovery_depth > 0
|
|
2440
|
+
else 1
|
|
2441
|
+
)
|
|
2442
|
+
if remaining_s < min_attempt_s:
|
|
2443
|
+
return {
|
|
2444
|
+
"ok": False,
|
|
2445
|
+
"summary": "openai_codex recovery budget exhausted before retry",
|
|
2446
|
+
"stderr": (
|
|
2447
|
+
"Codex recovery was requested, but the shared executor budget had only "
|
|
2448
|
+
f"{remaining_s}s remaining (< {min_attempt_s}s). Stopping before a low-odds "
|
|
2449
|
+
"retry so ValidationGate/QualityGate can return a structured result."
|
|
2450
|
+
),
|
|
2451
|
+
"exitCode": 124,
|
|
2452
|
+
"cooldownMs": _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS,
|
|
2453
|
+
}
|
|
2454
|
+
communicate_timeout_s = max(1, min(requested_communicate_timeout_s, remaining_s))
|
|
2246
2455
|
effective_supplemental_guidance = _augment_supplemental_guidance(supplemental_guidance)
|
|
2247
2456
|
prompt = _build_instruction(instruction, effective_supplemental_guidance)
|
|
2248
2457
|
reasoning_effort = _resolve_task_reasoning_effort(
|
|
@@ -2484,6 +2693,7 @@ def _run_codex_task(
|
|
|
2484
2693
|
rollout_watchdog_reason = ""
|
|
2485
2694
|
rollout_artifact_only_paths = ""
|
|
2486
2695
|
rollout_watchdog_retryable = True
|
|
2696
|
+
rollout_restore_before_retry = False
|
|
2487
2697
|
command_policy_rejection_loop = False
|
|
2488
2698
|
no_edit_watchdog_s = (
|
|
2489
2699
|
_resolve_no_edit_watchdog_seconds(
|
|
@@ -2494,8 +2704,16 @@ def _run_codex_task(
|
|
|
2494
2704
|
if no_edit_recovery_attempt <= _MAX_NO_EDIT_RECOVERY_ATTEMPTS
|
|
2495
2705
|
else None
|
|
2496
2706
|
)
|
|
2497
|
-
no_edit_recheck_s = _resolve_no_edit_recheck_seconds(
|
|
2707
|
+
no_edit_recheck_s = _resolve_no_edit_recheck_seconds(
|
|
2708
|
+
communicate_timeout_s,
|
|
2709
|
+
recovery_attempt=recovery_depth,
|
|
2710
|
+
)
|
|
2498
2711
|
no_edit_command_grace_s = _resolve_no_edit_command_grace_seconds(communicate_timeout_s)
|
|
2712
|
+
no_edit_command_progress_cap_s = _resolve_no_edit_command_progress_cap_seconds(
|
|
2713
|
+
communicate_timeout_s,
|
|
2714
|
+
no_edit_command_grace_s,
|
|
2715
|
+
recovery_attempt=recovery_depth,
|
|
2716
|
+
)
|
|
2499
2717
|
startup_stall_watchdog_s = _resolve_startup_stall_watchdog_seconds(
|
|
2500
2718
|
communicate_timeout_s,
|
|
2501
2719
|
recovery_attempt=startup_stall_recovery_attempt,
|
|
@@ -2527,6 +2745,7 @@ def _run_codex_task(
|
|
|
2527
2745
|
publishable_progress_seen_at: Optional[float] = None
|
|
2528
2746
|
publishable_progress_finalized = False
|
|
2529
2747
|
publishable_progress_paths: List[str] = []
|
|
2748
|
+
first_no_edit_command_progress_at: Optional[float] = None
|
|
2530
2749
|
|
|
2531
2750
|
while proc.poll() is None:
|
|
2532
2751
|
now = time.monotonic()
|
|
@@ -2593,17 +2812,50 @@ def _run_codex_task(
|
|
|
2593
2812
|
)
|
|
2594
2813
|
except Exception:
|
|
2595
2814
|
last_command_activity_at = 0.0
|
|
2815
|
+
command_progress_cap_reached = False
|
|
2816
|
+
command_progress_elapsed_s = 0
|
|
2596
2817
|
if command_event_count > 0 and no_edit_command_grace_s is not None:
|
|
2818
|
+
observed_command_progress_at = (
|
|
2819
|
+
last_command_activity_at if last_command_activity_at > 0 else now
|
|
2820
|
+
)
|
|
2821
|
+
if first_no_edit_command_progress_at is None:
|
|
2822
|
+
first_no_edit_command_progress_at = observed_command_progress_at
|
|
2823
|
+
if no_edit_command_progress_cap_s is not None:
|
|
2824
|
+
command_progress_cap_deadline = (
|
|
2825
|
+
first_no_edit_command_progress_at
|
|
2826
|
+
+ float(no_edit_command_progress_cap_s)
|
|
2827
|
+
)
|
|
2828
|
+
command_progress_elapsed_s = int(
|
|
2829
|
+
max(0.0, now - first_no_edit_command_progress_at)
|
|
2830
|
+
)
|
|
2831
|
+
if now >= command_progress_cap_deadline:
|
|
2832
|
+
command_progress_cap_reached = True
|
|
2597
2833
|
command_grace_deadline = 0.0
|
|
2598
2834
|
if active_command_count > 0:
|
|
2599
2835
|
# Do not kill while Codex is actively running a tool command; poll
|
|
2600
|
-
# again soon, but keep
|
|
2836
|
+
# again soon, but keep endless read-only discovery bounded by the
|
|
2837
|
+
# command-progress cap above.
|
|
2601
2838
|
command_grace_deadline = now + min(60.0, float(no_edit_command_grace_s))
|
|
2602
2839
|
elif last_command_activity_at > 0:
|
|
2603
2840
|
command_grace_deadline = last_command_activity_at + float(
|
|
2604
2841
|
no_edit_command_grace_s
|
|
2605
2842
|
)
|
|
2606
|
-
if
|
|
2843
|
+
if (
|
|
2844
|
+
no_edit_command_progress_cap_s is not None
|
|
2845
|
+
and first_no_edit_command_progress_at is not None
|
|
2846
|
+
):
|
|
2847
|
+
command_grace_deadline = min(
|
|
2848
|
+
command_grace_deadline,
|
|
2849
|
+
first_no_edit_command_progress_at
|
|
2850
|
+
+ float(no_edit_command_progress_cap_s),
|
|
2851
|
+
)
|
|
2852
|
+
if command_progress_cap_reached:
|
|
2853
|
+
log.info(
|
|
2854
|
+
"No-edit watchdog observed Codex tool progress for "
|
|
2855
|
+
f"{command_progress_elapsed_s}s without a publishable patch; "
|
|
2856
|
+
"forcing patch-first recovery instead of waiting for the child timeout."
|
|
2857
|
+
)
|
|
2858
|
+
elif command_grace_deadline > now:
|
|
2607
2859
|
no_edit_deadline = command_grace_deadline
|
|
2608
2860
|
remaining_s = int(max(1.0, command_grace_deadline - now))
|
|
2609
2861
|
command_detail = (
|
|
@@ -2680,7 +2932,8 @@ def _run_codex_task(
|
|
|
2680
2932
|
"publishable-looking changed paths are broad/noisy for a small task: "
|
|
2681
2933
|
f"{_describe_publishable_paths(effective_paths)}"
|
|
2682
2934
|
)
|
|
2683
|
-
rollout_watchdog_retryable =
|
|
2935
|
+
rollout_watchdog_retryable = True
|
|
2936
|
+
rollout_restore_before_retry = True
|
|
2684
2937
|
else:
|
|
2685
2938
|
rollout_deadline = None
|
|
2686
2939
|
else:
|
|
@@ -2699,9 +2952,16 @@ def _run_codex_task(
|
|
|
2699
2952
|
if rollout_artifact_only_paths
|
|
2700
2953
|
else ""
|
|
2701
2954
|
)
|
|
2955
|
+
can_retry_rollout = (
|
|
2956
|
+
rollout_watchdog_retryable
|
|
2957
|
+
and rollout_recovery_attempt < _MAX_ROLLOUT_RECOVERY_ATTEMPTS
|
|
2958
|
+
)
|
|
2702
2959
|
action = (
|
|
2960
|
+
"Restoring worker sandbox baseline and retrying with stricter guidance."
|
|
2961
|
+
if rollout_restore_before_retry and can_retry_rollout
|
|
2962
|
+
else
|
|
2703
2963
|
"Retrying with course-correction guidance."
|
|
2704
|
-
if
|
|
2964
|
+
if can_retry_rollout
|
|
2705
2965
|
else "Failing fast instead of retrying on top of a broad/noisy diff."
|
|
2706
2966
|
)
|
|
2707
2967
|
log.info(
|
|
@@ -2779,6 +3039,27 @@ def _run_codex_task(
|
|
|
2779
3039
|
|
|
2780
3040
|
if rollout_watchdog_fired:
|
|
2781
3041
|
if rollout_watchdog_retryable and rollout_recovery_attempt < _MAX_ROLLOUT_RECOVERY_ATTEMPTS:
|
|
3042
|
+
if rollout_restore_before_retry and not _restore_retry_baseline(
|
|
3043
|
+
repo,
|
|
3044
|
+
baseline_snapshot,
|
|
3045
|
+
rollout_watchdog_reason,
|
|
3046
|
+
):
|
|
3047
|
+
detail = (
|
|
3048
|
+
"Codex trajectory drifted into broad/noisy changes and the worker sandbox "
|
|
3049
|
+
"could not be restored safely for a clean recovery retry: "
|
|
3050
|
+
f"{rollout_watchdog_reason or 'broad/noisy changes'}."
|
|
3051
|
+
)
|
|
3052
|
+
if trace_excerpt:
|
|
3053
|
+
detail = f"{detail}\n{trace_excerpt}"
|
|
3054
|
+
return {
|
|
3055
|
+
"ok": False,
|
|
3056
|
+
"summary": "openai_codex rollout coach could not safely reset broad changes",
|
|
3057
|
+
"stdout": _truncate(stdout),
|
|
3058
|
+
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
3059
|
+
"exitCode": 124,
|
|
3060
|
+
"usage": usage,
|
|
3061
|
+
"cooldownMs": _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS,
|
|
3062
|
+
}
|
|
2782
3063
|
retry_guidance = [
|
|
2783
3064
|
*supplemental_guidance,
|
|
2784
3065
|
_build_rollout_recovery_guidance(
|
|
@@ -2798,6 +3079,7 @@ def _run_codex_task(
|
|
|
2798
3079
|
rollout_recovery_attempt=rollout_recovery_attempt + 1,
|
|
2799
3080
|
model_override=model_override,
|
|
2800
3081
|
baseline_changes=baseline_snapshot,
|
|
3082
|
+
execution_deadline_monotonic=overall_deadline,
|
|
2801
3083
|
)
|
|
2802
3084
|
detail = (
|
|
2803
3085
|
"Codex trajectory remained off-track or too broad for safe recovery: "
|
|
@@ -2872,6 +3154,7 @@ def _run_codex_task(
|
|
|
2872
3154
|
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
2873
3155
|
model_override=recovery_model or model_override,
|
|
2874
3156
|
baseline_changes=baseline_snapshot,
|
|
3157
|
+
execution_deadline_monotonic=overall_deadline,
|
|
2875
3158
|
)
|
|
2876
3159
|
retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
|
|
2877
3160
|
if retry_result.get("ok"):
|
|
@@ -2918,6 +3201,7 @@ def _run_codex_task(
|
|
|
2918
3201
|
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
2919
3202
|
model_override=model_override,
|
|
2920
3203
|
baseline_changes=baseline_snapshot,
|
|
3204
|
+
execution_deadline_monotonic=overall_deadline,
|
|
2921
3205
|
)
|
|
2922
3206
|
detail = "Codex spent too much of the execution budget without producing publishable file changes."
|
|
2923
3207
|
if trace_excerpt:
|
|
@@ -3114,6 +3398,7 @@ def _run_codex_task(
|
|
|
3114
3398
|
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
3115
3399
|
model_override=model_override,
|
|
3116
3400
|
baseline_changes=baseline_snapshot,
|
|
3401
|
+
execution_deadline_monotonic=overall_deadline,
|
|
3117
3402
|
)
|
|
3118
3403
|
retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
|
|
3119
3404
|
if wrapper_recovery_attempt == 0 and retry_result.get("ok"):
|
|
@@ -3229,6 +3514,7 @@ def _run_codex_task(
|
|
|
3229
3514
|
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
3230
3515
|
model_override=LEGACY_CODEX_MODEL_FALLBACK,
|
|
3231
3516
|
baseline_changes=baseline_snapshot,
|
|
3517
|
+
execution_deadline_monotonic=overall_deadline,
|
|
3232
3518
|
)
|
|
3233
3519
|
retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
|
|
3234
3520
|
if retry_result.get("ok"):
|
|
@@ -4,6 +4,7 @@ import re
|
|
|
4
4
|
import json
|
|
5
5
|
import subprocess
|
|
6
6
|
import sys
|
|
7
|
+
import time
|
|
7
8
|
import unittest
|
|
8
9
|
import tempfile
|
|
9
10
|
from unittest import mock
|
|
@@ -47,6 +48,9 @@ from openai_codex_executor import (
|
|
|
47
48
|
_repo_root_for_prompt_loading,
|
|
48
49
|
_restore_repo_local_codex_files,
|
|
49
50
|
_resolve_codex_command_prefix,
|
|
51
|
+
_resolve_no_edit_command_grace_seconds,
|
|
52
|
+
_resolve_no_edit_command_progress_cap_seconds,
|
|
53
|
+
_resolve_no_edit_recheck_seconds,
|
|
50
54
|
_resolve_no_edit_watchdog_seconds,
|
|
51
55
|
_resolve_rollout_watchdog_seconds,
|
|
52
56
|
_resolve_startup_stall_watchdog_seconds,
|
|
@@ -1519,6 +1523,96 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1519
1523
|
self.assertIn("Patched after later command progress", str(result.get("stdout") or ""))
|
|
1520
1524
|
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
1521
1525
|
|
|
1526
|
+
def test_run_codex_task_command_progress_cap_forces_patch_first_recovery(self) -> None:
|
|
1527
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-command-progress-cap-") as temp_dir:
|
|
1528
|
+
repo = Path(temp_dir) / "repo"
|
|
1529
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
1530
|
+
(repo / "README.md").write_text("# command progress cap repo\n", encoding="utf-8")
|
|
1531
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1532
|
+
subprocess.run(
|
|
1533
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
1534
|
+
cwd=repo,
|
|
1535
|
+
check=True,
|
|
1536
|
+
capture_output=True,
|
|
1537
|
+
text=True,
|
|
1538
|
+
)
|
|
1539
|
+
subprocess.run(
|
|
1540
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
1541
|
+
cwd=repo,
|
|
1542
|
+
check=True,
|
|
1543
|
+
capture_output=True,
|
|
1544
|
+
text=True,
|
|
1545
|
+
)
|
|
1546
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1547
|
+
subprocess.run(
|
|
1548
|
+
["git", "commit", "-m", "chore: seed command progress cap repo"],
|
|
1549
|
+
cwd=repo,
|
|
1550
|
+
check=True,
|
|
1551
|
+
capture_output=True,
|
|
1552
|
+
text=True,
|
|
1553
|
+
)
|
|
1554
|
+
|
|
1555
|
+
stub_path = Path(temp_dir) / "fake_codex_command_progress_cap.py"
|
|
1556
|
+
stub_path.write_text(
|
|
1557
|
+
"\n".join(
|
|
1558
|
+
[
|
|
1559
|
+
"from pathlib import Path",
|
|
1560
|
+
"import json",
|
|
1561
|
+
"import sys",
|
|
1562
|
+
"import time",
|
|
1563
|
+
"",
|
|
1564
|
+
"argv = sys.argv[1:]",
|
|
1565
|
+
"last_message_path = None",
|
|
1566
|
+
"for index, arg in enumerate(argv):",
|
|
1567
|
+
" if arg == '--output-last-message' and index + 1 < len(argv):",
|
|
1568
|
+
" last_message_path = argv[index + 1]",
|
|
1569
|
+
" break",
|
|
1570
|
+
"",
|
|
1571
|
+
"prompt = sys.stdin.read()",
|
|
1572
|
+
"if 'No-edit watchdog recovery' in prompt:",
|
|
1573
|
+
" Path('src').mkdir(exist_ok=True)",
|
|
1574
|
+
" Path('src/capped-command-recovery.txt').write_text('patched after capped command progress\\n', encoding='utf-8')",
|
|
1575
|
+
" if last_message_path:",
|
|
1576
|
+
" Path(last_message_path).write_text('Patched after capped command progress.', encoding='utf-8')",
|
|
1577
|
+
" print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Patched after capped command progress.'}}), flush=True)",
|
|
1578
|
+
" raise SystemExit(0)",
|
|
1579
|
+
"",
|
|
1580
|
+
"print(json.dumps({'type': 'thread.started'}), flush=True)",
|
|
1581
|
+
"print(json.dumps({'type': 'turn.started'}), flush=True)",
|
|
1582
|
+
"for index in range(8):",
|
|
1583
|
+
" command_id = f'cmd-{index}'",
|
|
1584
|
+
" print(json.dumps({'type': 'item.started', 'item': {'id': command_id, 'type': 'command_execution', 'command': 'cat README.md', 'status': 'in_progress'}}), flush=True)",
|
|
1585
|
+
" time.sleep(0.2)",
|
|
1586
|
+
" print(json.dumps({'type': 'item.completed', 'item': {'id': command_id, 'type': 'command_execution', 'command': 'cat README.md', 'status': 'completed', 'exit_code': 0}}), flush=True)",
|
|
1587
|
+
" time.sleep(0.8)",
|
|
1588
|
+
]
|
|
1589
|
+
),
|
|
1590
|
+
encoding="utf-8",
|
|
1591
|
+
)
|
|
1592
|
+
|
|
1593
|
+
env_overrides = {
|
|
1594
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
1595
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
1596
|
+
"OPENAI_API_KEY": "pushpals-command-progress-cap-test-key",
|
|
1597
|
+
"WORKERPALS_OPENAI_CODEX_JSON": "true",
|
|
1598
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "12",
|
|
1599
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
|
|
1600
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S": "3",
|
|
1601
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_PROGRESS_CAP_S": "3",
|
|
1602
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
1603
|
+
}
|
|
1604
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
1605
|
+
result = _run_codex_task(
|
|
1606
|
+
str(repo),
|
|
1607
|
+
"Add one focused patch after bounded command-backed discovery.",
|
|
1608
|
+
[],
|
|
1609
|
+
)
|
|
1610
|
+
|
|
1611
|
+
self.assertTrue(result.get("ok"), result)
|
|
1612
|
+
self.assertEqual(result.get("exitCode"), 0)
|
|
1613
|
+
self.assertIn("Patched after capped command progress", str(result.get("stdout") or ""))
|
|
1614
|
+
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
1615
|
+
|
|
1522
1616
|
def test_run_codex_task_finalizes_after_durable_publishable_progress(self) -> None:
|
|
1523
1617
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-durable-progress-") as temp_dir:
|
|
1524
1618
|
repo = Path(temp_dir) / "repo"
|
|
@@ -1962,6 +2056,86 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1962
2056
|
|
|
1963
2057
|
self.assertEqual(watchdog_s, 300)
|
|
1964
2058
|
|
|
2059
|
+
def test_no_edit_recovery_attempt_uses_short_durable_recheck_and_command_cap(self) -> None:
|
|
2060
|
+
env = {
|
|
2061
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S": "",
|
|
2062
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S": "",
|
|
2063
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_PROGRESS_CAP_S": "",
|
|
2064
|
+
}
|
|
2065
|
+
with mock.patch.dict(os.environ, env, clear=False):
|
|
2066
|
+
first_recheck_s = _resolve_no_edit_recheck_seconds(750)
|
|
2067
|
+
recovery_recheck_s = _resolve_no_edit_recheck_seconds(750, recovery_attempt=1)
|
|
2068
|
+
command_grace_s = _resolve_no_edit_command_grace_seconds(750)
|
|
2069
|
+
first_command_cap_s = _resolve_no_edit_command_progress_cap_seconds(
|
|
2070
|
+
750,
|
|
2071
|
+
command_grace_s,
|
|
2072
|
+
)
|
|
2073
|
+
recovery_command_cap_s = _resolve_no_edit_command_progress_cap_seconds(
|
|
2074
|
+
750,
|
|
2075
|
+
command_grace_s,
|
|
2076
|
+
recovery_attempt=1,
|
|
2077
|
+
)
|
|
2078
|
+
|
|
2079
|
+
self.assertEqual(first_recheck_s, 120)
|
|
2080
|
+
self.assertEqual(recovery_recheck_s, 30)
|
|
2081
|
+
self.assertEqual(first_command_cap_s, 360)
|
|
2082
|
+
self.assertEqual(recovery_command_cap_s, 120)
|
|
2083
|
+
|
|
2084
|
+
def test_codex_recovery_attempt_refuses_exhausted_shared_deadline(self) -> None:
|
|
2085
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-exhausted-recovery-") as temp_dir:
|
|
2086
|
+
repo = Path(temp_dir) / "repo"
|
|
2087
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
2088
|
+
(repo / "README.md").write_text("# exhausted recovery repo\n", encoding="utf-8")
|
|
2089
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
2090
|
+
subprocess.run(
|
|
2091
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
2092
|
+
cwd=repo,
|
|
2093
|
+
check=True,
|
|
2094
|
+
capture_output=True,
|
|
2095
|
+
text=True,
|
|
2096
|
+
)
|
|
2097
|
+
subprocess.run(
|
|
2098
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
2099
|
+
cwd=repo,
|
|
2100
|
+
check=True,
|
|
2101
|
+
capture_output=True,
|
|
2102
|
+
text=True,
|
|
2103
|
+
)
|
|
2104
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
2105
|
+
subprocess.run(
|
|
2106
|
+
["git", "commit", "-m", "chore: seed exhausted recovery repo"],
|
|
2107
|
+
cwd=repo,
|
|
2108
|
+
check=True,
|
|
2109
|
+
capture_output=True,
|
|
2110
|
+
text=True,
|
|
2111
|
+
)
|
|
2112
|
+
|
|
2113
|
+
stub_path = Path(temp_dir) / "fake_codex_should_not_run.py"
|
|
2114
|
+
stub_path.write_text(
|
|
2115
|
+
"raise SystemExit('fake codex should not run when recovery budget is exhausted')\n",
|
|
2116
|
+
encoding="utf-8",
|
|
2117
|
+
)
|
|
2118
|
+
|
|
2119
|
+
env_overrides = {
|
|
2120
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
2121
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
2122
|
+
"OPENAI_API_KEY": "pushpals-exhausted-recovery-test-key",
|
|
2123
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "750",
|
|
2124
|
+
}
|
|
2125
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
2126
|
+
result = _run_codex_task(
|
|
2127
|
+
str(repo),
|
|
2128
|
+
"Apply patch-first recovery.",
|
|
2129
|
+
[],
|
|
2130
|
+
no_edit_recovery_attempt=1,
|
|
2131
|
+
execution_deadline_monotonic=time.monotonic() - 1.0,
|
|
2132
|
+
)
|
|
2133
|
+
|
|
2134
|
+
self.assertFalse(result.get("ok"), result)
|
|
2135
|
+
self.assertEqual(result.get("exitCode"), 124)
|
|
2136
|
+
self.assertIn("recovery budget exhausted", str(result.get("summary") or ""))
|
|
2137
|
+
self.assertIn("Stopping before a low-odds retry", str(result.get("stderr") or ""))
|
|
2138
|
+
|
|
1965
2139
|
def test_review_fix_contract_level_tests_use_fast_no_edit_watchdog(self) -> None:
|
|
1966
2140
|
prompt = (
|
|
1967
2141
|
"Restore exact score assertions for contract-level tests where score is part "
|
|
@@ -1972,6 +2146,34 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1972
2146
|
|
|
1973
2147
|
self.assertEqual(watchdog_s, 180)
|
|
1974
2148
|
|
|
2149
|
+
def test_rejected_pr_review_fix_prompt_uses_compact_no_edit_watchdog(self) -> None:
|
|
2150
|
+
prompt = (
|
|
2151
|
+
"Rejected PR revision brief: Previous ReviewAgent score: 7.6 / 10. "
|
|
2152
|
+
"Address reviewer must-fix items in the cleanup harness with focused coverage."
|
|
2153
|
+
)
|
|
2154
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
|
|
2155
|
+
watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
2156
|
+
|
|
2157
|
+
self.assertEqual(watchdog_s, 180)
|
|
2158
|
+
|
|
2159
|
+
def test_review_fix_child_budget_below_ten_minutes_still_uses_watchdogs(self) -> None:
|
|
2160
|
+
prompt = (
|
|
2161
|
+
"Rejected PR revision brief: Previous ReviewAgent score: 8.0 / 10. "
|
|
2162
|
+
"Add focused tests for createCleanupHarness.runTask covering successful execution, "
|
|
2163
|
+
"execute failure, cleanup failure, invalid task input, and cleanup execution after "
|
|
2164
|
+
"successful task completion."
|
|
2165
|
+
)
|
|
2166
|
+
env = {
|
|
2167
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "",
|
|
2168
|
+
"WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": "",
|
|
2169
|
+
}
|
|
2170
|
+
with mock.patch.dict(os.environ, env, clear=False):
|
|
2171
|
+
no_edit_s = _resolve_no_edit_watchdog_seconds(prompt, 570)
|
|
2172
|
+
rollout_s = _resolve_rollout_watchdog_seconds(prompt, 570, no_edit_s)
|
|
2173
|
+
|
|
2174
|
+
self.assertEqual(no_edit_s, 180)
|
|
2175
|
+
self.assertEqual(rollout_s, 120)
|
|
2176
|
+
|
|
1975
2177
|
def test_no_edit_recovery_guidance_warns_against_artifact_only_progress(self) -> None:
|
|
1976
2178
|
guidance = _build_no_edit_recovery_guidance(
|
|
1977
2179
|
"item.completed | still inspecting",
|
|
@@ -2112,7 +2314,7 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
2112
2314
|
self.assertIn("Patched after rollout coach guidance", str(result.get("stdout") or ""))
|
|
2113
2315
|
self.assertIn("scripts/", str(result.get("stdout") or ""))
|
|
2114
2316
|
|
|
2115
|
-
def
|
|
2317
|
+
def test_run_codex_task_rollout_coach_resets_broad_small_task_changes_before_retry(self) -> None:
|
|
2116
2318
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-rollout-noisy-") as temp_dir:
|
|
2117
2319
|
repo = Path(temp_dir) / "repo"
|
|
2118
2320
|
repo.mkdir(parents=True, exist_ok=True)
|
|
@@ -2149,7 +2351,22 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
2149
2351
|
"import sys",
|
|
2150
2352
|
"import time",
|
|
2151
2353
|
"",
|
|
2152
|
-
"sys.
|
|
2354
|
+
"argv = sys.argv[1:]",
|
|
2355
|
+
"last_message_path = None",
|
|
2356
|
+
"for index, arg in enumerate(argv):",
|
|
2357
|
+
" if arg == '--output-last-message' and index + 1 < len(argv):",
|
|
2358
|
+
" last_message_path = argv[index + 1]",
|
|
2359
|
+
" break",
|
|
2360
|
+
"",
|
|
2361
|
+
"prompt = sys.stdin.read()",
|
|
2362
|
+
"if 'Rollout coach recovery' in prompt:",
|
|
2363
|
+
" Path('src').mkdir(exist_ok=True)",
|
|
2364
|
+
" Path('src/narrow-rollout-recovery.txt').write_text('narrow recovery patch\\n', encoding='utf-8')",
|
|
2365
|
+
" if last_message_path:",
|
|
2366
|
+
" Path(last_message_path).write_text('Patched narrowly after broad rollout reset.', encoding='utf-8')",
|
|
2367
|
+
" print('item.completed | Patched narrowly after broad rollout reset.', flush=True)",
|
|
2368
|
+
" sys.exit(0)",
|
|
2369
|
+
"",
|
|
2153
2370
|
"for index in range(5):",
|
|
2154
2371
|
" root = Path(f'area{index}')",
|
|
2155
2372
|
" root.mkdir(exist_ok=True)",
|
|
@@ -2176,6 +2393,78 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
2176
2393
|
"Make a small low-risk repo-native patch.",
|
|
2177
2394
|
[],
|
|
2178
2395
|
)
|
|
2396
|
+
area0_exists_after_retry = (repo / "area0").exists()
|
|
2397
|
+
|
|
2398
|
+
self.assertTrue(result.get("ok"), result)
|
|
2399
|
+
self.assertEqual(result.get("exitCode"), 0)
|
|
2400
|
+
self.assertIn("Patched narrowly after broad rollout reset", str(result.get("stdout") or ""))
|
|
2401
|
+
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
2402
|
+
self.assertFalse(area0_exists_after_retry)
|
|
2403
|
+
|
|
2404
|
+
def test_run_codex_task_rollout_coach_fails_after_repeated_broad_small_task_changes(self) -> None:
|
|
2405
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-rollout-repeat-noisy-") as temp_dir:
|
|
2406
|
+
repo = Path(temp_dir) / "repo"
|
|
2407
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
2408
|
+
(repo / "README.md").write_text("# repeated rollout noisy repo\n", encoding="utf-8")
|
|
2409
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
2410
|
+
subprocess.run(
|
|
2411
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
2412
|
+
cwd=repo,
|
|
2413
|
+
check=True,
|
|
2414
|
+
capture_output=True,
|
|
2415
|
+
text=True,
|
|
2416
|
+
)
|
|
2417
|
+
subprocess.run(
|
|
2418
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
2419
|
+
cwd=repo,
|
|
2420
|
+
check=True,
|
|
2421
|
+
capture_output=True,
|
|
2422
|
+
text=True,
|
|
2423
|
+
)
|
|
2424
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
2425
|
+
subprocess.run(
|
|
2426
|
+
["git", "commit", "-m", "chore: seed repeated rollout noisy repo"],
|
|
2427
|
+
cwd=repo,
|
|
2428
|
+
check=True,
|
|
2429
|
+
capture_output=True,
|
|
2430
|
+
text=True,
|
|
2431
|
+
)
|
|
2432
|
+
|
|
2433
|
+
stub_path = Path(temp_dir) / "fake_codex_rollout_repeat_noisy.py"
|
|
2434
|
+
stub_path.write_text(
|
|
2435
|
+
"\n".join(
|
|
2436
|
+
[
|
|
2437
|
+
"from pathlib import Path",
|
|
2438
|
+
"import sys",
|
|
2439
|
+
"import time",
|
|
2440
|
+
"",
|
|
2441
|
+
"sys.stdin.read()",
|
|
2442
|
+
"for index in range(5):",
|
|
2443
|
+
" root = Path(f'area{index}')",
|
|
2444
|
+
" root.mkdir(exist_ok=True)",
|
|
2445
|
+
" (root / 'changed.txt').write_text('broad rollout change\\n', encoding='utf-8')",
|
|
2446
|
+
"print('item.completed | Repeated broad edits for a small task.', flush=True)",
|
|
2447
|
+
"time.sleep(10)",
|
|
2448
|
+
]
|
|
2449
|
+
),
|
|
2450
|
+
encoding="utf-8",
|
|
2451
|
+
)
|
|
2452
|
+
|
|
2453
|
+
env_overrides = {
|
|
2454
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
2455
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
2456
|
+
"OPENAI_API_KEY": "pushpals-rollout-repeat-noisy-test-key",
|
|
2457
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "700",
|
|
2458
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "10",
|
|
2459
|
+
"WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": "1",
|
|
2460
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
2461
|
+
}
|
|
2462
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
2463
|
+
result = _run_codex_task(
|
|
2464
|
+
str(repo),
|
|
2465
|
+
"Make a small low-risk repo-native patch.",
|
|
2466
|
+
[],
|
|
2467
|
+
)
|
|
2179
2468
|
|
|
2180
2469
|
self.assertFalse(result.get("ok"), result)
|
|
2181
2470
|
self.assertEqual(result.get("exitCode"), 124)
|
|
@@ -34,9 +34,9 @@ interface GenericPythonExecutorConfig {
|
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
const BACKEND_TIMEOUT_RESULT_GRACE_MS = 30_000;
|
|
37
|
-
const OPENAI_CODEX_MIN_VALIDATION_RESERVE_MS =
|
|
38
|
-
const OPENAI_CODEX_MAX_VALIDATION_RESERVE_MS =
|
|
39
|
-
const OPENAI_CODEX_MIN_PRIMARY_TURN_BUDGET_MS =
|
|
37
|
+
const OPENAI_CODEX_MIN_VALIDATION_RESERVE_MS = 240_000;
|
|
38
|
+
const OPENAI_CODEX_MAX_VALIDATION_RESERVE_MS = 720_000;
|
|
39
|
+
const OPENAI_CODEX_MIN_PRIMARY_TURN_BUDGET_MS = 540_000;
|
|
40
40
|
|
|
41
41
|
function estimateTokensFromText(text: string): number {
|
|
42
42
|
return Math.max(0, Math.ceil(String(text ?? "").length / 3));
|
|
@@ -161,7 +161,7 @@ export function resolveOpenAICodexValidationReserveMs(
|
|
|
161
161
|
budgetMs,
|
|
162
162
|
Math.max(
|
|
163
163
|
OPENAI_CODEX_MIN_VALIDATION_RESERVE_MS,
|
|
164
|
-
Math.min(OPENAI_CODEX_MAX_VALIDATION_RESERVE_MS, budgetMs * 0.
|
|
164
|
+
Math.min(OPENAI_CODEX_MAX_VALIDATION_RESERVE_MS, budgetMs * 0.5),
|
|
165
165
|
),
|
|
166
166
|
),
|
|
167
167
|
);
|
|
@@ -201,6 +201,9 @@ const BROWSER_VALIDATION_MAX_AUTO_REVISIONS = 3;
|
|
|
201
201
|
const CRITIC_COMPACT_RETRY_MIN_REDUCTION_RATIO = 0.25;
|
|
202
202
|
const MAX_DIAGNOSTIC_PATH_SAMPLES = 50;
|
|
203
203
|
const MAX_DIAGNOSTIC_TEXT_CHARS = 8_000;
|
|
204
|
+
const QUALITY_MIN_REVISION_BUDGET_MS = 120_000;
|
|
205
|
+
const QUALITY_MAX_REVISION_BUDGET_MS = 420_000;
|
|
206
|
+
const QUALITY_REVISION_BUDGET_RATIO = 0.25;
|
|
204
207
|
|
|
205
208
|
export function qualityRevisionLoopUpperBound(policy: {
|
|
206
209
|
maxAutoRevisions: number;
|
|
@@ -234,7 +237,13 @@ export function qualityRevisionBudgetDecision(opts: {
|
|
|
234
237
|
const elapsedMs = Math.max(0, Number(opts.jobElapsedMs) || 0);
|
|
235
238
|
const remainingBudgetMs = Math.max(0, Math.floor(executionBudgetMs - elapsedMs));
|
|
236
239
|
const minimumRevisionBudgetMs = Math.floor(
|
|
237
|
-
Math.min(
|
|
240
|
+
Math.min(
|
|
241
|
+
executionBudgetMs,
|
|
242
|
+
Math.max(
|
|
243
|
+
QUALITY_MIN_REVISION_BUDGET_MS,
|
|
244
|
+
Math.min(QUALITY_MAX_REVISION_BUDGET_MS, executionBudgetMs * QUALITY_REVISION_BUDGET_RATIO),
|
|
245
|
+
),
|
|
246
|
+
),
|
|
238
247
|
);
|
|
239
248
|
return {
|
|
240
249
|
shouldStart: remainingBudgetMs >= minimumRevisionBudgetMs,
|