@pushpalsdev/cli 1.1.17 → 1.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +159 -5
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +250 -6
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +223 -0
- package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py +9 -0
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +47 -20
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +450 -5
package/package.json
CHANGED
|
@@ -4812,6 +4812,46 @@ var IGNORED_REPO_TARGET_DIRS = new Set([
|
|
|
4812
4812
|
"__pycache__",
|
|
4813
4813
|
"target"
|
|
4814
4814
|
]);
|
|
4815
|
+
/**
 * Reports whether `repoRoot` looks like the PushPals monorepo itself.
 *
 * The check passes only when all three marker source files exist under
 * `repoRoot`; evaluation stops at the first missing marker.
 *
 * @param {string} repoRoot - Root directory of the checkout to inspect.
 * @returns {boolean} True when every marker file is present
 *   (presumably `existsSync4` is the bundled `fs.existsSync` — confirm).
 */
function isPushPalsRepository(repoRoot) {
  const markerFiles = [
    ["apps", "remotebuddy", "src", "autonomous_engine.ts"],
    ["apps", "workerpals", "src", "workerpals_main.ts"],
    ["packages", "shared", "src", "autonomy_policy.ts"]
  ];
  return markerFiles.every((segments) => existsSync4(resolve4(repoRoot, ...segments)));
}
|
|
4818
|
+
/**
 * Reports whether a path names the PushPals-internal
 * `_layout.autonomy.test.*` file (js/jsx/ts/tsx with optional c/m prefix).
 *
 * Backslashes are converted to forward slashes and the path is lower-cased
 * before matching, so the check is separator- and case-insensitive.
 *
 * @param {unknown} path - Candidate path; coerced through `asString2`.
 * @returns {boolean} False for an empty path, otherwise the match result.
 */
function isPushPalsInternalUserRepoPath(path) {
  const posixLower = asString2(path).replace(/\\/g, "/").toLowerCase();
  return posixLower ? /(^|\/)_layout\.autonomy\.test\.[cm]?[jt]sx?$/.test(posixLower) : false;
}
|
|
4824
|
+
/**
 * Reports whether free-form text mentions a PushPals-internal concept
 * (queue health, WorkerPal(s), RemoteBuddy, SourceControlManager,
 * ReviewAgent, PushPals). Matching is case-insensitive and word-bounded.
 *
 * @param {unknown} text - Text to scan; null/undefined are treated as empty.
 * @returns {boolean} True when any internal term is present.
 */
function containsPushPalsInternalUserRepoText(text) {
  // A bare `\bworkerpal\b` skips "WorkerPals" and "apps/workerpals" because the
  // trailing "s" is a word character, and `queue_health` skips the hyphenated
  // "queue-health" spelling, so accept the plural and both separators.
  const internalTerms = /\b(queue[_-]health|workerpals?|remotebuddy|sourcecontrolmanager|source[_-]control[_-]manager|reviewagent|pushpals)\b/i;
  return internalTerms.test(String(text ?? ""));
}
|
|
4827
|
+
/**
 * Decides whether an autonomy candidate exposes PushPals-internal concepts.
 *
 * A candidate leaks when its component area or any target path is an internal
 * path, or when its public-facing text (title, problem statement, vision
 * alignment reason, feature hypotheses, target paths) contains an internal term.
 *
 * @param {object} candidate - Candidate with `component_area`, `target_paths`,
 *   `title`, `problem_statement`, `vision_alignment_reason` and
 *   `feature_hypotheses`.
 * @returns {boolean} True when the candidate should be treated as a leak.
 */
function candidateLeaksPushPalsInternals(candidate) {
  const pathLikeFields = [candidate.component_area, ...candidate.target_paths];
  for (const field of pathLikeFields) {
    if (isPushPalsInternalUserRepoPath(field)) {
      return true;
    }
  }
  const {
    title,
    problem_statement: problemStatement,
    vision_alignment_reason: visionAlignmentReason
  } = candidate;
  const publicText = [
    title,
    problemStatement,
    visionAlignmentReason,
    ...candidate.feature_hypotheses,
    ...candidate.target_paths
  ].join("\n");
  return containsPushPalsInternalUserRepoText(publicText);
}
|
|
4841
|
+
/**
 * Builds the replacement worker instruction for a candidate: title, problem
 * statement, a guardrail paragraph, and a scope section, separated by blank
 * lines.
 *
 * @param {object} candidate - Candidate with `title`, `problem_statement`,
 *   `target_paths` and `scope.write_globs`.
 * @returns {string} The newline-joined instruction text.
 */
function buildRepoNativeFallbackInstruction(candidate) {
  const guardrail = "Keep the change scoped to the repo's own product/runtime behavior. Do not add PushPals, WorkerPal, RemoteBuddy, queue-health, or autonomy-internal concepts to user-facing code or tests.";
  const scopeSection = [
    "Scope:",
    `- target_paths: ${candidate.target_paths.join(", ")}`,
    `- write_globs: ${candidate.scope.write_globs.join(", ")}`
  ].join("\n");
  // Joining on a blank line reproduces the empty separator entries between sections.
  return [candidate.title, candidate.problem_statement, guardrail, scopeSection].join("\n\n");
}
|
|
4815
4855
|
function pathBasename(path) {
|
|
4816
4856
|
const normalized = path.replace(/\\/g, "/").replace(/\/+$/, "");
|
|
4817
4857
|
const idx = normalized.lastIndexOf("/");
|
|
@@ -4890,10 +4930,13 @@ function collectRepoTargetFiles(repoRoot, startRelativePath, maxResults, maxDept
|
|
|
4890
4930
|
function discoverRepoTargetProfiles(repoRoot, maxProfiles = 16) {
|
|
4891
4931
|
const profiles = [];
|
|
4892
4932
|
const seen = new Set;
|
|
4933
|
+
const allowPushPalsInternalTargets = isPushPalsRepository(repoRoot);
|
|
4893
4934
|
const add = (targetPath) => {
|
|
4894
4935
|
const finalPath = normalizeAutonomyComponentArea(targetPath);
|
|
4895
4936
|
if (!finalPath)
|
|
4896
4937
|
return;
|
|
4938
|
+
if (!allowPushPalsInternalTargets && isPushPalsInternalUserRepoPath(finalPath))
|
|
4939
|
+
return;
|
|
4897
4940
|
if (seen.has(finalPath))
|
|
4898
4941
|
return;
|
|
4899
4942
|
seen.add(finalPath);
|
|
@@ -4959,6 +5002,8 @@ function chooseRepoObjectiveTargetProfile(profiles, objective) {
|
|
|
4959
5002
|
let best = null;
|
|
4960
5003
|
for (const profile of profiles) {
|
|
4961
5004
|
const label = profile.label.toLowerCase();
|
|
5005
|
+
if (isPushPalsInternalUserRepoPath(label))
|
|
5006
|
+
continue;
|
|
4962
5007
|
const profileTokens = new Set(profile.keywords);
|
|
4963
5008
|
let score = 0;
|
|
4964
5009
|
for (const token of hintTokens) {
|
|
@@ -4988,6 +5033,17 @@ function chooseRepoObjectiveTargetProfile(profiles, objective) {
|
|
|
4988
5033
|
if (productSurface)
|
|
4989
5034
|
score += 1;
|
|
4990
5035
|
}
|
|
5036
|
+
if (/\b(web|browser|smoke|e2e|review path|review|navigation|delivery|trust)\b/i.test(objective.title)) {
|
|
5037
|
+
if (/(^|\/)(scripts?|tools?)\/.*(web|browser|smoke|e2e|playwright)/i.test(label)) {
|
|
5038
|
+
score += 8;
|
|
5039
|
+
}
|
|
5040
|
+
if (/\b(app\/(_layout|index)|route|navigation|shell|home|screen)\b/i.test(label)) {
|
|
5041
|
+
score += 4;
|
|
5042
|
+
}
|
|
5043
|
+
if (validationSurface && !/(web|browser|smoke|e2e|playwright)/i.test(label)) {
|
|
5044
|
+
score -= 3;
|
|
5045
|
+
}
|
|
5046
|
+
}
|
|
4991
5047
|
if (categories.has("performance")) {
|
|
4992
5048
|
if (productSurface || /\b(perf|render|animation|worker|server)\b/i.test(label))
|
|
4993
5049
|
score += 4;
|
|
@@ -7479,6 +7535,7 @@ ${JSON.stringify(input.messages ?? [])}`),
|
|
|
7479
7535
|
}
|
|
7480
7536
|
const normalizedCandidates = [];
|
|
7481
7537
|
const dropReasonCounts = new Map;
|
|
7538
|
+
const allowPushPalsInternalCandidates = isPushPalsRepository(this.autonomyRepo);
|
|
7482
7539
|
const recordDropReason = (reason) => {
|
|
7483
7540
|
dropReasonCounts.set(reason, (dropReasonCounts.get(reason) ?? 0) + 1);
|
|
7484
7541
|
};
|
|
@@ -7553,6 +7610,11 @@ ${JSON.stringify(input.messages ?? [])}`),
|
|
|
7553
7610
|
candidate.component_area = scopeValidation.componentArea ?? candidate.component_area;
|
|
7554
7611
|
candidate.target_paths = scopeValidation.normalizedTargetPaths;
|
|
7555
7612
|
candidate.scope.write_globs = scopeValidation.normalizedWriteGlobs;
|
|
7613
|
+
if (!allowPushPalsInternalCandidates && candidateLeaksPushPalsInternals(candidate)) {
|
|
7614
|
+
recordDropReason(`${source}_pushpals_internal_leak`);
|
|
7615
|
+
console.warn(`[RemoteBuddyAutonomousEngine] dropping candidate ${candidate.id}: PushPals-internal concepts do not belong in user-repo autonomy work.`);
|
|
7616
|
+
continue;
|
|
7617
|
+
}
|
|
7556
7618
|
const missingTargetPaths = findMissingRepoTargetPaths(this.autonomyRepo, candidate.target_paths);
|
|
7557
7619
|
if (missingTargetPaths.length > 0) {
|
|
7558
7620
|
recordDropReason(`${source}_target_paths_missing_in_repo`);
|
|
@@ -7966,13 +8028,17 @@ ${JSON.stringify(input.messages ?? [])}`),
|
|
|
7966
8028
|
outcomeDetail = "lock_renew_failed_before_enqueue";
|
|
7967
8029
|
return;
|
|
7968
8030
|
}
|
|
7969
|
-
|
|
8031
|
+
let instruction = canonicalizeInstructionTextForBun(asString2(planningJson.instruction) || `${selected.candidate.title}
|
|
7970
8032
|
|
|
7971
8033
|
${selected.candidate.problem_statement}
|
|
7972
8034
|
|
|
7973
8035
|
Scope:
|
|
7974
8036
|
- target_paths: ${selected.candidate.target_paths.join(", ")}
|
|
7975
8037
|
- write_globs: ${selected.candidate.scope.write_globs.join(", ")}`);
|
|
8038
|
+
if (!isPushPalsRepository(this.autonomyRepo) && containsPushPalsInternalUserRepoText(instruction)) {
|
|
8039
|
+
console.warn(`[RemoteBuddyAutonomousEngine] replacing autonomy instruction for ${selected.candidate.id}: planner output contained PushPals-internal wording.`);
|
|
8040
|
+
instruction = canonicalizeInstructionTextForBun(buildRepoNativeFallbackInstruction(selected.candidate));
|
|
8041
|
+
}
|
|
7976
8042
|
this.setPhase("enqueue_request");
|
|
7977
8043
|
const requestId = await this.enqueueSyntheticRequest(instruction, {
|
|
7978
8044
|
objectiveId,
|
|
@@ -8426,7 +8492,7 @@ function ensureWriteGlobsCoverTargetPaths(targetPaths, writeGlobs) {
|
|
|
8426
8492
|
}
|
|
8427
8493
|
return { normalizedWriteGlobs, uncoveredTargets, addedGlobs };
|
|
8428
8494
|
}
|
|
8429
|
-
function buildExecutionGuidance(plan, targetPaths, requiredValidationSteps = []) {
|
|
8495
|
+
function buildExecutionGuidance(plan, targetPaths, requiredValidationSteps = [], repoHintDiagnostics = []) {
|
|
8430
8496
|
const lines = [];
|
|
8431
8497
|
const targets = normalizePathHints(targetPaths.length > 0 ? targetPaths : plan.scope.write_globs ?? []);
|
|
8432
8498
|
if (targets.length > 0) {
|
|
@@ -8438,6 +8504,13 @@ function buildExecutionGuidance(plan, targetPaths, requiredValidationSteps = [])
|
|
|
8438
8504
|
lines.push("- Do not prepend a leading slash to target paths.");
|
|
8439
8505
|
lines.push("- These paths are relevance hints, not hard write boundaries; edit the behavior-owning files needed for the task and explain any expansion.");
|
|
8440
8506
|
}
|
|
8507
|
+
if (repoHintDiagnostics.length > 0) {
|
|
8508
|
+
lines.push("Repo hint preflight:");
|
|
8509
|
+
for (const diagnostic of repoHintDiagnostics.slice(0, 8)) {
|
|
8510
|
+
lines.push(`- ${diagnostic}`);
|
|
8511
|
+
}
|
|
8512
|
+
lines.push("- If a hinted path is absent, treat it as stale guidance unless the user explicitly asked to create that path. Prefer an existing repo-native owner or nearby test.");
|
|
8513
|
+
}
|
|
8441
8514
|
lines.push("Scope:");
|
|
8442
8515
|
lines.push(`- read_anywhere: ${plan.scope.read_anywhere ? "true" : "false"}`);
|
|
8443
8516
|
lines.push(`- write_allowed: ${plan.scope.write_allowed ? "true" : "false"}`);
|
|
@@ -8490,6 +8563,75 @@ function buildExecutionGuidance(plan, targetPaths, requiredValidationSteps = [])
|
|
|
8490
8563
|
return lines.join(`
|
|
8491
8564
|
`).trim();
|
|
8492
8565
|
}
|
|
8566
|
+
/**
 * Reports whether a path hint contains any glob metacharacter
 * (`*`, `?`, `[`, `]`, `{`, `}`).
 *
 * @param {string} value - Path hint to inspect.
 * @returns {boolean} True when at least one glob character is present.
 */
function pathHintHasGlob(value) {
  const hint = String(value);
  return [..."*?[]{}"].some((globChar) => hint.includes(globChar));
}
|
|
8569
|
+
/**
 * Reports whether a path hint's final segment ends in a file-extension-like
 * suffix: a dot followed by 1 to 13 characters, the first alphanumeric.
 *
 * Backslashes are treated as separators and a leading `./` is ignored.
 *
 * @param {string} value - Path hint to inspect.
 * @returns {boolean} True when the last segment looks like a file name.
 */
function pathHintLooksLikeConcreteFile(value) {
  const posixPath = value.replace(/\\/g, "/").replace(/^\.\/+/, "");
  const lastSlash = posixPath.lastIndexOf("/");
  const basename = lastSlash === -1 ? posixPath : posixPath.slice(lastSlash + 1);
  return /\.[A-Za-z0-9][A-Za-z0-9_-]{0,12}$/.test(basename);
}
|
|
8574
|
+
/**
 * Reports whether request text asks for something new to be created: a
 * creation verb followed, within 80 characters on the same line, by an
 * artifact noun. Both are matched as whole words, case-insensitively.
 *
 * @param {string} value - Combined request text to scan.
 * @returns {boolean} True when a creation request is detected.
 */
function requestAllowsCreatingMissingPath(value) {
  const creationVerbs = ["create", "add", "new", "scaffold", "generate", "introduce", "write"];
  const artifactNouns = ["file", "test", "module", "component", "script", "page", "route", "fixture", "helper"];
  const creationRequest = new RegExp(
    `\\b(${creationVerbs.join("|")})\\b.{0,80}\\b(${artifactNouns.join("|")})\\b`,
    "i"
  );
  return creationRequest.test(value);
}
|
|
8577
|
+
/**
 * Decides whether a hinted target path should be treated as stale guidance.
 *
 * A hint is stale only when it normalizes to a non-glob, concrete-file-looking
 * path other than ".", that path does not exist under `repoRoot`, and the
 * request text does not ask for something new to be created.
 *
 * @param {string} repoRoot - Root of the checkout the hint is resolved against.
 * @param {string} path - Hinted target path.
 * @param {string} requestText - Combined request text used for the creation check.
 * @returns {boolean} True when the hint should be dropped as stale.
 */
function shouldTreatMissingTargetAsStale(repoRoot, path, requestText) {
  const candidate = normalizeTargetPath(path);
  const isConcreteFileHint = Boolean(candidate)
    && candidate !== "."
    && !pathHintHasGlob(candidate)
    && pathHintLooksLikeConcreteFile(candidate);
  if (!isConcreteFileHint) {
    return false;
  }
  // The filesystem check runs before the request-text check, as in the original order.
  const existsInCheckout = existsSync5(resolve5(repoRoot, candidate));
  return !existsInCheckout && !requestAllowsCreatingMissingPath(requestText);
}
|
|
8589
|
+
/**
 * Removes target-path hints that point at files missing from the checkout and
 * scrubs the plan of references to them.
 *
 * When stale hints are found, `params.plan` is mutated in place:
 * validation steps mentioning a stale path are dropped, write globs equal to a
 * stale path (or that normalize to nothing) are dropped, and the extension-less
 * basename of each stale path is added to `plan.discovery.keywords`, capped at 12.
 *
 * @param {object} params
 * @param {string} params.repoRoot - Checkout root used for existence checks.
 * @param {string} params.prompt - Original user prompt.
 * @param {object} params.plan - Planner output; mutated as described above.
 * @param {string[]} params.targetPaths - Hinted target paths.
 * @returns {{targetPaths: string[], diagnostics: string[], staleHints: string[]}}
 *   Surviving hints (original spelling), one diagnostic per stale hint, and the
 *   normalized stale hints.
 */
function sanitizeRepoNativeTargetHints(params) {
  const requestText = [
    params.prompt,
    params.plan.worker_instruction,
    params.plan.assistant_message,
    ...params.plan.acceptance_criteria,
    ...params.targetPaths
  ].join("\n");
  const diagnostics = [];
  const staleHints = [];
  const targetPaths = [];
  for (const hint of params.targetPaths) {
    const normalizedHint = normalizeTargetPath(hint);
    if (!normalizedHint) {
      continue;
    }
    if (!shouldTreatMissingTargetAsStale(params.repoRoot, normalizedHint, requestText)) {
      // Survivors keep the caller's original spelling, not the normalized form.
      targetPaths.push(hint);
      continue;
    }
    staleHints.push(normalizedHint);
    diagnostics.push(`Path hint "${normalizedHint}" does not exist in this checkout; it was removed as a canonical target and kept only as advisory context.`);
  }
  if (staleHints.length === 0) {
    return { targetPaths, diagnostics, staleHints };
  }

  const { plan } = params;
  const staleLower = staleHints.map((stale) => stale.toLowerCase());
  const mentionsStaleHint = (step) => {
    const lowered = step.replace(/\\/g, "/").toLowerCase();
    return staleLower.some((stale) => lowered.includes(stale));
  };
  plan.validation_steps = plan.validation_steps.filter((step) => !mentionsStaleHint(step));
  plan.scope.write_globs = plan.scope.write_globs.filter((glob) => {
    const normalizedGlob = normalizeTargetPath(glob);
    return Boolean(normalizedGlob) && !staleLower.includes(normalizedGlob.toLowerCase());
  });
  if (!plan.discovery) {
    plan.discovery = { ripgrep_queries: [] };
  }
  // Keep the stale file's base name searchable so the worker can find the real owner.
  const keywords = new Set(plan.discovery.keywords ?? []);
  for (const stale of staleHints) {
    const basename = stale.split("/").pop();
    if (basename) {
      keywords.add(basename.replace(/\.[^.]+$/, ""));
    }
  }
  plan.discovery.keywords = [...keywords].slice(0, 12);
  return { targetPaths, diagnostics, staleHints };
}
|
|
8493
8635
|
var VALIDATION_COMMAND_PREFIX = /^(git|bun|bunx|npm|npx|pnpm|yarn|node|python|python3|uv|pytest|vitest|jest|tsc|eslint|ruff|mypy|go|cargo|make|docker|pwsh|powershell|sh|bash)\b/i;
|
|
8494
8636
|
var VALIDATION_GENERIC_SAFE = /^(git\s+status\s+--porcelain|git\s+diff\b)/i;
|
|
8495
8637
|
var PATH_TOKEN_REGEX = /\b([A-Za-z0-9._/\-\\]+\.[A-Za-z0-9._-]+)\b/g;
|
|
@@ -9923,7 +10065,17 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
|
|
|
9923
10065
|
}
|
|
9924
10066
|
this.pushContext(`[user] ${toSingleLine(prompt, 700)}`, requestSessionId);
|
|
9925
10067
|
this.pushContext(`[plan] ${toSingleLine(JSON.stringify(plan), 900)}`, requestSessionId);
|
|
9926
|
-
|
|
10068
|
+
let targetPaths = autonomyMetadata && autonomyMetadata.targetPaths.length > 0 ? autonomyMetadata.targetPaths : plannerTargetPaths(plan, prompt);
|
|
10069
|
+
const repoHintPreflight = sanitizeRepoNativeTargetHints({
|
|
10070
|
+
repoRoot: this.repo,
|
|
10071
|
+
prompt,
|
|
10072
|
+
plan,
|
|
10073
|
+
targetPaths
|
|
10074
|
+
});
|
|
10075
|
+
targetPaths = repoHintPreflight.targetPaths;
|
|
10076
|
+
if (repoHintPreflight.diagnostics.length > 0) {
|
|
10077
|
+
console.warn(`[RemoteBuddy] Repo hint preflight: ${repoHintPreflight.diagnostics.slice(0, 3).join(" | ")}`);
|
|
10078
|
+
}
|
|
9927
10079
|
this.rememberPersistentMemory("plan", `intent=${plan.intent} worker=${plan.requires_worker ? "yes" : "no"} lane=${plan.lane} risk=${plan.risk_level} targets=${targetPaths.slice(0, 6).join(",") || "(none)"}`, requestId, requestSessionId);
|
|
9928
10080
|
const targetPath = targetPaths[0];
|
|
9929
10081
|
const isAnalysisFromEngine = plan.intent === "analysis" && Boolean(autonomyMetadata);
|
|
@@ -9961,8 +10113,9 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
|
|
|
9961
10113
|
}
|
|
9962
10114
|
if (!forceWorker) {
|
|
9963
10115
|
const missing = [];
|
|
9964
|
-
if (targetPaths.length === 0)
|
|
10116
|
+
if (targetPaths.length === 0 && repoHintPreflight.diagnostics.length === 0) {
|
|
9965
10117
|
missing.push("target_paths");
|
|
10118
|
+
}
|
|
9966
10119
|
if (plan.acceptance_criteria.length === 0)
|
|
9967
10120
|
missing.push("acceptance_criteria");
|
|
9968
10121
|
if (plan.validation_steps.length === 0)
|
|
@@ -9981,7 +10134,7 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
|
|
|
9981
10134
|
}
|
|
9982
10135
|
const canonicalInstruction = prompt.trim();
|
|
9983
10136
|
const rawPlannerInstruction = sanitizePlannerWorkerInstruction(String(plan.worker_instruction ?? ""), canonicalInstruction);
|
|
9984
|
-
const executionGuidance = buildExecutionGuidance(plan, targetPaths, requiredValidationSteps);
|
|
10137
|
+
const executionGuidance = buildExecutionGuidance(plan, targetPaths, requiredValidationSteps, repoHintPreflight.diagnostics);
|
|
9985
10138
|
const plannerWorkerInstruction = [rawPlannerInstruction, executionGuidance].filter(Boolean).join(`
|
|
9986
10139
|
|
|
9987
10140
|
`).trim();
|
|
@@ -10087,6 +10240,7 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
|
|
|
10087
10240
|
} : {},
|
|
10088
10241
|
acceptanceCriteria: plan.acceptance_criteria,
|
|
10089
10242
|
validationSteps: plan.validation_steps,
|
|
10243
|
+
...repoHintPreflight.diagnostics.length > 0 ? { repoHintDiagnostics: repoHintPreflight.diagnostics } : {},
|
|
10090
10244
|
...requiredValidationSteps.length > 0 ? { requiredValidationSteps } : {},
|
|
10091
10245
|
queuePriority: priority,
|
|
10092
10246
|
queueWaitBudgetMs,
|
|
@@ -104,8 +104,13 @@ _MAX_WRAPPER_RECOVERY_ATTEMPTS = 2
|
|
|
104
104
|
_MAX_WRAPPER_BOOTSTRAP_OUTPUT_CHARS = 1_200
|
|
105
105
|
_MAX_WRAPPER_BOOTSTRAP_TOTAL_CHARS = 5_000
|
|
106
106
|
_MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
|
|
107
|
+
_MAX_ROLLOUT_RECOVERY_ATTEMPTS = 1
|
|
107
108
|
_DEFAULT_NO_EDIT_WATCHDOG_S = 480
|
|
108
109
|
_SMALL_TASK_NO_EDIT_WATCHDOG_S = 360
|
|
110
|
+
_WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
|
|
111
|
+
_DEFAULT_ROLLOUT_WATCHDOG_S = 300
|
|
112
|
+
_SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
|
|
113
|
+
_WEB_REVIEW_ROLLOUT_WATCHDOG_S = 180
|
|
109
114
|
|
|
110
115
|
|
|
111
116
|
def _model_supports_xhigh_reasoning(model: str) -> bool:
|
|
@@ -577,6 +582,11 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
|
|
|
577
582
|
"startup shell",
|
|
578
583
|
"shell polish",
|
|
579
584
|
"visual/affordance",
|
|
585
|
+
"repo-native web review",
|
|
586
|
+
"web review path",
|
|
587
|
+
"browser smoke",
|
|
588
|
+
"web delivery",
|
|
589
|
+
"navigation trustworthy",
|
|
580
590
|
)
|
|
581
591
|
heavy_markers = (
|
|
582
592
|
"merge-conflict",
|
|
@@ -637,18 +647,142 @@ def _resolve_no_edit_watchdog_seconds(
|
|
|
637
647
|
if communicate_timeout_s < 600:
|
|
638
648
|
return None
|
|
639
649
|
|
|
640
|
-
|
|
650
|
+
prompt_text = str(prompt or "").lower()
|
|
651
|
+
if "repo-native web review" in prompt_text or "web review path" in prompt_text:
|
|
652
|
+
default_s = _WEB_REVIEW_NO_EDIT_WATCHDOG_S
|
|
653
|
+
else:
|
|
654
|
+
default_s = (
|
|
655
|
+
_SMALL_TASK_NO_EDIT_WATCHDOG_S
|
|
656
|
+
if _looks_like_small_task_prompt(prompt)
|
|
657
|
+
else _DEFAULT_NO_EDIT_WATCHDOG_S
|
|
658
|
+
)
|
|
641
659
|
return max(120, min(default_s, max(120, communicate_timeout_s - 60)))
|
|
642
660
|
|
|
643
661
|
|
|
644
|
-
def
|
|
662
|
+
def _looks_like_web_review_prompt(prompt: str) -> bool:
|
|
663
|
+
text = str(prompt or "").lower()
|
|
664
|
+
return "repo-native web review" in text or "web review path" in text
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
def _resolve_rollout_watchdog_seconds(
    prompt: str,
    communicate_timeout_s: Optional[int],
    no_edit_watchdog_s: Optional[int],
) -> Optional[int]:
    """Pick the rollout watchdog delay in seconds, or None to disable it.

    Returns None when the communicate timeout is missing or under 600 seconds,
    or when WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S is "0". A valid override
    is clamped to [1, communicate_timeout_s - 1]; an invalid one is logged and
    ignored. Otherwise the default depends on the prompt kind, is capped 60
    seconds below the no-edit watchdog and the communicate timeout, and never
    drops under 90 seconds.
    """
    if not communicate_timeout_s or communicate_timeout_s < 600:
        return None

    override = os.environ.get("WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S", "").strip()
    if override == "0":
        return None
    if override:
        override_s = _to_positive_int(override)
        if override_s is not None:
            return max(1, min(override_s, max(1, communicate_timeout_s - 1)))
        log.info(
            f"Invalid WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S={override!r}; using default rollout watchdog."
        )

    if _looks_like_web_review_prompt(prompt):
        budget_s = _WEB_REVIEW_ROLLOUT_WATCHDOG_S
    elif _looks_like_small_task_prompt(prompt):
        budget_s = _SMALL_TASK_ROLLOUT_WATCHDOG_S
    else:
        budget_s = _DEFAULT_ROLLOUT_WATCHDOG_S

    floor_s = 90
    if no_edit_watchdog_s is not None:
        # Fire before the no-edit watchdog so the coach gets the first chance.
        budget_s = min(budget_s, max(floor_s, no_edit_watchdog_s - 60))
    ceiling_s = max(floor_s, communicate_timeout_s - 60)
    return max(floor_s, min(budget_s, ceiling_s))
|
|
696
|
+
|
|
697
|
+
|
|
698
|
+
def _describe_non_publishable_paths(changed_paths: List[str], baseline_snapshot: List[str]) -> str:
    """Summarize changed paths that do not count as publishable edits.

    Only paths absent from ``baseline_snapshot`` are inspected; when every
    changed path was already in the baseline, all changed paths are inspected.

    Args:
        changed_paths: Paths currently reported as changed.
        baseline_snapshot: Paths that were already changed before the run.

    Returns:
        Up to eight non-publishable paths joined by ", " (followed by ", ..."
        when more exist), or "" when none are non-publishable.
    """
    # Build the baseline lookup once; testing membership against the list inside
    # the comprehension rescans it for every changed path.
    baseline = set(baseline_snapshot or ())
    delta = [path for path in changed_paths if path not in baseline]
    inspected = delta or changed_paths
    non_publishable = [path for path in inspected if not _is_publishable_changed_path(path)]
    if not non_publishable:
        return ""
    listed = ", ".join(non_publishable[:8])
    if len(non_publishable) > 8:
        listed += ", ..."
    return listed
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
def _build_no_edit_recovery_guidance(trace_excerpt: str, artifact_only_paths: str = "") -> str:
|
|
645
711
|
lines = [
|
|
646
712
|
"No-edit watchdog recovery: the previous Codex attempt spent too much of the execution budget without producing publishable file changes.",
|
|
647
713
|
"Start from the already inspected context. Do not re-read broad repo topology, route wrappers, or missing test infrastructure unless that is the blocker.",
|
|
714
|
+
"Runtime/dependency artifacts such as node_modules, outputs, .worktrees, .codex, dist, build, and coverage do not count as progress.",
|
|
648
715
|
"Within the first response/action, edit the smallest behavior-owning file that satisfies the task. If the hinted file is a thin wrapper, patch the owner you already identified.",
|
|
716
|
+
"If a hinted test path is absent, do not invent PushPals/autonomy-specific files in the user repo. Add repo-native coverage beside existing tests, or make a tiny behavior/script patch with no new broad harness.",
|
|
649
717
|
"Use existing tests or a narrow helper/style assertion; do not create broad React Native mocks or a new full render harness for a compact shell/visual polish task.",
|
|
650
718
|
"Run at most one focused fast validation check before final diff review; let PushPals ValidationGate own long required/browser validation.",
|
|
651
719
|
]
|
|
720
|
+
if artifact_only_paths:
|
|
721
|
+
lines.append(f"Only non-publishable artifact paths changed so far: {artifact_only_paths}.")
|
|
722
|
+
if trace_excerpt:
|
|
723
|
+
lines.append("Previous Codex event trace excerpt:")
|
|
724
|
+
lines.append(trace_excerpt)
|
|
725
|
+
return "\n".join(lines)
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def _trace_summaries_text(trace: Dict[str, Any]) -> str:
|
|
729
|
+
summaries = trace.get("summaries")
|
|
730
|
+
if not isinstance(summaries, list):
|
|
731
|
+
return ""
|
|
732
|
+
return "\n".join(str(item or "") for item in summaries[-80:]).lower()
|
|
733
|
+
|
|
734
|
+
|
|
735
|
+
def _detect_offtrack_rollout(trace: Dict[str, Any], artifact_only_paths: str = "") -> str:
    """Name the first off-track signal found in a live trace, or "" for none.

    Artifact-only changes take priority over any trace content. Otherwise the
    flattened summaries are searched, case-insensitively and in order, for
    missing-file chatter, test-harness drift, and PushPals-internal wording.
    """
    text = _trace_summaries_text(trace)
    if artifact_only_paths:
        return f"only non-publishable artifact paths changed: {artifact_only_paths}"
    if not text:
        return ""

    # (reason, pattern) pairs; earlier entries win when several match.
    signals = (
        (
            "the worker is spending time on missing hinted files or absent repo scaffolding",
            r"(not present|not found|no existing|no .* directory|missing .* checkout|not listed in the checkout|checkout is much smaller|hinted .* absent)",
        ),
        (
            "the worker is drifting into broad test-harness or React Native mock repair",
            r"(full[- ]?(surface|render)|test harness repair|react native mock|broad .*mock|shared mock|adding .*mock helper|full component render)",
        ),
        (
            "the worker is about to add PushPals/autonomy internals to a user repo",
            r"(_layout\.autonomy|queue_health|workerpal|remotebuddy|reviewagent|pushpals-internal|no autonomy module)",
        ),
    )
    return next(
        (reason for reason, pattern in signals if re.search(pattern, text, re.I)),
        "",
    )
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
def _build_rollout_recovery_guidance(
|
|
771
|
+
reason: str,
|
|
772
|
+
trace_excerpt: str,
|
|
773
|
+
artifact_only_paths: str = "",
|
|
774
|
+
) -> str:
|
|
775
|
+
lines = [
|
|
776
|
+
"Rollout coach recovery: the previous Codex trajectory looked unlikely to produce a publishable, repo-native patch inside the budget.",
|
|
777
|
+
f"Detected off-track signal: {reason or 'no publishable progress despite concerning trace signals'}.",
|
|
778
|
+
"Do not continue the same exploration path. Start from the prior findings and make the smallest publishable edit first.",
|
|
779
|
+
"If the requested or hinted file/path is absent, treat it as a stale hint: choose an existing repo-native owner or existing test nearby instead of creating PushPals/autonomy-specific scaffolding.",
|
|
780
|
+
"For web review or shell-validation work, prefer an existing browser/e2e script, route shell, or navigation surface over generic autonomy infrastructure.",
|
|
781
|
+
"Avoid broad React Native render harnesses and shared mock expansion unless the repo already has that stable infrastructure and the task explicitly asks for it.",
|
|
782
|
+
"After the first patch, run one focused fast check or stop with a concise final update so ValidationGate can run the expensive suite.",
|
|
783
|
+
]
|
|
784
|
+
if artifact_only_paths:
|
|
785
|
+
lines.append(f"Only non-publishable artifact paths changed so far: {artifact_only_paths}.")
|
|
652
786
|
if trace_excerpt:
|
|
653
787
|
lines.append("Previous Codex event trace excerpt:")
|
|
654
788
|
lines.append(trace_excerpt)
|
|
@@ -1597,6 +1731,7 @@ def _run_codex_task(
|
|
|
1597
1731
|
wrapper_recovery_attempt: int = 0,
|
|
1598
1732
|
model_compatibility_recovery_attempt: int = 0,
|
|
1599
1733
|
no_edit_recovery_attempt: int = 0,
|
|
1734
|
+
rollout_recovery_attempt: int = 0,
|
|
1600
1735
|
model_override: Optional[str] = None,
|
|
1601
1736
|
baseline_changes: Optional[List[str]] = None,
|
|
1602
1737
|
) -> Dict[str, Any]:
|
|
@@ -1889,17 +2024,35 @@ def _run_codex_task(
|
|
|
1889
2024
|
next_progress_at = started_at + float(progress_interval_s)
|
|
1890
2025
|
timed_out = False
|
|
1891
2026
|
no_edit_watchdog_fired = False
|
|
2027
|
+
no_edit_artifact_only_paths = ""
|
|
2028
|
+
rollout_watchdog_fired = False
|
|
2029
|
+
rollout_watchdog_reason = ""
|
|
2030
|
+
rollout_artifact_only_paths = ""
|
|
1892
2031
|
command_policy_rejection_loop = False
|
|
1893
2032
|
no_edit_watchdog_s = (
|
|
1894
2033
|
_resolve_no_edit_watchdog_seconds(prompt, communicate_timeout_s)
|
|
1895
2034
|
if no_edit_recovery_attempt <= _MAX_NO_EDIT_RECOVERY_ATTEMPTS
|
|
1896
2035
|
else None
|
|
1897
2036
|
)
|
|
2037
|
+
rollout_watchdog_s = (
|
|
2038
|
+
_resolve_rollout_watchdog_seconds(
|
|
2039
|
+
prompt,
|
|
2040
|
+
communicate_timeout_s,
|
|
2041
|
+
no_edit_watchdog_s,
|
|
2042
|
+
)
|
|
2043
|
+
if rollout_recovery_attempt <= _MAX_ROLLOUT_RECOVERY_ATTEMPTS
|
|
2044
|
+
else None
|
|
2045
|
+
)
|
|
1898
2046
|
no_edit_deadline = (
|
|
1899
2047
|
started_at + float(no_edit_watchdog_s)
|
|
1900
2048
|
if no_edit_watchdog_s is not None
|
|
1901
2049
|
else None
|
|
1902
2050
|
)
|
|
2051
|
+
rollout_deadline = (
|
|
2052
|
+
started_at + float(rollout_watchdog_s)
|
|
2053
|
+
if rollout_watchdog_s is not None
|
|
2054
|
+
else None
|
|
2055
|
+
)
|
|
1903
2056
|
|
|
1904
2057
|
while proc.poll() is None:
|
|
1905
2058
|
now = time.monotonic()
|
|
@@ -1909,16 +2062,54 @@ def _run_codex_task(
|
|
|
1909
2062
|
break
|
|
1910
2063
|
|
|
1911
2064
|
if no_edit_deadline is not None and now >= no_edit_deadline:
|
|
1912
|
-
|
|
2065
|
+
changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
|
|
1913
2066
|
if not effective_paths:
|
|
2067
|
+
no_edit_artifact_only_paths = _describe_non_publishable_paths(
|
|
2068
|
+
changed_paths,
|
|
2069
|
+
baseline_snapshot,
|
|
2070
|
+
)
|
|
1914
2071
|
no_edit_watchdog_fired = True
|
|
2072
|
+
artifact_detail = (
|
|
2073
|
+
f" Artifact-only dirty paths: {no_edit_artifact_only_paths}."
|
|
2074
|
+
if no_edit_artifact_only_paths
|
|
2075
|
+
else ""
|
|
2076
|
+
)
|
|
1915
2077
|
log.info(
|
|
1916
|
-
f"No-edit watchdog fired after {int(no_edit_watchdog_s or 0)}s with no publishable file changes
|
|
2078
|
+
f"No-edit watchdog fired after {int(no_edit_watchdog_s or 0)}s with no publishable file changes.{artifact_detail} Retrying with patch-first guidance."
|
|
1917
2079
|
)
|
|
1918
2080
|
_terminate_active_child()
|
|
1919
2081
|
break
|
|
1920
2082
|
no_edit_deadline = None
|
|
1921
2083
|
|
|
2084
|
+
if rollout_deadline is not None and now >= rollout_deadline:
|
|
2085
|
+
changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
|
|
2086
|
+
if not effective_paths:
|
|
2087
|
+
with trace_lock:
|
|
2088
|
+
live_trace = dict(stdout_trace_state)
|
|
2089
|
+
summaries = stdout_trace_state.get("summaries")
|
|
2090
|
+
if isinstance(summaries, list):
|
|
2091
|
+
live_trace["summaries"] = list(summaries)
|
|
2092
|
+
rollout_artifact_only_paths = _describe_non_publishable_paths(
|
|
2093
|
+
changed_paths,
|
|
2094
|
+
baseline_snapshot,
|
|
2095
|
+
)
|
|
2096
|
+
rollout_watchdog_reason = _detect_offtrack_rollout(
|
|
2097
|
+
live_trace,
|
|
2098
|
+
rollout_artifact_only_paths,
|
|
2099
|
+
)
|
|
2100
|
+
if rollout_watchdog_reason:
|
|
2101
|
+
rollout_watchdog_fired = True
|
|
2102
|
+
artifact_detail = (
|
|
2103
|
+
f" Artifact-only dirty paths: {rollout_artifact_only_paths}."
|
|
2104
|
+
if rollout_artifact_only_paths
|
|
2105
|
+
else ""
|
|
2106
|
+
)
|
|
2107
|
+
log.info(
|
|
2108
|
+
f"Rollout coach fired after {int(rollout_watchdog_s or 0)}s: {rollout_watchdog_reason}.{artifact_detail} Retrying with course-correction guidance."
|
|
2109
|
+
)
|
|
2110
|
+
_terminate_active_child()
|
|
2111
|
+
break
|
|
2112
|
+
|
|
1922
2113
|
with trace_lock:
|
|
1923
2114
|
wrapper_rejections = to_int(wrapper_rejection_state.get("count"), 0)
|
|
1924
2115
|
if wrapper_rejections >= 3:
|
|
@@ -1986,11 +2177,50 @@ def _run_codex_task(
|
|
|
1986
2177
|
continue
|
|
1987
2178
|
rejected_shell_wrappers.append(text)
|
|
1988
2179
|
|
|
2180
|
+
if rollout_watchdog_fired:
|
|
2181
|
+
if rollout_recovery_attempt < _MAX_ROLLOUT_RECOVERY_ATTEMPTS:
|
|
2182
|
+
retry_guidance = [
|
|
2183
|
+
*supplemental_guidance,
|
|
2184
|
+
_build_rollout_recovery_guidance(
|
|
2185
|
+
rollout_watchdog_reason,
|
|
2186
|
+
trace_excerpt,
|
|
2187
|
+
rollout_artifact_only_paths,
|
|
2188
|
+
),
|
|
2189
|
+
]
|
|
2190
|
+
return _run_codex_task(
|
|
2191
|
+
repo,
|
|
2192
|
+
instruction,
|
|
2193
|
+
retry_guidance,
|
|
2194
|
+
wrapper_recovery_attempt=wrapper_recovery_attempt,
|
|
2195
|
+
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
|
|
2196
|
+
no_edit_recovery_attempt=no_edit_recovery_attempt,
|
|
2197
|
+
rollout_recovery_attempt=rollout_recovery_attempt + 1,
|
|
2198
|
+
model_override=model_override,
|
|
2199
|
+
baseline_changes=baseline_snapshot,
|
|
2200
|
+
)
|
|
2201
|
+
detail = (
|
|
2202
|
+
"Codex trajectory remained off-track after rollout coach recovery: "
|
|
2203
|
+
f"{rollout_watchdog_reason or 'no publishable progress'}."
|
|
2204
|
+
)
|
|
2205
|
+
if trace_excerpt:
|
|
2206
|
+
detail = f"{detail}\n{trace_excerpt}"
|
|
2207
|
+
return {
|
|
2208
|
+
"ok": False,
|
|
2209
|
+
"summary": "openai_codex rollout coach could not recover publishable progress",
|
|
2210
|
+
"stdout": _truncate(stdout),
|
|
2211
|
+
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
2212
|
+
"exitCode": 124,
|
|
2213
|
+
"usage": usage,
|
|
2214
|
+
}
|
|
2215
|
+
|
|
1989
2216
|
if no_edit_watchdog_fired:
|
|
1990
2217
|
if no_edit_recovery_attempt < _MAX_NO_EDIT_RECOVERY_ATTEMPTS:
|
|
1991
2218
|
retry_guidance = [
|
|
1992
2219
|
*supplemental_guidance,
|
|
1993
|
-
_build_no_edit_recovery_guidance(
|
|
2220
|
+
_build_no_edit_recovery_guidance(
|
|
2221
|
+
trace_excerpt,
|
|
2222
|
+
no_edit_artifact_only_paths,
|
|
2223
|
+
),
|
|
1994
2224
|
]
|
|
1995
2225
|
return _run_codex_task(
|
|
1996
2226
|
repo,
|
|
@@ -1999,6 +2229,7 @@ def _run_codex_task(
|
|
|
1999
2229
|
wrapper_recovery_attempt=wrapper_recovery_attempt,
|
|
2000
2230
|
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
|
|
2001
2231
|
no_edit_recovery_attempt=no_edit_recovery_attempt + 1,
|
|
2232
|
+
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
2002
2233
|
model_override=model_override,
|
|
2003
2234
|
baseline_changes=baseline_snapshot,
|
|
2004
2235
|
)
|
|
@@ -2050,9 +2281,20 @@ def _run_codex_task(
|
|
|
2050
2281
|
"exitCode": 0,
|
|
2051
2282
|
"usage": usage,
|
|
2052
2283
|
}
|
|
2284
|
+
changed_paths, _, _ = _codex_changed_paths(repo, baseline_snapshot)
|
|
2285
|
+
artifact_only_paths = _describe_non_publishable_paths(changed_paths, baseline_snapshot)
|
|
2286
|
+
if artifact_only_paths:
|
|
2287
|
+
detail = (
|
|
2288
|
+
f"{detail}\nOnly non-publishable artifact paths changed before timeout: "
|
|
2289
|
+
f"{artifact_only_paths}."
|
|
2290
|
+
)
|
|
2053
2291
|
return {
|
|
2054
2292
|
"ok": False,
|
|
2055
|
-
"summary":
|
|
2293
|
+
"summary": (
|
|
2294
|
+
"openai_codex timed out without publishable changes"
|
|
2295
|
+
if artifact_only_paths
|
|
2296
|
+
else "openai_codex execution timed out"
|
|
2297
|
+
),
|
|
2056
2298
|
"stdout": _truncate(stdout),
|
|
2057
2299
|
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
2058
2300
|
"exitCode": 124,
|
|
@@ -2149,6 +2391,7 @@ def _run_codex_task(
|
|
|
2149
2391
|
wrapper_recovery_attempt=wrapper_recovery_attempt + 1,
|
|
2150
2392
|
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
|
|
2151
2393
|
no_edit_recovery_attempt=no_edit_recovery_attempt,
|
|
2394
|
+
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
2152
2395
|
model_override=model_override,
|
|
2153
2396
|
baseline_changes=baseline_snapshot,
|
|
2154
2397
|
)
|
|
@@ -2232,6 +2475,7 @@ def _run_codex_task(
|
|
|
2232
2475
|
wrapper_recovery_attempt=wrapper_recovery_attempt,
|
|
2233
2476
|
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt + 1,
|
|
2234
2477
|
no_edit_recovery_attempt=no_edit_recovery_attempt,
|
|
2478
|
+
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
2235
2479
|
model_override=LEGACY_CODEX_MODEL_FALLBACK,
|
|
2236
2480
|
baseline_changes=baseline_snapshot,
|
|
2237
2481
|
)
|