@pushpalsdev/cli 1.1.9 → 1.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +33 -3
- package/package.json +1 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +48 -0
- package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py +107 -0
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +358 -6
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +95 -41
package/dist/pushpals-cli.js
CHANGED
|
@@ -1647,6 +1647,7 @@ var DEFAULT_STARTUP_GIT_REMOTE_TIMEOUT_MS = 1e4;
|
|
|
1647
1647
|
var DEFAULT_EMBEDDED_SERVICE_LAUNCH_WARN_MS = 5000;
|
|
1648
1648
|
var EMBEDDED_SERVICE_RESTART_MAX_ATTEMPTS = 4;
|
|
1649
1649
|
var WORKERPAL_STARTUP_READINESS_PROBE_MAX_MS = 15000;
|
|
1650
|
+
var CLI_SESSION_JOB_LOG_MAX_CHARS = 700;
|
|
1650
1651
|
var EMBEDDED_RUNTIME_SAFETY_CAP_DISABLE_ENV = "PUSHPALS_DISABLE_EMBEDDED_SAFETY_CAPS";
|
|
1651
1652
|
var EMBEDDED_RUNTIME_WINDOWS_SAFETY_CAPS = {
|
|
1652
1653
|
REMOTEBUDDY_WORKERPAL_STARTUP_TIMEOUT_MS: "120000",
|
|
@@ -5192,6 +5193,29 @@ function formatSessionEventLine(event) {
|
|
|
5192
5193
|
const type = String(event.type ?? "").toLowerCase();
|
|
5193
5194
|
const from = String(event.from ?? "");
|
|
5194
5195
|
const payload = event.payload ?? {};
|
|
5196
|
+
if (type === "job_enqueued") {
|
|
5197
|
+
const jobId = String(payload.jobId ?? "").slice(0, 8);
|
|
5198
|
+
const kind = String(payload.kind ?? "").trim();
|
|
5199
|
+
const taskId = String(payload.taskId ?? "").slice(0, 8);
|
|
5200
|
+
const detail = kind || (taskId ? `task ${taskId}` : "queued");
|
|
5201
|
+
return `[job ${jobId}] queued: ${detail}`;
|
|
5202
|
+
}
|
|
5203
|
+
if (type === "job_claimed") {
|
|
5204
|
+
const jobId = String(payload.jobId ?? "").slice(0, 8);
|
|
5205
|
+
const workerId = String(payload.workerId ?? "").trim();
|
|
5206
|
+
return `[job ${jobId}] claimed${workerId ? ` by ${workerId}` : ""}`;
|
|
5207
|
+
}
|
|
5208
|
+
if (type === "job_log") {
|
|
5209
|
+
const jobId = String(payload.jobId ?? "").slice(0, 8);
|
|
5210
|
+
const stream = String(payload.stream ?? "").toLowerCase() === "stderr" ? " stderr" : "";
|
|
5211
|
+
const line = compactCliSessionJobLogLine(String(payload.line ?? "").trim());
|
|
5212
|
+
return line ? `[job ${jobId}${stream}] ${line}` : null;
|
|
5213
|
+
}
|
|
5214
|
+
if (type === "job_failed") {
|
|
5215
|
+
const jobId = String(payload.jobId ?? "").slice(0, 8);
|
|
5216
|
+
const message = String(payload.message ?? "").trim();
|
|
5217
|
+
return `[job ${jobId}] failed: ${message || "unknown"}`;
|
|
5218
|
+
}
|
|
5195
5219
|
if (!shouldDisplayInteractiveSessionEvent(event))
|
|
5196
5220
|
return null;
|
|
5197
5221
|
if (type === "message")
|
|
@@ -5217,10 +5241,10 @@ function formatSessionEventLine(event) {
|
|
|
5217
5241
|
const summary = String(payload.summary ?? "").trim();
|
|
5218
5242
|
return `[task ${taskId}] completed${summary ? `: ${summary}` : ""}`;
|
|
5219
5243
|
}
|
|
5220
|
-
if (type === "
|
|
5244
|
+
if (type === "job_completed") {
|
|
5221
5245
|
const jobId = String(payload.jobId ?? "").slice(0, 8);
|
|
5222
|
-
const
|
|
5223
|
-
return `[job ${jobId}]
|
|
5246
|
+
const summary = String(payload.summary ?? "").trim();
|
|
5247
|
+
return `[job ${jobId}] completed${summary ? `: ${summary}` : ""}`;
|
|
5224
5248
|
}
|
|
5225
5249
|
if (type === "error") {
|
|
5226
5250
|
const message = String(payload.message ?? "").trim();
|
|
@@ -5234,6 +5258,12 @@ function formatSessionEventLine(event) {
|
|
|
5234
5258
|
}
|
|
5235
5259
|
return null;
|
|
5236
5260
|
}
|
|
5261
|
+
/**
 * Collapse a job log line onto one line and cap its length for CLI display.
 *
 * Every whitespace run (newlines and tabs included) becomes a single space and
 * the result is trimmed. When the compacted text is longer than `maxChars` it
 * is cut and suffixed with "..." so the returned string never exceeds
 * `maxChars` characters.
 *
 * @param line Raw log line text; null/undefined is treated as an empty line.
 * @param maxChars Maximum length of the returned string. Defaults to
 *   CLI_SESSION_JOB_LOG_MAX_CHARS.
 * @returns The compacted, possibly truncated line.
 */
function compactCliSessionJobLogLine(line, maxChars = CLI_SESSION_JOB_LOG_MAX_CHARS) {
  const ELLIPSIS = "...";
  const compacted = String(line ?? "").replace(/\s+/g, " ").trim();
  if (compacted.length <= maxChars)
    return compacted;
  // A budget too small to hold any text plus the ellipsis: hard-cut instead of
  // letting a negative slice index wrap around to the end of the string.
  if (maxChars <= ELLIPSIS.length)
    return compacted.slice(0, Math.max(0, maxChars));
  let head = compacted.slice(0, maxChars - ELLIPSIS.length);
  // Never leave a dangling high surrogate; it would render as a broken glyph.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
    head = head.slice(0, -1);
  return `${head}${ELLIPSIS}`;
}
|
|
5237
5267
|
function buildSessionEventReplayFingerprint(event) {
|
|
5238
5268
|
const type = String(event.type ?? "").trim().toLowerCase();
|
|
5239
5269
|
if (type !== "status")
|
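package/package.json
CHANGED
[package.json hunk (+1 -1) is missing from this capture]
package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py
CHANGED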
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import base64
|
|
1
2
|
import os
|
|
2
3
|
import re
|
|
3
4
|
import json
|
|
@@ -19,6 +20,7 @@ from executor_base import (
|
|
|
19
20
|
Logger,
|
|
20
21
|
SettingsResolver,
|
|
21
22
|
config_dir_for_runtime_config,
|
|
23
|
+
parse_task_execute_payload,
|
|
22
24
|
runtime_config,
|
|
23
25
|
)
|
|
24
26
|
from openai_codex_executor import (
|
|
@@ -257,6 +259,52 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
257
259
|
self.assertIn("Keep assertions strict", prompt)
|
|
258
260
|
self.assertIn("bun test tests/localbuddy.request-status.test.ts", prompt)
|
|
259
261
|
|
|
262
|
+
def test_parse_payload_adds_structured_planning_guidance(self) -> None:
    """A schema-v2 ``planning`` block must surface in the task's supplemental guidance."""
    target_path = "app/__tests__/_layout.autonomy.test.ts"
    acceptance_criterion = "Home shell startup is assertable"
    required_step = "bun run web:e2e"

    planning = {
        "intent": "code_change",
        "riskLevel": "medium",
        "queuePriority": "normal",
        "queueWaitBudgetMs": 90_000,
        "executionBudgetMs": 1_800_000,
        "finalizationBudgetMs": 120_000,
        "scope": {
            "readAnywhere": True,
            "writeAllowed": True,
            "writeGlobs": ["app/**", "scripts/**"],
        },
        "targetPaths": [target_path],
        "discovery": {
            "ripgrepQueries": ['rg "home-screen|web:e2e" app scripts'],
            "likelyDirs": ["app", "scripts"],
            "keywords": ["home-screen", "web:e2e"],
        },
        "acceptanceCriteria": [acceptance_criterion],
        "validationSteps": ["bun test", required_step],
        "requiredValidationSteps": [required_step],
    }

    with tempfile.TemporaryDirectory(prefix="pushpals-planning-guidance-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        payload = {
            "kind": "task.execute",
            "repo": str(repo),
            "params": {
                "instruction": "Improve the game startup smoke path",
                "schemaVersion": 2,
                "planning": planning,
            },
        }
        # The executor receives its payload as base64-encoded JSON in argv[1].
        raw_json = json.dumps(payload).encode("utf-8")
        encoded = base64.b64encode(raw_json).decode("ascii")

        task = parse_task_execute_payload(["executor", encoded], logger=Logger("[test]"))
        guidance = "\n".join(task.supplemental_guidance)

        expected_fragments = (
            "Task planning contract from PushPals",
            "Worker phase contract",
            "Write globs are relevance hints, not hard limits",
            target_path,
            acceptance_criterion,
            required_step,
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, guidance)
|
|
307
|
+
|
|
260
308
|
def test_detects_codex_workaround_signals(self) -> None:
|
|
261
309
|
signal = _detect_codex_workaround_signal(
|
|
262
310
|
"Adapting test to avoid external Codex calls because Codex CLI isn't available in this environment.",
|
|
package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py
CHANGED
|
@@ -717,6 +717,110 @@ def _is_non_actionable_planner_guidance(text: str) -> bool:
|
|
|
717
717
|
return any(marker in lower for marker in blocked_markers)
|
|
718
718
|
|
|
719
719
|
|
|
720
|
+
def _string_list(value: Any, *, limit: int = 12, max_chars: int = 220) -> List[str]:
    """Return the non-empty single-line strings found in ``value``.

    ``value`` is used only when it is a ``list``; anything else yields ``[]``.
    Each item is passed through ``to_single_line(item, max_chars)`` and kept
    when the result is truthy. Collection stops as soon as ``limit`` items
    have been gathered (the check runs after each append).
    """
    collected: List[str] = []
    if isinstance(value, list):
        for raw in value:
            cleaned = to_single_line(raw, max_chars)
            if not cleaned:
                continue
            collected.append(cleaned)
            if len(collected) >= limit:
                break
    return collected
|
|
731
|
+
|
|
732
|
+
|
|
733
|
+
def _append_list_guidance(lines: List[str], label: str, values: List[str]) -> None:
|
|
734
|
+
if not values:
|
|
735
|
+
return
|
|
736
|
+
lines.append(f"- {label}:")
|
|
737
|
+
for value in values:
|
|
738
|
+
lines.append(f" - {value}")
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
def _build_planning_guidance(params: Dict[str, Any]) -> str:
    """Render ``params["planning"]`` as a bullet-list guidance block for the worker prompt.

    Returns ``""`` when ``planning`` is missing or not a dict. Otherwise the
    block contains, in order: a summary line (intent / risk / priority), the
    fixed worker phase contract, repo access and scope hints, target paths,
    discovery hints, acceptance criteria and validation steps. Output longer
    than 4000 characters is cut at 4000 and a truncation notice is appended.
    """
    planning = params.get("planning")
    if not isinstance(planning, dict):
        return ""

    lines: List[str] = ["Task planning contract from PushPals:"]

    # One-line summary built from whichever headline fields are present.
    summary_parts = [
        f"{label}={text}"
        for label, text in (
            ("intent", to_single_line(planning.get("intent"), 80)),
            ("risk", to_single_line(planning.get("riskLevel"), 80)),
            ("priority", to_single_line(planning.get("queuePriority"), 80)),
        )
        if text
    ]
    if summary_parts:
        lines.append(f"- Planning summary: {', '.join(summary_parts)}")

    # The phase contract is static text and is always emitted.
    lines.extend(
        [
            "- Worker phase contract: discovering -> editing -> focused validation -> full validation handoff -> final diff review.",
            " - discovering: inspect relevant files/artifacts and state the current hypothesis before editing.",
            " - editing: make the smallest behavior-owning patch.",
            " - focused validation: run targeted fast checks for the changed surface.",
            " - full validation: let PushPals ValidationGate own long required/browser checks unless one local confirmation is explicitly useful.",
            " - final diff review: remove unrelated churn before returning.",
        ]
    )

    scope = planning.get("scope")
    if isinstance(scope, dict):
        # Only genuine booleans are reported; other types are ignored.
        access_parts = [
            f"{label}={str(scope.get(key)).lower()}"
            for label, key in (("read_anywhere", "readAnywhere"), ("write_allowed", "writeAllowed"))
            if isinstance(scope.get(key), bool)
        ]
        if access_parts:
            lines.append(f"- Repo access: {', '.join(access_parts)}")
        write_globs = _string_list(scope.get("writeGlobs"), limit=10)
        if write_globs:
            lines.append("- Write globs are relevance hints, not hard limits; edit behavior-owning files as needed.")
            _append_list_guidance(lines, "Write-scope hints", write_globs)
        _append_list_guidance(lines, "Forbidden path hints", _string_list(scope.get("forbiddenGlobs"), limit=8))

    # Remaining list sections as (label, source mapping, key, _string_list kwargs), in output order.
    sections = [("Target path hints", planning, "targetPaths", {"limit": 12})]
    discovery = planning.get("discovery")
    if isinstance(discovery, dict):
        sections.extend(
            [
                ("Suggested discovery commands", discovery, "ripgrepQueries", {"limit": 8}),
                ("Likely directories", discovery, "likelyDirs", {"limit": 8}),
                ("Search keywords", discovery, "keywords", {"limit": 12}),
            ]
        )
    sections.extend(
        [
            ("Acceptance criteria", planning, "acceptanceCriteria", {"limit": 10, "max_chars": 260}),
            ("Planned validation steps", planning, "validationSteps", {"limit": 8, "max_chars": 260}),
            ("Required vision.md validation steps", planning, "requiredValidationSteps", {"limit": 8, "max_chars": 260}),
        ]
    )
    for label, source, key, list_options in sections:
        _append_list_guidance(lines, label, _string_list(source.get(key), **list_options))

    guidance = "\n".join(lines).strip()
    if len(guidance) <= 4000:
        return guidance
    return guidance[:4000].rstrip() + "\n- Planning guidance truncated to stay within worker prompt budget."
|
|
822
|
+
|
|
823
|
+
|
|
720
824
|
def parse_task_execute_payload(
|
|
721
825
|
argv: List[str],
|
|
722
826
|
*,
|
|
@@ -765,6 +869,9 @@ def parse_task_execute_payload(
|
|
|
765
869
|
quality_revision_hint = str(params.get("qualityRevisionHint") or "").strip()
|
|
766
870
|
|
|
767
871
|
supplemental_guidance: List[str] = []
|
|
872
|
+
planning_guidance = _build_planning_guidance(params)
|
|
873
|
+
if planning_guidance:
|
|
874
|
+
supplemental_guidance.append(planning_guidance)
|
|
768
875
|
if planner_instruction and planner_instruction != instruction:
|
|
769
876
|
if _is_non_actionable_planner_guidance(planner_instruction):
|
|
770
877
|
log.info(
|
|
package/runtime/sandbox/apps/workerpals/src/execute_job.ts
CHANGED
|
@@ -6,11 +6,13 @@
|
|
|
6
6
|
import {
|
|
7
7
|
existsSync,
|
|
8
8
|
lstatSync,
|
|
9
|
+
mkdirSync,
|
|
9
10
|
readdirSync,
|
|
10
11
|
readFileSync,
|
|
11
12
|
renameSync,
|
|
12
13
|
rmSync,
|
|
13
14
|
unlinkSync,
|
|
15
|
+
writeFileSync,
|
|
14
16
|
} from "fs";
|
|
15
17
|
import { resolve } from "path";
|
|
16
18
|
import {
|
|
@@ -24,6 +26,7 @@ import {
|
|
|
24
26
|
matchesGlob,
|
|
25
27
|
normalizeTargetPath,
|
|
26
28
|
requirementsForValidationCommand,
|
|
29
|
+
resolveGitStateFilePath,
|
|
27
30
|
sanitizeSourceControlIdentityField,
|
|
28
31
|
type SourceControlCommitIdentity,
|
|
29
32
|
type ToolRequirement,
|
|
@@ -93,6 +96,8 @@ export interface BrowserValidationRepairPacket {
|
|
|
93
96
|
selector: string | null;
|
|
94
97
|
expected: string | null;
|
|
95
98
|
failureFocus: string | null;
|
|
99
|
+
lastVerifiedStage?: string | null;
|
|
100
|
+
pageUrl?: string | null;
|
|
96
101
|
digest: string;
|
|
97
102
|
previousDigest: string | null;
|
|
98
103
|
previousStage: string | null;
|
|
@@ -101,10 +106,32 @@ export interface BrowserValidationRepairPacket {
|
|
|
101
106
|
previousFailureFocus: string | null;
|
|
102
107
|
progress: "first_failure" | "same_failure" | "new_failure";
|
|
103
108
|
needsDiagnosticProbe: boolean;
|
|
109
|
+
mustReadArtifactsBeforeEdit?: boolean;
|
|
104
110
|
artifacts: string[];
|
|
111
|
+
artifactSummaries?: string[];
|
|
112
|
+
knownFailureHints?: string[];
|
|
105
113
|
output: string;
|
|
106
114
|
}
|
|
107
115
|
|
|
116
|
+
/**
 * One persisted record of a recurring browser-validation failure.
 *
 * Entries are written by `recordBrowserFailureMemory` and read back by
 * `knownFailureHintsForPacket` to produce advisory hints.
 */
interface BrowserFailureMemoryEntry {
  /** Composite identity produced by `browserFailureMemoryKey`; used to find an existing entry. */
  key: string;
  /** Job family label the failure was recorded under. */
  jobFamily: string;
  /** Validation command copied from the repair packet. */
  command: string;
  /** Failure classification copied from the repair packet. */
  failureKind: BrowserValidationFailureKind;
  /** Failing stage from the packet, if one was extracted. */
  stage: string | null;
  /** Failing selector from the packet, if one was extracted. */
  selector: string | null;
  /** Expected UI text/state from the packet, if one was extracted. */
  expected: string | null;
  /** Failure focus from the packet, if any. */
  failureFocus: string | null;
  /** Digest of the most recent occurrence. */
  digest: string;
  /** Number of times this key has been recorded; starts at 1. */
  count: number;
  /** ISO timestamp of the first recording. */
  firstSeenAt: string;
  /** ISO timestamp of the latest recording. */
  lastSeenAt: string;
  /** Last verified stage reported by the latest packet, or null. */
  lastVerifiedStage: string | null;
  /** Page URL reported by the latest packet, or null. */
  pageUrl: string | null;
  /** Artifact summaries from the latest packet (at most six are stored). */
  artifactSummaries: string[];
  /** Advice text produced by `browserFailureSuggestedRemedy` for the latest packet. */
  suggestedRemedy: string;
}
|
|
134
|
+
|
|
108
135
|
interface DeterministicQualityResult {
|
|
109
136
|
ok: boolean;
|
|
110
137
|
skipped: boolean;
|
|
@@ -326,6 +353,31 @@ export function buildQualityGateRevisionIssues(
|
|
|
326
353
|
return [...new Set(merged)];
|
|
327
354
|
}
|
|
328
355
|
|
|
356
|
+
/**
 * Build a warning when a task has touched more files than its diff budget allows.
 *
 * Paths under generated/vendored directories (outputs, node_modules,
 * .worktrees, dist, build, coverage) are ignored, duplicates are counted once,
 * and Windows-style separators are normalized before the directory check.
 *
 * Budget selection:
 *  - `planning.scope.maxFilesToEdit` when it is a finite number >= 1 (floored);
 *  - otherwise 5 for a "small task" (a focused browser repair, or a non-high-risk
 *    task with at most 2 target paths and at most 3 acceptance criteria);
 *  - otherwise 10.
 *
 * @param planning Task planning block; missing `scope`, `targetPaths` or
 *   `acceptanceCriteria` are tolerated and treated as empty.
 * @param changedPaths Paths changed so far.
 * @param focusedBrowserRepair Whether the revision is a focused browser repair.
 * @returns The warning text, or null when the change set is within budget.
 */
function buildDiffBudgetWarning(
  planning: TaskExecutePlanning,
  changedPaths: string[],
  focusedBrowserRepair: boolean,
): string | null {
  const generatedDirPattern = /(^|\/)(outputs|node_modules|\.worktrees|dist|build|coverage)(\/|$)/i;
  // De-duplicate first so a path reported twice does not inflate the count.
  const meaningfulChangedPaths = [...new Set(changedPaths)].filter(
    (path) => !generatedDirPattern.test(path.replace(/\\/g, "/")),
  );
  if (meaningfulChangedPaths.length === 0) return null;

  const explicitBudget = Number(planning.scope?.maxFilesToEdit);
  // Require >= 1 so a fractional value cannot floor to a meaningless 0-file budget.
  const hasExplicitBudget = Number.isFinite(explicitBudget) && explicitBudget >= 1;
  const smallTask =
    focusedBrowserRepair ||
    (planning.riskLevel !== "high" &&
      (planning.targetPaths?.length ?? 0) <= 2 &&
      (planning.acceptanceCriteria?.length ?? 0) <= 3);
  const budget = hasExplicitBudget ? Math.floor(explicitBudget) : smallTask ? 5 : 10;
  if (meaningfulChangedPaths.length <= budget) return null;

  const budgetLabel = hasExplicitBudget
    ? "planning.scope.maxFilesToEdit"
    : smallTask
      ? "small-task"
      : "default";
  const listedPaths = meaningfulChangedPaths.slice(0, 12).join(", ");
  const overflowMarker = meaningfulChangedPaths.length > 12 ? ", ..." : "";
  return `Diff budget warning: this task now changes ${meaningfulChangedPaths.length} file(s), above the ${budget}-file ${budgetLabel} budget. Before editing more, remove unrelated churn and keep only behavior-owning files needed for the current repair. Changed files: ${listedPaths}${overflowMarker}`;
}
|
|
380
|
+
|
|
329
381
|
const TEST_ASSERTION_BALANCE_ISSUE =
|
|
330
382
|
"Changed test files do not show both positive and negative assertion coverage (expected both).";
|
|
331
383
|
|
|
@@ -2032,6 +2084,253 @@ function lastBrowserVerifiedStage(text: string): string | null {
|
|
|
2032
2084
|
return lastVerified ? toSingleLine(lastVerified, 80) : null;
|
|
2033
2085
|
}
|
|
2034
2086
|
|
|
2087
|
+
/**
 * Pull the first recognisable page URL out of browser-validation output.
 *
 * Patterns are tried in priority order: an explicitly labelled URL
 * ("page url:", "url=" ...), a navigation phrase ("navigated to", "opened",
 * "loading"), then any local dev-server URL with a port. Trailing `)`, `,`,
 * `.` and `;` characters are stripped from the capture.
 *
 * @param text Raw validation output; ANSI control sequences are removed first.
 * @returns The URL passed through `toSingleLine(…, 160)`, or null when nothing matched.
 */
function extractBrowserValidationUrl(text: string): string | null {
  const haystack = stripAnsiControlSequences(text);
  const urlMatchers: RegExp[] = [
    /\b(?:page\s+url|current\s+url|browser\s+url|url)\s*[:=]\s*(https?:\/\/[^\s|"'`<>]+)/i,
    /\b(?:navigated\s+to|opened|loading)\s+(https?:\/\/[^\s|"'`<>]+)/i,
    /\b(https?:\/\/(?:127\.0\.0\.1|localhost|0\.0\.0\.0):\d+\/?[^\s|"'`<>]*)/i,
  ];
  for (let index = 0; index < urlMatchers.length; index += 1) {
    const captured = haystack.match(urlMatchers[index])?.[1];
    if (!captured) continue;
    const trimmed = captured.replace(/[),.;]+$/, "").trim();
    if (trimmed) return toSingleLine(trimmed, 160);
  }
  return null;
}
|
|
2101
|
+
|
|
2102
|
+
/**
 * Classify a browser-validation artifact by its file extension.
 *
 * @param path Artifact path; only the trailing extension is inspected (case-insensitive).
 * @returns "screenshot", "trace", "video", "log", "json", or "artifact" when no rule matches.
 */
function inferBrowserArtifactKind(path: string): string {
  // Ordered rules: the first matching extension pattern decides the kind.
  const kindByExtension: ReadonlyArray<readonly [RegExp, string]> = [
    [/\.(?:png|jpe?g|webp)$/i, "screenshot"],
    [/\.zip$/i, "trace"],
    [/\.webm$/i, "video"],
    [/\.(?:log|txt)$/i, "log"],
    [/\.json$/i, "json"],
  ];
  const rule = kindByExtension.find(([pattern]) => pattern.test(path));
  return rule ? rule[1] : "artifact";
}
|
|
2110
|
+
|
|
2111
|
+
/**
 * Guess a validation stage label from an artifact's file name.
 *
 * Two naming schemes are recognised, in this order: a numeric prefix
 * ("03-home-screen.png") and a keyword prefix ("failure_home-screen.png",
 * also "failed", "screenshot", "snapshot"). Dashes and underscores in the
 * captured part are replaced by spaces.
 *
 * @param path Artifact path using `/` or `\` separators.
 * @returns The label passed through `toSingleLine(…, 80)`, or null when neither scheme matches.
 */
function inferBrowserArtifactStageFromPath(path: string): string | null {
  const segments = path.split(/[\\/]/);
  const fileName = segments[segments.length - 1] ?? "";
  const stem = fileName.replace(/\.[^.]+$/, "");
  const numberedHit = /^\d+[-_](.+)$/.exec(stem);
  const keywordHit = /(?:failure|failed|screenshot|snapshot)[-_](.+)$/i.exec(stem);
  for (const hit of [numberedHit, keywordHit]) {
    const rawLabel = hit?.[1];
    // Skip captures that are missing or whitespace-only.
    if (rawLabel && rawLabel.trim()) {
      return toSingleLine(rawLabel.replace(/[-_]+/g, " "), 80);
    }
  }
  return null;
}
|
|
2122
|
+
|
|
2123
|
+
/**
 * Produce one-line summaries for up to six browser-validation artifacts.
 *
 * The supplied artifacts are merged with recently collected ones for the
 * repo. For each artifact the summary lists its path and inferred kind plus,
 * when available, stage, selector, URL and last verified stage. Those details
 * come from the artifact's own text when it can be read, and fall back to the
 * values extracted from `params.context`.
 *
 * Screenshots, traces and videos are never read: decoding binary files as
 * UTF-8 wastes I/O and can only yield garbage matches.
 *
 * @param params.repo Repository root used to resolve relative artifact paths.
 * @param params.artifacts Artifact paths already extracted by the caller.
 * @param params.context Validation output used as the fallback source of details.
 * @returns One summary string per artifact, each passed through `toSingleLine(…, 280)`.
 */
function summarizeBrowserValidationArtifacts(params: {
  repo?: string;
  artifacts: string[];
  context: string;
}): string[] {
  const allArtifacts = mergeBrowserValidationArtifacts(
    params.artifacts,
    collectRecentBrowserValidationArtifacts(params.repo),
  );
  const out: string[] = [];
  const contextStage = extractBrowserValidationStage(params.context);
  const contextSelector = extractBrowserValidationSelector(params.context);
  const contextUrl = extractBrowserValidationUrl(params.context);
  const contextLastVerified = lastBrowserVerifiedStage(params.context);
  const binaryKinds = new Set(["screenshot", "trace", "video"]);
  for (const artifact of allArtifacts.slice(0, 6)) {
    const kind = inferBrowserArtifactKind(artifact);
    let artifactText = "";
    // Paths starting with /repo, /workspace or a drive letter are used as-is;
    // everything else is resolved against the repo when one is given.
    const isRepoRelative =
      Boolean(params.repo) && !/^(?:\/repo|\/workspace|[A-Za-z]:[\\/])/.test(artifact);
    if (isRepoRelative) {
      if (!binaryKinds.has(kind)) {
        try {
          artifactText = readFileSync(resolve(params.repo as string, artifact), "utf8");
        } catch {
          // Unreadable artifacts simply contribute no extra detail.
          artifactText = "";
        }
      }
    } else if (existsSync(artifact) && /\.(?:log|txt|json)$/i.test(artifact)) {
      try {
        artifactText = readFileSync(artifact, "utf8");
      } catch {
        artifactText = "";
      }
    }
    const artifactContext = artifactText ? stripAnsiControlSequences(artifactText) : "";
    const stage =
      inferBrowserArtifactStageFromPath(artifact) ||
      extractBrowserValidationStage(artifactContext) ||
      contextStage;
    const selector = extractBrowserValidationSelector(artifactContext) || contextSelector;
    const url = extractBrowserValidationUrl(artifactContext) || contextUrl;
    const lastVerified = lastBrowserVerifiedStage(artifactContext) || contextLastVerified;
    const detail = [
      `${artifact} [${kind}]`,
      stage ? `stage=${stage}` : "",
      selector ? `selector=${selector}` : "",
      url ? `url=${url}` : "",
      lastVerified ? `last_verified=${lastVerified}` : "",
    ]
      .filter(Boolean)
      .join(" ");
    out.push(toSingleLine(detail, 280));
  }
  return out;
}
|
|
2174
|
+
|
|
2175
|
+
/**
 * Choose the advice text stored alongside a recorded browser failure.
 *
 * The advice depends only on `packet.failureKind`, plus `packet.selector`
 * for assertion failures.
 *
 * @param packet Repair packet describing the current failure.
 * @returns A single-line remediation hint.
 */
function browserFailureSuggestedRemedy(packet: BrowserValidationRepairPacket): string {
  switch (packet.failureKind) {
    case "assertion": {
      const locatorAdvice = packet.selector
        ? `Repair or replace the exact failing locator ${packet.selector} with a stable rendered signal for the same UI stage.`
        : "Repair the exact visible UI assertion or add a stable test id/accessibility label to existing UI.";
      return `Read the latest artifact/log/DOM state before editing. Preserve already-passing browser stages. ${locatorAdvice}`;
    }
    case "startup":
    case "runtime":
      return "Treat as browser startup/runtime provisioning; do not rewrite product UI assertions until ValidationGate reaches an assertion stage.";
    case "network":
      return "Treat as local server/network readiness; add bounded startup diagnostics and avoid changing gameplay/UI behavior.";
    default:
      return "Inspect captured validation output and repair the current failing stage with the smallest behavior-owning diff.";
  }
}
|
|
2193
|
+
|
|
2194
|
+
/**
 * Reduce free-form text to a lowercase, dash-separated token.
 *
 * The value goes through `toSingleLine(…, 120)`, is lowercased, every run of
 * characters outside `a-z0-9` becomes one dash, and leading/trailing dashes
 * are removed. Null and undefined are treated as the empty string.
 */
function normalizeFailureMemoryToken(value: string | null | undefined): string {
  const flattened = toSingleLine(value ?? "", 120).toLowerCase();
  const dashed = flattened.replace(/[^a-z0-9]+/g, "-");
  return dashed.replace(/^-+|-+$/g, "");
}
|
|
2197
|
+
|
|
2198
|
+
/**
 * Derive a job-family label from task parameters.
 *
 * The label joins, with `|`: the autonomy component area
 * (`componentArea` or `component_area`), the planning intent, and up to eight
 * normalized hints taken in order from target paths, write globs, validation
 * steps and required validation steps. Empty parts are dropped.
 *
 * @param params Raw task parameters; `planning` and `autonomy` are used only when they are objects.
 * @returns The family label, or "general" when no part is available.
 */
export function buildTaskFailureJobFamily(params: Record<string, unknown>): string {
  const planning: Partial<TaskExecutePlanning> =
    params.planning && typeof params.planning === "object"
      ? (params.planning as Partial<TaskExecutePlanning>)
      : {};
  const autonomy: Record<string, unknown> =
    params.autonomy && typeof params.autonomy === "object"
      ? (params.autonomy as Record<string, unknown>)
      : {};

  const listOrEmpty = (candidate: unknown): unknown[] => (Array.isArray(candidate) ? candidate : []);
  const rawHints = [
    ...listOrEmpty(planning.targetPaths),
    ...listOrEmpty(planning.scope?.writeGlobs),
    ...listOrEmpty(planning.validationSteps),
    ...listOrEmpty(planning.requiredValidationSteps),
  ];
  const normalizedHints: string[] = [];
  for (const hint of rawHints) {
    const token = normalizeFailureMemoryToken(String(hint));
    if (token) normalizedHints.push(token);
  }
  const targetHints = normalizedHints.slice(0, 8);

  const area = normalizeFailureMemoryToken(String(autonomy.componentArea ?? autonomy.component_area ?? ""));
  const intent = normalizeFailureMemoryToken(String(planning.intent ?? ""));
  const segments = [area, intent, ...targetHints].filter(Boolean);
  return segments.length > 0 ? segments.join("|") : "general";
}
|
|
2218
|
+
|
|
2219
|
+
/**
 * Build the lookup key under which a browser failure is stored.
 *
 * The key joins, with `|`: the job family, the validation command key, the
 * failure kind, and the normalized failure focus, stage, selector and
 * expected value. Empty parts are omitted, so the position of a part within
 * the key is not fixed.
 */
function browserFailureMemoryKey(jobFamily: string, packet: BrowserValidationRepairPacket): string {
  const parts: string[] = [jobFamily, validationCommandKey(packet.command), packet.failureKind];
  for (const field of [packet.failureFocus, packet.stage, packet.selector, packet.expected]) {
    parts.push(normalizeFailureMemoryToken(field));
  }
  return parts.filter(Boolean).join("|");
}
|
|
2232
|
+
|
|
2233
|
+
/**
 * Decide where the failure-memory JSON file lives.
 *
 * The root is the first existing path among PUSHPALS_PROJECT_ROOT_OVERRIDE,
 * PUSHPALS_REPO_ROOT_OVERRIDE, PUSHPALS_REPO_PATH and `repo` (each trimmed);
 * when none exists, `repo` is used unchanged. The file is placed at the path
 * returned by `resolveGitStateFilePath` when that is truthy, otherwise at
 * `<root>/outputs/data/workerpals-failure-memory.json`.
 */
function resolveFailureMemoryPath(repo: string): string {
  const candidateRoots = [
    process.env.PUSHPALS_PROJECT_ROOT_OVERRIDE,
    process.env.PUSHPALS_REPO_ROOT_OVERRIDE,
    process.env.PUSHPALS_REPO_PATH,
    repo,
  ];
  let root = repo;
  for (const candidate of candidateRoots) {
    const trimmed = String(candidate ?? "").trim();
    if (trimmed && existsSync(trimmed)) {
      root = trimmed;
      break;
    }
  }
  const gitStatePath = resolveGitStateFilePath(root, "pushpals-worker-failure-memory.json");
  return gitStatePath ? gitStatePath : resolve(root, "outputs", "data", "workerpals-failure-memory.json");
}
|
|
2247
|
+
|
|
2248
|
+
/**
 * Load the persisted browser failure memory for a repo.
 *
 * The file is expected to hold `{ entries: [...] }`. Entries are kept only
 * when the fields that consumers dereference have the right primitive types
 * (string `key`, `jobFamily`, `command`, `failureKind`, `lastSeenAt`,
 * `suggestedRemedy`; finite numeric `count`), so a hand-edited or corrupted
 * file cannot make later sorting or formatting throw. At most 80 entries are
 * returned.
 *
 * @param repo Repository path used to locate the memory file.
 * @returns The usable entries, or an empty array when the file is missing,
 *   unreadable, or not valid JSON of the expected shape.
 */
function readBrowserFailureMemory(repo: string): BrowserFailureMemoryEntry[] {
  const memoryPath = resolveFailureMemoryPath(repo);
  const isUsableEntry = (entry: unknown): entry is BrowserFailureMemoryEntry => {
    if (!entry || typeof entry !== "object") return false;
    const candidate = entry as Record<string, unknown>;
    const requiredStrings = ["key", "jobFamily", "command", "failureKind", "lastSeenAt", "suggestedRemedy"];
    return (
      requiredStrings.every((field) => typeof candidate[field] === "string") &&
      typeof candidate.count === "number" &&
      Number.isFinite(candidate.count)
    );
  };
  try {
    const parsed: unknown = JSON.parse(readFileSync(memoryPath, "utf8"));
    const entries =
      parsed && typeof parsed === "object" ? (parsed as { entries?: unknown }).entries : undefined;
    if (!Array.isArray(entries)) return [];
    return entries.filter(isUsableEntry).slice(0, 80);
  } catch {
    // Missing file or malformed JSON: treated as having no memory.
    return [];
  }
}
|
|
2260
|
+
|
|
2261
|
+
/**
 * Look up advisory hints for a failure that has been recorded before.
 *
 * An entry matches when it has the same job family, validation command key
 * and failure kind as the packet. When both sides carry a failure focus
 * (or a stage), those must be equal as well. The three best matches, ordered
 * by occurrence count and then by most recent sighting, are rendered as
 * single-line hints.
 *
 * Hints are advisory, so malformed persisted entries are skipped or ordered
 * last rather than raising.
 *
 * @param repo Repository path used to locate the memory file.
 * @param jobFamily Job family label of the current task.
 * @param packet Repair packet describing the current failure.
 * @returns Up to three hint strings, each passed through `toSingleLine(…, 360)`.
 */
export function knownFailureHintsForPacket(
  repo: string,
  jobFamily: string,
  packet: BrowserValidationRepairPacket,
): string[] {
  // The packet's command key is loop-invariant; compute it once.
  const packetCommandKey = validationCommandKey(packet.command);
  const countOf = (entry: BrowserFailureMemoryEntry): number =>
    typeof entry.count === "number" && Number.isFinite(entry.count) ? entry.count : 0;
  const lastSeenOf = (entry: BrowserFailureMemoryEntry): string =>
    typeof entry.lastSeenAt === "string" ? entry.lastSeenAt : "";

  const entries = readBrowserFailureMemory(repo)
    .filter((entry) => {
      if (entry.jobFamily !== jobFamily) return false;
      if (typeof entry.command !== "string") return false;
      if (validationCommandKey(entry.command) !== packetCommandKey) return false;
      if (entry.failureKind !== packet.failureKind) return false;
      if (packet.failureFocus && entry.failureFocus && packet.failureFocus !== entry.failureFocus) return false;
      if (packet.stage && entry.stage && packet.stage !== entry.stage) return false;
      return true;
    })
    .sort((a, b) => countOf(b) - countOf(a) || lastSeenOf(b).localeCompare(lastSeenOf(a)))
    .slice(0, 3);
  return entries.map((entry) =>
    toSingleLine(
      `seen ${entry.count}x before for this repo/job family; last=${entry.lastSeenAt}; focus=${entry.failureFocus ?? entry.stage ?? "unknown"}; remedy=${entry.suggestedRemedy}`,
      360,
    ),
  );
}
|
|
2284
|
+
|
|
2285
|
+
/**
 * Record one occurrence of a browser-validation failure in the failure memory.
 *
 * An entry with the same key has its count incremented and its latest
 * details refreshed; otherwise a new entry is added. The 80 most recently
 * seen entries are kept. The file is written to a temporary sibling and then
 * renamed into place so a concurrent reader never sees a half-written file.
 *
 * Failure memory is advisory: every step, including path resolution and
 * sorting, runs inside the guard so that this function never throws.
 *
 * @param repo Repository path used to locate the memory file.
 * @param jobFamily Job family label of the current task.
 * @param packet Repair packet describing the failure to record.
 */
export function recordBrowserFailureMemory(
  repo: string,
  jobFamily: string,
  packet: BrowserValidationRepairPacket,
): void {
  try {
    const memoryPath = resolveFailureMemoryPath(repo);
    const now = new Date().toISOString();
    const entries = readBrowserFailureMemory(repo);
    const key = browserFailureMemoryKey(jobFamily, packet);

    // Details that always reflect the most recent occurrence.
    const latest = {
      digest: packet.digest,
      lastSeenAt: now,
      lastVerifiedStage: packet.lastVerifiedStage ?? null,
      pageUrl: packet.pageUrl ?? null,
      artifactSummaries: (packet.artifactSummaries ?? []).slice(0, 6),
      suggestedRemedy: browserFailureSuggestedRemedy(packet),
    };

    const existing = entries.find((entry) => entry.key === key);
    if (existing) {
      // Guard against a non-numeric persisted count turning into NaN.
      const previousCount = Number.isFinite(existing.count) ? existing.count : 0;
      Object.assign(existing, latest);
      existing.count = previousCount + 1;
    } else {
      entries.push({
        key,
        jobFamily,
        command: packet.command,
        failureKind: packet.failureKind,
        stage: packet.stage,
        selector: packet.selector,
        expected: packet.expected,
        failureFocus: packet.failureFocus,
        count: 1,
        firstSeenAt: now,
        ...latest,
      });
    }

    const next = entries
      .sort((a, b) => String(b.lastSeenAt ?? "").localeCompare(String(a.lastSeenAt ?? "")))
      .slice(0, 80);

    mkdirSync(resolve(memoryPath, ".."), { recursive: true });
    const tempPath = `${memoryPath}.${process.pid}.tmp`;
    try {
      writeFileSync(tempPath, `${JSON.stringify({ version: 1, entries: next }, null, 2)}\n`);
      renameSync(tempPath, memoryPath);
    } catch (writeError) {
      // Do not leave a stray temp file behind when the write or rename fails.
      rmSync(tempPath, { force: true });
      throw writeError;
    }
  } catch {
    // Failure memory is advisory; never fail a worker job because persistence is unavailable.
  }
}
|
|
2333
|
+
|
|
2035
2334
|
export function extractValidationFailureRetryDigest(
|
|
2036
2335
|
run: {
|
|
2037
2336
|
command: string;
|
|
@@ -2075,6 +2374,7 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2075
2374
|
validationRuns: ValidationExecutionResult[],
|
|
2076
2375
|
previousFailureDigests: Map<string, string> = new Map(),
|
|
2077
2376
|
repo?: string,
|
|
2377
|
+
knownFailureHints: string[] = [],
|
|
2078
2378
|
): BrowserValidationRepairPacket | null {
|
|
2079
2379
|
for (const run of validationRuns) {
|
|
2080
2380
|
if (run.ok || !isLongRunningBrowserValidationCommand(run.command)) continue;
|
|
@@ -2091,6 +2391,8 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2091
2391
|
const enrichedBrowserContext = [combined, recentLogSummary].filter(Boolean).join("\n");
|
|
2092
2392
|
const selector = extractBrowserValidationSelector(enrichedBrowserContext);
|
|
2093
2393
|
const expected = extractBrowserValidationExpectedUi(enrichedBrowserContext);
|
|
2394
|
+
const lastVerifiedStage = lastBrowserVerifiedStage(enrichedBrowserContext);
|
|
2395
|
+
const pageUrl = extractBrowserValidationUrl(enrichedBrowserContext);
|
|
2094
2396
|
const stage = refineBrowserValidationStage(
|
|
2095
2397
|
extractBrowserValidationStage(enrichedBrowserContext),
|
|
2096
2398
|
selector,
|
|
@@ -2129,6 +2431,15 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2129
2431
|
const needsDiagnosticProbe =
|
|
2130
2432
|
failureKind === "assertion" &&
|
|
2131
2433
|
sameFailureSignal;
|
|
2434
|
+
const artifacts = mergeBrowserValidationArtifacts(
|
|
2435
|
+
extractBrowserValidationArtifacts(combined),
|
|
2436
|
+
collectRecentBrowserValidationArtifacts(repo),
|
|
2437
|
+
);
|
|
2438
|
+
const artifactSummaries = summarizeBrowserValidationArtifacts({
|
|
2439
|
+
repo,
|
|
2440
|
+
artifacts,
|
|
2441
|
+
context: enrichedBrowserContext,
|
|
2442
|
+
});
|
|
2132
2443
|
return {
|
|
2133
2444
|
command: run.command,
|
|
2134
2445
|
failureKind,
|
|
@@ -2136,6 +2447,8 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2136
2447
|
selector,
|
|
2137
2448
|
expected,
|
|
2138
2449
|
failureFocus,
|
|
2450
|
+
lastVerifiedStage,
|
|
2451
|
+
pageUrl,
|
|
2139
2452
|
digest,
|
|
2140
2453
|
previousDigest,
|
|
2141
2454
|
previousStage,
|
|
@@ -2144,10 +2457,10 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2144
2457
|
previousFailureFocus,
|
|
2145
2458
|
progress,
|
|
2146
2459
|
needsDiagnosticProbe,
|
|
2147
|
-
|
|
2148
|
-
|
|
2149
|
-
|
|
2150
|
-
),
|
|
2460
|
+
mustReadArtifactsBeforeEdit: failureKind === "assertion",
|
|
2461
|
+
artifacts,
|
|
2462
|
+
artifactSummaries,
|
|
2463
|
+
knownFailureHints: knownFailureHints.slice(0, 3),
|
|
2151
2464
|
output: [
|
|
2152
2465
|
summarizeBrowserValidationOutput(combined) || digest,
|
|
2153
2466
|
recentLogSummary,
|
|
@@ -3204,10 +3517,16 @@ export function buildQualityRevisionHint(
|
|
|
3204
3517
|
validationRuns: ValidationExecutionResult[] = [],
|
|
3205
3518
|
validationBlocker: ValidationBlocker | null = null,
|
|
3206
3519
|
browserRepairPacket: BrowserValidationRepairPacket | null = null,
|
|
3520
|
+
changedPaths: string[] = [],
|
|
3207
3521
|
): string {
|
|
3208
3522
|
const lines: string[] = [];
|
|
3209
3523
|
lines.push("Quality revision required before completion.");
|
|
3210
3524
|
const focusedBrowserRepair = Boolean(browserRepairPacket);
|
|
3525
|
+
lines.push(
|
|
3526
|
+
"Worker phase contract: (1) discovering - inspect only the relevant files/artifacts and name the current hypothesis; (2) editing - make the smallest behavior-owning patch; (3) focused validation - run targeted fast checks; (4) full validation - let PushPals ValidationGate own long required checks unless a single local confirmation is explicitly useful; (5) final diff review - verify changed files are necessary and no unrelated churn remains.",
|
|
3527
|
+
);
|
|
3528
|
+
const diffBudgetWarning = buildDiffBudgetWarning(planning, changedPaths, focusedBrowserRepair);
|
|
3529
|
+
if (diffBudgetWarning) lines.push(diffBudgetWarning);
|
|
3211
3530
|
const validationAlreadyPassed =
|
|
3212
3531
|
validationRuns.length > 0 && validationRuns.every((run) => run.ok);
|
|
3213
3532
|
if (validationAlreadyPassed && !focusedBrowserRepair) {
|
|
@@ -3232,6 +3551,12 @@ export function buildQualityRevisionHint(
|
|
|
3232
3551
|
if (browserRepairPacket.failureFocus) {
|
|
3233
3552
|
lines.push(`- Failure focus: ${browserRepairPacket.failureFocus}`);
|
|
3234
3553
|
}
|
|
3554
|
+
if (browserRepairPacket.lastVerifiedStage) {
|
|
3555
|
+
lines.push(`- Last verified browser checkpoint: ${browserRepairPacket.lastVerifiedStage}`);
|
|
3556
|
+
}
|
|
3557
|
+
if (browserRepairPacket.pageUrl) {
|
|
3558
|
+
lines.push(`- Browser URL at failure: ${browserRepairPacket.pageUrl}`);
|
|
3559
|
+
}
|
|
3235
3560
|
if (browserRepairPacket.expected) {
|
|
3236
3561
|
lines.push(`- Expected UI: ${browserRepairPacket.expected}`);
|
|
3237
3562
|
}
|
|
@@ -3248,6 +3573,18 @@ export function buildQualityRevisionHint(
|
|
|
3248
3573
|
"- Failure artifacts: none were captured in command output; if this repo writes screenshots/traces, inspect the latest browser failure artifact before changing selectors.",
|
|
3249
3574
|
);
|
|
3250
3575
|
}
|
|
3576
|
+
if ((browserRepairPacket.artifactSummaries ?? []).length > 0) {
|
|
3577
|
+
lines.push("Latest browser artifact summaries:");
|
|
3578
|
+
for (const artifactSummary of browserRepairPacket.artifactSummaries ?? []) {
|
|
3579
|
+
lines.push(`- ${artifactSummary}`);
|
|
3580
|
+
}
|
|
3581
|
+
}
|
|
3582
|
+
if ((browserRepairPacket.knownFailureHints ?? []).length > 0) {
|
|
3583
|
+
lines.push("Known issue/remedy memory for this repo/job family:");
|
|
3584
|
+
for (const hint of browserRepairPacket.knownFailureHints ?? []) {
|
|
3585
|
+
lines.push(`- ${hint}`);
|
|
3586
|
+
}
|
|
3587
|
+
}
|
|
3251
3588
|
if (browserRepairPacket.digest) {
|
|
3252
3589
|
lines.push(`- Current failure: ${browserRepairPacket.digest}`);
|
|
3253
3590
|
}
|
|
@@ -3276,6 +3613,11 @@ export function buildQualityRevisionHint(
|
|
|
3276
3613
|
} else {
|
|
3277
3614
|
lines.push("- Breadcrumb: first captured failure for this command in this revision loop");
|
|
3278
3615
|
}
|
|
3616
|
+
if (browserRepairPacket.mustReadArtifactsBeforeEdit) {
|
|
3617
|
+
lines.push(
|
|
3618
|
+
"- Diagnostic artifact read requirement: before editing, explicitly inspect the listed latest artifact/log/DOM summary for the failing stage. If the artifacts are missing, stale, or stop before the failing locator, add a tiny temporary diagnostic/log for locator counts, visible text, URL, and nearby DOM/test-id state before changing product code or selectors.",
|
|
3619
|
+
);
|
|
3620
|
+
}
|
|
3279
3621
|
if (browserRepairPacket.needsDiagnosticProbe) {
|
|
3280
3622
|
lines.push(
|
|
3281
3623
|
"- Convergence mode: diagnostic-first repair. This same browser focus failed in the previous revision, so do not guess another selector or rewrite a different stage.",
|
|
@@ -3457,7 +3799,7 @@ export function buildQualityRevisionHint(
|
|
|
3457
3799
|
for (const step of planning.requiredValidationSteps ?? []) lines.push(`- ${step}`);
|
|
3458
3800
|
}
|
|
3459
3801
|
lines.push("Apply a minimal corrective patch, run focused validation, then finish.");
|
|
3460
|
-
return lines.join("\n").slice(0,
|
|
3802
|
+
return lines.join("\n").slice(0, 8000);
|
|
3461
3803
|
}
|
|
3462
3804
|
|
|
3463
3805
|
function inferTargetPathFromInstruction(text: string): string | null {
|
|
@@ -6214,6 +6556,7 @@ export async function executeJob(
|
|
|
6214
6556
|
let revisionAttempt = 0;
|
|
6215
6557
|
let revisionHint = "";
|
|
6216
6558
|
const previousValidationFailureDigests = new Map<string, string>();
|
|
6559
|
+
const failureJobFamily = buildTaskFailureJobFamily(normalizedParams);
|
|
6217
6560
|
while (revisionAttempt <= qualityRevisionLoopMax) {
|
|
6218
6561
|
const attemptParams: Record<string, unknown> = { ...normalizedParams };
|
|
6219
6562
|
if (revisionHint) {
|
|
@@ -6313,11 +6656,19 @@ export async function executeJob(
|
|
|
6313
6656
|
revisionAttempt,
|
|
6314
6657
|
},
|
|
6315
6658
|
);
|
|
6316
|
-
|
|
6659
|
+
let browserRepairPacket = buildBrowserValidationRepairPacket(
|
|
6317
6660
|
quality.validationRuns,
|
|
6318
6661
|
previousValidationFailureDigests,
|
|
6319
6662
|
repo,
|
|
6320
6663
|
);
|
|
6664
|
+
if (browserRepairPacket) {
|
|
6665
|
+
const knownFailureHints = knownFailureHintsForPacket(repo, failureJobFamily, browserRepairPacket);
|
|
6666
|
+
browserRepairPacket = {
|
|
6667
|
+
...browserRepairPacket,
|
|
6668
|
+
knownFailureHints,
|
|
6669
|
+
};
|
|
6670
|
+
recordBrowserFailureMemory(repo, failureJobFamily, browserRepairPacket);
|
|
6671
|
+
}
|
|
6321
6672
|
for (const run of quality.validationRuns) {
|
|
6322
6673
|
if (run.ok) continue;
|
|
6323
6674
|
const digest = extractValidationFailureRetryDigest(run, repo);
|
|
@@ -6592,6 +6943,7 @@ export async function executeJob(
|
|
|
6592
6943
|
validationOutsideTaskScope ? [] : quality.validationRuns,
|
|
6593
6944
|
validationOutsideTaskScope ? null : quality.blocker,
|
|
6594
6945
|
validationOutsideTaskScope ? null : browserRepairPacket,
|
|
6946
|
+
quality.changedPaths,
|
|
6595
6947
|
);
|
|
6596
6948
|
onLog?.(
|
|
6597
6949
|
"stderr",
|
|
@@ -67,6 +67,7 @@ const DEFAULT_LLM_MODEL = "local-model";
|
|
|
67
67
|
const CODEX_UNAVAILABLE_WORKER_EXIT_CODE = 86;
|
|
68
68
|
const CODEX_UNAVAILABLE_DOCKER_SHUTDOWN_GRACE_MS = 5_000;
|
|
69
69
|
const CODEX_UNAVAILABLE_WORKER_FORCE_EXIT_MS = 4_000;
|
|
70
|
+
const DEFAULT_JOB_PROGRESS_LOG_EVERY_MS = 60_000;
|
|
70
71
|
const CONFIG = loadPushPalsConfig();
|
|
71
72
|
const LOG = new Logger("WorkerPals");
|
|
72
73
|
|
|
@@ -197,7 +198,12 @@ async function reportToolRunForUnsuccessfulJob(args: {
|
|
|
197
198
|
if (record.failureClass === "unknown" && record.tool === "shell") return;
|
|
198
199
|
|
|
199
200
|
try {
|
|
200
|
-
const response = await postJsonWithTimeout(
|
|
201
|
+
const response = await postJsonWithTimeout(
|
|
202
|
+
`${args.opts.server}/tool-runs`,
|
|
203
|
+
args.headers,
|
|
204
|
+
record,
|
|
205
|
+
5_000,
|
|
206
|
+
);
|
|
201
207
|
if (!response.ok) {
|
|
202
208
|
const detail = await response.text().catch(() => "");
|
|
203
209
|
console.warn(
|
|
@@ -315,6 +321,13 @@ function formatDurationMs(durationMs: number): string {
|
|
|
315
321
|
return `${minutes}m ${seconds}s`;
|
|
316
322
|
}
|
|
317
323
|
|
|
324
|
+
function resolveJobProgressLogEveryMs(): number {
|
|
325
|
+
const raw = Number.parseInt(process.env.PUSHPALS_WORKERPAL_PROGRESS_LOG_MS ?? "", 10);
|
|
326
|
+
if (Number.isFinite(raw) && raw === 0) return 0;
|
|
327
|
+
if (Number.isFinite(raw) && raw >= 10_000) return raw;
|
|
328
|
+
return DEFAULT_JOB_PROGRESS_LOG_EVERY_MS;
|
|
329
|
+
}
|
|
330
|
+
|
|
318
331
|
function sanitizeJobLogLine(line: string): string {
|
|
319
332
|
// Strip ANSI escape/control sequences and collapse whitespace.
|
|
320
333
|
const cleaned = line
|
|
@@ -985,8 +998,7 @@ function failNoChangeReviewFixJob(jobId: string, result: WorkerJobResult): Worke
|
|
|
985
998
|
return {
|
|
986
999
|
...result,
|
|
987
1000
|
ok: false,
|
|
988
|
-
summary:
|
|
989
|
-
`Rejected review-fix job ${jobId} produced no code changes; refusing unchanged branch re-review.`,
|
|
1001
|
+
summary: `Rejected review-fix job ${jobId} produced no code changes; refusing unchanged branch re-review.`,
|
|
990
1002
|
stderr: [
|
|
991
1003
|
result.stderr,
|
|
992
1004
|
"Review-fix jobs must make at least one concrete code/test/docs change before requesting another review.",
|
|
@@ -1002,9 +1014,7 @@ function taskExecuteOrigin(params: Record<string, unknown> | undefined): "user"
|
|
|
1002
1014
|
if (!params) return "user";
|
|
1003
1015
|
if (params.origin === "autonomy") return "autonomy";
|
|
1004
1016
|
const autonomy = params.autonomy;
|
|
1005
|
-
return autonomy && typeof autonomy === "object" && !Array.isArray(autonomy)
|
|
1006
|
-
? "autonomy"
|
|
1007
|
-
: "user";
|
|
1017
|
+
return autonomy && typeof autonomy === "object" && !Array.isArray(autonomy) ? "autonomy" : "user";
|
|
1008
1018
|
}
|
|
1009
1019
|
|
|
1010
1020
|
async function enqueueCompletion(
|
|
@@ -1109,15 +1119,19 @@ async function failActiveJobOnShutdown(
|
|
|
1109
1119
|
runtimeState.currentSessionId &&
|
|
1110
1120
|
shouldEmitDirectSessionJobEvent({ ok: false, statusPersistedToServer })
|
|
1111
1121
|
) {
|
|
1112
|
-
await transport.queueSessionCommand(
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1122
|
+
await transport.queueSessionCommand(
|
|
1123
|
+
runtimeState.currentSessionId,
|
|
1124
|
+
{
|
|
1125
|
+
type: "job_failed",
|
|
1126
|
+
payload: {
|
|
1127
|
+
jobId: activeJobId,
|
|
1128
|
+
message,
|
|
1129
|
+
detail,
|
|
1130
|
+
},
|
|
1131
|
+
from: `worker:${opts.workerId}`,
|
|
1118
1132
|
},
|
|
1119
|
-
|
|
1120
|
-
|
|
1133
|
+
{ priority: "high" },
|
|
1134
|
+
);
|
|
1121
1135
|
}
|
|
1122
1136
|
}
|
|
1123
1137
|
|
|
@@ -1224,10 +1238,7 @@ async function workerLoop(
|
|
|
1224
1238
|
const job = data.job;
|
|
1225
1239
|
|
|
1226
1240
|
if (job) {
|
|
1227
|
-
if (
|
|
1228
|
-
dockerExecutor &&
|
|
1229
|
-
dockerExecutor.shouldPrepareMergeConflictJobBeforeExecution(job)
|
|
1230
|
-
) {
|
|
1241
|
+
if (dockerExecutor && dockerExecutor.shouldPrepareMergeConflictJobBeforeExecution(job)) {
|
|
1231
1242
|
const deferMs = dockerExecutor.recommendedMergeConflictDeferMs();
|
|
1232
1243
|
const deferred = await deferClaimedJobForMaintenance(opts, headers, job.id, deferMs);
|
|
1233
1244
|
if (!deferred.ok) {
|
|
@@ -1325,50 +1336,86 @@ async function workerLoop(
|
|
|
1325
1336
|
}, heartbeatEveryMs);
|
|
1326
1337
|
|
|
1327
1338
|
if (job.sessionId) {
|
|
1328
|
-
await transport.queueSessionCommand(
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1339
|
+
await transport.queueSessionCommand(
|
|
1340
|
+
job.sessionId,
|
|
1341
|
+
{
|
|
1342
|
+
type: "job_claimed",
|
|
1343
|
+
payload: { jobId: job.id, workerId: opts.workerId },
|
|
1344
|
+
from: `worker:${opts.workerId}`,
|
|
1345
|
+
},
|
|
1346
|
+
{ priority: "high" },
|
|
1347
|
+
);
|
|
1333
1348
|
}
|
|
1334
1349
|
|
|
1335
1350
|
let stdoutSeq = 0;
|
|
1336
1351
|
let stderrSeq = 0;
|
|
1337
1352
|
let lastCleanLog = "";
|
|
1338
1353
|
let lastCleanLogAt = 0;
|
|
1354
|
+
let lastForwardedJobLogAt = Date.now();
|
|
1339
1355
|
|
|
1340
|
-
const
|
|
1341
|
-
? (stream: "stdout" | "stderr", line: string) => {
|
|
1356
|
+
const emitJobLog = job.sessionId
|
|
1357
|
+
? (stream: "stdout" | "stderr", line: string): boolean => {
|
|
1342
1358
|
const cleaned = sanitizeJobLogLine(line);
|
|
1343
|
-
if (!cleaned) return;
|
|
1344
|
-
// Print executor logs locally only in debug mode.
|
|
1345
|
-
if (LOG.isDebugEnabled()) LOG.debug(`[${stream}] ${cleaned}`);
|
|
1359
|
+
if (!cleaned) return false;
|
|
1346
1360
|
|
|
1347
1361
|
// Drop high-frequency terminal progress redraw spam; keep meaningful lines.
|
|
1348
|
-
if (isNoisyProgressLine(cleaned)) return;
|
|
1362
|
+
if (isNoisyProgressLine(cleaned)) return false;
|
|
1349
1363
|
|
|
1350
1364
|
// Collapse very noisy duplicate lines emitted in tight loops.
|
|
1351
1365
|
const now = Date.now();
|
|
1352
|
-
if (cleaned === lastCleanLog && now - lastCleanLogAt < 1_000) return;
|
|
1366
|
+
if (cleaned === lastCleanLog && now - lastCleanLogAt < 1_000) return false;
|
|
1353
1367
|
lastCleanLog = cleaned;
|
|
1354
1368
|
lastCleanLogAt = now;
|
|
1369
|
+
lastForwardedJobLogAt = now;
|
|
1355
1370
|
const logTs = new Date(now).toISOString();
|
|
1356
1371
|
|
|
1357
1372
|
const seq = stream === "stdout" ? ++stdoutSeq : ++stderrSeq;
|
|
1358
|
-
void transport.queueSessionCommand(
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1373
|
+
void transport.queueSessionCommand(
|
|
1374
|
+
job.sessionId,
|
|
1375
|
+
{
|
|
1376
|
+
type: "job_log",
|
|
1377
|
+
payload: { jobId: job.id, stream, seq, line: cleaned, ts: logTs },
|
|
1378
|
+
from: `worker:${opts.workerId}`,
|
|
1379
|
+
},
|
|
1380
|
+
{ droppable: true },
|
|
1381
|
+
);
|
|
1363
1382
|
void transport.queueJobLog(job.id, {
|
|
1364
1383
|
stream,
|
|
1365
1384
|
seq,
|
|
1366
1385
|
message: cleaned,
|
|
1367
1386
|
ts: logTs,
|
|
1368
1387
|
});
|
|
1388
|
+
return true;
|
|
1389
|
+
}
|
|
1390
|
+
: undefined;
|
|
1391
|
+
|
|
1392
|
+
const onLog = emitJobLog
|
|
1393
|
+
? (stream: "stdout" | "stderr", line: string) => {
|
|
1394
|
+
const cleaned = sanitizeJobLogLine(line);
|
|
1395
|
+
if (LOG.isDebugEnabled() && cleaned) LOG.debug(`[${stream}] ${cleaned}`);
|
|
1396
|
+
emitJobLog(stream, line);
|
|
1369
1397
|
}
|
|
1370
1398
|
: undefined;
|
|
1371
1399
|
|
|
1400
|
+
const jobClaimedAtMs = Date.now();
|
|
1401
|
+
const jobProgressLogEveryMs = resolveJobProgressLogEveryMs();
|
|
1402
|
+
const jobProgressTimer =
|
|
1403
|
+
emitJobLog && jobProgressLogEveryMs > 0
|
|
1404
|
+
? setInterval(() => {
|
|
1405
|
+
const now = Date.now();
|
|
1406
|
+
const quietForMs = Math.max(0, now - lastForwardedJobLogAt);
|
|
1407
|
+
if (quietForMs < jobProgressLogEveryMs) return;
|
|
1408
|
+
emitJobLog(
|
|
1409
|
+
"stdout",
|
|
1410
|
+
`[WorkerPals] Job ${job.id} still running after ${formatDurationMs(
|
|
1411
|
+
now - jobClaimedAtMs,
|
|
1412
|
+
)} (kind=${job.kind}, worker=${opts.workerId}, quiet_for=${formatDurationMs(
|
|
1413
|
+
quietForMs,
|
|
1414
|
+
)}).`,
|
|
1415
|
+
);
|
|
1416
|
+
}, jobProgressLogEveryMs)
|
|
1417
|
+
: null;
|
|
1418
|
+
|
|
1372
1419
|
let directWorktreePath: string | null = null;
|
|
1373
1420
|
let executionRepo = opts.repo;
|
|
1374
1421
|
let result: WorkerJobResult | null = null;
|
|
@@ -1611,11 +1658,15 @@ async function workerLoop(
|
|
|
1611
1658
|
durationMs: jobDurationMs,
|
|
1612
1659
|
phase: job.kind,
|
|
1613
1660
|
});
|
|
1614
|
-
const response = await postJsonWithTimeout(
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1618
|
-
|
|
1661
|
+
const response = await postJsonWithTimeout(
|
|
1662
|
+
`${opts.server}/jobs/${job.id}/fail`,
|
|
1663
|
+
headers,
|
|
1664
|
+
{
|
|
1665
|
+
message: result.summary,
|
|
1666
|
+
detail: redactSensitiveText(result.stderr ?? ""),
|
|
1667
|
+
durationMs: jobDurationMs,
|
|
1668
|
+
},
|
|
1669
|
+
);
|
|
1619
1670
|
statusPersistedToServer = response.ok;
|
|
1620
1671
|
console.log(
|
|
1621
1672
|
`[WorkerPals] Job ${job.id} failed in ${formatDurationMs(jobDurationMs)}: ${result.summary}`,
|
|
@@ -1703,6 +1754,7 @@ async function workerLoop(
|
|
|
1703
1754
|
}
|
|
1704
1755
|
} finally {
|
|
1705
1756
|
clearInterval(busyHeartbeat);
|
|
1757
|
+
if (jobProgressTimer) clearInterval(jobProgressTimer);
|
|
1706
1758
|
if (recycleWorkerAfterJob) {
|
|
1707
1759
|
runtimeState.shutdownRequested = true;
|
|
1708
1760
|
const forceExitTimer = setTimeout(() => {
|
|
@@ -1895,7 +1947,9 @@ async function main(): Promise<void> {
|
|
|
1895
1947
|
},
|
|
1896
1948
|
}),
|
|
1897
1949
|
);
|
|
1898
|
-
await withTimeout(
|
|
1950
|
+
await withTimeout(
|
|
1951
|
+
failActiveJobOnShutdown(opts, headers, runtimeState, transport, signalName),
|
|
1952
|
+
);
|
|
1899
1953
|
await withTimeout(transport.flush());
|
|
1900
1954
|
if (dockerExecutor) {
|
|
1901
1955
|
await withTimeout(
|