@pushpalsdev/cli 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pushpalsdev/cli",
3
- "version": "1.1.0",
3
+ "version": "1.1.2",
4
4
  "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -16,7 +16,7 @@ Execution rules:
16
16
  - If the hinted file is a thin wrapper or the behavior lives elsewhere, edit the behavior-owning file(s) needed to solve the task and explain the scope expansion in your final response.
17
17
  - Avoid irrelevant sprawl; the review agent will judge whether changed files are necessary for the requested outcome.
18
18
  - Read relevant files before editing, then run focused validation.
19
- - PushPals runs the deterministic ValidationGate after your edit, including any repo-required `vision.md` commands. During the editing turn, prefer focused/fast validation. Do not spend the main Codex execution budget repeatedly running long browser/e2e smoke commands such as `bun run web:e2e`; run them only when the task is specifically about the browser harness or when you need a final targeted confirmation and can stop promptly on a clear failure.
19
+ - PushPals runs the deterministic ValidationGate after your edit, including any repo-required `vision.md` commands. During the editing turn, prefer focused/fast validation. Do not run long browser/e2e smoke commands such as `bun run web:e2e` by default from the Codex executor; ValidationGate is the authoritative browser runner and has the provisioned browser/runtime environment. For browser-harness tasks, inspect existing artifacts, run fast non-browser checks, and only run the full browser command once when a quick local startup probe shows it can run here and you need one targeted confirmation.
20
20
  - Use direct commands without shell wrappers. Prefer plain commands like `git diff -- path`, `git add <path>`, `git status --porcelain`, and `pwd`.
21
21
  - Do not wrap commands in `/bin/bash -lc`, `sh -lc`, `cmd /c`, or `powershell -Command`, and avoid pipelines, `awk`, heredocs, or multi-command shell snippets unless they are truly unavoidable.
22
22
  - If the command router rejects a command, simplify it to a single direct command instead of retrying more shell wrappers.
@@ -295,6 +295,8 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
295
295
  template = _load_prompt_template("workerpals/openai_codex_task_execute_system_prompt.md")
296
296
  self.assertIn("Codex CLI is required infrastructure", template)
297
297
  self.assertIn("Use direct commands without shell wrappers", template)
298
+ self.assertIn("ValidationGate is the authoritative browser runner", template)
299
+ self.assertIn("Do not run long browser/e2e smoke commands", template)
298
300
 
299
301
  def test_extracts_usage_counts_from_nested_json_event(self) -> None:
300
302
  usage = _extract_usage_counts(
@@ -43,6 +43,10 @@ const WORKERPAL_SANDBOX_COMPONENT_LABEL = "pushpals.component=workerpals-sandbox
43
43
  const DOCKER_IMAGE_INSPECT_TIMEOUT_MS = 15_000;
44
44
  const DOCKER_IMAGE_BUILD_TIMEOUT_MS = 10 * 60_000;
45
45
  const DOCKER_IMAGE_PULL_TIMEOUT_MS = 10 * 60_000;
46
+ const BROWSER_VALIDATION_JOB_REPAIR_ATTEMPTS = 8;
47
+ const BROWSER_VALIDATION_JOB_OVERHEAD_MS = 15 * 60_000;
48
+ const BROWSER_VALIDATION_JOB_MIN_TIMEOUT_MS = 4 * 60 * 60_000;
49
+ const BROWSER_VALIDATION_JOB_MAX_TIMEOUT_MS = 8 * 60 * 60_000;
46
50
 
47
51
  function parseClampedInt(value: unknown, defaultValue: number, min: number, max: number): number {
48
52
  const parsed =
@@ -237,6 +241,75 @@ export interface Job {
237
241
  sessionId: string;
238
242
  }
239
243
 
244
/**
 * Coerces a loosely-typed value into a positive whole number.
 *
 * Numbers are taken as-is and strings are parsed as base-10 integers; any
 * other type is rejected. The value is floored *before* the positivity check
 * so that fractional inputs in (0, 1) yield `null` instead of `0`.
 *
 * @param value - Candidate value of unknown type.
 * @returns The floored value when it is finite and at least 1, otherwise `null`.
 */
function readPositiveNumber(value: unknown): number | null {
  let candidate: number;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "string") {
    candidate = Number.parseInt(value, 10);
  } else {
    return null;
  }
  if (!Number.isFinite(candidate)) return null;
  // Floor first: 0.5 is "> 0" but truncates to 0, which is not a usable budget.
  const whole = Math.floor(candidate);
  return whole > 0 ? whole : null;
}
254
+
255
/**
 * Narrows an unknown value to a plain key/value record.
 *
 * @param value - Any value.
 * @returns `value` typed as a record when it is a truthy, non-array object;
 *   otherwise `null`.
 */
function maybeRecord(value: unknown): Record<string, unknown> | null {
  if (!value) return null;
  if (typeof value !== "object") return null;
  if (Array.isArray(value)) return null;
  return value as Record<string, unknown>;
}
260
+
261
/**
 * Gathers every string that may contain a validation command from job params.
 *
 * Reads `instruction`, `plannerWorkerInstruction`, `validationSteps` and
 * `requiredValidationSteps` from `params`, plus the two step lists under
 * `params.planning`. A string value contributes itself; an array contributes
 * its string entries (non-string entries are dropped, nested arrays are not
 * descended into); anything else contributes nothing.
 *
 * @param params - Untyped job parameters.
 * @returns The collected strings in source order.
 */
function collectValidationCommandHints(params: Record<string, unknown>): string[] {
  const planning = maybeRecord(params.planning);
  const sources: unknown[] = [
    params.instruction,
    params.plannerWorkerInstruction,
    params.validationSteps,
    params.requiredValidationSteps,
    planning?.validationSteps,
    planning?.requiredValidationSteps,
  ];
  return sources.flatMap((source): string[] => {
    if (typeof source === "string") return [source];
    if (Array.isArray(source)) {
      return source.filter((entry): entry is string => typeof entry === "string");
    }
    return [];
  });
}
283
+
284
/**
 * Reports whether a job's parameters mention a browser/e2e validation command.
 *
 * Only `task.execute` jobs are considered. The check is a case-insensitive
 * keyword match over the strings returned by `collectValidationCommandHints`.
 *
 * @param job - The job's `kind` and `params`.
 * @returns `true` when any collected hint names a browser validation command.
 */
function hasBrowserValidationCommand(job: Pick<Job, "kind" | "params">): boolean {
  if (job.kind !== "task.execute") return false;
  const browserCommandPattern =
    /\b(web:e2e|e2e:web|browser:e2e|smoke:web|web:smoke|browser:smoke|playwright|cypress)\b/i;
  for (const hint of collectValidationCommandHints(job.params)) {
    if (browserCommandPattern.test(hint)) return true;
  }
  return false;
}
292
+
293
/**
 * Computes the effective Docker job timeout.
 *
 * The configured timeout is floored and raised to at least 10 seconds. A
 * non-finite configured value (e.g. `NaN`) falls back to that 10 second floor
 * instead of propagating through `Math.max` into the returned timeout.
 *
 * For jobs that mention a browser validation command the timeout is extended:
 * the estimate is `(repair attempts + 1)` times the sum of the planning
 * execution budget (default 1,800,000 ms), the finalization budget (default
 * 120,000 ms) and a fixed per-attempt overhead, clamped between the browser
 * job minimum and maximum. The result never drops below the base timeout.
 *
 * @param configuredTimeoutMs - Timeout from executor configuration, in ms.
 * @param job - The job's `kind` and `params`.
 * @returns The timeout to apply to the job, in milliseconds.
 */
export function resolveDockerJobTimeoutMs(
  configuredTimeoutMs: number,
  job: Pick<Job, "kind" | "params">,
): number {
  const minimumTimeoutMs = 10_000;
  // Math.max(x, NaN) is NaN, so guard before clamping.
  const baseTimeoutMs = Number.isFinite(configuredTimeoutMs)
    ? Math.max(minimumTimeoutMs, Math.floor(configuredTimeoutMs))
    : minimumTimeoutMs;
  if (!hasBrowserValidationCommand(job)) return baseTimeoutMs;

  const planning = maybeRecord(job.params.planning);
  const executionBudgetMs = readPositiveNumber(planning?.executionBudgetMs) ?? 1_800_000;
  const finalizationBudgetMs = readPositiveNumber(planning?.finalizationBudgetMs) ?? 120_000;
  const attempts = BROWSER_VALIDATION_JOB_REPAIR_ATTEMPTS + 1; // initial attempt plus repairs
  const estimatedTimeoutMs =
    attempts * (executionBudgetMs + finalizationBudgetMs + BROWSER_VALIDATION_JOB_OVERHEAD_MS);
  const boundedTimeoutMs = Math.min(
    BROWSER_VALIDATION_JOB_MAX_TIMEOUT_MS,
    Math.max(BROWSER_VALIDATION_JOB_MIN_TIMEOUT_MS, estimatedTimeoutMs),
  );
  return Math.max(baseTimeoutMs, boundedTimeoutMs);
}
312
+
240
313
  export class DockerExecutor {
241
314
  private options: Required<Omit<DockerExecutorOptions, "config">>;
242
315
  private worktreeDir: string;
@@ -1120,6 +1193,7 @@ export class DockerExecutor {
1120
1193
  worktreePath,
1121
1194
  onLog,
1122
1195
  );
1196
+ await this.ensureWorktreeDependencyArtifacts(containerWorktreePath, onLog);
1123
1197
 
1124
1198
  const args: string[] = [
1125
1199
  "exec",
@@ -1140,9 +1214,15 @@ export class DockerExecutor {
1140
1214
  stdout: "pipe",
1141
1215
  stderr: "pipe",
1142
1216
  });
1217
+ const timeoutMs = resolveDockerJobTimeoutMs(this.options.timeoutMs, job);
1218
+ if (timeoutMs !== this.options.timeoutMs) {
1219
+ const note = `[DockerExecutor] Extended job timeout for browser validation convergence: ${timeoutMs}ms (configured ${this.options.timeoutMs}ms).`;
1220
+ console.log(note);
1221
+ onLog?.("stdout", note);
1222
+ }
1143
1223
 
1144
1224
  const { leadMs: warningLeadMs, delayMs: warningDelayMs } = computeTimeoutWarningWindow(
1145
- this.options.timeoutMs,
1225
+ timeoutMs,
1146
1226
  );
1147
1227
  const warningTimer = setTimeout(() => {
1148
1228
  const warning = `[DockerExecutor] Job nearing timeout in warm container (${Math.round(
@@ -1171,7 +1251,7 @@ export class DockerExecutor {
1171
1251
  } catch {
1172
1252
  // Ignore kill errors
1173
1253
  }
1174
- }, this.options.timeoutMs);
1254
+ }, timeoutMs);
1175
1255
 
1176
1256
  // Process streams
1177
1257
  const stdoutLines: string[] = [];
@@ -1191,11 +1271,56 @@ export class DockerExecutor {
1191
1271
  const result = this.parseResult(stdoutLines, stderrLines, exitCode, {
1192
1272
  timedOutByDocker,
1193
1273
  elapsedMs,
1274
+ timeoutMs,
1194
1275
  });
1195
1276
 
1196
1277
  return result;
1197
1278
  }
1198
1279
 
1280
/**
 * Best-effort linking of dependency artifacts into a job worktree.
 *
 * Runs a shell script through `runWarmShell` that, for `node_modules`,
 * creates a symlink from `/repo/<name>` to `<worktree>/<name>` when the
 * source exists (or is a symlink) and the destination does not. The script
 * prints the names it linked. A failed script is logged as a warning and
 * otherwise ignored; successful links are logged as a note.
 *
 * @param containerWorktreePath - Worktree path as seen by the shell.
 * @param onLog - Optional sink that receives the warning or note line.
 */
private async ensureWorktreeDependencyArtifacts(
  containerWorktreePath: string,
  onLog?: (stream: "stdout" | "stderr", line: string) => void,
): Promise<void> {
  const worktreePrefix = shellSingleQuote(`${containerWorktreePath}/`);
  const scriptLines: string[] = [
    "set -eu",
    "linked=\"\"",
    "for name in node_modules; do",
    " src=\"/repo/$name\"",
    ` dest=${worktreePrefix}$name`,
    " if { [ -e \"$src\" ] || [ -L \"$src\" ]; } && [ ! -e \"$dest\" ] && [ ! -L \"$dest\" ]; then",
    " ln -s \"$src\" \"$dest\"",
    " linked=\"$linked $name\"",
    " fi",
    "done",
    "printf '%s' \"$linked\"",
  ];

  const shellResult = await this.runWarmShell(scriptLines.join("\n"));

  if (shellResult.ok) {
    const linkedNames = shellResult.stdout
      .trim()
      .split(/\s+/g)
      .map((name) => name.trim())
      .filter(Boolean);
    if (linkedNames.length === 0) return;

    const note = `[DockerExecutor] Linked worktree dependency artifact(s): ${linkedNames.join(
      ", ",
    )}`;
    console.log(note);
    onLog?.("stdout", note);
    return;
  }

  // Linking is an optimisation only: report why it was skipped and carry on.
  const detail = [shellResult.stderr, shellResult.stdout].filter(Boolean).join("\n").trim();
  const reason = detail || `exit ${shellResult.exitCode}`;
  const warning = `[DockerExecutor] Worktree dependency artifact linking skipped: ${reason}`;
  console.warn(warning);
  onLog?.("stderr", warning);
}
1323
+
1199
1324
  private async waitForWorktreePathInWarmContainer(
1200
1325
  containerWorktreePath: string,
1201
1326
  timeoutMs = 5_000,
@@ -1400,7 +1525,7 @@ export class DockerExecutor {
1400
1525
  stdoutLines: string[],
1401
1526
  stderrLines: string[],
1402
1527
  exitCode: number,
1403
- context: { timedOutByDocker: boolean; elapsedMs: number },
1528
+ context: { timedOutByDocker: boolean; elapsedMs: number; timeoutMs: number },
1404
1529
  ): DockerJobResult {
1405
1530
  let sawSentinel = false;
1406
1531
  let sentinelParseError = "";
@@ -1442,7 +1567,7 @@ export class DockerExecutor {
1442
1567
  if (context.timedOutByDocker) {
1443
1568
  return {
1444
1569
  ok: false,
1445
- summary: `Job timed out in Docker executor after ${context.elapsedMs}ms (limit ${this.options.timeoutMs}ms; terminated before structured result).`,
1570
+ summary: `Job timed out in Docker executor after ${context.elapsedMs}ms (limit ${context.timeoutMs}ms; terminated before structured result).`,
1446
1571
  stdout,
1447
1572
  stderr,
1448
1573
  exitCode,
@@ -3,7 +3,15 @@
3
3
  * Used by both the host Worker (direct mode) and the Docker job runner.
4
4
  */
5
5
 
6
- import { existsSync, lstatSync, readFileSync, renameSync, rmSync, unlinkSync } from "fs";
6
+ import {
7
+ existsSync,
8
+ lstatSync,
9
+ readdirSync,
10
+ readFileSync,
11
+ renameSync,
12
+ rmSync,
13
+ unlinkSync,
14
+ } from "fs";
7
15
  import { resolve } from "path";
8
16
  import {
9
17
  buildGitCommitArgs as buildSourceControlGitCommitArgs,
@@ -76,6 +84,24 @@ export interface ValidationBlocker {
76
84
  detail: string;
77
85
  }
78
86
 
87
/** Coarse category assigned to a failed browser validation run. */
type BrowserValidationFailureKind =
  | "assertion"
  | "startup"
  | "runtime"
  | "network"
  | "unknown";

/**
 * Structured description of one failed browser validation command, used to
 * steer a follow-up repair attempt.
 */
export interface BrowserValidationRepairPacket {
  /** The validation command that failed. */
  command: string;
  /** Category of the failure. */
  failureKind: BrowserValidationFailureKind;
  /** Stage/phase named in the failure output, when one could be extracted. */
  stage: string | null;
  /** Locator or page call named in the failure output, when one could be extracted. */
  selector: string | null;
  /** Text following "Expected" in the failure output, when one could be extracted. */
  expected: string | null;
  /** Failure digest for the current run. */
  digest: string;
  /** Failure digest recorded earlier for the same command, if any. */
  previousDigest: string | null;
  /** Stage extracted from `previousDigest`, if any. */
  previousStage: string | null;
  /** Selector extracted from `previousDigest`, if any. */
  previousSelector: string | null;
  /** Expected-UI text extracted from `previousDigest`, if any. */
  previousExpected: string | null;
  /** How the current digest relates to the previous one. */
  progress: "first_failure" | "same_failure" | "new_failure";
  /** Paths of failure artifacts (screenshots, traces, logs, ...). */
  artifacts: string[];
  /** Condensed failure output. */
  output: string;
}
104
+
79
105
  interface DeterministicQualityResult {
80
106
  ok: boolean;
81
107
  skipped: boolean;
@@ -120,6 +146,42 @@ export interface QualityGatePolicy {
120
146
  criticMinScore: number;
121
147
  }
122
148
 
149
/** Revision ceiling applied when browser validation is involved. */
const BROWSER_VALIDATION_MAX_AUTO_REVISIONS = 8;

/**
 * Returns the largest number of automatic revisions the quality loop may need.
 *
 * @param policy - The general and validation-specific revision limits.
 * @param opts - Set `browserValidation` to also consider the browser
 *   validation ceiling.
 * @returns The maximum of the two policy limits and, when requested, the
 *   browser validation ceiling.
 */
export function qualityRevisionLoopUpperBound(
  policy: {
    maxAutoRevisions: number;
    validationMaxAutoRevisions: number;
  },
  opts: {
    browserValidation?: boolean;
  } = {},
): number {
  const browserBound = opts.browserValidation ? BROWSER_VALIDATION_MAX_AUTO_REVISIONS : 0;
  const policyBound = Math.max(policy.maxAutoRevisions, policy.validationMaxAutoRevisions);
  return Math.max(policyBound, browserBound);
}
163
+
164
/**
 * Reports whether a task's parameters reference a long-running browser
 * validation command.
 *
 * Inspects, in order, `planning.requiredValidationSteps`,
 * `planning.validationSteps`, `requiredValidationSteps`, `validationSteps`
 * and `instruction`. Strings are tested directly; arrays are searched
 * recursively; other values are ignored.
 *
 * @param params - Untyped task parameters.
 * @returns `true` when any string satisfies
 *   `isLongRunningBrowserValidationCommand`.
 */
function taskRequestsBrowserValidation(params: Record<string, unknown>): boolean {
  const rawPlanning = params.planning;
  const planning: Record<string, unknown> =
    rawPlanning && typeof rawPlanning === "object"
      ? (rawPlanning as Record<string, unknown>)
      : {};

  const mentionsBrowserCommand = (value: unknown): boolean => {
    if (typeof value === "string") return isLongRunningBrowserValidationCommand(value);
    if (!Array.isArray(value)) return false;
    for (const item of value) {
      if (mentionsBrowserCommand(item)) return true;
    }
    return false;
  };

  const sources: unknown[] = [
    planning.requiredValidationSteps,
    planning.validationSteps,
    params.requiredValidationSteps,
    params.validationSteps,
    params.instruction,
  ];
  return sources.some((source) => mentionsBrowserCommand(source));
}
184
+
123
185
  function shouldSoftPassValidationBlocker(
124
186
  policy: QualityGatePolicy,
125
187
  blocker: ValidationBlocker | null,
@@ -148,14 +210,17 @@ export function revisionLimitForQualityGateFailures(opts: {
148
210
  qualityIssues: string[];
149
211
  requiredValidationFailures: string[];
150
212
  blocker: ValidationBlocker | null;
213
+ browserRepairPacket?: BrowserValidationRepairPacket | null;
151
214
  }): number {
152
215
  const hasValidationGateFailure =
153
216
  opts.requiredValidationFailures.length > 0 ||
154
217
  opts.blocker !== null ||
155
218
  opts.qualityIssues.some((issue) => issue.startsWith("ValidationGate:"));
156
- return hasValidationGateFailure
157
- ? opts.policy.validationMaxAutoRevisions
158
- : opts.policy.maxAutoRevisions;
219
+ if (!hasValidationGateFailure) return opts.policy.maxAutoRevisions;
220
+ if (opts.browserRepairPacket) {
221
+ return Math.max(opts.policy.validationMaxAutoRevisions, BROWSER_VALIDATION_MAX_AUTO_REVISIONS);
222
+ }
223
+ return opts.policy.validationMaxAutoRevisions;
159
224
  }
160
225
 
161
226
  // ─── Utilities ───────────────────────────────────────────────────────────────
@@ -1135,8 +1200,15 @@ export function prepareValidationCommandArgv(
1135
1200
  return [...argv, "--", "--port", port];
1136
1201
  }
1137
1202
 
1138
- function isBrowserValidationInfrastructureDigest(digest: string): boolean {
1139
- return /\b(ERR_SOCKET_BAD_PORT|EADDRINUSE|ECONNREFUSED|ECONNRESET|ETIMEDOUT|timed out|timeout|port|browser runtime|playwright install|executable doesn't exist)\b/i.test(
1203
/**
 * Reports whether a failure digest looks like a browser test assertion
 * failure (smoke-test failure banner, locator/page timeout, `waiting for
 * getBy...(`, "Expected ... to be ... within Nms", `AssertionError`, or
 * `Error: expect(`).
 *
 * @param digest - Failure text to inspect.
 * @returns `true` when the digest matches one of the assertion signatures.
 */
function isBrowserAssertionDigest(digest: string): boolean {
  const assertionSignature =
    /\b(Web end-to-end smoke test failed|locator\.[a-z0-9_]+:\s+Timeout\s+\d+ms\s+exceeded|page\.[a-z0-9_]+:\s+Timeout\s+\d+ms\s+exceeded|waiting for getBy(?:TestId|Role|Text|Label|Placeholder|Title)\(|Expected .+ to be .+ within \d+ms|AssertionError|Error:\s+expect\()/i;
  return assertionSignature.test(digest);
}
1208
+
1209
/**
 * Reports whether a failure digest points at browser validation
 * infrastructure (browser launch, port binding, connection errors, missing
 * browser runtime, early Expo exit, command timeout/termination) rather than
 * at the application under test.
 *
 * Digests recognised by `isBrowserAssertionDigest` are never treated as
 * infrastructure failures.
 *
 * @param digest - Failure text to inspect.
 * @returns `true` for infrastructure-style failures, otherwise `false`.
 */
export function isBrowserValidationInfrastructureDigest(digest: string): boolean {
  const infrastructureSignature =
    /\b(browserType\.launch|ERR_SOCKET_BAD_PORT|EADDRINUSE|ECONNREFUSED|ECONNRESET|ETIMEDOUT|listen\s+EPERM|EPERM|EACCES|freeport|port selection|browser runtime|playwright install|executable doesn't exist|Expo exited early|local port bind|Validation command timed out|terminated by signal)\b/i;
  return !isBrowserAssertionDigest(digest) && infrastructureSignature.test(digest);
}
@@ -1466,7 +1538,7 @@ function parseChangedPathsFromStatus(statusOutput: string): string[] {
1466
1538
  return out;
1467
1539
  }
1468
1540
 
1469
- function isLikelyTestPath(path: string): boolean {
1541
+ export function isAssertionCoverageTestPath(path: string): boolean {
1470
1542
  const normalized = path.replace(/\\/g, "/").toLowerCase();
1471
1543
  return (
1472
1544
  normalized.includes("/tests/") ||
@@ -1477,6 +1549,21 @@ function isLikelyTestPath(path: string): boolean {
1477
1549
  );
1478
1550
  }
1479
1551
 
1552
/**
 * Reports whether a path looks like a browser smoke/e2e harness file.
 *
 * The path is normalised to forward slashes and lower case, then matched
 * against: `scripts/test-*`, `scripts/*{e2e,smoke,playwright,browser}*`, and
 * `playwright.config` / `cypress.config`, each with a `.js`, `.cjs`, `.mjs`
 * or `.ts` extension.
 *
 * @param path - File path using either slash style.
 * @returns `true` when the path matches one of the harness patterns.
 */
export function isBrowserSmokeHarnessPath(path: string): boolean {
  const normalized = path.replace(/\\/g, "/").toLowerCase();
  const harnessPatterns: RegExp[] = [
    /(^|\/)scripts\/test-[^/]*\.(?:c?js|m?js|ts)$/,
    /(^|\/)scripts\/[^/]*(?:e2e|smoke|playwright|browser)[^/]*\.(?:c?js|m?js|ts)$/,
    /(^|\/)(?:playwright|cypress)\.config\.(?:c?js|m?js|ts)$/,
  ];
  return harnessPatterns.some((pattern) => pattern.test(normalized));
}
1562
+
1563
/**
 * Reports whether a path is test-related: either an assertion-coverage test
 * file or a browser smoke harness file.
 *
 * @param path - File path to classify.
 * @returns `true` when either underlying predicate accepts the path.
 */
export function isLikelyTestPath(path: string): boolean {
  if (isAssertionCoverageTestPath(path)) return true;
  return isBrowserSmokeHarnessPath(path);
}
1566
+
1480
1567
  function extractRunnableValidationCommand(step: string): string | null {
1481
1568
  const trimmed = step.trim();
1482
1569
  if (!trimmed) return null;
@@ -1582,6 +1669,288 @@ export function extractValidationFailureDigest(run: {
1582
1669
  return "";
1583
1670
  }
1584
1671
 
1672
/**
 * Classifies browser validation failure text into a failure kind.
 *
 * ANSI control sequences are stripped first. Signatures are then checked in a
 * fixed order and the first hit wins: runtime (browser launch/installation,
 * timeouts, termination), startup (port binding, permissions, early Expo
 * exit), network (`net::ERR_*`, connection errors), and finally assertion
 * (per `isBrowserAssertionDigest`). Text matching none is `"unknown"`.
 *
 * @param text - Raw failure text (digest and/or command output).
 * @returns The detected failure kind.
 */
function classifyBrowserValidationFailureKindFromText(text: string): BrowserValidationFailureKind {
  const cleaned = stripAnsiControlSequences(text);
  // Order matters: earlier entries take precedence over later ones.
  const orderedSignatures: Array<[BrowserValidationFailureKind, RegExp]> = [
    [
      "runtime",
      /\b(browserType\.launch|Executable doesn't exist|playwright install|Browser runtime preflight failed|Please run the following command to download new browsers|Validation command timed out|terminated by signal|SIGTERM|timed out after \d+ms)\b/i,
    ],
    [
      "startup",
      /\b(ERR_SOCKET_BAD_PORT|EADDRINUSE|listen\s+EPERM|EPERM|EACCES|freeport|port selection|Expo exited early|local port bind|cannot bind|operation not permitted)\b/i,
    ],
    [
      "network",
      /\b(page\.[a-z0-9_]+:\s+net::ERR_[A-Z0-9_]+|ECONNREFUSED|ECONNRESET|ETIMEDOUT)\b/i,
    ],
  ];
  for (const [kind, signature] of orderedSignatures) {
    if (signature.test(cleaned)) return kind;
  }
  return isBrowserAssertionDigest(cleaned) ? "assertion" : "unknown";
}
1696
+
1697
/**
 * Extracts the stage/phase name mentioned in browser validation output.
 *
 * Tries, in order: "Browser validation failed during X stage",
 * "failed during X stage", and "stage: X" / "phase = X". The first non-empty
 * capture is returned after being passed through `toSingleLine` with a limit
 * of 80.
 *
 * @param text - Failure output to search.
 * @returns The stage name, or `null` when none is found.
 */
function extractBrowserValidationStage(text: string): string | null {
  const stageMatchers: RegExp[] = [
    /\bBrowser validation failed during\s+([^:.\r\n]+?)\s+stage\b/i,
    /\bfailed during\s+([^:.\r\n]+?)\s+stage\b/i,
    /\b(?:stage|phase)\s*[:=]\s*["'`]?([^"'`.\r\n]+)["'`]?/i,
  ];
  for (const matcher of stageMatchers) {
    const captured = matcher.exec(text)?.[1]?.trim();
    if (captured) return toSingleLine(captured, 80);
  }
  return null;
}
1710
+
1711
/**
 * Finds the first `getBy*(...)`, `locator.x(...)` or `page.x(...)` call in
 * `text` whose parentheses balance, and returns its source text.
 *
 * Each candidate is scanned character by character from the start of the call
 * name. Parentheses inside single-, double- or backtick-quoted strings are
 * ignored, and a backslash escapes the next character while inside a quote.
 * A candidate is abandoned when whitespace is reached while no parenthesis is
 * open, or when the text ends before the call closes; the search then moves
 * on to the next candidate.
 *
 * @param text - Failure output to search.
 * @returns The call passed through `toSingleLine` with a limit of 120, or
 *   `null` when no balanced call exists.
 */
function extractBalancedLocatorCall(text: string): string | null {
  const callStart =
    /\b(?:getBy(?:TestId|Role|Text|Label|Placeholder|Title)|locator\.[a-z0-9_]+|page\.[a-z0-9_]+)\(/gi;
  for (let hit = callStart.exec(text); hit != null; hit = callStart.exec(text)) {
    const start = hit.index;
    let openParens = 0;
    let activeQuote: string | null = null;
    let escapeNext = false;

    scan: for (let cursor = start; cursor < text.length; cursor += 1) {
      const ch = text[cursor] ?? "";

      if (activeQuote) {
        // Inside a string literal: only track escapes and the closing quote.
        if (escapeNext) {
          escapeNext = false;
        } else if (ch === "\\") {
          escapeNext = true;
        } else if (ch === activeQuote) {
          activeQuote = null;
        }
        continue;
      }

      switch (ch) {
        case "'":
        case '"':
        case "`":
          activeQuote = ch;
          continue;
        case "(":
          openParens += 1;
          continue;
        case ")":
          openParens -= 1;
          if (openParens === 0) {
            return toSingleLine(text.slice(start, cursor + 1), 120);
          }
          break;
        default:
          break;
      }

      if (openParens <= 0 && cursor > start && /\s/.test(ch)) break scan;
    }
  }
  return null;
}
1747
+
1748
/**
 * Extracts the locator or page call mentioned in browser validation output.
 *
 * Prefers the parenthesis-balanced result of `extractBalancedLocatorCall`.
 * When that finds nothing, falls back to single-line patterns in order:
 * "waiting for getBy*(...)", `locator.x(...)`, `page.x(...)`, then a bare
 * `getBy*(...)`; a fallback hit is passed through `toSingleLine` with a limit
 * of 120.
 *
 * @param text - Failure output to search.
 * @returns The selector/call text, or `null` when none is found.
 */
function extractBrowserValidationSelector(text: string): string | null {
  const balancedCall = extractBalancedLocatorCall(text);
  if (balancedCall) return balancedCall;

  const fallbackMatchers: RegExp[] = [
    /\bwaiting for\s+(getBy(?:TestId|Role|Text|Label|Placeholder|Title)\([^)\r\n]+\))/i,
    /\b(locator\.[a-z0-9_]+\([^)\r\n]*\))/i,
    /\b(page\.[a-z0-9_]+\([^)\r\n]*\))/i,
    /\b(getBy(?:TestId|Role|Text|Label|Placeholder|Title)\([^)\r\n]+\))/i,
  ];
  for (const matcher of fallbackMatchers) {
    const captured = matcher.exec(text)?.[1]?.trim();
    if (captured) return toSingleLine(captured, 120);
  }
  return null;
}
1764
+
1765
/**
 * Extracts the text that follows "Expected" in browser validation output.
 *
 * Tries, in order: "Expected X within Nms", "Expected X" terminated by `:`,
 * `.` or a line break, and "Expected X" running to the end of the text. The
 * first non-empty capture is returned after being passed through
 * `toSingleLine` with a limit of 140.
 *
 * @param text - Failure output to search.
 * @returns The expected-UI description, or `null` when none is found.
 */
function extractBrowserValidationExpectedUi(text: string): string | null {
  const expectationMatchers: RegExp[] = [
    /\bExpected\s+([^:.\r\n]+?)\s+within\s+\d+ms\b/i,
    /\bExpected\s+([^:.\r\n]+?)(?:[:.]|\r?\n)/i,
    /\bExpected\s+([^:.\r\n]+?)$/i,
  ];
  for (const matcher of expectationMatchers) {
    const captured = matcher.exec(text)?.[1]?.trim();
    if (captured) return toSingleLine(captured, 140);
  }
  return null;
}
1778
+
1779
/**
 * Pulls artifact file paths (screenshots, traces, videos, reports) out of
 * browser validation output.
 *
 * ANSI sequences are stripped, then two patterns are applied in turn:
 * labelled paths such as "screenshot: <path>", and bare paths that contain an
 * `outputs`, `test-results` or `playwright-report` directory. Trailing
 * punctuation is trimmed, duplicates are skipped, each entry is passed
 * through `toSingleLine` with a limit of 220, and at most four entries are
 * returned.
 *
 * @param text - Raw command output.
 * @returns Up to four distinct artifact paths in order of appearance.
 */
function extractBrowserValidationArtifacts(text: string): string[] {
  const cleaned = stripAnsiControlSequences(text);
  const artifacts: string[] = [];
  const alreadySeen = new Set<string>();

  const remember = (raw: string | undefined): void => {
    const candidate = String(raw ?? "")
      .trim()
      .replace(/[),.;:]+$/, "");
    if (!candidate) return;
    if (alreadySeen.has(candidate)) return;
    alreadySeen.add(candidate);
    artifacts.push(toSingleLine(candidate, 220));
  };

  const artifactPatterns: RegExp[] = [
    /\b(?:screenshot|snapshot|trace|video|artifact|output|saved|wrote)[^:\r\n]*:\s*(["'`]?)([^"'`\s]+(?:outputs|test-results|playwright-report)[^\s"'`]+(?:\.png|\.jpg|\.jpeg|\.webp|\.zip|\.json|\.txt|\.webm))\1/gi,
    /((?:\/repo|\/workspace|[A-Za-z]:[\\/])?[^\s"'`]*?(?:outputs|test-results|playwright-report)[\\/][^\s"'`]+(?:\.png|\.jpg|\.jpeg|\.webp|\.zip|\.json|\.txt|\.webm))/gi,
  ];
  for (const pattern of artifactPatterns) {
    for (const found of cleaned.matchAll(pattern)) {
      // The labelled pattern captures the path in group 2, the bare one in group 1.
      remember(found[2] ?? found[1]);
      if (artifacts.length >= 4) return artifacts;
    }
  }
  return artifacts;
}
1804
+
1805
/**
 * Lists the most recently modified browser validation files under a repo.
 *
 * Walks `outputs/web-e2e`, `test-results` and `playwright-report` (those that
 * exist) down to four directory levels below each root, keeping regular files
 * whose name matches `extensions`. A directory is not entered once more than
 * 2,000 files have been collected. Unreadable directories and files that
 * cannot be stat'ed are skipped.
 *
 * @param repo - Repository root; `undefined` yields an empty list.
 * @param extensions - Pattern tested against each file name.
 * @param limit - Maximum number of paths to return (default 8).
 * @returns Absolute paths sorted by modification time, newest first.
 */
function collectRecentBrowserValidationFiles(
  repo: string | undefined,
  extensions: RegExp,
  limit = 8,
): string[] {
  if (!repo) return [];

  const found: Array<{ path: string; mtimeMs: number }> = [];

  const listDirectory = (dir: string) => {
    try {
      return readdirSync(dir, { withFileTypes: true });
    } catch {
      return null;
    }
  };

  const walk = (dir: string, depth: number): void => {
    if (depth > 4 || found.length > 2_000) return;
    const entries = listDirectory(dir);
    if (entries === null) return;
    for (const entry of entries) {
      const name = String(entry.name);
      const fullPath = resolve(dir, name);
      if (entry.isDirectory()) {
        walk(fullPath, depth + 1);
      } else if (entry.isFile() && extensions.test(name)) {
        try {
          found.push({ path: fullPath, mtimeMs: lstatSync(fullPath).mtimeMs });
        } catch {
          // Ignore files that disappear while a validation command is cleaning up.
        }
      }
    }
  };

  for (const relativeRoot of ["outputs/web-e2e", "test-results", "playwright-report"]) {
    const root = resolve(repo, relativeRoot);
    if (existsSync(root)) walk(root, 0);
  }

  found.sort((a, b) => b.mtimeMs - a.mtimeMs);
  return found.slice(0, limit).map((file) => file.path);
}
1845
+
1846
/**
 * Lists up to six recently modified browser validation artifacts (images,
 * archives, JSON, text/log files and videos) found by
 * `collectRecentBrowserValidationFiles`.
 *
 * @param repo - Repository root, if known.
 * @returns Artifact paths, each passed through `toSingleLine` with a limit of 220.
 */
function collectRecentBrowserValidationArtifacts(repo: string | undefined): string[] {
  const artifactExtensions = /\.(?:png|jpe?g|webp|zip|json|txt|log|webm)$/i;
  const recentPaths = collectRecentBrowserValidationFiles(repo, artifactExtensions, 6);
  return recentPaths.map((artifactPath) => toSingleLine(artifactPath, 220));
}
1853
+
1854
/**
 * Builds a one-line summary of the notable lines in the three most recent
 * `.log`/`.txt` browser validation files.
 *
 * For each readable file, ANSI sequences are stripped and only non-empty
 * lines matching known failure/progress markers are kept; the last 18 such
 * lines are joined with " | " and prefixed with the file path. Files that
 * cannot be read, or that contain no notable line, are skipped.
 *
 * @param repo - Repository root, if known.
 * @returns The per-file summaries joined with " | " and passed through
 *   `toSingleLine` with a limit of 1,400.
 */
function summarizeRecentBrowserValidationLogs(repo: string | undefined): string {
  const notableLine =
    /\b(Web end-to-end smoke test failed|Browser validation failed|Expected |locator\.|page\.|waiting for |Call log:|Verified:|Saved screenshot|Saved trace|ERR_SOCKET_BAD_PORT|EADDRINUSE|EPERM|EACCES|browserType\.launch|Expo exited early|freeport|net::ERR_|Validation command timed out|terminated by signal|SIGTERM|timed out after \d+ms)/i;
  const perFileSummaries: string[] = [];

  for (const logPath of collectRecentBrowserValidationFiles(repo, /\.(?:log|txt)$/i, 3)) {
    let rawLog: string;
    try {
      rawLog = readFileSync(logPath, "utf8");
    } catch {
      continue;
    }

    const kept: string[] = [];
    for (const rawLine of stripAnsiControlSequences(rawLog).split(/\r?\n/)) {
      const line = rawLine.trim();
      if (line && notableLine.test(line)) kept.push(line);
    }

    if (kept.length > 0) {
      perFileSummaries.push(`${logPath}: ${kept.slice(-18).join(" | ")}`);
    }
  }

  return toSingleLine(perFileSummaries.join(" | "), 1_400);
}
1878
+
1879
/**
 * Merges several artifact lists into one de-duplicated list.
 *
 * Entries are visited list by list in order. Each is passed through
 * `toSingleLine` with a limit of 220; empty results and repeats are dropped,
 * and merging stops once eight entries have been gathered.
 *
 * @param sources - Artifact lists; `undefined` lists are ignored.
 * @returns Up to eight distinct artifact strings in first-seen order.
 */
function mergeBrowserValidationArtifacts(...sources: Array<string[] | undefined>): string[] {
  const unique = new Set<string>();
  outer: for (const source of sources) {
    if (!source) continue;
    for (const artifact of source) {
      const clean = toSingleLine(artifact, 220);
      if (!clean || unique.has(clean)) continue;
      unique.add(clean);
      if (unique.size >= 8) break outer;
    }
  }
  return Array.from(unique);
}
1893
+
1894
/**
 * Condenses browser validation command output into a single line.
 *
 * ANSI sequences are stripped, lines are trimmed, and only non-empty lines
 * matching known failure markers are kept. The first eight such lines are
 * joined with " | ".
 *
 * @param text - Raw command output.
 * @returns The joined lines passed through `toSingleLine` with a limit of 900.
 */
function summarizeBrowserValidationOutput(text: string): string {
  const failureMarker =
    /\b(Web end-to-end smoke test failed|Browser validation failed|Expected |locator\.|page\.|waiting for getBy|Call log:|ERR_SOCKET_BAD_PORT|EADDRINUSE|EPERM|EACCES|browserType\.launch|Executable doesn't exist|Expo exited early|freeport|net::ERR_|Validation command timed out|terminated by signal|SIGTERM|timed out after \d+ms)/i;
  const highlights: string[] = [];
  for (const rawLine of stripAnsiControlSequences(text).split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line || !failureMarker.test(line)) continue;
    highlights.push(line);
    if (highlights.length === 8) break;
  }
  return toSingleLine(highlights.join(" | "), 900);
}
1906
+
1907
/**
 * Builds a repair packet for the first failed browser validation run whose
 * failure can be classified.
 *
 * Runs that succeeded, that are not long-running browser validation commands,
 * or whose failure kind is `"unknown"` are skipped. For the selected run the
 * packet combines the command output with a summary of recent log files to
 * extract stage, selector and expected-UI details, compares the current
 * digest with the one previously recorded for the same command, and gathers
 * artifact paths from both the output and the repo's recent files.
 *
 * @param validationRuns - Results of executed validation commands.
 * @param previousFailureDigests - Earlier digests keyed by
 *   `validationCommandKey(command)`; defaults to an empty map.
 * @param repo - Repository root used to locate recent logs and artifacts.
 * @returns The repair packet, or `null` when no run qualifies.
 */
export function buildBrowserValidationRepairPacket(
  validationRuns: ValidationExecutionResult[],
  previousFailureDigests: Map<string, string> = new Map(),
  repo?: string,
): BrowserValidationRepairPacket | null {
  for (const run of validationRuns) {
    const isFailedBrowserRun = !run.ok && isLongRunningBrowserValidationCommand(run.command);
    if (!isFailedBrowserRun) continue;

    const commandOutput = stripAnsiControlSequences(
      [run.stderr, run.stdout].filter(Boolean).join("\n"),
    );
    const digest = extractValidationFailureDigest(run);
    const failureKind = classifyBrowserValidationFailureKindFromText(`${digest}\n${commandOutput}`);
    if (failureKind === "unknown") continue;

    const previousDigest = previousFailureDigests.get(validationCommandKey(run.command)) ?? null;
    const recentLogSummary = summarizeRecentBrowserValidationLogs(repo);
    const enrichedContext = [commandOutput, recentLogSummary].filter(Boolean).join("\n");

    let progress: BrowserValidationRepairPacket["progress"];
    if (previousDigest == null) {
      progress = "first_failure";
    } else if (previousDigest === digest) {
      progress = "same_failure";
    } else {
      progress = "new_failure";
    }

    // Details of the earlier failure are only available when a digest was recorded.
    const fromPrevious = (extract: (text: string) => string | null): string | null =>
      previousDigest ? extract(previousDigest) : null;

    const stage = extractBrowserValidationStage(enrichedContext);
    const selector = extractBrowserValidationSelector(enrichedContext);
    const expected = extractBrowserValidationExpectedUi(enrichedContext);
    const previousStage = fromPrevious(extractBrowserValidationStage);
    const previousSelector = fromPrevious(extractBrowserValidationSelector);
    const previousExpected = fromPrevious(extractBrowserValidationExpectedUi);
    const artifacts = mergeBrowserValidationArtifacts(
      extractBrowserValidationArtifacts(commandOutput),
      collectRecentBrowserValidationArtifacts(repo),
    );
    const output = [summarizeBrowserValidationOutput(commandOutput) || digest, recentLogSummary]
      .filter(Boolean)
      .join(" | ");

    return {
      command: run.command,
      failureKind,
      stage,
      selector,
      expected,
      digest,
      previousDigest,
      previousStage,
      previousSelector,
      previousExpected,
      progress,
      artifacts,
      output,
    };
  }
  return null;
}
1953
+
1585
1954
  export function collectRequiredValidationFailures(
1586
1955
  requiredCommands: string[],
1587
1956
  validationRuns: Array<{ command: string; ok: boolean; exitCode?: number }>,
@@ -1866,6 +2235,9 @@ async function runDeterministicQualityGate(
1866
2235
  [...changedPaths, ...preparedMergeConflictPaths].filter((path) => isLikelyTestPath(path)),
1867
2236
  ),
1868
2237
  );
2238
+ const changedAssertionCoverageTestPaths = changedTestPaths.filter((path) =>
2239
+ isAssertionCoverageTestPath(path),
2240
+ );
1869
2241
  const issues: string[] = [];
1870
2242
  const scopeIssues: string[] = [];
1871
2243
  const validationIssues: string[] = [];
@@ -1890,8 +2262,8 @@ async function runDeterministicQualityGate(
1890
2262
  }
1891
2263
  if (
1892
2264
  isTestTask &&
1893
- changedTestPaths.length > 0 &&
1894
- !hasBalancedPositiveNegativeAssertions(changedTestPaths, repo)
2265
+ changedAssertionCoverageTestPaths.length > 0 &&
2266
+ !hasBalancedPositiveNegativeAssertions(changedAssertionCoverageTestPaths, repo)
1895
2267
  ) {
1896
2268
  addScopeIssue(
1897
2269
  "found changed test files without both positive and negative assertion coverage (expected both).",
@@ -2344,9 +2716,101 @@ export function buildQualityRevisionHint(
2344
2716
  reviewFixContext?: ReviewFixContext | null,
2345
2717
  validationRuns: ValidationExecutionResult[] = [],
2346
2718
  validationBlocker: ValidationBlocker | null = null,
2719
+ browserRepairPacket: BrowserValidationRepairPacket | null = null,
2347
2720
  ): string {
2348
2721
  const lines: string[] = [];
2349
2722
  lines.push("Quality revision required before completion.");
2723
+ const focusedBrowserRepair = Boolean(browserRepairPacket);
2724
+ if (browserRepairPacket) {
2725
+ lines.push("Primary ValidationGate repair objective:");
2726
+ lines.push(`- Command: ${browserRepairPacket.command}`);
2727
+ lines.push(`- Failure type: browser ${browserRepairPacket.failureKind}`);
2728
+ lines.push(
2729
+ "- First action: inspect the captured browser output/artifacts and actual rendered UI before editing; do not guess from component names or intended copy.",
2730
+ );
2731
+ if (browserRepairPacket.stage) lines.push(`- Stage: ${browserRepairPacket.stage}`);
2732
+ if (browserRepairPacket.expected) {
2733
+ lines.push(`- Expected UI: ${browserRepairPacket.expected}`);
2734
+ }
2735
+ if (browserRepairPacket.selector) {
2736
+ lines.push(`- Selector/wait: ${browserRepairPacket.selector}`);
2737
+ }
2738
+ if (browserRepairPacket.artifacts.length > 0) {
2739
+ lines.push("Failure artifacts to inspect:");
2740
+ for (const artifact of browserRepairPacket.artifacts) {
2741
+ lines.push(`- ${artifact}`);
2742
+ }
2743
+ } else {
2744
+ lines.push(
2745
+ "- Failure artifacts: none were captured in command output; if this repo writes screenshots/traces, inspect the latest browser failure artifact before changing selectors.",
2746
+ );
2747
+ }
2748
+ if (browserRepairPacket.digest) {
2749
+ lines.push(`- Current failure: ${browserRepairPacket.digest}`);
2750
+ }
2751
+ if (browserRepairPacket.previousDigest) {
2752
+ const breadcrumb =
2753
+ browserRepairPacket.progress === "same_failure"
2754
+ ? "same failure repeated for this command"
2755
+ : "new failure for this command after the previous revision";
2756
+ lines.push(`- Breadcrumb: ${breadcrumb}; previous failure was ${browserRepairPacket.previousDigest}`);
2757
+ if (
2758
+ browserRepairPacket.previousStage ||
2759
+ browserRepairPacket.previousExpected ||
2760
+ browserRepairPacket.previousSelector
2761
+ ) {
2762
+ lines.push("Previous browser failure detail:");
2763
+ if (browserRepairPacket.previousStage) {
2764
+ lines.push(`- Previous stage: ${browserRepairPacket.previousStage}`);
2765
+ }
2766
+ if (browserRepairPacket.previousExpected) {
2767
+ lines.push(`- Previous expected UI: ${browserRepairPacket.previousExpected}`);
2768
+ }
2769
+ if (browserRepairPacket.previousSelector) {
2770
+ lines.push(`- Previous selector/wait: ${browserRepairPacket.previousSelector}`);
2771
+ }
2772
+ }
2773
+ } else {
2774
+ lines.push("- Breadcrumb: first captured failure for this command in this revision loop");
2775
+ }
2776
+ if (browserRepairPacket.output) {
2777
+ lines.push(`- Relevant output: ${browserRepairPacket.output}`);
2778
+ }
2779
+ if (browserRepairPacket.failureKind === "assertion") {
2780
+ lines.push(
2781
+ "Repair direction: fix this exact visible UI assertion or the app state that should make it true. If the expected text/role/test id is not present in the screenshot, update the smoke assertion to the visible product UI that proves the same stage, or add accessibility metadata to an existing control. Do not add optional navigation or broaden the smoke path. Do not change browser startup, port selection, Playwright installation, or unrelated e2e harness behavior unless the captured failure is reclassified as startup/setup.",
2782
+ );
2783
+ lines.push(
2784
+ "Selector stability rule: prefer existing data-testid/accessibility labels/roles and stage containers over guessed title/body text. If a stage already passed with a stable container such as a home/shell/test-id locator, reuse that signal instead of replacing it with copy checks.",
2785
+ );
2786
+ lines.push(
2787
+ "Text assertion rule: rendered titles may be split across sibling nodes. Do not invent a combined phrase for split text; either assert the individual visible fragments within the stage container or add/reuse a stable test id/accessibility label.",
2788
+ );
2789
+ if (
2790
+ browserRepairPacket.progress === "same_failure" ||
2791
+ (browserRepairPacket.stage &&
2792
+ browserRepairPacket.previousStage &&
2793
+ browserRepairPacket.stage === browserRepairPacket.previousStage)
2794
+ ) {
2795
+ lines.push(
2796
+ "Repeated-stage rule: this browser stage has failed before in the current revision loop, so treat the previous selector/copy assumption as suspect and switch to the most stable rendered locator for that same stage.",
2797
+ );
2798
+ }
2799
+ } else {
2800
+ lines.push(
2801
+ "Repair direction: this is a browser startup/runtime/network failure. Fix only startup/runtime provisioning for this command and do not rewrite app UI assertions unless a later ValidationGate run reaches an assertion stage.",
2802
+ );
2803
+ }
2804
+ lines.push(
2805
+ "Convergence rule: preserve stages that already passed, repair only the current failing browser stage, and stop after one targeted browser confirmation so the next ValidationGate run gets a clean signal.",
2806
+ );
2807
+ lines.push(
2808
+ "Executor sandbox rule: if the full browser command cannot run inside this edit turn because local server binding is denied or Expo/Playwright reports ERR_SOCKET_BAD_PORT, listen EPERM, EACCES, or a local port bind/freeport failure before reaching the app, treat that as a Codex executor verification limitation. Do not change app startup, ports, or browser provisioning for that local-only signal unless the ValidationGate failure above is also a startup/setup failure. Use the captured artifacts plus fast checks, then let ValidationGate perform the authoritative browser run.",
2809
+ );
2810
+ lines.push(
2811
+ `Validation rerun rule: PushPals ValidationGate will rerun "${browserRepairPacket.command}" after the patch. During a focused browser repair turn, run fast non-browser checks and inspect captured artifacts first; do not run the full browser command from the Codex executor by default. Only run the full browser command for one targeted confirmation if artifacts are missing and a quick local bind/startup probe shows the browser server can actually run in this executor. Otherwise stop after fast checks so ValidationGate gets the clean authoritative signal.`,
2812
+ );
2813
+ }
2350
2814
  if (reviewFixContext) {
2351
2815
  lines.push("Rejected PR retry requirements:");
2352
2816
  if (reviewFixContext.previousReviewScore != null) {
@@ -2373,8 +2837,28 @@ export function buildQualityRevisionHint(
2373
2837
  lines.push("Raise the score above the approval threshold without reopening already accepted behavior.");
2374
2838
  }
2375
2839
  if (issues.length > 0) {
2376
- lines.push("Deterministic quality issues:");
2377
- for (const issue of issues) lines.push(`- ${issue}`);
2840
+ const displayedIssues = focusedBrowserRepair
2841
+ ? issues.filter(
2842
+ (issue) =>
2843
+ issue.startsWith("ValidationGate:") ||
2844
+ issue.includes("Required vision.md validation") ||
2845
+ issue.includes("Validation blocker"),
2846
+ )
2847
+ : issues;
2848
+ if (displayedIssues.length > 0) {
2849
+ lines.push(
2850
+ focusedBrowserRepair
2851
+ ? "Deterministic quality issues relevant to this validation repair:"
2852
+ : "Deterministic quality issues:",
2853
+ );
2854
+ for (const issue of displayedIssues) lines.push(`- ${issue}`);
2855
+ }
2856
+ const suppressedCount = issues.length - displayedIssues.length;
2857
+ if (focusedBrowserRepair && suppressedCount > 0) {
2858
+ lines.push(
2859
+ `Suppressed ${suppressedCount} lower-priority ScopeGate/CriticGate note(s) until the browser validation repair passes.`,
2860
+ );
2861
+ }
2378
2862
  }
2379
2863
  if (validationBlocker) {
2380
2864
  lines.push(
@@ -2387,7 +2871,10 @@ export function buildQualityRevisionHint(
2387
2871
  const failedValidationRuns = validationRuns.filter((run) => !run.ok);
2388
2872
  if (failedValidationRuns.length > 0) {
2389
2873
  lines.push("Validation failure diagnostics:");
2390
- for (const run of failedValidationRuns.slice(0, 5)) {
2874
+ const runsToShow = browserRepairPacket
2875
+ ? failedValidationRuns.filter((run) => run.command === browserRepairPacket.command).slice(0, 1)
2876
+ : failedValidationRuns.slice(0, 5);
2877
+ for (const run of runsToShow) {
2391
2878
  lines.push(`- ${run.command} failed with exit ${run.exitCode} after ${run.elapsedMs}ms.`);
2392
2879
  const output = toSingleLine(
2393
2880
  stripAnsiControlSequences([run.stderr, run.stdout].filter(Boolean).join("\n")),
@@ -2397,14 +2884,40 @@ export function buildQualityRevisionHint(
2397
2884
  }
2398
2885
  }
2399
2886
  if (critic) {
2400
- lines.push(`Critic score: ${critic.score.toFixed(1)} / 10`);
2401
- if (critic.mustFix.length > 0) {
2887
+ const deferCriticForBrowserAssertion =
2888
+ focusedBrowserRepair && browserRepairPacket?.failureKind === "assertion";
2889
+ const criticIsSevere =
2890
+ critic.score <= 4 ||
2891
+ [...critic.mustFix, ...critic.findings, critic.revisionGuidance].some((entry) =>
2892
+ /\b(browser|e2e|validation|web smoke|playwright)\b/i.test(entry),
2893
+ );
2894
+ if (deferCriticForBrowserAssertion) {
2895
+ lines.push(
2896
+ `CriticGate notes deferred while repairing the primary browser assertion failure (score ${critic.score.toFixed(1)} / 10).`,
2897
+ );
2898
+ } else if (!focusedBrowserRepair || criticIsSevere) {
2899
+ lines.push(`Critic score: ${critic.score.toFixed(1)} / 10`);
2900
+ }
2901
+ if (
2902
+ !deferCriticForBrowserAssertion &&
2903
+ (!focusedBrowserRepair || criticIsSevere) &&
2904
+ critic.mustFix.length > 0
2905
+ ) {
2402
2906
  lines.push("Critic must-fix findings:");
2403
2907
  for (const issue of critic.mustFix) lines.push(`- ${issue}`);
2404
2908
  }
2405
- if (critic.revisionGuidance) {
2909
+ if (
2910
+ !deferCriticForBrowserAssertion &&
2911
+ (!focusedBrowserRepair || criticIsSevere) &&
2912
+ critic.revisionGuidance
2913
+ ) {
2406
2914
  lines.push(`Critic revision guidance: ${critic.revisionGuidance}`);
2407
2915
  }
2916
+ if (focusedBrowserRepair && !criticIsSevere && !deferCriticForBrowserAssertion) {
2917
+ lines.push(
2918
+ `CriticGate notes deferred while repairing the primary browser validation failure (score ${critic.score.toFixed(1)} / 10).`,
2919
+ );
2920
+ }
2408
2921
  }
2409
2922
  if (planning.acceptanceCriteria.length > 0) {
2410
2923
  lines.push("Required acceptance criteria:");
@@ -2661,10 +3174,14 @@ export type WorkerGitCommitIdentity = SourceControlCommitIdentity;
2661
3174
 
2662
3175
  export const explicitWorkerCommitIdentityFromEnv = explicitSourceControlCommitIdentityFromEnv;
2663
3176
 
3177
+ export function buildSandboxArtifactUnstageCommand(): string[] {
3178
+ return ["reset", "-q", "--", ...SANDBOX_STAGE_ARTIFACT_PATHS];
3179
+ }
3180
+
2664
3181
  async function unstageSandboxArtifactPaths(
2665
3182
  repo: string,
2666
3183
  ): Promise<{ ok: boolean; stdout: string; stderr: string }> {
2667
- return git(repo, ["reset", "-q", "--", ...SANDBOX_STAGE_ARTIFACT_PATHS]);
3184
+ return git(repo, buildSandboxArtifactUnstageCommand());
2668
3185
  }
2669
3186
 
2670
3187
  async function resolveGitConfigValue(repo: string, key: string): Promise<string> {
@@ -4499,7 +5016,7 @@ function hasInvalidRepoPathHint(values: string[]): boolean {
4499
5016
  return values.some((entry) => normalizeStagePath(entry) === null);
4500
5017
  }
4501
5018
 
4502
- const SANDBOX_STAGE_ARTIFACT_PATHS = ["workspace", "outputs", ".codex"];
5019
+ export const SANDBOX_STAGE_ARTIFACT_PATHS = ["workspace", "outputs", ".codex", "node_modules"];
4503
5020
 
4504
5021
  function taskExecuteOrigin(params: Record<string, unknown>): "autonomy" | "user" {
4505
5022
  const explicit = String(params.origin ?? "")
@@ -5049,10 +5566,9 @@ export async function executeJob(
5049
5566
  const qualityGatePolicy = deriveQualityGatePolicy(normalizedParams, runtimeConfig);
5050
5567
  const qualityMaxAutoRevisions = qualityGatePolicy.maxAutoRevisions;
5051
5568
  const qualityValidationMaxAutoRevisions = qualityGatePolicy.validationMaxAutoRevisions;
5052
- const qualityRevisionLoopMax = Math.max(
5053
- qualityMaxAutoRevisions,
5054
- qualityValidationMaxAutoRevisions,
5055
- );
5569
+ const qualityRevisionLoopMax = qualityRevisionLoopUpperBound(qualityGatePolicy, {
5570
+ browserValidation: taskRequestsBrowserValidation(normalizedParams),
5571
+ });
5056
5572
  const qualitySoftPassOnExhausted = qualityGatePolicy.softPassOnExhausted;
5057
5573
  const qualityCriticMinScore = qualityGatePolicy.criticMinScore;
5058
5574
 
@@ -5190,6 +5706,11 @@ export async function executeJob(
5190
5706
  revisionAttempt,
5191
5707
  },
5192
5708
  );
5709
+ const browserRepairPacket = buildBrowserValidationRepairPacket(
5710
+ quality.validationRuns,
5711
+ previousValidationFailureDigests,
5712
+ repo,
5713
+ );
5193
5714
  for (const run of quality.validationRuns) {
5194
5715
  if (run.ok) continue;
5195
5716
  const digest = extractValidationFailureDigest(run);
@@ -5324,8 +5845,15 @@ export async function executeJob(
5324
5845
  ? []
5325
5846
  : quality.requiredValidationFailures,
5326
5847
  blocker: validationOutsideTaskScope ? null : quality.blocker,
5848
+ browserRepairPacket: validationOutsideTaskScope ? null : browserRepairPacket,
5327
5849
  });
5328
- const issueSummary = issues.map((entry) => toSingleLine(entry, 180)).join(" | ");
5850
+ const issueSummary =
5851
+ browserRepairPacket && !validationOutsideTaskScope
5852
+ ? `ValidationGate browser ${browserRepairPacket.failureKind} repair for ${browserRepairPacket.command}: ${toSingleLine(
5853
+ browserRepairPacket.digest,
5854
+ 180,
5855
+ )}`
5856
+ : issues.map((entry) => toSingleLine(entry, 180)).join(" | ");
5329
5857
  if (quality.blocker && !validationOutsideTaskScope) {
5330
5858
  const blockerSummary = `Quality gate blocked by ${quality.blocker.category} issue: ${quality.blocker.detail}`;
5331
5859
  const blockerDiagnostics = truncate(
@@ -5339,7 +5867,7 @@ export async function executeJob(
5339
5867
  requiredValidationFailures: quality.requiredValidationFailures,
5340
5868
  blocker: quality.blocker,
5341
5869
  revisionAttempt,
5342
- maxAutoRevisions: qualityValidationMaxAutoRevisions,
5870
+ maxAutoRevisions: activeMaxAutoRevisions,
5343
5871
  outsideTaskScope: validationOutsideTaskScope,
5344
5872
  });
5345
5873
  if (requiredValidationCanRevise) {
@@ -5456,6 +5984,7 @@ export async function executeJob(
5456
5984
  reviewFixContext,
5457
5985
  validationOutsideTaskScope ? [] : quality.validationRuns,
5458
5986
  validationOutsideTaskScope ? null : quality.blocker,
5987
+ validationOutsideTaskScope ? null : browserRepairPacket,
5459
5988
  );
5460
5989
  onLog?.(
5461
5990
  "stderr",
@@ -10,6 +10,7 @@
10
10
  "cli:integration": "bun run scripts/cli-integration.ts",
11
11
  "cli:bundle": "bun run --cwd packages/cli build",
12
12
  "cli:monitor:export": "bun run scripts/sync-cli-monitor-ui.ts",
13
+ "replay:worker-job": "bun run scripts/replay-worker-job.ts",
13
14
  "protocol:build": "bun --cwd packages/protocol build",
14
15
  "protocol:typecheck": "bun --cwd packages/protocol typecheck",
15
16
  "server:only": "bun --cwd apps/server --env-file ../../.env dev",
@@ -16,7 +16,7 @@ Execution rules:
16
16
  - If the hinted file is a thin wrapper or the behavior lives elsewhere, edit the behavior-owning file(s) needed to solve the task and explain the scope expansion in your final response.
17
17
  - Avoid irrelevant sprawl; the review agent will judge whether changed files are necessary for the requested outcome.
18
18
  - Read relevant files before editing, then run focused validation.
19
- - PushPals runs the deterministic ValidationGate after your edit, including any repo-required `vision.md` commands. During the editing turn, prefer focused/fast validation. Do not spend the main Codex execution budget repeatedly running long browser/e2e smoke commands such as `bun run web:e2e`; run them only when the task is specifically about the browser harness or when you need a final targeted confirmation and can stop promptly on a clear failure.
19
+ - PushPals runs the deterministic ValidationGate after your edit, including any repo-required `vision.md` commands. During the editing turn, prefer focused/fast validation. Do not run long browser/e2e smoke commands such as `bun run web:e2e` by default from the Codex executor; ValidationGate is the authoritative browser runner and has the provisioned browser/runtime environment. For browser-harness tasks, inspect existing artifacts, run fast non-browser checks, and only run the full browser command once when a quick local startup probe shows it can run here and you need one targeted confirmation.
20
20
  - Use direct commands without shell wrappers. Prefer plain commands like `git diff -- path`, `git add <path>`, `git status --porcelain`, and `pwd`.
21
21
  - Do not wrap commands in `/bin/bash -lc`, `sh -lc`, `cmd /c`, or `powershell -Command`, and avoid pipelines, `awk`, heredocs, or multi-command shell snippets unless they are truly unavoidable.
22
22
  - If the command router rejects a command, simplify it to a single direct command instead of retrying more shell wrappers.