@pushpalsdev/cli 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pushpalsdev/cli",
3
- "version": "1.1.0",
3
+ "version": "1.1.1",
4
4
  "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -1120,6 +1120,7 @@ export class DockerExecutor {
1120
1120
  worktreePath,
1121
1121
  onLog,
1122
1122
  );
1123
+ await this.ensureWorktreeDependencyArtifacts(containerWorktreePath, onLog);
1123
1124
 
1124
1125
  const args: string[] = [
1125
1126
  "exec",
@@ -1196,6 +1197,50 @@ export class DockerExecutor {
1196
1197
  return result;
1197
1198
  }
1198
1199
 
1200
/**
 * Links shared dependency artifacts from `/repo` into a worktree by running a
 * small shell script through `this.runWarmShell`.
 *
 * The script only considers `node_modules`: when `/repo/node_modules` exists
 * (or is a symlink) and `<containerWorktreePath>/node_modules` is absent, it
 * creates a symlink to it. The script prints the names it linked.
 *
 * A failing script is not fatal: the failure is reported as a warning and the
 * method returns normally.
 *
 * @param containerWorktreePath - Worktree path used as the link destination prefix.
 * @param onLog - Optional sink that receives the same warning/notice line that
 *   is written to the console.
 */
private async ensureWorktreeDependencyArtifacts(
  containerWorktreePath: string,
  onLog?: (stream: "stdout" | "stderr", line: string) => void,
): Promise<void> {
  // The prefix is quoted once; `$name` is appended outside the quotes so the
  // shell still expands it.
  const quotedPrefix = shellSingleQuote(`${containerWorktreePath}/`);
  const scriptLines = [
    "set -eu",
    "linked=\"\"",
    "for name in node_modules; do",
    " src=\"/repo/$name\"",
    ` dest=${quotedPrefix}$name`,
    " if { [ -e \"$src\" ] || [ -L \"$src\" ]; } && [ ! -e \"$dest\" ] && [ ! -L \"$dest\" ]; then",
    " ln -s \"$src\" \"$dest\"",
    " linked=\"$linked $name\"",
    " fi",
    "done",
    "printf '%s' \"$linked\"",
  ];

  const outcome = await this.runWarmShell(scriptLines.join("\n"));

  if (!outcome.ok) {
    const detail = [outcome.stderr, outcome.stdout].filter(Boolean).join("\n").trim();
    const reason = detail || `exit ${outcome.exitCode}`;
    const warning = `[DockerExecutor] Worktree dependency artifact linking skipped: ${reason}`;
    console.warn(warning);
    onLog?.("stderr", warning);
    return;
  }

  // The script prints a whitespace-separated list of the names it linked.
  const linkedNames: string[] = [];
  for (const token of outcome.stdout.trim().split(/\s+/g)) {
    const name = token.trim();
    if (name) linkedNames.push(name);
  }
  if (linkedNames.length === 0) return;

  const note = `[DockerExecutor] Linked worktree dependency artifact(s): ${linkedNames.join(", ")}`;
  console.log(note);
  onLog?.("stdout", note);
}
1243
+
1199
1244
  private async waitForWorktreePathInWarmContainer(
1200
1245
  containerWorktreePath: string,
1201
1246
  timeoutMs = 5_000,
@@ -76,6 +76,24 @@ export interface ValidationBlocker {
76
76
  detail: string;
77
77
  }
78
78
 
79
/** Coarse classification of a failed browser validation run. */
type BrowserValidationFailureKind =
  | "assertion"
  | "startup"
  | "runtime"
  | "network"
  | "unknown";

/**
 * Structured description of one failed browser validation command, used to
 * focus the next revision on that single failure.
 */
export interface BrowserValidationRepairPacket {
  /** The validation command that failed. */
  command: string;
  /** Classification derived from the failure digest and command output. */
  failureKind: BrowserValidationFailureKind;
  /** Stage name extracted from the output, or `null` when none was found. */
  stage: string | null;
  /** Locator/wait call extracted from the output, or `null` when none was found. */
  selector: string | null;
  /** "Expected ..." text extracted from the output, or `null` when none was found. */
  expected: string | null;
  /** Failure digest for the current run. */
  digest: string;
  /** Digest recorded earlier for the same command, or `null` if there was none. */
  previousDigest: string | null;
  /** Stage extracted from `previousDigest`, when available. */
  previousStage: string | null;
  /** Selector extracted from `previousDigest`, when available. */
  previousSelector: string | null;
  /** Expected-UI text extracted from `previousDigest`, when available. */
  previousExpected: string | null;
  /**
   * How the current digest relates to the previous one: no previous digest,
   * an identical digest, or a different digest.
   */
  progress: "first_failure" | "same_failure" | "new_failure";
  /** Artifact paths (screenshots, traces, ...) found in the output. */
  artifacts: string[];
  /** Condensed relevant output lines, falling back to the digest. */
  output: string;
}
96
+
79
97
  interface DeterministicQualityResult {
80
98
  ok: boolean;
81
99
  skipped: boolean;
@@ -120,6 +138,8 @@ export interface QualityGatePolicy {
120
138
  criticMinScore: number;
121
139
  }
122
140
 
141
/**
 * Minimum auto-revision limit used by `revisionLimitForQualityGateFailures`
 * when a validation-gate failure carries a browser repair packet; the policy's
 * own `validationMaxAutoRevisions` wins when it is larger.
 */
const BROWSER_VALIDATION_MAX_AUTO_REVISIONS = 5;
142
+
123
143
  function shouldSoftPassValidationBlocker(
124
144
  policy: QualityGatePolicy,
125
145
  blocker: ValidationBlocker | null,
@@ -148,14 +168,17 @@ export function revisionLimitForQualityGateFailures(opts: {
148
168
  qualityIssues: string[];
149
169
  requiredValidationFailures: string[];
150
170
  blocker: ValidationBlocker | null;
171
+ browserRepairPacket?: BrowserValidationRepairPacket | null;
151
172
  }): number {
152
173
  const hasValidationGateFailure =
153
174
  opts.requiredValidationFailures.length > 0 ||
154
175
  opts.blocker !== null ||
155
176
  opts.qualityIssues.some((issue) => issue.startsWith("ValidationGate:"));
156
- return hasValidationGateFailure
157
- ? opts.policy.validationMaxAutoRevisions
158
- : opts.policy.maxAutoRevisions;
177
+ if (!hasValidationGateFailure) return opts.policy.maxAutoRevisions;
178
+ if (opts.browserRepairPacket) {
179
+ return Math.max(opts.policy.validationMaxAutoRevisions, BROWSER_VALIDATION_MAX_AUTO_REVISIONS);
180
+ }
181
+ return opts.policy.validationMaxAutoRevisions;
159
182
  }
160
183
 
161
184
  // ─── Utilities ───────────────────────────────────────────────────────────────
@@ -1135,8 +1158,15 @@ export function prepareValidationCommandArgv(
1135
1158
  return [...argv, "--", "--port", port];
1136
1159
  }
1137
1160
 
1138
- function isBrowserValidationInfrastructureDigest(digest: string): boolean {
1139
- return /\b(ERR_SOCKET_BAD_PORT|EADDRINUSE|ECONNREFUSED|ECONNRESET|ETIMEDOUT|timed out|timeout|port|browser runtime|playwright install|executable doesn't exist)\b/i.test(
1161
/**
 * Reports whether a failure digest looks like a browser assertion failure
 * (smoke-test failure banner, locator/page timeout, `waiting for getBy...`,
 * "Expected ... to be ... within Nms", `AssertionError`, or `Error: expect(`).
 *
 * @param digest - Failure digest text to inspect.
 * @returns `true` when any assertion signature is present (case-insensitive).
 */
function isBrowserAssertionDigest(digest: string): boolean {
  const assertionSignature =
    /\b(Web end-to-end smoke test failed|locator\.[a-z0-9_]+:\s+Timeout\s+\d+ms\s+exceeded|page\.[a-z0-9_]+:\s+Timeout\s+\d+ms\s+exceeded|waiting for getBy(?:TestId|Role|Text|Label|Placeholder|Title)\(|Expected .+ to be .+ within \d+ms|AssertionError|Error:\s+expect\()/i;
  return assertionSignature.test(digest);
}
1166
+
1167
/**
 * Reports whether a failure digest points at browser-validation
 * infrastructure (browser launch, port binding, permissions, runtime install,
 * command timeout/termination) rather than at an assertion.
 *
 * Digests recognised by `isBrowserAssertionDigest` are never treated as
 * infrastructure failures, even if they also contain an infrastructure token.
 *
 * @param digest - Failure digest text to inspect.
 */
export function isBrowserValidationInfrastructureDigest(digest: string): boolean {
  const infrastructureSignature =
    /\b(browserType\.launch|ERR_SOCKET_BAD_PORT|EADDRINUSE|ECONNREFUSED|ECONNRESET|ETIMEDOUT|listen\s+EPERM|EPERM|EACCES|freeport|port selection|browser runtime|playwright install|executable doesn't exist|Expo exited early|local port bind|Validation command timed out|terminated by signal)\b/i;
  return !isBrowserAssertionDigest(digest) && infrastructureSignature.test(digest);
}
@@ -1466,7 +1496,7 @@ function parseChangedPathsFromStatus(statusOutput: string): string[] {
1466
1496
  return out;
1467
1497
  }
1468
1498
 
1469
- function isLikelyTestPath(path: string): boolean {
1499
+ export function isAssertionCoverageTestPath(path: string): boolean {
1470
1500
  const normalized = path.replace(/\\/g, "/").toLowerCase();
1471
1501
  return (
1472
1502
  normalized.includes("/tests/") ||
@@ -1477,6 +1507,21 @@ function isLikelyTestPath(path: string): boolean {
1477
1507
  );
1478
1508
  }
1479
1509
 
1510
/**
 * Reports whether a path looks like a browser smoke/e2e harness file rather
 * than a unit-test file.
 *
 * Recognised shapes (after normalising `\` to `/` and lower-casing):
 * - `scripts/test-*.<ext>`
 * - `scripts/*<e2e|smoke|playwright|browser>*.<ext>`
 * - `playwright.config.<ext>` / `cypress.config.<ext>`
 *
 * `<ext>` is any of `js`, `cjs`, `mjs`, `ts`, `cts`, `mts`. The module-flavoured
 * TypeScript extensions are included so that e.g. `playwright.config.mts` is
 * classified the same way as `playwright.config.mjs`.
 *
 * @param path - Repository-relative or absolute path, with either separator.
 */
export function isBrowserSmokeHarnessPath(path: string): boolean {
  const normalized = path.replace(/\\/g, "/").toLowerCase();
  return (
    /(^|\/)scripts\/test-[^/]*\.[cm]?[jt]s$/.test(normalized) ||
    /(^|\/)scripts\/[^/]*(?:e2e|smoke|playwright|browser)[^/]*\.[cm]?[jt]s$/.test(normalized) ||
    /(^|\/)(?:playwright|cypress)\.config\.[cm]?[jt]s$/.test(normalized)
  );
}
1520
+
1521
/**
 * Reports whether a path is test-related: either a file counted for assertion
 * coverage or a browser smoke/e2e harness file.
 *
 * @param path - Path to classify.
 */
export function isLikelyTestPath(path: string): boolean {
  if (isAssertionCoverageTestPath(path)) return true;
  return isBrowserSmokeHarnessPath(path);
}
1524
+
1480
1525
  function extractRunnableValidationCommand(step: string): string | null {
1481
1526
  const trimmed = step.trim();
1482
1527
  if (!trimmed) return null;
@@ -1582,6 +1627,188 @@ export function extractValidationFailureDigest(run: {
1582
1627
  return "";
1583
1628
  }
1584
1629
 
1630
/**
 * Classifies browser validation output into a failure kind.
 *
 * Rules are checked in order and the first match wins: runtime (browser
 * launch/install, command timeout or termination), then startup (port binding
 * and permission errors), then network, then assertion. Text matching none of
 * them is `"unknown"`. ANSI control sequences are stripped before matching.
 *
 * @param text - Raw failure text (digest and/or command output).
 */
function classifyBrowserValidationFailureKindFromText(text: string): BrowserValidationFailureKind {
  const cleaned = stripAnsiControlSequences(text);
  const orderedRules: Array<[BrowserValidationFailureKind, RegExp]> = [
    [
      "runtime",
      /\b(browserType\.launch|Executable doesn't exist|playwright install|Browser runtime preflight failed|Please run the following command to download new browsers|Validation command timed out|terminated by signal|SIGTERM|timed out after \d+ms)\b/i,
    ],
    [
      "startup",
      /\b(ERR_SOCKET_BAD_PORT|EADDRINUSE|listen\s+EPERM|EPERM|EACCES|freeport|port selection|Expo exited early|local port bind|cannot bind|operation not permitted)\b/i,
    ],
    [
      "network",
      /\b(page\.[a-z0-9_]+:\s+net::ERR_[A-Z0-9_]+|ECONNREFUSED|ECONNRESET|ETIMEDOUT)\b/i,
    ],
  ];
  for (const [kind, signature] of orderedRules) {
    if (signature.test(cleaned)) return kind;
  }
  // Assertion signatures are consulted last so infrastructure noise wins.
  return isBrowserAssertionDigest(cleaned) ? "assertion" : "unknown";
}
1654
+
1655
/**
 * Pulls the failing stage name out of browser validation text.
 *
 * Patterns are tried from most to least specific ("Browser validation failed
 * during X stage", "failed during X stage", then `stage:`/`phase=` style
 * labels); the first non-empty capture is returned as a single line of at most
 * 80 characters.
 *
 * @param text - Failure output or digest.
 * @returns The stage name, or `null` when no pattern yields one.
 */
function extractBrowserValidationStage(text: string): string | null {
  const stagePatterns: RegExp[] = [
    /\bBrowser validation failed during\s+([^:.\r\n]+?)\s+stage\b/i,
    /\bfailed during\s+([^:.\r\n]+?)\s+stage\b/i,
    /\b(?:stage|phase)\s*[:=]\s*["'`]?([^"'`.\r\n]+)["'`]?/i,
  ];
  for (const stagePattern of stagePatterns) {
    const captured = stagePattern.exec(text)?.[1]?.trim();
    if (!captured) continue;
    return toSingleLine(captured, 80);
  }
  return null;
}
1668
+
1669
/**
 * Finds the first `getBy*(`, `locator.x(` or `page.x(` call in `text` whose
 * parentheses balance, and returns the whole call expression as a single line
 * of at most 120 characters.
 *
 * Parentheses inside quoted strings (single, double or backtick, with
 * backslash escapes) are ignored. A candidate is abandoned when whitespace is
 * seen while no parenthesis is open, or when the text ends before the call
 * closes; scanning then resumes at the next candidate.
 *
 * @param text - Failure output to scan.
 * @returns The balanced call text, or `null` when no candidate balances.
 */
function extractBalancedLocatorCall(text: string): string | null {
  // Returns the index of the closing ")" for the call starting at `start`,
  // or -1 when the candidate has to be abandoned.
  const findCallEnd = (start: number): number => {
    let openParens = 0;
    let activeQuote = "";
    let afterBackslash = false;
    for (let cursor = start; cursor < text.length; cursor += 1) {
      const ch = text[cursor] ?? "";
      if (activeQuote) {
        if (afterBackslash) {
          afterBackslash = false;
        } else if (ch === "\\") {
          afterBackslash = true;
        } else if (ch === activeQuote) {
          activeQuote = "";
        }
        continue;
      }
      if (ch === "'" || ch === '"' || ch === "`") {
        activeQuote = ch;
        continue;
      }
      if (ch === "(") {
        openParens += 1;
        continue;
      }
      if (ch === ")") {
        openParens -= 1;
        if (openParens === 0) return cursor;
      }
      if (openParens <= 0 && cursor > start && /\s/.test(ch)) return -1;
    }
    return -1;
  };

  const callStart =
    /\b(?:getBy(?:TestId|Role|Text|Label|Placeholder|Title)|locator\.[a-z0-9_]+|page\.[a-z0-9_]+)\(/gi;
  for (
    let candidate = callStart.exec(text);
    candidate != null;
    candidate = callStart.exec(text)
  ) {
    const closeAt = findCallEnd(candidate.index);
    if (closeAt >= 0) {
      return toSingleLine(text.slice(candidate.index, closeAt + 1), 120);
    }
  }
  return null;
}
1705
+
1706
/**
 * Extracts the selector or wait call involved in a browser failure.
 *
 * A parenthesis-balanced call found by `extractBalancedLocatorCall` is
 * preferred. Otherwise simpler single-line patterns are tried in order
 * (`waiting for getBy...(...)`, `locator.x(...)`, `page.x(...)`, bare
 * `getBy...(...)`) and the first non-empty capture is returned as a single
 * line of at most 120 characters.
 *
 * @param text - Failure output or digest.
 * @returns The selector text, or `null` when nothing matches.
 */
function extractBrowserValidationSelector(text: string): string | null {
  const balancedCall = extractBalancedLocatorCall(text);
  if (balancedCall) return balancedCall;

  const fallbackPatterns = [
    /\bwaiting for\s+(getBy(?:TestId|Role|Text|Label|Placeholder|Title)\([^)\r\n]+\))/i,
    /\b(locator\.[a-z0-9_]+\([^)\r\n]*\))/i,
    /\b(page\.[a-z0-9_]+\([^)\r\n]*\))/i,
    /\b(getBy(?:TestId|Role|Text|Label|Placeholder|Title)\([^)\r\n]+\))/i,
  ];
  const firstCapture = fallbackPatterns
    .map((candidate) => text.match(candidate)?.[1]?.trim())
    .find((captured) => Boolean(captured));
  return firstCapture ? toSingleLine(firstCapture, 120) : null;
}
1722
+
1723
/**
 * Extracts the "Expected ..." phrase from browser failure text.
 *
 * Three patterns are tried in order: "Expected X within Nms", "Expected X"
 * terminated by `:`, `.` or a newline, and "Expected X" running to the end of
 * the text. The first non-empty capture is returned as a single line of at
 * most 140 characters.
 *
 * @param text - Failure output or digest.
 * @returns The expected-UI phrase, or `null` when nothing matches.
 */
function extractBrowserValidationExpectedUi(text: string): string | null {
  const expectationPatterns = [
    /\bExpected\s+([^:.\r\n]+?)\s+within\s+\d+ms\b/i,
    /\bExpected\s+([^:.\r\n]+?)(?:[:.]|\r?\n)/i,
    /\bExpected\s+([^:.\r\n]+?)$/i,
  ];
  let found: string | null = null;
  for (let i = 0; i < expectationPatterns.length && found === null; i += 1) {
    const phrase = expectationPatterns[i]?.exec(text)?.[1]?.trim();
    if (phrase) found = toSingleLine(phrase, 140);
  }
  return found;
}
1736
+
1737
/**
 * Collects up to four distinct artifact paths (screenshots, traces, videos,
 * reports, ...) mentioned in browser validation output.
 *
 * Two passes are made over the ANSI-stripped text: first labelled mentions
 * such as `screenshot: <path>`, then any bare path containing an `outputs`,
 * `test-results` or `playwright-report` directory and ending in a known
 * artifact extension. Trailing punctuation is removed, duplicates are skipped,
 * and each path is reduced to a single line of at most 220 characters.
 *
 * @param text - Raw command output.
 * @returns Artifact paths in the order they were discovered.
 */
function extractBrowserValidationArtifacts(text: string): string[] {
  const cleaned = stripAnsiControlSequences(text);
  const collected: string[] = [];
  const recorded = new Set<string>();
  const finders = [
    /\b(?:screenshot|snapshot|trace|video|artifact|output|saved|wrote)[^:\r\n]*:\s*(["'`]?)([^"'`\s]+(?:outputs|test-results|playwright-report)[^\s"'`]+(?:\.png|\.jpg|\.jpeg|\.webp|\.zip|\.json|\.txt|\.webm))\1/gi,
    /((?:\/repo|\/workspace|[A-Za-z]:[\\/])?[^\s"'`]*?(?:outputs|test-results|playwright-report)[\\/][^\s"'`]+(?:\.png|\.jpg|\.jpeg|\.webp|\.zip|\.json|\.txt|\.webm))/gi,
  ];
  for (const finder of finders) {
    for (let hit = finder.exec(cleaned); hit != null; hit = finder.exec(cleaned)) {
      // The labelled pattern captures the path in group 2 (group 1 is the
      // optional quote); the bare pattern only has group 1.
      const candidate = String(hit[2] ?? hit[1] ?? "")
        .trim()
        .replace(/[),.;:]+$/, "");
      if (candidate && !recorded.has(candidate)) {
        recorded.add(candidate);
        collected.push(toSingleLine(candidate, 220));
      }
      if (collected.length >= 4) return collected;
    }
  }
  return collected;
}
1762
+
1763
/**
 * Condenses browser validation output to its most relevant lines.
 *
 * ANSI sequences are stripped, lines are trimmed, empty lines are dropped, and
 * only lines containing a known failure marker are kept. The first eight such
 * lines are joined with `" | "` and reduced to a single line of at most 900
 * characters.
 *
 * The marker pattern applies a trailing word boundary only to markers that end
 * on a complete word. Prefix-style markers (`Expected `, `locator.`, `page.`,
 * `waiting for getBy`, `Call log:`, `net::ERR_`) are followed by arbitrary
 * text, so a trailing `\b` would reject real lines such as
 * `waiting for getByTestId(...)`, `net::ERR_CONNECTION_REFUSED`, or a bare
 * `Call log:`.
 *
 * @param text - Raw command output.
 * @returns The condensed summary, or an empty result when no line matches.
 */
function summarizeBrowserValidationOutput(text: string): string {
  const relevantLine =
    /\b(?:(?:Web end-to-end smoke test failed|Browser validation failed|ERR_SOCKET_BAD_PORT|EADDRINUSE|EPERM|EACCES|browserType\.launch|Executable doesn't exist|Expo exited early|freeport|Validation command timed out|terminated by signal|SIGTERM|timed out after \d+ms)\b|Expected |locator\.|page\.|waiting for getBy|Call log:|net::ERR_)/i;
  const lines = stripAnsiControlSequences(text)
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter(Boolean)
    .filter((line) => relevantLine.test(line));
  return toSingleLine(lines.slice(0, 8).join(" | "), 900);
}
1775
+
1776
/**
 * Builds a repair packet for the first failed browser validation run whose
 * failure can be classified.
 *
 * Runs that passed, runs whose command is not a long-running browser
 * validation command, and runs classified as `"unknown"` are skipped.
 *
 * @param validationRuns - Validation results, inspected in order.
 * @param previousFailureDigests - Digests recorded earlier, keyed by
 *   `validationCommandKey(command)`; used to tell a first failure from a
 *   repeated or changed one.
 * @returns The packet for the first qualifying run, or `null` if none qualifies.
 */
export function buildBrowserValidationRepairPacket(
  validationRuns: ValidationExecutionResult[],
  previousFailureDigests: Map<string, string> = new Map(),
): BrowserValidationRepairPacket | null {
  for (const run of validationRuns) {
    const isFailedBrowserRun = !run.ok && isLongRunningBrowserValidationCommand(run.command);
    if (!isFailedBrowserRun) continue;

    const output = stripAnsiControlSequences([run.stderr, run.stdout].filter(Boolean).join("\n"));
    const digest = extractValidationFailureDigest(run);
    const failureKind = classifyBrowserValidationFailureKindFromText(`${digest}\n${output}`);
    if (failureKind === "unknown") continue;

    const previousDigest = previousFailureDigests.get(validationCommandKey(run.command)) ?? null;

    let progress: BrowserValidationRepairPacket["progress"];
    if (previousDigest == null) {
      progress = "first_failure";
    } else if (previousDigest === digest) {
      progress = "same_failure";
    } else {
      progress = "new_failure";
    }

    // Previous-failure details are re-derived from the stored digest text.
    const fromPrevious = (extract: (source: string) => string | null): string | null =>
      previousDigest ? extract(previousDigest) : null;

    return {
      command: run.command,
      failureKind,
      stage: extractBrowserValidationStage(output),
      selector: extractBrowserValidationSelector(output),
      expected: extractBrowserValidationExpectedUi(output),
      digest,
      previousDigest,
      previousStage: fromPrevious(extractBrowserValidationStage),
      previousSelector: fromPrevious(extractBrowserValidationSelector),
      previousExpected: fromPrevious(extractBrowserValidationExpectedUi),
      progress,
      artifacts: extractBrowserValidationArtifacts(output),
      output: summarizeBrowserValidationOutput(output) || digest,
    };
  }
  return null;
}
1811
+
1585
1812
  export function collectRequiredValidationFailures(
1586
1813
  requiredCommands: string[],
1587
1814
  validationRuns: Array<{ command: string; ok: boolean; exitCode?: number }>,
@@ -1866,6 +2093,9 @@ async function runDeterministicQualityGate(
1866
2093
  [...changedPaths, ...preparedMergeConflictPaths].filter((path) => isLikelyTestPath(path)),
1867
2094
  ),
1868
2095
  );
2096
+ const changedAssertionCoverageTestPaths = changedTestPaths.filter((path) =>
2097
+ isAssertionCoverageTestPath(path),
2098
+ );
1869
2099
  const issues: string[] = [];
1870
2100
  const scopeIssues: string[] = [];
1871
2101
  const validationIssues: string[] = [];
@@ -1890,8 +2120,8 @@ async function runDeterministicQualityGate(
1890
2120
  }
1891
2121
  if (
1892
2122
  isTestTask &&
1893
- changedTestPaths.length > 0 &&
1894
- !hasBalancedPositiveNegativeAssertions(changedTestPaths, repo)
2123
+ changedAssertionCoverageTestPaths.length > 0 &&
2124
+ !hasBalancedPositiveNegativeAssertions(changedAssertionCoverageTestPaths, repo)
1895
2125
  ) {
1896
2126
  addScopeIssue(
1897
2127
  "found changed test files without both positive and negative assertion coverage (expected both).",
@@ -2344,9 +2574,98 @@ export function buildQualityRevisionHint(
2344
2574
  reviewFixContext?: ReviewFixContext | null,
2345
2575
  validationRuns: ValidationExecutionResult[] = [],
2346
2576
  validationBlocker: ValidationBlocker | null = null,
2577
+ browserRepairPacket: BrowserValidationRepairPacket | null = null,
2347
2578
  ): string {
2348
2579
  const lines: string[] = [];
2349
2580
  lines.push("Quality revision required before completion.");
2581
+ const focusedBrowserRepair = Boolean(browserRepairPacket);
2582
+ if (browserRepairPacket) {
2583
+ lines.push("Primary ValidationGate repair objective:");
2584
+ lines.push(`- Command: ${browserRepairPacket.command}`);
2585
+ lines.push(`- Failure type: browser ${browserRepairPacket.failureKind}`);
2586
+ lines.push(
2587
+ "- First action: inspect the captured browser output/artifacts and actual rendered UI before editing; do not guess from component names or intended copy.",
2588
+ );
2589
+ if (browserRepairPacket.stage) lines.push(`- Stage: ${browserRepairPacket.stage}`);
2590
+ if (browserRepairPacket.expected) {
2591
+ lines.push(`- Expected UI: ${browserRepairPacket.expected}`);
2592
+ }
2593
+ if (browserRepairPacket.selector) {
2594
+ lines.push(`- Selector/wait: ${browserRepairPacket.selector}`);
2595
+ }
2596
+ if (browserRepairPacket.artifacts.length > 0) {
2597
+ lines.push("Failure artifacts to inspect:");
2598
+ for (const artifact of browserRepairPacket.artifacts) {
2599
+ lines.push(`- ${artifact}`);
2600
+ }
2601
+ } else {
2602
+ lines.push(
2603
+ "- Failure artifacts: none were captured in command output; if this repo writes screenshots/traces, inspect the latest browser failure artifact before changing selectors.",
2604
+ );
2605
+ }
2606
+ if (browserRepairPacket.digest) {
2607
+ lines.push(`- Current failure: ${browserRepairPacket.digest}`);
2608
+ }
2609
+ if (browserRepairPacket.previousDigest) {
2610
+ const breadcrumb =
2611
+ browserRepairPacket.progress === "same_failure"
2612
+ ? "same failure repeated for this command"
2613
+ : "new failure for this command after the previous revision";
2614
+ lines.push(`- Breadcrumb: ${breadcrumb}; previous failure was ${browserRepairPacket.previousDigest}`);
2615
+ if (
2616
+ browserRepairPacket.previousStage ||
2617
+ browserRepairPacket.previousExpected ||
2618
+ browserRepairPacket.previousSelector
2619
+ ) {
2620
+ lines.push("Previous browser failure detail:");
2621
+ if (browserRepairPacket.previousStage) {
2622
+ lines.push(`- Previous stage: ${browserRepairPacket.previousStage}`);
2623
+ }
2624
+ if (browserRepairPacket.previousExpected) {
2625
+ lines.push(`- Previous expected UI: ${browserRepairPacket.previousExpected}`);
2626
+ }
2627
+ if (browserRepairPacket.previousSelector) {
2628
+ lines.push(`- Previous selector/wait: ${browserRepairPacket.previousSelector}`);
2629
+ }
2630
+ }
2631
+ } else {
2632
+ lines.push("- Breadcrumb: first captured failure for this command in this revision loop");
2633
+ }
2634
+ if (browserRepairPacket.output) {
2635
+ lines.push(`- Relevant output: ${browserRepairPacket.output}`);
2636
+ }
2637
+ if (browserRepairPacket.failureKind === "assertion") {
2638
+ lines.push(
2639
+ "Repair direction: fix this exact visible UI assertion or the app state that should make it true. If the expected text/role/test id is not present in the screenshot, update the smoke assertion to the visible product UI that proves the same stage, or add accessibility metadata to an existing control. Do not add optional navigation or broaden the smoke path. Do not change browser startup, port selection, Playwright installation, or unrelated e2e harness behavior unless the captured failure is reclassified as startup/setup.",
2640
+ );
2641
+ lines.push(
2642
+ "Selector stability rule: prefer existing data-testid/accessibility labels/roles and stage containers over guessed title/body text. If a stage already passed with a stable container such as a home/shell/test-id locator, reuse that signal instead of replacing it with copy checks.",
2643
+ );
2644
+ lines.push(
2645
+ "Text assertion rule: rendered titles may be split across sibling nodes. Do not invent a combined phrase for split text; either assert the individual visible fragments within the stage container or add/reuse a stable test id/accessibility label.",
2646
+ );
2647
+ if (
2648
+ browserRepairPacket.progress === "same_failure" ||
2649
+ (browserRepairPacket.stage &&
2650
+ browserRepairPacket.previousStage &&
2651
+ browserRepairPacket.stage === browserRepairPacket.previousStage)
2652
+ ) {
2653
+ lines.push(
2654
+ "Repeated-stage rule: this browser stage has failed before in the current revision loop, so treat the previous selector/copy assumption as suspect and switch to the most stable rendered locator for that same stage.",
2655
+ );
2656
+ }
2657
+ } else {
2658
+ lines.push(
2659
+ "Repair direction: this is a browser startup/runtime/network failure. Fix only startup/runtime provisioning for this command and do not rewrite app UI assertions unless a later ValidationGate run reaches an assertion stage.",
2660
+ );
2661
+ }
2662
+ lines.push(
2663
+ "Convergence rule: preserve stages that already passed, repair only the current failing browser stage, and stop after one targeted browser confirmation so the next ValidationGate run gets a clean signal.",
2664
+ );
2665
+ lines.push(
2666
+ `Validation rerun rule: PushPals ValidationGate will rerun "${browserRepairPacket.command}" after the patch. During the edit turn, run focused fast checks first; only run the full browser command for one targeted confirmation and stop on the first clear stage failure.`,
2667
+ );
2668
+ }
2350
2669
  if (reviewFixContext) {
2351
2670
  lines.push("Rejected PR retry requirements:");
2352
2671
  if (reviewFixContext.previousReviewScore != null) {
@@ -2373,8 +2692,28 @@ export function buildQualityRevisionHint(
2373
2692
  lines.push("Raise the score above the approval threshold without reopening already accepted behavior.");
2374
2693
  }
2375
2694
  if (issues.length > 0) {
2376
- lines.push("Deterministic quality issues:");
2377
- for (const issue of issues) lines.push(`- ${issue}`);
2695
+ const displayedIssues = focusedBrowserRepair
2696
+ ? issues.filter(
2697
+ (issue) =>
2698
+ issue.startsWith("ValidationGate:") ||
2699
+ issue.includes("Required vision.md validation") ||
2700
+ issue.includes("Validation blocker"),
2701
+ )
2702
+ : issues;
2703
+ if (displayedIssues.length > 0) {
2704
+ lines.push(
2705
+ focusedBrowserRepair
2706
+ ? "Deterministic quality issues relevant to this validation repair:"
2707
+ : "Deterministic quality issues:",
2708
+ );
2709
+ for (const issue of displayedIssues) lines.push(`- ${issue}`);
2710
+ }
2711
+ const suppressedCount = issues.length - displayedIssues.length;
2712
+ if (focusedBrowserRepair && suppressedCount > 0) {
2713
+ lines.push(
2714
+ `Suppressed ${suppressedCount} lower-priority ScopeGate/CriticGate note(s) until the browser validation repair passes.`,
2715
+ );
2716
+ }
2378
2717
  }
2379
2718
  if (validationBlocker) {
2380
2719
  lines.push(
@@ -2387,7 +2726,10 @@ export function buildQualityRevisionHint(
2387
2726
  const failedValidationRuns = validationRuns.filter((run) => !run.ok);
2388
2727
  if (failedValidationRuns.length > 0) {
2389
2728
  lines.push("Validation failure diagnostics:");
2390
- for (const run of failedValidationRuns.slice(0, 5)) {
2729
+ const runsToShow = browserRepairPacket
2730
+ ? failedValidationRuns.filter((run) => run.command === browserRepairPacket.command).slice(0, 1)
2731
+ : failedValidationRuns.slice(0, 5);
2732
+ for (const run of runsToShow) {
2391
2733
  lines.push(`- ${run.command} failed with exit ${run.exitCode} after ${run.elapsedMs}ms.`);
2392
2734
  const output = toSingleLine(
2393
2735
  stripAnsiControlSequences([run.stderr, run.stdout].filter(Boolean).join("\n")),
@@ -2397,14 +2739,40 @@ export function buildQualityRevisionHint(
2397
2739
  }
2398
2740
  }
2399
2741
  if (critic) {
2400
- lines.push(`Critic score: ${critic.score.toFixed(1)} / 10`);
2401
- if (critic.mustFix.length > 0) {
2742
+ const deferCriticForBrowserAssertion =
2743
+ focusedBrowserRepair && browserRepairPacket?.failureKind === "assertion";
2744
+ const criticIsSevere =
2745
+ critic.score <= 4 ||
2746
+ [...critic.mustFix, ...critic.findings, critic.revisionGuidance].some((entry) =>
2747
+ /\b(browser|e2e|validation|web smoke|playwright)\b/i.test(entry),
2748
+ );
2749
+ if (deferCriticForBrowserAssertion) {
2750
+ lines.push(
2751
+ `CriticGate notes deferred while repairing the primary browser assertion failure (score ${critic.score.toFixed(1)} / 10).`,
2752
+ );
2753
+ } else if (!focusedBrowserRepair || criticIsSevere) {
2754
+ lines.push(`Critic score: ${critic.score.toFixed(1)} / 10`);
2755
+ }
2756
+ if (
2757
+ !deferCriticForBrowserAssertion &&
2758
+ (!focusedBrowserRepair || criticIsSevere) &&
2759
+ critic.mustFix.length > 0
2760
+ ) {
2402
2761
  lines.push("Critic must-fix findings:");
2403
2762
  for (const issue of critic.mustFix) lines.push(`- ${issue}`);
2404
2763
  }
2405
- if (critic.revisionGuidance) {
2764
+ if (
2765
+ !deferCriticForBrowserAssertion &&
2766
+ (!focusedBrowserRepair || criticIsSevere) &&
2767
+ critic.revisionGuidance
2768
+ ) {
2406
2769
  lines.push(`Critic revision guidance: ${critic.revisionGuidance}`);
2407
2770
  }
2771
+ if (focusedBrowserRepair && !criticIsSevere && !deferCriticForBrowserAssertion) {
2772
+ lines.push(
2773
+ `CriticGate notes deferred while repairing the primary browser validation failure (score ${critic.score.toFixed(1)} / 10).`,
2774
+ );
2775
+ }
2408
2776
  }
2409
2777
  if (planning.acceptanceCriteria.length > 0) {
2410
2778
  lines.push("Required acceptance criteria:");
@@ -2661,10 +3029,14 @@ export type WorkerGitCommitIdentity = SourceControlCommitIdentity;
2661
3029
 
2662
3030
  export const explicitWorkerCommitIdentityFromEnv = explicitSourceControlCommitIdentityFromEnv;
2663
3031
 
3032
/**
 * Builds the git argument list that unstages the sandbox artifact paths:
 * `reset -q -- <SANDBOX_STAGE_ARTIFACT_PATHS...>`.
 *
 * @returns A fresh argument array (without the leading `git`).
 */
export function buildSandboxArtifactUnstageCommand(): string[] {
  const resetPrefix = ["reset", "-q", "--"];
  return resetPrefix.concat(SANDBOX_STAGE_ARTIFACT_PATHS);
}
3035
+
2664
3036
  async function unstageSandboxArtifactPaths(
2665
3037
  repo: string,
2666
3038
  ): Promise<{ ok: boolean; stdout: string; stderr: string }> {
2667
- return git(repo, ["reset", "-q", "--", ...SANDBOX_STAGE_ARTIFACT_PATHS]);
3039
+ return git(repo, buildSandboxArtifactUnstageCommand());
2668
3040
  }
2669
3041
 
2670
3042
  async function resolveGitConfigValue(repo: string, key: string): Promise<string> {
@@ -4499,7 +4871,7 @@ function hasInvalidRepoPathHint(values: string[]): boolean {
4499
4871
  return values.some((entry) => normalizeStagePath(entry) === null);
4500
4872
  }
4501
4873
 
4502
/**
 * Repository-relative paths treated as sandbox artifacts; they are the paths
 * passed to `git reset -q --` by `buildSandboxArtifactUnstageCommand`.
 */
export const SANDBOX_STAGE_ARTIFACT_PATHS = [
  "workspace",
  "outputs",
  ".codex",
  "node_modules",
];
4503
4875
 
4504
4876
  function taskExecuteOrigin(params: Record<string, unknown>): "autonomy" | "user" {
4505
4877
  const explicit = String(params.origin ?? "")
@@ -5190,6 +5562,10 @@ export async function executeJob(
5190
5562
  revisionAttempt,
5191
5563
  },
5192
5564
  );
5565
+ const browserRepairPacket = buildBrowserValidationRepairPacket(
5566
+ quality.validationRuns,
5567
+ previousValidationFailureDigests,
5568
+ );
5193
5569
  for (const run of quality.validationRuns) {
5194
5570
  if (run.ok) continue;
5195
5571
  const digest = extractValidationFailureDigest(run);
@@ -5324,8 +5700,15 @@ export async function executeJob(
5324
5700
  ? []
5325
5701
  : quality.requiredValidationFailures,
5326
5702
  blocker: validationOutsideTaskScope ? null : quality.blocker,
5703
+ browserRepairPacket: validationOutsideTaskScope ? null : browserRepairPacket,
5327
5704
  });
5328
- const issueSummary = issues.map((entry) => toSingleLine(entry, 180)).join(" | ");
5705
+ const issueSummary =
5706
+ browserRepairPacket && !validationOutsideTaskScope
5707
+ ? `ValidationGate browser ${browserRepairPacket.failureKind} repair for ${browserRepairPacket.command}: ${toSingleLine(
5708
+ browserRepairPacket.digest,
5709
+ 180,
5710
+ )}`
5711
+ : issues.map((entry) => toSingleLine(entry, 180)).join(" | ");
5329
5712
  if (quality.blocker && !validationOutsideTaskScope) {
5330
5713
  const blockerSummary = `Quality gate blocked by ${quality.blocker.category} issue: ${quality.blocker.detail}`;
5331
5714
  const blockerDiagnostics = truncate(
@@ -5456,6 +5839,7 @@ export async function executeJob(
5456
5839
  reviewFixContext,
5457
5840
  validationOutsideTaskScope ? [] : quality.validationRuns,
5458
5841
  validationOutsideTaskScope ? null : quality.blocker,
5842
+ validationOutsideTaskScope ? null : browserRepairPacket,
5459
5843
  );
5460
5844
  onLog?.(
5461
5845
  "stderr",
@@ -10,6 +10,7 @@
10
10
  "cli:integration": "bun run scripts/cli-integration.ts",
11
11
  "cli:bundle": "bun run --cwd packages/cli build",
12
12
  "cli:monitor:export": "bun run scripts/sync-cli-monitor-ui.ts",
13
+ "replay:worker-job": "bun run scripts/replay-worker-job.ts",
13
14
  "protocol:build": "bun --cwd packages/protocol build",
14
15
  "protocol:typecheck": "bun --cwd packages/protocol typecheck",
15
16
  "server:only": "bun --cwd apps/server --env-file ../../.env dev",