@pushpalsdev/cli 1.1.9 → 1.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,11 +6,13 @@
6
6
  import {
7
7
  existsSync,
8
8
  lstatSync,
9
+ mkdirSync,
9
10
  readdirSync,
10
11
  readFileSync,
11
12
  renameSync,
12
13
  rmSync,
13
14
  unlinkSync,
15
+ writeFileSync,
14
16
  } from "fs";
15
17
  import { resolve } from "path";
16
18
  import {
@@ -24,6 +26,7 @@ import {
24
26
  matchesGlob,
25
27
  normalizeTargetPath,
26
28
  requirementsForValidationCommand,
29
+ resolveGitStateFilePath,
27
30
  sanitizeSourceControlIdentityField,
28
31
  type SourceControlCommitIdentity,
29
32
  type ToolRequirement,
@@ -93,6 +96,8 @@ export interface BrowserValidationRepairPacket {
93
96
  selector: string | null;
94
97
  expected: string | null;
95
98
  failureFocus: string | null;
99
+ lastVerifiedStage?: string | null;
100
+ pageUrl?: string | null;
96
101
  digest: string;
97
102
  previousDigest: string | null;
98
103
  previousStage: string | null;
@@ -101,10 +106,32 @@ export interface BrowserValidationRepairPacket {
101
106
  previousFailureFocus: string | null;
102
107
  progress: "first_failure" | "same_failure" | "new_failure";
103
108
  needsDiagnosticProbe: boolean;
109
+ mustReadArtifactsBeforeEdit?: boolean;
104
110
  artifacts: string[];
111
+ artifactSummaries?: string[];
112
+ knownFailureHints?: string[];
105
113
  output: string;
106
114
  }
107
115
 
116
/**
 * One persisted record of a browser-validation failure, as written to and read
 * back from the failure-memory JSON file.
 */
interface BrowserFailureMemoryEntry {
  // --- identity -----------------------------------------------------------
  /** Lookup key; a recurrence of the same failure updates the entry with this key. */
  key: string;
  /** Job family the failure was recorded under; hints are only served to the same family. */
  jobFamily: string;
  /** Validation command copied from the repair packet. */
  command: string;

  // --- failure signature (copied from the repair packet on first sighting) --
  failureKind: BrowserValidationFailureKind;
  stage: string | null;
  selector: string | null;
  expected: string | null;
  failureFocus: string | null;
  /** Digest of the most recent occurrence. */
  digest: string;

  // --- recurrence bookkeeping ------------------------------------------------
  /** Number of times this failure has been recorded; starts at 1. */
  count: number;
  /** ISO-8601 timestamp of the first recording. */
  firstSeenAt: string;
  /** ISO-8601 timestamp of the latest recording. */
  lastSeenAt: string;

  // --- latest diagnostic context ---------------------------------------------
  lastVerifiedStage: string | null;
  pageUrl: string | null;
  /** Artifact summaries from the latest occurrence (at most six are stored). */
  artifactSummaries: string[];
  /** Remedy text generated for the latest occurrence. */
  suggestedRemedy: string;
}
134
+
108
135
  interface DeterministicQualityResult {
109
136
  ok: boolean;
110
137
  skipped: boolean;
@@ -149,7 +176,7 @@ export interface QualityGatePolicy {
149
176
  criticMinScore: number;
150
177
  }
151
178
 
152
- const BROWSER_VALIDATION_MAX_AUTO_REVISIONS = 8;
179
+ const BROWSER_VALIDATION_MAX_AUTO_REVISIONS = 3;
153
180
 
154
181
  export function qualityRevisionLoopUpperBound(policy: {
155
182
  maxAutoRevisions: number;
@@ -326,6 +353,122 @@ export function buildQualityGateRevisionIssues(
326
353
  return [...new Set(merged)];
327
354
  }
328
355
 
356
/**
 * Builds a warning when a task has changed more files than its diff budget allows.
 *
 * Paths inside generated or vendored directories (outputs, node_modules,
 * .worktrees, dist, build, coverage) are not counted. The budget is
 * `planning.scope.maxFilesToEdit` when that is a positive finite number;
 * otherwise it is 5 files for a small task and 10 files by default.
 *
 * @param planning - Task planning data. `scope`, `targetPaths` and
 *   `acceptanceCriteria` may be absent and are then treated as empty.
 * @param changedPaths - Paths changed so far; either path separator is accepted.
 * @param focusedBrowserRepair - When true the task is always treated as small.
 * @returns The warning text, or `null` when the change set is within budget.
 */
function buildDiffBudgetWarning(
  planning: TaskExecutePlanning,
  changedPaths: string[],
  focusedBrowserRepair: boolean,
): string | null {
  const meaningfulChangedPaths = changedPaths.filter(
    (path) =>
      // Normalize Windows separators so "dist\\x.js" is excluded just like "dist/x.js".
      !/(^|\/)(outputs|node_modules|\.worktrees|dist|build|coverage)(\/|$)/i.test(
        path.replace(/\\/g, "/"),
      ),
  );
  if (meaningfulChangedPaths.length === 0) return null;

  // Optional chaining: a planning object without `scope` yields NaN here instead of throwing.
  const explicitBudget = Number(planning.scope?.maxFilesToEdit);
  const hasExplicitBudget = Number.isFinite(explicitBudget) && explicitBudget > 0;
  const smallTask =
    focusedBrowserRepair ||
    (planning.riskLevel !== "high" &&
      (planning.targetPaths?.length ?? 0) <= 2 &&
      (planning.acceptanceCriteria?.length ?? 0) <= 3);
  const budget = hasExplicitBudget ? Math.floor(explicitBudget) : smallTask ? 5 : 10;
  if (meaningfulChangedPaths.length <= budget) return null;

  const budgetLabel = hasExplicitBudget
    ? "planning.scope.maxFilesToEdit"
    : smallTask
      ? "small-task"
      : "default";
  // Only the first 12 paths are listed; an ellipsis marks the truncation.
  const listedPaths = meaningfulChangedPaths.slice(0, 12).join(", ");
  const ellipsis = meaningfulChangedPaths.length > 12 ? ", ..." : "";
  return `Diff budget warning: this task now changes ${meaningfulChangedPaths.length} file(s), above the ${budget}-file ${budgetLabel} budget. Before editing more, remove unrelated churn and keep only behavior-owning files needed for the current repair. Changed files: ${listedPaths}${ellipsis}`;
}
380
+
381
/**
 * Tells whether a path has a whole segment naming a dependency, runtime or
 * build-output directory (outputs, node_modules, .worktrees, .codex, dist,
 * build, coverage). Matching is case-insensitive and accepts both `/` and `\`
 * as separators.
 */
function isNonPublishableArtifactPath(path: string): boolean {
  const forwardSlashed = path.replace(/\\/g, "/");
  const artifactDirSegment =
    /(^|\/)(outputs|node_modules|\.worktrees|\.codex|dist|build|coverage)(\/|$)/i;
  return artifactDirSegment.test(forwardSlashed);
}
386
+
387
/**
 * Keeps only the changed paths that are not classified as non-publishable
 * artifacts by `isNonPublishableArtifactPath`. Input order is preserved.
 */
export function publishableChangedPaths(changedPaths: string[]): string[] {
  const isPublishable = (candidate: string): boolean =>
    !isNonPublishableArtifactPath(candidate);
  return changedPaths.filter(isPublishable);
}
390
+
391
/**
 * Flattens the textual parts of a planning object into one lower-cased,
 * newline-separated string for keyword matching.
 *
 * The intent and risk level come first, followed by every item of the target
 * paths, acceptance criteria, validation steps, required validation steps and
 * the discovery keywords / likely dirs / ripgrep queries. Missing lists are
 * skipped; null or undefined values become empty strings.
 */
function collectPlanningText(planning: TaskExecutePlanning): string {
  const discovery = planning.discovery;
  const listFields = [
    planning.targetPaths,
    planning.acceptanceCriteria,
    planning.validationSteps,
    planning.requiredValidationSteps,
    discovery?.keywords,
    discovery?.likelyDirs,
    discovery?.ripgrepQueries,
  ];

  const lines: string[] = [String(planning.intent ?? ""), String(planning.riskLevel ?? "")];
  for (const field of listFields) {
    for (const item of field ?? []) {
      lines.push(String(item ?? ""));
    }
  }
  return lines.join("\n").toLowerCase();
}
407
+
408
/**
 * Heuristic: reports whether the planning text mentions any of a fixed list of
 * visual/rendering keywords as a whole word.
 */
function planningLooksLikeVisualDerivationTask(planning: TaskExecutePlanning): boolean {
  const visualKeywordPattern =
    /\b(visual|readability|battlefield|render(?:ing)?|projectile|planet|ship|ring|danger|threat|ownership|dense action|style|ui surface)\b/i;
  return visualKeywordPattern.test(collectPlanningText(planning));
}
414
+
415
/**
 * Produces a warning when validation output looks like a mock/import setup
 * failure rather than a product-behavior failure.
 *
 * The issue list and every run's command, stdout and stderr are searched
 * together. A warning is produced only when both an import/export error
 * phrase and a mock/test-harness term occur somewhere in that text.
 *
 * @returns The warning text (with an extra sentence for visual/rendering
 *   tasks), or `null` when the heuristic does not fire.
 */
function buildTestHarnessConvergenceWarning(
  planning: TaskExecutePlanning,
  issues: string[],
  validationRuns: ValidationExecutionResult[],
): string | null {
  const fragments: unknown[] = [...issues];
  validationRuns.forEach((run) => {
    fragments.push(run.command, run.stdout, run.stderr);
  });
  const combined = fragments.map((fragment) => String(fragment ?? "")).join("\n");

  // First signal: wording typical of a module-resolution or export failure.
  const importFailurePattern =
    /\bCannot find module\b|\bdoes not provide an export\b|\bno exported member\b|\bimport error\b|\bundefined is not a function\b/i;
  if (!importFailurePattern.test(combined)) return null;

  // Second signal: the text also mentions mocks or React Native test plumbing.
  const mockContextPattern =
    /\b(react[- ]native|reactNativeMock|Animated\.View|expo-secure-store|SettingsContext|skin validator|mock|test helper|__mocks__)\b/i;
  if (!mockContextPattern.test(combined)) return null;

  let warning =
    "Test harness convergence warning: validation is failing in mock/import setup rather than product behavior.";
  if (planningLooksLikeVisualDerivationTask(planning)) {
    warning +=
      " For this visual/rendering task, prefer pure helper/state/style-prop tests over a full React Native surface render.";
  }
  warning +=
    " Do not keep expanding broad shared mocks to rescue an over-scoped component render test. If the repo does not already have stable React Native render-test infrastructure for this surface, replace the full-surface regression with smaller deterministic helper/state coverage and one focused assertion on the behavior-owning API.";
  return warning;
}
443
+
444
/**
 * Warns when a focused task has started editing shared mock / test-harness files.
 *
 * Paths under generated or vendored directories are ignored. A warning is
 * produced only when at least one remaining path looks like a mock or test
 * setup file, the task is small (or looks like a visual/rendering task), and
 * the planning text does not itself ask for mock or harness work.
 *
 * @param planning - Task planning data. `targetPaths` and `acceptanceCriteria`
 *   may be absent and are then treated as empty.
 * @param changedPaths - Paths changed so far; either path separator is accepted.
 * @returns The warning text, or `null` when no warning applies.
 */
function buildBroadSharedMockWarning(
  planning: TaskExecutePlanning,
  changedPaths: string[],
): string | null {
  const broadMockPaths = changedPaths.filter((path) => {
    // Match against a forward-slash form so Windows-style paths are classified
    // the same way; the original string is what gets reported.
    const normalized = path.replace(/\\/g, "/");
    if (
      /(^|\/)(outputs|node_modules|\.worktrees|dist|build|coverage)(\/|$)/i.test(normalized)
    ) {
      return false;
    }
    return /(^|\/)(__mocks__|tests\/.*mock|test.*mock|reactNativeMock|setupTests?|jest\.|vitest\.|mock)(\.|\/|$)/i.test(
      normalized,
    );
  });
  if (broadMockPaths.length === 0) return null;

  // Missing lists count as empty instead of throwing on `.length`.
  const smallTask =
    planning.riskLevel !== "high" &&
    ((planning.targetPaths?.length ?? 0) <= 2 ||
      (planning.acceptanceCriteria?.length ?? 0) <= 3);
  if (!smallTask && !planningLooksLikeVisualDerivationTask(planning)) return null;

  // Stay silent when the plan itself asks for mock or harness changes.
  const explicitlyRequested = /mock|test harness|react native test|component render/i.test(
    collectPlanningText(planning),
  );
  if (explicitlyRequested) return null;

  const listedPaths = broadMockPaths.slice(0, 6).join(", ");
  const ellipsis = broadMockPaths.length > 6 ? ", ..." : "";
  return `Broad mock warning: this focused task now changes shared mock/test-harness file(s): ${listedPaths}${ellipsis}. Before continuing, prefer behavior-owned helper/state tests or existing stable render-test infrastructure; do not add broad React Native mocks for a small visual/control change unless the task explicitly requires harness repair.`;
}
471
+
329
472
  const TEST_ASSERTION_BALANCE_ISSUE =
330
473
  "Changed test files do not show both positive and negative assertion coverage (expected both).";
331
474
 
@@ -2032,6 +2175,253 @@ function lastBrowserVerifiedStage(text: string): string | null {
2032
2175
  return lastVerified ? toSingleLine(lastVerified, 80) : null;
2033
2176
  }
2034
2177
 
2178
/**
 * Pulls the first http(s) URL out of browser-validation output.
 *
 * Patterns are tried in priority order: a labelled URL ("page url: ...",
 * "url=..."), a navigation phrase ("navigated to ...", "opened ...",
 * "loading ..."), then any loopback URL with an explicit port. Trailing
 * `)`, `,`, `.` and `;` characters are stripped from the match.
 *
 * @returns The URL passed through `toSingleLine` with a limit of 160, or
 *   `null` when nothing matches.
 */
function extractBrowserValidationUrl(text: string): string | null {
  const clean = stripAnsiControlSequences(text);
  const urlPatterns: RegExp[] = [
    /\b(?:page\s+url|current\s+url|browser\s+url|url)\s*[:=]\s*(https?:\/\/[^\s|"'`<>]+)/i,
    /\b(?:navigated\s+to|opened|loading)\s+(https?:\/\/[^\s|"'`<>]+)/i,
    /\b(https?:\/\/(?:127\.0\.0\.1|localhost|0\.0\.0\.0):\d+\/?[^\s|"'`<>]*)/i,
  ];
  for (const urlPattern of urlPatterns) {
    const captured = urlPattern.exec(clean)?.[1];
    if (captured === undefined) continue;
    const trimmed = captured.replace(/[),.;]+$/, "").trim();
    if (trimmed) return toSingleLine(trimmed, 160);
  }
  return null;
}
2192
+
2193
/**
 * Classifies an artifact by file extension (case-insensitive):
 * png/jpg/jpeg/webp -> "screenshot", zip -> "trace", webm -> "video",
 * log/txt -> "log", json -> "json", anything else -> "artifact".
 */
function inferBrowserArtifactKind(path: string): string {
  const kindsByExtension: ReadonlyArray<readonly [RegExp, string]> = [
    [/\.(?:png|jpe?g|webp)$/i, "screenshot"],
    [/\.zip$/i, "trace"],
    [/\.webm$/i, "video"],
    [/\.(?:log|txt)$/i, "log"],
    [/\.json$/i, "json"],
  ];
  const hit = kindsByExtension.find(([extensionPattern]) => extensionPattern.test(path));
  return hit ? hit[1] : "artifact";
}
2201
+
2202
/**
 * Guesses a stage name from an artifact's file name.
 *
 * The extension is removed, then the stage is taken from whatever follows a
 * leading numeric prefix ("03-open-menu") or, failing that, whatever follows
 * "failure", "failed", "screenshot" or "snapshot" plus a `-`/`_` separator.
 * Runs of `-` and `_` in the result are turned into single spaces.
 *
 * @returns The stage passed through `toSingleLine` with a limit of 80, or
 *   `null` when the file name matches neither form.
 */
function inferBrowserArtifactStageFromPath(path: string): string | null {
  const segments = path.split(/[\\/]/);
  const fileName = segments[segments.length - 1] ?? "";
  const stem = fileName.replace(/\.[^.]+$/, "");
  const stagePatterns = [
    /^\d+[-_](.+)$/,
    /(?:failure|failed|screenshot|snapshot)[-_](.+)$/i,
  ];
  for (const stagePattern of stagePatterns) {
    const captured = stagePattern.exec(stem)?.[1];
    // A whitespace-only capture is not a usable stage; try the next pattern.
    if (captured && captured.trim()) {
      return toSingleLine(captured.replace(/[-_]+/g, " "), 80);
    }
  }
  return null;
}
2213
+
2214
/**
 * Builds one-line summaries for the most relevant browser-validation artifacts.
 *
 * The supplied artifacts are merged with the recently collected ones and the
 * first six are summarized. Each summary names the artifact and its kind and,
 * when available, the stage, selector, URL and last verified stage. Those
 * signals come from the artifact's own text when it is a readable text
 * artifact, and otherwise fall back to the ones found in `params.context`.
 *
 * @param params.repo - Repository root used to resolve relative artifact paths.
 * @param params.artifacts - Artifact paths extracted from the command output.
 * @param params.context - Validation output used for the fallback signals.
 * @returns Up to six summaries, each passed through `toSingleLine` with a limit of 280.
 */
function summarizeBrowserValidationArtifacts(params: {
  repo?: string;
  artifacts: string[];
  context: string;
}): string[] {
  const allArtifacts = mergeBrowserValidationArtifacts(
    params.artifacts,
    collectRecentBrowserValidationArtifacts(params.repo),
  );
  const out: string[] = [];
  // Signals from the overall output, used when an artifact carries none of its own.
  const contextStage = extractBrowserValidationStage(params.context);
  const contextSelector = extractBrowserValidationSelector(params.context);
  const contextUrl = extractBrowserValidationUrl(params.context);
  const contextLastVerified = lastBrowserVerifiedStage(params.context);
  for (const artifact of allArtifacts.slice(0, 6)) {
    const kind = inferBrowserArtifactKind(artifact);
    let artifactText = "";
    // Only text artifacts are parsed. Screenshots, trace archives and videos
    // are binary, and decoding them as UTF-8 costs a full read and produces
    // text that can only yield spurious matches.
    if (/\.(?:log|txt|json)$/i.test(artifact)) {
      const repoResolvedPath =
        params.repo && !/^(?:\/repo|\/workspace|[A-Za-z]:[\\/])/.test(artifact)
          ? resolve(params.repo, artifact)
          : null;
      const readablePath = repoResolvedPath ?? (existsSync(artifact) ? artifact : null);
      if (readablePath) {
        try {
          artifactText = readFileSync(readablePath, "utf8");
        } catch {
          // An unreadable artifact just contributes no signals of its own.
          artifactText = "";
        }
      }
    }
    const artifactContext = artifactText ? stripAnsiControlSequences(artifactText) : "";
    const stage =
      inferBrowserArtifactStageFromPath(artifact) ||
      extractBrowserValidationStage(artifactContext) ||
      contextStage;
    const selector = extractBrowserValidationSelector(artifactContext) || contextSelector;
    const url = extractBrowserValidationUrl(artifactContext) || contextUrl;
    const lastVerified = lastBrowserVerifiedStage(artifactContext) || contextLastVerified;
    const detail = [
      `${artifact} [${kind}]`,
      stage ? `stage=${stage}` : "",
      selector ? `selector=${selector}` : "",
      url ? `url=${url}` : "",
      lastVerified ? `last_verified=${lastVerified}` : "",
    ]
      .filter(Boolean)
      .join(" ");
    out.push(toSingleLine(detail, 280));
  }
  return out;
}
2265
+
2266
/**
 * Picks the canned repair advice for a browser-validation failure based on the
 * packet's `failureKind`. For assertion failures the advice names the failing
 * selector when the packet has one.
 */
function browserFailureSuggestedRemedy(packet: BrowserValidationRepairPacket): string {
  switch (packet.failureKind) {
    case "assertion": {
      const steps = [
        "Read the latest artifact/log/DOM state before editing.",
        "Preserve already-passing browser stages.",
      ];
      if (packet.selector) {
        steps.push(
          `Repair or replace the exact failing locator ${packet.selector} with a stable rendered signal for the same UI stage.`,
        );
      } else {
        steps.push(
          "Repair the exact visible UI assertion or add a stable test id/accessibility label to existing UI.",
        );
      }
      return steps.join(" ");
    }
    case "startup":
    case "runtime":
      return "Treat as browser startup/runtime provisioning; do not rewrite product UI assertions until ValidationGate reaches an assertion stage.";
    case "network":
      return "Treat as local server/network readiness; add bounded startup diagnostics and avoid changing gameplay/UI behavior.";
    default:
      return "Inspect captured validation output and repair the current failing stage with the smallest behavior-owning diff.";
  }
}
2284
+
2285
/**
 * Turns a free-text value into a lower-case slug: the value (empty when
 * null/undefined) goes through `toSingleLine` with a limit of 120, every run
 * of characters outside `a-z0-9` becomes one `-`, and leading/trailing dashes
 * are removed.
 */
function normalizeFailureMemoryToken(value: string | null | undefined): string {
  const singleLine = toSingleLine(value ?? "", 120);
  const dashed = singleLine.toLowerCase().replace(/[^a-z0-9]+/g, "-");
  return dashed.replace(/^-+|-+$/g, "");
}
2288
+
2289
/**
 * Derives a "job family" identifier from job parameters, used to group
 * failure-memory entries.
 *
 * The identifier joins, with `|`, the normalized component area
 * (`autonomy.componentArea` or `autonomy.component_area`), the normalized
 * planning intent, and up to eight normalized hints taken in order from the
 * planning target paths, scope write globs, validation steps and required
 * validation steps. Empty parts are dropped.
 *
 * @param params - Raw job parameters; `planning` and `autonomy` are used only
 *   when they are objects.
 * @returns The joined identifier, or `"general"` when every part is empty.
 */
export function buildTaskFailureJobFamily(params: Record<string, unknown>): string {
  const rawPlanning = params.planning;
  const planning: Partial<TaskExecutePlanning> =
    rawPlanning && typeof rawPlanning === "object"
      ? (rawPlanning as Partial<TaskExecutePlanning>)
      : {};
  const rawAutonomy = params.autonomy;
  const autonomy: Record<string, unknown> =
    rawAutonomy && typeof rawAutonomy === "object"
      ? (rawAutonomy as Record<string, unknown>)
      : {};

  const hintSources: unknown[] = [
    planning.targetPaths,
    planning.scope?.writeGlobs,
    planning.validationSteps,
    planning.requiredValidationSteps,
  ];
  const normalizedHints: string[] = [];
  for (const source of hintSources) {
    if (!Array.isArray(source)) continue;
    for (const entry of source) {
      const token = normalizeFailureMemoryToken(String(entry));
      if (token) normalizedHints.push(token);
    }
  }
  const targetHints = normalizedHints.slice(0, 8);

  const area = normalizeFailureMemoryToken(
    String(autonomy.componentArea ?? autonomy.component_area ?? ""),
  );
  const intent = normalizeFailureMemoryToken(String(planning.intent ?? ""));

  const family = [area, intent, ...targetHints].filter(Boolean).join("|");
  return family || "general";
}
2309
+
2310
/**
 * Builds the lookup key for a failure-memory entry from the job family, the
 * validation command key, the failure kind and the normalized failure focus,
 * stage, selector and expected-UI values, joined with `|`.
 *
 * Empty components are dropped rather than kept as blank slots, so a value's
 * position in the key depends on which of the other fields are present.
 */
function browserFailureMemoryKey(jobFamily: string, packet: BrowserValidationRepairPacket): string {
  const components = [jobFamily, validationCommandKey(packet.command), packet.failureKind];
  const freeTextFields = [packet.failureFocus, packet.stage, packet.selector, packet.expected];
  for (const field of freeTextFields) {
    components.push(normalizeFailureMemoryToken(field));
  }
  return components.filter((component) => Boolean(component)).join("|");
}
2323
+
2324
/**
 * Decides where the failure-memory JSON file lives.
 *
 * The root is the first existing path among the `PUSHPALS_PROJECT_ROOT_OVERRIDE`,
 * `PUSHPALS_REPO_ROOT_OVERRIDE` and `PUSHPALS_REPO_PATH` environment variables
 * and `repo` (each trimmed); when none exists, `repo` is used as given. The
 * file is placed wherever `resolveGitStateFilePath` says for that root, and
 * under `<root>/outputs/data` when that helper returns nothing.
 */
function resolveFailureMemoryPath(repo: string): string {
  const rootCandidates = [
    process.env.PUSHPALS_PROJECT_ROOT_OVERRIDE,
    process.env.PUSHPALS_REPO_ROOT_OVERRIDE,
    process.env.PUSHPALS_REPO_PATH,
    repo,
  ];
  let root = repo;
  for (const candidate of rootCandidates) {
    const trimmed = String(candidate ?? "").trim();
    if (trimmed && existsSync(trimmed)) {
      root = trimmed;
      break;
    }
  }
  const gitStatePath = resolveGitStateFilePath(root, "pushpals-worker-failure-memory.json");
  return gitStatePath
    ? gitStatePath
    : resolve(root, "outputs", "data", "workerpals-failure-memory.json");
}
2338
+
2339
/**
 * Loads the persisted browser failure memory for a repository.
 *
 * The file is advisory, so every problem (missing file, invalid JSON,
 * unexpected shape) results in an empty list rather than an error. Entries
 * that lack the fields later code relies on are discarded, which keeps a
 * hand-edited or corrupted file from crashing sorting or hint formatting.
 *
 * @param repo - Repository path used to locate the memory file.
 * @returns At most 80 usable entries, in file order.
 */
function readBrowserFailureMemory(repo: string): BrowserFailureMemoryEntry[] {
  // Checks the string and number fields that callers dereference. The nullable
  // and array fields are not inspected here.
  const isUsableEntry = (entry: unknown): entry is BrowserFailureMemoryEntry => {
    if (!entry || typeof entry !== "object") return false;
    const record = entry as Record<string, unknown>;
    const requiredStrings = [
      "key",
      "jobFamily",
      "command",
      "failureKind",
      "lastSeenAt",
      "suggestedRemedy",
    ];
    return (
      requiredStrings.every((field) => typeof record[field] === "string") &&
      typeof record.count === "number" &&
      Number.isFinite(record.count)
    );
  };

  try {
    const memoryPath = resolveFailureMemoryPath(repo);
    const parsed: unknown = JSON.parse(readFileSync(memoryPath, "utf8"));
    if (!parsed || typeof parsed !== "object") return [];
    const entries = (parsed as { entries?: unknown }).entries;
    if (!Array.isArray(entries)) return [];
    return entries.filter(isUsableEntry).slice(0, 80);
  } catch {
    return [];
  }
}
2351
+
2352
/**
 * Looks up previously recorded failures that resemble the given repair packet
 * and renders them as short hint lines.
 *
 * An entry matches when it has the same job family, the same validation
 * command key and the same failure kind, and when its failure focus and stage
 * do not contradict the packet's (a value missing on either side is not a
 * contradiction). Matches are ordered by occurrence count, then by most recent
 * sighting, and the top three are returned.
 *
 * @returns Up to three hints, each passed through `toSingleLine` with a limit of 360.
 */
export function knownFailureHintsForPacket(
  repo: string,
  jobFamily: string,
  packet: BrowserValidationRepairPacket,
): string[] {
  const matchesPacket = (entry: BrowserFailureMemoryEntry): boolean =>
    entry.jobFamily === jobFamily &&
    validationCommandKey(entry.command) === validationCommandKey(packet.command) &&
    entry.failureKind === packet.failureKind &&
    !(packet.failureFocus && entry.failureFocus && packet.failureFocus !== entry.failureFocus) &&
    !(packet.stage && entry.stage && packet.stage !== entry.stage);

  const byCountThenRecency = (
    a: BrowserFailureMemoryEntry,
    b: BrowserFailureMemoryEntry,
  ): number => {
    const countDelta = b.count - a.count;
    if (countDelta) return countDelta;
    return b.lastSeenAt.localeCompare(a.lastSeenAt);
  };

  const topMatches = readBrowserFailureMemory(repo)
    .filter(matchesPacket)
    .sort(byCountThenRecency)
    .slice(0, 3);

  const hints: string[] = [];
  for (const entry of topMatches) {
    const focus = entry.failureFocus ?? entry.stage ?? "unknown";
    hints.push(
      toSingleLine(
        `seen ${entry.count}x before for this repo/job family; last=${entry.lastSeenAt}; focus=${focus}; remedy=${entry.suggestedRemedy}`,
        360,
      ),
    );
  }
  return hints;
}
2375
+
2376
/**
 * Records one occurrence of a browser-validation failure in the persisted
 * failure memory.
 *
 * A failure with a known key has its count incremented and its latest details
 * refreshed; otherwise a new entry is added. The 80 most recently seen entries
 * are kept. The whole operation is best-effort: any error while reading,
 * sorting or writing is swallowed so that failure memory can never fail a job.
 *
 * @param repo - Repository path used to locate the memory file.
 * @param jobFamily - Job family the failure belongs to.
 * @param packet - Repair packet describing the current failure.
 */
export function recordBrowserFailureMemory(
  repo: string,
  jobFamily: string,
  packet: BrowserValidationRepairPacket,
): void {
  try {
    const memoryPath = resolveFailureMemoryPath(repo);
    const now = new Date().toISOString();
    const entries = readBrowserFailureMemory(repo);
    const key = browserFailureMemoryKey(jobFamily, packet);
    const existing = entries.find((entry) => entry.key === key);
    if (existing) {
      existing.count += 1;
      existing.lastSeenAt = now;
      existing.digest = packet.digest;
      existing.lastVerifiedStage = packet.lastVerifiedStage ?? null;
      existing.pageUrl = packet.pageUrl ?? null;
      existing.artifactSummaries = (packet.artifactSummaries ?? []).slice(0, 6);
      existing.suggestedRemedy = browserFailureSuggestedRemedy(packet);
    } else {
      entries.push({
        key,
        jobFamily,
        command: packet.command,
        failureKind: packet.failureKind,
        stage: packet.stage,
        selector: packet.selector,
        expected: packet.expected,
        failureFocus: packet.failureFocus,
        digest: packet.digest,
        count: 1,
        firstSeenAt: now,
        lastSeenAt: now,
        lastVerifiedStage: packet.lastVerifiedStage ?? null,
        pageUrl: packet.pageUrl ?? null,
        artifactSummaries: (packet.artifactSummaries ?? []).slice(0, 6),
        suggestedRemedy: browserFailureSuggestedRemedy(packet),
      });
    }

    // Coerce to string so an entry with a malformed timestamp cannot abort the sort.
    const next = entries
      .sort((a, b) => String(b.lastSeenAt ?? "").localeCompare(String(a.lastSeenAt ?? "")))
      .slice(0, 80);
    const payload = `${JSON.stringify({ version: 1, entries: next }, null, 2)}\n`;

    mkdirSync(resolve(memoryPath, ".."), { recursive: true });
    // Write to a sibling temp file and rename it into place, so an interrupted
    // write never leaves a truncated file that would read back as empty memory.
    const tempPath = `${memoryPath}.${process.pid}.tmp`;
    try {
      writeFileSync(tempPath, payload);
      renameSync(tempPath, memoryPath);
    } catch (error) {
      rmSync(tempPath, { force: true });
      throw error;
    }
  } catch {
    // Failure memory is advisory; never fail a worker job because persistence is unavailable.
  }
}
2424
+
2035
2425
  export function extractValidationFailureRetryDigest(
2036
2426
  run: {
2037
2427
  command: string;
@@ -2075,6 +2465,7 @@ export function buildBrowserValidationRepairPacket(
2075
2465
  validationRuns: ValidationExecutionResult[],
2076
2466
  previousFailureDigests: Map<string, string> = new Map(),
2077
2467
  repo?: string,
2468
+ knownFailureHints: string[] = [],
2078
2469
  ): BrowserValidationRepairPacket | null {
2079
2470
  for (const run of validationRuns) {
2080
2471
  if (run.ok || !isLongRunningBrowserValidationCommand(run.command)) continue;
@@ -2091,6 +2482,8 @@ export function buildBrowserValidationRepairPacket(
2091
2482
  const enrichedBrowserContext = [combined, recentLogSummary].filter(Boolean).join("\n");
2092
2483
  const selector = extractBrowserValidationSelector(enrichedBrowserContext);
2093
2484
  const expected = extractBrowserValidationExpectedUi(enrichedBrowserContext);
2485
+ const lastVerifiedStage = lastBrowserVerifiedStage(enrichedBrowserContext);
2486
+ const pageUrl = extractBrowserValidationUrl(enrichedBrowserContext);
2094
2487
  const stage = refineBrowserValidationStage(
2095
2488
  extractBrowserValidationStage(enrichedBrowserContext),
2096
2489
  selector,
@@ -2129,6 +2522,15 @@ export function buildBrowserValidationRepairPacket(
2129
2522
  const needsDiagnosticProbe =
2130
2523
  failureKind === "assertion" &&
2131
2524
  sameFailureSignal;
2525
+ const artifacts = mergeBrowserValidationArtifacts(
2526
+ extractBrowserValidationArtifacts(combined),
2527
+ collectRecentBrowserValidationArtifacts(repo),
2528
+ );
2529
+ const artifactSummaries = summarizeBrowserValidationArtifacts({
2530
+ repo,
2531
+ artifacts,
2532
+ context: enrichedBrowserContext,
2533
+ });
2132
2534
  return {
2133
2535
  command: run.command,
2134
2536
  failureKind,
@@ -2136,6 +2538,8 @@ export function buildBrowserValidationRepairPacket(
2136
2538
  selector,
2137
2539
  expected,
2138
2540
  failureFocus,
2541
+ lastVerifiedStage,
2542
+ pageUrl,
2139
2543
  digest,
2140
2544
  previousDigest,
2141
2545
  previousStage,
@@ -2144,10 +2548,10 @@ export function buildBrowserValidationRepairPacket(
2144
2548
  previousFailureFocus,
2145
2549
  progress,
2146
2550
  needsDiagnosticProbe,
2147
- artifacts: mergeBrowserValidationArtifacts(
2148
- extractBrowserValidationArtifacts(combined),
2149
- collectRecentBrowserValidationArtifacts(repo),
2150
- ),
2551
+ mustReadArtifactsBeforeEdit: failureKind === "assertion",
2552
+ artifacts,
2553
+ artifactSummaries,
2554
+ knownFailureHints: knownFailureHints.slice(0, 3),
2151
2555
  output: [
2152
2556
  summarizeBrowserValidationOutput(combined) || digest,
2153
2557
  recentLogSummary,
@@ -3204,10 +3608,32 @@ export function buildQualityRevisionHint(
3204
3608
  validationRuns: ValidationExecutionResult[] = [],
3205
3609
  validationBlocker: ValidationBlocker | null = null,
3206
3610
  browserRepairPacket: BrowserValidationRepairPacket | null = null,
3611
+ changedPaths: string[] = [],
3207
3612
  ): string {
3208
3613
  const lines: string[] = [];
3209
3614
  lines.push("Quality revision required before completion.");
3210
3615
  const focusedBrowserRepair = Boolean(browserRepairPacket);
3616
+ lines.push(
3617
+ "Worker phase contract: (1) discovering - inspect only the relevant files/artifacts and name the current hypothesis; (2) editing - make the smallest behavior-owning patch; (3) focused validation - run targeted fast checks; (4) full validation - let PushPals ValidationGate own long required checks unless a single local confirmation is explicitly useful; (5) final diff review - verify changed files are necessary and no unrelated churn remains.",
3618
+ );
3619
+ const diffBudgetWarning = buildDiffBudgetWarning(planning, changedPaths, focusedBrowserRepair);
3620
+ if (diffBudgetWarning) lines.push(diffBudgetWarning);
3621
+ const broadSharedMockWarning = buildBroadSharedMockWarning(planning, changedPaths);
3622
+ if (broadSharedMockWarning) lines.push(broadSharedMockWarning);
3623
+ const testHarnessConvergenceWarning = buildTestHarnessConvergenceWarning(
3624
+ planning,
3625
+ issues,
3626
+ validationRuns,
3627
+ );
3628
+ if (testHarnessConvergenceWarning) lines.push(testHarnessConvergenceWarning);
3629
+ if (planningLooksLikeVisualDerivationTask(planning)) {
3630
+ lines.push(
3631
+ "Visual derivation testing rule: prefer pure helper/state/style-prop tests for planet/projectile/ownership/readability cues. Only add a full React Native render regression when this repo already has a stable harness for that exact surface; otherwise keep render-visible behavior covered through the derived inputs that drive it.",
3632
+ );
3633
+ }
3634
+ lines.push(
3635
+ "Phase soft-budget reminder: if discovery, test-harness setup, or validation repair is running long, reduce the approach before spending more time. Small/medium tasks should converge toward a useful patch within roughly 20 minutes.",
3636
+ );
3211
3637
  const validationAlreadyPassed =
3212
3638
  validationRuns.length > 0 && validationRuns.every((run) => run.ok);
3213
3639
  if (validationAlreadyPassed && !focusedBrowserRepair) {
@@ -3232,6 +3658,12 @@ export function buildQualityRevisionHint(
3232
3658
  if (browserRepairPacket.failureFocus) {
3233
3659
  lines.push(`- Failure focus: ${browserRepairPacket.failureFocus}`);
3234
3660
  }
3661
+ if (browserRepairPacket.lastVerifiedStage) {
3662
+ lines.push(`- Last verified browser checkpoint: ${browserRepairPacket.lastVerifiedStage}`);
3663
+ }
3664
+ if (browserRepairPacket.pageUrl) {
3665
+ lines.push(`- Browser URL at failure: ${browserRepairPacket.pageUrl}`);
3666
+ }
3235
3667
  if (browserRepairPacket.expected) {
3236
3668
  lines.push(`- Expected UI: ${browserRepairPacket.expected}`);
3237
3669
  }
@@ -3248,6 +3680,18 @@ export function buildQualityRevisionHint(
3248
3680
  "- Failure artifacts: none were captured in command output; if this repo writes screenshots/traces, inspect the latest browser failure artifact before changing selectors.",
3249
3681
  );
3250
3682
  }
3683
+ if ((browserRepairPacket.artifactSummaries ?? []).length > 0) {
3684
+ lines.push("Latest browser artifact summaries:");
3685
+ for (const artifactSummary of browserRepairPacket.artifactSummaries ?? []) {
3686
+ lines.push(`- ${artifactSummary}`);
3687
+ }
3688
+ }
3689
+ if ((browserRepairPacket.knownFailureHints ?? []).length > 0) {
3690
+ lines.push("Known issue/remedy memory for this repo/job family:");
3691
+ for (const hint of browserRepairPacket.knownFailureHints ?? []) {
3692
+ lines.push(`- ${hint}`);
3693
+ }
3694
+ }
3251
3695
  if (browserRepairPacket.digest) {
3252
3696
  lines.push(`- Current failure: ${browserRepairPacket.digest}`);
3253
3697
  }
@@ -3276,6 +3720,11 @@ export function buildQualityRevisionHint(
3276
3720
  } else {
3277
3721
  lines.push("- Breadcrumb: first captured failure for this command in this revision loop");
3278
3722
  }
3723
+ if (browserRepairPacket.mustReadArtifactsBeforeEdit) {
3724
+ lines.push(
3725
+ "- Diagnostic artifact read requirement: before editing, explicitly inspect the listed latest artifact/log/DOM summary for the failing stage. If the artifacts are missing, stale, or stop before the failing locator, add a tiny temporary diagnostic/log for locator counts, visible text, URL, and nearby DOM/test-id state before changing product code or selectors.",
3726
+ );
3727
+ }
3279
3728
  if (browserRepairPacket.needsDiagnosticProbe) {
3280
3729
  lines.push(
3281
3730
  "- Convergence mode: diagnostic-first repair. This same browser focus failed in the previous revision, so do not guess another selector or rewrite a different stage.",
@@ -3457,7 +3906,7 @@ export function buildQualityRevisionHint(
3457
3906
  for (const step of planning.requiredValidationSteps ?? []) lines.push(`- ${step}`);
3458
3907
  }
3459
3908
  lines.push("Apply a minimal corrective patch, run focused validation, then finish.");
3460
- return lines.join("\n").slice(0, 6000);
3909
+ return lines.join("\n").slice(0, 8000);
3461
3910
  }
3462
3911
 
3463
3912
  function inferTargetPathFromInstruction(text: string): string | null {
@@ -6214,6 +6663,7 @@ export async function executeJob(
6214
6663
  let revisionAttempt = 0;
6215
6664
  let revisionHint = "";
6216
6665
  const previousValidationFailureDigests = new Map<string, string>();
6666
+ const failureJobFamily = buildTaskFailureJobFamily(normalizedParams);
6217
6667
  while (revisionAttempt <= qualityRevisionLoopMax) {
6218
6668
  const attemptParams: Record<string, unknown> = { ...normalizedParams };
6219
6669
  if (revisionHint) {
@@ -6302,6 +6752,53 @@ export async function executeJob(
6302
6752
  };
6303
6753
  }
6304
6754
 
6755
+ const preQualityStatus = await git(repo, ["status", "--porcelain"]);
6756
+ const preQualityChangedPaths = preQualityStatus.ok
6757
+ ? parseChangedPathsFromStatus(preQualityStatus.stdout)
6758
+ : [];
6759
+ const preQualityPublishablePaths = publishableChangedPaths(preQualityChangedPaths);
6760
+ const executorText = `${result.summary ?? ""}\n${result.stdout ?? ""}\n${result.stderr ?? ""}`;
6761
+ const shellWrapperReturn =
6762
+ /shell-wrapper command rejections|command-router shell-wrapper|command policy rejection/i.test(
6763
+ executorText,
6764
+ );
6765
+ if (preQualityChangedPaths.length > 0 && preQualityPublishablePaths.length === 0) {
6766
+ const detail = `Executor changed only non-publishable dependency/runtime artifact path(s): ${preQualityChangedPaths
6767
+ .slice(0, 12)
6768
+ .join(", ")}${preQualityChangedPaths.length > 12 ? ", ..." : ""}.`;
6769
+ onLog?.(
6770
+ "stderr",
6771
+ `[QualityGate] ${detail} Skipping ValidationGate/CriticGate because there is no PR-worthy patch to validate.`,
6772
+ );
6773
+ return {
6774
+ ok: false,
6775
+ summary: "Executor produced no publishable code changes",
6776
+ stdout: result.stdout,
6777
+ stderr: [result.stderr ?? "", detail].filter(Boolean).join("\n"),
6778
+ exitCode: 4,
6779
+ };
6780
+ }
6781
+ if (
6782
+ preQualityPublishablePaths.length === 0 &&
6783
+ (qualityGatePolicy.mode === "review_fix" || shellWrapperReturn)
6784
+ ) {
6785
+ const reason =
6786
+ qualityGatePolicy.mode === "review_fix"
6787
+ ? "Review-fix executor returned without publishable code changes."
6788
+ : "Codex hit shell-wrapper command rejections without leaving a publishable patch.";
6789
+ onLog?.(
6790
+ "stderr",
6791
+ `[QualityGate] ${reason} Skipping ValidationGate/CriticGate and failing fast.`,
6792
+ );
6793
+ return {
6794
+ ok: false,
6795
+ summary: reason,
6796
+ stdout: result.stdout,
6797
+ stderr: [result.stderr ?? "", reason].filter(Boolean).join("\n"),
6798
+ exitCode: 4,
6799
+ };
6800
+ }
6801
+
6305
6802
  const quality = await runDeterministicQualityGate(
6306
6803
  repo,
6307
6804
  attemptParams,
@@ -6313,11 +6810,19 @@ export async function executeJob(
6313
6810
  revisionAttempt,
6314
6811
  },
6315
6812
  );
6316
- const browserRepairPacket = buildBrowserValidationRepairPacket(
6813
+ let browserRepairPacket = buildBrowserValidationRepairPacket(
6317
6814
  quality.validationRuns,
6318
6815
  previousValidationFailureDigests,
6319
6816
  repo,
6320
6817
  );
6818
+ if (browserRepairPacket) {
6819
+ const knownFailureHints = knownFailureHintsForPacket(repo, failureJobFamily, browserRepairPacket);
6820
+ browserRepairPacket = {
6821
+ ...browserRepairPacket,
6822
+ knownFailureHints,
6823
+ };
6824
+ recordBrowserFailureMemory(repo, failureJobFamily, browserRepairPacket);
6825
+ }
6321
6826
  for (const run of quality.validationRuns) {
6322
6827
  if (run.ok) continue;
6323
6828
  const digest = extractValidationFailureRetryDigest(run, repo);
@@ -6592,6 +7097,7 @@ export async function executeJob(
6592
7097
  validationOutsideTaskScope ? [] : quality.validationRuns,
6593
7098
  validationOutsideTaskScope ? null : quality.blocker,
6594
7099
  validationOutsideTaskScope ? null : browserRepairPacket,
7100
+ quality.changedPaths,
6595
7101
  );
6596
7102
  onLog?.(
6597
7103
  "stderr",