@pushpalsdev/cli 1.1.9 → 1.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +107 -9
- package/package.json +1 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +6 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +126 -0
- package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py +177 -0
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +8 -7
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +513 -7
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +168 -41
|
@@ -6,11 +6,13 @@
|
|
|
6
6
|
import {
|
|
7
7
|
existsSync,
|
|
8
8
|
lstatSync,
|
|
9
|
+
mkdirSync,
|
|
9
10
|
readdirSync,
|
|
10
11
|
readFileSync,
|
|
11
12
|
renameSync,
|
|
12
13
|
rmSync,
|
|
13
14
|
unlinkSync,
|
|
15
|
+
writeFileSync,
|
|
14
16
|
} from "fs";
|
|
15
17
|
import { resolve } from "path";
|
|
16
18
|
import {
|
|
@@ -24,6 +26,7 @@ import {
|
|
|
24
26
|
matchesGlob,
|
|
25
27
|
normalizeTargetPath,
|
|
26
28
|
requirementsForValidationCommand,
|
|
29
|
+
resolveGitStateFilePath,
|
|
27
30
|
sanitizeSourceControlIdentityField,
|
|
28
31
|
type SourceControlCommitIdentity,
|
|
29
32
|
type ToolRequirement,
|
|
@@ -93,6 +96,8 @@ export interface BrowserValidationRepairPacket {
|
|
|
93
96
|
selector: string | null;
|
|
94
97
|
expected: string | null;
|
|
95
98
|
failureFocus: string | null;
|
|
99
|
+
lastVerifiedStage?: string | null;
|
|
100
|
+
pageUrl?: string | null;
|
|
96
101
|
digest: string;
|
|
97
102
|
previousDigest: string | null;
|
|
98
103
|
previousStage: string | null;
|
|
@@ -101,10 +106,32 @@ export interface BrowserValidationRepairPacket {
|
|
|
101
106
|
previousFailureFocus: string | null;
|
|
102
107
|
progress: "first_failure" | "same_failure" | "new_failure";
|
|
103
108
|
needsDiagnosticProbe: boolean;
|
|
109
|
+
mustReadArtifactsBeforeEdit?: boolean;
|
|
104
110
|
artifacts: string[];
|
|
111
|
+
artifactSummaries?: string[];
|
|
112
|
+
knownFailureHints?: string[];
|
|
105
113
|
output: string;
|
|
106
114
|
}
|
|
107
115
|
|
|
116
|
+
/**
 * One persisted record of a recurring browser-validation failure.
 *
 * Records are created and refreshed by `recordBrowserFailureMemory` and read
 * back by `knownFailureHintsForPacket` to build advisory hints.
 */
interface BrowserFailureMemoryEntry {
  /** Identity of the failure, as produced by `browserFailureMemoryKey`. */
  key: string;
  /** Job family the failure was observed under. */
  jobFamily: string;
  /** Validation command that failed. */
  command: string;
  /** Failure classification copied from the repair packet. */
  failureKind: BrowserValidationFailureKind;
  /** Failing stage copied from the repair packet, if one was detected. */
  stage: string | null;
  /** Failing selector copied from the repair packet, if one was detected. */
  selector: string | null;
  /** Expected UI copied from the repair packet, if one was detected. */
  expected: string | null;
  /** Failure focus copied from the repair packet, if one was detected. */
  failureFocus: string | null;
  /** Digest of the most recent occurrence. */
  digest: string;
  /** Number of times this key has been recorded. */
  count: number;
  /** ISO-8601 timestamp of the first recording. */
  firstSeenAt: string;
  /** ISO-8601 timestamp of the most recent recording. */
  lastSeenAt: string;
  /** Last verified stage reported by the most recent occurrence. */
  lastVerifiedStage: string | null;
  /** Page URL reported by the most recent occurrence. */
  pageUrl: string | null;
  /** Artifact summaries from the most recent occurrence (at most six are stored). */
  artifactSummaries: string[];
  /** Remedy text derived from the most recent packet. */
  suggestedRemedy: string;
}
|
|
134
|
+
|
|
108
135
|
interface DeterministicQualityResult {
|
|
109
136
|
ok: boolean;
|
|
110
137
|
skipped: boolean;
|
|
@@ -149,7 +176,7 @@ export interface QualityGatePolicy {
|
|
|
149
176
|
criticMinScore: number;
|
|
150
177
|
}
|
|
151
178
|
|
|
152
|
-
const BROWSER_VALIDATION_MAX_AUTO_REVISIONS =
|
|
179
|
+
const BROWSER_VALIDATION_MAX_AUTO_REVISIONS = 3;
|
|
153
180
|
|
|
154
181
|
export function qualityRevisionLoopUpperBound(policy: {
|
|
155
182
|
maxAutoRevisions: number;
|
|
@@ -326,6 +353,122 @@ export function buildQualityGateRevisionIssues(
|
|
|
326
353
|
return [...new Set(merged)];
|
|
327
354
|
}
|
|
328
355
|
|
|
356
|
+
/**
 * Builds an advisory message when a task touches more files than its budget.
 *
 * The budget is `planning.scope.maxFilesToEdit` when that is a positive finite
 * number; otherwise 5 for small tasks and 10 for everything else. A task is
 * "small" when it is a focused browser repair, or when it is not high risk and
 * has at most two target paths and at most three acceptance criteria.
 *
 * @param planning Planning data for the task; missing `scope`, `targetPaths`
 *   or `acceptanceCriteria` are treated as absent/empty.
 * @param changedPaths Paths changed so far in the working tree.
 * @param focusedBrowserRepair Whether a browser repair packet is driving this revision.
 * @returns The warning text, or `null` when the change set is within budget.
 */
function buildDiffBudgetWarning(
  planning: TaskExecutePlanning,
  changedPaths: string[],
  focusedBrowserRepair: boolean,
): string | null {
  // Normalise Windows separators before matching: the pattern is written with
  // "/" only, so "node_modules\\pkg\\x.js" would otherwise count as meaningful.
  const meaningfulChangedPaths = changedPaths.filter(
    (path) =>
      !/(^|\/)(outputs|node_modules|\.worktrees|dist|build|coverage)(\/|$)/i.test(
        path.replace(/\\/g, "/"),
      ),
  );
  if (meaningfulChangedPaths.length === 0) return null;

  // Number(undefined) is NaN, so a missing scope simply means "no explicit budget".
  const explicitBudget = Number(planning.scope?.maxFilesToEdit);
  const hasExplicitBudget = Number.isFinite(explicitBudget) && explicitBudget > 0;
  const smallTask =
    focusedBrowserRepair ||
    (planning.riskLevel !== "high" &&
      (planning.targetPaths?.length ?? 0) <= 2 &&
      (planning.acceptanceCriteria?.length ?? 0) <= 3);
  const budget = hasExplicitBudget ? Math.floor(explicitBudget) : smallTask ? 5 : 10;
  if (meaningfulChangedPaths.length <= budget) return null;

  const budgetLabel = hasExplicitBudget
    ? "planning.scope.maxFilesToEdit"
    : smallTask
      ? "small-task"
      : "default";
  // Only the first twelve paths are listed to keep the hint short.
  const listedPaths = meaningfulChangedPaths.slice(0, 12).join(", ");
  const overflowMarker = meaningfulChangedPaths.length > 12 ? ", ..." : "";
  return `Diff budget warning: this task now changes ${meaningfulChangedPaths.length} file(s), above the ${budget}-file ${budgetLabel} budget. Before editing more, remove unrelated churn and keep only behavior-owning files needed for the current repair. Changed files: ${listedPaths}${overflowMarker}`;
}
|
|
380
|
+
|
|
381
|
+
/**
 * Reports whether a path lies inside (or is) a dependency/runtime artifact
 * directory: outputs, node_modules, .worktrees, .codex, dist, build or coverage.
 * Backslashes are treated as path separators and matching is case-insensitive.
 */
function isNonPublishableArtifactPath(path: string): boolean {
  const forwardSlashed = path.replace(/\\/g, "/");
  const artifactSegment =
    /(^|\/)(outputs|node_modules|\.worktrees|\.codex|dist|build|coverage)(\/|$)/i;
  return artifactSegment.test(forwardSlashed);
}
|
|
386
|
+
|
|
387
|
+
/**
 * Returns the changed paths that are not dependency/runtime artifacts,
 * preserving their original order.
 */
export function publishableChangedPaths(changedPaths: string[]): string[] {
  const isPublishable = (candidate: string): boolean =>
    !isNonPublishableArtifactPath(candidate);
  return changedPaths.filter(isPublishable);
}
|
|
390
|
+
|
|
391
|
+
/**
 * Flattens the textual parts of a planning object into one lower-cased,
 * newline-joined string suitable for keyword matching.
 *
 * Order: intent, risk level, target paths, acceptance criteria, validation
 * steps, required validation steps, then discovery keywords, likely
 * directories and ripgrep queries. Missing lists contribute nothing; missing
 * scalars contribute an empty line.
 */
function collectPlanningText(planning: TaskExecutePlanning): string {
  const parts: unknown[] = [planning.intent, planning.riskLevel];
  parts.push(...(planning.targetPaths ?? []));
  parts.push(...(planning.acceptanceCriteria ?? []));
  parts.push(...(planning.validationSteps ?? []));
  parts.push(...(planning.requiredValidationSteps ?? []));
  parts.push(...(planning.discovery?.keywords ?? []));
  parts.push(...(planning.discovery?.likelyDirs ?? []));
  parts.push(...(planning.discovery?.ripgrepQueries ?? []));

  const lines = parts.map((part) => String(part ?? ""));
  return lines.join("\n").toLowerCase();
}
|
|
407
|
+
|
|
408
|
+
/**
 * Heuristic: does the planning text mention any visual/rendering cue word
 * (visual, readability, render/rendering, projectile, planet, ship, ...)?
 * Matching is whole-word and case-insensitive over `collectPlanningText`.
 */
function planningLooksLikeVisualDerivationTask(planning: TaskExecutePlanning): boolean {
  const visualCue =
    /\b(visual|readability|battlefield|render(?:ing)?|projectile|planet|ship|ring|danger|threat|ownership|dense action|style|ui surface)\b/i;
  const planningText = collectPlanningText(planning);
  return visualCue.test(planningText);
}
|
|
414
|
+
|
|
415
|
+
/**
 * Produces a warning when validation appears to be failing in mock/import
 * plumbing rather than in product behaviour.
 *
 * The warning fires only when the combined issue text and validation output
 * contains both an import/export-style failure phrase and a mention of a
 * mock or test-harness related name. For plannings that look visual, an extra
 * sentence recommending helper/state/style-prop tests is inserted.
 *
 * @returns The warning text, or `null` when the pattern is not present.
 */
function buildTestHarnessConvergenceWarning(
  planning: TaskExecutePlanning,
  issues: string[],
  validationRuns: ValidationExecutionResult[],
): string | null {
  const runFragments = validationRuns.flatMap((run) => [run.command, run.stdout, run.stderr]);
  const combined = [...issues, ...runFragments]
    .map((fragment) => String(fragment ?? ""))
    .join("\n");

  const importFailurePattern =
    /\bCannot find module\b|\bdoes not provide an export\b|\bno exported member\b|\bimport error\b|\bundefined is not a function\b/i;
  const harnessContextPattern =
    /\b(react[- ]native|reactNativeMock|Animated\.View|expo-secure-store|SettingsContext|skin validator|mock|test helper|__mocks__)\b/i;

  // Both signals are required; either one alone is not treated as a harness problem.
  if (!importFailurePattern.test(combined)) return null;
  if (!harnessContextPattern.test(combined)) return null;

  let visualAdvice = "";
  if (planningLooksLikeVisualDerivationTask(planning)) {
    visualAdvice =
      " For this visual/rendering task, prefer pure helper/state/style-prop tests over a full React Native surface render.";
  }

  return [
    "Test harness convergence warning: validation is failing in mock/import setup rather than product behavior.",
    visualAdvice,
    " Do not keep expanding broad shared mocks to rescue an over-scoped component render test. If the repo does not already have stable React Native render-test infrastructure for this surface, replace the full-surface regression with smaller deterministic helper/state coverage and one focused assertion on the behavior-owning API.",
  ].join("");
}
|
|
443
|
+
|
|
444
|
+
/**
 * Produces a warning when a focused task starts editing shared mock or
 * test-harness files.
 *
 * A path counts as a shared mock/harness file when one of its segments starts
 * with a mock/harness name (`__mocks__`, `reactNativeMock`, `setupTest(s)`,
 * `jest`, `vitest`, `mock`, or a `test…mock` run) followed by `.`, `/` or the
 * end of the path. The warning is suppressed when the task is neither small
 * nor visual, or when the planning text itself asks for mock/harness work.
 *
 * @param planning Planning data for the task; a missing `acceptanceCriteria`
 *   list is treated as empty.
 * @param changedPaths Paths changed so far in the working tree.
 * @returns The warning text, or `null` when no warning applies.
 */
function buildBroadSharedMockWarning(
  planning: TaskExecutePlanning,
  changedPaths: string[],
): string | null {
  // Match against "/"-normalised copies so Windows-style paths are classified
  // the same way; the original strings are what gets reported.
  const toPosix = (path: string): string => path.replace(/\\/g, "/");
  const meaningfulChangedPaths = changedPaths.filter(
    (path) =>
      !/(^|\/)(outputs|node_modules|\.worktrees|dist|build|coverage)(\/|$)/i.test(toPosix(path)),
  );
  // `jest` / `vitest` are listed without a trailing dot: the closing group
  // already requires ".", "/" or end-of-path, so "jest.config.js" and
  // "vitest.setup.ts" match. With the dot inside the alternative they could
  // only match names such as "jest..x", i.e. effectively never.
  const broadMockPaths = meaningfulChangedPaths.filter((path) =>
    /(^|\/)(__mocks__|tests\/.*mock|test.*mock|reactNativeMock|setupTests?|jest|vitest|mock)(\.|\/|$)/i.test(
      toPosix(path),
    ),
  );
  if (broadMockPaths.length === 0) return null;

  // NOTE(review): this uses "||" where buildDiffBudgetWarning uses "&&" for
  // the same two size checks; kept as-is because the intent is not visible here.
  const smallTask =
    planning.riskLevel !== "high" &&
    ((planning.targetPaths?.length ?? 0) <= 2 ||
      (planning.acceptanceCriteria?.length ?? 0) <= 3);
  if (!smallTask && !planningLooksLikeVisualDerivationTask(planning)) return null;

  // If the task explicitly asks for harness work, editing these files is expected.
  const explicitlyRequested = /mock|test harness|react native test|component render/i.test(
    collectPlanningText(planning),
  );
  if (explicitlyRequested) return null;

  const listedPaths = broadMockPaths.slice(0, 6).join(", ");
  const overflowMarker = broadMockPaths.length > 6 ? ", ..." : "";
  return `Broad mock warning: this focused task now changes shared mock/test-harness file(s): ${listedPaths}${overflowMarker}. Before continuing, prefer behavior-owned helper/state tests or existing stable render-test infrastructure; do not add broad React Native mocks for a small visual/control change unless the task explicitly requires harness repair.`;
}
|
|
471
|
+
|
|
329
472
|
const TEST_ASSERTION_BALANCE_ISSUE =
|
|
330
473
|
"Changed test files do not show both positive and negative assertion coverage (expected both).";
|
|
331
474
|
|
|
@@ -2032,6 +2175,253 @@ function lastBrowserVerifiedStage(text: string): string | null {
|
|
|
2032
2175
|
return lastVerified ? toSingleLine(lastVerified, 80) : null;
|
|
2033
2176
|
}
|
|
2034
2177
|
|
|
2178
|
+
/**
 * Pulls the first browser URL out of validation output.
 *
 * Patterns are tried in priority order: an explicit "url:"-style label, a
 * navigation verb ("navigated to", "opened", "loading"), then any bare
 * loopback URL with a port. Trailing `)`, `,`, `.` and `;` are stripped.
 *
 * @returns The URL passed through `toSingleLine(…, 160)`, or `null` if none matched.
 */
function extractBrowserValidationUrl(text: string): string | null {
  const plainText = stripAnsiControlSequences(text);
  const urlPatterns: RegExp[] = [
    /\b(?:page\s+url|current\s+url|browser\s+url|url)\s*[:=]\s*(https?:\/\/[^\s|"'`<>]+)/i,
    /\b(?:navigated\s+to|opened|loading)\s+(https?:\/\/[^\s|"'`<>]+)/i,
    /\b(https?:\/\/(?:127\.0\.0\.1|localhost|0\.0\.0\.0):\d+\/?[^\s|"'`<>]*)/i,
  ];
  for (const urlPattern of urlPatterns) {
    const captured = plainText.match(urlPattern)?.[1];
    if (!captured) continue;
    // Drop sentence punctuation that tends to trail a URL in log lines.
    const trimmed = captured.replace(/[),.;]+$/, "").trim();
    if (trimmed) return toSingleLine(trimmed, 160);
  }
  return null;
}
|
|
2192
|
+
|
|
2193
|
+
/**
 * Classifies an artifact by file extension (case-insensitive):
 * png/jpg/jpeg/webp → "screenshot", zip → "trace", webm → "video",
 * log/txt → "log", json → "json", anything else → "artifact".
 */
function inferBrowserArtifactKind(path: string): string {
  const kindsByExtension: ReadonlyArray<readonly [RegExp, string]> = [
    [/\.(?:png|jpe?g|webp)$/i, "screenshot"],
    [/\.zip$/i, "trace"],
    [/\.webm$/i, "video"],
    [/\.(?:log|txt)$/i, "log"],
    [/\.json$/i, "json"],
  ];
  for (const [extensionPattern, kind] of kindsByExtension) {
    if (extensionPattern.test(path)) return kind;
  }
  return "artifact";
}
|
|
2201
|
+
|
|
2202
|
+
/**
 * Guesses a stage label from an artifact file name.
 *
 * Two conventions are recognised, in this order: a numeric prefix
 * ("03-open-menu.png" → "open menu") and a failure/screenshot/snapshot prefix
 * ("failure-login.png" → "login"). Dashes and underscores become spaces.
 *
 * @returns The label passed through `toSingleLine(…, 80)`, or `null` when neither convention matches.
 */
function inferBrowserArtifactStageFromPath(path: string): string | null {
  const fileName = path.split(/[\\/]/).pop() ?? "";
  const stem = fileName.replace(/\.[^.]+$/, "");

  const numberedStage = stem.match(/^\d+[-_](.+)$/)?.[1];
  const labelledStage = stem.match(/(?:failure|failed|screenshot|snapshot)[-_](.+)$/i)?.[1];

  let rawStage: string | undefined;
  if (numberedStage && numberedStage.trim()) {
    rawStage = numberedStage;
  } else if (labelledStage && labelledStage.trim()) {
    rawStage = labelledStage;
  }
  if (!rawStage) return null;

  const spaced = rawStage.replace(/[-_]+/g, " ");
  return toSingleLine(spaced, 80);
}
|
|
2213
|
+
|
|
2214
|
+
/**
 * Builds one-line summaries for up to six browser-validation artifacts.
 *
 * Each summary has the form `<path> [<kind>]` followed by whichever of
 * `stage=`, `selector=`, `url=` and `last_verified=` could be determined.
 * Values come from the artifact itself (file name, then file contents) and
 * fall back to values extracted from `params.context`.
 *
 * @param params.repo Repository root used to resolve relative artifact paths.
 * @param params.artifacts Artifact paths already known to the caller; they are
 *   merged with `collectRecentBrowserValidationArtifacts(params.repo)`.
 * @param params.context Validation output used as the fallback source.
 * @returns Summaries, each passed through `toSingleLine(…, 280)`.
 */
function summarizeBrowserValidationArtifacts(params: {
  repo?: string;
  artifacts: string[];
  context: string;
}): string[] {
  const allArtifacts = mergeBrowserValidationArtifacts(
    params.artifacts,
    collectRecentBrowserValidationArtifacts(params.repo),
  );
  const out: string[] = [];
  // Context-level fallbacks are computed once, not per artifact.
  const contextStage = extractBrowserValidationStage(params.context);
  const contextSelector = extractBrowserValidationSelector(params.context);
  const contextUrl = extractBrowserValidationUrl(params.context);
  const contextLastVerified = lastBrowserVerifiedStage(params.context);

  for (const artifact of allArtifacts.slice(0, 6)) {
    const kind = inferBrowserArtifactKind(artifact);
    // Screenshots, trace archives and videos are binary: decoding them as
    // UTF-8 costs I/O and memory and can only produce spurious matches.
    const isBinaryKind = kind === "screenshot" || kind === "trace" || kind === "video";
    let artifactText = "";
    if (params.repo && !/^(?:\/repo|\/workspace|[A-Za-z]:[\\/])/.test(artifact)) {
      // Artifact path is resolved against the repository root.
      if (!isBinaryKind) {
        try {
          artifactText = readFileSync(resolve(params.repo, artifact), "utf8");
        } catch {
          // Unreadable artifacts simply contribute no file-level context.
          artifactText = "";
        }
      }
    } else if (existsSync(artifact) && /\.(?:log|txt|json)$/i.test(artifact)) {
      try {
        artifactText = readFileSync(artifact, "utf8");
      } catch {
        artifactText = "";
      }
    }
    const artifactContext = artifactText ? stripAnsiControlSequences(artifactText) : "";

    // Precedence: file name, then file contents, then the surrounding output.
    const stage =
      inferBrowserArtifactStageFromPath(artifact) ||
      extractBrowserValidationStage(artifactContext) ||
      contextStage;
    const selector = extractBrowserValidationSelector(artifactContext) || contextSelector;
    const url = extractBrowserValidationUrl(artifactContext) || contextUrl;
    const lastVerified = lastBrowserVerifiedStage(artifactContext) || contextLastVerified;

    const detail = [
      `${artifact} [${kind}]`,
      stage ? `stage=${stage}` : "",
      selector ? `selector=${selector}` : "",
      url ? `url=${url}` : "",
      lastVerified ? `last_verified=${lastVerified}` : "",
    ]
      .filter(Boolean)
      .join(" ");
    out.push(toSingleLine(detail, 280));
  }
  return out;
}
|
|
2265
|
+
|
|
2266
|
+
/**
 * Maps a repair packet's failure kind to a short remediation instruction.
 *
 * Assertion failures get a three-sentence instruction whose last sentence
 * names the failing selector when one is known. Startup/runtime and network
 * failures each get a fixed instruction; every other kind gets a generic one.
 */
function browserFailureSuggestedRemedy(packet: BrowserValidationRepairPacket): string {
  switch (packet.failureKind) {
    case "assertion": {
      const locatorAdvice = packet.selector
        ? `Repair or replace the exact failing locator ${packet.selector} with a stable rendered signal for the same UI stage.`
        : "Repair the exact visible UI assertion or add a stable test id/accessibility label to existing UI.";
      const sentences = [
        "Read the latest artifact/log/DOM state before editing.",
        "Preserve already-passing browser stages.",
        locatorAdvice,
      ];
      return sentences.join(" ");
    }
    case "startup":
    case "runtime":
      return "Treat as browser startup/runtime provisioning; do not rewrite product UI assertions until ValidationGate reaches an assertion stage.";
    case "network":
      return "Treat as local server/network readiness; add bounded startup diagnostics and avoid changing gameplay/UI behavior.";
    default:
      return "Inspect captured validation output and repair the current failing stage with the smallest behavior-owning diff.";
  }
}
|
|
2284
|
+
|
|
2285
|
+
/**
 * Turns free text into a lowercase slug: the value is passed through
 * `toSingleLine(…, 120)`, lower-cased, every run of characters outside
 * `a-z0-9` becomes a single "-", and leading/trailing dashes are removed.
 * `null` and `undefined` are treated as the empty string.
 */
function normalizeFailureMemoryToken(value: string | null | undefined): string {
  const singleLine = toSingleLine(value ?? "", 120);
  const dashed = singleLine.toLowerCase().replace(/[^a-z0-9]+/g, "-");
  return dashed.replace(/^-+|-+$/g, "");
}
|
|
2288
|
+
|
|
2289
|
+
/**
 * Derives a "job family" identifier from raw job parameters.
 *
 * The identifier is a "|"-joined list of slugs: the autonomy component area,
 * the planning intent, then up to eight hints taken (in order) from target
 * paths, scope write globs, validation steps and required validation steps.
 * Empty slugs are dropped; when nothing remains the family is "general".
 *
 * @param params Untyped job parameters; `planning` and `autonomy` are used
 *   only when they are objects.
 */
export function buildTaskFailureJobFamily(params: Record<string, unknown>): string {
  const rawPlanning = params.planning;
  const planning: Partial<TaskExecutePlanning> =
    rawPlanning && typeof rawPlanning === "object"
      ? (rawPlanning as Partial<TaskExecutePlanning>)
      : {};
  const rawAutonomy = params.autonomy;
  const autonomy: Record<string, unknown> =
    rawAutonomy && typeof rawAutonomy === "object"
      ? (rawAutonomy as Record<string, unknown>)
      : {};

  // Anything that is not an array is ignored rather than coerced.
  const listOrEmpty = (candidate: unknown): unknown[] =>
    Array.isArray(candidate) ? candidate : [];

  const hintSources: unknown[] = [
    ...listOrEmpty(planning.targetPaths),
    ...listOrEmpty(planning.scope?.writeGlobs),
    ...listOrEmpty(planning.validationSteps),
    ...listOrEmpty(planning.requiredValidationSteps),
  ];
  const targetHints = hintSources
    .map((source) => normalizeFailureMemoryToken(String(source)))
    .filter((token) => Boolean(token))
    .slice(0, 8);

  // Both camelCase and snake_case spellings of the component area are accepted.
  const rawArea = autonomy.componentArea ?? autonomy.component_area ?? "";
  const area = normalizeFailureMemoryToken(String(rawArea));
  const intent = normalizeFailureMemoryToken(String(planning.intent ?? ""));

  const family = [area, intent, ...targetHints].filter(Boolean).join("|");
  return family || "general";
}
|
|
2309
|
+
|
|
2310
|
+
/**
 * Builds the identity key under which a failure is remembered: job family,
 * command key, failure kind, then the slugs of failure focus, stage, selector
 * and expected UI, joined with "|". Empty segments are omitted.
 *
 * NOTE(review): because empty segments are dropped, two packets that differ
 * only in *which* optional field carries a value (e.g. stage vs. selector)
 * can produce the same key — confirm whether that is acceptable.
 */
function browserFailureMemoryKey(jobFamily: string, packet: BrowserValidationRepairPacket): string {
  const commandKey = validationCommandKey(packet.command);
  const focusToken = normalizeFailureMemoryToken(packet.failureFocus);
  const stageToken = normalizeFailureMemoryToken(packet.stage);
  const selectorToken = normalizeFailureMemoryToken(packet.selector);
  const expectedToken = normalizeFailureMemoryToken(packet.expected);

  const segments = [
    jobFamily,
    commandKey,
    packet.failureKind,
    focusToken,
    stageToken,
    selectorToken,
    expectedToken,
  ];
  const presentSegments = segments.filter(Boolean);
  return presentSegments.join("|");
}
|
|
2323
|
+
|
|
2324
|
+
/**
 * Chooses where the failure-memory JSON file lives.
 *
 * The root is the first existing directory among the
 * `PUSHPALS_PROJECT_ROOT_OVERRIDE`, `PUSHPALS_REPO_ROOT_OVERRIDE` and
 * `PUSHPALS_REPO_PATH` environment variables and `repo`; if none exists,
 * `repo` is used as-is. The file is placed wherever
 * `resolveGitStateFilePath` says, falling back to
 * `<root>/outputs/data/workerpals-failure-memory.json`.
 */
function resolveFailureMemoryPath(repo: string): string {
  const rawCandidates = [
    process.env.PUSHPALS_PROJECT_ROOT_OVERRIDE,
    process.env.PUSHPALS_REPO_ROOT_OVERRIDE,
    process.env.PUSHPALS_REPO_PATH,
    repo,
  ];

  // Blank or unset candidates are discarded before touching the filesystem.
  const rootCandidates: string[] = [];
  for (const rawCandidate of rawCandidates) {
    const trimmed = String(rawCandidate ?? "").trim();
    if (trimmed) rootCandidates.push(trimmed);
  }

  let root = repo;
  for (const candidate of rootCandidates) {
    if (existsSync(candidate)) {
      root = candidate;
      break;
    }
  }

  const gitStatePath = resolveGitStateFilePath(root, "pushpals-worker-failure-memory.json");
  return gitStatePath
    ? gitStatePath
    : resolve(root, "outputs", "data", "workerpals-failure-memory.json");
}
|
|
2338
|
+
|
|
2339
|
+
/**
 * Loads the persisted browser-failure memory for a repository.
 *
 * The file is advisory, so every problem degrades to "no memory": a missing
 * or unreadable file, invalid JSON, an unexpected top-level shape, and any
 * individual entry that does not have the full `BrowserFailureMemoryEntry`
 * shape are all silently ignored. Validating entries here keeps callers that
 * sort and format them (e.g. via `lastSeenAt.localeCompare`) from throwing on
 * a hand-edited or corrupted file.
 *
 * @param repo Repository path used to locate the memory file.
 * @returns At most 80 well-formed entries, in file order.
 */
function readBrowserFailureMemory(repo: string): BrowserFailureMemoryEntry[] {
  const memoryPath = resolveFailureMemoryPath(repo);

  const isString = (value: unknown): value is string => typeof value === "string";
  const isNullableString = (value: unknown): value is string | null =>
    value === null || typeof value === "string";
  // NOTE(review): `failureKind` is only checked to be a string; the set of
  // valid BrowserValidationFailureKind values is not visible from here.
  const isEntry = (candidate: unknown): candidate is BrowserFailureMemoryEntry => {
    if (!candidate || typeof candidate !== "object") return false;
    const record = candidate as Record<string, unknown>;
    const requiredStrings = [
      record.key,
      record.jobFamily,
      record.command,
      record.failureKind,
      record.digest,
      record.firstSeenAt,
      record.lastSeenAt,
      record.suggestedRemedy,
    ];
    const nullableStrings = [
      record.stage,
      record.selector,
      record.expected,
      record.failureFocus,
      record.lastVerifiedStage,
      record.pageUrl,
    ];
    return (
      requiredStrings.every(isString) &&
      nullableStrings.every(isNullableString) &&
      typeof record.count === "number" &&
      Number.isFinite(record.count) &&
      Array.isArray(record.artifactSummaries) &&
      record.artifactSummaries.every(isString)
    );
  };

  try {
    const parsed: unknown = JSON.parse(readFileSync(memoryPath, "utf8"));
    if (!parsed || typeof parsed !== "object") return [];
    const entries = (parsed as { entries?: unknown }).entries;
    if (!Array.isArray(entries)) return [];
    return entries.filter(isEntry).slice(0, 80);
  } catch {
    // Missing file or invalid JSON: behave as if nothing has been remembered.
    return [];
  }
}
|
|
2351
|
+
|
|
2352
|
+
/**
 * Looks up remembered failures that resemble the given repair packet and
 * renders up to three of them as one-line hints.
 *
 * An entry matches when it has the same job family, the same validation
 * command key and the same failure kind, and — where both sides carry a
 * value — the same failure focus and the same stage. Matches are ranked by
 * occurrence count, then by most recent sighting.
 *
 * @returns Hint strings, each passed through `toSingleLine(…, 360)`.
 */
export function knownFailureHintsForPacket(
  repo: string,
  jobFamily: string,
  packet: BrowserValidationRepairPacket,
): string[] {
  const resemblesPacket = (entry: BrowserFailureMemoryEntry): boolean => {
    const sameIdentity =
      entry.jobFamily === jobFamily &&
      validationCommandKey(entry.command) === validationCommandKey(packet.command) &&
      entry.failureKind === packet.failureKind;
    if (!sameIdentity) return false;
    // Focus and stage only disqualify when both sides have a value and they differ.
    const focusConflicts = Boolean(
      packet.failureFocus && entry.failureFocus && packet.failureFocus !== entry.failureFocus,
    );
    if (focusConflicts) return false;
    const stageConflicts = Boolean(packet.stage && entry.stage && packet.stage !== entry.stage);
    return !stageConflicts;
  };

  const mostRelevantFirst = (a: BrowserFailureMemoryEntry, b: BrowserFailureMemoryEntry): number => {
    const countDelta = b.count - a.count;
    if (countDelta) return countDelta;
    return b.lastSeenAt.localeCompare(a.lastSeenAt);
  };

  const topMatches = readBrowserFailureMemory(repo)
    .filter(resemblesPacket)
    .sort(mostRelevantFirst)
    .slice(0, 3);

  const hints: string[] = [];
  for (const entry of topMatches) {
    const hint = `seen ${entry.count}x before for this repo/job family; last=${entry.lastSeenAt}; focus=${entry.failureFocus ?? entry.stage ?? "unknown"}; remedy=${entry.suggestedRemedy}`;
    hints.push(toSingleLine(hint, 360));
  }
  return hints;
}
|
|
2375
|
+
|
|
2376
|
+
/**
 * Records one occurrence of a browser-validation failure in the persisted
 * failure memory.
 *
 * If an entry with the same key already exists its counter is incremented and
 * its "latest occurrence" fields are refreshed; otherwise a new entry is
 * appended. The 80 most recently seen entries are kept.
 *
 * The file is written to a temporary sibling and then renamed over the
 * target, so a reader never observes a half-written file. (A torn read would
 * parse as "no memory" and the next write would then discard the history.)
 * Persistence is best-effort: any filesystem error is swallowed.
 *
 * @param repo Repository path used to locate the memory file.
 * @param jobFamily Job family the failure belongs to.
 * @param packet Repair packet describing the failure.
 */
export function recordBrowserFailureMemory(
  repo: string,
  jobFamily: string,
  packet: BrowserValidationRepairPacket,
): void {
  const memoryPath = resolveFailureMemoryPath(repo);
  const now = new Date().toISOString();
  const entries = readBrowserFailureMemory(repo);
  const key = browserFailureMemoryKey(jobFamily, packet);

  // Fields that always reflect the most recent occurrence.
  const lastVerifiedStage = packet.lastVerifiedStage ?? null;
  const pageUrl = packet.pageUrl ?? null;
  const artifactSummaries = (packet.artifactSummaries ?? []).slice(0, 6);
  const suggestedRemedy = browserFailureSuggestedRemedy(packet);

  const existing = entries.find((entry) => entry.key === key);
  if (existing) {
    existing.count += 1;
    existing.lastSeenAt = now;
    existing.digest = packet.digest;
    existing.lastVerifiedStage = lastVerifiedStage;
    existing.pageUrl = pageUrl;
    existing.artifactSummaries = artifactSummaries;
    existing.suggestedRemedy = suggestedRemedy;
  } else {
    entries.push({
      key,
      jobFamily,
      command: packet.command,
      failureKind: packet.failureKind,
      stage: packet.stage,
      selector: packet.selector,
      expected: packet.expected,
      failureFocus: packet.failureFocus,
      digest: packet.digest,
      count: 1,
      firstSeenAt: now,
      lastSeenAt: now,
      lastVerifiedStage,
      pageUrl,
      artifactSummaries,
      suggestedRemedy,
    });
  }

  // Keep only the most recently seen entries so the file stays bounded.
  const next = entries
    .sort((a, b) => b.lastSeenAt.localeCompare(a.lastSeenAt))
    .slice(0, 80);

  // The pid suffix keeps concurrent writers from sharing a temp file.
  const tempPath = `${memoryPath}.${process.pid}.tmp`;
  try {
    mkdirSync(resolve(memoryPath, ".."), { recursive: true });
    writeFileSync(tempPath, `${JSON.stringify({ version: 1, entries: next }, null, 2)}\n`);
    renameSync(tempPath, memoryPath);
  } catch {
    // Failure memory is advisory; never fail a worker job because persistence is unavailable.
    try {
      rmSync(tempPath, { force: true });
    } catch {
      // Nothing more can be done about a leftover temp file.
    }
  }
}
|
|
2424
|
+
|
|
2035
2425
|
export function extractValidationFailureRetryDigest(
|
|
2036
2426
|
run: {
|
|
2037
2427
|
command: string;
|
|
@@ -2075,6 +2465,7 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2075
2465
|
validationRuns: ValidationExecutionResult[],
|
|
2076
2466
|
previousFailureDigests: Map<string, string> = new Map(),
|
|
2077
2467
|
repo?: string,
|
|
2468
|
+
knownFailureHints: string[] = [],
|
|
2078
2469
|
): BrowserValidationRepairPacket | null {
|
|
2079
2470
|
for (const run of validationRuns) {
|
|
2080
2471
|
if (run.ok || !isLongRunningBrowserValidationCommand(run.command)) continue;
|
|
@@ -2091,6 +2482,8 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2091
2482
|
const enrichedBrowserContext = [combined, recentLogSummary].filter(Boolean).join("\n");
|
|
2092
2483
|
const selector = extractBrowserValidationSelector(enrichedBrowserContext);
|
|
2093
2484
|
const expected = extractBrowserValidationExpectedUi(enrichedBrowserContext);
|
|
2485
|
+
const lastVerifiedStage = lastBrowserVerifiedStage(enrichedBrowserContext);
|
|
2486
|
+
const pageUrl = extractBrowserValidationUrl(enrichedBrowserContext);
|
|
2094
2487
|
const stage = refineBrowserValidationStage(
|
|
2095
2488
|
extractBrowserValidationStage(enrichedBrowserContext),
|
|
2096
2489
|
selector,
|
|
@@ -2129,6 +2522,15 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2129
2522
|
const needsDiagnosticProbe =
|
|
2130
2523
|
failureKind === "assertion" &&
|
|
2131
2524
|
sameFailureSignal;
|
|
2525
|
+
const artifacts = mergeBrowserValidationArtifacts(
|
|
2526
|
+
extractBrowserValidationArtifacts(combined),
|
|
2527
|
+
collectRecentBrowserValidationArtifacts(repo),
|
|
2528
|
+
);
|
|
2529
|
+
const artifactSummaries = summarizeBrowserValidationArtifacts({
|
|
2530
|
+
repo,
|
|
2531
|
+
artifacts,
|
|
2532
|
+
context: enrichedBrowserContext,
|
|
2533
|
+
});
|
|
2132
2534
|
return {
|
|
2133
2535
|
command: run.command,
|
|
2134
2536
|
failureKind,
|
|
@@ -2136,6 +2538,8 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2136
2538
|
selector,
|
|
2137
2539
|
expected,
|
|
2138
2540
|
failureFocus,
|
|
2541
|
+
lastVerifiedStage,
|
|
2542
|
+
pageUrl,
|
|
2139
2543
|
digest,
|
|
2140
2544
|
previousDigest,
|
|
2141
2545
|
previousStage,
|
|
@@ -2144,10 +2548,10 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2144
2548
|
previousFailureFocus,
|
|
2145
2549
|
progress,
|
|
2146
2550
|
needsDiagnosticProbe,
|
|
2147
|
-
|
|
2148
|
-
|
|
2149
|
-
|
|
2150
|
-
),
|
|
2551
|
+
mustReadArtifactsBeforeEdit: failureKind === "assertion",
|
|
2552
|
+
artifacts,
|
|
2553
|
+
artifactSummaries,
|
|
2554
|
+
knownFailureHints: knownFailureHints.slice(0, 3),
|
|
2151
2555
|
output: [
|
|
2152
2556
|
summarizeBrowserValidationOutput(combined) || digest,
|
|
2153
2557
|
recentLogSummary,
|
|
@@ -3204,10 +3608,32 @@ export function buildQualityRevisionHint(
|
|
|
3204
3608
|
validationRuns: ValidationExecutionResult[] = [],
|
|
3205
3609
|
validationBlocker: ValidationBlocker | null = null,
|
|
3206
3610
|
browserRepairPacket: BrowserValidationRepairPacket | null = null,
|
|
3611
|
+
changedPaths: string[] = [],
|
|
3207
3612
|
): string {
|
|
3208
3613
|
const lines: string[] = [];
|
|
3209
3614
|
lines.push("Quality revision required before completion.");
|
|
3210
3615
|
const focusedBrowserRepair = Boolean(browserRepairPacket);
|
|
3616
|
+
lines.push(
|
|
3617
|
+
"Worker phase contract: (1) discovering - inspect only the relevant files/artifacts and name the current hypothesis; (2) editing - make the smallest behavior-owning patch; (3) focused validation - run targeted fast checks; (4) full validation - let PushPals ValidationGate own long required checks unless a single local confirmation is explicitly useful; (5) final diff review - verify changed files are necessary and no unrelated churn remains.",
|
|
3618
|
+
);
|
|
3619
|
+
const diffBudgetWarning = buildDiffBudgetWarning(planning, changedPaths, focusedBrowserRepair);
|
|
3620
|
+
if (diffBudgetWarning) lines.push(diffBudgetWarning);
|
|
3621
|
+
const broadSharedMockWarning = buildBroadSharedMockWarning(planning, changedPaths);
|
|
3622
|
+
if (broadSharedMockWarning) lines.push(broadSharedMockWarning);
|
|
3623
|
+
const testHarnessConvergenceWarning = buildTestHarnessConvergenceWarning(
|
|
3624
|
+
planning,
|
|
3625
|
+
issues,
|
|
3626
|
+
validationRuns,
|
|
3627
|
+
);
|
|
3628
|
+
if (testHarnessConvergenceWarning) lines.push(testHarnessConvergenceWarning);
|
|
3629
|
+
if (planningLooksLikeVisualDerivationTask(planning)) {
|
|
3630
|
+
lines.push(
|
|
3631
|
+
"Visual derivation testing rule: prefer pure helper/state/style-prop tests for planet/projectile/ownership/readability cues. Only add a full React Native render regression when this repo already has a stable harness for that exact surface; otherwise keep render-visible behavior covered through the derived inputs that drive it.",
|
|
3632
|
+
);
|
|
3633
|
+
}
|
|
3634
|
+
lines.push(
|
|
3635
|
+
"Phase soft-budget reminder: if discovery, test-harness setup, or validation repair is running long, reduce the approach before spending more time. Small/medium tasks should converge toward a useful patch within roughly 20 minutes.",
|
|
3636
|
+
);
|
|
3211
3637
|
const validationAlreadyPassed =
|
|
3212
3638
|
validationRuns.length > 0 && validationRuns.every((run) => run.ok);
|
|
3213
3639
|
if (validationAlreadyPassed && !focusedBrowserRepair) {
|
|
@@ -3232,6 +3658,12 @@ export function buildQualityRevisionHint(
|
|
|
3232
3658
|
if (browserRepairPacket.failureFocus) {
|
|
3233
3659
|
lines.push(`- Failure focus: ${browserRepairPacket.failureFocus}`);
|
|
3234
3660
|
}
|
|
3661
|
+
if (browserRepairPacket.lastVerifiedStage) {
|
|
3662
|
+
lines.push(`- Last verified browser checkpoint: ${browserRepairPacket.lastVerifiedStage}`);
|
|
3663
|
+
}
|
|
3664
|
+
if (browserRepairPacket.pageUrl) {
|
|
3665
|
+
lines.push(`- Browser URL at failure: ${browserRepairPacket.pageUrl}`);
|
|
3666
|
+
}
|
|
3235
3667
|
if (browserRepairPacket.expected) {
|
|
3236
3668
|
lines.push(`- Expected UI: ${browserRepairPacket.expected}`);
|
|
3237
3669
|
}
|
|
@@ -3248,6 +3680,18 @@ export function buildQualityRevisionHint(
|
|
|
3248
3680
|
"- Failure artifacts: none were captured in command output; if this repo writes screenshots/traces, inspect the latest browser failure artifact before changing selectors.",
|
|
3249
3681
|
);
|
|
3250
3682
|
}
|
|
3683
|
+
if ((browserRepairPacket.artifactSummaries ?? []).length > 0) {
|
|
3684
|
+
lines.push("Latest browser artifact summaries:");
|
|
3685
|
+
for (const artifactSummary of browserRepairPacket.artifactSummaries ?? []) {
|
|
3686
|
+
lines.push(`- ${artifactSummary}`);
|
|
3687
|
+
}
|
|
3688
|
+
}
|
|
3689
|
+
if ((browserRepairPacket.knownFailureHints ?? []).length > 0) {
|
|
3690
|
+
lines.push("Known issue/remedy memory for this repo/job family:");
|
|
3691
|
+
for (const hint of browserRepairPacket.knownFailureHints ?? []) {
|
|
3692
|
+
lines.push(`- ${hint}`);
|
|
3693
|
+
}
|
|
3694
|
+
}
|
|
3251
3695
|
if (browserRepairPacket.digest) {
|
|
3252
3696
|
lines.push(`- Current failure: ${browserRepairPacket.digest}`);
|
|
3253
3697
|
}
|
|
@@ -3276,6 +3720,11 @@ export function buildQualityRevisionHint(
|
|
|
3276
3720
|
} else {
|
|
3277
3721
|
lines.push("- Breadcrumb: first captured failure for this command in this revision loop");
|
|
3278
3722
|
}
|
|
3723
|
+
if (browserRepairPacket.mustReadArtifactsBeforeEdit) {
|
|
3724
|
+
lines.push(
|
|
3725
|
+
"- Diagnostic artifact read requirement: before editing, explicitly inspect the listed latest artifact/log/DOM summary for the failing stage. If the artifacts are missing, stale, or stop before the failing locator, add a tiny temporary diagnostic/log for locator counts, visible text, URL, and nearby DOM/test-id state before changing product code or selectors.",
|
|
3726
|
+
);
|
|
3727
|
+
}
|
|
3279
3728
|
if (browserRepairPacket.needsDiagnosticProbe) {
|
|
3280
3729
|
lines.push(
|
|
3281
3730
|
"- Convergence mode: diagnostic-first repair. This same browser focus failed in the previous revision, so do not guess another selector or rewrite a different stage.",
|
|
@@ -3457,7 +3906,7 @@ export function buildQualityRevisionHint(
|
|
|
3457
3906
|
for (const step of planning.requiredValidationSteps ?? []) lines.push(`- ${step}`);
|
|
3458
3907
|
}
|
|
3459
3908
|
lines.push("Apply a minimal corrective patch, run focused validation, then finish.");
|
|
3460
|
-
return lines.join("\n").slice(0,
|
|
3909
|
+
return lines.join("\n").slice(0, 8000);
|
|
3461
3910
|
}
|
|
3462
3911
|
|
|
3463
3912
|
function inferTargetPathFromInstruction(text: string): string | null {
|
|
@@ -6214,6 +6663,7 @@ export async function executeJob(
|
|
|
6214
6663
|
let revisionAttempt = 0;
|
|
6215
6664
|
let revisionHint = "";
|
|
6216
6665
|
const previousValidationFailureDigests = new Map<string, string>();
|
|
6666
|
+
const failureJobFamily = buildTaskFailureJobFamily(normalizedParams);
|
|
6217
6667
|
while (revisionAttempt <= qualityRevisionLoopMax) {
|
|
6218
6668
|
const attemptParams: Record<string, unknown> = { ...normalizedParams };
|
|
6219
6669
|
if (revisionHint) {
|
|
@@ -6302,6 +6752,53 @@ export async function executeJob(
|
|
|
6302
6752
|
};
|
|
6303
6753
|
}
|
|
6304
6754
|
|
|
6755
|
+
const preQualityStatus = await git(repo, ["status", "--porcelain"]);
|
|
6756
|
+
const preQualityChangedPaths = preQualityStatus.ok
|
|
6757
|
+
? parseChangedPathsFromStatus(preQualityStatus.stdout)
|
|
6758
|
+
: [];
|
|
6759
|
+
const preQualityPublishablePaths = publishableChangedPaths(preQualityChangedPaths);
|
|
6760
|
+
const executorText = `${result.summary ?? ""}\n${result.stdout ?? ""}\n${result.stderr ?? ""}`;
|
|
6761
|
+
const shellWrapperReturn =
|
|
6762
|
+
/shell-wrapper command rejections|command-router shell-wrapper|command policy rejection/i.test(
|
|
6763
|
+
executorText,
|
|
6764
|
+
);
|
|
6765
|
+
if (preQualityChangedPaths.length > 0 && preQualityPublishablePaths.length === 0) {
|
|
6766
|
+
const detail = `Executor changed only non-publishable dependency/runtime artifact path(s): ${preQualityChangedPaths
|
|
6767
|
+
.slice(0, 12)
|
|
6768
|
+
.join(", ")}${preQualityChangedPaths.length > 12 ? ", ..." : ""}.`;
|
|
6769
|
+
onLog?.(
|
|
6770
|
+
"stderr",
|
|
6771
|
+
`[QualityGate] ${detail} Skipping ValidationGate/CriticGate because there is no PR-worthy patch to validate.`,
|
|
6772
|
+
);
|
|
6773
|
+
return {
|
|
6774
|
+
ok: false,
|
|
6775
|
+
summary: "Executor produced no publishable code changes",
|
|
6776
|
+
stdout: result.stdout,
|
|
6777
|
+
stderr: [result.stderr ?? "", detail].filter(Boolean).join("\n"),
|
|
6778
|
+
exitCode: 4,
|
|
6779
|
+
};
|
|
6780
|
+
}
|
|
6781
|
+
if (
|
|
6782
|
+
preQualityPublishablePaths.length === 0 &&
|
|
6783
|
+
(qualityGatePolicy.mode === "review_fix" || shellWrapperReturn)
|
|
6784
|
+
) {
|
|
6785
|
+
const reason =
|
|
6786
|
+
qualityGatePolicy.mode === "review_fix"
|
|
6787
|
+
? "Review-fix executor returned without publishable code changes."
|
|
6788
|
+
: "Codex hit shell-wrapper command rejections without leaving a publishable patch.";
|
|
6789
|
+
onLog?.(
|
|
6790
|
+
"stderr",
|
|
6791
|
+
`[QualityGate] ${reason} Skipping ValidationGate/CriticGate and failing fast.`,
|
|
6792
|
+
);
|
|
6793
|
+
return {
|
|
6794
|
+
ok: false,
|
|
6795
|
+
summary: reason,
|
|
6796
|
+
stdout: result.stdout,
|
|
6797
|
+
stderr: [result.stderr ?? "", reason].filter(Boolean).join("\n"),
|
|
6798
|
+
exitCode: 4,
|
|
6799
|
+
};
|
|
6800
|
+
}
|
|
6801
|
+
|
|
6305
6802
|
const quality = await runDeterministicQualityGate(
|
|
6306
6803
|
repo,
|
|
6307
6804
|
attemptParams,
|
|
@@ -6313,11 +6810,19 @@ export async function executeJob(
|
|
|
6313
6810
|
revisionAttempt,
|
|
6314
6811
|
},
|
|
6315
6812
|
);
|
|
6316
|
-
|
|
6813
|
+
let browserRepairPacket = buildBrowserValidationRepairPacket(
|
|
6317
6814
|
quality.validationRuns,
|
|
6318
6815
|
previousValidationFailureDigests,
|
|
6319
6816
|
repo,
|
|
6320
6817
|
);
|
|
6818
|
+
if (browserRepairPacket) {
|
|
6819
|
+
const knownFailureHints = knownFailureHintsForPacket(repo, failureJobFamily, browserRepairPacket);
|
|
6820
|
+
browserRepairPacket = {
|
|
6821
|
+
...browserRepairPacket,
|
|
6822
|
+
knownFailureHints,
|
|
6823
|
+
};
|
|
6824
|
+
recordBrowserFailureMemory(repo, failureJobFamily, browserRepairPacket);
|
|
6825
|
+
}
|
|
6321
6826
|
for (const run of quality.validationRuns) {
|
|
6322
6827
|
if (run.ok) continue;
|
|
6323
6828
|
const digest = extractValidationFailureRetryDigest(run, repo);
|
|
@@ -6592,6 +7097,7 @@ export async function executeJob(
|
|
|
6592
7097
|
validationOutsideTaskScope ? [] : quality.validationRuns,
|
|
6593
7098
|
validationOutsideTaskScope ? null : quality.blocker,
|
|
6594
7099
|
validationOutsideTaskScope ? null : browserRepairPacket,
|
|
7100
|
+
quality.changedPaths,
|
|
6595
7101
|
);
|
|
6596
7102
|
onLog?.(
|
|
6597
7103
|
"stderr",
|