@pushpalsdev/cli 1.1.8 → 1.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +68 -5
- package/monitor-ui/+not-found.html +1 -1
- package/monitor-ui/_expo/static/js/web/{entry-22a236a301d5ba71c53234f142ec71d4.js → entry-ff425ab85ad13c1920b8ee00abfae7dd.js} +1139 -1139
- package/monitor-ui/_expo/static/js/web/{index-968878738b5a9ca32445d688cec9db60.js → index-ec13ec62e2b37ed3c5f6d324ef6784e1.js} +6 -6
- package/monitor-ui/_sitemap.html +1 -1
- package/monitor-ui/index.html +1 -1
- package/monitor-ui/modal.html +1 -1
- package/package.json +1 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +48 -0
- package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py +107 -0
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +486 -6
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +95 -41
|
@@ -6,11 +6,13 @@
|
|
|
6
6
|
import {
|
|
7
7
|
existsSync,
|
|
8
8
|
lstatSync,
|
|
9
|
+
mkdirSync,
|
|
9
10
|
readdirSync,
|
|
10
11
|
readFileSync,
|
|
11
12
|
renameSync,
|
|
12
13
|
rmSync,
|
|
13
14
|
unlinkSync,
|
|
15
|
+
writeFileSync,
|
|
14
16
|
} from "fs";
|
|
15
17
|
import { resolve } from "path";
|
|
16
18
|
import {
|
|
@@ -24,6 +26,7 @@ import {
|
|
|
24
26
|
matchesGlob,
|
|
25
27
|
normalizeTargetPath,
|
|
26
28
|
requirementsForValidationCommand,
|
|
29
|
+
resolveGitStateFilePath,
|
|
27
30
|
sanitizeSourceControlIdentityField,
|
|
28
31
|
type SourceControlCommitIdentity,
|
|
29
32
|
type ToolRequirement,
|
|
@@ -93,6 +96,8 @@ export interface BrowserValidationRepairPacket {
|
|
|
93
96
|
selector: string | null;
|
|
94
97
|
expected: string | null;
|
|
95
98
|
failureFocus: string | null;
|
|
99
|
+
lastVerifiedStage?: string | null;
|
|
100
|
+
pageUrl?: string | null;
|
|
96
101
|
digest: string;
|
|
97
102
|
previousDigest: string | null;
|
|
98
103
|
previousStage: string | null;
|
|
@@ -101,10 +106,32 @@ export interface BrowserValidationRepairPacket {
|
|
|
101
106
|
previousFailureFocus: string | null;
|
|
102
107
|
progress: "first_failure" | "same_failure" | "new_failure";
|
|
103
108
|
needsDiagnosticProbe: boolean;
|
|
109
|
+
mustReadArtifactsBeforeEdit?: boolean;
|
|
104
110
|
artifacts: string[];
|
|
111
|
+
artifactSummaries?: string[];
|
|
112
|
+
knownFailureHints?: string[];
|
|
105
113
|
output: string;
|
|
106
114
|
}
|
|
107
115
|
|
|
116
|
+
/**
 * One persisted record of a browser-validation failure, as stored in the
 * failure-memory JSON file written by `recordBrowserFailureMemory`.
 */
interface BrowserFailureMemoryEntry {
  // --- identity -----------------------------------------------------------
  /** Lookup key produced by `browserFailureMemoryKey`; used to find an existing record. */
  key: string;
  /** Job-family string the failure was recorded under. */
  jobFamily: string;
  /** Validation command of the packet that first created this record. */
  command: string;
  /** Failure classification copied from the repair packet. */
  failureKind: BrowserValidationFailureKind;

  // --- failure description (copied from the first packet seen) -------------
  stage: string | null;
  selector: string | null;
  expected: string | null;
  failureFocus: string | null;

  // --- bookkeeping, refreshed every time the same key is recorded ----------
  /** Digest of the most recent occurrence. */
  digest: string;
  /** Number of times this key has been recorded. */
  count: number;
  /** ISO timestamp of the first occurrence. */
  firstSeenAt: string;
  /** ISO timestamp of the latest occurrence. */
  lastSeenAt: string;
  lastVerifiedStage: string | null;
  pageUrl: string | null;
  /** Artifact summaries of the latest occurrence (the writer keeps at most six). */
  artifactSummaries: string[];
  /** Remedy text computed by `browserFailureSuggestedRemedy` for the latest occurrence. */
  suggestedRemedy: string;
}
|
|
134
|
+
|
|
108
135
|
interface DeterministicQualityResult {
|
|
109
136
|
ok: boolean;
|
|
110
137
|
skipped: boolean;
|
|
@@ -326,6 +353,31 @@ export function buildQualityGateRevisionIssues(
|
|
|
326
353
|
return [...new Set(merged)];
|
|
327
354
|
}
|
|
328
355
|
|
|
356
|
+
/**
 * Builds an advisory warning when a task has touched more files than its
 * budget allows.
 *
 * Budget selection:
 *  - `planning.scope.maxFilesToEdit`, floored, when it is a finite number >= 1;
 *  - otherwise 5 for a "small" task (focused browser repair, or a non-high-risk
 *    task with at most 2 target paths and at most 3 acceptance criteria);
 *  - otherwise 10.
 *
 * Paths under generated/vendored directories (outputs, node_modules,
 * .worktrees, dist, build, coverage) do not count toward the budget.
 *
 * @param planning Task planning data; `scope` and `acceptanceCriteria` may be absent.
 * @param changedPaths Paths changed so far (either slash style).
 * @param focusedBrowserRepair Whether the revision is a focused browser repair.
 * @returns The warning text, or `null` when the change set is within budget.
 */
function buildDiffBudgetWarning(
  planning: TaskExecutePlanning,
  changedPaths: string[],
  focusedBrowserRepair: boolean,
): string | null {
  const ignoredDirPattern = /(^|\/)(outputs|node_modules|\.worktrees|dist|build|coverage)(\/|$)/i;
  // Normalize separators only for matching so Windows-style paths are filtered
  // too; the original path text is kept for the message.
  const meaningfulChangedPaths = changedPaths.filter(
    (path) => !ignoredDirPattern.test(path.replace(/\\/g, "/")),
  );
  if (meaningfulChangedPaths.length === 0) return null;

  // Floor BEFORE validating: a fractional value such as 0.5 must not turn into
  // a 0-file budget that flags every change.
  const explicitBudget = Math.floor(Number(planning.scope?.maxFilesToEdit));
  const hasExplicitBudget = Number.isFinite(explicitBudget) && explicitBudget >= 1;

  const smallTask =
    focusedBrowserRepair ||
    (planning.riskLevel !== "high" &&
      (planning.targetPaths?.length ?? 0) <= 2 &&
      (planning.acceptanceCriteria?.length ?? 0) <= 3);

  const budget = hasExplicitBudget ? explicitBudget : smallTask ? 5 : 10;
  if (meaningfulChangedPaths.length <= budget) return null;

  const budgetLabel = hasExplicitBudget
    ? "planning.scope.maxFilesToEdit"
    : smallTask
      ? "small-task"
      : "default";
  // Only the first 12 paths are listed to keep the hint short.
  const listedPaths = meaningfulChangedPaths.slice(0, 12).join(", ");
  const overflowMarker = meaningfulChangedPaths.length > 12 ? ", ..." : "";
  return `Diff budget warning: this task now changes ${meaningfulChangedPaths.length} file(s), above the ${budget}-file ${budgetLabel} budget. Before editing more, remove unrelated churn and keep only behavior-owning files needed for the current repair. Changed files: ${listedPaths}${overflowMarker}`;
}
|
|
380
|
+
|
|
329
381
|
const TEST_ASSERTION_BALANCE_ISSUE =
|
|
330
382
|
"Changed test files do not show both positive and negative assertion coverage (expected both).";
|
|
331
383
|
|
|
@@ -2032,6 +2084,253 @@ function lastBrowserVerifiedStage(text: string): string | null {
|
|
|
2032
2084
|
return lastVerified ? toSingleLine(lastVerified, 80) : null;
|
|
2033
2085
|
}
|
|
2034
2086
|
|
|
2087
|
+
/**
 * Pulls the first browser URL mentioned in validation output.
 *
 * Patterns are tried in priority order: an explicitly labelled URL
 * ("page url: ...", "url=..."), a navigation phrase ("navigated to ..."),
 * and finally any local dev-server URL with a port. Trailing `)`, `,`, `.`
 * and `;` are trimmed from the match.
 *
 * @param text Raw validation output (ANSI sequences are stripped first).
 * @returns The URL passed through `toSingleLine(…, 160)`, or `null` if none matched.
 */
function extractBrowserValidationUrl(text: string): string | null {
  const sanitized = stripAnsiControlSequences(text);
  const urlMatchers: RegExp[] = [
    /\b(?:page\s+url|current\s+url|browser\s+url|url)\s*[:=]\s*(https?:\/\/[^\s|"'`<>]+)/i,
    /\b(?:navigated\s+to|opened|loading)\s+(https?:\/\/[^\s|"'`<>]+)/i,
    /\b(https?:\/\/(?:127\.0\.0\.1|localhost|0\.0\.0\.0):\d+\/?[^\s|"'`<>]*)/i,
  ];
  for (const matcher of urlMatchers) {
    const captured = matcher.exec(sanitized)?.[1];
    if (!captured) continue;
    // Drop sentence punctuation that commonly trails a URL in log lines.
    const trimmed = captured.replace(/[),.;]+$/, "").trim();
    if (trimmed) {
      return toSingleLine(trimmed, 160);
    }
  }
  return null;
}
|
|
2101
|
+
|
|
2102
|
+
/**
 * Classifies a browser-validation artifact by its file extension.
 *
 * @param path Artifact path or file name.
 * @returns "screenshot", "trace", "video", "log", "json", or "artifact" when
 *          the extension is not recognised.
 */
function inferBrowserArtifactKind(path: string): string {
  // Extension patterns are mutually exclusive, so lookup order is irrelevant.
  const kindsByExtension: ReadonlyArray<readonly [RegExp, string]> = [
    [/\.(?:png|jpe?g|webp)$/i, "screenshot"],
    [/\.zip$/i, "trace"],
    [/\.webm$/i, "video"],
    [/\.(?:log|txt)$/i, "log"],
    [/\.json$/i, "json"],
  ];
  const hit = kindsByExtension.find(([extension]) => extension.test(path));
  return hit ? hit[1] : "artifact";
}
|
|
2110
|
+
|
|
2111
|
+
/**
 * Guesses the validation stage an artifact belongs to from its file name.
 *
 * Two naming conventions are recognised, in this order:
 *  1. a numeric prefix: `03-open-settings.png` -> "open settings"
 *  2. a failure/screenshot prefix: `failure_login-form.png` -> "login form"
 *
 * @param path Artifact path using either `/` or `\` separators.
 * @returns The stage text (dashes/underscores replaced by spaces, passed
 *          through `toSingleLine(…, 80)`), or `null` when no convention matches.
 */
function inferBrowserArtifactStageFromPath(path: string): string | null {
  const segments = path.split(/[\\/]/);
  const fileName = segments[segments.length - 1] ?? "";
  const stem = fileName.replace(/\.[^.]+$/, "");

  const stagePatterns: RegExp[] = [
    /^\d+[-_](.+)$/,
    /(?:failure|failed|screenshot|snapshot)[-_](.+)$/i,
  ];
  for (const stagePattern of stagePatterns) {
    const captured = stem.match(stagePattern)?.[1];
    // A whitespace-only capture does not count; fall through to the next pattern.
    if (captured && captured.trim()) {
      return toSingleLine(captured.replace(/[-_]+/g, " "), 80);
    }
  }
  return null;
}
|
|
2122
|
+
|
|
2123
|
+
/**
 * Produces one-line summaries for up to six browser-validation artifacts.
 *
 * Each summary has the form
 * `<path> [<kind>] stage=… selector=… url=… last_verified=…`, where every
 * `key=value` part is omitted when unknown. Values come from the artifact's
 * own text when it is readable, falling back to values extracted from
 * `params.context`. For the stage, the artifact file name is consulted first.
 *
 * Binary artifacts (screenshots, trace archives, videos) are never read:
 * decoding them as UTF-8 yields garbage that the text extractors could
 * mistake for a stage, selector or URL, and traces/videos can be large.
 *
 * @param params.repo Repository root used to resolve relative artifact paths.
 * @param params.artifacts Artifact paths already known to the caller; merged
 *        with whatever `collectRecentBrowserValidationArtifacts` returns.
 * @param params.context Validation output used for fallback values.
 * @returns Summaries, each passed through `toSingleLine(…, 280)`.
 */
function summarizeBrowserValidationArtifacts(params: {
  repo?: string;
  artifacts: string[];
  context: string;
}): string[] {
  const allArtifacts = mergeBrowserValidationArtifacts(
    params.artifacts,
    collectRecentBrowserValidationArtifacts(params.repo),
  );
  const out: string[] = [];
  const contextStage = extractBrowserValidationStage(params.context);
  const contextSelector = extractBrowserValidationSelector(params.context);
  const contextUrl = extractBrowserValidationUrl(params.context);
  const contextLastVerified = lastBrowserVerifiedStage(params.context);

  for (const artifact of allArtifacts.slice(0, 6)) {
    const kind = inferBrowserArtifactKind(artifact);
    const isBinaryArtifact = kind === "screenshot" || kind === "trace" || kind === "video";
    let artifactText = "";
    if (isBinaryArtifact) {
      // Nothing textual to extract; rely on the file name and context only.
    } else if (params.repo && !/^(?:\/repo|\/workspace|[A-Za-z]:[\\/])/.test(artifact)) {
      // Path is not under a known absolute root: resolve it against the repo.
      try {
        artifactText = readFileSync(resolve(params.repo, artifact), "utf8");
      } catch {
        artifactText = "";
      }
    } else if (existsSync(artifact) && /\.(?:log|txt|json)$/i.test(artifact)) {
      try {
        artifactText = readFileSync(artifact, "utf8");
      } catch {
        artifactText = "";
      }
    }

    const artifactContext = artifactText ? stripAnsiControlSequences(artifactText) : "";
    const stage =
      inferBrowserArtifactStageFromPath(artifact) ||
      extractBrowserValidationStage(artifactContext) ||
      contextStage;
    const selector = extractBrowserValidationSelector(artifactContext) || contextSelector;
    const url = extractBrowserValidationUrl(artifactContext) || contextUrl;
    const lastVerified = lastBrowserVerifiedStage(artifactContext) || contextLastVerified;
    const detail = [
      `${artifact} [${kind}]`,
      stage ? `stage=${stage}` : "",
      selector ? `selector=${selector}` : "",
      url ? `url=${url}` : "",
      lastVerified ? `last_verified=${lastVerified}` : "",
    ]
      .filter(Boolean)
      .join(" ");
    out.push(toSingleLine(detail, 280));
  }
  return out;
}
|
|
2174
|
+
|
|
2175
|
+
/**
 * Maps a repair packet's failure kind to a short piece of repair guidance.
 *
 * @param packet Repair packet; only `failureKind` and `selector` are read.
 * @returns Guidance text for "assertion", "startup"/"runtime", "network",
 *          or a generic instruction for any other kind.
 */
function browserFailureSuggestedRemedy(packet: BrowserValidationRepairPacket): string {
  switch (packet.failureKind) {
    case "assertion": {
      // Name the failing locator when one is known; otherwise give general advice.
      const locatorAdvice = packet.selector
        ? `Repair or replace the exact failing locator ${packet.selector} with a stable rendered signal for the same UI stage.`
        : "Repair the exact visible UI assertion or add a stable test id/accessibility label to existing UI.";
      return `Read the latest artifact/log/DOM state before editing. Preserve already-passing browser stages. ${locatorAdvice}`;
    }
    case "startup":
    case "runtime":
      return "Treat as browser startup/runtime provisioning; do not rewrite product UI assertions until ValidationGate reaches an assertion stage.";
    case "network":
      return "Treat as local server/network readiness; add bounded startup diagnostics and avoid changing gameplay/UI behavior.";
    default:
      return "Inspect captured validation output and repair the current failing stage with the smallest behavior-owning diff.";
  }
}
|
|
2193
|
+
|
|
2194
|
+
/**
 * Turns free text into a lowercase slug for use in failure-memory keys.
 *
 * The value is passed through `toSingleLine(…, 120)`, lowercased, every run of
 * characters outside `a-z0-9` becomes a single `-`, and leading/trailing
 * dashes are removed.
 *
 * @param value Text to normalise; `null`/`undefined` are treated as "".
 * @returns The slug, possibly empty.
 */
function normalizeFailureMemoryToken(value: string | null | undefined): string {
  const flattened = toSingleLine(value ?? "", 120);
  const dashed = flattened.toLowerCase().replace(/[^a-z0-9]+/g, "-");
  return dashed.replace(/^-+|-+$/g, "");
}
|
|
2197
|
+
|
|
2198
|
+
/**
 * Derives a coarse "job family" identifier from raw job parameters so that
 * similar tasks share failure memory.
 *
 * The identifier is the `|`-joined list of: the autonomy component area, the
 * planning intent, and up to eight hints taken (in order) from target paths,
 * write globs, validation steps and required validation steps. Every part is
 * normalised with `normalizeFailureMemoryToken`; empty parts are dropped.
 *
 * @param params Raw job parameters; `planning` and `autonomy` are read when
 *        they are objects.
 * @returns The job-family string, or "general" when nothing usable is present.
 */
export function buildTaskFailureJobFamily(params: Record<string, unknown>): string {
  const planning: Partial<TaskExecutePlanning> =
    params.planning && typeof params.planning === "object"
      ? (params.planning as Partial<TaskExecutePlanning>)
      : {};
  const autonomy: Record<string, unknown> =
    params.autonomy && typeof params.autonomy === "object"
      ? (params.autonomy as Record<string, unknown>)
      : {};

  const listOrEmpty = (value: unknown): unknown[] => (Array.isArray(value) ? value : []);
  const rawHints: unknown[] = [
    ...listOrEmpty(planning.targetPaths),
    ...listOrEmpty(planning.scope?.writeGlobs),
    ...listOrEmpty(planning.validationSteps),
    ...listOrEmpty(planning.requiredValidationSteps),
  ];

  const normalizedHints: string[] = [];
  for (const rawHint of rawHints) {
    const token = normalizeFailureMemoryToken(String(rawHint));
    if (token) normalizedHints.push(token);
  }
  const targetHints = normalizedHints.slice(0, 8);

  // Both camelCase and snake_case spellings of the component area are accepted.
  const area = normalizeFailureMemoryToken(
    String(autonomy.componentArea ?? autonomy.component_area ?? ""),
  );
  const intent = normalizeFailureMemoryToken(String(planning.intent ?? ""));

  const parts = [area, intent].filter((part) => part.length > 0).concat(targetHints);
  return parts.length > 0 ? parts.join("|") : "general";
}
|
|
2218
|
+
|
|
2219
|
+
/**
 * Builds the lookup key under which a browser failure is stored in failure
 * memory.
 *
 * The key joins, with `|`: the job family, the validation-command key, the
 * failure kind, and the normalised failure focus, stage, selector and expected
 * UI. Empty components are dropped rather than kept as placeholders, so the
 * position of a component within the key is not fixed.
 *
 * @param jobFamily Job-family string.
 * @param packet Repair packet describing the failure.
 * @returns The composite key.
 */
function browserFailureMemoryKey(jobFamily: string, packet: BrowserValidationRepairPacket): string {
  const commandKey = validationCommandKey(packet.command);
  const [focusToken, stageToken, selectorToken, expectedToken] = [
    packet.failureFocus,
    packet.stage,
    packet.selector,
    packet.expected,
  ].map((field) => normalizeFailureMemoryToken(field));

  const components = [
    jobFamily,
    commandKey,
    packet.failureKind,
    focusToken,
    stageToken,
    selectorToken,
    expectedToken,
  ];
  return components.filter((component) => Boolean(component)).join("|");
}
|
|
2232
|
+
|
|
2233
|
+
/**
 * Determines where the failure-memory JSON file lives.
 *
 * The root is the first existing path among the environment overrides
 * `PUSHPALS_PROJECT_ROOT_OVERRIDE`, `PUSHPALS_REPO_ROOT_OVERRIDE`,
 * `PUSHPALS_REPO_PATH`, and finally `repo`; when none exists, `repo` is used.
 * If `resolveGitStateFilePath` yields a path for the root, that path wins;
 * otherwise the file is placed under `<root>/outputs/data/`.
 *
 * @param repo Repository path of the current job.
 * @returns Path of the failure-memory file.
 */
function resolveFailureMemoryPath(repo: string): string {
  const rawCandidates = [
    process.env.PUSHPALS_PROJECT_ROOT_OVERRIDE,
    process.env.PUSHPALS_REPO_ROOT_OVERRIDE,
    process.env.PUSHPALS_REPO_PATH,
    repo,
  ];
  const rootCandidates: string[] = [];
  for (const rawCandidate of rawCandidates) {
    const trimmed = String(rawCandidate ?? "").trim();
    if (trimmed) rootCandidates.push(trimmed);
  }

  const root = rootCandidates.find((candidate) => existsSync(candidate)) ?? repo;
  const gitStatePath = resolveGitStateFilePath(root, "pushpals-worker-failure-memory.json");
  return gitStatePath
    ? gitStatePath
    : resolve(root, "outputs", "data", "workerpals-failure-memory.json");
}
|
|
2247
|
+
|
|
2248
|
+
/**
 * Loads persisted browser-failure memory for a repository.
 *
 * The file is advisory and may be missing, truncated or hand-edited, so every
 * entry is shape-checked before being returned: callers sort by `lastSeenAt`,
 * do arithmetic on `count`, and pass `command` to `validationCommandKey`.
 * Entries lacking those fields are dropped instead of being trusted.
 *
 * @param repo Repository path used to locate the memory file.
 * @returns Up to 80 well-formed entries in file order; `[]` when the file is
 *          missing, unreadable, not JSON, or has no `entries` array.
 */
function readBrowserFailureMemory(repo: string): BrowserFailureMemoryEntry[] {
  const memoryPath = resolveFailureMemoryPath(repo);
  try {
    const parsed: unknown = JSON.parse(readFileSync(memoryPath, "utf8"));
    if (!parsed || typeof parsed !== "object") return [];
    const rawEntries = (parsed as { entries?: unknown }).entries;
    if (!Array.isArray(rawEntries)) return [];
    return rawEntries
      .filter((entry): entry is BrowserFailureMemoryEntry => {
        if (!entry || typeof entry !== "object") return false;
        const candidate = entry as Record<string, unknown>;
        // Only the fields callers dereference unconditionally are required;
        // nullable descriptive fields are allowed to be absent.
        return (
          typeof candidate.key === "string" &&
          typeof candidate.jobFamily === "string" &&
          typeof candidate.command === "string" &&
          typeof candidate.failureKind === "string" &&
          typeof candidate.lastSeenAt === "string" &&
          typeof candidate.count === "number" &&
          Number.isFinite(candidate.count)
        );
      })
      .slice(0, 80);
  } catch {
    // Missing or corrupt memory is equivalent to having no memory.
    return [];
  }
}
|
|
2260
|
+
|
|
2261
|
+
/**
 * Looks up previously recorded failures that resemble the given packet and
 * renders them as short hint lines.
 *
 * An entry matches when it has the same job family, validation-command key
 * and failure kind, and its failure focus / stage do not contradict the
 * packet's (a side that is empty never contradicts). The three best matches
 * are returned, ordered by occurrence count and then by most recent sighting.
 *
 * @param repo Repository path used to locate the memory file.
 * @param jobFamily Job-family string of the current task.
 * @param packet Repair packet for the current failure.
 * @returns Up to three hint lines, each passed through `toSingleLine(…, 360)`.
 */
export function knownFailureHintsForPacket(
  repo: string,
  jobFamily: string,
  packet: BrowserValidationRepairPacket,
): string[] {
  const resemblesPacket = (entry: BrowserFailureMemoryEntry): boolean =>
    entry.jobFamily === jobFamily &&
    validationCommandKey(entry.command) === validationCommandKey(packet.command) &&
    entry.failureKind === packet.failureKind &&
    !(packet.failureFocus && entry.failureFocus && packet.failureFocus !== entry.failureFocus) &&
    !(packet.stage && entry.stage && packet.stage !== entry.stage);

  const topMatches = readBrowserFailureMemory(repo)
    .filter(resemblesPacket)
    .sort((a, b) => {
      const byCount = b.count - a.count;
      if (byCount) return byCount;
      return b.lastSeenAt.localeCompare(a.lastSeenAt);
    })
    .slice(0, 3);

  const hints: string[] = [];
  for (const entry of topMatches) {
    const focus = entry.failureFocus ?? entry.stage ?? "unknown";
    hints.push(
      toSingleLine(
        `seen ${entry.count}x before for this repo/job family; last=${entry.lastSeenAt}; focus=${focus}; remedy=${entry.suggestedRemedy}`,
        360,
      ),
    );
  }
  return hints;
}
|
|
2284
|
+
|
|
2285
|
+
/**
 * Records a browser-validation failure in the repository's failure memory.
 *
 * If an entry with the same key exists, its count is incremented and its
 * "latest occurrence" fields are refreshed; otherwise a new entry is added.
 * The 80 most recently seen entries are kept and written back as
 * `{ version: 1, entries }`.
 *
 * Failure memory is advisory, so this function never throws: the whole
 * read-update-sort-write sequence is guarded, not just the file write. The
 * sort dereferences fields of previously persisted entries, so it must be
 * inside the guard as well.
 *
 * @param repo Repository path used to locate the memory file.
 * @param jobFamily Job-family string of the current task.
 * @param packet Repair packet describing the failure to record.
 */
export function recordBrowserFailureMemory(
  repo: string,
  jobFamily: string,
  packet: BrowserValidationRepairPacket,
): void {
  try {
    const memoryPath = resolveFailureMemoryPath(repo);
    const now = new Date().toISOString();
    const entries = readBrowserFailureMemory(repo);
    const key = browserFailureMemoryKey(jobFamily, packet);

    // Fields refreshed on every occurrence, shared by the update and insert paths.
    const latest = {
      digest: packet.digest,
      lastSeenAt: now,
      lastVerifiedStage: packet.lastVerifiedStage ?? null,
      pageUrl: packet.pageUrl ?? null,
      artifactSummaries: (packet.artifactSummaries ?? []).slice(0, 6),
      suggestedRemedy: browserFailureSuggestedRemedy(packet),
    };

    const existing = entries.find((entry) => entry.key === key);
    if (existing) {
      // Guard against a persisted non-numeric count (e.g. "3" + 1 === "31").
      const previousCount = Number.isFinite(existing.count) ? existing.count : 0;
      Object.assign(existing, latest, { count: previousCount + 1 });
    } else {
      entries.push({
        key,
        jobFamily,
        command: packet.command,
        failureKind: packet.failureKind,
        stage: packet.stage,
        selector: packet.selector,
        expected: packet.expected,
        failureFocus: packet.failureFocus,
        count: 1,
        firstSeenAt: now,
        ...latest,
      });
    }

    const next = entries
      .sort((a, b) => String(b.lastSeenAt ?? "").localeCompare(String(a.lastSeenAt ?? "")))
      .slice(0, 80);

    // resolve(path, "..") is the file's parent directory.
    mkdirSync(resolve(memoryPath, ".."), { recursive: true });
    writeFileSync(memoryPath, `${JSON.stringify({ version: 1, entries: next }, null, 2)}\n`);
  } catch {
    // Failure memory is advisory; never fail a worker job because persistence is unavailable.
  }
}
|
|
2333
|
+
|
|
2035
2334
|
export function extractValidationFailureRetryDigest(
|
|
2036
2335
|
run: {
|
|
2037
2336
|
command: string;
|
|
@@ -2075,6 +2374,7 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2075
2374
|
validationRuns: ValidationExecutionResult[],
|
|
2076
2375
|
previousFailureDigests: Map<string, string> = new Map(),
|
|
2077
2376
|
repo?: string,
|
|
2377
|
+
knownFailureHints: string[] = [],
|
|
2078
2378
|
): BrowserValidationRepairPacket | null {
|
|
2079
2379
|
for (const run of validationRuns) {
|
|
2080
2380
|
if (run.ok || !isLongRunningBrowserValidationCommand(run.command)) continue;
|
|
@@ -2091,6 +2391,8 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2091
2391
|
const enrichedBrowserContext = [combined, recentLogSummary].filter(Boolean).join("\n");
|
|
2092
2392
|
const selector = extractBrowserValidationSelector(enrichedBrowserContext);
|
|
2093
2393
|
const expected = extractBrowserValidationExpectedUi(enrichedBrowserContext);
|
|
2394
|
+
const lastVerifiedStage = lastBrowserVerifiedStage(enrichedBrowserContext);
|
|
2395
|
+
const pageUrl = extractBrowserValidationUrl(enrichedBrowserContext);
|
|
2094
2396
|
const stage = refineBrowserValidationStage(
|
|
2095
2397
|
extractBrowserValidationStage(enrichedBrowserContext),
|
|
2096
2398
|
selector,
|
|
@@ -2129,6 +2431,15 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2129
2431
|
const needsDiagnosticProbe =
|
|
2130
2432
|
failureKind === "assertion" &&
|
|
2131
2433
|
sameFailureSignal;
|
|
2434
|
+
const artifacts = mergeBrowserValidationArtifacts(
|
|
2435
|
+
extractBrowserValidationArtifacts(combined),
|
|
2436
|
+
collectRecentBrowserValidationArtifacts(repo),
|
|
2437
|
+
);
|
|
2438
|
+
const artifactSummaries = summarizeBrowserValidationArtifacts({
|
|
2439
|
+
repo,
|
|
2440
|
+
artifacts,
|
|
2441
|
+
context: enrichedBrowserContext,
|
|
2442
|
+
});
|
|
2132
2443
|
return {
|
|
2133
2444
|
command: run.command,
|
|
2134
2445
|
failureKind,
|
|
@@ -2136,6 +2447,8 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2136
2447
|
selector,
|
|
2137
2448
|
expected,
|
|
2138
2449
|
failureFocus,
|
|
2450
|
+
lastVerifiedStage,
|
|
2451
|
+
pageUrl,
|
|
2139
2452
|
digest,
|
|
2140
2453
|
previousDigest,
|
|
2141
2454
|
previousStage,
|
|
@@ -2144,10 +2457,10 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2144
2457
|
previousFailureFocus,
|
|
2145
2458
|
progress,
|
|
2146
2459
|
needsDiagnosticProbe,
|
|
2147
|
-
|
|
2148
|
-
|
|
2149
|
-
|
|
2150
|
-
),
|
|
2460
|
+
mustReadArtifactsBeforeEdit: failureKind === "assertion",
|
|
2461
|
+
artifacts,
|
|
2462
|
+
artifactSummaries,
|
|
2463
|
+
knownFailureHints: knownFailureHints.slice(0, 3),
|
|
2151
2464
|
output: [
|
|
2152
2465
|
summarizeBrowserValidationOutput(combined) || digest,
|
|
2153
2466
|
recentLogSummary,
|
|
@@ -2245,11 +2558,14 @@ export function collectQualityGateValidationCommands(params: {
|
|
|
2245
2558
|
planning: TaskExecutePlanning;
|
|
2246
2559
|
changedTestPaths: string[];
|
|
2247
2560
|
isTestTask: boolean;
|
|
2561
|
+
repo?: string;
|
|
2562
|
+
changedPaths?: string[];
|
|
2248
2563
|
}): {
|
|
2249
2564
|
commandsToRun: string[];
|
|
2250
2565
|
requiredRunnableSteps: string[];
|
|
2251
2566
|
plannerRunnableSteps: string[];
|
|
2252
2567
|
fallbackValidationSteps: string[];
|
|
2568
|
+
inferredRepoNativeValidationSteps: string[];
|
|
2253
2569
|
} {
|
|
2254
2570
|
const requiredRunnableSteps = runnableValidationCommandsFromSteps(
|
|
2255
2571
|
params.planning.requiredValidationSteps,
|
|
@@ -2266,15 +2582,20 @@ export function collectQualityGateValidationCommands(params: {
|
|
|
2266
2582
|
params.changedTestPaths,
|
|
2267
2583
|
)
|
|
2268
2584
|
: [];
|
|
2585
|
+
const inferredRepoNativeValidationSteps = params.repo
|
|
2586
|
+
? inferRepoNativeValidationCommands(params.repo, params.changedPaths ?? [])
|
|
2587
|
+
: [];
|
|
2269
2588
|
const commandsToRun = dedupeValidationCommands(
|
|
2270
2589
|
requiredRunnableSteps,
|
|
2271
2590
|
plannerRunnableSteps.length > 0 ? plannerRunnableSteps : fallbackValidationSteps,
|
|
2591
|
+
inferredRepoNativeValidationSteps,
|
|
2272
2592
|
).slice(0, 16);
|
|
2273
2593
|
return {
|
|
2274
2594
|
commandsToRun,
|
|
2275
2595
|
requiredRunnableSteps,
|
|
2276
2596
|
plannerRunnableSteps,
|
|
2277
2597
|
fallbackValidationSteps,
|
|
2598
|
+
inferredRepoNativeValidationSteps,
|
|
2278
2599
|
};
|
|
2279
2600
|
}
|
|
2280
2601
|
|
|
@@ -2416,6 +2737,114 @@ function hasBalancedPositiveNegativeAssertions(paths: string[], repo: string): b
|
|
|
2416
2737
|
return positiveAssertions > 0 && negativeAssertions > 0;
|
|
2417
2738
|
}
|
|
2418
2739
|
|
|
2740
|
+
/**
 * Narrows an unknown value to a string-keyed record.
 *
 * @param value Any value.
 * @returns `value` itself when it is a non-null, non-array object; otherwise `null`.
 */
function asRecord(value: unknown): Record<string, unknown> | null {
  const isKeyedObject = typeof value === "object" && value !== null && !Array.isArray(value);
  return isKeyedObject ? (value as Record<string, unknown>) : null;
}
|
|
2744
|
+
|
|
2745
|
+
/**
 * Reports whether the task guidance text mentions something matching the
 * given pattern.
 *
 * @param pathPattern Pattern describing the file or topic of interest.
 * @param guidance Combined guidance text to search.
 * @returns `true` when the pattern matches anywhere in `guidance`.
 */
function changedPathMentionsGuidance(pathPattern: RegExp, guidance: string): boolean {
  return pathPattern.exec(guidance) !== null;
}
|
|
2748
|
+
|
|
2749
|
+
/**
 * Flags changes that commonly sneak into a publish without being asked for.
 *
 * Checks performed on the changed paths (backslashes normalised to `/`):
 *  - root `.gitignore` modified while no guidance mentions ignore policy;
 *  - `tests/reactNativeMock.ts` modified while no changed test file references
 *    it and no guidance mentions it;
 *  - anything under a `node_modules` directory.
 *
 * "Guidance" is the lowercased concatenation of the instruction, target path,
 * planning target paths / write globs / acceptance criteria / validation
 * steps, and reviewer findings.
 *
 * `reviewAgent` comes from untyped job parameters, so `reviewerFindings` is
 * validated rather than trusted: an array contributes its string elements, a
 * single string contributes itself, and anything else is ignored. Spreading
 * it unchecked would split a string into characters or throw on a
 * non-iterable value.
 *
 * @param params.repo Repository root, used to read changed test files.
 * @param params.changedPaths Paths changed by the task.
 * @param params.instruction Task instruction text.
 * @param params.targetPath Optional primary target path.
 * @param params.planning Task planning data; list fields and `scope` may be absent.
 * @param params.reviewAgent Optional reviewer payload.
 * @returns De-duplicated issue messages; empty when nothing is flagged.
 */
export function collectPrePublishHygieneIssues(params: {
  repo: string;
  changedPaths: string[];
  instruction: string;
  targetPath?: string;
  planning: TaskExecutePlanning;
  reviewAgent?: Record<string, unknown> | null;
}): string[] {
  const changedPaths = params.changedPaths.map((path) => path.replace(/\\/g, "/"));
  const changedPathSet = new Set(changedPaths);

  const rawFindings: unknown = params.reviewAgent?.reviewerFindings;
  const reviewerFindings: string[] = Array.isArray(rawFindings)
    ? rawFindings.filter((finding): finding is string => typeof finding === "string")
    : typeof rawFindings === "string"
      ? [rawFindings]
      : [];

  const guidance = [
    params.instruction,
    params.targetPath ?? "",
    ...(params.planning.targetPaths ?? []),
    ...(params.planning.scope?.writeGlobs ?? []),
    ...(params.planning.acceptanceCriteria ?? []),
    ...(params.planning.validationSteps ?? []),
    ...reviewerFindings,
  ]
    .join("\n")
    .toLowerCase();
  const issues: string[] = [];

  if (
    changedPathSet.has(".gitignore") &&
    !/\b(gitignore|ignore file|node_modules|dependency cache)\b/i.test(guidance)
  ) {
    issues.push(
      "modified .gitignore without task or reviewer guidance requesting ignore-policy changes.",
    );
  }

  if (changedPathSet.has("tests/reactNativeMock.ts")) {
    const changedTestPaths = changedPaths.filter((path) => isAssertionCoverageTestPath(path));
    // A changed test that references the mock justifies the mock edit.
    const hasConsumerInChangedTests = changedTestPaths.some((rel) => {
      try {
        return /reactNativeMock/i.test(readFileSync(resolve(params.repo, rel), "utf8"));
      } catch {
        // Unreadable (e.g. deleted) test files cannot count as consumers.
        return false;
      }
    });
    const explicitlyRequested = /reactnativemock|react native mock/i.test(guidance);
    if (!hasConsumerInChangedTests && !explicitlyRequested) {
      issues.push(
        "changed tests/reactNativeMock.ts without a changed test importing it or explicit reviewer guidance.",
      );
    }
  }

  if (changedPaths.some((path) => /(^|\/)node_modules(\/|$)/i.test(path))) {
    issues.push("attempted to publish node_modules changes; dependency installs must not become PR content.");
  }

  return Array.from(new Set(issues));
}
|
|
2804
|
+
|
|
2805
|
+
/**
 * Infers extra validation commands from the repository's `package.json` and
 * the set of changed files.
 *
 *  - When a TypeScript file changed: `bun run typecheck` if a non-empty
 *    `typecheck` script exists; otherwise `bun x tsc --noEmit` if the repo has
 *    a `tsconfig.json` or lists `typescript` as a (dev) dependency.
 *  - When any non-documentation file changed and a non-empty `lint` script
 *    exists: `bun run lint`.
 *
 * `package.json` is untrusted input: valid JSON that is not an object (for
 * example `null` or an array) is treated like a missing manifest, and
 * non-object `scripts` / `dependencies` sections are ignored.
 *
 * @param repo Repository root.
 * @param changedPaths Paths changed by the task (either slash style).
 * @returns Up to four de-duplicated commands; `[]` when nothing can be inferred.
 */
export function inferRepoNativeValidationCommands(repo: string, changedPaths: string[]): string[] {
  const packageJsonPath = resolve(repo, "package.json");
  if (!existsSync(packageJsonPath)) return [];

  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(packageJsonPath, "utf8"));
  } catch {
    return [];
  }
  // JSON.parse succeeds for `null`, arrays and primitives; none is a manifest.
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
  const packageJson = parsed as {
    scripts?: unknown;
    dependencies?: unknown;
    devDependencies?: unknown;
  };

  const sectionOrEmpty = (section: unknown): Record<string, unknown> =>
    section && typeof section === "object" && !Array.isArray(section)
      ? (section as Record<string, unknown>)
      : {};
  const scripts = sectionOrEmpty(packageJson.scripts);
  const dependencies = {
    ...sectionOrEmpty(packageJson.dependencies),
    ...sectionOrEmpty(packageJson.devDependencies),
  };

  const normalizedPaths = changedPaths.map((path) => path.replace(/\\/g, "/"));
  const hasNonDocChange = normalizedPaths.some((path) => !/\.(?:md|mdx|txt)$/i.test(path));
  // Matches .ts, .tsx, .mts, .cts (and their x-suffixed forms).
  const hasTsChange = normalizedPaths.some((path) => /\.[cm]?tsx?$/i.test(path));
  const commands: string[] = [];

  if (hasTsChange) {
    if (typeof scripts.typecheck === "string" && scripts.typecheck.trim()) {
      commands.push("bun run typecheck");
    } else if (
      existsSync(resolve(repo, "tsconfig.json")) ||
      Object.prototype.hasOwnProperty.call(dependencies, "typescript")
    ) {
      commands.push("bun x tsc --noEmit");
    }
  }

  if (hasNonDocChange && typeof scripts.lint === "string" && scripts.lint.trim()) {
    commands.push("bun run lint");
  }

  return dedupeValidationCommands(commands).slice(0, 4);
}
|
|
2847
|
+
|
|
2419
2848
|
async function runDeterministicQualityGate(
|
|
2420
2849
|
repo: string,
|
|
2421
2850
|
params: Record<string, unknown>,
|
|
@@ -2485,6 +2914,16 @@ async function runDeterministicQualityGate(
|
|
|
2485
2914
|
if (!statusResult.ok) {
|
|
2486
2915
|
addScopeIssue("could not evaluate changed paths from git status.");
|
|
2487
2916
|
}
|
|
2917
|
+
for (const issue of collectPrePublishHygieneIssues({
|
|
2918
|
+
repo,
|
|
2919
|
+
changedPaths,
|
|
2920
|
+
instruction,
|
|
2921
|
+
targetPath,
|
|
2922
|
+
planning,
|
|
2923
|
+
reviewAgent: asRecord(params.reviewAgent ?? params.review_agent),
|
|
2924
|
+
})) {
|
|
2925
|
+
addScopeIssue(issue);
|
|
2926
|
+
}
|
|
2488
2927
|
for (const issue of collectWriteScopeIssuesFromChangedPaths(changedPaths, planning)) {
|
|
2489
2928
|
addScopeIssue(issue);
|
|
2490
2929
|
}
|
|
@@ -2525,6 +2964,8 @@ async function runDeterministicQualityGate(
|
|
|
2525
2964
|
planning,
|
|
2526
2965
|
changedTestPaths,
|
|
2527
2966
|
isTestTask,
|
|
2967
|
+
repo,
|
|
2968
|
+
changedPaths,
|
|
2528
2969
|
});
|
|
2529
2970
|
const validationRuns: ValidationExecutionResult[] = [];
|
|
2530
2971
|
const outputPolicy = outputPolicyForRuntime(runtimeConfig);
|
|
@@ -3076,10 +3517,16 @@ export function buildQualityRevisionHint(
|
|
|
3076
3517
|
validationRuns: ValidationExecutionResult[] = [],
|
|
3077
3518
|
validationBlocker: ValidationBlocker | null = null,
|
|
3078
3519
|
browserRepairPacket: BrowserValidationRepairPacket | null = null,
|
|
3520
|
+
changedPaths: string[] = [],
|
|
3079
3521
|
): string {
|
|
3080
3522
|
const lines: string[] = [];
|
|
3081
3523
|
lines.push("Quality revision required before completion.");
|
|
3082
3524
|
const focusedBrowserRepair = Boolean(browserRepairPacket);
|
|
3525
|
+
lines.push(
|
|
3526
|
+
"Worker phase contract: (1) discovering - inspect only the relevant files/artifacts and name the current hypothesis; (2) editing - make the smallest behavior-owning patch; (3) focused validation - run targeted fast checks; (4) full validation - let PushPals ValidationGate own long required checks unless a single local confirmation is explicitly useful; (5) final diff review - verify changed files are necessary and no unrelated churn remains.",
|
|
3527
|
+
);
|
|
3528
|
+
const diffBudgetWarning = buildDiffBudgetWarning(planning, changedPaths, focusedBrowserRepair);
|
|
3529
|
+
if (diffBudgetWarning) lines.push(diffBudgetWarning);
|
|
3083
3530
|
const validationAlreadyPassed =
|
|
3084
3531
|
validationRuns.length > 0 && validationRuns.every((run) => run.ok);
|
|
3085
3532
|
if (validationAlreadyPassed && !focusedBrowserRepair) {
|
|
@@ -3104,6 +3551,12 @@ export function buildQualityRevisionHint(
|
|
|
3104
3551
|
if (browserRepairPacket.failureFocus) {
|
|
3105
3552
|
lines.push(`- Failure focus: ${browserRepairPacket.failureFocus}`);
|
|
3106
3553
|
}
|
|
3554
|
+
if (browserRepairPacket.lastVerifiedStage) {
|
|
3555
|
+
lines.push(`- Last verified browser checkpoint: ${browserRepairPacket.lastVerifiedStage}`);
|
|
3556
|
+
}
|
|
3557
|
+
if (browserRepairPacket.pageUrl) {
|
|
3558
|
+
lines.push(`- Browser URL at failure: ${browserRepairPacket.pageUrl}`);
|
|
3559
|
+
}
|
|
3107
3560
|
if (browserRepairPacket.expected) {
|
|
3108
3561
|
lines.push(`- Expected UI: ${browserRepairPacket.expected}`);
|
|
3109
3562
|
}
|
|
@@ -3120,6 +3573,18 @@ export function buildQualityRevisionHint(
|
|
|
3120
3573
|
"- Failure artifacts: none were captured in command output; if this repo writes screenshots/traces, inspect the latest browser failure artifact before changing selectors.",
|
|
3121
3574
|
);
|
|
3122
3575
|
}
|
|
3576
|
+
if ((browserRepairPacket.artifactSummaries ?? []).length > 0) {
|
|
3577
|
+
lines.push("Latest browser artifact summaries:");
|
|
3578
|
+
for (const artifactSummary of browserRepairPacket.artifactSummaries ?? []) {
|
|
3579
|
+
lines.push(`- ${artifactSummary}`);
|
|
3580
|
+
}
|
|
3581
|
+
}
|
|
3582
|
+
if ((browserRepairPacket.knownFailureHints ?? []).length > 0) {
|
|
3583
|
+
lines.push("Known issue/remedy memory for this repo/job family:");
|
|
3584
|
+
for (const hint of browserRepairPacket.knownFailureHints ?? []) {
|
|
3585
|
+
lines.push(`- ${hint}`);
|
|
3586
|
+
}
|
|
3587
|
+
}
|
|
3123
3588
|
if (browserRepairPacket.digest) {
|
|
3124
3589
|
lines.push(`- Current failure: ${browserRepairPacket.digest}`);
|
|
3125
3590
|
}
|
|
@@ -3148,6 +3613,11 @@ export function buildQualityRevisionHint(
|
|
|
3148
3613
|
} else {
|
|
3149
3614
|
lines.push("- Breadcrumb: first captured failure for this command in this revision loop");
|
|
3150
3615
|
}
|
|
3616
|
+
if (browserRepairPacket.mustReadArtifactsBeforeEdit) {
|
|
3617
|
+
lines.push(
|
|
3618
|
+
"- Diagnostic artifact read requirement: before editing, explicitly inspect the listed latest artifact/log/DOM summary for the failing stage. If the artifacts are missing, stale, or stop before the failing locator, add a tiny temporary diagnostic/log for locator counts, visible text, URL, and nearby DOM/test-id state before changing product code or selectors.",
|
|
3619
|
+
);
|
|
3620
|
+
}
|
|
3151
3621
|
if (browserRepairPacket.needsDiagnosticProbe) {
|
|
3152
3622
|
lines.push(
|
|
3153
3623
|
"- Convergence mode: diagnostic-first repair. This same browser focus failed in the previous revision, so do not guess another selector or rewrite a different stage.",
|
|
@@ -3329,7 +3799,7 @@ export function buildQualityRevisionHint(
|
|
|
3329
3799
|
for (const step of planning.requiredValidationSteps ?? []) lines.push(`- ${step}`);
|
|
3330
3800
|
}
|
|
3331
3801
|
lines.push("Apply a minimal corrective patch, run focused validation, then finish.");
|
|
3332
|
-
return lines.join("\n").slice(0,
|
|
3802
|
+
return lines.join("\n").slice(0, 8000);
|
|
3333
3803
|
}
|
|
3334
3804
|
|
|
3335
3805
|
function inferTargetPathFromInstruction(text: string): string | null {
|
|
@@ -6086,6 +6556,7 @@ export async function executeJob(
|
|
|
6086
6556
|
let revisionAttempt = 0;
|
|
6087
6557
|
let revisionHint = "";
|
|
6088
6558
|
const previousValidationFailureDigests = new Map<string, string>();
|
|
6559
|
+
const failureJobFamily = buildTaskFailureJobFamily(normalizedParams);
|
|
6089
6560
|
while (revisionAttempt <= qualityRevisionLoopMax) {
|
|
6090
6561
|
const attemptParams: Record<string, unknown> = { ...normalizedParams };
|
|
6091
6562
|
if (revisionHint) {
|
|
@@ -6185,11 +6656,19 @@ export async function executeJob(
|
|
|
6185
6656
|
revisionAttempt,
|
|
6186
6657
|
},
|
|
6187
6658
|
);
|
|
6188
|
-
|
|
6659
|
+
let browserRepairPacket = buildBrowserValidationRepairPacket(
|
|
6189
6660
|
quality.validationRuns,
|
|
6190
6661
|
previousValidationFailureDigests,
|
|
6191
6662
|
repo,
|
|
6192
6663
|
);
|
|
6664
|
+
if (browserRepairPacket) {
|
|
6665
|
+
const knownFailureHints = knownFailureHintsForPacket(repo, failureJobFamily, browserRepairPacket);
|
|
6666
|
+
browserRepairPacket = {
|
|
6667
|
+
...browserRepairPacket,
|
|
6668
|
+
knownFailureHints,
|
|
6669
|
+
};
|
|
6670
|
+
recordBrowserFailureMemory(repo, failureJobFamily, browserRepairPacket);
|
|
6671
|
+
}
|
|
6193
6672
|
for (const run of quality.validationRuns) {
|
|
6194
6673
|
if (run.ok) continue;
|
|
6195
6674
|
const digest = extractValidationFailureRetryDigest(run, repo);
|
|
@@ -6464,6 +6943,7 @@ export async function executeJob(
|
|
|
6464
6943
|
validationOutsideTaskScope ? [] : quality.validationRuns,
|
|
6465
6944
|
validationOutsideTaskScope ? null : quality.blocker,
|
|
6466
6945
|
validationOutsideTaskScope ? null : browserRepairPacket,
|
|
6946
|
+
quality.changedPaths,
|
|
6467
6947
|
);
|
|
6468
6948
|
onLog?.(
|
|
6469
6949
|
"stderr",
|