@pushpalsdev/cli 1.0.85 → 1.0.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -57,6 +57,8 @@ Constraints:
|
|
|
57
57
|
- `feature_hypotheses` may contain any suitable product/engineering features; keep each item concise and actionable.
|
|
58
58
|
- target_paths must be literal repo-relative paths.
|
|
59
59
|
- write_globs must be repo-relative globs.
|
|
60
|
+
- Choose target_paths that own the behavior being improved, not thin route wrappers, re-export files, or shell components, unless the requested change is explicitly at that wrapper boundary.
|
|
61
|
+
- For UI/game/product-surface objectives, prefer files that render or compute the relevant state directly; use wrapper files only for navigation, mounting, or screen-level chrome work.
|
|
60
62
|
- do not invent evidence ids.
|
|
61
63
|
- If all signals are low/noisy, it is valid to return zero candidates.
|
|
62
64
|
- Treat a low `sig_queue_health` value as maintenance-window evidence for safe proactive work, not only incident response.
|
|
@@ -65,7 +65,7 @@ export interface TaskExecutePlanning {
|
|
|
65
65
|
finalizationBudgetMs: number;
|
|
66
66
|
}
|
|
67
67
|
|
|
68
|
-
interface ValidationExecutionResult {
|
|
68
|
+
export interface ValidationExecutionResult {
|
|
69
69
|
step: string;
|
|
70
70
|
command: string;
|
|
71
71
|
ok: boolean;
|
|
@@ -91,6 +91,7 @@ interface DeterministicQualityResult {
|
|
|
91
91
|
validationRuns: ValidationExecutionResult[];
|
|
92
92
|
requiredValidationFailures: string[];
|
|
93
93
|
blocker: ValidationBlocker | null;
|
|
94
|
+
validationFailureScope: "none" | "task_scope" | "outside_task_scope";
|
|
94
95
|
}
|
|
95
96
|
|
|
96
97
|
interface CriticReview {
|
|
@@ -137,9 +138,11 @@ export function shouldReviseRequiredValidationBlocker(opts: {
|
|
|
137
138
|
blocker: ValidationBlocker | null;
|
|
138
139
|
revisionAttempt: number;
|
|
139
140
|
maxAutoRevisions: number;
|
|
141
|
+
outsideTaskScope?: boolean;
|
|
140
142
|
}): boolean {
|
|
141
143
|
if (opts.requiredValidationFailures.length === 0) return false;
|
|
142
144
|
if (!opts.blocker) return false;
|
|
145
|
+
if (opts.outsideTaskScope) return false;
|
|
143
146
|
if (opts.blocker.category !== "repo") return false;
|
|
144
147
|
return opts.revisionAttempt < opts.maxAutoRevisions;
|
|
145
148
|
}
|
|
@@ -615,7 +618,8 @@ async function runValidationCommand(
|
|
|
615
618
|
timeoutMs: number,
|
|
616
619
|
outputPolicy: Partial<OutputCompactionPolicy>,
|
|
617
620
|
): Promise<ValidationExecutionResult> {
|
|
618
|
-
const
|
|
621
|
+
const env = buildWorkerSandboxWritableEnv(repo);
|
|
622
|
+
const argv = prepareValidationCommandArgv(command, env);
|
|
619
623
|
if (!argv) {
|
|
620
624
|
return {
|
|
621
625
|
step: command,
|
|
@@ -631,7 +635,7 @@ async function runValidationCommand(
|
|
|
631
635
|
const startedAt = Date.now();
|
|
632
636
|
const proc = Bun.spawn(argv, {
|
|
633
637
|
cwd: repo,
|
|
634
|
-
env
|
|
638
|
+
env,
|
|
635
639
|
stdout: "pipe",
|
|
636
640
|
stderr: "pipe",
|
|
637
641
|
});
|
|
@@ -696,6 +700,35 @@ export function resolveValidationCommandTimeoutMs(command: string, baseTimeoutMs
|
|
|
696
700
|
return Math.max(normalizedBase, 600_000);
|
|
697
701
|
}
|
|
698
702
|
|
|
703
|
+
/**
 * Reports whether a tokenized command already carries an explicit `--port`
 * flag, either as a standalone `--port` token or in `--port=<value>` form.
 *
 * @param argv Tokenized command line.
 * @returns `true` when any token is `--port` or begins with `--port=`.
 */
function commandHasPortArg(argv: string[]): boolean {
  for (const arg of argv) {
    if (arg === "--port") return true;
    if (arg.startsWith("--port=")) return true;
  }
  return false;
}
|
|
706
|
+
|
|
707
|
+
/**
 * Decides whether a `--port` flag should be appended to a validation command.
 *
 * Injection is declined when the command already names a port, or when
 * `isLongRunningBrowserValidationCommand` rejects it. Otherwise the normalized
 * command key must mention one of the recognised browser e2e/smoke script names.
 *
 * @param command Raw validation command string.
 * @param argv Tokenized form of `command`.
 * @returns `true` when a port argument should be added.
 */
function shouldInjectBrowserValidationPort(command: string, argv: string[]): boolean {
  const eligible = !commandHasPortArg(argv) && isLongRunningBrowserValidationCommand(command);
  if (!eligible) return false;
  const browserScriptPattern =
    /\b(web:e2e|e2e:web|browser:e2e|smoke:web|web:smoke|browser:smoke)\b/;
  return browserScriptPattern.test(validationCommandKey(command));
}
|
|
714
|
+
|
|
715
|
+
/**
 * Tokenizes a validation command and, for browser e2e/smoke commands, appends
 * the dev-server port taken from `env.EXPO_DEV_SERVER_PORT`.
 *
 * The port is only injected when it is a decimal integer in the 0-65535 range;
 * a malformed value is ignored rather than forwarded to the spawned process.
 * When the command already contains a `--` separator the port flag is appended
 * after the existing one, so the script never receives a second literal `--`.
 *
 * @param command Raw validation command string.
 * @param env Environment the command will run with.
 * @returns The argv to spawn, or `null` when the command cannot be tokenized.
 */
export function prepareValidationCommandArgv(
  command: string,
  env: Record<string, string>,
): string[] | null {
  const argv = tokenizeValidationCommandArgv(command);
  if (!argv) return null;

  const port = String(env.EXPO_DEV_SERVER_PORT ?? "").trim();
  // Reject empty, non-numeric, or out-of-range values instead of passing them through.
  const portIsUsable = /^\d{1,5}$/.test(port) && Number(port) <= 65_535;
  if (!portIsUsable) return argv;
  if (!shouldInjectBrowserValidationPort(command, argv)) return argv;

  // Reuse an existing argument separator; only introduce one when none is present.
  return argv.includes("--") ? [...argv, "--port", port] : [...argv, "--", "--port", port];
}
|
|
725
|
+
|
|
726
|
+
/**
 * Tests whether a failure digest contains one of the socket, port, timeout, or
 * browser-runtime keywords (case-insensitive, matched on word boundaries).
 *
 * @param digest Single-line failure summary.
 * @returns `true` when any keyword is present.
 */
function isBrowserValidationInfrastructureDigest(digest: string): boolean {
  const infrastructureSignal =
    /\b(ERR_SOCKET_BAD_PORT|EADDRINUSE|ECONNREFUSED|ECONNRESET|ETIMEDOUT|timed out|timeout|port|browser runtime|playwright install|executable doesn't exist)\b/i;
  return infrastructureSignal.test(digest);
}
|
|
731
|
+
|
|
699
732
|
interface ToolAvailabilityResult {
|
|
700
733
|
requirement: ToolRequirement;
|
|
701
734
|
ok: boolean;
|
|
@@ -802,6 +835,96 @@ function extractPreparedMergeConflictPaths(params: Record<string, unknown>): str
|
|
|
802
835
|
.filter(Boolean);
|
|
803
836
|
}
|
|
804
837
|
|
|
838
|
+
/**
 * Cleans a raw token taken from validation output into a path-like string.
 *
 * Strips surrounding quotes/brackets and trailing punctuation, converts
 * backslashes to forward slashes, drops a leading `./`, and collapses repeated
 * slashes. The token is rejected when it is empty, climbs out via `../`,
 * contains neither `.` nor `/`, or starts with an http(s)/file scheme.
 *
 * @param value Raw token.
 * @returns The normalized token, or `null` when it is rejected.
 */
function normalizeValidationPathToken(value: string): string | null {
  let candidate = value.trim();
  candidate = candidate.replace(/^['"`(<[]+/, "");
  candidate = candidate.replace(/[>'"`)\],.;:]+$/, "");
  candidate = candidate.replace(/\\/g, "/");
  candidate = candidate.replace(/^\.\/+/, "");
  candidate = candidate.replace(/\/+/g, "/");

  if (candidate.length === 0) return null;

  const climbsOut = candidate.startsWith("../") || candidate.includes("/../");
  if (climbsOut) return null;

  // A bare word with neither a dot nor a slash is not treated as a path.
  const looksLikePath = /[./]/.test(candidate);
  if (!looksLikePath) return null;

  const hasUrlScheme = /^(https?|file):/i.test(candidate);
  return hasUrlScheme ? null : candidate;
}
|
|
851
|
+
|
|
852
|
+
/**
 * Collects distinct path-like tokens from validation output.
 *
 * After ANSI control sequences are stripped, two scans run in order: one for
 * bare slash-separated tokens, and one for tokens following `from`, `in`, or
 * `at`. Each hit goes through `normalizeValidationPathToken`; rejected and
 * duplicate tokens are dropped.
 *
 * @param value Raw validation output.
 * @returns Normalized tokens in first-seen order.
 */
function extractPathTokensFromValidationOutput(value: string): string[] {
  // A Set keeps insertion order, so it serves as both de-duplication and result list.
  const tokens = new Set<string>();
  const collect = (raw: string | undefined): void => {
    if (!raw) return;
    const token = normalizeValidationPathToken(raw);
    if (token) tokens.add(token);
  };

  const text = stripAnsiControlSequences(value);
  const barePathPattern = /[A-Za-z0-9_.@-]+(?:\/[A-Za-z0-9_.@-]+)+(?:\.[A-Za-z0-9_.-]+)?/g;
  const prefixedPathPattern = /(?:from|in|at)\s+['"`]?([^'"`\s]+\/[^'"`\s]+)['"`]?/gi;

  for (const found of text.matchAll(barePathPattern)) {
    collect(found[0]);
  }
  for (const found of text.matchAll(prefixedPathPattern)) {
    collect(found[1]);
  }
  return Array.from(tokens);
}
|
|
871
|
+
|
|
872
|
+
/**
 * Derives the literal leading portion of a scope hint.
 *
 * Everything from the first `*` onward is removed, trailing slashes are
 * trimmed, and the remainder is passed through `normalizeValidationPathToken`.
 *
 * @param value Scope hint (literal path or glob).
 * @returns The literal prefix, or `null` when nothing usable remains.
 */
function literalScopePrefix(value: string): string | null {
  const withoutGlobTail = value.replace(/\*\*?.*$/, "");
  const withoutTrailingSlash = withoutGlobTail.replace(/\/+$/, "");
  const prefix = normalizeValidationPathToken(withoutTrailingSlash);
  return prefix && prefix !== "." ? prefix : null;
}
|
|
877
|
+
|
|
878
|
+
/**
 * Checks whether a path falls under a scope hint.
 *
 * The hint is tried as a glob via `matchesGlob` first. If that fails, the
 * hint's literal prefix is compared: the path matches when it equals the
 * prefix or sits beneath it as a directory.
 *
 * @param path Candidate path.
 * @param hint Scope hint (literal path or glob).
 * @returns `true` when the path is covered by the hint.
 */
function pathMatchesScopeHint(path: string, hint: string): boolean {
  const candidate = normalizeValidationPathToken(path);
  if (!candidate) return false;

  const cleanedHint = hint.trim().replace(/\\/g, "/").replace(/^\.\/+/, "");
  if (!cleanedHint) return false;

  if (matchesGlob(candidate, cleanedHint)) return true;

  const prefix = literalScopePrefix(cleanedHint);
  return prefix ? candidate === prefix || candidate.startsWith(`${prefix}/`) : false;
}
|
|
887
|
+
|
|
888
|
+
/**
 * Classifies failed validation runs as inside or outside the task's scope.
 *
 * Runs that passed, or that exited with code 127, are ignored. Scope hints are
 * the target path, the changed paths, the planned target paths, and the
 * planned write globs.
 *
 * @param runs Validation runs to inspect.
 * @param planning Task planning data supplying target paths and write globs.
 * @param changedPaths Paths changed by the task.
 * @param targetPath Optional primary target path.
 * @returns
 *   - `"none"` when there are no relevant failures, no scope hints, or no
 *     usable path tokens in the output;
 *   - `"task_scope"` when the output mentions a hint's literal prefix (at least
 *     four characters, case-insensitive) or an extracted path matches a hint;
 *   - `"outside_task_scope"` when paths were found but none match any hint.
 */
export function classifyValidationFailureScope(
  runs: ValidationExecutionResult[],
  planning: TaskExecutePlanning,
  changedPaths: string[],
  targetPath?: string,
): "none" | "task_scope" | "outside_task_scope" {
  const failures = runs.filter((run) => !run.ok && run.exitCode !== 127);
  if (failures.length === 0) return "none";

  const rawHints = [
    targetPath ?? "",
    ...changedPaths,
    ...(planning.targetPaths ?? []),
    ...(planning.scope.writeGlobs ?? []),
  ];
  const scopeHints: string[] = [];
  for (const rawHint of rawHints) {
    const hint = rawHint.trim().replace(/\\/g, "/");
    if (hint) scopeHints.push(hint);
  }
  if (scopeHints.length === 0) return "none";

  const outputChunks: string[] = [];
  for (const run of failures) {
    if (run.stdout) outputChunks.push(run.stdout);
    if (run.stderr) outputChunks.push(run.stderr);
  }
  const output = outputChunks.join("\n");

  // Pass 1: plain substring search for each hint's literal prefix.
  const haystack = output.toLowerCase().replace(/\\/g, "/");
  const mentionedLiterally = scopeHints.some((hint) => {
    const prefix = literalScopePrefix(hint);
    if (!prefix || prefix.length < 4) return false;
    return haystack.includes(prefix.toLowerCase());
  });
  if (mentionedLiterally) return "task_scope";

  // Pass 2: extract path tokens, excluding dependency and package-manager directories.
  const toolingPath = /^(node_modules|\.bun|bun|npm|pnpm|yarn)\//i;
  const pathTokens = extractPathTokensFromValidationOutput(output).filter(
    (token) => !toolingPath.test(token),
  );
  if (pathTokens.length === 0) return "none";

  const anyInScope = pathTokens.some((token) =>
    scopeHints.some((hint) => pathMatchesScopeHint(token, hint)),
  );
  return anyInScope ? "task_scope" : "outside_task_scope";
}
|
|
927
|
+
|
|
805
928
|
function detectValidationBlocker(runs: ValidationExecutionResult[]): ValidationBlocker | null {
|
|
806
929
|
const combined = runs
|
|
807
930
|
.flatMap((run) => [run.stdout, run.stderr])
|
|
@@ -982,7 +1105,51 @@ function extractRunnableValidationCommand(step: string): string | null {
|
|
|
982
1105
|
}
|
|
983
1106
|
|
|
984
1107
|
/**
 * Builds a normalized, lower-cased key for comparing validation commands.
 *
 * When the command tokenizes, tokens are trimmed, empty ones dropped, a
 * leading `bunx` is rewritten to `bun x`, and the tokens are re-joined with
 * single spaces. Otherwise the raw string gets the same whitespace and `bunx`
 * treatment.
 *
 * @param command Raw validation command.
 * @returns The comparison key.
 */
function validationCommandKey(command: string): string {
  const argv = tokenizeValidationCommandArgv(command);
  if (!argv || argv.length === 0) {
    // Tokenization failed or produced nothing: normalize the raw text instead.
    const collapsed = command.trim().replace(/\s+/g, " ");
    return collapsed.replace(/^bunx\b/i, "bun x").toLowerCase();
  }

  const parts: string[] = [];
  for (const entry of argv) {
    const token = entry.trim();
    if (token) parts.push(token);
  }
  const tokens =
    parts[0]?.toLowerCase() === "bunx" ? ["bun", "x", ...parts.slice(1)] : parts;
  return tokens.join(" ").replace(/\s+/g, " ").toLowerCase();
}
|
|
1122
|
+
|
|
1123
|
+
/**
 * Produces a short, single-line summary of why a validation run failed.
 *
 * stderr and stdout are joined (stderr first) and stripped of ANSI sequences.
 * Lookup order:
 *   1. the first signature in a fixed priority list that matches anywhere in
 *      the output (module resolution, bad port, TypeScript error, generic `Error:`);
 *   2. the first line containing a failure keyword;
 *   3. for exit code 124, a `timed out` message with the elapsed time if known.
 *
 * @param run Exit code, output streams, and elapsed time of the run.
 * @returns The digest (cut via `toSingleLine(..., 180)`), or `""` when nothing applies.
 */
export function extractValidationFailureDigest(run: {
  exitCode?: number;
  stdout?: string;
  stderr?: string;
  elapsedMs?: number;
}): string {
  const output = stripAnsiControlSequences([run.stderr, run.stdout].filter(Boolean).join("\n"));

  // Ordered by priority: an earlier signature wins even if a later one appears first in the text.
  const signatures: RegExp[] = [
    /\bCannot find module\s+['"`][^'"`\r\n]+['"`][^\r\n]*/i,
    /\bFailed to resolve import\s+['"`][^'"`\r\n]+['"`][^\r\n]*/i,
    /\bCould not resolve\s+['"`]?[^'"`\r\n]+['"`]?[^\r\n]*/i,
    /\bModule not found[^\r\n]*/i,
    /\bERR_SOCKET_BAD_PORT[^\r\n]*/i,
    /\berror TS\d+:[^\r\n]*/i,
    /\bError:\s+[^\r\n]*/i,
  ];
  for (const signature of signatures) {
    const hit = signature.exec(output)?.[0];
    if (hit) return toSingleLine(hit, 180);
  }

  const failureKeyword = /\b(error|failed|cannot|could not|timeout|timed out)\b/i;
  for (const rawLine of output.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (failureKeyword.test(line)) return toSingleLine(line, 180);
  }

  if (Number(run.exitCode) !== 124) return "";
  const elapsedMs = Number(run.elapsedMs);
  return Number.isFinite(elapsedMs) ? `timed out after ${elapsedMs}ms` : "timed out";
}
|
|
987
1154
|
|
|
988
1155
|
export function collectRequiredValidationFailures(
|
|
@@ -995,7 +1162,8 @@ export function collectRequiredValidationFailures(
|
|
|
995
1162
|
.filter((run) => requiredKeys.has(validationCommandKey(run.command)) && !run.ok)
|
|
996
1163
|
.map((run) => {
|
|
997
1164
|
const exitCode = Number.isFinite(Number(run.exitCode)) ? Number(run.exitCode) : "unknown";
|
|
998
|
-
|
|
1165
|
+
const digest = extractValidationFailureDigest(run);
|
|
1166
|
+
return `${run.command} exited ${exitCode}${digest ? ` (${digest})` : ""}`;
|
|
999
1167
|
});
|
|
1000
1168
|
}
|
|
1001
1169
|
|
|
@@ -1055,7 +1223,7 @@ function dedupeValidationCommands(...groups: string[][]): string[] {
|
|
|
1055
1223
|
for (const command of group) {
|
|
1056
1224
|
const trimmed = command.trim();
|
|
1057
1225
|
if (!trimmed) continue;
|
|
1058
|
-
const key = trimmed
|
|
1226
|
+
const key = validationCommandKey(trimmed);
|
|
1059
1227
|
if (seen.has(key)) continue;
|
|
1060
1228
|
seen.add(key);
|
|
1061
1229
|
out.push(trimmed);
|
|
@@ -1160,14 +1328,19 @@ export function inferFallbackValidationCommandsForTestTask(
|
|
|
1160
1328
|
return candidates.slice(0, 4);
|
|
1161
1329
|
}
|
|
1162
1330
|
|
|
1163
|
-
function isTestFocusedTask(
|
|
1331
|
+
export function isTestFocusedTask(
|
|
1164
1332
|
instruction: string,
|
|
1165
1333
|
planning: TaskExecutePlanning,
|
|
1166
1334
|
targetPath?: string,
|
|
1167
1335
|
): boolean {
|
|
1168
1336
|
const lowerInstruction = instruction.toLowerCase();
|
|
1169
1337
|
if (
|
|
1170
|
-
/\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/.test(
|
|
1338
|
+
/\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b.{0,80}\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/.test(
|
|
1339
|
+
lowerInstruction,
|
|
1340
|
+
) ||
|
|
1341
|
+
/\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b.{0,80}\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b/.test(
|
|
1342
|
+
lowerInstruction,
|
|
1343
|
+
)
|
|
1171
1344
|
) {
|
|
1172
1345
|
return true;
|
|
1173
1346
|
}
|
|
@@ -1179,7 +1352,9 @@ function isTestFocusedTask(
|
|
|
1179
1352
|
if (pathHints.some((entry) => isLikelyTestPath(entry))) return true;
|
|
1180
1353
|
if (
|
|
1181
1354
|
planning.acceptanceCriteria.some((entry) =>
|
|
1182
|
-
/\b(test|tests|coverage|unit|integration|
|
|
1355
|
+
/\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b.{0,80}\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/i.test(
|
|
1356
|
+
entry,
|
|
1357
|
+
),
|
|
1183
1358
|
)
|
|
1184
1359
|
) {
|
|
1185
1360
|
return true;
|
|
@@ -1217,6 +1392,10 @@ async function runDeterministicQualityGate(
|
|
|
1217
1392
|
runtimeConfig: WorkerpalsRuntimeConfig,
|
|
1218
1393
|
qualityGatePolicy: QualityGatePolicy,
|
|
1219
1394
|
onLog?: (stream: "stdout" | "stderr", line: string) => void,
|
|
1395
|
+
validationRetryState?: {
|
|
1396
|
+
previousFailureDigests?: Map<string, string>;
|
|
1397
|
+
revisionAttempt?: number;
|
|
1398
|
+
},
|
|
1220
1399
|
): Promise<DeterministicQualityResult> {
|
|
1221
1400
|
const instruction = String(params.instruction ?? "");
|
|
1222
1401
|
const targetPath = String(params.targetPath ?? params.path ?? "").trim() || undefined;
|
|
@@ -1245,6 +1424,7 @@ async function runDeterministicQualityGate(
|
|
|
1245
1424
|
validationRuns: [],
|
|
1246
1425
|
requiredValidationFailures: [],
|
|
1247
1426
|
blocker: null,
|
|
1427
|
+
validationFailureScope: "none",
|
|
1248
1428
|
};
|
|
1249
1429
|
}
|
|
1250
1430
|
|
|
@@ -1394,6 +1574,33 @@ async function runDeterministicQualityGate(
|
|
|
1394
1574
|
);
|
|
1395
1575
|
continue;
|
|
1396
1576
|
}
|
|
1577
|
+
const previousDigest = validationRetryState?.previousFailureDigests?.get(
|
|
1578
|
+
validationCommandKey(command),
|
|
1579
|
+
);
|
|
1580
|
+
if (
|
|
1581
|
+
previousDigest &&
|
|
1582
|
+
Number(validationRetryState?.revisionAttempt ?? 0) > 0 &&
|
|
1583
|
+
isLongRunningBrowserValidationCommand(command) &&
|
|
1584
|
+
isBrowserValidationInfrastructureDigest(previousDigest)
|
|
1585
|
+
) {
|
|
1586
|
+
const stderr =
|
|
1587
|
+
`Skipped repeated browser validation after the same command failed in an earlier revision: ${previousDigest}. ` +
|
|
1588
|
+
"Run it once after the underlying blocker changes.";
|
|
1589
|
+
validationRuns.push({
|
|
1590
|
+
step: command,
|
|
1591
|
+
command,
|
|
1592
|
+
ok: false,
|
|
1593
|
+
exitCode: 124,
|
|
1594
|
+
stdout: "",
|
|
1595
|
+
stderr,
|
|
1596
|
+
elapsedMs: 1,
|
|
1597
|
+
});
|
|
1598
|
+
onLog?.(
|
|
1599
|
+
"stderr",
|
|
1600
|
+
`[ValidationGate] Skipped repeated long browser validation: ${command} (${previousDigest})`,
|
|
1601
|
+
);
|
|
1602
|
+
continue;
|
|
1603
|
+
}
|
|
1397
1604
|
onLog?.("stdout", `[ValidationGate] Running "${command}"`);
|
|
1398
1605
|
const run = await runValidationCommand(
|
|
1399
1606
|
repo,
|
|
@@ -1402,7 +1609,8 @@ async function runDeterministicQualityGate(
|
|
|
1402
1609
|
outputPolicy,
|
|
1403
1610
|
);
|
|
1404
1611
|
validationRuns.push(run);
|
|
1405
|
-
const
|
|
1612
|
+
const digest = run.ok ? "" : extractValidationFailureDigest(run);
|
|
1613
|
+
const runSummary = `[ValidationGate] ${run.ok ? "Passed" : "Failed"} (${run.elapsedMs}ms, exit ${run.exitCode}): ${command}${digest ? ` - ${digest}` : ""}`;
|
|
1406
1614
|
onLog?.(run.ok ? "stdout" : "stderr", runSummary);
|
|
1407
1615
|
}
|
|
1408
1616
|
// exit 127 = command not found: separate tool-availability issues from real test failures.
|
|
@@ -1442,6 +1650,15 @@ async function runDeterministicQualityGate(
|
|
|
1442
1650
|
const blocker = qualityGatePolicy.validationGateEnabled
|
|
1443
1651
|
? detectValidationBlocker(validationRuns)
|
|
1444
1652
|
: null;
|
|
1653
|
+
const scopedValidationFailure = qualityGatePolicy.validationGateEnabled
|
|
1654
|
+
? classifyValidationFailureScope(validationRuns, planning, changedPaths, targetPath)
|
|
1655
|
+
: "none";
|
|
1656
|
+
if (scopedValidationFailure === "outside_task_scope") {
|
|
1657
|
+
onLog?.(
|
|
1658
|
+
"stderr",
|
|
1659
|
+
"[ValidationGate] Required validation failures appear outside the task write scope; treating them as publish blockers, not repair instructions.",
|
|
1660
|
+
);
|
|
1661
|
+
}
|
|
1445
1662
|
|
|
1446
1663
|
return {
|
|
1447
1664
|
ok: issues.length === 0 && blocker === null,
|
|
@@ -1454,6 +1671,7 @@ async function runDeterministicQualityGate(
|
|
|
1454
1671
|
validationRuns,
|
|
1455
1672
|
requiredValidationFailures,
|
|
1456
1673
|
blocker,
|
|
1674
|
+
validationFailureScope: scopedValidationFailure,
|
|
1457
1675
|
};
|
|
1458
1676
|
}
|
|
1459
1677
|
|
|
@@ -3460,9 +3678,10 @@ async function generateCommitMessageFromDiffViaCodex(
|
|
|
3460
3678
|
repo: string,
|
|
3461
3679
|
runtimeConfig: WorkerpalsRuntimeConfig,
|
|
3462
3680
|
): Promise<string | null> {
|
|
3681
|
+
const model = runtimeConfig.workerpals.llm.model.trim();
|
|
3682
|
+
if (!model) return null;
|
|
3463
3683
|
const codexPrefix = await resolveCodexCommandPrefix(repo, runtimeConfig.workerpals.llm.codexBin);
|
|
3464
3684
|
if (!codexPrefix) return null;
|
|
3465
|
-
const model = runtimeConfig.workerpals.llm.model.trim();
|
|
3466
3685
|
const timeoutMs = (() => {
|
|
3467
3686
|
const value = Number(runtimeConfig.workerpals.llm.codexTimeoutMs);
|
|
3468
3687
|
if (!Number.isFinite(value)) return 120_000;
|
|
@@ -4368,6 +4587,7 @@ export async function executeJob(
|
|
|
4368
4587
|
|
|
4369
4588
|
let revisionAttempt = 0;
|
|
4370
4589
|
let revisionHint = "";
|
|
4590
|
+
const previousValidationFailureDigests = new Map<string, string>();
|
|
4371
4591
|
while (revisionAttempt <= qualityRevisionLoopMax) {
|
|
4372
4592
|
const attemptParams: Record<string, unknown> = { ...normalizedParams };
|
|
4373
4593
|
if (revisionHint) {
|
|
@@ -4462,30 +4682,66 @@ export async function executeJob(
|
|
|
4462
4682
|
runtimeConfig,
|
|
4463
4683
|
qualityGatePolicy,
|
|
4464
4684
|
onLog,
|
|
4685
|
+
{
|
|
4686
|
+
previousFailureDigests: previousValidationFailureDigests,
|
|
4687
|
+
revisionAttempt,
|
|
4688
|
+
},
|
|
4465
4689
|
);
|
|
4690
|
+
for (const run of quality.validationRuns) {
|
|
4691
|
+
if (run.ok) continue;
|
|
4692
|
+
const digest = extractValidationFailureDigest(run);
|
|
4693
|
+
if (digest) previousValidationFailureDigests.set(validationCommandKey(run.command), digest);
|
|
4694
|
+
}
|
|
4695
|
+
const validationOutsideTaskScope =
|
|
4696
|
+
quality.validationFailureScope === "outside_task_scope";
|
|
4697
|
+
const qualityForCritic: DeterministicQualityResult = validationOutsideTaskScope
|
|
4698
|
+
? {
|
|
4699
|
+
...quality,
|
|
4700
|
+
issues: quality.issues.filter((issue) => !issue.startsWith("ValidationGate:")),
|
|
4701
|
+
validationIssues: [],
|
|
4702
|
+
validationRuns: [],
|
|
4703
|
+
blocker: null,
|
|
4704
|
+
}
|
|
4705
|
+
: quality;
|
|
4466
4706
|
const critic =
|
|
4467
4707
|
quality.skipped || !qualityGatePolicy.criticGateEnabled
|
|
4468
4708
|
? null
|
|
4469
4709
|
: executor === "openai_codex"
|
|
4470
|
-
? await runCodexCriticReview(repo, attemptParams,
|
|
4471
|
-
: await runTaskCriticReview(repo, attemptParams,
|
|
4710
|
+
? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
|
|
4711
|
+
: await runTaskCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog);
|
|
4472
4712
|
if (!qualityGatePolicy.criticGateEnabled) {
|
|
4473
4713
|
onLog?.("stdout", "[CriticGate] Disabled by workerpals.quality_critic_gate_enabled=false.");
|
|
4474
4714
|
}
|
|
4475
|
-
const
|
|
4715
|
+
const advisoryRelaxedQualityIssues = relaxAdvisoryQualityIssues(
|
|
4476
4716
|
quality.issues,
|
|
4477
4717
|
quality.validationRuns,
|
|
4478
4718
|
critic,
|
|
4479
4719
|
qualityCriticMinScore,
|
|
4480
4720
|
);
|
|
4481
|
-
|
|
4721
|
+
let effectiveQualityIssues = advisoryRelaxedQualityIssues;
|
|
4722
|
+
if (validationOutsideTaskScope) {
|
|
4723
|
+
effectiveQualityIssues = effectiveQualityIssues.filter(
|
|
4724
|
+
(issue) => !issue.startsWith("ValidationGate:"),
|
|
4725
|
+
);
|
|
4726
|
+
if (effectiveQualityIssues.length !== quality.issues.length) {
|
|
4727
|
+
onLog?.(
|
|
4728
|
+
"stderr",
|
|
4729
|
+
"[ValidationGate] Validation failures are outside the task scope; they will block publishing but will not drive another code revision.",
|
|
4730
|
+
);
|
|
4731
|
+
}
|
|
4732
|
+
}
|
|
4733
|
+
if (
|
|
4734
|
+
!validationOutsideTaskScope &&
|
|
4735
|
+
advisoryRelaxedQualityIssues.length !== quality.issues.length
|
|
4736
|
+
) {
|
|
4482
4737
|
onLog?.(
|
|
4483
4738
|
"stdout",
|
|
4484
4739
|
"[QualityGate] Assertion-balance heuristic downgraded to advisory because validation passed and critic score met threshold.",
|
|
4485
4740
|
);
|
|
4486
4741
|
}
|
|
4487
4742
|
const deterministicRequiresRevision =
|
|
4488
|
-
effectiveQualityIssues.length > 0 ||
|
|
4743
|
+
effectiveQualityIssues.length > 0 ||
|
|
4744
|
+
(quality.blocker !== null && !validationOutsideTaskScope);
|
|
4489
4745
|
const criticRequiresRevision = Boolean(critic && critic.score < qualityCriticMinScore);
|
|
4490
4746
|
if (
|
|
4491
4747
|
!qualityGatePolicy.publishGateEnabled &&
|
|
@@ -4513,6 +4769,29 @@ export async function executeJob(
|
|
|
4513
4769
|
}
|
|
4514
4770
|
|
|
4515
4771
|
if (!deterministicRequiresRevision && !criticRequiresRevision) {
|
|
4772
|
+
if (quality.requiredValidationFailures.length > 0) {
|
|
4773
|
+
const requiredSummary = `Required vision.md validation blocked publishing: ${quality.requiredValidationFailures.join("; ")}`;
|
|
4774
|
+
const diagnostics = truncate(
|
|
4775
|
+
[
|
|
4776
|
+
result.stderr ?? "",
|
|
4777
|
+
validationOutsideTaskScope
|
|
4778
|
+
? "Validation failures appear outside the task write scope and are treated as pre-existing repo blockers."
|
|
4779
|
+
: "",
|
|
4780
|
+
...quality.validationRuns.flatMap((run) => [run.stdout, run.stderr]).filter(Boolean),
|
|
4781
|
+
]
|
|
4782
|
+
.filter(Boolean)
|
|
4783
|
+
.join("\n"),
|
|
4784
|
+
outputPolicyForRuntime(runtimeConfig),
|
|
4785
|
+
);
|
|
4786
|
+
onLog?.("stderr", `[QualityGate] ${requiredSummary}`);
|
|
4787
|
+
return {
|
|
4788
|
+
ok: false,
|
|
4789
|
+
summary: requiredSummary,
|
|
4790
|
+
stdout: result.stdout,
|
|
4791
|
+
stderr: diagnostics,
|
|
4792
|
+
exitCode: 4,
|
|
4793
|
+
};
|
|
4794
|
+
}
|
|
4516
4795
|
if (critic) {
|
|
4517
4796
|
onLog?.(
|
|
4518
4797
|
"stdout",
|
|
@@ -4538,11 +4817,13 @@ export async function executeJob(
|
|
|
4538
4817
|
const activeMaxAutoRevisions = revisionLimitForQualityGateFailures({
|
|
4539
4818
|
policy: qualityGatePolicy,
|
|
4540
4819
|
qualityIssues: effectiveQualityIssues,
|
|
4541
|
-
requiredValidationFailures:
|
|
4542
|
-
|
|
4820
|
+
requiredValidationFailures: validationOutsideTaskScope
|
|
4821
|
+
? []
|
|
4822
|
+
: quality.requiredValidationFailures,
|
|
4823
|
+
blocker: validationOutsideTaskScope ? null : quality.blocker,
|
|
4543
4824
|
});
|
|
4544
4825
|
const issueSummary = issues.map((entry) => toSingleLine(entry, 180)).join(" | ");
|
|
4545
|
-
if (quality.blocker) {
|
|
4826
|
+
if (quality.blocker && !validationOutsideTaskScope) {
|
|
4546
4827
|
const blockerSummary = `Quality gate blocked by ${quality.blocker.category} issue: ${quality.blocker.detail}`;
|
|
4547
4828
|
const blockerDiagnostics = truncate(
|
|
4548
4829
|
[
|
|
@@ -4556,6 +4837,7 @@ export async function executeJob(
|
|
|
4556
4837
|
blocker: quality.blocker,
|
|
4557
4838
|
revisionAttempt,
|
|
4558
4839
|
maxAutoRevisions: qualityValidationMaxAutoRevisions,
|
|
4840
|
+
outsideTaskScope: validationOutsideTaskScope,
|
|
4559
4841
|
});
|
|
4560
4842
|
if (requiredValidationCanRevise) {
|
|
4561
4843
|
onLog?.(
|
|
@@ -4669,8 +4951,8 @@ export async function executeJob(
|
|
|
4669
4951
|
critic,
|
|
4670
4952
|
planning,
|
|
4671
4953
|
reviewFixContext,
|
|
4672
|
-
quality.validationRuns,
|
|
4673
|
-
quality.blocker,
|
|
4954
|
+
validationOutsideTaskScope ? [] : quality.validationRuns,
|
|
4955
|
+
validationOutsideTaskScope ? null : quality.blocker,
|
|
4674
4956
|
);
|
|
4675
4957
|
onLog?.(
|
|
4676
4958
|
"stderr",
|