@pushpalsdev/cli 1.1.4 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +302 -117
- package/package.json +1 -1
- package/runtime/configs/default.toml +3 -1
- package/runtime/configs/local.example.toml +3 -1
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +13 -1
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +420 -169
- package/runtime/sandbox/configs/default.toml +3 -1
- package/runtime/sandbox/configs/local.example.toml +3 -1
- package/runtime/sandbox/packages/shared/src/config.ts +26 -1
|
@@ -826,6 +826,11 @@ export async function runValidationArgv(
|
|
|
826
826
|
outputPolicy: Partial<OutputCompactionPolicy>,
|
|
827
827
|
timeoutMessage: string,
|
|
828
828
|
): Promise<ValidationExecutionResult> {
|
|
829
|
+
type ValidationWaitResult =
|
|
830
|
+
| { type: "exit"; code: number }
|
|
831
|
+
| { type: "timeout" }
|
|
832
|
+
| { type: "failure-signal" }
|
|
833
|
+
| { type: "success-signal" };
|
|
829
834
|
const startedAt = Date.now();
|
|
830
835
|
const proc = Bun.spawn(argv, {
|
|
831
836
|
cwd: repo,
|
|
@@ -846,7 +851,7 @@ export async function runValidationArgv(
|
|
|
846
851
|
let stoppedAfterSuccessSignal = false;
|
|
847
852
|
const timeout = Math.max(1_000, timeoutMs);
|
|
848
853
|
let timeoutTimer: ReturnType<typeof setTimeout> | null = null;
|
|
849
|
-
const timeoutPromise = new Promise<
|
|
854
|
+
const timeoutPromise = new Promise<ValidationWaitResult>((resolveTimeout) => {
|
|
850
855
|
timeoutTimer = setTimeout(() => {
|
|
851
856
|
timedOut = true;
|
|
852
857
|
resolveTimeout({ type: "timeout" });
|
|
@@ -855,7 +860,7 @@ export async function runValidationArgv(
|
|
|
855
860
|
|
|
856
861
|
let browserSignalTimer: ReturnType<typeof setInterval> | null = null;
|
|
857
862
|
const browserSignalPromise = isLongRunningBrowserValidationCommand(command)
|
|
858
|
-
? new Promise<
|
|
863
|
+
? new Promise<ValidationWaitResult>((resolveBrowserSignal) => {
|
|
859
864
|
const idleMs = browserValidationFailureIdleMs(env);
|
|
860
865
|
const successIdleMs = browserValidationSuccessIdleMs(env);
|
|
861
866
|
browserSignalTimer = setInterval(() => {
|
|
@@ -877,11 +882,11 @@ export async function runValidationArgv(
|
|
|
877
882
|
}
|
|
878
883
|
}, 250);
|
|
879
884
|
})
|
|
880
|
-
: new Promise<
|
|
885
|
+
: new Promise<ValidationWaitResult>(() => {
|
|
881
886
|
// Non-browser validations should only end on process exit or timeout.
|
|
882
887
|
});
|
|
883
888
|
|
|
884
|
-
const exitOrTimeout = await Promise.race([
|
|
889
|
+
const exitOrTimeout = await Promise.race<ValidationWaitResult>([
|
|
885
890
|
proc.exited.then((code) => ({ type: "exit" as const, code })),
|
|
886
891
|
timeoutPromise,
|
|
887
892
|
browserSignalPromise,
|
|
@@ -1740,9 +1745,9 @@ function classifyBrowserValidationFailureKindFromText(text: string): BrowserVali
|
|
|
1740
1745
|
|
|
1741
1746
|
function extractBrowserValidationStage(text: string): string | null {
|
|
1742
1747
|
const patterns = [
|
|
1743
|
-
/\bBrowser validation failed during\s+([^:.\r\n]+?)\s+stage\b/i,
|
|
1744
|
-
/\bfailed during\s+([^:.\r\n]+?)\s+stage\b/i,
|
|
1745
|
-
/\b(?:stage|phase)\s*[:=]\s*["'`]?([^"'`.\r\n]+)["'`]?/i,
|
|
1748
|
+
/\bBrowser validation failed during\s+([^:.\r\n|]+?)\s+stage\b/i,
|
|
1749
|
+
/\bfailed during\s+([^:.\r\n|]+?)\s+stage\b/i,
|
|
1750
|
+
/\b(?:stage|phase)\s*[:=]\s*["'`]?([^"'`.\r\n|]+)["'`]?/i,
|
|
1746
1751
|
];
|
|
1747
1752
|
for (const pattern of patterns) {
|
|
1748
1753
|
const match = text.match(pattern);
|
|
@@ -1757,6 +1762,27 @@ function extractBrowserValidationStage(text: string): string | null {
|
|
|
1757
1762
|
return null;
|
|
1758
1763
|
}
|
|
1759
1764
|
|
|
1765
|
+
function refineBrowserValidationStage(
|
|
1766
|
+
stage: string | null,
|
|
1767
|
+
selector: string | null,
|
|
1768
|
+
expected: string | null,
|
|
1769
|
+
text: string,
|
|
1770
|
+
): string | null {
|
|
1771
|
+
const combined = stripAnsiControlSequences(
|
|
1772
|
+
[stage, selector, expected, text].filter(Boolean).join(" "),
|
|
1773
|
+
).toLowerCase();
|
|
1774
|
+
if (/\b(game-control-panel|planet control panel|selected planet panel)\b/i.test(combined)) {
|
|
1775
|
+
return "planet control panel";
|
|
1776
|
+
}
|
|
1777
|
+
if (/\bsettings-home-button\b|\breturn to home from settings\b/i.test(combined)) {
|
|
1778
|
+
return "settings return";
|
|
1779
|
+
}
|
|
1780
|
+
if (/\bshop-home-button\b|\breturn to home from shop\b/i.test(combined)) {
|
|
1781
|
+
return "shop return";
|
|
1782
|
+
}
|
|
1783
|
+
return stage;
|
|
1784
|
+
}
|
|
1785
|
+
|
|
1760
1786
|
function inferBrowserValidationFailureFocus(params: {
|
|
1761
1787
|
stage?: string | null;
|
|
1762
1788
|
selector?: string | null;
|
|
@@ -1980,13 +2006,60 @@ function summarizeBrowserValidationOutput(text: string): string {
|
|
|
1980
2006
|
.map((line) => line.trim())
|
|
1981
2007
|
.filter(Boolean)
|
|
1982
2008
|
.filter((line) =>
|
|
1983
|
-
/\b(Web end-to-end smoke test failed|Browser validation failed|Expected |locator\.|page\.|waiting for getBy|Call log:|ERR_SOCKET_BAD_PORT|EADDRINUSE|EPERM|EACCES|browserType\.launch|Executable doesn't exist|Expo exited early|freeport|net::ERR_|Validation command timed out|terminated by signal|SIGTERM|timed out after \d+ms)/i.test(
|
|
2009
|
+
/\b(Web end-to-end smoke test failed|Browser validation failed|Expected |locator\.|page\.|waiting for getBy|Call log:|Verified:|Saved screenshot|Saved trace|ERR_SOCKET_BAD_PORT|EADDRINUSE|EPERM|EACCES|browserType\.launch|Executable doesn't exist|Expo exited early|freeport|net::ERR_|Validation command timed out|terminated by signal|SIGTERM|timed out after \d+ms)/i.test(
|
|
1984
2010
|
line,
|
|
1985
2011
|
),
|
|
1986
2012
|
);
|
|
1987
2013
|
return toSingleLine(lines.slice(0, 8).join(" | "), 900);
|
|
1988
2014
|
}
|
|
1989
2015
|
|
|
2016
|
+
function lastBrowserVerifiedStage(text: string): string | null {
|
|
2017
|
+
const verifiedStages = [...stripAnsiControlSequences(text).matchAll(/\bVerified:\s+([^|\r\n]+)/gi)]
|
|
2018
|
+
.map((match) => match[1]?.trim())
|
|
2019
|
+
.filter((entry): entry is string => Boolean(entry));
|
|
2020
|
+
const lastVerified = verifiedStages.at(-1);
|
|
2021
|
+
return lastVerified ? toSingleLine(lastVerified, 80) : null;
|
|
2022
|
+
}
|
|
2023
|
+
|
|
2024
|
+
export function extractValidationFailureRetryDigest(
|
|
2025
|
+
run: {
|
|
2026
|
+
command: string;
|
|
2027
|
+
stdout?: string;
|
|
2028
|
+
stderr?: string;
|
|
2029
|
+
exitCode?: number;
|
|
2030
|
+
elapsedMs?: number;
|
|
2031
|
+
},
|
|
2032
|
+
repo?: string,
|
|
2033
|
+
): string {
|
|
2034
|
+
const baseDigest = extractValidationFailureDigest(run);
|
|
2035
|
+
if (!isLongRunningBrowserValidationCommand(run.command)) return baseDigest;
|
|
2036
|
+
const combined = stripAnsiControlSequences([run.stderr, run.stdout].filter(Boolean).join("\n"));
|
|
2037
|
+
const failureKind = classifyBrowserValidationFailureKindFromText(`${baseDigest}\n${combined}`);
|
|
2038
|
+
if (failureKind !== "assertion") return baseDigest;
|
|
2039
|
+
|
|
2040
|
+
const recentLogSummary = summarizeRecentBrowserValidationLogs(repo);
|
|
2041
|
+
const enrichedBrowserContext = [combined, recentLogSummary].filter(Boolean).join("\n");
|
|
2042
|
+
const selector = extractBrowserValidationSelector(enrichedBrowserContext);
|
|
2043
|
+
const expected = extractBrowserValidationExpectedUi(enrichedBrowserContext);
|
|
2044
|
+
const stage = refineBrowserValidationStage(
|
|
2045
|
+
extractBrowserValidationStage(enrichedBrowserContext),
|
|
2046
|
+
selector,
|
|
2047
|
+
expected,
|
|
2048
|
+
enrichedBrowserContext,
|
|
2049
|
+
);
|
|
2050
|
+
const lastVerified = lastBrowserVerifiedStage(enrichedBrowserContext);
|
|
2051
|
+
const output = summarizeBrowserValidationOutput(enrichedBrowserContext);
|
|
2052
|
+
const parts = [
|
|
2053
|
+
baseDigest,
|
|
2054
|
+
stage ? `stage=${stage}` : "",
|
|
2055
|
+
selector ? `selector=${selector}` : "",
|
|
2056
|
+
expected ? `expected=${expected}` : "",
|
|
2057
|
+
lastVerified ? `last verified=${lastVerified}` : "",
|
|
2058
|
+
output && output !== baseDigest ? output : "",
|
|
2059
|
+
].filter(Boolean);
|
|
2060
|
+
return toSingleLine(parts.join(" | "), 900) || baseDigest;
|
|
2061
|
+
}
|
|
2062
|
+
|
|
1990
2063
|
export function buildBrowserValidationRepairPacket(
|
|
1991
2064
|
validationRuns: ValidationExecutionResult[],
|
|
1992
2065
|
previousFailureDigests: Map<string, string> = new Map(),
|
|
@@ -1995,15 +2068,24 @@ export function buildBrowserValidationRepairPacket(
|
|
|
1995
2068
|
for (const run of validationRuns) {
|
|
1996
2069
|
if (run.ok || !isLongRunningBrowserValidationCommand(run.command)) continue;
|
|
1997
2070
|
const combined = stripAnsiControlSequences([run.stderr, run.stdout].filter(Boolean).join("\n"));
|
|
1998
|
-
const
|
|
1999
|
-
const failureKind = classifyBrowserValidationFailureKindFromText(`${
|
|
2071
|
+
const baseDigest = extractValidationFailureDigest(run);
|
|
2072
|
+
const failureKind = classifyBrowserValidationFailureKindFromText(`${baseDigest}\n${combined}`);
|
|
2000
2073
|
if (failureKind === "unknown") continue;
|
|
2074
|
+
const digest =
|
|
2075
|
+
failureKind === "assertion"
|
|
2076
|
+
? extractValidationFailureRetryDigest(run, repo) || baseDigest
|
|
2077
|
+
: baseDigest;
|
|
2001
2078
|
const previousDigest = previousFailureDigests.get(validationCommandKey(run.command)) ?? null;
|
|
2002
2079
|
const recentLogSummary = summarizeRecentBrowserValidationLogs(repo);
|
|
2003
2080
|
const enrichedBrowserContext = [combined, recentLogSummary].filter(Boolean).join("\n");
|
|
2004
|
-
const stage = extractBrowserValidationStage(enrichedBrowserContext);
|
|
2005
2081
|
const selector = extractBrowserValidationSelector(enrichedBrowserContext);
|
|
2006
2082
|
const expected = extractBrowserValidationExpectedUi(enrichedBrowserContext);
|
|
2083
|
+
const stage = refineBrowserValidationStage(
|
|
2084
|
+
extractBrowserValidationStage(enrichedBrowserContext),
|
|
2085
|
+
selector,
|
|
2086
|
+
expected,
|
|
2087
|
+
enrichedBrowserContext,
|
|
2088
|
+
);
|
|
2007
2089
|
const previousStage = previousDigest ? extractBrowserValidationStage(previousDigest) : null;
|
|
2008
2090
|
const previousSelector = previousDigest ? extractBrowserValidationSelector(previousDigest) : null;
|
|
2009
2091
|
const previousExpected = previousDigest ? extractBrowserValidationExpectedUi(previousDigest) : null;
|
|
@@ -2021,17 +2103,21 @@ export function buildBrowserValidationRepairPacket(
|
|
|
2021
2103
|
text: previousDigest,
|
|
2022
2104
|
})
|
|
2023
2105
|
: null;
|
|
2106
|
+
const sameFailureSignal =
|
|
2107
|
+
Boolean(previousDigest) &&
|
|
2108
|
+
(previousDigest === digest ||
|
|
2109
|
+
(Boolean(failureFocus) &&
|
|
2110
|
+
failureFocus === previousFailureFocus &&
|
|
2111
|
+
(!selector || !previousSelector || selector === previousSelector)));
|
|
2024
2112
|
const progress =
|
|
2025
2113
|
previousDigest == null
|
|
2026
2114
|
? "first_failure"
|
|
2027
|
-
:
|
|
2115
|
+
: sameFailureSignal
|
|
2028
2116
|
? "same_failure"
|
|
2029
2117
|
: "new_failure";
|
|
2030
2118
|
const needsDiagnosticProbe =
|
|
2031
2119
|
failureKind === "assertion" &&
|
|
2032
|
-
|
|
2033
|
-
Boolean(failureFocus) &&
|
|
2034
|
-
failureFocus === previousFailureFocus;
|
|
2120
|
+
sameFailureSignal;
|
|
2035
2121
|
return {
|
|
2036
2122
|
command: run.command,
|
|
2037
2123
|
failureKind,
|
|
@@ -2642,49 +2728,67 @@ async function runDeterministicQualityGate(
|
|
|
2642
2728
|
};
|
|
2643
2729
|
}
|
|
2644
2730
|
|
|
2645
|
-
|
|
2646
|
-
|
|
2647
|
-
|
|
2648
|
-
|
|
2731
|
+
type QualityCriticTimeoutBehavior = "skip" | "retry_once" | "block";
|
|
2732
|
+
|
|
2733
|
+
function resolveQualityCriticTimeoutMs(runtimeConfig: WorkerpalsRuntimeConfig): number {
|
|
2734
|
+
const value = Number(runtimeConfig.workerpals.qualityCriticTimeoutMs);
|
|
2735
|
+
if (!Number.isFinite(value)) return 90_000;
|
|
2736
|
+
return Math.max(1_000, Math.min(7_200_000, Math.floor(value)));
|
|
2737
|
+
}
|
|
2738
|
+
|
|
2739
|
+
function resolveQualityCriticTimeoutBehavior(
|
|
2649
2740
|
runtimeConfig: WorkerpalsRuntimeConfig,
|
|
2650
|
-
|
|
2651
|
-
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
|
|
2741
|
+
): QualityCriticTimeoutBehavior {
|
|
2742
|
+
const value = String(runtimeConfig.workerpals.qualityCriticTimeoutBehavior ?? "")
|
|
2743
|
+
.trim()
|
|
2744
|
+
.toLowerCase()
|
|
2745
|
+
.replace(/-/g, "_");
|
|
2746
|
+
if (value === "skip" || value === "retry_once" || value === "block") return value;
|
|
2747
|
+
return "retry_once";
|
|
2748
|
+
}
|
|
2655
2749
|
|
|
2656
|
-
|
|
2657
|
-
|
|
2658
|
-
|
|
2659
|
-
|
|
2660
|
-
|
|
2661
|
-
|
|
2662
|
-
const qualityCriticMaxDiffChars = (() => {
|
|
2663
|
-
const value = Number(runtimeConfig.workerpals.qualityCriticMaxDiffChars);
|
|
2664
|
-
if (!Number.isFinite(value)) return 16_000;
|
|
2665
|
-
return Math.max(256, Math.min(524_288, Math.floor(value)));
|
|
2666
|
-
})();
|
|
2667
|
-
const qualityCriticMaxValidationOutputChars = (() => {
|
|
2668
|
-
const value = Number(runtimeConfig.workerpals.qualityCriticMaxValidationOutputChars);
|
|
2669
|
-
if (!Number.isFinite(value)) return 8_000;
|
|
2670
|
-
return Math.max(256, Math.min(524_288, Math.floor(value)));
|
|
2671
|
-
})();
|
|
2672
|
-
const qualityCriticTimeoutMs = (() => {
|
|
2673
|
-
const value = Number(runtimeConfig.workerpals.qualityCriticTimeoutMs);
|
|
2674
|
-
if (!Number.isFinite(value)) return 45_000;
|
|
2675
|
-
return Math.max(1_000, Math.min(7_200_000, Math.floor(value)));
|
|
2676
|
-
})();
|
|
2677
|
-
diffText = compactJobOutput(diffText, outputPolicyForRuntime(runtimeConfig)).slice(
|
|
2678
|
-
0,
|
|
2679
|
-
qualityCriticMaxDiffChars,
|
|
2680
|
-
);
|
|
2750
|
+
function resolveQualityCriticModel(
|
|
2751
|
+
runtimeConfig: WorkerpalsRuntimeConfig,
|
|
2752
|
+
fallback = "",
|
|
2753
|
+
): string {
|
|
2754
|
+
return String(runtimeConfig.workerpals.qualityCriticModel ?? "").trim() || fallback.trim();
|
|
2755
|
+
}
|
|
2681
2756
|
|
|
2682
|
-
|
|
2757
|
+
function resolveQualityCriticMaxDiffChars(
|
|
2758
|
+
runtimeConfig: WorkerpalsRuntimeConfig,
|
|
2759
|
+
compact = false,
|
|
2760
|
+
): number {
|
|
2761
|
+
const value = Number(runtimeConfig.workerpals.qualityCriticMaxDiffChars);
|
|
2762
|
+
const max = Number.isFinite(value) ? value : 16_000;
|
|
2763
|
+
const bounded = Math.max(256, Math.min(524_288, Math.floor(max)));
|
|
2764
|
+
return compact ? Math.min(bounded, 6_000) : bounded;
|
|
2765
|
+
}
|
|
2766
|
+
|
|
2767
|
+
function resolveQualityCriticMaxValidationOutputChars(
|
|
2768
|
+
runtimeConfig: WorkerpalsRuntimeConfig,
|
|
2769
|
+
compact = false,
|
|
2770
|
+
): number {
|
|
2771
|
+
const value = Number(runtimeConfig.workerpals.qualityCriticMaxValidationOutputChars);
|
|
2772
|
+
const max = Number.isFinite(value) ? value : 8_000;
|
|
2773
|
+
const bounded = Math.max(256, Math.min(524_288, Math.floor(max)));
|
|
2774
|
+
return compact ? Math.min(bounded, 2_000) : bounded;
|
|
2775
|
+
}
|
|
2776
|
+
|
|
2777
|
+
function buildCriticValidationSummary(
|
|
2778
|
+
quality: DeterministicQualityResult,
|
|
2779
|
+
maxValidationOutputChars: number,
|
|
2780
|
+
): string {
|
|
2781
|
+
const allPassed =
|
|
2782
|
+
quality.validationRuns.length > 0 && quality.validationRuns.every((run) => run.ok);
|
|
2783
|
+
return quality.validationRuns
|
|
2683
2784
|
.map((run) => {
|
|
2684
|
-
const output =
|
|
2685
|
-
|
|
2686
|
-
|
|
2687
|
-
|
|
2785
|
+
const output =
|
|
2786
|
+
allPassed
|
|
2787
|
+
? ""
|
|
2788
|
+
: [run.stdout, run.stderr]
|
|
2789
|
+
.filter(Boolean)
|
|
2790
|
+
.join("\n")
|
|
2791
|
+
.slice(0, maxValidationOutputChars);
|
|
2688
2792
|
return [
|
|
2689
2793
|
`Command: ${run.command}`,
|
|
2690
2794
|
`Result: ${run.ok ? "pass" : "fail"} (exit ${run.exitCode}, ${run.elapsedMs}ms)`,
|
|
@@ -2694,6 +2798,38 @@ async function runTaskCriticReview(
|
|
|
2694
2798
|
.join("\n");
|
|
2695
2799
|
})
|
|
2696
2800
|
.join("\n\n---\n\n");
|
|
2801
|
+
}
|
|
2802
|
+
|
|
2803
|
+
function criticTimeoutReview(
|
|
2804
|
+
source: "Codex" | "LLM",
|
|
2805
|
+
timeoutMs: number,
|
|
2806
|
+
elapsedMs: number,
|
|
2807
|
+
): CriticReview {
|
|
2808
|
+
const summary = `${source} critic timed out after ${elapsedMs}ms (timeout=${timeoutMs}ms).`;
|
|
2809
|
+
return {
|
|
2810
|
+
score: 0,
|
|
2811
|
+
findings: [summary],
|
|
2812
|
+
mustFix: [
|
|
2813
|
+
"CriticGate timeout behavior is set to block; complete the critic review by reducing critic input, choosing a faster critic model, or increasing workerpals.quality_critic_timeout_ms.",
|
|
2814
|
+
],
|
|
2815
|
+
revisionGuidance:
|
|
2816
|
+
"Do not change product code for this finding unless product code caused the critic prompt explosion. Adjust CriticGate configuration or reduce validation/diff evidence volume.",
|
|
2817
|
+
raw: JSON.stringify({ score: 0, findings: [summary], must_fix: ["CriticGate timed out"] }),
|
|
2818
|
+
};
|
|
2819
|
+
}
|
|
2820
|
+
|
|
2821
|
+
async function runTaskCriticReview(
|
|
2822
|
+
repo: string,
|
|
2823
|
+
params: Record<string, unknown>,
|
|
2824
|
+
quality: DeterministicQualityResult,
|
|
2825
|
+
runtimeConfig: WorkerpalsRuntimeConfig,
|
|
2826
|
+
onLog?: (stream: "stdout" | "stderr", line: string) => void,
|
|
2827
|
+
): Promise<CriticReview | null> {
|
|
2828
|
+
const endpoint = normalizeChatCompletionsEndpoint(runtimeConfig.workerpals.llm.endpoint);
|
|
2829
|
+
const model = resolveQualityCriticModel(runtimeConfig, runtimeConfig.workerpals.llm.model.trim());
|
|
2830
|
+
if (!endpoint || !model) return null;
|
|
2831
|
+
const qualityCriticTimeoutMs = resolveQualityCriticTimeoutMs(runtimeConfig);
|
|
2832
|
+
const timeoutBehavior = resolveQualityCriticTimeoutBehavior(runtimeConfig);
|
|
2697
2833
|
|
|
2698
2834
|
const planning = params.planning as TaskExecutePlanning;
|
|
2699
2835
|
const instruction = String(params.instruction ?? "").trim();
|
|
@@ -2711,33 +2847,65 @@ async function runTaskCriticReview(
|
|
|
2711
2847
|
const changedPathsText =
|
|
2712
2848
|
quality.changedPaths.map((entry) => `- ${entry}`).join("\n") || "- (none)";
|
|
2713
2849
|
const criticSystem = loadPromptTemplate("workerpals/task_quality_critic_system_prompt.md").trim();
|
|
2714
|
-
const criticUser = loadPromptTemplate("workerpals/task_quality_critic_user_prompt.md", {
|
|
2715
|
-
instruction,
|
|
2716
|
-
acceptance_criteria: acceptanceCriteriaText,
|
|
2717
|
-
validation_steps: validationStepsText,
|
|
2718
|
-
changed_paths: changedPathsText,
|
|
2719
|
-
diff_excerpt: diffText || "(empty diff excerpt)",
|
|
2720
|
-
validation_evidence: validationSummary || "(no validation output)",
|
|
2721
|
-
});
|
|
2722
2850
|
|
|
2723
2851
|
const apiKey = runtimeConfig.workerpals.llm.apiKey.trim() || "local";
|
|
2724
2852
|
const headers: Record<string, string> = {
|
|
2725
2853
|
"Content-Type": "application/json",
|
|
2726
2854
|
};
|
|
2727
2855
|
if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
|
|
2728
|
-
|
|
2729
|
-
|
|
2730
|
-
|
|
2731
|
-
|
|
2732
|
-
|
|
2733
|
-
|
|
2734
|
-
|
|
2735
|
-
|
|
2856
|
+
|
|
2857
|
+
const buildAttemptPayload = async (compact: boolean) => {
|
|
2858
|
+
const changedForDiff = quality.changedPaths.slice(0, compact ? 4 : 8);
|
|
2859
|
+
let diffText = "";
|
|
2860
|
+
if (changedForDiff.length > 0) {
|
|
2861
|
+
const diffResult = await git(repo, ["diff", "--", ...changedForDiff]);
|
|
2862
|
+
diffText = diffResult.ok ? diffResult.stdout : diffResult.stderr;
|
|
2863
|
+
}
|
|
2864
|
+
diffText = compactJobOutput(diffText, outputPolicyForRuntime(runtimeConfig)).slice(
|
|
2865
|
+
0,
|
|
2866
|
+
resolveQualityCriticMaxDiffChars(runtimeConfig, compact),
|
|
2867
|
+
);
|
|
2868
|
+
const validationSummary = buildCriticValidationSummary(
|
|
2869
|
+
quality,
|
|
2870
|
+
resolveQualityCriticMaxValidationOutputChars(runtimeConfig, compact),
|
|
2871
|
+
);
|
|
2872
|
+
const criticUser = loadPromptTemplate("workerpals/task_quality_critic_user_prompt.md", {
|
|
2873
|
+
instruction,
|
|
2874
|
+
acceptance_criteria: acceptanceCriteriaText,
|
|
2875
|
+
validation_steps: validationStepsText,
|
|
2876
|
+
changed_paths: changedPathsText,
|
|
2877
|
+
diff_excerpt: diffText || "(empty diff excerpt)",
|
|
2878
|
+
validation_evidence: validationSummary || "(no validation output)",
|
|
2879
|
+
});
|
|
2880
|
+
const promptChars = criticSystem.length + criticUser.length;
|
|
2881
|
+
const promptBytes = new TextEncoder().encode(`${criticSystem}\n${criticUser}`).length;
|
|
2882
|
+
return {
|
|
2883
|
+
bodyBase: {
|
|
2884
|
+
model,
|
|
2885
|
+
messages: [
|
|
2886
|
+
{ role: "system", content: criticSystem },
|
|
2887
|
+
{ role: "user", content: criticUser },
|
|
2888
|
+
],
|
|
2889
|
+
temperature: 0,
|
|
2890
|
+
max_tokens: compact ? 500 : 700,
|
|
2891
|
+
},
|
|
2892
|
+
promptChars,
|
|
2893
|
+
promptBytes,
|
|
2894
|
+
diffChars: diffText.length,
|
|
2895
|
+
validationChars: validationSummary.length,
|
|
2896
|
+
};
|
|
2736
2897
|
};
|
|
2737
2898
|
|
|
2738
|
-
const runCriticRequest = async (
|
|
2899
|
+
const runCriticRequest = async (
|
|
2900
|
+
bodyBase: Record<string, unknown>,
|
|
2901
|
+
responseFormat: Record<string, unknown> | null,
|
|
2902
|
+
) => {
|
|
2739
2903
|
const controller = new AbortController();
|
|
2740
|
-
|
|
2904
|
+
let timedOut = false;
|
|
2905
|
+
const timer = setTimeout(() => {
|
|
2906
|
+
timedOut = true;
|
|
2907
|
+
controller.abort();
|
|
2908
|
+
}, qualityCriticTimeoutMs);
|
|
2741
2909
|
try {
|
|
2742
2910
|
const response = await fetch(endpoint, {
|
|
2743
2911
|
method: "POST",
|
|
@@ -2748,14 +2916,29 @@ async function runTaskCriticReview(
|
|
|
2748
2916
|
signal: controller.signal,
|
|
2749
2917
|
});
|
|
2750
2918
|
const text = await response.text();
|
|
2751
|
-
return { response, text };
|
|
2919
|
+
return { timedOut: false as const, response, text };
|
|
2920
|
+
} catch (err) {
|
|
2921
|
+
if (!timedOut && String((err as { name?: unknown })?.name ?? "") !== "AbortError") {
|
|
2922
|
+
throw err;
|
|
2923
|
+
}
|
|
2924
|
+
return { timedOut: true as const, err };
|
|
2752
2925
|
} finally {
|
|
2753
2926
|
clearTimeout(timer);
|
|
2754
2927
|
}
|
|
2755
2928
|
};
|
|
2756
2929
|
|
|
2757
|
-
|
|
2758
|
-
|
|
2930
|
+
const runAttempt = async (
|
|
2931
|
+
attempt: number,
|
|
2932
|
+
compact: boolean,
|
|
2933
|
+
): Promise<{ status: "timeout" } | { status: "done"; review: CriticReview | null }> => {
|
|
2934
|
+
const payload = await buildAttemptPayload(compact);
|
|
2935
|
+
const startedAt = Date.now();
|
|
2936
|
+
onLog?.(
|
|
2937
|
+
"stdout",
|
|
2938
|
+
`[CriticGate] LLM review attempt ${attempt}${compact ? " (compact)" : ""}: model=${model} timeout_ms=${qualityCriticTimeoutMs} behavior=${timeoutBehavior} prompt_chars=${payload.promptChars} prompt_bytes=${payload.promptBytes} diff_chars=${payload.diffChars} validation_chars=${payload.validationChars}`,
|
|
2939
|
+
);
|
|
2940
|
+
let request = await runCriticRequest(payload.bodyBase, { type: "json_object" });
|
|
2941
|
+
if (request.timedOut) return { status: "timeout" };
|
|
2759
2942
|
if (!request.response.ok && request.response.status === 400) {
|
|
2760
2943
|
const lowered = request.text.toLowerCase();
|
|
2761
2944
|
if (lowered.includes("response_format")) {
|
|
@@ -2763,7 +2946,8 @@ async function runTaskCriticReview(
|
|
|
2763
2946
|
"stdout",
|
|
2764
2947
|
"[CriticGate] fallback: response_format json_object unsupported; retrying without strict response_format.",
|
|
2765
2948
|
);
|
|
2766
|
-
request = await runCriticRequest(null);
|
|
2949
|
+
request = await runCriticRequest(payload.bodyBase, null);
|
|
2950
|
+
if (request.timedOut) return { status: "timeout" };
|
|
2767
2951
|
}
|
|
2768
2952
|
}
|
|
2769
2953
|
if (!request.response.ok) {
|
|
@@ -2771,12 +2955,12 @@ async function runTaskCriticReview(
|
|
|
2771
2955
|
"stderr",
|
|
2772
2956
|
`[CriticGate] review request failed (${request.response.status}): ${toSingleLine(request.text, 240)}`,
|
|
2773
2957
|
);
|
|
2774
|
-
return null;
|
|
2958
|
+
return { status: "done", review: null };
|
|
2775
2959
|
}
|
|
2776
2960
|
|
|
2777
|
-
const
|
|
2778
|
-
const choices = Array.isArray((
|
|
2779
|
-
? ((
|
|
2961
|
+
const responsePayload = parseJsonObjectLoose(request.text) ?? JSON.parse(request.text);
|
|
2962
|
+
const choices = Array.isArray((responsePayload as Record<string, unknown>).choices)
|
|
2963
|
+
? ((responsePayload as Record<string, unknown>).choices as Array<Record<string, unknown>>)
|
|
2780
2964
|
: [];
|
|
2781
2965
|
const content = String(
|
|
2782
2966
|
(choices[0]?.message as Record<string, unknown> | undefined)?.content ?? "",
|
|
@@ -2790,7 +2974,7 @@ async function runTaskCriticReview(
|
|
|
2790
2974
|
220,
|
|
2791
2975
|
)}`,
|
|
2792
2976
|
);
|
|
2793
|
-
return null;
|
|
2977
|
+
return { status: "done", review: null };
|
|
2794
2978
|
}
|
|
2795
2979
|
|
|
2796
2980
|
const scoreRaw = Number(reviewObj.score);
|
|
@@ -2804,13 +2988,43 @@ async function runTaskCriticReview(
|
|
|
2804
2988
|
.trim()
|
|
2805
2989
|
.slice(0, 2000);
|
|
2806
2990
|
const score = Number.isFinite(scoreRaw) ? Math.max(0, Math.min(10, scoreRaw)) : 0;
|
|
2991
|
+
onLog?.(
|
|
2992
|
+
"stdout",
|
|
2993
|
+
`[CriticGate] LLM review completed in ${Date.now() - startedAt}ms (attempt ${attempt}).`,
|
|
2994
|
+
);
|
|
2807
2995
|
return {
|
|
2808
|
-
|
|
2809
|
-
|
|
2810
|
-
|
|
2811
|
-
|
|
2812
|
-
|
|
2996
|
+
status: "done",
|
|
2997
|
+
review: {
|
|
2998
|
+
score,
|
|
2999
|
+
findings,
|
|
3000
|
+
mustFix,
|
|
3001
|
+
revisionGuidance,
|
|
3002
|
+
raw: compactJobOutput(content, outputPolicyForRuntime(runtimeConfig)),
|
|
3003
|
+
},
|
|
2813
3004
|
};
|
|
3005
|
+
};
|
|
3006
|
+
|
|
3007
|
+
try {
|
|
3008
|
+
let attempt = await runAttempt(1, false);
|
|
3009
|
+
if (attempt.status === "timeout" && timeoutBehavior === "retry_once") {
|
|
3010
|
+
onLog?.(
|
|
3011
|
+
"stderr",
|
|
3012
|
+
`[CriticGate] LLM review timed out after ${qualityCriticTimeoutMs}ms; retrying once with compact critic input.`,
|
|
3013
|
+
);
|
|
3014
|
+
attempt = await runAttempt(2, true);
|
|
3015
|
+
}
|
|
3016
|
+
if (attempt.status === "timeout") {
|
|
3017
|
+
if (timeoutBehavior === "block") {
|
|
3018
|
+
onLog?.(
|
|
3019
|
+
"stderr",
|
|
3020
|
+
`[CriticGate] LLM review timed out after ${qualityCriticTimeoutMs}ms; blocking because quality_critic_timeout_behavior=block.`,
|
|
3021
|
+
);
|
|
3022
|
+
return criticTimeoutReview("LLM", qualityCriticTimeoutMs, qualityCriticTimeoutMs);
|
|
3023
|
+
}
|
|
3024
|
+
onLog?.("stderr", `[CriticGate] LLM timed out after ${qualityCriticTimeoutMs}ms; skipping.`);
|
|
3025
|
+
return null;
|
|
3026
|
+
}
|
|
3027
|
+
return attempt.review;
|
|
2814
3028
|
} catch (err) {
|
|
2815
3029
|
onLog?.(
|
|
2816
3030
|
"stderr",
|
|
@@ -2905,7 +3119,10 @@ export function buildQualityRevisionHint(
|
|
|
2905
3119
|
"- Convergence mode: diagnostic-first repair. This same browser focus failed in the previous revision, so do not guess another selector or rewrite a different stage.",
|
|
2906
3120
|
);
|
|
2907
3121
|
lines.push(
|
|
2908
|
-
"- Diagnostic requirement: before editing again, inspect or add a tiny temporary diagnostic around the failing stage that records locator counts, visible textContent, role/ARIA attributes, data-testid values, and a nearby DOM snippet for the candidate nodes.",
|
|
3122
|
+
"- Diagnostic requirement: before editing again, inspect or add a tiny temporary diagnostic around the failing stage that records locator counts, visible textContent, role/ARIA attributes, data-testid values, bounding boxes, and a nearby DOM snippet for the candidate nodes.",
|
|
3123
|
+
);
|
|
3124
|
+
lines.push(
|
|
3125
|
+
"- Artifact freshness rule: only trust screenshots/logs captured after the failing action in the current revision. If the screenshot is stale or stops before the failing locator, capture or print the DOM state instead of reasoning from that image.",
|
|
2909
3126
|
);
|
|
2910
3127
|
lines.push(
|
|
2911
3128
|
"- React Native Web note: screenshots can show the intended state while Playwright reads a duplicate or stale rendered node. Prefer one unique selected-state test id or a semantic checked attribute on the stable pressable, then assert locator count and visibility.",
|
|
@@ -2947,7 +3164,7 @@ export function buildQualityRevisionHint(
|
|
|
2947
3164
|
);
|
|
2948
3165
|
if (browserRepairPacket.needsDiagnosticProbe) {
|
|
2949
3166
|
lines.push(
|
|
2950
|
-
`Validation rerun rule: PushPals ValidationGate will rerun "${browserRepairPacket.command}" after the patch, but this is now a repeated browser assertion. If a quick local startup probe shows the browser server can run in this executor, run one targeted "${browserRepairPacket.command}" confirmation after the DOM-backed fix. Do not hand off another unverified selector guess.`,
|
|
3167
|
+
`Validation rerun rule: PushPals ValidationGate will rerun "${browserRepairPacket.command}" after the patch, but this is now a repeated browser assertion. If a quick local startup probe shows the browser server can run in this executor, run exactly one targeted "${browserRepairPacket.command}" confirmation after the DOM-backed fix. Do not stop after fast checks only. Do not hand off another unverified selector guess.`,
|
|
2951
3168
|
);
|
|
2952
3169
|
} else {
|
|
2953
3170
|
lines.push(
|
|
@@ -5462,86 +5679,92 @@ async function runCodexCriticReview(
|
|
|
5462
5679
|
|
|
5463
5680
|
const instruction = String(params.instruction ?? "").trim();
|
|
5464
5681
|
const planning = params.planning as TaskExecutePlanning;
|
|
5465
|
-
|
|
5466
|
-
const
|
|
5467
|
-
|
|
5468
|
-
|
|
5469
|
-
|
|
5470
|
-
|
|
5471
|
-
|
|
5472
|
-
|
|
5473
|
-
|
|
5474
|
-
|
|
5475
|
-
|
|
5476
|
-
|
|
5477
|
-
})();
|
|
5478
|
-
const qualityCriticTimeoutMs = (() => {
|
|
5479
|
-
const value = Number(runtimeConfig.workerpals.qualityCriticTimeoutMs);
|
|
5480
|
-
if (!Number.isFinite(value)) return 45_000;
|
|
5481
|
-
return Math.max(1_000, Math.min(7_200_000, Math.floor(value)));
|
|
5482
|
-
})();
|
|
5483
|
-
if (changedForDiff.length > 0) {
|
|
5484
|
-
const diffResult = await git(repo, ["diff", "--", ...changedForDiff]);
|
|
5485
|
-
diffText = (diffResult.ok ? diffResult.stdout : diffResult.stderr).slice(
|
|
5682
|
+
const qualityCriticTimeoutMs = resolveQualityCriticTimeoutMs(runtimeConfig);
|
|
5683
|
+
const timeoutBehavior = resolveQualityCriticTimeoutBehavior(runtimeConfig);
|
|
5684
|
+
const criticModel = resolveQualityCriticModel(runtimeConfig);
|
|
5685
|
+
|
|
5686
|
+
const buildCriticInstruction = async (compact: boolean) => {
|
|
5687
|
+
const changedForDiff = quality.changedPaths.slice(0, compact ? 4 : 8);
|
|
5688
|
+
let diffText = "";
|
|
5689
|
+
if (changedForDiff.length > 0) {
|
|
5690
|
+
const diffResult = await git(repo, ["diff", "--", ...changedForDiff]);
|
|
5691
|
+
diffText = diffResult.ok ? diffResult.stdout : diffResult.stderr;
|
|
5692
|
+
}
|
|
5693
|
+
diffText = compactJobOutput(diffText, outputPolicyForRuntime(runtimeConfig)).slice(
|
|
5486
5694
|
0,
|
|
5487
|
-
|
|
5695
|
+
resolveQualityCriticMaxDiffChars(runtimeConfig, compact),
|
|
5488
5696
|
);
|
|
5489
|
-
|
|
5490
|
-
|
|
5491
|
-
|
|
5492
|
-
|
|
5493
|
-
|
|
5494
|
-
|
|
5495
|
-
|
|
5496
|
-
|
|
5497
|
-
|
|
5498
|
-
|
|
5499
|
-
|
|
5500
|
-
|
|
5501
|
-
|
|
5502
|
-
|
|
5503
|
-
|
|
5504
|
-
|
|
5505
|
-
|
|
5506
|
-
|
|
5507
|
-
|
|
5508
|
-
|
|
5509
|
-
|
|
5510
|
-
|
|
5511
|
-
|
|
5512
|
-
|
|
5513
|
-
|
|
5514
|
-
diff_section: diffText ? `Diff:\n${diffText}` : "Diff: (empty - no changes detected)",
|
|
5515
|
-
validation_section: validationSummary
|
|
5516
|
-
? `Validation:\n${validationSummary}`
|
|
5517
|
-
: "Validation: (none)",
|
|
5518
|
-
},
|
|
5519
|
-
);
|
|
5697
|
+
const validationSummary = buildCriticValidationSummary(
|
|
5698
|
+
quality,
|
|
5699
|
+
resolveQualityCriticMaxValidationOutputChars(runtimeConfig, compact),
|
|
5700
|
+
);
|
|
5701
|
+
const criticInstruction = loadPromptTemplate(
|
|
5702
|
+
"workerpals/codex_quality_critic_instruction_prompt.md",
|
|
5703
|
+
{
|
|
5704
|
+
instruction,
|
|
5705
|
+
acceptance_criteria:
|
|
5706
|
+
planning.acceptanceCriteria.map((c) => `- ${c}`).join("\n") || "- (none)",
|
|
5707
|
+
changed_paths: quality.changedPaths.join(", ") || "(none)",
|
|
5708
|
+
diff_section: diffText ? `Diff:\n${diffText}` : "Diff: (empty - no changes detected)",
|
|
5709
|
+
validation_section: validationSummary
|
|
5710
|
+
? `Validation:\n${validationSummary}`
|
|
5711
|
+
: "Validation: (none)",
|
|
5712
|
+
},
|
|
5713
|
+
);
|
|
5714
|
+
return {
|
|
5715
|
+
criticInstruction,
|
|
5716
|
+
promptChars: criticInstruction.length,
|
|
5717
|
+
promptBytes: new TextEncoder().encode(criticInstruction).length,
|
|
5718
|
+
diffChars: diffText.length,
|
|
5719
|
+
validationChars: validationSummary.length,
|
|
5720
|
+
};
|
|
5721
|
+
};
|
|
5520
5722
|
|
|
5521
5723
|
const tmpOutputPath = `/tmp/pushpals-critic-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.txt`;
|
|
5522
|
-
const
|
|
5523
|
-
|
|
5524
|
-
|
|
5525
|
-
|
|
5526
|
-
|
|
5527
|
-
|
|
5528
|
-
|
|
5529
|
-
|
|
5530
|
-
|
|
5531
|
-
|
|
5532
|
-
|
|
5533
|
-
|
|
5534
|
-
|
|
5724
|
+
const buildCmd = () => {
|
|
5725
|
+
const cmd = [
|
|
5726
|
+
...codexPrefix,
|
|
5727
|
+
"-c",
|
|
5728
|
+
'model_reasoning_effort="low"',
|
|
5729
|
+
"-a",
|
|
5730
|
+
"never",
|
|
5731
|
+
"exec",
|
|
5732
|
+
"-s",
|
|
5733
|
+
"read-only",
|
|
5734
|
+
"--color",
|
|
5735
|
+
"never",
|
|
5736
|
+
"--output-last-message",
|
|
5737
|
+
tmpOutputPath,
|
|
5738
|
+
];
|
|
5739
|
+
if (criticModel) cmd.push("-m", criticModel);
|
|
5740
|
+
cmd.push("-");
|
|
5741
|
+
return cmd;
|
|
5742
|
+
};
|
|
5535
5743
|
|
|
5536
5744
|
const env = buildWorkerSandboxWritableEnv(repo);
|
|
5537
5745
|
const codexMask = maskRepoLocalCodexFilesForCodexCli(repo, env);
|
|
5538
|
-
|
|
5539
|
-
|
|
5746
|
+
|
|
5747
|
+
const runAttempt = async (
|
|
5748
|
+
attempt: number,
|
|
5749
|
+
compact: boolean,
|
|
5750
|
+
): Promise<{ status: "timeout" } | { status: "done"; review: CriticReview | null }> => {
|
|
5751
|
+
try {
|
|
5752
|
+
unlinkSync(tmpOutputPath);
|
|
5753
|
+
} catch {
|
|
5754
|
+
/* ignore stale/missing critic output */
|
|
5755
|
+
}
|
|
5756
|
+
const payload = await buildCriticInstruction(compact);
|
|
5757
|
+
const startedAt = Date.now();
|
|
5758
|
+
onLog?.(
|
|
5759
|
+
"stdout",
|
|
5760
|
+
`[CriticGate] Codex review attempt ${attempt}${compact ? " (compact)" : ""}: model=${criticModel || "(codex default)"} timeout_ms=${qualityCriticTimeoutMs} behavior=${timeoutBehavior} prompt_chars=${payload.promptChars} prompt_bytes=${payload.promptBytes} diff_chars=${payload.diffChars} validation_chars=${payload.validationChars}`,
|
|
5761
|
+
);
|
|
5762
|
+
const proc = Bun.spawn(buildCmd(), {
|
|
5540
5763
|
cwd: repo,
|
|
5541
5764
|
env,
|
|
5542
5765
|
stdout: "pipe",
|
|
5543
5766
|
stderr: "pipe",
|
|
5544
|
-
stdin: new Blob([criticInstruction]),
|
|
5767
|
+
stdin: new Blob([payload.criticInstruction]),
|
|
5545
5768
|
});
|
|
5546
5769
|
|
|
5547
5770
|
let timedOut = false;
|
|
@@ -5558,8 +5781,7 @@ async function runCodexCriticReview(
|
|
|
5558
5781
|
clearTimeout(timer);
|
|
5559
5782
|
|
|
5560
5783
|
if (timedOut) {
|
|
5561
|
-
|
|
5562
|
-
return null;
|
|
5784
|
+
return { status: "timeout" };
|
|
5563
5785
|
}
|
|
5564
5786
|
if (exitCode !== 0) {
|
|
5565
5787
|
const stderrText = await new Response(proc.stderr).text();
|
|
@@ -5567,7 +5789,7 @@ async function runCodexCriticReview(
|
|
|
5567
5789
|
"stderr",
|
|
5568
5790
|
`[CriticGate] Codex exited ${exitCode}: ${toSingleLine(stderrText, 220)}`,
|
|
5569
5791
|
);
|
|
5570
|
-
return null;
|
|
5792
|
+
return { status: "done", review: null };
|
|
5571
5793
|
}
|
|
5572
5794
|
|
|
5573
5795
|
let lastMessage = "";
|
|
@@ -5584,7 +5806,7 @@ async function runCodexCriticReview(
|
|
|
5584
5806
|
|
|
5585
5807
|
if (!lastMessage) {
|
|
5586
5808
|
onLog?.("stderr", "[CriticGate] Codex: no output message captured; skipping.");
|
|
5587
|
-
return null;
|
|
5809
|
+
return { status: "done", review: null };
|
|
5588
5810
|
}
|
|
5589
5811
|
|
|
5590
5812
|
const reviewObj = parseJsonObjectLoose(lastMessage);
|
|
@@ -5593,7 +5815,7 @@ async function runCodexCriticReview(
|
|
|
5593
5815
|
"stderr",
|
|
5594
5816
|
`[CriticGate] Codex returned non-JSON: ${toSingleLine(lastMessage, 220)}`,
|
|
5595
5817
|
);
|
|
5596
|
-
return null;
|
|
5818
|
+
return { status: "done", review: null };
|
|
5597
5819
|
}
|
|
5598
5820
|
|
|
5599
5821
|
const scoreRaw = Number(reviewObj.score);
|
|
@@ -5607,14 +5829,43 @@ async function runCodexCriticReview(
|
|
|
5607
5829
|
const revisionGuidance = String(reviewObj.revision_guidance ?? "")
|
|
5608
5830
|
.trim()
|
|
5609
5831
|
.slice(0, 2000);
|
|
5610
|
-
onLog?.(
|
|
5832
|
+
onLog?.(
|
|
5833
|
+
"stdout",
|
|
5834
|
+
`[CriticGate] Codex score: ${score}/10 (${Date.now() - startedAt}ms, attempt ${attempt})`,
|
|
5835
|
+
);
|
|
5611
5836
|
return {
|
|
5612
|
-
|
|
5613
|
-
|
|
5614
|
-
|
|
5615
|
-
|
|
5616
|
-
|
|
5837
|
+
status: "done",
|
|
5838
|
+
review: {
|
|
5839
|
+
score,
|
|
5840
|
+
findings,
|
|
5841
|
+
mustFix,
|
|
5842
|
+
revisionGuidance,
|
|
5843
|
+
raw: compactJobOutput(lastMessage, outputPolicyForRuntime(runtimeConfig)),
|
|
5844
|
+
},
|
|
5617
5845
|
};
|
|
5846
|
+
};
|
|
5847
|
+
|
|
5848
|
+
try {
|
|
5849
|
+
let attempt = await runAttempt(1, false);
|
|
5850
|
+
if (attempt.status === "timeout" && timeoutBehavior === "retry_once") {
|
|
5851
|
+
onLog?.(
|
|
5852
|
+
"stderr",
|
|
5853
|
+
`[CriticGate] Codex timed out after ${qualityCriticTimeoutMs}ms; retrying once with compact critic input.`,
|
|
5854
|
+
);
|
|
5855
|
+
attempt = await runAttempt(2, true);
|
|
5856
|
+
}
|
|
5857
|
+
if (attempt.status === "timeout") {
|
|
5858
|
+
if (timeoutBehavior === "block") {
|
|
5859
|
+
onLog?.(
|
|
5860
|
+
"stderr",
|
|
5861
|
+
`[CriticGate] Codex timed out after ${qualityCriticTimeoutMs}ms; blocking because quality_critic_timeout_behavior=block.`,
|
|
5862
|
+
);
|
|
5863
|
+
return criticTimeoutReview("Codex", qualityCriticTimeoutMs, qualityCriticTimeoutMs);
|
|
5864
|
+
}
|
|
5865
|
+
onLog?.("stderr", `[CriticGate] Codex timed out after ${qualityCriticTimeoutMs}ms; skipping.`);
|
|
5866
|
+
return null;
|
|
5867
|
+
}
|
|
5868
|
+
return attempt.review;
|
|
5618
5869
|
} catch (err) {
|
|
5619
5870
|
onLog?.("stderr", `[CriticGate] Codex error: ${toSingleLine(err, 220)} (skipping).`);
|
|
5620
5871
|
return null;
|
|
@@ -5857,7 +6108,7 @@ export async function executeJob(
|
|
|
5857
6108
|
);
|
|
5858
6109
|
for (const run of quality.validationRuns) {
|
|
5859
6110
|
if (run.ok) continue;
|
|
5860
|
-
const digest =
|
|
6111
|
+
const digest = extractValidationFailureRetryDigest(run, repo);
|
|
5861
6112
|
if (digest) previousValidationFailureDigests.set(validationCommandKey(run.command), digest);
|
|
5862
6113
|
}
|
|
5863
6114
|
const validationOutsideTaskScope =
|