@pushpalsdev/cli 1.1.20 → 1.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +25 -1
- package/package.json +1 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +161 -24
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +355 -0
- package/runtime/sandbox/apps/workerpals/src/common/generic_python_executor.ts +45 -3
- package/runtime/sandbox/apps/workerpals/src/common/types.ts +69 -0
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +75 -16
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +557 -57
- package/runtime/sandbox/apps/workerpals/src/job_runner.ts +3 -0
- package/runtime/sandbox/apps/workerpals/src/merge_conflict_job.ts +9 -0
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +131 -3
|
@@ -32,7 +32,14 @@ import {
|
|
|
32
32
|
type ToolRequirement,
|
|
33
33
|
} from "shared";
|
|
34
34
|
import { resolveExecutor, type WorkerpalsRuntimeConfig } from "./common/executor_backend.js";
|
|
35
|
-
import type {
|
|
35
|
+
import type {
|
|
36
|
+
JobDiagnostics,
|
|
37
|
+
JobPatchSnapshotDiagnostics,
|
|
38
|
+
JobPublishBlockedInfo,
|
|
39
|
+
JobResult,
|
|
40
|
+
JobTerminalDiagnostics,
|
|
41
|
+
JobValidationRunDiagnostics,
|
|
42
|
+
} from "./common/types.js";
|
|
36
43
|
import {
|
|
37
44
|
compactJobOutput,
|
|
38
45
|
truncate,
|
|
@@ -190,6 +197,9 @@ export interface QualityGatePolicy {
|
|
|
190
197
|
}
|
|
191
198
|
|
|
192
199
|
const BROWSER_VALIDATION_MAX_AUTO_REVISIONS = 3;
|
|
200
|
+
const CRITIC_COMPACT_RETRY_MIN_REDUCTION_RATIO = 0.25;
|
|
201
|
+
const MAX_DIAGNOSTIC_PATH_SAMPLES = 50;
|
|
202
|
+
const MAX_DIAGNOSTIC_TEXT_CHARS = 8_000;
|
|
193
203
|
|
|
194
204
|
export function qualityRevisionLoopUpperBound(policy: {
|
|
195
205
|
maxAutoRevisions: number;
|
|
@@ -232,6 +242,89 @@ export function qualityRevisionBudgetDecision(opts: {
|
|
|
232
242
|
};
|
|
233
243
|
}
|
|
234
244
|
|
|
245
|
+
/** Largest execution budget handed to a follow-up merge-conflict resolver pass. */
const MERGE_CONFLICT_RETRY_EXECUTION_BUDGET_MS = 300_000;
/** Largest finalization budget handed to a follow-up merge-conflict resolver pass. */
const MERGE_CONFLICT_RETRY_FINALIZATION_BUDGET_MS = 60_000;
/** A follow-up pass is only started when at least this much execution budget remains. */
const MERGE_CONFLICT_MIN_RETRY_EXECUTION_BUDGET_MS = 120_000;

/**
 * Computes whether another merge-conflict resolver pass should start and the
 * capped budgets it would run with.
 *
 * When `executionBudgetMs` is not a finite positive number the job is treated
 * as unbudgeted: the pass is allowed with the default retry caps and an
 * infinite remaining budget. Otherwise the remaining budget is
 * `execution + finalization - elapsed` (floored, never negative); finalization
 * is carved out first and whatever is left, capped, goes to execution.
 *
 * @param opts.jobElapsedMs Time already spent; non-numeric or negative values count as 0.
 * @param opts.executionBudgetMs Configured execution budget for the whole job.
 * @param opts.finalizationBudgetMs Configured finalization budget; non-numeric or negative values count as 0.
 * @returns The start decision plus the budgets and the figures it was derived from.
 */
export function mergeConflictResolverRetryBudgetDecision(opts: {
  jobElapsedMs: number;
  executionBudgetMs: number;
  finalizationBudgetMs: number;
}): {
  shouldStart: boolean;
  executionBudgetMs: number;
  finalizationBudgetMs: number;
  remainingTotalBudgetMs: number;
  minimumExecutionBudgetMs: number;
} {
  const minimumExecutionBudgetMs = MERGE_CONFLICT_MIN_RETRY_EXECUTION_BUDGET_MS;
  const totalExecutionMs = Number(opts.executionBudgetMs);
  const hasUsableBudget = Number.isFinite(totalExecutionMs) && totalExecutionMs > 0;

  if (!hasUsableBudget) {
    // No meaningful job budget configured: fall back to the retry caps.
    return {
      shouldStart: true,
      executionBudgetMs: MERGE_CONFLICT_RETRY_EXECUTION_BUDGET_MS,
      finalizationBudgetMs: MERGE_CONFLICT_RETRY_FINALIZATION_BUDGET_MS,
      remainingTotalBudgetMs: Number.POSITIVE_INFINITY,
      minimumExecutionBudgetMs,
    };
  }

  // NaN / undefined collapse to 0, negatives are clamped to 0.
  const nonNegative = (raw: unknown): number => Math.max(0, Number(raw) || 0);

  const totalFinalizationMs = nonNegative(opts.finalizationBudgetMs);
  const spentMs = nonNegative(opts.jobElapsedMs);

  const remainingTotalBudgetMs = Math.max(
    0,
    Math.floor(totalExecutionMs + totalFinalizationMs - spentMs),
  );
  const finalizationBudgetMs = Math.min(
    MERGE_CONFLICT_RETRY_FINALIZATION_BUDGET_MS,
    totalFinalizationMs,
    remainingTotalBudgetMs,
  );
  const leftForExecutionMs = Math.max(0, remainingTotalBudgetMs - finalizationBudgetMs);
  const cappedExecutionMs = Math.min(
    MERGE_CONFLICT_RETRY_EXECUTION_BUDGET_MS,
    Math.floor(leftForExecutionMs),
  );

  return {
    shouldStart: cappedExecutionMs >= minimumExecutionBudgetMs,
    // The reported execution budget never drops below 10 seconds, even when
    // shouldStart is false.
    executionBudgetMs: Math.max(10_000, cappedExecutionMs),
    finalizationBudgetMs,
    remainingTotalBudgetMs,
    minimumExecutionBudgetMs,
  };
}
|
|
296
|
+
|
|
297
|
+
/**
 * Decides whether a timed-out critic run should be retried with the compact
 * prompt.
 *
 * - Never retries unless `timeoutBehavior` is `"retry_once"`.
 * - Always retries when quality or validation did not pass.
 * - Otherwise retries only when the compact prompt is smaller than the initial
 *   one by at least `CRITIC_COMPACT_RETRY_MIN_REDUCTION_RATIO`.
 *
 * @param opts.initialPromptChars Size of the prompt that timed out (floored, treated as at least 1).
 * @param opts.compactPromptChars Size of the compact prompt (floored, treated as at least 0).
 * @returns `true` when the compact retry should run.
 */
export function shouldRetryCriticTimeoutWithCompact(opts: {
  timeoutBehavior: string;
  qualityOk: boolean;
  validationPassed: boolean;
  initialPromptChars: number;
  compactPromptChars: number;
}): boolean {
  const { timeoutBehavior, qualityOk, validationPassed } = opts;
  if (timeoutBehavior !== "retry_once") return false;
  if (!(qualityOk && validationPassed)) return true;

  const fullChars = Math.max(1, Math.floor(opts.initialPromptChars));
  const compactChars = Math.max(0, Math.floor(opts.compactPromptChars));
  const savedFraction = 1 - compactChars / fullChars;
  return savedFraction >= CRITIC_COMPACT_RETRY_MIN_REDUCTION_RATIO;
}
|
|
311
|
+
|
|
312
|
+
/**
 * Reports whether the critic review should be skipped because the executor
 * output shows a Codex timeout.
 *
 * All of the following must hold: the executor is `"openai_codex"`, the policy
 * mode is `"default"`, quality and validation passed with no recorded quality
 * issues, at least one path changed, and some single line of `executorText`
 * mentions `openai_codex` / `codex` / `codex exec` followed by `timed out`.
 *
 * @returns `true` when every condition above is met.
 */
export function shouldSkipCriticAfterExecutorTimeout(opts: {
  executor: string;
  policyMode: string;
  executorText: string;
  qualityOk: boolean;
  validationPassed: boolean;
  qualityIssues: string[];
  changedPaths: string[];
}): boolean {
  const eligibleExecutor = opts.executor === "openai_codex" && opts.policyMode === "default";
  if (!eligibleExecutor) return false;

  const gatesClean =
    opts.qualityOk && opts.validationPassed && opts.qualityIssues.length === 0;
  if (!gatesClean) return false;

  if (opts.changedPaths.length === 0) return false;

  // `[^\r\n]*` keeps the executor name and "timed out" on the same line.
  const codexTimeoutLine = /\b(openai_codex|codex(?: exec)?)\b[^\r\n]*\btimed out\b/i;
  return codexTimeoutLine.test(opts.executorText);
}
|
|
327
|
+
|
|
235
328
|
export function workerAttemptRolloutScore(params: {
|
|
236
329
|
executorElapsedMs: number;
|
|
237
330
|
qualityElapsedMs: number;
|
|
@@ -498,6 +591,162 @@ export function publishableChangedPaths(changedPaths: string[]): string[] {
|
|
|
498
591
|
return changedPaths.filter((path) => !isNonPublishableArtifactPath(path));
|
|
499
592
|
}
|
|
500
593
|
|
|
594
|
+
/**
 * Normalizes a value into bounded diagnostic text, keeping the tail.
 *
 * The value is stringified (`null`/`undefined` become `""`), trailing
 * whitespace is removed, and when the text is longer than `maxChars` only the
 * last `maxChars` UTF-16 code units are kept. If that cut would begin in the
 * middle of a surrogate pair, the orphaned low surrogate is dropped so the
 * result never starts with half a character.
 *
 * @param value Anything stringifiable.
 * @param maxChars Maximum number of UTF-16 code units to keep.
 * @returns The (possibly truncated) text, or `null` when the value is blank.
 */
function compactDiagnosticText(value: unknown, maxChars = MAX_DIAGNOSTIC_TEXT_CHARS): string | null {
  // trimEnd() is linear; the equivalent /\s+$/ regex backtracks quadratically
  // on long whitespace runs that are not at the very end of the text.
  const text = String(value ?? "").trimEnd();
  if (!text.trim()) return null;
  if (text.length <= maxChars) return text;

  let start = Math.max(0, text.length - maxChars);
  const isLowSurrogate = (code: number): boolean => code >= 0xdc00 && code <= 0xdfff;
  const isHighSurrogate = (code: number): boolean => code >= 0xd800 && code <= 0xdbff;
  if (
    start > 0 &&
    isLowSurrogate(text.charCodeAt(start)) &&
    isHighSurrogate(text.charCodeAt(start - 1))
  ) {
    // The cut landed between the two halves of an astral character.
    start += 1;
  }
  return text.slice(start);
}
|
|
599
|
+
|
|
600
|
+
/**
 * Returns up to `limit` distinct, normalized paths in first-seen order.
 *
 * Normalization trims surrounding whitespace, converts backslashes to forward
 * slashes and strips a leading `./`. Whitespace is trimmed before the `./`
 * strip so that `" ./a"` and `"a"` are recognized as the same path. Empty
 * entries are skipped.
 *
 * @param paths Raw path strings.
 * @param limit Maximum number of paths to return; a non-positive limit yields an empty sample.
 * @returns The de-duplicated sample.
 */
function diagnosticPathSample(paths: string[], limit = MAX_DIAGNOSTIC_PATH_SAMPLES): string[] {
  const sample: string[] = [];
  // Without this guard the first path would be pushed before the cap is checked.
  if (limit <= 0) return sample;

  const seen = new Set<string>();
  for (const raw of paths) {
    const path = String(raw ?? "")
      .trim()
      .replace(/\\/g, "/")
      .replace(/^\.\/+/, "")
      .trim();
    if (!path || seen.has(path)) continue;
    seen.add(path);
    sample.push(path);
    if (sample.length >= limit) break;
  }
  return sample;
}
|
|
612
|
+
|
|
613
|
+
/**
 * Collects the distinct first path segments of the given paths, in first-seen
 * order, stopping once 20 have been gathered.
 *
 * Paths are normalized the same way as in the path sample: surrounding
 * whitespace trimmed, backslashes converted to `/`, a leading `./` stripped.
 * Trimming happens before the `./` strip so that `" ./src/a"` yields `"src"`
 * rather than `"."`. A path without `/` contributes itself; a path whose first
 * segment is empty (for example one starting with `/`) contributes nothing.
 *
 * @param paths Raw path strings.
 * @returns Up to 20 distinct top-level segments.
 */
function diagnosticTopLevelDirs(paths: string[]): string[] {
  const maxDirs = 20;
  const seen = new Set<string>();
  for (const path of paths) {
    const normalized = String(path ?? "")
      .trim()
      .replace(/\\/g, "/")
      .replace(/^\.\/+/, "")
      .trim();
    if (!normalized) continue;
    const slashAt = normalized.indexOf("/");
    const top = slashAt === -1 ? normalized : normalized.slice(0, slashAt);
    if (top) seen.add(top);
    if (seen.size >= maxDirs) break;
  }
  return [...seen];
}
|
|
624
|
+
|
|
625
|
+
/**
 * Builds a diagnostics snapshot describing the changed paths at one point of
 * an attempt.
 *
 * The snapshot records how many paths are publishable, how many are
 * non-publishable artifacts, a bounded sample of the changed paths and their
 * top-level directories (taken from the publishable paths when there are any,
 * otherwise from all changed paths), stamped with the current time.
 *
 * @param changedPaths Paths changed so far.
 * @param attempt Attempt number recorded on the snapshot.
 * @param phase Phase label recorded on the snapshot.
 */
function buildPatchSnapshotDiagnostics(
  changedPaths: string[],
  attempt: number,
  phase: string,
): JobPatchSnapshotDiagnostics {
  const publishablePaths = publishableChangedPaths(changedPaths);
  const artifactPaths = changedPaths.filter((candidate) =>
    isNonPublishableArtifactPath(candidate),
  );
  const dirSource = publishablePaths.length > 0 ? publishablePaths : changedPaths;

  const snapshot: JobPatchSnapshotDiagnostics = {
    attempt,
    phase,
    publishableFileCount: publishablePaths.length,
    artifactOnlyPathCount: artifactPaths.length,
    changedPathSample: diagnosticPathSample(changedPaths),
    topLevelDirs: diagnosticTopLevelDirs(dirSource),
    capturedAt: new Date().toISOString(),
  };
  return snapshot;
}
|
|
642
|
+
|
|
643
|
+
/**
 * Assigns a coarse failure class to a validation run.
 *
 * The command, stdout and stderr are lower-cased and searched as one text.
 * Checks run in this order and the first match wins:
 * `timeout` (exit 124 or timeout wording), `missing_tool` (exit 127 or
 * "missing tool" / "not found"), `browser_validation`, `test_harness`, and
 * finally `nonzero_exit`.
 *
 * @param run The finished validation run.
 * @returns The failure class, or `null` when the run passed.
 */
function classifyValidationRunFailure(run: ValidationExecutionResult): string | null {
  if (run.ok) return null;

  const haystack = `${run.command}\n${run.stdout}\n${run.stderr}`.toLowerCase();
  const mentions = (needle: string): boolean => haystack.includes(needle);
  const { exitCode } = run;

  if (exitCode === 124 || mentions("timed out") || mentions("timeout")) {
    return "timeout";
  }
  if (exitCode === 127 || mentions("missing tool") || mentions("not found")) {
    return "missing_tool";
  }

  const browserSignals = /browser|playwright|cypress|locator|page\.|screenshot|web:e2e/;
  if (browserSignals.test(haystack)) return "browser_validation";

  const harnessSignals =
    /cannot find module|import error|does not provide an export|no exported member|mock/;
  return harnessSignals.test(haystack) ? "test_harness" : "nonzero_exit";
}
|
|
660
|
+
|
|
661
|
+
/**
 * Converts validation runs into diagnostics records for one attempt.
 *
 * Only the first 20 runs are reported. Each record carries the command, exit
 * code, duration, pass flag, the inferred failure class and the compacted
 * stdout and stderr text.
 *
 * @param runs Validation runs in execution order.
 * @param attempt Attempt number recorded on every entry.
 */
function buildValidationRunDiagnostics(
  runs: ValidationExecutionResult[],
  attempt: number,
): JobValidationRunDiagnostics[] {
  const diagnostics: JobValidationRunDiagnostics[] = [];
  for (const run of runs.slice(0, 20)) {
    diagnostics.push({
      attempt,
      command: run.command,
      exitCode: run.exitCode,
      durationMs: run.elapsedMs,
      passed: run.ok,
      failureClass: classifyValidationRunFailure(run),
      stdoutTail: compactDiagnosticText(run.stdout),
      stderrTail: compactDiagnosticText(run.stderr),
    });
  }
  return diagnostics;
}
|
|
676
|
+
|
|
677
|
+
/**
 * Infers a terminal failure class for a finished job.
 *
 * A successful result is `"success"`. Otherwise, in order: paths changed but
 * none publishable → `"artifact_only_no_publishable_patch"`; exit 124 or
 * timeout wording → `"timeout"`; then the first keyword group found in the
 * lower-cased summary/stderr/stdout decides (validation, scope, critic,
 * publish, command policy); anything else is `"executor_failure"`.
 *
 * @param result The job result to classify.
 * @param changedPaths Paths changed by the job.
 */
function inferTerminalFailureClass(result: JobResult, changedPaths: string[]): string {
  if (result.ok) return "success";

  const haystack =
    `${result.summary ?? ""}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`.toLowerCase();
  const publishableTotal = publishableChangedPaths(changedPaths).length;
  const mentionsAny = (...needles: string[]): boolean =>
    needles.some((needle) => haystack.includes(needle));

  if (changedPaths.length > 0 && publishableTotal === 0) {
    return "artifact_only_no_publishable_patch";
  }
  if (result.exitCode === 124 || mentionsAny("timed out", "timeout")) {
    return "timeout";
  }

  // Order matters: earlier groups win when several keywords are present.
  const keywordClasses: ReadonlyArray<readonly [readonly string[], string]> = [
    [["validationgate", "validation"], "validation"],
    [["scopegate", "scope"], "scope"],
    [["criticgate", "critic"], "critic"],
    [["publish"], "publish"],
    [["shell-wrapper", "command-router"], "command_policy"],
  ];
  for (const [needles, label] of keywordClasses) {
    if (mentionsAny(...needles)) return label;
  }
  return "executor_failure";
}
|
|
690
|
+
|
|
691
|
+
/**
 * Infers the stage at which a job ended from its summary and stderr.
 *
 * The lower-cased text is checked against keyword groups in a fixed order
 * (validation, scope, critic, publish, quality, executor); the first group
 * with a match names the stage.
 *
 * @param result The job result whose summary and stderr are inspected.
 * @param fallback Stage returned when no keyword matches.
 */
function inferTerminalStage(result: JobResult, fallback: string): string {
  const haystack = `${result.summary ?? ""}\n${result.stderr ?? ""}`.toLowerCase();

  // Order matters: earlier groups win when several keywords are present.
  const stageKeywords: ReadonlyArray<readonly [readonly string[], string]> = [
    [["validationgate", "validation"], "validation"],
    [["scopegate", "scope"], "scope"],
    [["criticgate", "critic"], "critic"],
    [["publish"], "publish"],
    [["quality gate"], "quality"],
    [["codex", "executor"], "executor"],
  ];
  for (const [needles, stage] of stageKeywords) {
    if (needles.some((needle) => haystack.includes(needle))) return stage;
  }
  return fallback;
}
|
|
701
|
+
|
|
702
|
+
/**
 * Merges two diagnostics objects into a new one.
 *
 * Scalar fields from `extra` override those from `base`. The list fields
 * (`attempts`, `phaseSpans`, `validationRuns`, `patchSnapshots`) are
 * concatenated with `base` entries first, `terminal` prefers `extra`, and
 * `metadata` is shallow-merged with `extra` keys winning.
 *
 * @param base Existing diagnostics, if any.
 * @param extra Diagnostics to layer on top.
 */
function mergeJobDiagnostics(base: JobDiagnostics | undefined, extra: JobDiagnostics): JobDiagnostics {
  const joinLists = <T>(
    earlier: readonly T[] | undefined,
    later: readonly T[] | undefined,
  ): T[] => [...(earlier ?? []), ...(later ?? [])];

  const merged: JobDiagnostics = {
    ...(base ?? {}),
    ...extra,
    attempts: joinLists(base?.attempts, extra.attempts),
    phaseSpans: joinLists(base?.phaseSpans, extra.phaseSpans),
    validationRuns: joinLists(base?.validationRuns, extra.validationRuns),
    patchSnapshots: joinLists(base?.patchSnapshots, extra.patchSnapshots),
    terminal: extra.terminal ?? base?.terminal,
    metadata: {
      ...(base?.metadata ?? {}),
      ...(extra.metadata ?? {}),
    },
  };
  return merged;
}
|
|
717
|
+
|
|
718
|
+
/**
 * Returns a copy of `result` whose `diagnostics` field is the result's
 * existing diagnostics merged with `diagnostics`. The input is not mutated.
 *
 * @param result Job result to copy.
 * @param diagnostics Diagnostics to merge into the copy.
 */
function withJobDiagnostics(result: JobResult, diagnostics: JobDiagnostics): JobResult {
  const combined = mergeJobDiagnostics(result.diagnostics, diagnostics);
  return { ...result, diagnostics: combined };
}
|
|
724
|
+
|
|
725
|
+
/**
 * Builds the terminal diagnostics record for a finished job.
 *
 * Combines the inferred failure class and stage with the executor backend, a
 * summary compacted to 1,000 characters, publishable and artifact path counts,
 * a bounded sample of changed paths and caller-supplied metadata.
 * `watchdogFired` is set when the summary, stderr or stdout mentions
 * "watchdog" or "rollout coach" (case-insensitive).
 *
 * @param args.result The final job result.
 * @param args.executor Executor backend name recorded as `executorBackend`.
 * @param args.changedPaths Paths changed by the job.
 * @param args.terminalStage Stage used when none can be inferred from the result text.
 * @param args.timeoutMs Timeout to record; `null` when not provided.
 * @param args.metadata Extra metadata passed through unchanged.
 */
function buildTerminalDiagnostics(args: {
  result: JobResult;
  executor: string;
  changedPaths: string[];
  terminalStage: string;
  timeoutMs?: number | null;
  metadata?: Record<string, unknown>;
}): JobTerminalDiagnostics {
  const { result, changedPaths } = args;
  const publishablePaths = publishableChangedPaths(changedPaths);
  const artifactPaths = changedPaths.filter((candidate) =>
    isNonPublishableArtifactPath(candidate),
  );
  const haystack = `${result.summary ?? ""}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`;
  const watchdogPattern = /watchdog|rollout coach/i;

  return {
    failureClass: inferTerminalFailureClass(result, changedPaths),
    terminalStage: inferTerminalStage(result, args.terminalStage),
    executorBackend: args.executor,
    summary: compactDiagnosticText(result.summary, 1_000),
    watchdogFired: watchdogPattern.test(haystack),
    timeoutMs: args.timeoutMs ?? null,
    publishableFileCount: publishablePaths.length,
    artifactOnlyPathCount: artifactPaths.length,
    changedPathSample: diagnosticPathSample(changedPaths),
    metadata: args.metadata,
  };
}
|
|
749
|
+
|
|
501
750
|
function collectPlanningText(planning: TaskExecutePlanning): string {
|
|
502
751
|
return [
|
|
503
752
|
planning.intent,
|
|
@@ -879,6 +1128,10 @@ function parseJsonObjectLoose(text: string): Record<string, unknown> | null {
|
|
|
879
1128
|
}
|
|
880
1129
|
|
|
881
1130
|
const COMMIT_MSG_MAX_DIFF_CHARS = 120_000;
|
|
1131
|
+
const COMMIT_MSG_LLM_MAX_CHANGED_PATHS = 20;
|
|
1132
|
+
const COMMIT_MSG_GENERATOR_DEFAULT_TIMEOUT_MS = 15_000;
|
|
1133
|
+
const COMMIT_MSG_GENERATOR_MIN_TIMEOUT_MS = 3_000;
|
|
1134
|
+
const COMMIT_MSG_GENERATOR_MAX_TIMEOUT_MS = 30_000;
|
|
882
1135
|
|
|
883
1136
|
const SHELL_CONTROL_TOKENS = new Set(["&&", "||", ";", "|"]);
|
|
884
1137
|
|
|
@@ -2096,6 +2349,18 @@ function classifyBrowserValidationFailureKindFromText(text: string): BrowserVali
|
|
|
2096
2349
|
return "unknown";
|
|
2097
2350
|
}
|
|
2098
2351
|
|
|
2352
|
+
/**
 * Decides whether a failed browser validation run should be retried once.
 *
 * Only failed runs of long-running browser validation commands qualify. The
 * failure digest and the ANSI-stripped stderr/stdout are classified; a
 * `runtime`, `network` or `startup` failure kind is retryable, as is any
 * output that mentions a route/startup smoke failure.
 *
 * @param run The finished validation run.
 * @returns `true` when the run should be retried.
 */
export function shouldRetryBrowserValidationRunOnce(run: ValidationExecutionResult): boolean {
  if (run.ok) return false;
  if (!isLongRunningBrowserValidationCommand(run.command)) return false;

  const rawOutput = [run.stderr, run.stdout].filter(Boolean).join("\n");
  const cleaned = stripAnsiControlSequences(rawOutput);
  const digest = extractValidationFailureDigest(run);
  const evidence = `${digest}\n${cleaned}`;

  const kind = classifyBrowserValidationFailureKindFromText(evidence);
  if (kind === "runtime" || kind === "network" || kind === "startup") {
    return true;
  }

  const startupSmokePattern =
    /\b(Route\/startup smoke failure|startup smoke failure|home route startup)\b/i;
  return startupSmokePattern.test(evidence);
}
|
|
2363
|
+
|
|
2099
2364
|
function extractBrowserValidationStage(text: string): string | null {
|
|
2100
2365
|
const patterns = [
|
|
2101
2366
|
/\bBrowser validation failed during\s+([^:.\r\n|]+?)\s+stage\b/i,
|
|
@@ -3662,12 +3927,34 @@ async function runDeterministicQualityGate(
|
|
|
3662
3927
|
continue;
|
|
3663
3928
|
}
|
|
3664
3929
|
onLog?.("stdout", `[ValidationGate] Running "${command}"`);
|
|
3665
|
-
|
|
3930
|
+
let run = await runValidationCommand(
|
|
3666
3931
|
repo,
|
|
3667
3932
|
command,
|
|
3668
3933
|
resolveValidationCommandTimeoutMs(command, qualityValidationStepTimeoutMs),
|
|
3669
3934
|
outputPolicy,
|
|
3670
3935
|
);
|
|
3936
|
+
const firstDigest = run.ok ? "" : extractValidationFailureDigest(run);
|
|
3937
|
+
if (shouldRetryBrowserValidationRunOnce(run)) {
|
|
3938
|
+
onLog?.(
|
|
3939
|
+
"stderr",
|
|
3940
|
+
`[ValidationGate] Retrying browser validation once after retryable startup/runtime failure: ${command}${firstDigest ? ` - ${firstDigest}` : ""}`,
|
|
3941
|
+
);
|
|
3942
|
+
const retryRun = await runValidationCommand(
|
|
3943
|
+
repo,
|
|
3944
|
+
command,
|
|
3945
|
+
resolveValidationCommandTimeoutMs(command, qualityValidationStepTimeoutMs),
|
|
3946
|
+
outputPolicy,
|
|
3947
|
+
);
|
|
3948
|
+
if (!retryRun.ok && firstDigest) {
|
|
3949
|
+
retryRun.stderr = [
|
|
3950
|
+
`Previous browser validation attempt failed before retry: ${firstDigest}`,
|
|
3951
|
+
retryRun.stderr,
|
|
3952
|
+
]
|
|
3953
|
+
.filter(Boolean)
|
|
3954
|
+
.join("\n");
|
|
3955
|
+
}
|
|
3956
|
+
run = retryRun;
|
|
3957
|
+
}
|
|
3671
3958
|
validationRuns.push(run);
|
|
3672
3959
|
const digest = run.ok ? "" : extractValidationFailureDigest(run);
|
|
3673
3960
|
const runSummary = `[ValidationGate] ${run.ok ? "Passed" : "Failed"} (${run.elapsedMs}ms, exit ${run.exitCode}): ${command}${digest ? ` - ${digest}` : ""}`;
|
|
@@ -4768,17 +5055,19 @@ export async function createJobCommit(
|
|
|
4768
5055
|
...toNonEmptyStringArray(jobPlanning?.requiredValidationSteps),
|
|
4769
5056
|
...loadRequiredValidationStepsFromVision(repo),
|
|
4770
5057
|
];
|
|
4771
|
-
const llmCommitMsg =
|
|
4772
|
-
|
|
4773
|
-
|
|
4774
|
-
|
|
4775
|
-
|
|
4776
|
-
|
|
4777
|
-
|
|
4778
|
-
|
|
4779
|
-
|
|
4780
|
-
|
|
4781
|
-
|
|
5058
|
+
const llmCommitMsg = shouldUseLlmCommitMessageForStagedDiff({ changedPaths, diff })
|
|
5059
|
+
? await generateCommitMessageFromDiff(
|
|
5060
|
+
diff,
|
|
5061
|
+
{
|
|
5062
|
+
instruction: String(job.params?.instruction ?? ""),
|
|
5063
|
+
type: normalizeCommitType(job.kind, job.params),
|
|
5064
|
+
area: inferCommitArea(job.kind, job.params, changedPaths),
|
|
5065
|
+
validationSteps: jobValidationSteps,
|
|
5066
|
+
},
|
|
5067
|
+
repo,
|
|
5068
|
+
runtimeConfig,
|
|
5069
|
+
).catch(() => null)
|
|
5070
|
+
: null;
|
|
4782
5071
|
if (!llmCommitMsg) {
|
|
4783
5072
|
console.warn(
|
|
4784
5073
|
`[WorkerPals] Commit message generator unavailable for job ${job.id}; using deterministic fallback.`,
|
|
@@ -5745,17 +6034,19 @@ async function createMergeConflictJobCommit(
|
|
|
5745
6034
|
...toNonEmptyStringArray(jobPlanning?.requiredValidationSteps),
|
|
5746
6035
|
...loadRequiredValidationStepsFromVision(repo),
|
|
5747
6036
|
];
|
|
5748
|
-
const llmCommitMsg =
|
|
5749
|
-
|
|
5750
|
-
|
|
5751
|
-
|
|
5752
|
-
|
|
5753
|
-
|
|
5754
|
-
|
|
5755
|
-
|
|
5756
|
-
|
|
5757
|
-
|
|
5758
|
-
|
|
6037
|
+
const llmCommitMsg = shouldUseLlmCommitMessageForStagedDiff({ changedPaths, diff })
|
|
6038
|
+
? await generateCommitMessageFromDiff(
|
|
6039
|
+
diff,
|
|
6040
|
+
{
|
|
6041
|
+
instruction: String(job.params?.instruction ?? ""),
|
|
6042
|
+
type: normalizeCommitType(job.kind, job.params),
|
|
6043
|
+
area: inferCommitArea(job.kind, job.params, changedPaths),
|
|
6044
|
+
validationSteps: jobValidationSteps,
|
|
6045
|
+
},
|
|
6046
|
+
repo,
|
|
6047
|
+
runtimeConfig,
|
|
6048
|
+
).catch(() => null)
|
|
6049
|
+
: null;
|
|
5759
6050
|
if (!llmCommitMsg) {
|
|
5760
6051
|
console.warn(
|
|
5761
6052
|
`[WorkerPals] Commit message generator unavailable for merge-conflict job ${job.id}; using deterministic fallback.`,
|
|
@@ -6206,6 +6497,38 @@ async function generateCommitMessageFromDiff(
|
|
|
6206
6497
|
return generateCommitMessageFromDiffViaHttp(prompt, opts, runtimeConfig);
|
|
6207
6498
|
}
|
|
6208
6499
|
|
|
6500
|
+
/**
 * Resolves the timeout, in milliseconds, for commit-message generation.
 *
 * The first non-nullish value wins, looked up in this order:
 * `workerpals.commitMessageTimeoutMs`, `workerpals.commit_message_timeout_ms`,
 * `workerpals.llm.commitMessageTimeoutMs`,
 * `workerpals.llm.commit_message_timeout_ms`, then the
 * `WORKERPALS_COMMIT_MESSAGE_TIMEOUT_MS` environment variable.
 *
 * Only numbers and non-blank numeric strings are accepted. A blank string
 * (for example an exported-but-empty environment variable), a boolean or any
 * other value falls back to the default instead of being coerced to `0` and
 * clamped up to the minimum. The accepted value is floored and clamped to the
 * `[MIN, MAX]` timeout range.
 *
 * @param runtimeConfig Runtime configuration to read from.
 * @returns The timeout in milliseconds.
 */
export function resolveCommitMessageGeneratorTimeoutMs(
  runtimeConfig: WorkerpalsRuntimeConfig = DEFAULT_CONFIG,
): number {
  const workerpalsConfig = runtimeConfig.workerpals as Record<string, unknown>;
  const llmConfig =
    workerpalsConfig.llm && typeof workerpalsConfig.llm === "object"
      ? (workerpalsConfig.llm as Record<string, unknown>)
      : {};
  const configuredRaw: unknown =
    workerpalsConfig.commitMessageTimeoutMs ??
    workerpalsConfig.commit_message_timeout_ms ??
    llmConfig.commitMessageTimeoutMs ??
    llmConfig.commit_message_timeout_ms ??
    Bun.env.WORKERPALS_COMMIT_MESSAGE_TIMEOUT_MS;

  // Number("") and Number(true) are 0 and 1, which would silently become the
  // minimum timeout; treat anything but a number or non-blank string as unset.
  let configured = Number.NaN;
  if (typeof configuredRaw === "number") {
    configured = configuredRaw;
  } else if (typeof configuredRaw === "string" && configuredRaw.trim() !== "") {
    configured = Number(configuredRaw);
  }

  const value = Number.isFinite(configured)
    ? configured
    : COMMIT_MSG_GENERATOR_DEFAULT_TIMEOUT_MS;
  return Math.max(
    COMMIT_MSG_GENERATOR_MIN_TIMEOUT_MS,
    Math.min(COMMIT_MSG_GENERATOR_MAX_TIMEOUT_MS, Math.floor(value)),
  );
}
|
|
6523
|
+
|
|
6524
|
+
/**
 * Decides whether the LLM should be asked to write the commit message for a
 * staged diff.
 *
 * The LLM is used only when the diff has non-whitespace content and the number
 * of changed paths does not exceed `COMMIT_MSG_LLM_MAX_CHANGED_PATHS`.
 *
 * @param params.changedPaths Paths included in the staged change.
 * @param params.diff The staged diff text.
 */
export function shouldUseLlmCommitMessageForStagedDiff(params: {
  changedPaths: string[];
  diff: string;
}): boolean {
  const diffText = String(params.diff ?? "");
  const hasDiff = diffText.trim().length > 0;
  return hasDiff && params.changedPaths.length <= COMMIT_MSG_LLM_MAX_CHANGED_PATHS;
}
|
|
6531
|
+
|
|
6209
6532
|
type CommitMessagePrompt = {
|
|
6210
6533
|
systemPrompt: string;
|
|
6211
6534
|
userMessage: string;
|
|
@@ -6244,11 +6567,7 @@ async function generateCommitMessageFromDiffViaCodex(
|
|
|
6244
6567
|
if (!model) return null;
|
|
6245
6568
|
const codexPrefix = await resolveCodexCommandPrefix(repo, runtimeConfig.workerpals.llm.codexBin);
|
|
6246
6569
|
if (!codexPrefix) return null;
|
|
6247
|
-
const timeoutMs = (
|
|
6248
|
-
const value = Number(runtimeConfig.workerpals.llm.codexTimeoutMs);
|
|
6249
|
-
if (!Number.isFinite(value)) return 120_000;
|
|
6250
|
-
return Math.max(10_000, Math.min(600_000, Math.floor(value)));
|
|
6251
|
-
})();
|
|
6570
|
+
const timeoutMs = resolveCommitMessageGeneratorTimeoutMs(runtimeConfig);
|
|
6252
6571
|
const reasoningEffort = normalizeCodexReasoningEffort(
|
|
6253
6572
|
runtimeConfig.workerpals.llm.reasoningEffort,
|
|
6254
6573
|
model,
|
|
@@ -6338,7 +6657,7 @@ async function generateCommitMessageFromDiffViaHttp(
|
|
|
6338
6657
|
if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
|
|
6339
6658
|
|
|
6340
6659
|
const controller = new AbortController();
|
|
6341
|
-
const timer = setTimeout(() => controller.abort(),
|
|
6660
|
+
const timer = setTimeout(() => controller.abort(), resolveCommitMessageGeneratorTimeoutMs(runtimeConfig));
|
|
6342
6661
|
try {
|
|
6343
6662
|
const response = await fetch(endpoint, {
|
|
6344
6663
|
method: "POST",
|
|
@@ -6952,6 +7271,7 @@ async function runCodexCriticReview(
|
|
|
6952
7271
|
validationChars: validationSummary.length,
|
|
6953
7272
|
};
|
|
6954
7273
|
};
|
|
7274
|
+
type CodexCriticPayload = Awaited<ReturnType<typeof buildCriticInstruction>>;
|
|
6955
7275
|
|
|
6956
7276
|
const tmpOutputPath = `/tmp/pushpals-critic-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.txt`;
|
|
6957
7277
|
const buildCmd = () => {
|
|
@@ -6980,13 +7300,17 @@ async function runCodexCriticReview(
|
|
|
6980
7300
|
const runAttempt = async (
|
|
6981
7301
|
attempt: number,
|
|
6982
7302
|
compact: boolean,
|
|
6983
|
-
|
|
7303
|
+
payloadOverride?: CodexCriticPayload,
|
|
7304
|
+
): Promise<
|
|
7305
|
+
| { status: "timeout"; payload: CodexCriticPayload }
|
|
7306
|
+
| { status: "done"; review: CriticReview | null; payload: CodexCriticPayload }
|
|
7307
|
+
> => {
|
|
6984
7308
|
try {
|
|
6985
7309
|
unlinkSync(tmpOutputPath);
|
|
6986
7310
|
} catch {
|
|
6987
7311
|
/* ignore stale/missing critic output */
|
|
6988
7312
|
}
|
|
6989
|
-
const payload = await buildCriticInstruction(compact);
|
|
7313
|
+
const payload = payloadOverride ?? (await buildCriticInstruction(compact));
|
|
6990
7314
|
const startedAt = Date.now();
|
|
6991
7315
|
onLog?.(
|
|
6992
7316
|
"stdout",
|
|
@@ -7014,7 +7338,7 @@ async function runCodexCriticReview(
|
|
|
7014
7338
|
clearTimeout(timer);
|
|
7015
7339
|
|
|
7016
7340
|
if (timedOut) {
|
|
7017
|
-
return { status: "timeout" };
|
|
7341
|
+
return { status: "timeout", payload };
|
|
7018
7342
|
}
|
|
7019
7343
|
if (exitCode !== 0) {
|
|
7020
7344
|
const stderrText = await new Response(proc.stderr).text();
|
|
@@ -7022,7 +7346,7 @@ async function runCodexCriticReview(
|
|
|
7022
7346
|
"stderr",
|
|
7023
7347
|
`[CriticGate] Codex exited ${exitCode}: ${toSingleLine(stderrText, 220)}`,
|
|
7024
7348
|
);
|
|
7025
|
-
return { status: "done", review: null };
|
|
7349
|
+
return { status: "done", review: null, payload };
|
|
7026
7350
|
}
|
|
7027
7351
|
|
|
7028
7352
|
let lastMessage = "";
|
|
@@ -7039,7 +7363,7 @@ async function runCodexCriticReview(
|
|
|
7039
7363
|
|
|
7040
7364
|
if (!lastMessage) {
|
|
7041
7365
|
onLog?.("stderr", "[CriticGate] Codex: no output message captured; skipping.");
|
|
7042
|
-
return { status: "done", review: null };
|
|
7366
|
+
return { status: "done", review: null, payload };
|
|
7043
7367
|
}
|
|
7044
7368
|
|
|
7045
7369
|
const reviewObj = parseJsonObjectLoose(lastMessage);
|
|
@@ -7048,7 +7372,7 @@ async function runCodexCriticReview(
|
|
|
7048
7372
|
"stderr",
|
|
7049
7373
|
`[CriticGate] Codex returned non-JSON: ${toSingleLine(lastMessage, 220)}`,
|
|
7050
7374
|
);
|
|
7051
|
-
return { status: "done", review: null };
|
|
7375
|
+
return { status: "done", review: null, payload };
|
|
7052
7376
|
}
|
|
7053
7377
|
|
|
7054
7378
|
const scoreRaw = Number(reviewObj.score);
|
|
@@ -7068,6 +7392,7 @@ async function runCodexCriticReview(
|
|
|
7068
7392
|
);
|
|
7069
7393
|
return {
|
|
7070
7394
|
status: "done",
|
|
7395
|
+
payload,
|
|
7071
7396
|
review: {
|
|
7072
7397
|
score,
|
|
7073
7398
|
findings,
|
|
@@ -7081,11 +7406,36 @@ async function runCodexCriticReview(
|
|
|
7081
7406
|
try {
|
|
7082
7407
|
let attempt = await runAttempt(1, false);
|
|
7083
7408
|
if (attempt.status === "timeout" && timeoutBehavior === "retry_once") {
|
|
7084
|
-
|
|
7085
|
-
|
|
7086
|
-
|
|
7087
|
-
|
|
7088
|
-
|
|
7409
|
+
const compactPayload = await buildCriticInstruction(true);
|
|
7410
|
+
const validationPassed =
|
|
7411
|
+
quality.validationRuns.length > 0 && quality.validationRuns.every((run) => run.ok);
|
|
7412
|
+
if (
|
|
7413
|
+
shouldRetryCriticTimeoutWithCompact({
|
|
7414
|
+
timeoutBehavior,
|
|
7415
|
+
qualityOk: quality.ok,
|
|
7416
|
+
validationPassed,
|
|
7417
|
+
initialPromptChars: attempt.payload.promptChars,
|
|
7418
|
+
compactPromptChars: compactPayload.promptChars,
|
|
7419
|
+
})
|
|
7420
|
+
) {
|
|
7421
|
+
onLog?.(
|
|
7422
|
+
"stderr",
|
|
7423
|
+
`[CriticGate] Codex timed out after ${qualityCriticTimeoutMs}ms; retrying once with compact critic input.`,
|
|
7424
|
+
);
|
|
7425
|
+
attempt = await runAttempt(2, true, compactPayload);
|
|
7426
|
+
} else {
|
|
7427
|
+
const reductionPct = Math.max(
|
|
7428
|
+
0,
|
|
7429
|
+
Math.round(
|
|
7430
|
+
(1 - compactPayload.promptChars / Math.max(1, attempt.payload.promptChars)) * 100,
|
|
7431
|
+
),
|
|
7432
|
+
);
|
|
7433
|
+
onLog?.(
|
|
7434
|
+
"stderr",
|
|
7435
|
+
`[CriticGate] Codex timed out after ${qualityCriticTimeoutMs}ms; compact critic input only reduced prompt by ${reductionPct}% after clean validation; skipping retry.`,
|
|
7436
|
+
);
|
|
7437
|
+
return null;
|
|
7438
|
+
}
|
|
7089
7439
|
}
|
|
7090
7440
|
if (attempt.status === "timeout") {
|
|
7091
7441
|
if (timeoutBehavior === "block") {
|
|
@@ -7246,6 +7596,8 @@ export async function executeJob(
|
|
|
7246
7596
|
const jobStartedAt = Date.now();
|
|
7247
7597
|
const previousValidationFailureDigests = new Map<string, string>();
|
|
7248
7598
|
const failureJobFamily = buildTaskFailureJobFamily(normalizedParams);
|
|
7599
|
+
const diagnosticValidationRuns: JobValidationRunDiagnostics[] = [];
|
|
7600
|
+
const diagnosticPatchSnapshots: JobPatchSnapshotDiagnostics[] = [];
|
|
7249
7601
|
while (revisionAttempt <= qualityRevisionLoopMax) {
|
|
7250
7602
|
const attemptStartedAt = Date.now();
|
|
7251
7603
|
const attemptParams: Record<string, unknown> = { ...normalizedParams };
|
|
@@ -7255,7 +7607,7 @@ export async function executeJob(
|
|
|
7255
7607
|
}
|
|
7256
7608
|
|
|
7257
7609
|
const executor = resolveExecutor(runtimeConfig);
|
|
7258
|
-
const
|
|
7610
|
+
const defaultExecuteBudgets = { executionBudgetMs, finalizationBudgetMs };
|
|
7259
7611
|
const runExecutor = getBackendTaskExecutor(executor);
|
|
7260
7612
|
if (!runExecutor) {
|
|
7261
7613
|
return {
|
|
@@ -7267,14 +7619,17 @@ export async function executeJob(
|
|
|
7267
7619
|
let result: Awaited<ReturnType<typeof runExecutor>> | null = null;
|
|
7268
7620
|
let mergeConflictPass = 0;
|
|
7269
7621
|
let executorElapsedMs = 0;
|
|
7622
|
+
let nextMergeConflictExecuteBudgets: typeof defaultExecuteBudgets | null = null;
|
|
7270
7623
|
while (true) {
|
|
7624
|
+
const currentExecuteBudgets = nextMergeConflictExecuteBudgets ?? defaultExecuteBudgets;
|
|
7625
|
+
nextMergeConflictExecuteBudgets = null;
|
|
7271
7626
|
const currentResult = await runExecutor(
|
|
7272
7627
|
kind,
|
|
7273
7628
|
attemptParams,
|
|
7274
7629
|
repo,
|
|
7275
7630
|
runtimeConfig,
|
|
7276
7631
|
onLog,
|
|
7277
|
-
|
|
7632
|
+
currentExecuteBudgets,
|
|
7278
7633
|
);
|
|
7279
7634
|
if (!currentResult.ok) return currentResult;
|
|
7280
7635
|
result = currentResult;
|
|
@@ -7308,14 +7663,77 @@ export async function executeJob(
|
|
|
7308
7663
|
exitCode: 4,
|
|
7309
7664
|
};
|
|
7310
7665
|
}
|
|
7666
|
+
const retryBudget = mergeConflictResolverRetryBudgetDecision({
|
|
7667
|
+
jobElapsedMs: Date.now() - attemptStartedAt,
|
|
7668
|
+
executionBudgetMs,
|
|
7669
|
+
finalizationBudgetMs,
|
|
7670
|
+
});
|
|
7671
|
+
if (!retryBudget.shouldStart) {
|
|
7672
|
+
const detail =
|
|
7673
|
+
"Merge-conflict rebase advanced into another conflicted commit, but remaining job budget " +
|
|
7674
|
+
`is ${retryBudget.remainingTotalBudgetMs}ms (< ${retryBudget.minimumExecutionBudgetMs}ms execution).`;
|
|
7675
|
+
onLog?.("stderr", `[MergeConflict] ${detail}`);
|
|
7676
|
+
return {
|
|
7677
|
+
ok: false,
|
|
7678
|
+
summary: detail,
|
|
7679
|
+
stdout: currentResult.stdout,
|
|
7680
|
+
stderr: [currentResult.stderr ?? "", resume.detail ?? detail].filter(Boolean).join("\n"),
|
|
7681
|
+
exitCode: 4,
|
|
7682
|
+
};
|
|
7683
|
+
}
|
|
7684
|
+
nextMergeConflictExecuteBudgets = {
|
|
7685
|
+
executionBudgetMs: retryBudget.executionBudgetMs,
|
|
7686
|
+
finalizationBudgetMs: retryBudget.finalizationBudgetMs,
|
|
7687
|
+
};
|
|
7311
7688
|
onLog?.(
|
|
7312
7689
|
"stdout",
|
|
7313
7690
|
`[MergeConflict] Rebase surfaced another conflicted commit after auto-continue; rerunning resolver pass ${
|
|
7314
7691
|
mergeConflictPass + 1
|
|
7315
|
-
}.`,
|
|
7692
|
+
} with a capped completion budget (${retryBudget.executionBudgetMs}ms execution).`,
|
|
7316
7693
|
);
|
|
7317
7694
|
continue;
|
|
7318
7695
|
}
|
|
7696
|
+
if (sequencer === "rebase" && !resume.resumed) {
|
|
7697
|
+
mergeConflictPass += 1;
|
|
7698
|
+
const budget = mergeConflictResolverRetryBudgetDecision({
|
|
7699
|
+
jobElapsedMs: Date.now() - attemptStartedAt,
|
|
7700
|
+
executionBudgetMs,
|
|
7701
|
+
finalizationBudgetMs,
|
|
7702
|
+
});
|
|
7703
|
+
if (mergeConflictPass < MAX_MERGE_CONFLICT_RESOLUTION_PASSES && budget.shouldStart) {
|
|
7704
|
+
const retryDetail =
|
|
7705
|
+
resume.detail ??
|
|
7706
|
+
"the previous resolver pass returned before the prepared rebase completed";
|
|
7707
|
+
const previousHint = String(attemptParams.qualityRevisionHint ?? "").trim();
|
|
7708
|
+
attemptParams.qualityRevisionHint = [
|
|
7709
|
+
previousHint,
|
|
7710
|
+
[
|
|
7711
|
+
`Merge-conflict resolver pass ${mergeConflictPass} left the rebase unfinished: ${retryDetail}.`,
|
|
7712
|
+
"Focus only on completing the active rebase. Inspect unresolved files with `git diff --name-only --diff-filter=U`, remove remaining conflict markers, stage resolved files, and run `git -c core.editor=true rebase --continue` until no rebase remains.",
|
|
7713
|
+
"Do not broaden the patch or run full validation before the rebase is complete.",
|
|
7714
|
+
].join("\n"),
|
|
7715
|
+
]
|
|
7716
|
+
.filter(Boolean)
|
|
7717
|
+
.join("\n\n");
|
|
7718
|
+
nextMergeConflictExecuteBudgets = {
|
|
7719
|
+
executionBudgetMs: budget.executionBudgetMs,
|
|
7720
|
+
finalizationBudgetMs: budget.finalizationBudgetMs,
|
|
7721
|
+
};
|
|
7722
|
+
onLog?.(
|
|
7723
|
+
"stdout",
|
|
7724
|
+
`[MergeConflict] ${retryDetail}; rerunning resolver pass ${
|
|
7725
|
+
mergeConflictPass + 1
|
|
7726
|
+
} with focused rebase-completion guidance and capped budget (${budget.executionBudgetMs}ms execution).`,
|
|
7727
|
+
);
|
|
7728
|
+
continue;
|
|
7729
|
+
}
|
|
7730
|
+
if (!budget.shouldStart) {
|
|
7731
|
+
onLog?.(
|
|
7732
|
+
"stderr",
|
|
7733
|
+
`[MergeConflict] Not rerunning unfinished rebase resolver: remaining total budget is ${budget.remainingTotalBudgetMs}ms (< ${budget.minimumExecutionBudgetMs}ms execution).`,
|
|
7734
|
+
);
|
|
7735
|
+
}
|
|
7736
|
+
}
|
|
7319
7737
|
const detail =
|
|
7320
7738
|
`Merge-conflict job returned with git ${sequencer} still in progress. ` +
|
|
7321
7739
|
`Finish the ${sequencer} before returning control to WorkerPals.`;
|
|
@@ -7342,6 +7760,11 @@ export async function executeJob(
|
|
|
7342
7760
|
? parseChangedPathsFromStatus(preQualityStatus.stdout)
|
|
7343
7761
|
: [];
|
|
7344
7762
|
const preQualityPublishablePaths = publishableChangedPaths(preQualityChangedPaths);
|
|
7763
|
+
if (preQualityChangedPaths.length > 0) {
|
|
7764
|
+
diagnosticPatchSnapshots.push(
|
|
7765
|
+
buildPatchSnapshotDiagnostics(preQualityChangedPaths, revisionAttempt, "executor"),
|
|
7766
|
+
);
|
|
7767
|
+
}
|
|
7345
7768
|
const executorText = `${result.summary ?? ""}\n${result.stdout ?? ""}\n${result.stderr ?? ""}`;
|
|
7346
7769
|
const shellWrapperReturn =
|
|
7347
7770
|
/shell-wrapper command rejections|command-router shell-wrapper|command policy rejection/i.test(
|
|
@@ -7355,13 +7778,24 @@ export async function executeJob(
|
|
|
7355
7778
|
"stderr",
|
|
7356
7779
|
`[QualityGate] ${detail} Skipping ValidationGate/CriticGate because there is no PR-worthy patch to validate.`,
|
|
7357
7780
|
);
|
|
7358
|
-
|
|
7781
|
+
const failure: JobResult = {
|
|
7359
7782
|
ok: false,
|
|
7360
7783
|
summary: `Executor produced no publishable code changes (${detail})`,
|
|
7361
7784
|
stdout: result.stdout,
|
|
7362
7785
|
stderr: [result.stderr ?? "", detail].filter(Boolean).join("\n"),
|
|
7363
7786
|
exitCode: 4,
|
|
7364
7787
|
};
|
|
7788
|
+
return withJobDiagnostics(failure, {
|
|
7789
|
+
terminal: buildTerminalDiagnostics({
|
|
7790
|
+
result: failure,
|
|
7791
|
+
executor,
|
|
7792
|
+
changedPaths: preQualityChangedPaths,
|
|
7793
|
+
terminalStage: "executor",
|
|
7794
|
+
timeoutMs: executionBudgetMs,
|
|
7795
|
+
metadata: { revisionAttempt, executorElapsedMs },
|
|
7796
|
+
}),
|
|
7797
|
+
patchSnapshots: [...diagnosticPatchSnapshots],
|
|
7798
|
+
});
|
|
7365
7799
|
}
|
|
7366
7800
|
if (
|
|
7367
7801
|
preQualityPublishablePaths.length === 0 &&
|
|
@@ -7375,13 +7809,24 @@ export async function executeJob(
|
|
|
7375
7809
|
"stderr",
|
|
7376
7810
|
`[QualityGate] ${reason} Skipping ValidationGate/CriticGate and failing fast.`,
|
|
7377
7811
|
);
|
|
7378
|
-
|
|
7812
|
+
const failure: JobResult = {
|
|
7379
7813
|
ok: false,
|
|
7380
7814
|
summary: reason,
|
|
7381
7815
|
stdout: result.stdout,
|
|
7382
7816
|
stderr: [result.stderr ?? "", reason].filter(Boolean).join("\n"),
|
|
7383
7817
|
exitCode: 4,
|
|
7384
7818
|
};
|
|
7819
|
+
return withJobDiagnostics(failure, {
|
|
7820
|
+
terminal: buildTerminalDiagnostics({
|
|
7821
|
+
result: failure,
|
|
7822
|
+
executor,
|
|
7823
|
+
changedPaths: preQualityChangedPaths,
|
|
7824
|
+
terminalStage: "executor",
|
|
7825
|
+
timeoutMs: executionBudgetMs,
|
|
7826
|
+
metadata: { revisionAttempt, executorElapsedMs, shellWrapperReturn },
|
|
7827
|
+
}),
|
|
7828
|
+
patchSnapshots: [...diagnosticPatchSnapshots],
|
|
7829
|
+
});
|
|
7385
7830
|
}
|
|
7386
7831
|
|
|
7387
7832
|
const qualityStartedAt = Date.now();
|
|
@@ -7397,6 +7842,12 @@ export async function executeJob(
|
|
|
7397
7842
|
},
|
|
7398
7843
|
);
|
|
7399
7844
|
const qualityElapsedMs = Date.now() - qualityStartedAt;
|
|
7845
|
+
diagnosticPatchSnapshots.push(
|
|
7846
|
+
buildPatchSnapshotDiagnostics(quality.changedPaths, revisionAttempt, "quality"),
|
|
7847
|
+
);
|
|
7848
|
+
diagnosticValidationRuns.push(
|
|
7849
|
+
...buildValidationRunDiagnostics(quality.validationRuns, revisionAttempt),
|
|
7850
|
+
);
|
|
7400
7851
|
const validationCommandElapsedMs = quality.validationRuns.reduce(
|
|
7401
7852
|
(total, run) => total + Math.max(0, Number(run.elapsedMs) || 0),
|
|
7402
7853
|
0,
|
|
@@ -7440,14 +7891,54 @@ export async function executeJob(
|
|
|
7440
7891
|
blocker: null,
|
|
7441
7892
|
}
|
|
7442
7893
|
: quality;
|
|
7894
|
+
const validationPassed =
|
|
7895
|
+
quality.validationRuns.length > 0 && quality.validationRuns.every((run) => run.ok);
|
|
7896
|
+
const skipCriticAfterExecutorTimeout = shouldSkipCriticAfterExecutorTimeout({
|
|
7897
|
+
executor,
|
|
7898
|
+
policyMode: qualityGatePolicy.mode,
|
|
7899
|
+
executorText,
|
|
7900
|
+
qualityOk: quality.ok,
|
|
7901
|
+
validationPassed,
|
|
7902
|
+
qualityIssues: qualityForCritic.issues,
|
|
7903
|
+
changedPaths: quality.changedPaths,
|
|
7904
|
+
});
|
|
7443
7905
|
const critic =
|
|
7444
|
-
quality.skipped || !qualityGatePolicy.criticGateEnabled
|
|
7906
|
+
quality.skipped || !qualityGatePolicy.criticGateEnabled || skipCriticAfterExecutorTimeout
|
|
7445
7907
|
? null
|
|
7446
7908
|
: executor === "openai_codex"
|
|
7447
7909
|
? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
|
|
7448
7910
|
: await runTaskCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog);
|
|
7911
|
+
const annotateTerminalResult = (
|
|
7912
|
+
terminalResult: JobResult,
|
|
7913
|
+
terminalStage: string,
|
|
7914
|
+
changedPaths: string[] = quality.changedPaths,
|
|
7915
|
+
): JobResult =>
|
|
7916
|
+
withJobDiagnostics(terminalResult, {
|
|
7917
|
+
terminal: buildTerminalDiagnostics({
|
|
7918
|
+
result: terminalResult,
|
|
7919
|
+
executor,
|
|
7920
|
+
changedPaths,
|
|
7921
|
+
terminalStage,
|
|
7922
|
+
timeoutMs: executionBudgetMs,
|
|
7923
|
+
metadata: {
|
|
7924
|
+
revisionAttempt,
|
|
7925
|
+
executorElapsedMs,
|
|
7926
|
+
qualityElapsedMs,
|
|
7927
|
+
validationFailureScope: quality.validationFailureScope,
|
|
7928
|
+
validationRuns: quality.validationRuns.length,
|
|
7929
|
+
criticScore: critic?.score ?? null,
|
|
7930
|
+
},
|
|
7931
|
+
}),
|
|
7932
|
+
validationRuns: [...diagnosticValidationRuns],
|
|
7933
|
+
patchSnapshots: [...diagnosticPatchSnapshots],
|
|
7934
|
+
});
|
|
7449
7935
|
if (!qualityGatePolicy.criticGateEnabled) {
|
|
7450
7936
|
onLog?.("stdout", "[CriticGate] Disabled by workerpals.quality_critic_gate_enabled=false.");
|
|
7937
|
+
} else if (skipCriticAfterExecutorTimeout) {
|
|
7938
|
+
onLog?.(
|
|
7939
|
+
"stdout",
|
|
7940
|
+
"[CriticGate] Skipping Codex critic after primary Codex executor timeout because deterministic quality and validation are clean.",
|
|
7941
|
+
);
|
|
7451
7942
|
}
|
|
7452
7943
|
const rolloutScore = workerAttemptRolloutScore({
|
|
7453
7944
|
executorElapsedMs,
|
|
@@ -7500,7 +7991,7 @@ export async function executeJob(
|
|
|
7500
7991
|
"stderr",
|
|
7501
7992
|
"[PublishGate] Disabled by workerpals.quality_publish_gate_enabled=false; returning worker result despite gate failures.",
|
|
7502
7993
|
);
|
|
7503
|
-
|
|
7994
|
+
const advisoryResult: JobResult = {
|
|
7504
7995
|
...result,
|
|
7505
7996
|
summary: `${result.summary} (publish gate disabled; quality gate findings were advisory)`,
|
|
7506
7997
|
stderr: truncate(
|
|
@@ -7515,6 +8006,7 @@ export async function executeJob(
|
|
|
7515
8006
|
),
|
|
7516
8007
|
exitCode: typeof result.exitCode === "number" ? result.exitCode : 0,
|
|
7517
8008
|
};
|
|
8009
|
+
return annotateTerminalResult(advisoryResult, "quality");
|
|
7518
8010
|
}
|
|
7519
8011
|
|
|
7520
8012
|
if (!deterministicRequiresRevision && !criticRequiresRevision) {
|
|
@@ -7533,13 +8025,14 @@ export async function executeJob(
|
|
|
7533
8025
|
outputPolicyForRuntime(runtimeConfig),
|
|
7534
8026
|
);
|
|
7535
8027
|
onLog?.("stderr", `[QualityGate] ${requiredSummary}`);
|
|
7536
|
-
|
|
8028
|
+
const failure: JobResult = {
|
|
7537
8029
|
ok: false,
|
|
7538
8030
|
summary: requiredSummary,
|
|
7539
8031
|
stdout: result.stdout,
|
|
7540
8032
|
stderr: diagnostics,
|
|
7541
8033
|
exitCode: 4,
|
|
7542
8034
|
};
|
|
8035
|
+
return annotateTerminalResult(failure, "validation");
|
|
7543
8036
|
}
|
|
7544
8037
|
if (critic) {
|
|
7545
8038
|
onLog?.(
|
|
@@ -7547,7 +8040,7 @@ export async function executeJob(
|
|
|
7547
8040
|
`[CriticGate] review score ${critic.score.toFixed(1)}/10 (threshold ${qualityCriticMinScore}).`,
|
|
7548
8041
|
);
|
|
7549
8042
|
}
|
|
7550
|
-
return result;
|
|
8043
|
+
return annotateTerminalResult(result, "completed");
|
|
7551
8044
|
}
|
|
7552
8045
|
|
|
7553
8046
|
const blockerIssue = quality.blocker
|
|
@@ -7607,13 +8100,14 @@ export async function executeJob(
|
|
|
7607
8100
|
} else if (quality.requiredValidationFailures.length > 0) {
|
|
7608
8101
|
const requiredSummary = `Required vision.md validation blocked publishing: ${quality.requiredValidationFailures.join("; ")}`;
|
|
7609
8102
|
onLog?.("stderr", `[QualityGate] ${requiredSummary}`);
|
|
7610
|
-
|
|
8103
|
+
const failure: JobResult = {
|
|
7611
8104
|
ok: false,
|
|
7612
8105
|
summary: requiredSummary,
|
|
7613
8106
|
stdout: result.stdout,
|
|
7614
8107
|
stderr: blockerDiagnostics,
|
|
7615
8108
|
exitCode: 4,
|
|
7616
8109
|
};
|
|
8110
|
+
return annotateTerminalResult(failure, "validation");
|
|
7617
8111
|
} else if (shouldSoftPassValidationBlocker(qualityGatePolicy, quality.blocker)) {
|
|
7618
8112
|
onLog?.(
|
|
7619
8113
|
"stderr",
|
|
@@ -7622,7 +8116,7 @@ export async function executeJob(
|
|
|
7622
8116
|
260,
|
|
7623
8117
|
)}`,
|
|
7624
8118
|
);
|
|
7625
|
-
|
|
8119
|
+
const softPass: JobResult = {
|
|
7626
8120
|
...result,
|
|
7627
8121
|
summary:
|
|
7628
8122
|
`${result.summary} ` +
|
|
@@ -7630,15 +8124,17 @@ export async function executeJob(
|
|
|
7630
8124
|
stderr: blockerDiagnostics,
|
|
7631
8125
|
exitCode: typeof result.exitCode === "number" ? result.exitCode : 0,
|
|
7632
8126
|
};
|
|
8127
|
+
return annotateTerminalResult(softPass, "quality");
|
|
7633
8128
|
} else {
|
|
7634
8129
|
onLog?.("stderr", `[QualityGate] ${blockerSummary}`);
|
|
7635
|
-
|
|
8130
|
+
const failure: JobResult = {
|
|
7636
8131
|
ok: false,
|
|
7637
8132
|
summary: blockerSummary,
|
|
7638
8133
|
stdout: result.stdout,
|
|
7639
8134
|
stderr: blockerDiagnostics,
|
|
7640
8135
|
exitCode: 4,
|
|
7641
8136
|
};
|
|
8137
|
+
return annotateTerminalResult(failure, "quality");
|
|
7642
8138
|
}
|
|
7643
8139
|
}
|
|
7644
8140
|
if (revisionAttempt >= activeMaxAutoRevisions) {
|
|
@@ -7655,13 +8151,14 @@ export async function executeJob(
|
|
|
7655
8151
|
);
|
|
7656
8152
|
const requiredSummary = `Required vision.md validation failed after ${revisionAttempt} auto-revision attempt(s): ${quality.requiredValidationFailures.join("; ")}`;
|
|
7657
8153
|
onLog?.("stderr", `[QualityGate] ${requiredSummary}`);
|
|
7658
|
-
|
|
8154
|
+
const failure: JobResult = {
|
|
7659
8155
|
ok: false,
|
|
7660
8156
|
summary: requiredSummary,
|
|
7661
8157
|
stdout: result.stdout,
|
|
7662
8158
|
stderr: diagnostics,
|
|
7663
8159
|
exitCode: 4,
|
|
7664
8160
|
};
|
|
8161
|
+
return annotateTerminalResult(failure, "validation");
|
|
7665
8162
|
}
|
|
7666
8163
|
if (qualitySoftPassOnExhausted) {
|
|
7667
8164
|
const diagnostics = truncate(
|
|
@@ -7677,14 +8174,15 @@ export async function executeJob(
|
|
|
7677
8174
|
260,
|
|
7678
8175
|
)}`,
|
|
7679
8176
|
);
|
|
7680
|
-
|
|
8177
|
+
const softPass: JobResult = {
|
|
7681
8178
|
...result,
|
|
7682
8179
|
summary: `${result.summary} (quality gate soft-pass after ${revisionAttempt} auto-revision attempt(s))`,
|
|
7683
8180
|
stderr: diagnostics,
|
|
7684
8181
|
exitCode: typeof result.exitCode === "number" ? result.exitCode : 0,
|
|
7685
8182
|
};
|
|
8183
|
+
return annotateTerminalResult(softPass, "quality");
|
|
7686
8184
|
}
|
|
7687
|
-
|
|
8185
|
+
const failure: JobResult = {
|
|
7688
8186
|
ok: false,
|
|
7689
8187
|
summary: `Quality gate failed after ${revisionAttempt} auto-revision attempt(s): ${toSingleLine(
|
|
7690
8188
|
issueSummary,
|
|
@@ -7699,6 +8197,7 @@ export async function executeJob(
|
|
|
7699
8197
|
),
|
|
7700
8198
|
exitCode: 4,
|
|
7701
8199
|
};
|
|
8200
|
+
return annotateTerminalResult(failure, "quality");
|
|
7702
8201
|
}
|
|
7703
8202
|
|
|
7704
8203
|
const revisionBudget = qualityRevisionBudgetDecision({
|
|
@@ -7715,7 +8214,7 @@ export async function executeJob(
|
|
|
7715
8214
|
220,
|
|
7716
8215
|
)}`;
|
|
7717
8216
|
onLog?.("stderr", `[QualityGate] ${budgetSummary}`);
|
|
7718
|
-
|
|
8217
|
+
const failure: JobResult = {
|
|
7719
8218
|
ok: false,
|
|
7720
8219
|
summary: budgetSummary,
|
|
7721
8220
|
stdout: result.stdout,
|
|
@@ -7731,6 +8230,7 @@ export async function executeJob(
|
|
|
7731
8230
|
),
|
|
7732
8231
|
exitCode: 4,
|
|
7733
8232
|
};
|
|
8233
|
+
return annotateTerminalResult(failure, "quality");
|
|
7734
8234
|
}
|
|
7735
8235
|
|
|
7736
8236
|
revisionAttempt += 1;
|