@pushpalsdev/cli 1.0.85 → 1.0.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +1 -1
- package/package.json +2 -2
- package/runtime/prompts/remotebuddy/autonomy_ideation_system_prompt.md +4 -1
- package/runtime/prompts/remotebuddy/autonomy_planning_system_prompt.md +1 -1
- package/runtime/prompts/remotebuddy/remotebuddy_system_prompt.md +2 -2
- package/runtime/prompts/workerpals/miniswe_completion_requirement.md +1 -1
- package/runtime/prompts/workerpals/miniswe_explicit_targets_block.md +1 -1
- package/runtime/prompts/workerpals/openai_codex_task_execute_system_prompt.md +4 -1
- package/runtime/prompts/workerpals/openhands_minimal_system_prompt.j2 +3 -1
- package/runtime/prompts/workerpals/openhands_task_execute_system_prompt.md +2 -1
- package/runtime/prompts/workerpals/workerpals_system_prompt.md +2 -2
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +41 -45
- package/runtime/sandbox/apps/workerpals/src/backends/miniswe/miniswe_executor.py +5 -34
- package/runtime/sandbox/apps/workerpals/src/backends/openhands/openhands_executor.py +3 -2
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +328 -71
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +70 -25
- package/runtime/sandbox/packages/shared/src/autonomy_policy.ts +14 -8
- package/runtime/sandbox/packages/shared/src/communication.ts +4 -1
- package/runtime/sandbox/packages/shared/src/config.ts +1 -1
- package/runtime/sandbox/prompts/workerpals/miniswe_completion_requirement.md +1 -1
- package/runtime/sandbox/prompts/workerpals/miniswe_explicit_targets_block.md +1 -1
- package/runtime/sandbox/prompts/workerpals/openai_codex_task_execute_system_prompt.md +4 -1
- package/runtime/sandbox/prompts/workerpals/openhands_minimal_system_prompt.j2 +3 -1
- package/runtime/sandbox/prompts/workerpals/openhands_task_execute_system_prompt.md +2 -1
- package/runtime/sandbox/prompts/workerpals/workerpals_system_prompt.md +2 -2
|
@@ -19,7 +19,6 @@ import {
|
|
|
19
19
|
normalizeTargetPath,
|
|
20
20
|
requirementsForValidationCommand,
|
|
21
21
|
sanitizeSourceControlIdentityField,
|
|
22
|
-
validateScopeInvariants,
|
|
23
22
|
type AutonomyComponentArea,
|
|
24
23
|
type SourceControlCommitIdentity,
|
|
25
24
|
type ToolRequirement,
|
|
@@ -65,7 +64,7 @@ export interface TaskExecutePlanning {
|
|
|
65
64
|
finalizationBudgetMs: number;
|
|
66
65
|
}
|
|
67
66
|
|
|
68
|
-
interface ValidationExecutionResult {
|
|
67
|
+
export interface ValidationExecutionResult {
|
|
69
68
|
step: string;
|
|
70
69
|
command: string;
|
|
71
70
|
ok: boolean;
|
|
@@ -91,6 +90,7 @@ interface DeterministicQualityResult {
|
|
|
91
90
|
validationRuns: ValidationExecutionResult[];
|
|
92
91
|
requiredValidationFailures: string[];
|
|
93
92
|
blocker: ValidationBlocker | null;
|
|
93
|
+
validationFailureScope: "none" | "task_scope" | "outside_task_scope";
|
|
94
94
|
}
|
|
95
95
|
|
|
96
96
|
interface CriticReview {
|
|
@@ -137,9 +137,11 @@ export function shouldReviseRequiredValidationBlocker(opts: {
|
|
|
137
137
|
blocker: ValidationBlocker | null;
|
|
138
138
|
revisionAttempt: number;
|
|
139
139
|
maxAutoRevisions: number;
|
|
140
|
+
outsideTaskScope?: boolean;
|
|
140
141
|
}): boolean {
|
|
141
142
|
if (opts.requiredValidationFailures.length === 0) return false;
|
|
142
143
|
if (!opts.blocker) return false;
|
|
144
|
+
if (opts.outsideTaskScope) return false;
|
|
143
145
|
if (opts.blocker.category !== "repo") return false;
|
|
144
146
|
return opts.revisionAttempt < opts.maxAutoRevisions;
|
|
145
147
|
}
|
|
@@ -615,7 +617,8 @@ async function runValidationCommand(
|
|
|
615
617
|
timeoutMs: number,
|
|
616
618
|
outputPolicy: Partial<OutputCompactionPolicy>,
|
|
617
619
|
): Promise<ValidationExecutionResult> {
|
|
618
|
-
const
|
|
620
|
+
const env = buildWorkerSandboxWritableEnv(repo);
|
|
621
|
+
const argv = prepareValidationCommandArgv(command, env);
|
|
619
622
|
if (!argv) {
|
|
620
623
|
return {
|
|
621
624
|
step: command,
|
|
@@ -631,7 +634,7 @@ async function runValidationCommand(
|
|
|
631
634
|
const startedAt = Date.now();
|
|
632
635
|
const proc = Bun.spawn(argv, {
|
|
633
636
|
cwd: repo,
|
|
634
|
-
env
|
|
637
|
+
env,
|
|
635
638
|
stdout: "pipe",
|
|
636
639
|
stderr: "pipe",
|
|
637
640
|
});
|
|
@@ -696,6 +699,35 @@ export function resolveValidationCommandTimeoutMs(command: string, baseTimeoutMs
|
|
|
696
699
|
return Math.max(normalizedBase, 600_000);
|
|
697
700
|
}
|
|
698
701
|
|
|
702
|
+
function commandHasPortArg(argv: string[]): boolean {
|
|
703
|
+
return argv.some((token) => token === "--port" || token.startsWith("--port="));
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
function shouldInjectBrowserValidationPort(command: string, argv: string[]): boolean {
|
|
707
|
+
if (commandHasPortArg(argv)) return false;
|
|
708
|
+
if (!isLongRunningBrowserValidationCommand(command)) return false;
|
|
709
|
+
return /\b(web:e2e|e2e:web|browser:e2e|smoke:web|web:smoke|browser:smoke)\b/.test(
|
|
710
|
+
validationCommandKey(command),
|
|
711
|
+
);
|
|
712
|
+
}
|
|
713
|
+
|
|
714
|
+
export function prepareValidationCommandArgv(
|
|
715
|
+
command: string,
|
|
716
|
+
env: Record<string, string>,
|
|
717
|
+
): string[] | null {
|
|
718
|
+
const argv = tokenizeValidationCommandArgv(command);
|
|
719
|
+
if (!argv) return null;
|
|
720
|
+
const port = String(env.EXPO_DEV_SERVER_PORT ?? "").trim();
|
|
721
|
+
if (!port || !shouldInjectBrowserValidationPort(command, argv)) return argv;
|
|
722
|
+
return [...argv, "--", "--port", port];
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
function isBrowserValidationInfrastructureDigest(digest: string): boolean {
|
|
726
|
+
return /\b(ERR_SOCKET_BAD_PORT|EADDRINUSE|ECONNREFUSED|ECONNRESET|ETIMEDOUT|timed out|timeout|port|browser runtime|playwright install|executable doesn't exist)\b/i.test(
|
|
727
|
+
digest,
|
|
728
|
+
);
|
|
729
|
+
}
|
|
730
|
+
|
|
699
731
|
interface ToolAvailabilityResult {
|
|
700
732
|
requirement: ToolRequirement;
|
|
701
733
|
ok: boolean;
|
|
@@ -802,6 +834,96 @@ function extractPreparedMergeConflictPaths(params: Record<string, unknown>): str
|
|
|
802
834
|
.filter(Boolean);
|
|
803
835
|
}
|
|
804
836
|
|
|
837
|
+
function normalizeValidationPathToken(value: string): string | null {
|
|
838
|
+
const normalized = value
|
|
839
|
+
.trim()
|
|
840
|
+
.replace(/^['"`(<[]+/, "")
|
|
841
|
+
.replace(/[>'"`)\],.;:]+$/, "")
|
|
842
|
+
.replace(/\\/g, "/")
|
|
843
|
+
.replace(/^\.\/+/, "")
|
|
844
|
+
.replace(/\/+/g, "/");
|
|
845
|
+
if (!normalized || normalized.startsWith("../") || normalized.includes("/../")) return null;
|
|
846
|
+
if (!/[./]/.test(normalized)) return null;
|
|
847
|
+
if (/^(https?|file):/i.test(normalized)) return null;
|
|
848
|
+
return normalized;
|
|
849
|
+
}
|
|
850
|
+
|
|
851
|
+
function extractPathTokensFromValidationOutput(value: string): string[] {
|
|
852
|
+
const seen = new Set<string>();
|
|
853
|
+
const out: string[] = [];
|
|
854
|
+
const add = (raw: string | undefined) => {
|
|
855
|
+
if (!raw) return;
|
|
856
|
+
const normalized = normalizeValidationPathToken(raw);
|
|
857
|
+
if (!normalized || seen.has(normalized)) return;
|
|
858
|
+
seen.add(normalized);
|
|
859
|
+
out.push(normalized);
|
|
860
|
+
};
|
|
861
|
+
const normalized = stripAnsiControlSequences(value);
|
|
862
|
+
for (const match of normalized.matchAll(/[A-Za-z0-9_.@-]+(?:\/[A-Za-z0-9_.@-]+)+(?:\.[A-Za-z0-9_.-]+)?/g)) {
|
|
863
|
+
add(match[0]);
|
|
864
|
+
}
|
|
865
|
+
for (const match of normalized.matchAll(/(?:from|in|at)\s+['"`]?([^'"`\s]+\/[^'"`\s]+)['"`]?/gi)) {
|
|
866
|
+
add(match[1]);
|
|
867
|
+
}
|
|
868
|
+
return out;
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
function literalScopePrefix(value: string): string | null {
|
|
872
|
+
const normalized = normalizeValidationPathToken(value.replace(/\*\*?.*$/, "").replace(/\/+$/, ""));
|
|
873
|
+
if (!normalized || normalized === ".") return null;
|
|
874
|
+
return normalized;
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
function pathMatchesScopeHint(path: string, hint: string): boolean {
|
|
878
|
+
const normalizedPath = normalizeValidationPathToken(path);
|
|
879
|
+
const normalizedHint = hint.trim().replace(/\\/g, "/").replace(/^\.\/+/, "");
|
|
880
|
+
if (!normalizedPath || !normalizedHint) return false;
|
|
881
|
+
if (matchesGlob(normalizedPath, normalizedHint)) return true;
|
|
882
|
+
const prefix = literalScopePrefix(normalizedHint);
|
|
883
|
+
if (!prefix) return false;
|
|
884
|
+
return normalizedPath === prefix || normalizedPath.startsWith(`${prefix}/`);
|
|
885
|
+
}
|
|
886
|
+
|
|
887
|
+
export function classifyValidationFailureScope(
|
|
888
|
+
runs: ValidationExecutionResult[],
|
|
889
|
+
planning: TaskExecutePlanning,
|
|
890
|
+
changedPaths: string[],
|
|
891
|
+
targetPath?: string,
|
|
892
|
+
): "none" | "task_scope" | "outside_task_scope" {
|
|
893
|
+
const failedRuns = runs.filter((run) => !run.ok && run.exitCode !== 127);
|
|
894
|
+
if (failedRuns.length === 0) return "none";
|
|
895
|
+
const scopeHints = [
|
|
896
|
+
targetPath ?? "",
|
|
897
|
+
...changedPaths,
|
|
898
|
+
...(planning.targetPaths ?? []),
|
|
899
|
+
...(planning.scope.writeGlobs ?? []),
|
|
900
|
+
]
|
|
901
|
+
.map((entry) => entry.trim().replace(/\\/g, "/"))
|
|
902
|
+
.filter(Boolean);
|
|
903
|
+
if (scopeHints.length === 0) return "none";
|
|
904
|
+
|
|
905
|
+
const combined = failedRuns
|
|
906
|
+
.flatMap((run) => [run.stdout, run.stderr])
|
|
907
|
+
.filter(Boolean)
|
|
908
|
+
.join("\n");
|
|
909
|
+
const lowerCombined = combined.toLowerCase().replace(/\\/g, "/");
|
|
910
|
+
for (const hint of scopeHints) {
|
|
911
|
+
const normalized = literalScopePrefix(hint);
|
|
912
|
+
if (normalized && normalized.length >= 4 && lowerCombined.includes(normalized.toLowerCase())) {
|
|
913
|
+
return "task_scope";
|
|
914
|
+
}
|
|
915
|
+
}
|
|
916
|
+
|
|
917
|
+
const pathTokens = extractPathTokensFromValidationOutput(combined).filter(
|
|
918
|
+
(token) => !/^(node_modules|\.bun|bun|npm|pnpm|yarn)\//i.test(token),
|
|
919
|
+
);
|
|
920
|
+
if (pathTokens.length === 0) return "none";
|
|
921
|
+
if (pathTokens.some((token) => scopeHints.some((hint) => pathMatchesScopeHint(token, hint)))) {
|
|
922
|
+
return "task_scope";
|
|
923
|
+
}
|
|
924
|
+
return "outside_task_scope";
|
|
925
|
+
}
|
|
926
|
+
|
|
805
927
|
function detectValidationBlocker(runs: ValidationExecutionResult[]): ValidationBlocker | null {
|
|
806
928
|
const combined = runs
|
|
807
929
|
.flatMap((run) => [run.stdout, run.stderr])
|
|
@@ -982,7 +1104,51 @@ function extractRunnableValidationCommand(step: string): string | null {
|
|
|
982
1104
|
}
|
|
983
1105
|
|
|
984
1106
|
function validationCommandKey(command: string): string {
|
|
985
|
-
|
|
1107
|
+
const argv = tokenizeValidationCommandArgv(command);
|
|
1108
|
+
if (argv && argv.length > 0) {
|
|
1109
|
+
const normalized = argv.map((entry) => entry.trim()).filter(Boolean);
|
|
1110
|
+
if (normalized[0]?.toLowerCase() === "bunx") {
|
|
1111
|
+
normalized.splice(0, 1, "bun", "x");
|
|
1112
|
+
}
|
|
1113
|
+
return normalized.join(" ").replace(/\s+/g, " ").toLowerCase();
|
|
1114
|
+
}
|
|
1115
|
+
return command
|
|
1116
|
+
.trim()
|
|
1117
|
+
.replace(/\s+/g, " ")
|
|
1118
|
+
.replace(/^bunx\b/i, "bun x")
|
|
1119
|
+
.toLowerCase();
|
|
1120
|
+
}
|
|
1121
|
+
|
|
1122
|
+
export function extractValidationFailureDigest(run: {
|
|
1123
|
+
exitCode?: number;
|
|
1124
|
+
stdout?: string;
|
|
1125
|
+
stderr?: string;
|
|
1126
|
+
elapsedMs?: number;
|
|
1127
|
+
}): string {
|
|
1128
|
+
const combined = stripAnsiControlSequences([run.stderr, run.stdout].filter(Boolean).join("\n"));
|
|
1129
|
+
const patterns = [
|
|
1130
|
+
/\bCannot find module\s+['"`][^'"`\r\n]+['"`][^\r\n]*/i,
|
|
1131
|
+
/\bFailed to resolve import\s+['"`][^'"`\r\n]+['"`][^\r\n]*/i,
|
|
1132
|
+
/\bCould not resolve\s+['"`]?[^'"`\r\n]+['"`]?[^\r\n]*/i,
|
|
1133
|
+
/\bModule not found[^\r\n]*/i,
|
|
1134
|
+
/\bERR_SOCKET_BAD_PORT[^\r\n]*/i,
|
|
1135
|
+
/\berror TS\d+:[^\r\n]*/i,
|
|
1136
|
+
/\bError:\s+[^\r\n]*/i,
|
|
1137
|
+
];
|
|
1138
|
+
for (const pattern of patterns) {
|
|
1139
|
+
const match = combined.match(pattern);
|
|
1140
|
+
if (match?.[0]) return toSingleLine(match[0], 180);
|
|
1141
|
+
}
|
|
1142
|
+
const firstMeaningfulLine = combined
|
|
1143
|
+
.split(/\r?\n/)
|
|
1144
|
+
.map((line) => line.trim())
|
|
1145
|
+
.find((line) => /\b(error|failed|cannot|could not|timeout|timed out)\b/i.test(line));
|
|
1146
|
+
if (firstMeaningfulLine) return toSingleLine(firstMeaningfulLine, 180);
|
|
1147
|
+
if (Number(run.exitCode) === 124) {
|
|
1148
|
+
const elapsed = Number.isFinite(Number(run.elapsedMs)) ? ` after ${Number(run.elapsedMs)}ms` : "";
|
|
1149
|
+
return `timed out${elapsed}`;
|
|
1150
|
+
}
|
|
1151
|
+
return "";
|
|
986
1152
|
}
|
|
987
1153
|
|
|
988
1154
|
export function collectRequiredValidationFailures(
|
|
@@ -995,7 +1161,8 @@ export function collectRequiredValidationFailures(
|
|
|
995
1161
|
.filter((run) => requiredKeys.has(validationCommandKey(run.command)) && !run.ok)
|
|
996
1162
|
.map((run) => {
|
|
997
1163
|
const exitCode = Number.isFinite(Number(run.exitCode)) ? Number(run.exitCode) : "unknown";
|
|
998
|
-
|
|
1164
|
+
const digest = extractValidationFailureDigest(run);
|
|
1165
|
+
return `${run.command} exited ${exitCode}${digest ? ` (${digest})` : ""}`;
|
|
999
1166
|
});
|
|
1000
1167
|
}
|
|
1001
1168
|
|
|
@@ -1055,7 +1222,7 @@ function dedupeValidationCommands(...groups: string[][]): string[] {
|
|
|
1055
1222
|
for (const command of group) {
|
|
1056
1223
|
const trimmed = command.trim();
|
|
1057
1224
|
if (!trimmed) continue;
|
|
1058
|
-
const key = trimmed
|
|
1225
|
+
const key = validationCommandKey(trimmed);
|
|
1059
1226
|
if (seen.has(key)) continue;
|
|
1060
1227
|
seen.add(key);
|
|
1061
1228
|
out.push(trimmed);
|
|
@@ -1160,14 +1327,19 @@ export function inferFallbackValidationCommandsForTestTask(
|
|
|
1160
1327
|
return candidates.slice(0, 4);
|
|
1161
1328
|
}
|
|
1162
1329
|
|
|
1163
|
-
function isTestFocusedTask(
|
|
1330
|
+
export function isTestFocusedTask(
|
|
1164
1331
|
instruction: string,
|
|
1165
1332
|
planning: TaskExecutePlanning,
|
|
1166
1333
|
targetPath?: string,
|
|
1167
1334
|
): boolean {
|
|
1168
1335
|
const lowerInstruction = instruction.toLowerCase();
|
|
1169
1336
|
if (
|
|
1170
|
-
/\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/.test(
|
|
1337
|
+
/\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b.{0,80}\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/.test(
|
|
1338
|
+
lowerInstruction,
|
|
1339
|
+
) ||
|
|
1340
|
+
/\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b.{0,80}\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b/.test(
|
|
1341
|
+
lowerInstruction,
|
|
1342
|
+
)
|
|
1171
1343
|
) {
|
|
1172
1344
|
return true;
|
|
1173
1345
|
}
|
|
@@ -1179,7 +1351,9 @@ function isTestFocusedTask(
|
|
|
1179
1351
|
if (pathHints.some((entry) => isLikelyTestPath(entry))) return true;
|
|
1180
1352
|
if (
|
|
1181
1353
|
planning.acceptanceCriteria.some((entry) =>
|
|
1182
|
-
/\b(test|tests|coverage|unit|integration|
|
|
1354
|
+
/\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b.{0,80}\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/i.test(
|
|
1355
|
+
entry,
|
|
1356
|
+
),
|
|
1183
1357
|
)
|
|
1184
1358
|
) {
|
|
1185
1359
|
return true;
|
|
@@ -1217,6 +1391,10 @@ async function runDeterministicQualityGate(
|
|
|
1217
1391
|
runtimeConfig: WorkerpalsRuntimeConfig,
|
|
1218
1392
|
qualityGatePolicy: QualityGatePolicy,
|
|
1219
1393
|
onLog?: (stream: "stdout" | "stderr", line: string) => void,
|
|
1394
|
+
validationRetryState?: {
|
|
1395
|
+
previousFailureDigests?: Map<string, string>;
|
|
1396
|
+
revisionAttempt?: number;
|
|
1397
|
+
},
|
|
1220
1398
|
): Promise<DeterministicQualityResult> {
|
|
1221
1399
|
const instruction = String(params.instruction ?? "");
|
|
1222
1400
|
const targetPath = String(params.targetPath ?? params.path ?? "").trim() || undefined;
|
|
@@ -1245,6 +1423,7 @@ async function runDeterministicQualityGate(
|
|
|
1245
1423
|
validationRuns: [],
|
|
1246
1424
|
requiredValidationFailures: [],
|
|
1247
1425
|
blocker: null,
|
|
1426
|
+
validationFailureScope: "none",
|
|
1248
1427
|
};
|
|
1249
1428
|
}
|
|
1250
1429
|
|
|
@@ -1394,6 +1573,33 @@ async function runDeterministicQualityGate(
|
|
|
1394
1573
|
);
|
|
1395
1574
|
continue;
|
|
1396
1575
|
}
|
|
1576
|
+
const previousDigest = validationRetryState?.previousFailureDigests?.get(
|
|
1577
|
+
validationCommandKey(command),
|
|
1578
|
+
);
|
|
1579
|
+
if (
|
|
1580
|
+
previousDigest &&
|
|
1581
|
+
Number(validationRetryState?.revisionAttempt ?? 0) > 0 &&
|
|
1582
|
+
isLongRunningBrowserValidationCommand(command) &&
|
|
1583
|
+
isBrowserValidationInfrastructureDigest(previousDigest)
|
|
1584
|
+
) {
|
|
1585
|
+
const stderr =
|
|
1586
|
+
`Skipped repeated browser validation after the same command failed in an earlier revision: ${previousDigest}. ` +
|
|
1587
|
+
"Run it once after the underlying blocker changes.";
|
|
1588
|
+
validationRuns.push({
|
|
1589
|
+
step: command,
|
|
1590
|
+
command,
|
|
1591
|
+
ok: false,
|
|
1592
|
+
exitCode: 124,
|
|
1593
|
+
stdout: "",
|
|
1594
|
+
stderr,
|
|
1595
|
+
elapsedMs: 1,
|
|
1596
|
+
});
|
|
1597
|
+
onLog?.(
|
|
1598
|
+
"stderr",
|
|
1599
|
+
`[ValidationGate] Skipped repeated long browser validation: ${command} (${previousDigest})`,
|
|
1600
|
+
);
|
|
1601
|
+
continue;
|
|
1602
|
+
}
|
|
1397
1603
|
onLog?.("stdout", `[ValidationGate] Running "${command}"`);
|
|
1398
1604
|
const run = await runValidationCommand(
|
|
1399
1605
|
repo,
|
|
@@ -1402,7 +1608,8 @@ async function runDeterministicQualityGate(
|
|
|
1402
1608
|
outputPolicy,
|
|
1403
1609
|
);
|
|
1404
1610
|
validationRuns.push(run);
|
|
1405
|
-
const
|
|
1611
|
+
const digest = run.ok ? "" : extractValidationFailureDigest(run);
|
|
1612
|
+
const runSummary = `[ValidationGate] ${run.ok ? "Passed" : "Failed"} (${run.elapsedMs}ms, exit ${run.exitCode}): ${command}${digest ? ` - ${digest}` : ""}`;
|
|
1406
1613
|
onLog?.(run.ok ? "stdout" : "stderr", runSummary);
|
|
1407
1614
|
}
|
|
1408
1615
|
// exit 127 = command not found: separate tool-availability issues from real test failures.
|
|
@@ -1442,6 +1649,15 @@ async function runDeterministicQualityGate(
|
|
|
1442
1649
|
const blocker = qualityGatePolicy.validationGateEnabled
|
|
1443
1650
|
? detectValidationBlocker(validationRuns)
|
|
1444
1651
|
: null;
|
|
1652
|
+
const scopedValidationFailure = qualityGatePolicy.validationGateEnabled
|
|
1653
|
+
? classifyValidationFailureScope(validationRuns, planning, changedPaths, targetPath)
|
|
1654
|
+
: "none";
|
|
1655
|
+
if (scopedValidationFailure === "outside_task_scope") {
|
|
1656
|
+
onLog?.(
|
|
1657
|
+
"stderr",
|
|
1658
|
+
"[ValidationGate] Required validation failures appear outside the task write scope; treating them as publish blockers, not repair instructions.",
|
|
1659
|
+
);
|
|
1660
|
+
}
|
|
1445
1661
|
|
|
1446
1662
|
return {
|
|
1447
1663
|
ok: issues.length === 0 && blocker === null,
|
|
@@ -1454,6 +1670,7 @@ async function runDeterministicQualityGate(
|
|
|
1454
1670
|
validationRuns,
|
|
1455
1671
|
requiredValidationFailures,
|
|
1456
1672
|
blocker,
|
|
1673
|
+
validationFailureScope: scopedValidationFailure,
|
|
1457
1674
|
};
|
|
1458
1675
|
}
|
|
1459
1676
|
|
|
@@ -2285,12 +2502,21 @@ function buildStageTargets(kind: string, params?: Record<string, unknown>): stri
|
|
|
2285
2502
|
}
|
|
2286
2503
|
}
|
|
2287
2504
|
|
|
2288
|
-
function buildStageCommand(kind: string, params?: Record<string, unknown>): string[] | null {
|
|
2505
|
+
export function buildStageCommand(kind: string, params?: Record<string, unknown>): string[] | null {
|
|
2506
|
+
if (kind === "task.execute") {
|
|
2507
|
+
return [
|
|
2508
|
+
"add",
|
|
2509
|
+
"-A",
|
|
2510
|
+
"--",
|
|
2511
|
+
".",
|
|
2512
|
+
":(exclude)workspace/**",
|
|
2513
|
+
":(exclude)outputs/**",
|
|
2514
|
+
":(exclude).codex",
|
|
2515
|
+
":(exclude).codex/**",
|
|
2516
|
+
];
|
|
2517
|
+
}
|
|
2289
2518
|
const targets = buildStageTargets(kind, params);
|
|
2290
2519
|
if (targets.length === 0) {
|
|
2291
|
-
if (kind === "task.execute") {
|
|
2292
|
-
return ["add", "-A", "--", ".", ":(exclude)workspace/**", ":(exclude)outputs/**"];
|
|
2293
|
-
}
|
|
2294
2520
|
return null;
|
|
2295
2521
|
}
|
|
2296
2522
|
return ["add", "-A", "--", ...targets];
|
|
@@ -3460,9 +3686,10 @@ async function generateCommitMessageFromDiffViaCodex(
|
|
|
3460
3686
|
repo: string,
|
|
3461
3687
|
runtimeConfig: WorkerpalsRuntimeConfig,
|
|
3462
3688
|
): Promise<string | null> {
|
|
3689
|
+
const model = runtimeConfig.workerpals.llm.model.trim();
|
|
3690
|
+
if (!model) return null;
|
|
3463
3691
|
const codexPrefix = await resolveCodexCommandPrefix(repo, runtimeConfig.workerpals.llm.codexBin);
|
|
3464
3692
|
if (!codexPrefix) return null;
|
|
3465
|
-
const model = runtimeConfig.workerpals.llm.model.trim();
|
|
3466
3693
|
const timeoutMs = (() => {
|
|
3467
3694
|
const value = Number(runtimeConfig.workerpals.llm.codexTimeoutMs);
|
|
3468
3695
|
if (!Number.isFinite(value)) return 120_000;
|
|
@@ -3730,13 +3957,10 @@ function taskExecuteOrigin(params: Record<string, unknown>): "autonomy" | "user"
|
|
|
3730
3957
|
return "user";
|
|
3731
3958
|
}
|
|
3732
3959
|
|
|
3733
|
-
function collectWriteScopeIssuesFromChangedPaths(
|
|
3960
|
+
export function collectWriteScopeIssuesFromChangedPaths(
|
|
3734
3961
|
changedPaths: string[],
|
|
3735
3962
|
planning: TaskExecutePlanning,
|
|
3736
3963
|
): string[] {
|
|
3737
|
-
const writeGlobs = toStringArray(planning.scope.writeGlobs ?? []);
|
|
3738
|
-
if (writeGlobs.length === 0) return [];
|
|
3739
|
-
|
|
3740
3964
|
const normalizedChangedPaths = changedPaths
|
|
3741
3965
|
.map((entry) => normalizeStagePath(entry))
|
|
3742
3966
|
.filter((entry): entry is string => Boolean(entry) && entry !== ".");
|
|
@@ -3744,12 +3968,6 @@ function collectWriteScopeIssuesFromChangedPaths(
|
|
|
3744
3968
|
|
|
3745
3969
|
const forbidden = toStringArray(planning.scope.forbiddenGlobs ?? []);
|
|
3746
3970
|
const issues: string[] = [];
|
|
3747
|
-
const outOfScope = normalizedChangedPaths.filter(
|
|
3748
|
-
(path) => !writeGlobs.some((glob) => matchesGlob(path, glob)),
|
|
3749
|
-
);
|
|
3750
|
-
if (outOfScope.length > 0) {
|
|
3751
|
-
issues.push(`modified paths outside writeGlobs: ${outOfScope.join(", ")}`);
|
|
3752
|
-
}
|
|
3753
3971
|
const forbiddenTouched = normalizedChangedPaths.filter((path) =>
|
|
3754
3972
|
forbidden.some((glob) => matchesGlob(path, glob)),
|
|
3755
3973
|
);
|
|
@@ -3886,41 +4104,17 @@ function validateTaskExecutePlanning(
|
|
|
3886
4104
|
reviewAgentAllowsMultiRootScope(options?.reviewAgentResolutionType);
|
|
3887
4105
|
if (origin === "autonomy") {
|
|
3888
4106
|
const declaredComponentArea = asAutonomyComponentArea(options?.autonomyComponentArea);
|
|
3889
|
-
|
|
3890
|
-
|
|
3891
|
-
|
|
3892
|
-
|
|
3893
|
-
|
|
3894
|
-
|
|
3895
|
-
|
|
3896
|
-
|
|
3897
|
-
|
|
3898
|
-
|
|
3899
|
-
|
|
3900
|
-
};
|
|
3901
|
-
}
|
|
3902
|
-
if (
|
|
3903
|
-
!allowMultiRootAutonomyScope &&
|
|
3904
|
-
declaredComponentArea &&
|
|
3905
|
-
inferredComponentArea &&
|
|
3906
|
-
declaredComponentArea !== inferredComponentArea
|
|
3907
|
-
) {
|
|
3908
|
-
return {
|
|
3909
|
-
ok: false,
|
|
3910
|
-
message: "task.execute planning.targetPaths do not match autonomy componentArea",
|
|
3911
|
-
};
|
|
3912
|
-
}
|
|
3913
|
-
const validatedScope = validateScopeInvariants(
|
|
3914
|
-
componentArea,
|
|
3915
|
-
normalizedTargetPaths,
|
|
3916
|
-
normalizedWriteGlobs,
|
|
3917
|
-
{ requireWriteGlobs: false, allowMultipleComponentRoots: allowMultiRootAutonomyScope },
|
|
3918
|
-
);
|
|
3919
|
-
if (!validatedScope.ok) {
|
|
3920
|
-
return {
|
|
3921
|
-
ok: false,
|
|
3922
|
-
message: `task.execute scope invariants failed: ${validatedScope.errors.join("; ")}`,
|
|
3923
|
-
};
|
|
4107
|
+
if (!allowMultiRootAutonomyScope && declaredComponentArea) {
|
|
4108
|
+
const inferredComponentArea = deriveAutonomyComponentArea(
|
|
4109
|
+
normalizedTargetPaths,
|
|
4110
|
+
normalizedWriteGlobs,
|
|
4111
|
+
);
|
|
4112
|
+
if (inferredComponentArea && declaredComponentArea !== inferredComponentArea) {
|
|
4113
|
+
return {
|
|
4114
|
+
ok: false,
|
|
4115
|
+
message: "task.execute planning.targetPaths do not match autonomy componentArea",
|
|
4116
|
+
};
|
|
4117
|
+
}
|
|
3924
4118
|
}
|
|
3925
4119
|
} else if (normalizedWriteGlobs.length > 0) {
|
|
3926
4120
|
const uncoveredPaths = normalizedTargetPaths.filter(
|
|
@@ -4368,6 +4562,7 @@ export async function executeJob(
|
|
|
4368
4562
|
|
|
4369
4563
|
let revisionAttempt = 0;
|
|
4370
4564
|
let revisionHint = "";
|
|
4565
|
+
const previousValidationFailureDigests = new Map<string, string>();
|
|
4371
4566
|
while (revisionAttempt <= qualityRevisionLoopMax) {
|
|
4372
4567
|
const attemptParams: Record<string, unknown> = { ...normalizedParams };
|
|
4373
4568
|
if (revisionHint) {
|
|
@@ -4462,30 +4657,66 @@ export async function executeJob(
|
|
|
4462
4657
|
runtimeConfig,
|
|
4463
4658
|
qualityGatePolicy,
|
|
4464
4659
|
onLog,
|
|
4660
|
+
{
|
|
4661
|
+
previousFailureDigests: previousValidationFailureDigests,
|
|
4662
|
+
revisionAttempt,
|
|
4663
|
+
},
|
|
4465
4664
|
);
|
|
4665
|
+
for (const run of quality.validationRuns) {
|
|
4666
|
+
if (run.ok) continue;
|
|
4667
|
+
const digest = extractValidationFailureDigest(run);
|
|
4668
|
+
if (digest) previousValidationFailureDigests.set(validationCommandKey(run.command), digest);
|
|
4669
|
+
}
|
|
4670
|
+
const validationOutsideTaskScope =
|
|
4671
|
+
quality.validationFailureScope === "outside_task_scope";
|
|
4672
|
+
const qualityForCritic: DeterministicQualityResult = validationOutsideTaskScope
|
|
4673
|
+
? {
|
|
4674
|
+
...quality,
|
|
4675
|
+
issues: quality.issues.filter((issue) => !issue.startsWith("ValidationGate:")),
|
|
4676
|
+
validationIssues: [],
|
|
4677
|
+
validationRuns: [],
|
|
4678
|
+
blocker: null,
|
|
4679
|
+
}
|
|
4680
|
+
: quality;
|
|
4466
4681
|
const critic =
|
|
4467
4682
|
quality.skipped || !qualityGatePolicy.criticGateEnabled
|
|
4468
4683
|
? null
|
|
4469
4684
|
: executor === "openai_codex"
|
|
4470
|
-
? await runCodexCriticReview(repo, attemptParams,
|
|
4471
|
-
: await runTaskCriticReview(repo, attemptParams,
|
|
4685
|
+
? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
|
|
4686
|
+
: await runTaskCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog);
|
|
4472
4687
|
if (!qualityGatePolicy.criticGateEnabled) {
|
|
4473
4688
|
onLog?.("stdout", "[CriticGate] Disabled by workerpals.quality_critic_gate_enabled=false.");
|
|
4474
4689
|
}
|
|
4475
|
-
const
|
|
4690
|
+
const advisoryRelaxedQualityIssues = relaxAdvisoryQualityIssues(
|
|
4476
4691
|
quality.issues,
|
|
4477
4692
|
quality.validationRuns,
|
|
4478
4693
|
critic,
|
|
4479
4694
|
qualityCriticMinScore,
|
|
4480
4695
|
);
|
|
4481
|
-
|
|
4696
|
+
let effectiveQualityIssues = advisoryRelaxedQualityIssues;
|
|
4697
|
+
if (validationOutsideTaskScope) {
|
|
4698
|
+
effectiveQualityIssues = effectiveQualityIssues.filter(
|
|
4699
|
+
(issue) => !issue.startsWith("ValidationGate:"),
|
|
4700
|
+
);
|
|
4701
|
+
if (effectiveQualityIssues.length !== quality.issues.length) {
|
|
4702
|
+
onLog?.(
|
|
4703
|
+
"stderr",
|
|
4704
|
+
"[ValidationGate] Validation failures are outside the task scope; they will block publishing but will not drive another code revision.",
|
|
4705
|
+
);
|
|
4706
|
+
}
|
|
4707
|
+
}
|
|
4708
|
+
if (
|
|
4709
|
+
!validationOutsideTaskScope &&
|
|
4710
|
+
advisoryRelaxedQualityIssues.length !== quality.issues.length
|
|
4711
|
+
) {
|
|
4482
4712
|
onLog?.(
|
|
4483
4713
|
"stdout",
|
|
4484
4714
|
"[QualityGate] Assertion-balance heuristic downgraded to advisory because validation passed and critic score met threshold.",
|
|
4485
4715
|
);
|
|
4486
4716
|
}
|
|
4487
4717
|
const deterministicRequiresRevision =
|
|
4488
|
-
effectiveQualityIssues.length > 0 ||
|
|
4718
|
+
effectiveQualityIssues.length > 0 ||
|
|
4719
|
+
(quality.blocker !== null && !validationOutsideTaskScope);
|
|
4489
4720
|
const criticRequiresRevision = Boolean(critic && critic.score < qualityCriticMinScore);
|
|
4490
4721
|
if (
|
|
4491
4722
|
!qualityGatePolicy.publishGateEnabled &&
|
|
@@ -4513,6 +4744,29 @@ export async function executeJob(
|
|
|
4513
4744
|
}
|
|
4514
4745
|
|
|
4515
4746
|
if (!deterministicRequiresRevision && !criticRequiresRevision) {
|
|
4747
|
+
if (quality.requiredValidationFailures.length > 0) {
|
|
4748
|
+
const requiredSummary = `Required vision.md validation blocked publishing: ${quality.requiredValidationFailures.join("; ")}`;
|
|
4749
|
+
const diagnostics = truncate(
|
|
4750
|
+
[
|
|
4751
|
+
result.stderr ?? "",
|
|
4752
|
+
validationOutsideTaskScope
|
|
4753
|
+
? "Validation failures appear outside the task write scope and are treated as pre-existing repo blockers."
|
|
4754
|
+
: "",
|
|
4755
|
+
...quality.validationRuns.flatMap((run) => [run.stdout, run.stderr]).filter(Boolean),
|
|
4756
|
+
]
|
|
4757
|
+
.filter(Boolean)
|
|
4758
|
+
.join("\n"),
|
|
4759
|
+
outputPolicyForRuntime(runtimeConfig),
|
|
4760
|
+
);
|
|
4761
|
+
onLog?.("stderr", `[QualityGate] ${requiredSummary}`);
|
|
4762
|
+
return {
|
|
4763
|
+
ok: false,
|
|
4764
|
+
summary: requiredSummary,
|
|
4765
|
+
stdout: result.stdout,
|
|
4766
|
+
stderr: diagnostics,
|
|
4767
|
+
exitCode: 4,
|
|
4768
|
+
};
|
|
4769
|
+
}
|
|
4516
4770
|
if (critic) {
|
|
4517
4771
|
onLog?.(
|
|
4518
4772
|
"stdout",
|
|
@@ -4538,11 +4792,13 @@ export async function executeJob(
|
|
|
4538
4792
|
const activeMaxAutoRevisions = revisionLimitForQualityGateFailures({
|
|
4539
4793
|
policy: qualityGatePolicy,
|
|
4540
4794
|
qualityIssues: effectiveQualityIssues,
|
|
4541
|
-
requiredValidationFailures:
|
|
4542
|
-
|
|
4795
|
+
requiredValidationFailures: validationOutsideTaskScope
|
|
4796
|
+
? []
|
|
4797
|
+
: quality.requiredValidationFailures,
|
|
4798
|
+
blocker: validationOutsideTaskScope ? null : quality.blocker,
|
|
4543
4799
|
});
|
|
4544
4800
|
const issueSummary = issues.map((entry) => toSingleLine(entry, 180)).join(" | ");
|
|
4545
|
-
if (quality.blocker) {
|
|
4801
|
+
if (quality.blocker && !validationOutsideTaskScope) {
|
|
4546
4802
|
const blockerSummary = `Quality gate blocked by ${quality.blocker.category} issue: ${quality.blocker.detail}`;
|
|
4547
4803
|
const blockerDiagnostics = truncate(
|
|
4548
4804
|
[
|
|
@@ -4556,6 +4812,7 @@ export async function executeJob(
|
|
|
4556
4812
|
blocker: quality.blocker,
|
|
4557
4813
|
revisionAttempt,
|
|
4558
4814
|
maxAutoRevisions: qualityValidationMaxAutoRevisions,
|
|
4815
|
+
outsideTaskScope: validationOutsideTaskScope,
|
|
4559
4816
|
});
|
|
4560
4817
|
if (requiredValidationCanRevise) {
|
|
4561
4818
|
onLog?.(
|
|
@@ -4669,8 +4926,8 @@ export async function executeJob(
|
|
|
4669
4926
|
critic,
|
|
4670
4927
|
planning,
|
|
4671
4928
|
reviewFixContext,
|
|
4672
|
-
quality.validationRuns,
|
|
4673
|
-
quality.blocker,
|
|
4929
|
+
validationOutsideTaskScope ? [] : quality.validationRuns,
|
|
4930
|
+
validationOutsideTaskScope ? null : quality.blocker,
|
|
4674
4931
|
);
|
|
4675
4932
|
onLog?.(
|
|
4676
4933
|
"stderr",
|