@pushpalsdev/cli 1.0.85 → 1.0.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. package/dist/pushpals-cli.js +1 -1
  2. package/package.json +2 -2
  3. package/runtime/prompts/remotebuddy/autonomy_ideation_system_prompt.md +4 -1
  4. package/runtime/prompts/remotebuddy/autonomy_planning_system_prompt.md +1 -1
  5. package/runtime/prompts/remotebuddy/remotebuddy_system_prompt.md +2 -2
  6. package/runtime/prompts/workerpals/miniswe_completion_requirement.md +1 -1
  7. package/runtime/prompts/workerpals/miniswe_explicit_targets_block.md +1 -1
  8. package/runtime/prompts/workerpals/openai_codex_task_execute_system_prompt.md +4 -1
  9. package/runtime/prompts/workerpals/openhands_minimal_system_prompt.j2 +3 -1
  10. package/runtime/prompts/workerpals/openhands_task_execute_system_prompt.md +2 -1
  11. package/runtime/prompts/workerpals/workerpals_system_prompt.md +2 -2
  12. package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +41 -45
  13. package/runtime/sandbox/apps/workerpals/src/backends/miniswe/miniswe_executor.py +5 -34
  14. package/runtime/sandbox/apps/workerpals/src/backends/openhands/openhands_executor.py +3 -2
  15. package/runtime/sandbox/apps/workerpals/src/execute_job.ts +328 -71
  16. package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +70 -25
  17. package/runtime/sandbox/packages/shared/src/autonomy_policy.ts +14 -8
  18. package/runtime/sandbox/packages/shared/src/communication.ts +4 -1
  19. package/runtime/sandbox/packages/shared/src/config.ts +1 -1
  20. package/runtime/sandbox/prompts/workerpals/miniswe_completion_requirement.md +1 -1
  21. package/runtime/sandbox/prompts/workerpals/miniswe_explicit_targets_block.md +1 -1
  22. package/runtime/sandbox/prompts/workerpals/openai_codex_task_execute_system_prompt.md +4 -1
  23. package/runtime/sandbox/prompts/workerpals/openhands_minimal_system_prompt.j2 +3 -1
  24. package/runtime/sandbox/prompts/workerpals/openhands_task_execute_system_prompt.md +2 -1
  25. package/runtime/sandbox/prompts/workerpals/workerpals_system_prompt.md +2 -2
@@ -19,7 +19,6 @@ import {
19
19
  normalizeTargetPath,
20
20
  requirementsForValidationCommand,
21
21
  sanitizeSourceControlIdentityField,
22
- validateScopeInvariants,
23
22
  type AutonomyComponentArea,
24
23
  type SourceControlCommitIdentity,
25
24
  type ToolRequirement,
@@ -65,7 +64,7 @@ export interface TaskExecutePlanning {
65
64
  finalizationBudgetMs: number;
66
65
  }
67
66
 
68
- interface ValidationExecutionResult {
67
+ export interface ValidationExecutionResult {
69
68
  step: string;
70
69
  command: string;
71
70
  ok: boolean;
@@ -91,6 +90,7 @@ interface DeterministicQualityResult {
91
90
  validationRuns: ValidationExecutionResult[];
92
91
  requiredValidationFailures: string[];
93
92
  blocker: ValidationBlocker | null;
93
+ validationFailureScope: "none" | "task_scope" | "outside_task_scope";
94
94
  }
95
95
 
96
96
  interface CriticReview {
@@ -137,9 +137,11 @@ export function shouldReviseRequiredValidationBlocker(opts: {
137
137
  blocker: ValidationBlocker | null;
138
138
  revisionAttempt: number;
139
139
  maxAutoRevisions: number;
140
+ outsideTaskScope?: boolean;
140
141
  }): boolean {
141
142
  if (opts.requiredValidationFailures.length === 0) return false;
142
143
  if (!opts.blocker) return false;
144
+ if (opts.outsideTaskScope) return false;
143
145
  if (opts.blocker.category !== "repo") return false;
144
146
  return opts.revisionAttempt < opts.maxAutoRevisions;
145
147
  }
@@ -615,7 +617,8 @@ async function runValidationCommand(
615
617
  timeoutMs: number,
616
618
  outputPolicy: Partial<OutputCompactionPolicy>,
617
619
  ): Promise<ValidationExecutionResult> {
618
- const argv = tokenizeValidationCommandArgv(command);
620
+ const env = buildWorkerSandboxWritableEnv(repo);
621
+ const argv = prepareValidationCommandArgv(command, env);
619
622
  if (!argv) {
620
623
  return {
621
624
  step: command,
@@ -631,7 +634,7 @@ async function runValidationCommand(
631
634
  const startedAt = Date.now();
632
635
  const proc = Bun.spawn(argv, {
633
636
  cwd: repo,
634
- env: buildWorkerSandboxWritableEnv(repo),
637
+ env,
635
638
  stdout: "pipe",
636
639
  stderr: "pipe",
637
640
  });
@@ -696,6 +699,35 @@ export function resolveValidationCommandTimeoutMs(command: string, baseTimeoutMs
696
699
  return Math.max(normalizedBase, 600_000);
697
700
  }
698
701
 
702
+ function commandHasPortArg(argv: string[]): boolean {
703
+ return argv.some((token) => token === "--port" || token.startsWith("--port="));
704
+ }
705
+
706
+ function shouldInjectBrowserValidationPort(command: string, argv: string[]): boolean {
707
+ if (commandHasPortArg(argv)) return false;
708
+ if (!isLongRunningBrowserValidationCommand(command)) return false;
709
+ return /\b(web:e2e|e2e:web|browser:e2e|smoke:web|web:smoke|browser:smoke)\b/.test(
710
+ validationCommandKey(command),
711
+ );
712
+ }
713
+
714
+ export function prepareValidationCommandArgv(
715
+ command: string,
716
+ env: Record<string, string>,
717
+ ): string[] | null {
718
+ const argv = tokenizeValidationCommandArgv(command);
719
+ if (!argv) return null;
720
+ const port = String(env.EXPO_DEV_SERVER_PORT ?? "").trim();
721
+ if (!port || !shouldInjectBrowserValidationPort(command, argv)) return argv;
722
+ return [...argv, "--", "--port", port];
723
+ }
724
+
725
+ function isBrowserValidationInfrastructureDigest(digest: string): boolean {
726
+ return /\b(ERR_SOCKET_BAD_PORT|EADDRINUSE|ECONNREFUSED|ECONNRESET|ETIMEDOUT|timed out|timeout|port|browser runtime|playwright install|executable doesn't exist)\b/i.test(
727
+ digest,
728
+ );
729
+ }
730
+
699
731
  interface ToolAvailabilityResult {
700
732
  requirement: ToolRequirement;
701
733
  ok: boolean;
@@ -802,6 +834,96 @@ function extractPreparedMergeConflictPaths(params: Record<string, unknown>): str
802
834
  .filter(Boolean);
803
835
  }
804
836
 
837
+ function normalizeValidationPathToken(value: string): string | null {
838
+ const normalized = value
839
+ .trim()
840
+ .replace(/^['"`(<[]+/, "")
841
+ .replace(/[>'"`)\],.;:]+$/, "")
842
+ .replace(/\\/g, "/")
843
+ .replace(/^\.\/+/, "")
844
+ .replace(/\/+/g, "/");
845
+ if (!normalized || normalized.startsWith("../") || normalized.includes("/../")) return null;
846
+ if (!/[./]/.test(normalized)) return null;
847
+ if (/^(https?|file):/i.test(normalized)) return null;
848
+ return normalized;
849
+ }
850
+
851
+ function extractPathTokensFromValidationOutput(value: string): string[] {
852
+ const seen = new Set<string>();
853
+ const out: string[] = [];
854
+ const add = (raw: string | undefined) => {
855
+ if (!raw) return;
856
+ const normalized = normalizeValidationPathToken(raw);
857
+ if (!normalized || seen.has(normalized)) return;
858
+ seen.add(normalized);
859
+ out.push(normalized);
860
+ };
861
+ const normalized = stripAnsiControlSequences(value);
862
+ for (const match of normalized.matchAll(/[A-Za-z0-9_.@-]+(?:\/[A-Za-z0-9_.@-]+)+(?:\.[A-Za-z0-9_.-]+)?/g)) {
863
+ add(match[0]);
864
+ }
865
+ for (const match of normalized.matchAll(/(?:from|in|at)\s+['"`]?([^'"`\s]+\/[^'"`\s]+)['"`]?/gi)) {
866
+ add(match[1]);
867
+ }
868
+ return out;
869
+ }
870
+
871
+ function literalScopePrefix(value: string): string | null {
872
+ const normalized = normalizeValidationPathToken(value.replace(/\*\*?.*$/, "").replace(/\/+$/, ""));
873
+ if (!normalized || normalized === ".") return null;
874
+ return normalized;
875
+ }
876
+
877
+ function pathMatchesScopeHint(path: string, hint: string): boolean {
878
+ const normalizedPath = normalizeValidationPathToken(path);
879
+ const normalizedHint = hint.trim().replace(/\\/g, "/").replace(/^\.\/+/, "");
880
+ if (!normalizedPath || !normalizedHint) return false;
881
+ if (matchesGlob(normalizedPath, normalizedHint)) return true;
882
+ const prefix = literalScopePrefix(normalizedHint);
883
+ if (!prefix) return false;
884
+ return normalizedPath === prefix || normalizedPath.startsWith(`${prefix}/`);
885
+ }
886
+
887
+ export function classifyValidationFailureScope(
888
+ runs: ValidationExecutionResult[],
889
+ planning: TaskExecutePlanning,
890
+ changedPaths: string[],
891
+ targetPath?: string,
892
+ ): "none" | "task_scope" | "outside_task_scope" {
893
+ const failedRuns = runs.filter((run) => !run.ok && run.exitCode !== 127);
894
+ if (failedRuns.length === 0) return "none";
895
+ const scopeHints = [
896
+ targetPath ?? "",
897
+ ...changedPaths,
898
+ ...(planning.targetPaths ?? []),
899
+ ...(planning.scope.writeGlobs ?? []),
900
+ ]
901
+ .map((entry) => entry.trim().replace(/\\/g, "/"))
902
+ .filter(Boolean);
903
+ if (scopeHints.length === 0) return "none";
904
+
905
+ const combined = failedRuns
906
+ .flatMap((run) => [run.stdout, run.stderr])
907
+ .filter(Boolean)
908
+ .join("\n");
909
+ const lowerCombined = combined.toLowerCase().replace(/\\/g, "/");
910
+ for (const hint of scopeHints) {
911
+ const normalized = literalScopePrefix(hint);
912
+ if (normalized && normalized.length >= 4 && lowerCombined.includes(normalized.toLowerCase())) {
913
+ return "task_scope";
914
+ }
915
+ }
916
+
917
+ const pathTokens = extractPathTokensFromValidationOutput(combined).filter(
918
+ (token) => !/^(node_modules|\.bun|bun|npm|pnpm|yarn)\//i.test(token),
919
+ );
920
+ if (pathTokens.length === 0) return "none";
921
+ if (pathTokens.some((token) => scopeHints.some((hint) => pathMatchesScopeHint(token, hint)))) {
922
+ return "task_scope";
923
+ }
924
+ return "outside_task_scope";
925
+ }
926
+
805
927
  function detectValidationBlocker(runs: ValidationExecutionResult[]): ValidationBlocker | null {
806
928
  const combined = runs
807
929
  .flatMap((run) => [run.stdout, run.stderr])
@@ -982,7 +1104,51 @@ function extractRunnableValidationCommand(step: string): string | null {
982
1104
  }
983
1105
 
984
1106
  function validationCommandKey(command: string): string {
985
- return command.trim().replace(/\s+/g, " ").toLowerCase();
1107
+ const argv = tokenizeValidationCommandArgv(command);
1108
+ if (argv && argv.length > 0) {
1109
+ const normalized = argv.map((entry) => entry.trim()).filter(Boolean);
1110
+ if (normalized[0]?.toLowerCase() === "bunx") {
1111
+ normalized.splice(0, 1, "bun", "x");
1112
+ }
1113
+ return normalized.join(" ").replace(/\s+/g, " ").toLowerCase();
1114
+ }
1115
+ return command
1116
+ .trim()
1117
+ .replace(/\s+/g, " ")
1118
+ .replace(/^bunx\b/i, "bun x")
1119
+ .toLowerCase();
1120
+ }
1121
+
1122
+ export function extractValidationFailureDigest(run: {
1123
+ exitCode?: number;
1124
+ stdout?: string;
1125
+ stderr?: string;
1126
+ elapsedMs?: number;
1127
+ }): string {
1128
+ const combined = stripAnsiControlSequences([run.stderr, run.stdout].filter(Boolean).join("\n"));
1129
+ const patterns = [
1130
+ /\bCannot find module\s+['"`][^'"`\r\n]+['"`][^\r\n]*/i,
1131
+ /\bFailed to resolve import\s+['"`][^'"`\r\n]+['"`][^\r\n]*/i,
1132
+ /\bCould not resolve\s+['"`]?[^'"`\r\n]+['"`]?[^\r\n]*/i,
1133
+ /\bModule not found[^\r\n]*/i,
1134
+ /\bERR_SOCKET_BAD_PORT[^\r\n]*/i,
1135
+ /\berror TS\d+:[^\r\n]*/i,
1136
+ /\bError:\s+[^\r\n]*/i,
1137
+ ];
1138
+ for (const pattern of patterns) {
1139
+ const match = combined.match(pattern);
1140
+ if (match?.[0]) return toSingleLine(match[0], 180);
1141
+ }
1142
+ const firstMeaningfulLine = combined
1143
+ .split(/\r?\n/)
1144
+ .map((line) => line.trim())
1145
+ .find((line) => /\b(error|failed|cannot|could not|timeout|timed out)\b/i.test(line));
1146
+ if (firstMeaningfulLine) return toSingleLine(firstMeaningfulLine, 180);
1147
+ if (Number(run.exitCode) === 124) {
1148
+ const elapsed = Number.isFinite(Number(run.elapsedMs)) ? ` after ${Number(run.elapsedMs)}ms` : "";
1149
+ return `timed out${elapsed}`;
1150
+ }
1151
+ return "";
986
1152
  }
987
1153
 
988
1154
  export function collectRequiredValidationFailures(
@@ -995,7 +1161,8 @@ export function collectRequiredValidationFailures(
995
1161
  .filter((run) => requiredKeys.has(validationCommandKey(run.command)) && !run.ok)
996
1162
  .map((run) => {
997
1163
  const exitCode = Number.isFinite(Number(run.exitCode)) ? Number(run.exitCode) : "unknown";
998
- return `${run.command} exited ${exitCode}`;
1164
+ const digest = extractValidationFailureDigest(run);
1165
+ return `${run.command} exited ${exitCode}${digest ? ` (${digest})` : ""}`;
999
1166
  });
1000
1167
  }
1001
1168
 
@@ -1055,7 +1222,7 @@ function dedupeValidationCommands(...groups: string[][]): string[] {
1055
1222
  for (const command of group) {
1056
1223
  const trimmed = command.trim();
1057
1224
  if (!trimmed) continue;
1058
- const key = trimmed.toLowerCase();
1225
+ const key = validationCommandKey(trimmed);
1059
1226
  if (seen.has(key)) continue;
1060
1227
  seen.add(key);
1061
1228
  out.push(trimmed);
@@ -1160,14 +1327,19 @@ export function inferFallbackValidationCommandsForTestTask(
1160
1327
  return candidates.slice(0, 4);
1161
1328
  }
1162
1329
 
1163
- function isTestFocusedTask(
1330
+ export function isTestFocusedTask(
1164
1331
  instruction: string,
1165
1332
  planning: TaskExecutePlanning,
1166
1333
  targetPath?: string,
1167
1334
  ): boolean {
1168
1335
  const lowerInstruction = instruction.toLowerCase();
1169
1336
  if (
1170
- /\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/.test(lowerInstruction)
1337
+ /\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b.{0,80}\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/.test(
1338
+ lowerInstruction,
1339
+ ) ||
1340
+ /\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b.{0,80}\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b/.test(
1341
+ lowerInstruction,
1342
+ )
1171
1343
  ) {
1172
1344
  return true;
1173
1345
  }
@@ -1179,7 +1351,9 @@ function isTestFocusedTask(
1179
1351
  if (pathHints.some((entry) => isLikelyTestPath(entry))) return true;
1180
1352
  if (
1181
1353
  planning.acceptanceCriteria.some((entry) =>
1182
- /\b(test|tests|coverage|unit|integration|negative|invalid|valid)\b/i.test(entry),
1354
+ /\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b.{0,80}\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/i.test(
1355
+ entry,
1356
+ ),
1183
1357
  )
1184
1358
  ) {
1185
1359
  return true;
@@ -1217,6 +1391,10 @@ async function runDeterministicQualityGate(
1217
1391
  runtimeConfig: WorkerpalsRuntimeConfig,
1218
1392
  qualityGatePolicy: QualityGatePolicy,
1219
1393
  onLog?: (stream: "stdout" | "stderr", line: string) => void,
1394
+ validationRetryState?: {
1395
+ previousFailureDigests?: Map<string, string>;
1396
+ revisionAttempt?: number;
1397
+ },
1220
1398
  ): Promise<DeterministicQualityResult> {
1221
1399
  const instruction = String(params.instruction ?? "");
1222
1400
  const targetPath = String(params.targetPath ?? params.path ?? "").trim() || undefined;
@@ -1245,6 +1423,7 @@ async function runDeterministicQualityGate(
1245
1423
  validationRuns: [],
1246
1424
  requiredValidationFailures: [],
1247
1425
  blocker: null,
1426
+ validationFailureScope: "none",
1248
1427
  };
1249
1428
  }
1250
1429
 
@@ -1394,6 +1573,33 @@ async function runDeterministicQualityGate(
1394
1573
  );
1395
1574
  continue;
1396
1575
  }
1576
+ const previousDigest = validationRetryState?.previousFailureDigests?.get(
1577
+ validationCommandKey(command),
1578
+ );
1579
+ if (
1580
+ previousDigest &&
1581
+ Number(validationRetryState?.revisionAttempt ?? 0) > 0 &&
1582
+ isLongRunningBrowserValidationCommand(command) &&
1583
+ isBrowserValidationInfrastructureDigest(previousDigest)
1584
+ ) {
1585
+ const stderr =
1586
+ `Skipped repeated browser validation after the same command failed in an earlier revision: ${previousDigest}. ` +
1587
+ "Run it once after the underlying blocker changes.";
1588
+ validationRuns.push({
1589
+ step: command,
1590
+ command,
1591
+ ok: false,
1592
+ exitCode: 124,
1593
+ stdout: "",
1594
+ stderr,
1595
+ elapsedMs: 1,
1596
+ });
1597
+ onLog?.(
1598
+ "stderr",
1599
+ `[ValidationGate] Skipped repeated long browser validation: ${command} (${previousDigest})`,
1600
+ );
1601
+ continue;
1602
+ }
1397
1603
  onLog?.("stdout", `[ValidationGate] Running "${command}"`);
1398
1604
  const run = await runValidationCommand(
1399
1605
  repo,
@@ -1402,7 +1608,8 @@ async function runDeterministicQualityGate(
1402
1608
  outputPolicy,
1403
1609
  );
1404
1610
  validationRuns.push(run);
1405
- const runSummary = `[ValidationGate] ${run.ok ? "Passed" : "Failed"} (${run.elapsedMs}ms, exit ${run.exitCode}): ${command}`;
1611
+ const digest = run.ok ? "" : extractValidationFailureDigest(run);
1612
+ const runSummary = `[ValidationGate] ${run.ok ? "Passed" : "Failed"} (${run.elapsedMs}ms, exit ${run.exitCode}): ${command}${digest ? ` - ${digest}` : ""}`;
1406
1613
  onLog?.(run.ok ? "stdout" : "stderr", runSummary);
1407
1614
  }
1408
1615
  // exit 127 = command not found: separate tool-availability issues from real test failures.
@@ -1442,6 +1649,15 @@ async function runDeterministicQualityGate(
1442
1649
  const blocker = qualityGatePolicy.validationGateEnabled
1443
1650
  ? detectValidationBlocker(validationRuns)
1444
1651
  : null;
1652
+ const scopedValidationFailure = qualityGatePolicy.validationGateEnabled
1653
+ ? classifyValidationFailureScope(validationRuns, planning, changedPaths, targetPath)
1654
+ : "none";
1655
+ if (scopedValidationFailure === "outside_task_scope") {
1656
+ onLog?.(
1657
+ "stderr",
1658
+ "[ValidationGate] Required validation failures appear outside the task write scope; treating them as publish blockers, not repair instructions.",
1659
+ );
1660
+ }
1445
1661
 
1446
1662
  return {
1447
1663
  ok: issues.length === 0 && blocker === null,
@@ -1454,6 +1670,7 @@ async function runDeterministicQualityGate(
1454
1670
  validationRuns,
1455
1671
  requiredValidationFailures,
1456
1672
  blocker,
1673
+ validationFailureScope: scopedValidationFailure,
1457
1674
  };
1458
1675
  }
1459
1676
 
@@ -2285,12 +2502,21 @@ function buildStageTargets(kind: string, params?: Record<string, unknown>): stri
2285
2502
  }
2286
2503
  }
2287
2504
 
2288
- function buildStageCommand(kind: string, params?: Record<string, unknown>): string[] | null {
2505
+ export function buildStageCommand(kind: string, params?: Record<string, unknown>): string[] | null {
2506
+ if (kind === "task.execute") {
2507
+ return [
2508
+ "add",
2509
+ "-A",
2510
+ "--",
2511
+ ".",
2512
+ ":(exclude)workspace/**",
2513
+ ":(exclude)outputs/**",
2514
+ ":(exclude).codex",
2515
+ ":(exclude).codex/**",
2516
+ ];
2517
+ }
2289
2518
  const targets = buildStageTargets(kind, params);
2290
2519
  if (targets.length === 0) {
2291
- if (kind === "task.execute") {
2292
- return ["add", "-A", "--", ".", ":(exclude)workspace/**", ":(exclude)outputs/**"];
2293
- }
2294
2520
  return null;
2295
2521
  }
2296
2522
  return ["add", "-A", "--", ...targets];
@@ -3460,9 +3686,10 @@ async function generateCommitMessageFromDiffViaCodex(
3460
3686
  repo: string,
3461
3687
  runtimeConfig: WorkerpalsRuntimeConfig,
3462
3688
  ): Promise<string | null> {
3689
+ const model = runtimeConfig.workerpals.llm.model.trim();
3690
+ if (!model) return null;
3463
3691
  const codexPrefix = await resolveCodexCommandPrefix(repo, runtimeConfig.workerpals.llm.codexBin);
3464
3692
  if (!codexPrefix) return null;
3465
- const model = runtimeConfig.workerpals.llm.model.trim();
3466
3693
  const timeoutMs = (() => {
3467
3694
  const value = Number(runtimeConfig.workerpals.llm.codexTimeoutMs);
3468
3695
  if (!Number.isFinite(value)) return 120_000;
@@ -3730,13 +3957,10 @@ function taskExecuteOrigin(params: Record<string, unknown>): "autonomy" | "user"
3730
3957
  return "user";
3731
3958
  }
3732
3959
 
3733
- function collectWriteScopeIssuesFromChangedPaths(
3960
+ export function collectWriteScopeIssuesFromChangedPaths(
3734
3961
  changedPaths: string[],
3735
3962
  planning: TaskExecutePlanning,
3736
3963
  ): string[] {
3737
- const writeGlobs = toStringArray(planning.scope.writeGlobs ?? []);
3738
- if (writeGlobs.length === 0) return [];
3739
-
3740
3964
  const normalizedChangedPaths = changedPaths
3741
3965
  .map((entry) => normalizeStagePath(entry))
3742
3966
  .filter((entry): entry is string => Boolean(entry) && entry !== ".");
@@ -3744,12 +3968,6 @@ function collectWriteScopeIssuesFromChangedPaths(
3744
3968
 
3745
3969
  const forbidden = toStringArray(planning.scope.forbiddenGlobs ?? []);
3746
3970
  const issues: string[] = [];
3747
- const outOfScope = normalizedChangedPaths.filter(
3748
- (path) => !writeGlobs.some((glob) => matchesGlob(path, glob)),
3749
- );
3750
- if (outOfScope.length > 0) {
3751
- issues.push(`modified paths outside writeGlobs: ${outOfScope.join(", ")}`);
3752
- }
3753
3971
  const forbiddenTouched = normalizedChangedPaths.filter((path) =>
3754
3972
  forbidden.some((glob) => matchesGlob(path, glob)),
3755
3973
  );
@@ -3886,41 +4104,17 @@ function validateTaskExecutePlanning(
3886
4104
  reviewAgentAllowsMultiRootScope(options?.reviewAgentResolutionType);
3887
4105
  if (origin === "autonomy") {
3888
4106
  const declaredComponentArea = asAutonomyComponentArea(options?.autonomyComponentArea);
3889
- const inferredComponentArea = allowMultiRootAutonomyScope
3890
- ? null
3891
- : deriveAutonomyComponentArea(normalizedTargetPaths, normalizedWriteGlobs);
3892
- const componentArea = allowMultiRootAutonomyScope
3893
- ? declaredComponentArea
3894
- : declaredComponentArea ?? inferredComponentArea;
3895
- if (!allowMultiRootAutonomyScope && !componentArea) {
3896
- return {
3897
- ok: false,
3898
- message:
3899
- "task.execute planning.targetPaths must resolve to a repo-relative componentArea",
3900
- };
3901
- }
3902
- if (
3903
- !allowMultiRootAutonomyScope &&
3904
- declaredComponentArea &&
3905
- inferredComponentArea &&
3906
- declaredComponentArea !== inferredComponentArea
3907
- ) {
3908
- return {
3909
- ok: false,
3910
- message: "task.execute planning.targetPaths do not match autonomy componentArea",
3911
- };
3912
- }
3913
- const validatedScope = validateScopeInvariants(
3914
- componentArea,
3915
- normalizedTargetPaths,
3916
- normalizedWriteGlobs,
3917
- { requireWriteGlobs: false, allowMultipleComponentRoots: allowMultiRootAutonomyScope },
3918
- );
3919
- if (!validatedScope.ok) {
3920
- return {
3921
- ok: false,
3922
- message: `task.execute scope invariants failed: ${validatedScope.errors.join("; ")}`,
3923
- };
4107
+ if (!allowMultiRootAutonomyScope && declaredComponentArea) {
4108
+ const inferredComponentArea = deriveAutonomyComponentArea(
4109
+ normalizedTargetPaths,
4110
+ normalizedWriteGlobs,
4111
+ );
4112
+ if (inferredComponentArea && declaredComponentArea !== inferredComponentArea) {
4113
+ return {
4114
+ ok: false,
4115
+ message: "task.execute planning.targetPaths do not match autonomy componentArea",
4116
+ };
4117
+ }
3924
4118
  }
3925
4119
  } else if (normalizedWriteGlobs.length > 0) {
3926
4120
  const uncoveredPaths = normalizedTargetPaths.filter(
@@ -4368,6 +4562,7 @@ export async function executeJob(
4368
4562
 
4369
4563
  let revisionAttempt = 0;
4370
4564
  let revisionHint = "";
4565
+ const previousValidationFailureDigests = new Map<string, string>();
4371
4566
  while (revisionAttempt <= qualityRevisionLoopMax) {
4372
4567
  const attemptParams: Record<string, unknown> = { ...normalizedParams };
4373
4568
  if (revisionHint) {
@@ -4462,30 +4657,66 @@ export async function executeJob(
4462
4657
  runtimeConfig,
4463
4658
  qualityGatePolicy,
4464
4659
  onLog,
4660
+ {
4661
+ previousFailureDigests: previousValidationFailureDigests,
4662
+ revisionAttempt,
4663
+ },
4465
4664
  );
4665
+ for (const run of quality.validationRuns) {
4666
+ if (run.ok) continue;
4667
+ const digest = extractValidationFailureDigest(run);
4668
+ if (digest) previousValidationFailureDigests.set(validationCommandKey(run.command), digest);
4669
+ }
4670
+ const validationOutsideTaskScope =
4671
+ quality.validationFailureScope === "outside_task_scope";
4672
+ const qualityForCritic: DeterministicQualityResult = validationOutsideTaskScope
4673
+ ? {
4674
+ ...quality,
4675
+ issues: quality.issues.filter((issue) => !issue.startsWith("ValidationGate:")),
4676
+ validationIssues: [],
4677
+ validationRuns: [],
4678
+ blocker: null,
4679
+ }
4680
+ : quality;
4466
4681
  const critic =
4467
4682
  quality.skipped || !qualityGatePolicy.criticGateEnabled
4468
4683
  ? null
4469
4684
  : executor === "openai_codex"
4470
- ? await runCodexCriticReview(repo, attemptParams, quality, runtimeConfig, onLog)
4471
- : await runTaskCriticReview(repo, attemptParams, quality, runtimeConfig, onLog);
4685
+ ? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
4686
+ : await runTaskCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog);
4472
4687
  if (!qualityGatePolicy.criticGateEnabled) {
4473
4688
  onLog?.("stdout", "[CriticGate] Disabled by workerpals.quality_critic_gate_enabled=false.");
4474
4689
  }
4475
- const effectiveQualityIssues = relaxAdvisoryQualityIssues(
4690
+ const advisoryRelaxedQualityIssues = relaxAdvisoryQualityIssues(
4476
4691
  quality.issues,
4477
4692
  quality.validationRuns,
4478
4693
  critic,
4479
4694
  qualityCriticMinScore,
4480
4695
  );
4481
- if (effectiveQualityIssues.length !== quality.issues.length) {
4696
+ let effectiveQualityIssues = advisoryRelaxedQualityIssues;
4697
+ if (validationOutsideTaskScope) {
4698
+ effectiveQualityIssues = effectiveQualityIssues.filter(
4699
+ (issue) => !issue.startsWith("ValidationGate:"),
4700
+ );
4701
+ if (effectiveQualityIssues.length !== quality.issues.length) {
4702
+ onLog?.(
4703
+ "stderr",
4704
+ "[ValidationGate] Validation failures are outside the task scope; they will block publishing but will not drive another code revision.",
4705
+ );
4706
+ }
4707
+ }
4708
+ if (
4709
+ !validationOutsideTaskScope &&
4710
+ advisoryRelaxedQualityIssues.length !== quality.issues.length
4711
+ ) {
4482
4712
  onLog?.(
4483
4713
  "stdout",
4484
4714
  "[QualityGate] Assertion-balance heuristic downgraded to advisory because validation passed and critic score met threshold.",
4485
4715
  );
4486
4716
  }
4487
4717
  const deterministicRequiresRevision =
4488
- effectiveQualityIssues.length > 0 || quality.blocker !== null;
4718
+ effectiveQualityIssues.length > 0 ||
4719
+ (quality.blocker !== null && !validationOutsideTaskScope);
4489
4720
  const criticRequiresRevision = Boolean(critic && critic.score < qualityCriticMinScore);
4490
4721
  if (
4491
4722
  !qualityGatePolicy.publishGateEnabled &&
@@ -4513,6 +4744,29 @@ export async function executeJob(
4513
4744
  }
4514
4745
 
4515
4746
  if (!deterministicRequiresRevision && !criticRequiresRevision) {
4747
+ if (quality.requiredValidationFailures.length > 0) {
4748
+ const requiredSummary = `Required vision.md validation blocked publishing: ${quality.requiredValidationFailures.join("; ")}`;
4749
+ const diagnostics = truncate(
4750
+ [
4751
+ result.stderr ?? "",
4752
+ validationOutsideTaskScope
4753
+ ? "Validation failures appear outside the task write scope and are treated as pre-existing repo blockers."
4754
+ : "",
4755
+ ...quality.validationRuns.flatMap((run) => [run.stdout, run.stderr]).filter(Boolean),
4756
+ ]
4757
+ .filter(Boolean)
4758
+ .join("\n"),
4759
+ outputPolicyForRuntime(runtimeConfig),
4760
+ );
4761
+ onLog?.("stderr", `[QualityGate] ${requiredSummary}`);
4762
+ return {
4763
+ ok: false,
4764
+ summary: requiredSummary,
4765
+ stdout: result.stdout,
4766
+ stderr: diagnostics,
4767
+ exitCode: 4,
4768
+ };
4769
+ }
4516
4770
  if (critic) {
4517
4771
  onLog?.(
4518
4772
  "stdout",
@@ -4538,11 +4792,13 @@ export async function executeJob(
4538
4792
  const activeMaxAutoRevisions = revisionLimitForQualityGateFailures({
4539
4793
  policy: qualityGatePolicy,
4540
4794
  qualityIssues: effectiveQualityIssues,
4541
- requiredValidationFailures: quality.requiredValidationFailures,
4542
- blocker: quality.blocker,
4795
+ requiredValidationFailures: validationOutsideTaskScope
4796
+ ? []
4797
+ : quality.requiredValidationFailures,
4798
+ blocker: validationOutsideTaskScope ? null : quality.blocker,
4543
4799
  });
4544
4800
  const issueSummary = issues.map((entry) => toSingleLine(entry, 180)).join(" | ");
4545
- if (quality.blocker) {
4801
+ if (quality.blocker && !validationOutsideTaskScope) {
4546
4802
  const blockerSummary = `Quality gate blocked by ${quality.blocker.category} issue: ${quality.blocker.detail}`;
4547
4803
  const blockerDiagnostics = truncate(
4548
4804
  [
@@ -4556,6 +4812,7 @@ export async function executeJob(
4556
4812
  blocker: quality.blocker,
4557
4813
  revisionAttempt,
4558
4814
  maxAutoRevisions: qualityValidationMaxAutoRevisions,
4815
+ outsideTaskScope: validationOutsideTaskScope,
4559
4816
  });
4560
4817
  if (requiredValidationCanRevise) {
4561
4818
  onLog?.(
@@ -4669,8 +4926,8 @@ export async function executeJob(
4669
4926
  critic,
4670
4927
  planning,
4671
4928
  reviewFixContext,
4672
- quality.validationRuns,
4673
- quality.blocker,
4929
+ validationOutsideTaskScope ? [] : quality.validationRuns,
4930
+ validationOutsideTaskScope ? null : quality.blocker,
4674
4931
  );
4675
4932
  onLog?.(
4676
4933
  "stderr",