@pushpalsdev/cli 1.0.84 → 1.0.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,8 +3,7 @@
3
3
  * Used by both the host Worker (direct mode) and the Docker job runner.
4
4
  */
5
5
 
6
- import { existsSync, mkdirSync, readFileSync, rmSync, unlinkSync } from "fs";
7
- import { tmpdir } from "os";
6
+ import { existsSync, readFileSync, rmSync, unlinkSync } from "fs";
8
7
  import { resolve } from "path";
9
8
  import {
10
9
  deriveAutonomyComponentArea,
@@ -32,6 +31,7 @@ import {
32
31
  truncate,
33
32
  type OutputCompactionPolicy,
34
33
  } from "./common/execution_utils.js";
34
+ import { buildWorkerSandboxWritableEnv } from "./common/sandbox_env.js";
35
35
  // Re-export shared utilities for backward compatibility with external consumers.
36
36
  export { compactJobOutput, truncate, streamLines } from "./common/execution_utils.js";
37
37
  export { extractClarificationQuestionFromOutput } from "./backends/openhands_task_execute.js";
@@ -65,7 +65,7 @@ export interface TaskExecutePlanning {
65
65
  finalizationBudgetMs: number;
66
66
  }
67
67
 
68
- interface ValidationExecutionResult {
68
+ export interface ValidationExecutionResult {
69
69
  step: string;
70
70
  command: string;
71
71
  ok: boolean;
@@ -75,7 +75,7 @@ interface ValidationExecutionResult {
75
75
  elapsedMs: number;
76
76
  }
77
77
 
78
- interface ValidationBlocker {
78
+ export interface ValidationBlocker {
79
79
  category: "repo" | "environment";
80
80
  detail: string;
81
81
  }
@@ -84,11 +84,14 @@ interface DeterministicQualityResult {
84
84
  ok: boolean;
85
85
  skipped: boolean;
86
86
  issues: string[];
87
+ scopeIssues: string[];
88
+ validationIssues: string[];
87
89
  changedPaths: string[];
88
90
  changedTestPaths: string[];
89
91
  validationRuns: ValidationExecutionResult[];
90
92
  requiredValidationFailures: string[];
91
93
  blocker: ValidationBlocker | null;
94
+ validationFailureScope: "none" | "task_scope" | "outside_task_scope";
92
95
  }
93
96
 
94
97
  interface CriticReview {
@@ -112,6 +115,11 @@ export interface ReviewFixContext {
112
115
  export interface QualityGatePolicy {
113
116
  mode: "default" | "review_fix" | "merge_conflict";
114
117
  maxAutoRevisions: number;
118
+ validationMaxAutoRevisions: number;
119
+ scopeGateEnabled: boolean;
120
+ validationGateEnabled: boolean;
121
+ criticGateEnabled: boolean;
122
+ publishGateEnabled: boolean;
115
123
  softPassOnExhausted: boolean;
116
124
  criticMinScore: number;
117
125
  }
@@ -125,6 +133,35 @@ function shouldSoftPassValidationBlocker(
125
133
  return policy.mode === "review_fix" || policy.mode === "merge_conflict";
126
134
  }
127
135
 
136
/**
 * Decides whether a failed required validation should trigger another automatic revision.
 *
 * Returns `true` only when every condition holds:
 * - at least one required validation failure was recorded,
 * - a blocker was detected and its category is "repo",
 * - the failure was not flagged as outside the task scope,
 * - `revisionAttempt` is still strictly below `maxAutoRevisions`.
 *
 * @param opts.requiredValidationFailures Failure summaries for required validation commands.
 * @param opts.blocker Detected validation blocker, or `null` when none was detected.
 * @param opts.revisionAttempt Revision attempt counter compared against the limit.
 * @param opts.maxAutoRevisions Exclusive upper bound for `revisionAttempt`.
 * @param opts.outsideTaskScope When truthy, the failure is never revised.
 * @returns Whether another revision should be attempted.
 */
export function shouldReviseRequiredValidationBlocker(opts: {
  requiredValidationFailures: string[];
  blocker: ValidationBlocker | null;
  revisionAttempt: number;
  maxAutoRevisions: number;
  outsideTaskScope?: boolean;
}): boolean {
  const {
    requiredValidationFailures,
    blocker,
    revisionAttempt,
    maxAutoRevisions,
    outsideTaskScope,
  } = opts;
  // Nothing to revise without a recorded failure, a blocker, or when the failure is out of scope.
  if (requiredValidationFailures.length === 0 || !blocker || outsideTaskScope) {
    return false;
  }
  // Only "repo" blockers are revised, and only while attempts remain.
  return blocker.category === "repo" && revisionAttempt < maxAutoRevisions;
}
149
+
150
/**
 * Selects which auto-revision limit applies to the current set of quality gate failures.
 *
 * The validation-specific limit is used when any of these is true: a required validation
 * failed, a blocker is present, or a quality issue starts with "ValidationGate:".
 * Otherwise the general limit is used.
 *
 * @param opts.policy Policy fields holding both revision limits.
 * @param opts.qualityIssues Quality issue messages collected so far.
 * @param opts.requiredValidationFailures Failure summaries for required validation commands.
 * @param opts.blocker Detected validation blocker, or `null`.
 * @returns `policy.validationMaxAutoRevisions` or `policy.maxAutoRevisions`.
 */
export function revisionLimitForQualityGateFailures(opts: {
  policy: Pick<QualityGatePolicy, "maxAutoRevisions" | "validationMaxAutoRevisions">;
  qualityIssues: string[];
  requiredValidationFailures: string[];
  blocker: ValidationBlocker | null;
}): number {
  const { policy, qualityIssues, requiredValidationFailures, blocker } = opts;
  if (requiredValidationFailures.length > 0) {
    return policy.validationMaxAutoRevisions;
  }
  if (blocker !== null) {
    return policy.validationMaxAutoRevisions;
  }
  for (const issue of qualityIssues) {
    if (issue.startsWith("ValidationGate:")) {
      return policy.validationMaxAutoRevisions;
    }
  }
  return policy.maxAutoRevisions;
}
164
+
128
165
  // ─── Utilities ───────────────────────────────────────────────────────────────
129
166
 
130
167
  export function shouldCommit(
@@ -228,6 +265,13 @@ export function buildQualityGateRevisionIssues(
228
265
  const TEST_ASSERTION_BALANCE_ISSUE =
229
266
  "Changed test files do not show both positive and negative assertion coverage (expected both).";
230
267
 
268
/**
 * Reports whether a quality issue message is about missing positive/negative assertion balance.
 *
 * Matches either the exact `TEST_ASSERTION_BALANCE_ISSUE` text or any message containing the
 * phrase "positive and negative assertion coverage" (so prefixed or reworded variants count).
 *
 * @param issue Quality issue message to inspect.
 * @returns `true` when the message is an assertion-balance issue.
 */
function isAssertionBalanceIssue(issue: string): boolean {
  if (issue === TEST_ASSERTION_BALANCE_ISSUE) {
    return true;
  }
  return issue.includes("positive and negative assertion coverage");
}
274
+
231
275
  export function relaxAdvisoryQualityIssues(
232
276
  qualityIssues: string[],
233
277
  validationRuns: Array<{ ok: boolean }>,
@@ -245,7 +289,7 @@ export function relaxAdvisoryQualityIssues(
245
289
  return normalizedQualityIssues;
246
290
  }
247
291
 
248
- const relaxed = normalizedQualityIssues.filter((issue) => issue !== TEST_ASSERTION_BALANCE_ISSUE);
292
+ const relaxed = normalizedQualityIssues.filter((issue) => !isAssertionBalanceIssue(issue));
249
293
  return relaxed;
250
294
  }
251
295
 
@@ -362,13 +406,40 @@ export function deriveQualityGatePolicy(
362
406
  10,
363
407
  Number.isFinite(Number(runtimeConfig.workerpals.qualityMaxAutoRevisions))
364
408
  ? Math.floor(Number(runtimeConfig.workerpals.qualityMaxAutoRevisions))
365
- : 4,
409
+ : 3,
410
+ ),
411
+ );
412
+ const baseValidationMaxAutoRevisions = Math.max(
413
+ 0,
414
+ Math.min(
415
+ 10,
416
+ Number.isFinite(Number(runtimeConfig.workerpals.qualityValidationMaxAutoRevisions))
417
+ ? Math.floor(Number(runtimeConfig.workerpals.qualityValidationMaxAutoRevisions))
418
+ : 3,
366
419
  ),
367
420
  );
368
421
  const baseSoftPassOnExhausted =
369
422
  typeof runtimeConfig.workerpals.qualitySoftPassOnExhausted === "boolean"
370
423
  ? runtimeConfig.workerpals.qualitySoftPassOnExhausted
371
424
  : true;
425
+ const gateSwitches = {
426
+ scopeGateEnabled:
427
+ typeof runtimeConfig.workerpals.qualityScopeGateEnabled === "boolean"
428
+ ? runtimeConfig.workerpals.qualityScopeGateEnabled
429
+ : true,
430
+ validationGateEnabled:
431
+ typeof runtimeConfig.workerpals.qualityValidationGateEnabled === "boolean"
432
+ ? runtimeConfig.workerpals.qualityValidationGateEnabled
433
+ : true,
434
+ criticGateEnabled:
435
+ typeof runtimeConfig.workerpals.qualityCriticGateEnabled === "boolean"
436
+ ? runtimeConfig.workerpals.qualityCriticGateEnabled
437
+ : true,
438
+ publishGateEnabled:
439
+ typeof runtimeConfig.workerpals.qualityPublishGateEnabled === "boolean"
440
+ ? runtimeConfig.workerpals.qualityPublishGateEnabled
441
+ : true,
442
+ };
372
443
  const baseCriticMinScore = (() => {
373
444
  const value = Number(runtimeConfig.workerpals.qualityCriticMinScore);
374
445
  if (!Number.isFinite(value)) return 8;
@@ -379,19 +450,23 @@ export function deriveQualityGatePolicy(
379
450
  const mergeConflict = extractMergeConflictReviewContext(params);
380
451
  if (mergeConflict) {
381
452
  return {
382
- mode: "merge_conflict",
383
- maxAutoRevisions: baseMaxAutoRevisions,
384
- softPassOnExhausted: baseSoftPassOnExhausted,
385
- criticMinScore: baseCriticMinScore,
386
- };
387
- }
388
- return {
389
- mode: "default",
453
+ mode: "merge_conflict",
390
454
  maxAutoRevisions: baseMaxAutoRevisions,
455
+ validationMaxAutoRevisions: baseValidationMaxAutoRevisions,
456
+ ...gateSwitches,
391
457
  softPassOnExhausted: baseSoftPassOnExhausted,
392
458
  criticMinScore: baseCriticMinScore,
393
459
  };
394
460
  }
461
+ return {
462
+ mode: "default",
463
+ maxAutoRevisions: baseMaxAutoRevisions,
464
+ validationMaxAutoRevisions: baseValidationMaxAutoRevisions,
465
+ ...gateSwitches,
466
+ softPassOnExhausted: baseSoftPassOnExhausted,
467
+ criticMinScore: baseCriticMinScore,
468
+ };
469
+ }
395
470
  const tightenedCriticMinScore =
396
471
  reviewFix.reviewThreshold != null
397
472
  ? Math.max(baseCriticMinScore, Math.max(0, Math.min(10, reviewFix.reviewThreshold - 0.2)))
@@ -399,6 +474,8 @@ export function deriveQualityGatePolicy(
399
474
  return {
400
475
  mode: "review_fix",
401
476
  maxAutoRevisions: Math.max(baseMaxAutoRevisions, 2),
477
+ validationMaxAutoRevisions: baseValidationMaxAutoRevisions,
478
+ ...gateSwitches,
402
479
  softPassOnExhausted: baseSoftPassOnExhausted,
403
480
  criticMinScore: tightenedCriticMinScore,
404
481
  };
@@ -541,7 +618,8 @@ async function runValidationCommand(
541
618
  timeoutMs: number,
542
619
  outputPolicy: Partial<OutputCompactionPolicy>,
543
620
  ): Promise<ValidationExecutionResult> {
544
- const argv = tokenizeValidationCommandArgv(command);
621
+ const env = buildWorkerSandboxWritableEnv(repo);
622
+ const argv = prepareValidationCommandArgv(command, env);
545
623
  if (!argv) {
546
624
  return {
547
625
  step: command,
@@ -557,7 +635,7 @@ async function runValidationCommand(
557
635
  const startedAt = Date.now();
558
636
  const proc = Bun.spawn(argv, {
559
637
  cwd: repo,
560
- env: buildValidationCommandEnv(repo),
638
+ env,
561
639
  stdout: "pipe",
562
640
  stderr: "pipe",
563
641
  });
@@ -587,41 +665,68 @@ async function runValidationCommand(
587
665
  ok: !timedOut && exitCode === 0,
588
666
  exitCode: timedOut ? 124 : exitCode,
589
667
  stdout: compactJobOutput(stdout.trim(), outputPolicy),
590
- stderr: compactJobOutput(stderr.trim(), outputPolicy),
668
+ stderr: compactJobOutput(
669
+ [
670
+ stderr.trim(),
671
+ timedOut
672
+ ? `Validation command timed out after ${Math.max(1_000, timeoutMs)}ms. Captured output is the process output emitted before PushPals terminated the command.`
673
+ : "",
674
+ ]
675
+ .filter(Boolean)
676
+ .join("\n"),
677
+ outputPolicy,
678
+ ),
591
679
  elapsedMs: Math.max(1, Date.now() - startedAt),
592
680
  };
593
681
  }
594
682
 
595
- function buildValidationCommandEnv(repo: string): Record<string, string> {
596
- const homeDir = resolve(tmpdir(), "pushpals-validation-home");
597
- const cacheDir = resolve(tmpdir(), "pushpals-validation-cache");
598
- const expoDir = resolve(tmpdir(), "pushpals-validation-expo");
599
- for (const dir of [homeDir, cacheDir, expoDir]) {
600
- try {
601
- mkdirSync(dir, { recursive: true });
602
- } catch {
603
- // Keep validation best-effort; the command output will expose any real env blocker.
604
- }
605
- }
606
- const env: Record<string, string> = {};
607
- for (const [key, value] of Object.entries(process.env)) {
608
- if (typeof value === "string") env[key] = value;
609
- }
610
- return {
611
- ...env,
612
- HOME: homeDir,
613
- USERPROFILE: homeDir,
614
- XDG_CACHE_HOME: cacheDir,
615
- npm_config_cache: resolve(cacheDir, "npm"),
616
- EXPO_HOME: expoDir,
617
- EXPO_NO_TELEMETRY: process.env.EXPO_NO_TELEMETRY ?? "1",
618
- EXPO_NO_INTERACTIVE: process.env.EXPO_NO_INTERACTIVE ?? "1",
619
- CI: process.env.CI ?? "1",
620
- BROWSER: process.env.BROWSER ?? "none",
621
- EXPO_DEV_SERVER_PORT: process.env.EXPO_DEV_SERVER_PORT ?? "19006",
622
- RCT_METRO_PORT: process.env.RCT_METRO_PORT ?? "19006",
623
- PUSHPALS_VALIDATION_REPO: repo,
624
- };
683
/**
 * Heuristically detects validation commands that drive a browser or dev server and are
 * therefore expected to run long.
 *
 * A command qualifies when its normalized key names a web e2e/smoke script, when its
 * lower-cased arguments mention `playwright` or `cypress`, or when they mention `expo`
 * together with `web` or `start`.
 *
 * @param command Raw validation command line.
 * @returns `true` when the command looks like a browser validation run; `false` when the
 *   normalized key is empty or nothing matches.
 */
export function isLongRunningBrowserValidationCommand(command: string): boolean {
  const commandKey = validationCommandKey(command);
  if (!commandKey) return false;

  const argv = tokenizeValidationCommandArgv(command);
  const loweredCommandLine = (argv ?? []).map((token) => token.toLowerCase()).join(" ");

  if (/\b(web:e2e|e2e:web|browser:e2e|smoke:web|web:smoke|browser:smoke)\b/.test(commandKey)) {
    return true;
  }
  if (/\b(playwright|cypress)\b/.test(loweredCommandLine)) {
    return true;
  }
  return /\bexpo\b/.test(loweredCommandLine) && /\b(web|start)\b/.test(loweredCommandLine);
}
694
+
695
/**
 * Computes the timeout to apply to a single validation command.
 *
 * The base timeout is floored and clamped to the range 1,000..7,200,000 ms; a non-finite base
 * falls back to 180,000 ms. Commands recognized by `isLongRunningBrowserValidationCommand`
 * get at least 600,000 ms.
 *
 * @param command Raw validation command line.
 * @param baseTimeoutMs Configured per-step timeout in milliseconds.
 * @returns Effective timeout in milliseconds.
 */
export function resolveValidationCommandTimeoutMs(command: string, baseTimeoutMs: number): number {
  const requested = Number(baseTimeoutMs);
  let timeoutMs = 180_000;
  if (Number.isFinite(requested)) {
    timeoutMs = Math.max(1_000, Math.min(7_200_000, Math.floor(requested)));
  }
  return isLongRunningBrowserValidationCommand(command)
    ? Math.max(timeoutMs, 600_000)
    : timeoutMs;
}
702
+
703
/**
 * Checks whether an argv already carries an explicit port option.
 *
 * Recognizes the separate form (`--port <value>`) and the joined form (`--port=<value>`).
 *
 * @param argv Tokenized command arguments.
 * @returns `true` when any token is `--port` or starts with `--port=`.
 */
function commandHasPortArg(argv: string[]): boolean {
  for (const token of argv) {
    if (token === "--port") return true;
    if (token.startsWith("--port=")) return true;
  }
  return false;
}
706
+
707
/**
 * Decides whether a port argument should be appended to a browser validation command.
 *
 * Injection applies only when the argv has no explicit port option, the command is a
 * long-running browser validation, and its normalized key names one of the web e2e/smoke
 * scripts.
 *
 * @param command Raw validation command line.
 * @param argv Tokenized form of `command`.
 * @returns `true` when a port argument should be injected.
 */
function shouldInjectBrowserValidationPort(command: string, argv: string[]): boolean {
  return (
    !commandHasPortArg(argv) &&
    isLongRunningBrowserValidationCommand(command) &&
    /\b(web:e2e|e2e:web|browser:e2e|smoke:web|web:smoke|browser:smoke)\b/.test(
      validationCommandKey(command),
    )
  );
}
714
+
715
/**
 * Tokenizes a validation command and, for web e2e/smoke scripts, forwards the dev-server port.
 *
 * When `env.EXPO_DEV_SERVER_PORT` is non-empty and `shouldInjectBrowserValidationPort` agrees,
 * `--port <port>` is appended so the script receives it. A `--` separator is added in front
 * only if the command does not already contain one: a second `--` would be handed to the
 * script as a literal argument and the port option would no longer be parsed as an option.
 *
 * @param command Raw validation command line.
 * @param env Environment the command will run with; only `EXPO_DEV_SERVER_PORT` is read.
 * @returns The argv to spawn, or `null` when the command cannot be tokenized.
 */
export function prepareValidationCommandArgv(
  command: string,
  env: Record<string, string>,
): string[] | null {
  const argv = tokenizeValidationCommandArgv(command);
  if (!argv) return null;
  const port = String(env.EXPO_DEV_SERVER_PORT ?? "").trim();
  if (!port || !shouldInjectBrowserValidationPort(command, argv)) return argv;
  // Reuse an existing argument separator instead of emitting a duplicate one.
  return argv.includes("--") ? [...argv, "--port", port] : [...argv, "--", "--port", port];
}
725
+
726
/**
 * Reports whether a failure digest points at browser-validation infrastructure trouble
 * (socket/port errors, connection errors, timeouts, or a missing browser runtime).
 *
 * Matching is case-insensitive and keyword based, so any digest containing one of the
 * listed words or phrases as a whole word qualifies.
 *
 * @param digest One-line failure summary to inspect.
 * @returns `true` when the digest contains an infrastructure keyword.
 */
function isBrowserValidationInfrastructureDigest(digest: string): boolean {
  const infrastructureKeywords =
    /\b(ERR_SOCKET_BAD_PORT|EADDRINUSE|ECONNREFUSED|ECONNRESET|ETIMEDOUT|timed out|timeout|port|browser runtime|playwright install|executable doesn't exist)\b/i;
  return infrastructureKeywords.test(digest);
}
626
731
 
627
732
  interface ToolAvailabilityResult {
@@ -730,6 +835,96 @@ function extractPreparedMergeConflictPaths(params: Record<string, unknown>): str
730
835
  .filter(Boolean);
731
836
  }
732
837
 
838
/**
 * Cleans a raw token taken from validation output into a repo-style relative path.
 *
 * Steps: trim, strip leading quote/bracket characters, strip trailing quote/bracket and
 * punctuation characters, convert backslashes to `/`, drop a leading `./`, and collapse
 * repeated slashes.
 *
 * @param value Raw token.
 * @returns The normalized path, or `null` when the token is empty, contains a `../`
 *   segment, has neither `.` nor `/`, or is an `http(s):`/`file:` URL.
 */
function normalizeValidationPathToken(value: string): string | null {
  let token = value.trim();
  token = token.replace(/^['"`(<[]+/, "");
  token = token.replace(/[>'"`)\],.;:]+$/, "");
  token = token.replace(/\\/g, "/");
  token = token.replace(/^\.\/+/, "");
  token = token.replace(/\/+/g, "/");

  const climbsOutOfRoot = token.startsWith("../") || token.includes("/../");
  if (token === "" || climbsOutOfRoot) {
    return null;
  }
  const looksLikePath = /[./]/.test(token);
  const isUrl = /^(https?|file):/i.test(token);
  return looksLikePath && !isUrl ? token : null;
}
851
+
852
/**
 * Collects distinct path-like tokens mentioned in validation output.
 *
 * ANSI control sequences are stripped first. Candidates come from two passes, in order:
 * slash-separated identifier runs, then the operand following `from`, `in`, or `at`.
 * Each candidate is normalized with `normalizeValidationPathToken`; rejected and duplicate
 * tokens are dropped, and first-seen order is kept.
 *
 * @param value Raw validation output.
 * @returns Unique normalized path tokens in order of first appearance.
 */
function extractPathTokensFromValidationOutput(value: string): string[] {
  const text = stripAnsiControlSequences(value);
  const candidates: Array<string | undefined> = [
    ...Array.from(
      text.matchAll(/[A-Za-z0-9_.@-]+(?:\/[A-Za-z0-9_.@-]+)+(?:\.[A-Za-z0-9_.-]+)?/g),
      (match) => match[0],
    ),
    ...Array.from(
      text.matchAll(/(?:from|in|at)\s+['"`]?([^'"`\s]+\/[^'"`\s]+)['"`]?/gi),
      (match) => match[1],
    ),
  ];

  // A Set keeps insertion order, which preserves first-seen ordering while de-duplicating.
  const uniqueTokens = new Set<string>();
  for (const candidate of candidates) {
    if (!candidate) continue;
    const pathToken = normalizeValidationPathToken(candidate);
    if (pathToken) uniqueTokens.add(pathToken);
  }
  return Array.from(uniqueTokens);
}
871
+
872
/**
 * Extracts the literal (wildcard-free) leading portion of a scope hint or glob.
 *
 * Everything from the first `*` onward is removed, trailing slashes are trimmed, and the
 * remainder is normalized with `normalizeValidationPathToken`.
 *
 * @param value Scope hint, possibly a glob pattern.
 * @returns The literal prefix, or `null` when normalization rejects it or it is ".".
 */
function literalScopePrefix(value: string): string | null {
  const withoutWildcards = value.replace(/\*\*?.*$/, "");
  const withoutTrailingSlash = withoutWildcards.replace(/\/+$/, "");
  const prefix = normalizeValidationPathToken(withoutTrailingSlash);
  return !prefix || prefix === "." ? null : prefix;
}
877
+
878
/**
 * Tests whether a path falls under a scope hint.
 *
 * The hint is tried as a glob via `matchesGlob` first; failing that, its literal prefix
 * must equal the path or be a parent directory of it.
 *
 * @param path Candidate path (normalized internally).
 * @param hint Scope hint: a path or a glob pattern.
 * @returns `true` when the path matches the hint; `false` when either side normalizes to
 *   nothing or no rule matches.
 */
function pathMatchesScopeHint(path: string, hint: string): boolean {
  const candidatePath = normalizeValidationPathToken(path);
  const cleanedHint = hint.trim().replace(/\\/g, "/").replace(/^\.\/+/, "");
  if (!candidatePath || !cleanedHint) {
    return false;
  }
  if (matchesGlob(candidatePath, cleanedHint)) {
    return true;
  }
  const hintPrefix = literalScopePrefix(cleanedHint);
  if (!hintPrefix) {
    return false;
  }
  return candidatePath === hintPrefix || candidatePath.startsWith(`${hintPrefix}/`);
}
887
+
888
/**
 * Classifies whether failed validation runs relate to the paths the task is working on.
 *
 * Runs that passed, or that exited with 127, are ignored. Scope hints are gathered from
 * `targetPath`, `changedPaths`, `planning.targetPaths`, and `planning.scope.writeGlobs`.
 *
 * @param runs Validation run results.
 * @param planning Task planning data supplying target paths and write globs.
 * @param changedPaths Paths changed by the task.
 * @param targetPath Optional primary target path.
 * @returns
 *   - `"none"` when there are no relevant failures, no scope hints, or no usable path tokens;
 *   - `"task_scope"` when the output mentions a hint's literal prefix (4+ characters) or
 *     contains a path token matching a hint;
 *   - `"outside_task_scope"` when path tokens exist but none match any hint.
 */
export function classifyValidationFailureScope(
  runs: ValidationExecutionResult[],
  planning: TaskExecutePlanning,
  changedPaths: string[],
  targetPath?: string,
): "none" | "task_scope" | "outside_task_scope" {
  const relevantFailures = runs.filter((run) => !run.ok && run.exitCode !== 127);
  if (relevantFailures.length === 0) return "none";

  const scopeHints: string[] = [];
  for (const rawHint of [
    targetPath ?? "",
    ...changedPaths,
    ...(planning.targetPaths ?? []),
    ...(planning.scope.writeGlobs ?? []),
  ]) {
    const hint = rawHint.trim().replace(/\\/g, "/");
    if (hint) scopeHints.push(hint);
  }
  if (scopeHints.length === 0) return "none";

  const outputParts: string[] = [];
  for (const run of relevantFailures) {
    if (run.stdout) outputParts.push(run.stdout);
    if (run.stderr) outputParts.push(run.stderr);
  }
  const combined = outputParts.join("\n");

  // Cheap substring pass: very short prefixes are skipped to avoid accidental matches.
  const haystack = combined.toLowerCase().replace(/\\/g, "/");
  const mentionsScopePrefix = scopeHints.some((hint) => {
    const prefix = literalScopePrefix(hint);
    return Boolean(prefix) && prefix!.length >= 4 && haystack.includes(prefix!.toLowerCase());
  });
  if (mentionsScopePrefix) return "task_scope";

  // Token pass: ignore paths that belong to package-manager or dependency directories.
  const pathTokens = extractPathTokensFromValidationOutput(combined).filter(
    (token) => !/^(node_modules|\.bun|bun|npm|pnpm|yarn)\//i.test(token),
  );
  if (pathTokens.length === 0) return "none";

  const touchesScope = pathTokens.some((token) =>
    scopeHints.some((hint) => pathMatchesScopeHint(token, hint)),
  );
  return touchesScope ? "task_scope" : "outside_task_scope";
}
927
+
733
928
  function detectValidationBlocker(runs: ValidationExecutionResult[]): ValidationBlocker | null {
734
929
  const combined = runs
735
930
  .flatMap((run) => [run.stdout, run.stderr])
@@ -910,7 +1105,51 @@ function extractRunnableValidationCommand(step: string): string | null {
910
1105
  }
911
1106
 
912
1107
  function validationCommandKey(command: string): string {
913
- return command.trim().replace(/\s+/g, " ").toLowerCase();
1108
+ const argv = tokenizeValidationCommandArgv(command);
1109
+ if (argv && argv.length > 0) {
1110
+ const normalized = argv.map((entry) => entry.trim()).filter(Boolean);
1111
+ if (normalized[0]?.toLowerCase() === "bunx") {
1112
+ normalized.splice(0, 1, "bun", "x");
1113
+ }
1114
+ return normalized.join(" ").replace(/\s+/g, " ").toLowerCase();
1115
+ }
1116
+ return command
1117
+ .trim()
1118
+ .replace(/\s+/g, " ")
1119
+ .replace(/^bunx\b/i, "bun x")
1120
+ .toLowerCase();
1121
+ }
1122
+
1123
/**
 * Produces a short one-line summary of why a validation run failed.
 *
 * stderr and stdout are combined (stderr first) with ANSI control sequences stripped.
 * The result is chosen in this order:
 * 1. the first known failure pattern that matches, tried in priority order (module
 *    resolution errors, `ERR_SOCKET_BAD_PORT`, TypeScript `error TS…`, then generic `Error:`);
 * 2. the first line containing an error-like keyword;
 * 3. `"timed out"` (plus the elapsed time when finite) if the exit code is 124;
 * 4. an empty string.
 *
 * Matches from steps 1 and 2 are passed through `toSingleLine` with a limit of 180.
 *
 * @param run Exit code, captured output, and elapsed time of the run; all optional.
 * @returns The digest, or `""` when nothing informative was found.
 */
export function extractValidationFailureDigest(run: {
  exitCode?: number;
  stdout?: string;
  stderr?: string;
  elapsedMs?: number;
}): string {
  const output = stripAnsiControlSequences([run.stderr, run.stdout].filter(Boolean).join("\n"));

  const knownFailurePatterns = [
    /\bCannot find module\s+['"`][^'"`\r\n]+['"`][^\r\n]*/i,
    /\bFailed to resolve import\s+['"`][^'"`\r\n]+['"`][^\r\n]*/i,
    /\bCould not resolve\s+['"`]?[^'"`\r\n]+['"`]?[^\r\n]*/i,
    /\bModule not found[^\r\n]*/i,
    /\bERR_SOCKET_BAD_PORT[^\r\n]*/i,
    /\berror TS\d+:[^\r\n]*/i,
    /\bError:\s+[^\r\n]*/i,
  ];
  for (const pattern of knownFailurePatterns) {
    const hit = pattern.exec(output);
    if (hit && hit[0]) {
      return toSingleLine(hit[0], 180);
    }
  }

  // No known pattern: fall back to the first line that at least looks like a failure.
  for (const rawLine of output.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (/\b(error|failed|cannot|could not|timeout|timed out)\b/i.test(line)) {
      if (line) return toSingleLine(line, 180);
    }
  }

  if (Number(run.exitCode) !== 124) {
    return "";
  }
  const elapsedMs = Number(run.elapsedMs);
  const elapsedSuffix = Number.isFinite(elapsedMs) ? ` after ${elapsedMs}ms` : "";
  return `timed out${elapsedSuffix}`;
}
915
1154
 
916
1155
  export function collectRequiredValidationFailures(
@@ -923,7 +1162,8 @@ export function collectRequiredValidationFailures(
923
1162
  .filter((run) => requiredKeys.has(validationCommandKey(run.command)) && !run.ok)
924
1163
  .map((run) => {
925
1164
  const exitCode = Number.isFinite(Number(run.exitCode)) ? Number(run.exitCode) : "unknown";
926
- return `${run.command} exited ${exitCode}`;
1165
+ const digest = extractValidationFailureDigest(run);
1166
+ return `${run.command} exited ${exitCode}${digest ? ` (${digest})` : ""}`;
927
1167
  });
928
1168
  }
929
1169
 
@@ -983,7 +1223,7 @@ function dedupeValidationCommands(...groups: string[][]): string[] {
983
1223
  for (const command of group) {
984
1224
  const trimmed = command.trim();
985
1225
  if (!trimmed) continue;
986
- const key = trimmed.toLowerCase();
1226
+ const key = validationCommandKey(trimmed);
987
1227
  if (seen.has(key)) continue;
988
1228
  seen.add(key);
989
1229
  out.push(trimmed);
@@ -1088,14 +1328,19 @@ export function inferFallbackValidationCommandsForTestTask(
1088
1328
  return candidates.slice(0, 4);
1089
1329
  }
1090
1330
 
1091
- function isTestFocusedTask(
1331
+ export function isTestFocusedTask(
1092
1332
  instruction: string,
1093
1333
  planning: TaskExecutePlanning,
1094
1334
  targetPath?: string,
1095
1335
  ): boolean {
1096
1336
  const lowerInstruction = instruction.toLowerCase();
1097
1337
  if (
1098
- /\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/.test(lowerInstruction)
1338
+ /\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b.{0,80}\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/.test(
1339
+ lowerInstruction,
1340
+ ) ||
1341
+ /\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b.{0,80}\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b/.test(
1342
+ lowerInstruction,
1343
+ )
1099
1344
  ) {
1100
1345
  return true;
1101
1346
  }
@@ -1107,7 +1352,9 @@ function isTestFocusedTask(
1107
1352
  if (pathHints.some((entry) => isLikelyTestPath(entry))) return true;
1108
1353
  if (
1109
1354
  planning.acceptanceCriteria.some((entry) =>
1110
- /\b(test|tests|coverage|unit|integration|negative|invalid|valid)\b/i.test(entry),
1355
+ /\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b.{0,80}\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/i.test(
1356
+ entry,
1357
+ ),
1111
1358
  )
1112
1359
  ) {
1113
1360
  return true;
@@ -1143,7 +1390,12 @@ async function runDeterministicQualityGate(
1143
1390
  repo: string,
1144
1391
  params: Record<string, unknown>,
1145
1392
  runtimeConfig: WorkerpalsRuntimeConfig,
1393
+ qualityGatePolicy: QualityGatePolicy,
1146
1394
  onLog?: (stream: "stdout" | "stderr", line: string) => void,
1395
+ validationRetryState?: {
1396
+ previousFailureDigests?: Map<string, string>;
1397
+ revisionAttempt?: number;
1398
+ },
1147
1399
  ): Promise<DeterministicQualityResult> {
1148
1400
  const instruction = String(params.instruction ?? "");
1149
1401
  const targetPath = String(params.targetPath ?? params.path ?? "").trim() || undefined;
@@ -1154,16 +1406,25 @@ async function runDeterministicQualityGate(
1154
1406
  }
1155
1407
  const isTestTask = isTestFocusedTask(instruction, planning, targetPath);
1156
1408
  const hasRequiredValidationCriteria = requiredValidationSteps.length > 0;
1157
- if (!isTestTask && !hasRequiredValidationCriteria) {
1409
+ if (
1410
+ !qualityGatePolicy.scopeGateEnabled &&
1411
+ !qualityGatePolicy.validationGateEnabled &&
1412
+ !qualityGatePolicy.criticGateEnabled &&
1413
+ !isTestTask &&
1414
+ !hasRequiredValidationCriteria
1415
+ ) {
1158
1416
  return {
1159
1417
  ok: true,
1160
1418
  skipped: true,
1161
1419
  issues: [],
1420
+ scopeIssues: [],
1421
+ validationIssues: [],
1162
1422
  changedPaths: [],
1163
1423
  changedTestPaths: [],
1164
1424
  validationRuns: [],
1165
1425
  requiredValidationFailures: [],
1166
1426
  blocker: null,
1427
+ validationFailureScope: "none",
1167
1428
  };
1168
1429
  }
1169
1430
 
@@ -1176,15 +1437,47 @@ async function runDeterministicQualityGate(
1176
1437
  ),
1177
1438
  );
1178
1439
  const issues: string[] = [];
1179
- if (changedTestPaths.length === 0) {
1180
- issues.push("No relevant test file was modified for this test-focused task.");
1440
+ const scopeIssues: string[] = [];
1441
+ const validationIssues: string[] = [];
1442
+ const addScopeIssue = (issue: string): void => {
1443
+ scopeIssues.push(issue);
1444
+ issues.push(`ScopeGate: ${issue}`);
1445
+ };
1446
+ const addValidationIssue = (issue: string): void => {
1447
+ validationIssues.push(issue);
1448
+ issues.push(`ValidationGate: ${issue}`);
1449
+ };
1450
+
1451
+ if (qualityGatePolicy.scopeGateEnabled) {
1452
+ if (!statusResult.ok) {
1453
+ addScopeIssue("could not evaluate changed paths from git status.");
1454
+ }
1455
+ for (const issue of collectWriteScopeIssuesFromChangedPaths(changedPaths, planning)) {
1456
+ addScopeIssue(issue);
1457
+ }
1458
+ if (isTestTask && changedTestPaths.length === 0) {
1459
+ addScopeIssue("found no relevant test file modified for this test-focused task.");
1460
+ }
1461
+ if (
1462
+ isTestTask &&
1463
+ changedTestPaths.length > 0 &&
1464
+ !hasBalancedPositiveNegativeAssertions(changedTestPaths, repo)
1465
+ ) {
1466
+ addScopeIssue(
1467
+ "found changed test files without both positive and negative assertion coverage (expected both).",
1468
+ );
1469
+ }
1470
+ for (const issue of scopeIssues) {
1471
+ onLog?.("stderr", `[ScopeGate] ${issue}`);
1472
+ }
1473
+ } else {
1474
+ onLog?.("stdout", "[ScopeGate] Disabled by workerpals.quality_scope_gate_enabled=false.");
1181
1475
  }
1182
- if (
1183
- changedTestPaths.length > 0 &&
1184
- !hasBalancedPositiveNegativeAssertions(changedTestPaths, repo)
1185
- ) {
1186
- issues.push(
1187
- "Changed test files do not show both positive and negative assertion coverage (expected both).",
1476
+
1477
+ if (!qualityGatePolicy.validationGateEnabled) {
1478
+ onLog?.(
1479
+ "stdout",
1480
+ "[ValidationGate] Disabled by workerpals.quality_validation_gate_enabled=false.",
1188
1481
  );
1189
1482
  }
1190
1483
 
@@ -1207,28 +1500,30 @@ async function runDeterministicQualityGate(
1207
1500
  if (!Number.isFinite(value)) return 180_000;
1208
1501
  return Math.max(1_000, Math.min(7_200_000, Math.floor(value)));
1209
1502
  })();
1503
+ let requiredValidationFailures: string[] = [];
1504
+ if (qualityGatePolicy.validationGateEnabled) {
1210
1505
  if (hasRequiredValidationCriteria && requiredRunnableSteps.length === 0) {
1211
- issues.push(
1212
- "vision.md testing criteria were provided, but none contained a runnable validation command.",
1506
+ addValidationIssue(
1507
+ "found vision.md testing criteria, but none contained a runnable validation command.",
1213
1508
  );
1214
1509
  }
1215
1510
  if (commandsToRun.length === 0) {
1216
- issues.push(
1511
+ addValidationIssue(
1217
1512
  hasRequiredValidationCriteria
1218
- ? "No runnable validation command was available from vision.md testing criteria or planning.validationSteps."
1219
- : "No runnable validation command was provided in planning.validationSteps (expected at least one test command).",
1513
+ ? "found no runnable validation command from vision.md testing criteria or planning.validationSteps."
1514
+ : "found no runnable validation command in planning.validationSteps (expected at least one test command).",
1220
1515
  );
1221
1516
  } else {
1222
1517
  if (requiredRunnableSteps.length > 0) {
1223
1518
  onLog?.(
1224
1519
  "stdout",
1225
- `[QualityGate] Running required vision.md testing criteria: ${requiredRunnableSteps.join(" | ")}`,
1520
+ `[ValidationGate] Running required vision.md testing criteria: ${requiredRunnableSteps.join(" | ")}`,
1226
1521
  );
1227
1522
  }
1228
1523
  if (isTestTask && plannerRunnableSteps.length === 0 && fallbackValidationSteps.length > 0) {
1229
1524
  onLog?.(
1230
1525
  "stdout",
1231
- `[QualityGate] No runnable planning.validationSteps found; using fallback validation command(s): ${commandsToRun.join(" | ")}`,
1526
+ `[ValidationGate] No runnable planning.validationSteps found; using fallback validation command(s): ${commandsToRun.join(" | ")}`,
1232
1527
  );
1233
1528
  }
1234
1529
  const toolchainPlan = buildToolchainPlan({
@@ -1238,7 +1533,7 @@ async function runDeterministicQualityGate(
1238
1533
  if (toolchainPlan.requirements.length > 0) {
1239
1534
  onLog?.(
1240
1535
  "stdout",
1241
- `[QualityGate] Toolchain preflight: source=${toolchainPlan.environmentSource}, required=${toolchainPlan.requirements
1536
+ `[ValidationGate] Toolchain preflight: source=${toolchainPlan.environmentSource}, required=${toolchainPlan.requirements
1242
1537
  .map((requirement) => requirement.tool)
1243
1538
  .join(", ")}`,
1244
1539
  );
@@ -1250,7 +1545,7 @@ async function runDeterministicQualityGate(
1250
1545
  if (missingToolRequirements.length > 0) {
1251
1546
  onLog?.(
1252
1547
  "stderr",
1253
- `[QualityGate] Toolchain preflight blocked dependent validation command(s): ${formatMissingToolRequirements(
1548
+ `[ValidationGate] Toolchain preflight blocked dependent validation command(s): ${formatMissingToolRequirements(
1254
1549
  missingToolRequirements,
1255
1550
  )}`,
1256
1551
  );
@@ -1275,19 +1570,47 @@ async function runDeterministicQualityGate(
1275
1570
  });
1276
1571
  onLog?.(
1277
1572
  "stderr",
1278
- `[QualityGate] Quality gate validation skipped (missing toolchain): ${command}`,
1573
+ `[ValidationGate] Validation skipped (missing toolchain): ${command}`,
1279
1574
  );
1280
1575
  continue;
1281
1576
  }
1282
- onLog?.("stdout", `[QualityGate] Quality gate validation: running "${command}"`);
1577
+ const previousDigest = validationRetryState?.previousFailureDigests?.get(
1578
+ validationCommandKey(command),
1579
+ );
1580
+ if (
1581
+ previousDigest &&
1582
+ Number(validationRetryState?.revisionAttempt ?? 0) > 0 &&
1583
+ isLongRunningBrowserValidationCommand(command) &&
1584
+ isBrowserValidationInfrastructureDigest(previousDigest)
1585
+ ) {
1586
+ const stderr =
1587
+ `Skipped repeated browser validation after the same command failed in an earlier revision: ${previousDigest}. ` +
1588
+ "Run it once after the underlying blocker changes.";
1589
+ validationRuns.push({
1590
+ step: command,
1591
+ command,
1592
+ ok: false,
1593
+ exitCode: 124,
1594
+ stdout: "",
1595
+ stderr,
1596
+ elapsedMs: 1,
1597
+ });
1598
+ onLog?.(
1599
+ "stderr",
1600
+ `[ValidationGate] Skipped repeated long browser validation: ${command} (${previousDigest})`,
1601
+ );
1602
+ continue;
1603
+ }
1604
+ onLog?.("stdout", `[ValidationGate] Running "${command}"`);
1283
1605
  const run = await runValidationCommand(
1284
1606
  repo,
1285
1607
  command,
1286
- qualityValidationStepTimeoutMs,
1608
+ resolveValidationCommandTimeoutMs(command, qualityValidationStepTimeoutMs),
1287
1609
  outputPolicy,
1288
1610
  );
1289
1611
  validationRuns.push(run);
1290
- const runSummary = `[QualityGate] Quality gate validation ${run.ok ? "passed" : "failed"} (${run.elapsedMs}ms, exit ${run.exitCode}): ${command}`;
1612
+ const digest = run.ok ? "" : extractValidationFailureDigest(run);
1613
+ const runSummary = `[ValidationGate] ${run.ok ? "Passed" : "Failed"} (${run.elapsedMs}ms, exit ${run.exitCode}): ${command}${digest ? ` - ${digest}` : ""}`;
1291
1614
  onLog?.(run.ok ? "stdout" : "stderr", runSummary);
1292
1615
  }
1293
1616
  // exit 127 = command not found: separate tool-availability issues from real test failures.
@@ -1297,43 +1620,58 @@ async function runDeterministicQualityGate(
1297
1620
  const cmds = notFoundRuns.map((run) => run.command).join(", ");
1298
1621
  onLog?.(
1299
1622
  "stderr",
1300
- `[QualityGate] Some validation commands not found (exit 127 wrong tool?): ${cmds}. This project uses Bun: prefer "bun test".`,
1623
+ `[ValidationGate] Some validation commands not found (exit 127 - wrong tool?): ${cmds}. This project uses Bun: prefer "bun test".`,
1301
1624
  );
1302
1625
  }
1303
1626
  if (executedRuns.length > 0 && executedRuns.every((run) => !run.ok)) {
1304
- issues.push("Validation commands were executed but none passed.");
1627
+ addValidationIssue("executed validation commands, but none passed.");
1305
1628
  } else if (executedRuns.length === 0 && notFoundRuns.length > 0) {
1306
- issues.push(
1307
- 'No validation command could be run (command not found). Use "bun test" or another available test runner.',
1629
+ addValidationIssue(
1630
+ 'could not run any validation command (command not found). Use "bun test" or another available test runner.',
1308
1631
  );
1309
1632
  }
1310
1633
  if (
1311
1634
  isTestTask &&
1312
1635
  !validationRuns.some((run) => /\b(test|pytest|coverage|vitest|jest)\b/i.test(run.command))
1313
1636
  ) {
1314
- issues.push("Validation steps did not execute a recognizable test command.");
1637
+ addValidationIssue("did not execute a recognizable test command.");
1315
1638
  }
1316
1639
  }
1317
- const requiredValidationFailures = collectRequiredValidationFailures(
1640
+ requiredValidationFailures = collectRequiredValidationFailures(
1318
1641
  requiredRunnableSteps,
1319
1642
  validationRuns,
1320
1643
  );
1321
1644
  if (requiredValidationFailures.length > 0) {
1322
- issues.push(
1645
+ addValidationIssue(
1323
1646
  `Required vision.md validation failed: ${requiredValidationFailures.join("; ")}`,
1324
1647
  );
1325
1648
  }
1326
- const blocker = detectValidationBlocker(validationRuns);
1649
+ }
1650
+ const blocker = qualityGatePolicy.validationGateEnabled
1651
+ ? detectValidationBlocker(validationRuns)
1652
+ : null;
1653
+ const scopedValidationFailure = qualityGatePolicy.validationGateEnabled
1654
+ ? classifyValidationFailureScope(validationRuns, planning, changedPaths, targetPath)
1655
+ : "none";
1656
+ if (scopedValidationFailure === "outside_task_scope") {
1657
+ onLog?.(
1658
+ "stderr",
1659
+ "[ValidationGate] Required validation failures appear outside the task write scope; treating them as publish blockers, not repair instructions.",
1660
+ );
1661
+ }
1327
1662
 
1328
1663
  return {
1329
1664
  ok: issues.length === 0 && blocker === null,
1330
1665
  skipped: false,
1331
1666
  issues,
1667
+ scopeIssues,
1668
+ validationIssues,
1332
1669
  changedPaths,
1333
1670
  changedTestPaths,
1334
1671
  validationRuns,
1335
1672
  requiredValidationFailures,
1336
1673
  blocker,
1674
+ validationFailureScope: scopedValidationFailure,
1337
1675
  };
1338
1676
  }
1339
1677
 
@@ -1456,7 +1794,7 @@ async function runTaskCriticReview(
1456
1794
  if (lowered.includes("response_format")) {
1457
1795
  onLog?.(
1458
1796
  "stdout",
1459
- "[QualityGate] Critic fallback: response_format json_object unsupported; retrying without strict response_format.",
1797
+ "[CriticGate] fallback: response_format json_object unsupported; retrying without strict response_format.",
1460
1798
  );
1461
1799
  request = await runCriticRequest(null);
1462
1800
  }
@@ -1464,7 +1802,7 @@ async function runTaskCriticReview(
1464
1802
  if (!request.response.ok) {
1465
1803
  onLog?.(
1466
1804
  "stderr",
1467
- `[QualityGate] Critic review request failed (${request.response.status}): ${toSingleLine(request.text, 240)}`,
1805
+ `[CriticGate] review request failed (${request.response.status}): ${toSingleLine(request.text, 240)}`,
1468
1806
  );
1469
1807
  return null;
1470
1808
  }
@@ -1480,7 +1818,7 @@ async function runTaskCriticReview(
1480
1818
  if (!reviewObj) {
1481
1819
  onLog?.(
1482
1820
  "stderr",
1483
- `[QualityGate] Critic produced non-JSON content; skipping critic gate. Raw: ${toSingleLine(
1821
+ `[CriticGate] produced non-JSON content; skipping critic gate. Raw: ${toSingleLine(
1484
1822
  content,
1485
1823
  220,
1486
1824
  )}`,
@@ -1509,7 +1847,7 @@ async function runTaskCriticReview(
1509
1847
  } catch (err) {
1510
1848
  onLog?.(
1511
1849
  "stderr",
1512
- `[QualityGate] Critic review unavailable: ${toSingleLine(err, 220)} (continuing without critic gate).`,
1850
+ `[CriticGate] review unavailable: ${toSingleLine(err, 220)} (continuing without critic gate).`,
1513
1851
  );
1514
1852
  return null;
1515
1853
  }
@@ -1520,6 +1858,8 @@ export function buildQualityRevisionHint(
1520
1858
  critic: CriticReview | null,
1521
1859
  planning: TaskExecutePlanning,
1522
1860
  reviewFixContext?: ReviewFixContext | null,
1861
+ validationRuns: ValidationExecutionResult[] = [],
1862
+ validationBlocker: ValidationBlocker | null = null,
1523
1863
  ): string {
1524
1864
  const lines: string[] = [];
1525
1865
  lines.push("Quality revision required before completion.");
@@ -1552,6 +1892,26 @@ export function buildQualityRevisionHint(
1552
1892
  lines.push("Deterministic quality issues:");
1553
1893
  for (const issue of issues) lines.push(`- ${issue}`);
1554
1894
  }
1895
+ if (validationBlocker) {
1896
+ lines.push(
1897
+ `Validation blocker: ${validationBlocker.category} - ${toSingleLine(
1898
+ validationBlocker.detail,
1899
+ 300,
1900
+ )}`,
1901
+ );
1902
+ }
1903
+ const failedValidationRuns = validationRuns.filter((run) => !run.ok);
1904
+ if (failedValidationRuns.length > 0) {
1905
+ lines.push("Validation failure diagnostics:");
1906
+ for (const run of failedValidationRuns.slice(0, 5)) {
1907
+ lines.push(`- ${run.command} failed with exit ${run.exitCode} after ${run.elapsedMs}ms.`);
1908
+ const output = toSingleLine(
1909
+ stripAnsiControlSequences([run.stderr, run.stdout].filter(Boolean).join("\n")),
1910
+ 700,
1911
+ );
1912
+ if (output) lines.push(` Output: ${output}`);
1913
+ }
1914
+ }
1555
1915
  if (critic) {
1556
1916
  lines.push(`Critic score: ${critic.score.toFixed(1)} / 10`);
1557
1917
  if (critic.mustFix.length > 0) {
@@ -3318,9 +3678,10 @@ async function generateCommitMessageFromDiffViaCodex(
3318
3678
  repo: string,
3319
3679
  runtimeConfig: WorkerpalsRuntimeConfig,
3320
3680
  ): Promise<string | null> {
3681
+ const model = runtimeConfig.workerpals.llm.model.trim();
3682
+ if (!model) return null;
3321
3683
  const codexPrefix = await resolveCodexCommandPrefix(repo, runtimeConfig.workerpals.llm.codexBin);
3322
3684
  if (!codexPrefix) return null;
3323
- const model = runtimeConfig.workerpals.llm.model.trim();
3324
3685
  const timeoutMs = (() => {
3325
3686
  const value = Number(runtimeConfig.workerpals.llm.codexTimeoutMs);
3326
3687
  if (!Number.isFinite(value)) return 120_000;
@@ -3355,6 +3716,7 @@ async function generateCommitMessageFromDiffViaCodex(
3355
3716
  const stdinText = `${prompt.systemPrompt}\n\n${prompt.userMessage}`;
3356
3717
  const proc = Bun.spawn(cmd, {
3357
3718
  cwd: repo,
3719
+ env: buildWorkerSandboxWritableEnv(repo),
3358
3720
  stdout: "pipe",
3359
3721
  stderr: "pipe",
3360
3722
  stdin: new Blob([stdinText]),
@@ -3587,40 +3949,33 @@ function taskExecuteOrigin(params: Record<string, unknown>): "autonomy" | "user"
3587
3949
  return "user";
3588
3950
  }
3589
3951
 
3590
- async function collectWriteScopeWarnings(
3591
- repo: string,
3952
+ function collectWriteScopeIssuesFromChangedPaths(
3953
+ changedPaths: string[],
3592
3954
  planning: TaskExecutePlanning,
3593
- ): Promise<{ warnings: string[] }> {
3955
+ ): string[] {
3594
3956
  const writeGlobs = toStringArray(planning.scope.writeGlobs ?? []);
3595
- if (writeGlobs.length === 0) return { warnings: [] };
3596
-
3597
- const statusResult = await git(repo, ["status", "--porcelain"]);
3598
- if (!statusResult.ok) {
3599
- return { warnings: ["Unable to evaluate changed paths for scope suggestion check."] };
3600
- }
3957
+ if (writeGlobs.length === 0) return [];
3601
3958
 
3602
- const changedPaths = parseChangedPathsFromStatus(statusResult.stdout)
3959
+ const normalizedChangedPaths = changedPaths
3603
3960
  .map((entry) => normalizeStagePath(entry))
3604
3961
  .filter((entry): entry is string => Boolean(entry) && entry !== ".");
3605
- if (changedPaths.length === 0) return { warnings: [] };
3962
+ if (normalizedChangedPaths.length === 0) return [];
3606
3963
 
3607
3964
  const forbidden = toStringArray(planning.scope.forbiddenGlobs ?? []);
3608
- const warnings: string[] = [];
3609
- const outOfScope = changedPaths.filter(
3965
+ const issues: string[] = [];
3966
+ const outOfScope = normalizedChangedPaths.filter(
3610
3967
  (path) => !writeGlobs.some((glob) => matchesGlob(path, glob)),
3611
3968
  );
3612
3969
  if (outOfScope.length > 0) {
3613
- warnings.push(`Scope suggestion: modified paths outside writeGlobs: ${outOfScope.join(", ")}`);
3970
+ issues.push(`modified paths outside writeGlobs: ${outOfScope.join(", ")}`);
3614
3971
  }
3615
- const forbiddenTouched = changedPaths.filter((path) =>
3972
+ const forbiddenTouched = normalizedChangedPaths.filter((path) =>
3616
3973
  forbidden.some((glob) => matchesGlob(path, glob)),
3617
3974
  );
3618
3975
  if (forbiddenTouched.length > 0) {
3619
- warnings.push(
3620
- `Scope suggestion: modified paths matching forbiddenGlobs: ${forbiddenTouched.join(", ")}`,
3621
- );
3976
+ issues.push(`modified paths matching forbiddenGlobs: ${forbiddenTouched.join(", ")}`);
3622
3977
  }
3623
- return { warnings };
3978
+ return issues;
3624
3979
  }
3625
3980
 
3626
3981
  function sanitizeTaskExecutePlanningPathHints(value: unknown): unknown {
@@ -3945,7 +4300,7 @@ async function runCodexCriticReview(
3945
4300
  if (!codexPrefix) {
3946
4301
  onLog?.(
3947
4302
  "stderr",
3948
- "[QualityGate] Codex critic: unable to resolve Codex CLI command (workerpals.llm.codex_bin/PATH); skipping.",
4303
+ "[CriticGate] Codex: unable to resolve Codex CLI command (workerpals.llm.codex_bin/PATH); skipping.",
3949
4304
  );
3950
4305
  return null;
3951
4306
  }
@@ -4026,6 +4381,7 @@ async function runCodexCriticReview(
4026
4381
  try {
4027
4382
  const proc = Bun.spawn(cmd, {
4028
4383
  cwd: repo,
4384
+ env: buildWorkerSandboxWritableEnv(repo),
4029
4385
  stdout: "pipe",
4030
4386
  stderr: "pipe",
4031
4387
  stdin: new Blob([criticInstruction]),
@@ -4045,14 +4401,14 @@ async function runCodexCriticReview(
4045
4401
  clearTimeout(timer);
4046
4402
 
4047
4403
  if (timedOut) {
4048
- onLog?.("stderr", "[QualityGate] Codex critic timed out; skipping.");
4404
+ onLog?.("stderr", "[CriticGate] Codex timed out; skipping.");
4049
4405
  return null;
4050
4406
  }
4051
4407
  if (exitCode !== 0) {
4052
4408
  const stderrText = await new Response(proc.stderr).text();
4053
4409
  onLog?.(
4054
4410
  "stderr",
4055
- `[QualityGate] Codex critic exited ${exitCode}: ${toSingleLine(stderrText, 220)}`,
4411
+ `[CriticGate] Codex exited ${exitCode}: ${toSingleLine(stderrText, 220)}`,
4056
4412
  );
4057
4413
  return null;
4058
4414
  }
@@ -4070,7 +4426,7 @@ async function runCodexCriticReview(
4070
4426
  }
4071
4427
 
4072
4428
  if (!lastMessage) {
4073
- onLog?.("stderr", "[QualityGate] Codex critic: no output message captured; skipping.");
4429
+ onLog?.("stderr", "[CriticGate] Codex: no output message captured; skipping.");
4074
4430
  return null;
4075
4431
  }
4076
4432
 
@@ -4078,7 +4434,7 @@ async function runCodexCriticReview(
4078
4434
  if (!reviewObj) {
4079
4435
  onLog?.(
4080
4436
  "stderr",
4081
- `[QualityGate] Codex critic returned non-JSON: ${toSingleLine(lastMessage, 220)}`,
4437
+ `[CriticGate] Codex returned non-JSON: ${toSingleLine(lastMessage, 220)}`,
4082
4438
  );
4083
4439
  return null;
4084
4440
  }
@@ -4094,7 +4450,7 @@ async function runCodexCriticReview(
4094
4450
  const revisionGuidance = String(reviewObj.revision_guidance ?? "")
4095
4451
  .trim()
4096
4452
  .slice(0, 2000);
4097
- onLog?.("stdout", `[QualityGate] Codex critic score: ${score}/10`);
4453
+ onLog?.("stdout", `[CriticGate] Codex score: ${score}/10`);
4098
4454
  return {
4099
4455
  score,
4100
4456
  findings,
@@ -4103,7 +4459,7 @@ async function runCodexCriticReview(
4103
4459
  raw: compactJobOutput(lastMessage, outputPolicyForRuntime(runtimeConfig)),
4104
4460
  };
4105
4461
  } catch (err) {
4106
- onLog?.("stderr", `[QualityGate] Codex critic error: ${toSingleLine(err, 220)} (skipping).`);
4462
+ onLog?.("stderr", `[CriticGate] Codex error: ${toSingleLine(err, 220)} (skipping).`);
4107
4463
  return null;
4108
4464
  }
4109
4465
  }
@@ -4189,12 +4545,25 @@ export async function executeJob(
4189
4545
  const reviewFixContext = extractReviewFixContext(normalizedParams);
4190
4546
  const qualityGatePolicy = deriveQualityGatePolicy(normalizedParams, runtimeConfig);
4191
4547
  const qualityMaxAutoRevisions = qualityGatePolicy.maxAutoRevisions;
4548
+ const qualityValidationMaxAutoRevisions = qualityGatePolicy.validationMaxAutoRevisions;
4549
+ const qualityRevisionLoopMax = Math.max(
4550
+ qualityMaxAutoRevisions,
4551
+ qualityValidationMaxAutoRevisions,
4552
+ );
4192
4553
  const qualitySoftPassOnExhausted = qualityGatePolicy.softPassOnExhausted;
4193
4554
  const qualityCriticMinScore = qualityGatePolicy.criticMinScore;
4194
4555
 
4195
4556
  onLog?.(
4196
4557
  "stdout",
4197
- `[QualityGate] Policy: max_auto_revisions=${qualityMaxAutoRevisions}, soft_pass_on_exhausted=${qualitySoftPassOnExhausted ? "true" : "false"}, critic_min_score=${qualityCriticMinScore}`,
4558
+ `[QualityGate] Policy: max_auto_revisions=${qualityMaxAutoRevisions}, validation_max_auto_revisions=${qualityValidationMaxAutoRevisions}, soft_pass_on_exhausted=${qualitySoftPassOnExhausted ? "true" : "false"}, critic_min_score=${qualityCriticMinScore}`,
4559
+ );
4560
+ onLog?.(
4561
+ "stdout",
4562
+ `[QualityGate] Gates: scope=${qualityGatePolicy.scopeGateEnabled ? "on" : "off"}, validation=${
4563
+ qualityGatePolicy.validationGateEnabled ? "on" : "off"
4564
+ }, critic=${qualityGatePolicy.criticGateEnabled ? "on" : "off"}, publish=${
4565
+ qualityGatePolicy.publishGateEnabled ? "on" : "off"
4566
+ }`,
4198
4567
  );
4199
4568
  if (qualityGatePolicy.mode === "review_fix") {
4200
4569
  const priorScore =
@@ -4218,7 +4587,8 @@ export async function executeJob(
4218
4587
 
4219
4588
  let revisionAttempt = 0;
4220
4589
  let revisionHint = "";
4221
- while (revisionAttempt <= qualityMaxAutoRevisions) {
4590
+ const previousValidationFailureDigests = new Map<string, string>();
4591
+ while (revisionAttempt <= qualityRevisionLoopMax) {
4222
4592
  const attemptParams: Record<string, unknown> = { ...normalizedParams };
4223
4593
  if (revisionHint) {
4224
4594
  attemptParams.qualityRevisionHint = revisionHint;
@@ -4306,50 +4676,154 @@ export async function executeJob(
4306
4676
  };
4307
4677
  }
4308
4678
 
4309
- const scopeCheck = await collectWriteScopeWarnings(repo, planning);
4310
- for (const warning of scopeCheck.warnings) {
4311
- onLog?.("stdout", `[TaskExecute] ${warning}`);
4679
+ const quality = await runDeterministicQualityGate(
4680
+ repo,
4681
+ attemptParams,
4682
+ runtimeConfig,
4683
+ qualityGatePolicy,
4684
+ onLog,
4685
+ {
4686
+ previousFailureDigests: previousValidationFailureDigests,
4687
+ revisionAttempt,
4688
+ },
4689
+ );
4690
+ for (const run of quality.validationRuns) {
4691
+ if (run.ok) continue;
4692
+ const digest = extractValidationFailureDigest(run);
4693
+ if (digest) previousValidationFailureDigests.set(validationCommandKey(run.command), digest);
4312
4694
  }
4313
-
4314
- const quality = await runDeterministicQualityGate(repo, attemptParams, runtimeConfig, onLog);
4315
- const critic = quality.skipped
4316
- ? null
4317
- : executor === "openai_codex"
4318
- ? await runCodexCriticReview(repo, attemptParams, quality, runtimeConfig, onLog)
4319
- : await runTaskCriticReview(repo, attemptParams, quality, runtimeConfig, onLog);
4320
- const effectiveQualityIssues = relaxAdvisoryQualityIssues(
4695
+ const validationOutsideTaskScope =
4696
+ quality.validationFailureScope === "outside_task_scope";
4697
+ const qualityForCritic: DeterministicQualityResult = validationOutsideTaskScope
4698
+ ? {
4699
+ ...quality,
4700
+ issues: quality.issues.filter((issue) => !issue.startsWith("ValidationGate:")),
4701
+ validationIssues: [],
4702
+ validationRuns: [],
4703
+ blocker: null,
4704
+ }
4705
+ : quality;
4706
+ const critic =
4707
+ quality.skipped || !qualityGatePolicy.criticGateEnabled
4708
+ ? null
4709
+ : executor === "openai_codex"
4710
+ ? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
4711
+ : await runTaskCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog);
4712
+ if (!qualityGatePolicy.criticGateEnabled) {
4713
+ onLog?.("stdout", "[CriticGate] Disabled by workerpals.quality_critic_gate_enabled=false.");
4714
+ }
4715
+ const advisoryRelaxedQualityIssues = relaxAdvisoryQualityIssues(
4321
4716
  quality.issues,
4322
4717
  quality.validationRuns,
4323
4718
  critic,
4324
4719
  qualityCriticMinScore,
4325
4720
  );
4326
- if (effectiveQualityIssues.length !== quality.issues.length) {
4721
+ let effectiveQualityIssues = advisoryRelaxedQualityIssues;
4722
+ if (validationOutsideTaskScope) {
4723
+ effectiveQualityIssues = effectiveQualityIssues.filter(
4724
+ (issue) => !issue.startsWith("ValidationGate:"),
4725
+ );
4726
+ if (effectiveQualityIssues.length !== quality.issues.length) {
4727
+ onLog?.(
4728
+ "stderr",
4729
+ "[ValidationGate] Validation failures are outside the task scope; they will block publishing but will not drive another code revision.",
4730
+ );
4731
+ }
4732
+ }
4733
+ if (
4734
+ !validationOutsideTaskScope &&
4735
+ advisoryRelaxedQualityIssues.length !== quality.issues.length
4736
+ ) {
4327
4737
  onLog?.(
4328
4738
  "stdout",
4329
4739
  "[QualityGate] Assertion-balance heuristic downgraded to advisory because validation passed and critic score met threshold.",
4330
4740
  );
4331
4741
  }
4332
4742
  const deterministicRequiresRevision =
4333
- effectiveQualityIssues.length > 0 || quality.blocker !== null;
4743
+ effectiveQualityIssues.length > 0 ||
4744
+ (quality.blocker !== null && !validationOutsideTaskScope);
4334
4745
  const criticRequiresRevision = Boolean(critic && critic.score < qualityCriticMinScore);
4746
+ if (
4747
+ !qualityGatePolicy.publishGateEnabled &&
4748
+ (deterministicRequiresRevision || criticRequiresRevision)
4749
+ ) {
4750
+ onLog?.(
4751
+ "stderr",
4752
+ "[PublishGate] Disabled by workerpals.quality_publish_gate_enabled=false; returning worker result despite gate failures.",
4753
+ );
4754
+ return {
4755
+ ...result,
4756
+ summary: `${result.summary} (publish gate disabled; quality gate findings were advisory)`,
4757
+ stderr: truncate(
4758
+ [
4759
+ result.stderr ?? "",
4760
+ ...quality.validationRuns.flatMap((run) => [run.stdout, run.stderr]).filter(Boolean),
4761
+ critic ? `Critic raw: ${critic.raw}` : "",
4762
+ ]
4763
+ .filter(Boolean)
4764
+ .join("\n"),
4765
+ outputPolicyForRuntime(runtimeConfig),
4766
+ ),
4767
+ exitCode: typeof result.exitCode === "number" ? result.exitCode : 0,
4768
+ };
4769
+ }
4335
4770
 
4336
4771
  if (!deterministicRequiresRevision && !criticRequiresRevision) {
4772
+ if (quality.requiredValidationFailures.length > 0) {
4773
+ const requiredSummary = `Required vision.md validation blocked publishing: ${quality.requiredValidationFailures.join("; ")}`;
4774
+ const diagnostics = truncate(
4775
+ [
4776
+ result.stderr ?? "",
4777
+ validationOutsideTaskScope
4778
+ ? "Validation failures appear outside the task write scope and are treated as pre-existing repo blockers."
4779
+ : "",
4780
+ ...quality.validationRuns.flatMap((run) => [run.stdout, run.stderr]).filter(Boolean),
4781
+ ]
4782
+ .filter(Boolean)
4783
+ .join("\n"),
4784
+ outputPolicyForRuntime(runtimeConfig),
4785
+ );
4786
+ onLog?.("stderr", `[QualityGate] ${requiredSummary}`);
4787
+ return {
4788
+ ok: false,
4789
+ summary: requiredSummary,
4790
+ stdout: result.stdout,
4791
+ stderr: diagnostics,
4792
+ exitCode: 4,
4793
+ };
4794
+ }
4337
4795
  if (critic) {
4338
4796
  onLog?.(
4339
4797
  "stdout",
4340
- `[QualityGate] Critic review score ${critic.score.toFixed(1)}/10 (threshold ${qualityCriticMinScore}).`,
4798
+ `[CriticGate] review score ${critic.score.toFixed(1)}/10 (threshold ${qualityCriticMinScore}).`,
4341
4799
  );
4342
4800
  }
4343
4801
  return result;
4344
4802
  }
4345
4803
 
4804
+ const blockerIssue = quality.blocker
4805
+ ? [
4806
+ `Validation blocker (${quality.blocker.category}): ${toSingleLine(
4807
+ quality.blocker.detail,
4808
+ 240,
4809
+ )}`,
4810
+ ]
4811
+ : [];
4346
4812
  const issues = buildQualityGateRevisionIssues(
4347
- effectiveQualityIssues,
4813
+ [...effectiveQualityIssues, ...blockerIssue],
4348
4814
  critic,
4349
4815
  qualityCriticMinScore,
4350
4816
  );
4817
+ const activeMaxAutoRevisions = revisionLimitForQualityGateFailures({
4818
+ policy: qualityGatePolicy,
4819
+ qualityIssues: effectiveQualityIssues,
4820
+ requiredValidationFailures: validationOutsideTaskScope
4821
+ ? []
4822
+ : quality.requiredValidationFailures,
4823
+ blocker: validationOutsideTaskScope ? null : quality.blocker,
4824
+ });
4351
4825
  const issueSummary = issues.map((entry) => toSingleLine(entry, 180)).join(" | ");
4352
- if (quality.blocker) {
4826
+ if (quality.blocker && !validationOutsideTaskScope) {
4353
4827
  const blockerSummary = `Quality gate blocked by ${quality.blocker.category} issue: ${quality.blocker.detail}`;
4354
4828
  const blockerDiagnostics = truncate(
4355
4829
  [
@@ -4358,7 +4832,23 @@ export async function executeJob(
4358
4832
  ].join("\n"),
4359
4833
  outputPolicyForRuntime(runtimeConfig),
4360
4834
  );
4361
- if (quality.requiredValidationFailures.length > 0) {
4835
+ const requiredValidationCanRevise = shouldReviseRequiredValidationBlocker({
4836
+ requiredValidationFailures: quality.requiredValidationFailures,
4837
+ blocker: quality.blocker,
4838
+ revisionAttempt,
4839
+ maxAutoRevisions: qualityValidationMaxAutoRevisions,
4840
+ outsideTaskScope: validationOutsideTaskScope,
4841
+ });
4842
+ if (requiredValidationCanRevise) {
4843
+ onLog?.(
4844
+ "stderr",
4845
+ `[QualityGate] Required vision.md validation hit a repo blocker; requesting revision ${
4846
+ revisionAttempt + 1
4847
+ }/${qualityValidationMaxAutoRevisions} instead of failing immediately: ${quality.requiredValidationFailures.join(
4848
+ "; ",
4849
+ )}`,
4850
+ );
4851
+ } else if (quality.requiredValidationFailures.length > 0) {
4362
4852
  const requiredSummary = `Required vision.md validation blocked publishing: ${quality.requiredValidationFailures.join("; ")}`;
4363
4853
  onLog?.("stderr", `[QualityGate] ${requiredSummary}`);
4364
4854
  return {
@@ -4368,8 +4858,7 @@ export async function executeJob(
4368
4858
  stderr: blockerDiagnostics,
4369
4859
  exitCode: 4,
4370
4860
  };
4371
- }
4372
- if (shouldSoftPassValidationBlocker(qualityGatePolicy, quality.blocker)) {
4861
+ } else if (shouldSoftPassValidationBlocker(qualityGatePolicy, quality.blocker)) {
4373
4862
  onLog?.(
4374
4863
  "stderr",
4375
4864
  `[QualityGate] Soft-pass on ${quality.blocker.category} blocker for publishable ${qualityGatePolicy.mode} job: ${toSingleLine(
@@ -4385,17 +4874,18 @@ export async function executeJob(
4385
4874
  stderr: blockerDiagnostics,
4386
4875
  exitCode: typeof result.exitCode === "number" ? result.exitCode : 0,
4387
4876
  };
4877
+ } else {
4878
+ onLog?.("stderr", `[QualityGate] ${blockerSummary}`);
4879
+ return {
4880
+ ok: false,
4881
+ summary: blockerSummary,
4882
+ stdout: result.stdout,
4883
+ stderr: blockerDiagnostics,
4884
+ exitCode: 4,
4885
+ };
4388
4886
  }
4389
- onLog?.("stderr", `[QualityGate] ${blockerSummary}`);
4390
- return {
4391
- ok: false,
4392
- summary: blockerSummary,
4393
- stdout: result.stdout,
4394
- stderr: blockerDiagnostics,
4395
- exitCode: 4,
4396
- };
4397
4887
  }
4398
- if (revisionAttempt >= qualityMaxAutoRevisions) {
4888
+ if (revisionAttempt >= activeMaxAutoRevisions) {
4399
4889
  if (quality.requiredValidationFailures.length > 0) {
4400
4890
  const diagnostics = truncate(
4401
4891
  [
@@ -4456,10 +4946,17 @@ export async function executeJob(
4456
4946
  }
4457
4947
 
4458
4948
  revisionAttempt += 1;
4459
- revisionHint = buildQualityRevisionHint(issues, critic, planning, reviewFixContext);
4949
+ revisionHint = buildQualityRevisionHint(
4950
+ issues,
4951
+ critic,
4952
+ planning,
4953
+ reviewFixContext,
4954
+ validationOutsideTaskScope ? [] : quality.validationRuns,
4955
+ validationOutsideTaskScope ? null : quality.blocker,
4956
+ );
4460
4957
  onLog?.(
4461
4958
  "stderr",
4462
- `[QualityGate] Quality gate requested revision ${revisionAttempt}/${qualityMaxAutoRevisions}: ${toSingleLine(
4959
+ `[QualityGate] Quality gate requested revision ${revisionAttempt}/${activeMaxAutoRevisions}: ${toSingleLine(
4463
4960
  issueSummary,
4464
4961
  260,
4465
4962
  )}`,