@pushpalsdev/cli 1.0.83 → 1.0.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,8 +3,7 @@
3
3
  * Used by both the host Worker (direct mode) and the Docker job runner.
4
4
  */
5
5
 
6
- import { existsSync, mkdirSync, readFileSync, rmSync, unlinkSync } from "fs";
7
- import { tmpdir } from "os";
6
+ import { existsSync, readFileSync, rmSync, unlinkSync } from "fs";
8
7
  import { resolve } from "path";
9
8
  import {
10
9
  deriveAutonomyComponentArea,
@@ -32,6 +31,7 @@ import {
32
31
  truncate,
33
32
  type OutputCompactionPolicy,
34
33
  } from "./common/execution_utils.js";
34
+ import { buildWorkerSandboxWritableEnv } from "./common/sandbox_env.js";
35
35
  // Re-export shared utilities for backward compatibility with external consumers.
36
36
  export { compactJobOutput, truncate, streamLines } from "./common/execution_utils.js";
37
37
  export { extractClarificationQuestionFromOutput } from "./backends/openhands_task_execute.js";
@@ -75,7 +75,7 @@ interface ValidationExecutionResult {
75
75
  elapsedMs: number;
76
76
  }
77
77
 
78
- interface ValidationBlocker {
78
+ export interface ValidationBlocker {
79
79
  category: "repo" | "environment";
80
80
  detail: string;
81
81
  }
@@ -84,6 +84,8 @@ interface DeterministicQualityResult {
84
84
  ok: boolean;
85
85
  skipped: boolean;
86
86
  issues: string[];
87
+ scopeIssues: string[];
88
+ validationIssues: string[];
87
89
  changedPaths: string[];
88
90
  changedTestPaths: string[];
89
91
  validationRuns: ValidationExecutionResult[];
@@ -112,6 +114,11 @@ export interface ReviewFixContext {
112
114
  export interface QualityGatePolicy {
113
115
  mode: "default" | "review_fix" | "merge_conflict";
114
116
  maxAutoRevisions: number;
117
+ validationMaxAutoRevisions: number;
118
+ scopeGateEnabled: boolean;
119
+ validationGateEnabled: boolean;
120
+ criticGateEnabled: boolean;
121
+ publishGateEnabled: boolean;
115
122
  softPassOnExhausted: boolean;
116
123
  criticMinScore: number;
117
124
  }
@@ -125,6 +132,33 @@ function shouldSoftPassValidationBlocker(
125
132
  return policy.mode === "review_fix" || policy.mode === "merge_conflict";
126
133
  }
127
134
 
135
+ export function shouldReviseRequiredValidationBlocker(opts: {
136
+ requiredValidationFailures: string[];
137
+ blocker: ValidationBlocker | null;
138
+ revisionAttempt: number;
139
+ maxAutoRevisions: number;
140
+ }): boolean {
141
+ if (opts.requiredValidationFailures.length === 0) return false;
142
+ if (!opts.blocker) return false;
143
+ if (opts.blocker.category !== "repo") return false;
144
+ return opts.revisionAttempt < opts.maxAutoRevisions;
145
+ }
146
+
147
+ export function revisionLimitForQualityGateFailures(opts: {
148
+ policy: Pick<QualityGatePolicy, "maxAutoRevisions" | "validationMaxAutoRevisions">;
149
+ qualityIssues: string[];
150
+ requiredValidationFailures: string[];
151
+ blocker: ValidationBlocker | null;
152
+ }): number {
153
+ const hasValidationGateFailure =
154
+ opts.requiredValidationFailures.length > 0 ||
155
+ opts.blocker !== null ||
156
+ opts.qualityIssues.some((issue) => issue.startsWith("ValidationGate:"));
157
+ return hasValidationGateFailure
158
+ ? opts.policy.validationMaxAutoRevisions
159
+ : opts.policy.maxAutoRevisions;
160
+ }
161
+
128
162
  // ─── Utilities ───────────────────────────────────────────────────────────────
129
163
 
130
164
  export function shouldCommit(
@@ -228,6 +262,13 @@ export function buildQualityGateRevisionIssues(
228
262
  const TEST_ASSERTION_BALANCE_ISSUE =
229
263
  "Changed test files do not show both positive and negative assertion coverage (expected both).";
230
264
 
265
+ function isAssertionBalanceIssue(issue: string): boolean {
266
+ return (
267
+ issue === TEST_ASSERTION_BALANCE_ISSUE ||
268
+ issue.includes("positive and negative assertion coverage")
269
+ );
270
+ }
271
+
231
272
  export function relaxAdvisoryQualityIssues(
232
273
  qualityIssues: string[],
233
274
  validationRuns: Array<{ ok: boolean }>,
@@ -245,7 +286,7 @@ export function relaxAdvisoryQualityIssues(
245
286
  return normalizedQualityIssues;
246
287
  }
247
288
 
248
- const relaxed = normalizedQualityIssues.filter((issue) => issue !== TEST_ASSERTION_BALANCE_ISSUE);
289
+ const relaxed = normalizedQualityIssues.filter((issue) => !isAssertionBalanceIssue(issue));
249
290
  return relaxed;
250
291
  }
251
292
 
@@ -362,13 +403,40 @@ export function deriveQualityGatePolicy(
362
403
  10,
363
404
  Number.isFinite(Number(runtimeConfig.workerpals.qualityMaxAutoRevisions))
364
405
  ? Math.floor(Number(runtimeConfig.workerpals.qualityMaxAutoRevisions))
365
- : 4,
406
+ : 3,
407
+ ),
408
+ );
409
+ const baseValidationMaxAutoRevisions = Math.max(
410
+ 0,
411
+ Math.min(
412
+ 10,
413
+ Number.isFinite(Number(runtimeConfig.workerpals.qualityValidationMaxAutoRevisions))
414
+ ? Math.floor(Number(runtimeConfig.workerpals.qualityValidationMaxAutoRevisions))
415
+ : 3,
366
416
  ),
367
417
  );
368
418
  const baseSoftPassOnExhausted =
369
419
  typeof runtimeConfig.workerpals.qualitySoftPassOnExhausted === "boolean"
370
420
  ? runtimeConfig.workerpals.qualitySoftPassOnExhausted
371
421
  : true;
422
+ const gateSwitches = {
423
+ scopeGateEnabled:
424
+ typeof runtimeConfig.workerpals.qualityScopeGateEnabled === "boolean"
425
+ ? runtimeConfig.workerpals.qualityScopeGateEnabled
426
+ : true,
427
+ validationGateEnabled:
428
+ typeof runtimeConfig.workerpals.qualityValidationGateEnabled === "boolean"
429
+ ? runtimeConfig.workerpals.qualityValidationGateEnabled
430
+ : true,
431
+ criticGateEnabled:
432
+ typeof runtimeConfig.workerpals.qualityCriticGateEnabled === "boolean"
433
+ ? runtimeConfig.workerpals.qualityCriticGateEnabled
434
+ : true,
435
+ publishGateEnabled:
436
+ typeof runtimeConfig.workerpals.qualityPublishGateEnabled === "boolean"
437
+ ? runtimeConfig.workerpals.qualityPublishGateEnabled
438
+ : true,
439
+ };
372
440
  const baseCriticMinScore = (() => {
373
441
  const value = Number(runtimeConfig.workerpals.qualityCriticMinScore);
374
442
  if (!Number.isFinite(value)) return 8;
@@ -379,19 +447,23 @@ export function deriveQualityGatePolicy(
379
447
  const mergeConflict = extractMergeConflictReviewContext(params);
380
448
  if (mergeConflict) {
381
449
  return {
382
- mode: "merge_conflict",
383
- maxAutoRevisions: baseMaxAutoRevisions,
384
- softPassOnExhausted: baseSoftPassOnExhausted,
385
- criticMinScore: baseCriticMinScore,
386
- };
387
- }
388
- return {
389
- mode: "default",
450
+ mode: "merge_conflict",
390
451
  maxAutoRevisions: baseMaxAutoRevisions,
452
+ validationMaxAutoRevisions: baseValidationMaxAutoRevisions,
453
+ ...gateSwitches,
391
454
  softPassOnExhausted: baseSoftPassOnExhausted,
392
455
  criticMinScore: baseCriticMinScore,
393
456
  };
394
457
  }
458
+ return {
459
+ mode: "default",
460
+ maxAutoRevisions: baseMaxAutoRevisions,
461
+ validationMaxAutoRevisions: baseValidationMaxAutoRevisions,
462
+ ...gateSwitches,
463
+ softPassOnExhausted: baseSoftPassOnExhausted,
464
+ criticMinScore: baseCriticMinScore,
465
+ };
466
+ }
395
467
  const tightenedCriticMinScore =
396
468
  reviewFix.reviewThreshold != null
397
469
  ? Math.max(baseCriticMinScore, Math.max(0, Math.min(10, reviewFix.reviewThreshold - 0.2)))
@@ -399,6 +471,8 @@ export function deriveQualityGatePolicy(
399
471
  return {
400
472
  mode: "review_fix",
401
473
  maxAutoRevisions: Math.max(baseMaxAutoRevisions, 2),
474
+ validationMaxAutoRevisions: baseValidationMaxAutoRevisions,
475
+ ...gateSwitches,
402
476
  softPassOnExhausted: baseSoftPassOnExhausted,
403
477
  criticMinScore: tightenedCriticMinScore,
404
478
  };
@@ -557,7 +631,7 @@ async function runValidationCommand(
557
631
  const startedAt = Date.now();
558
632
  const proc = Bun.spawn(argv, {
559
633
  cwd: repo,
560
- env: buildValidationCommandEnv(repo),
634
+ env: buildWorkerSandboxWritableEnv(repo),
561
635
  stdout: "pipe",
562
636
  stderr: "pipe",
563
637
  });
@@ -587,41 +661,39 @@ async function runValidationCommand(
587
661
  ok: !timedOut && exitCode === 0,
588
662
  exitCode: timedOut ? 124 : exitCode,
589
663
  stdout: compactJobOutput(stdout.trim(), outputPolicy),
590
- stderr: compactJobOutput(stderr.trim(), outputPolicy),
664
+ stderr: compactJobOutput(
665
+ [
666
+ stderr.trim(),
667
+ timedOut
668
+ ? `Validation command timed out after ${Math.max(1_000, timeoutMs)}ms. Captured output is the process output emitted before PushPals terminated the command.`
669
+ : "",
670
+ ]
671
+ .filter(Boolean)
672
+ .join("\n"),
673
+ outputPolicy,
674
+ ),
591
675
  elapsedMs: Math.max(1, Date.now() - startedAt),
592
676
  };
593
677
  }
594
678
 
595
- function buildValidationCommandEnv(repo: string): Record<string, string> {
596
- const homeDir = resolve(tmpdir(), "pushpals-validation-home");
597
- const cacheDir = resolve(tmpdir(), "pushpals-validation-cache");
598
- const expoDir = resolve(tmpdir(), "pushpals-validation-expo");
599
- for (const dir of [homeDir, cacheDir, expoDir]) {
600
- try {
601
- mkdirSync(dir, { recursive: true });
602
- } catch {
603
- // Keep validation best-effort; the command output will expose any real env blocker.
604
- }
605
- }
606
- const env: Record<string, string> = {};
607
- for (const [key, value] of Object.entries(process.env)) {
608
- if (typeof value === "string") env[key] = value;
609
- }
610
- return {
611
- ...env,
612
- HOME: homeDir,
613
- USERPROFILE: homeDir,
614
- XDG_CACHE_HOME: cacheDir,
615
- npm_config_cache: resolve(cacheDir, "npm"),
616
- EXPO_HOME: expoDir,
617
- EXPO_NO_TELEMETRY: process.env.EXPO_NO_TELEMETRY ?? "1",
618
- EXPO_NO_INTERACTIVE: process.env.EXPO_NO_INTERACTIVE ?? "1",
619
- CI: process.env.CI ?? "1",
620
- BROWSER: process.env.BROWSER ?? "none",
621
- EXPO_DEV_SERVER_PORT: process.env.EXPO_DEV_SERVER_PORT ?? "19006",
622
- RCT_METRO_PORT: process.env.RCT_METRO_PORT ?? "19006",
623
- PUSHPALS_VALIDATION_REPO: repo,
624
- };
679
+ export function isLongRunningBrowserValidationCommand(command: string): boolean {
680
+ const normalized = validationCommandKey(command);
681
+ if (!normalized) return false;
682
+ const tokens = tokenizeValidationCommandArgv(command)?.map((token) => token.toLowerCase()) ?? [];
683
+ const joined = tokens.join(" ");
684
+ return (
685
+ /\b(web:e2e|e2e:web|browser:e2e|smoke:web|web:smoke|browser:smoke)\b/.test(normalized) ||
686
+ /\b(playwright|cypress)\b/.test(joined) ||
687
+ (/\bexpo\b/.test(joined) && /\b(web|start)\b/.test(joined))
688
+ );
689
+ }
690
+
691
+ export function resolveValidationCommandTimeoutMs(command: string, baseTimeoutMs: number): number {
692
+ const normalizedBase = Number.isFinite(Number(baseTimeoutMs))
693
+ ? Math.max(1_000, Math.min(7_200_000, Math.floor(Number(baseTimeoutMs))))
694
+ : 180_000;
695
+ if (!isLongRunningBrowserValidationCommand(command)) return normalizedBase;
696
+ return Math.max(normalizedBase, 600_000);
625
697
  }
626
698
 
627
699
  interface ToolAvailabilityResult {
@@ -1143,6 +1215,7 @@ async function runDeterministicQualityGate(
1143
1215
  repo: string,
1144
1216
  params: Record<string, unknown>,
1145
1217
  runtimeConfig: WorkerpalsRuntimeConfig,
1218
+ qualityGatePolicy: QualityGatePolicy,
1146
1219
  onLog?: (stream: "stdout" | "stderr", line: string) => void,
1147
1220
  ): Promise<DeterministicQualityResult> {
1148
1221
  const instruction = String(params.instruction ?? "");
@@ -1154,11 +1227,19 @@ async function runDeterministicQualityGate(
1154
1227
  }
1155
1228
  const isTestTask = isTestFocusedTask(instruction, planning, targetPath);
1156
1229
  const hasRequiredValidationCriteria = requiredValidationSteps.length > 0;
1157
- if (!isTestTask && !hasRequiredValidationCriteria) {
1230
+ if (
1231
+ !qualityGatePolicy.scopeGateEnabled &&
1232
+ !qualityGatePolicy.validationGateEnabled &&
1233
+ !qualityGatePolicy.criticGateEnabled &&
1234
+ !isTestTask &&
1235
+ !hasRequiredValidationCriteria
1236
+ ) {
1158
1237
  return {
1159
1238
  ok: true,
1160
1239
  skipped: true,
1161
1240
  issues: [],
1241
+ scopeIssues: [],
1242
+ validationIssues: [],
1162
1243
  changedPaths: [],
1163
1244
  changedTestPaths: [],
1164
1245
  validationRuns: [],
@@ -1176,15 +1257,47 @@ async function runDeterministicQualityGate(
1176
1257
  ),
1177
1258
  );
1178
1259
  const issues: string[] = [];
1179
- if (changedTestPaths.length === 0) {
1180
- issues.push("No relevant test file was modified for this test-focused task.");
1260
+ const scopeIssues: string[] = [];
1261
+ const validationIssues: string[] = [];
1262
+ const addScopeIssue = (issue: string): void => {
1263
+ scopeIssues.push(issue);
1264
+ issues.push(`ScopeGate: ${issue}`);
1265
+ };
1266
+ const addValidationIssue = (issue: string): void => {
1267
+ validationIssues.push(issue);
1268
+ issues.push(`ValidationGate: ${issue}`);
1269
+ };
1270
+
1271
+ if (qualityGatePolicy.scopeGateEnabled) {
1272
+ if (!statusResult.ok) {
1273
+ addScopeIssue("could not evaluate changed paths from git status.");
1274
+ }
1275
+ for (const issue of collectWriteScopeIssuesFromChangedPaths(changedPaths, planning)) {
1276
+ addScopeIssue(issue);
1277
+ }
1278
+ if (isTestTask && changedTestPaths.length === 0) {
1279
+ addScopeIssue("found no relevant test file modified for this test-focused task.");
1280
+ }
1281
+ if (
1282
+ isTestTask &&
1283
+ changedTestPaths.length > 0 &&
1284
+ !hasBalancedPositiveNegativeAssertions(changedTestPaths, repo)
1285
+ ) {
1286
+ addScopeIssue(
1287
+ "found changed test files without both positive and negative assertion coverage (expected both).",
1288
+ );
1289
+ }
1290
+ for (const issue of scopeIssues) {
1291
+ onLog?.("stderr", `[ScopeGate] ${issue}`);
1292
+ }
1293
+ } else {
1294
+ onLog?.("stdout", "[ScopeGate] Disabled by workerpals.quality_scope_gate_enabled=false.");
1181
1295
  }
1182
- if (
1183
- changedTestPaths.length > 0 &&
1184
- !hasBalancedPositiveNegativeAssertions(changedTestPaths, repo)
1185
- ) {
1186
- issues.push(
1187
- "Changed test files do not show both positive and negative assertion coverage (expected both).",
1296
+
1297
+ if (!qualityGatePolicy.validationGateEnabled) {
1298
+ onLog?.(
1299
+ "stdout",
1300
+ "[ValidationGate] Disabled by workerpals.quality_validation_gate_enabled=false.",
1188
1301
  );
1189
1302
  }
1190
1303
 
@@ -1207,28 +1320,30 @@ async function runDeterministicQualityGate(
1207
1320
  if (!Number.isFinite(value)) return 180_000;
1208
1321
  return Math.max(1_000, Math.min(7_200_000, Math.floor(value)));
1209
1322
  })();
1323
+ let requiredValidationFailures: string[] = [];
1324
+ if (qualityGatePolicy.validationGateEnabled) {
1210
1325
  if (hasRequiredValidationCriteria && requiredRunnableSteps.length === 0) {
1211
- issues.push(
1212
- "vision.md testing criteria were provided, but none contained a runnable validation command.",
1326
+ addValidationIssue(
1327
+ "found vision.md testing criteria, but none contained a runnable validation command.",
1213
1328
  );
1214
1329
  }
1215
1330
  if (commandsToRun.length === 0) {
1216
- issues.push(
1331
+ addValidationIssue(
1217
1332
  hasRequiredValidationCriteria
1218
- ? "No runnable validation command was available from vision.md testing criteria or planning.validationSteps."
1219
- : "No runnable validation command was provided in planning.validationSteps (expected at least one test command).",
1333
+ ? "found no runnable validation command from vision.md testing criteria or planning.validationSteps."
1334
+ : "found no runnable validation command in planning.validationSteps (expected at least one test command).",
1220
1335
  );
1221
1336
  } else {
1222
1337
  if (requiredRunnableSteps.length > 0) {
1223
1338
  onLog?.(
1224
1339
  "stdout",
1225
- `[QualityGate] Running required vision.md testing criteria: ${requiredRunnableSteps.join(" | ")}`,
1340
+ `[ValidationGate] Running required vision.md testing criteria: ${requiredRunnableSteps.join(" | ")}`,
1226
1341
  );
1227
1342
  }
1228
1343
  if (isTestTask && plannerRunnableSteps.length === 0 && fallbackValidationSteps.length > 0) {
1229
1344
  onLog?.(
1230
1345
  "stdout",
1231
- `[QualityGate] No runnable planning.validationSteps found; using fallback validation command(s): ${commandsToRun.join(" | ")}`,
1346
+ `[ValidationGate] No runnable planning.validationSteps found; using fallback validation command(s): ${commandsToRun.join(" | ")}`,
1232
1347
  );
1233
1348
  }
1234
1349
  const toolchainPlan = buildToolchainPlan({
@@ -1238,7 +1353,7 @@ async function runDeterministicQualityGate(
1238
1353
  if (toolchainPlan.requirements.length > 0) {
1239
1354
  onLog?.(
1240
1355
  "stdout",
1241
- `[QualityGate] Toolchain preflight: source=${toolchainPlan.environmentSource}, required=${toolchainPlan.requirements
1356
+ `[ValidationGate] Toolchain preflight: source=${toolchainPlan.environmentSource}, required=${toolchainPlan.requirements
1242
1357
  .map((requirement) => requirement.tool)
1243
1358
  .join(", ")}`,
1244
1359
  );
@@ -1250,7 +1365,7 @@ async function runDeterministicQualityGate(
1250
1365
  if (missingToolRequirements.length > 0) {
1251
1366
  onLog?.(
1252
1367
  "stderr",
1253
- `[QualityGate] Toolchain preflight blocked dependent validation command(s): ${formatMissingToolRequirements(
1368
+ `[ValidationGate] Toolchain preflight blocked dependent validation command(s): ${formatMissingToolRequirements(
1254
1369
  missingToolRequirements,
1255
1370
  )}`,
1256
1371
  );
@@ -1275,19 +1390,19 @@ async function runDeterministicQualityGate(
1275
1390
  });
1276
1391
  onLog?.(
1277
1392
  "stderr",
1278
- `[QualityGate] Quality gate validation skipped (missing toolchain): ${command}`,
1393
+ `[ValidationGate] Validation skipped (missing toolchain): ${command}`,
1279
1394
  );
1280
1395
  continue;
1281
1396
  }
1282
- onLog?.("stdout", `[QualityGate] Quality gate validation: running "${command}"`);
1397
+ onLog?.("stdout", `[ValidationGate] Running "${command}"`);
1283
1398
  const run = await runValidationCommand(
1284
1399
  repo,
1285
1400
  command,
1286
- qualityValidationStepTimeoutMs,
1401
+ resolveValidationCommandTimeoutMs(command, qualityValidationStepTimeoutMs),
1287
1402
  outputPolicy,
1288
1403
  );
1289
1404
  validationRuns.push(run);
1290
- const runSummary = `[QualityGate] Quality gate validation ${run.ok ? "passed" : "failed"} (${run.elapsedMs}ms, exit ${run.exitCode}): ${command}`;
1405
+ const runSummary = `[ValidationGate] ${run.ok ? "Passed" : "Failed"} (${run.elapsedMs}ms, exit ${run.exitCode}): ${command}`;
1291
1406
  onLog?.(run.ok ? "stdout" : "stderr", runSummary);
1292
1407
  }
1293
1408
  // exit 127 = command not found: separate tool-availability issues from real test failures.
@@ -1297,38 +1412,43 @@ async function runDeterministicQualityGate(
1297
1412
  const cmds = notFoundRuns.map((run) => run.command).join(", ");
1298
1413
  onLog?.(
1299
1414
  "stderr",
1300
- `[QualityGate] Some validation commands not found (exit 127 wrong tool?): ${cmds}. This project uses Bun: prefer "bun test".`,
1415
+ `[ValidationGate] Some validation commands not found (exit 127 - wrong tool?): ${cmds}. This project uses Bun: prefer "bun test".`,
1301
1416
  );
1302
1417
  }
1303
1418
  if (executedRuns.length > 0 && executedRuns.every((run) => !run.ok)) {
1304
- issues.push("Validation commands were executed but none passed.");
1419
+ addValidationIssue("executed validation commands, but none passed.");
1305
1420
  } else if (executedRuns.length === 0 && notFoundRuns.length > 0) {
1306
- issues.push(
1307
- 'No validation command could be run (command not found). Use "bun test" or another available test runner.',
1421
+ addValidationIssue(
1422
+ 'could not run any validation command (command not found). Use "bun test" or another available test runner.',
1308
1423
  );
1309
1424
  }
1310
1425
  if (
1311
1426
  isTestTask &&
1312
1427
  !validationRuns.some((run) => /\b(test|pytest|coverage|vitest|jest)\b/i.test(run.command))
1313
1428
  ) {
1314
- issues.push("Validation steps did not execute a recognizable test command.");
1429
+ addValidationIssue("did not execute a recognizable test command.");
1315
1430
  }
1316
1431
  }
1317
- const requiredValidationFailures = collectRequiredValidationFailures(
1432
+ requiredValidationFailures = collectRequiredValidationFailures(
1318
1433
  requiredRunnableSteps,
1319
1434
  validationRuns,
1320
1435
  );
1321
1436
  if (requiredValidationFailures.length > 0) {
1322
- issues.push(
1437
+ addValidationIssue(
1323
1438
  `Required vision.md validation failed: ${requiredValidationFailures.join("; ")}`,
1324
1439
  );
1325
1440
  }
1326
- const blocker = detectValidationBlocker(validationRuns);
1441
+ }
1442
+ const blocker = qualityGatePolicy.validationGateEnabled
1443
+ ? detectValidationBlocker(validationRuns)
1444
+ : null;
1327
1445
 
1328
1446
  return {
1329
1447
  ok: issues.length === 0 && blocker === null,
1330
1448
  skipped: false,
1331
1449
  issues,
1450
+ scopeIssues,
1451
+ validationIssues,
1332
1452
  changedPaths,
1333
1453
  changedTestPaths,
1334
1454
  validationRuns,
@@ -1456,7 +1576,7 @@ async function runTaskCriticReview(
1456
1576
  if (lowered.includes("response_format")) {
1457
1577
  onLog?.(
1458
1578
  "stdout",
1459
- "[QualityGate] Critic fallback: response_format json_object unsupported; retrying without strict response_format.",
1579
+ "[CriticGate] fallback: response_format json_object unsupported; retrying without strict response_format.",
1460
1580
  );
1461
1581
  request = await runCriticRequest(null);
1462
1582
  }
@@ -1464,7 +1584,7 @@ async function runTaskCriticReview(
1464
1584
  if (!request.response.ok) {
1465
1585
  onLog?.(
1466
1586
  "stderr",
1467
- `[QualityGate] Critic review request failed (${request.response.status}): ${toSingleLine(request.text, 240)}`,
1587
+ `[CriticGate] review request failed (${request.response.status}): ${toSingleLine(request.text, 240)}`,
1468
1588
  );
1469
1589
  return null;
1470
1590
  }
@@ -1480,7 +1600,7 @@ async function runTaskCriticReview(
1480
1600
  if (!reviewObj) {
1481
1601
  onLog?.(
1482
1602
  "stderr",
1483
- `[QualityGate] Critic produced non-JSON content; skipping critic gate. Raw: ${toSingleLine(
1603
+ `[CriticGate] produced non-JSON content; skipping critic gate. Raw: ${toSingleLine(
1484
1604
  content,
1485
1605
  220,
1486
1606
  )}`,
@@ -1509,7 +1629,7 @@ async function runTaskCriticReview(
1509
1629
  } catch (err) {
1510
1630
  onLog?.(
1511
1631
  "stderr",
1512
- `[QualityGate] Critic review unavailable: ${toSingleLine(err, 220)} (continuing without critic gate).`,
1632
+ `[CriticGate] review unavailable: ${toSingleLine(err, 220)} (continuing without critic gate).`,
1513
1633
  );
1514
1634
  return null;
1515
1635
  }
@@ -1520,6 +1640,8 @@ export function buildQualityRevisionHint(
1520
1640
  critic: CriticReview | null,
1521
1641
  planning: TaskExecutePlanning,
1522
1642
  reviewFixContext?: ReviewFixContext | null,
1643
+ validationRuns: ValidationExecutionResult[] = [],
1644
+ validationBlocker: ValidationBlocker | null = null,
1523
1645
  ): string {
1524
1646
  const lines: string[] = [];
1525
1647
  lines.push("Quality revision required before completion.");
@@ -1552,6 +1674,26 @@ export function buildQualityRevisionHint(
1552
1674
  lines.push("Deterministic quality issues:");
1553
1675
  for (const issue of issues) lines.push(`- ${issue}`);
1554
1676
  }
1677
+ if (validationBlocker) {
1678
+ lines.push(
1679
+ `Validation blocker: ${validationBlocker.category} - ${toSingleLine(
1680
+ validationBlocker.detail,
1681
+ 300,
1682
+ )}`,
1683
+ );
1684
+ }
1685
+ const failedValidationRuns = validationRuns.filter((run) => !run.ok);
1686
+ if (failedValidationRuns.length > 0) {
1687
+ lines.push("Validation failure diagnostics:");
1688
+ for (const run of failedValidationRuns.slice(0, 5)) {
1689
+ lines.push(`- ${run.command} failed with exit ${run.exitCode} after ${run.elapsedMs}ms.`);
1690
+ const output = toSingleLine(
1691
+ stripAnsiControlSequences([run.stderr, run.stdout].filter(Boolean).join("\n")),
1692
+ 700,
1693
+ );
1694
+ if (output) lines.push(` Output: ${output}`);
1695
+ }
1696
+ }
1555
1697
  if (critic) {
1556
1698
  lines.push(`Critic score: ${critic.score.toFixed(1)} / 10`);
1557
1699
  if (critic.mustFix.length > 0) {
@@ -3355,6 +3497,7 @@ async function generateCommitMessageFromDiffViaCodex(
3355
3497
  const stdinText = `${prompt.systemPrompt}\n\n${prompt.userMessage}`;
3356
3498
  const proc = Bun.spawn(cmd, {
3357
3499
  cwd: repo,
3500
+ env: buildWorkerSandboxWritableEnv(repo),
3358
3501
  stdout: "pipe",
3359
3502
  stderr: "pipe",
3360
3503
  stdin: new Blob([stdinText]),
@@ -3587,40 +3730,33 @@ function taskExecuteOrigin(params: Record<string, unknown>): "autonomy" | "user"
3587
3730
  return "user";
3588
3731
  }
3589
3732
 
3590
- async function collectWriteScopeWarnings(
3591
- repo: string,
3733
+ function collectWriteScopeIssuesFromChangedPaths(
3734
+ changedPaths: string[],
3592
3735
  planning: TaskExecutePlanning,
3593
- ): Promise<{ warnings: string[] }> {
3736
+ ): string[] {
3594
3737
  const writeGlobs = toStringArray(planning.scope.writeGlobs ?? []);
3595
- if (writeGlobs.length === 0) return { warnings: [] };
3596
-
3597
- const statusResult = await git(repo, ["status", "--porcelain"]);
3598
- if (!statusResult.ok) {
3599
- return { warnings: ["Unable to evaluate changed paths for scope suggestion check."] };
3600
- }
3738
+ if (writeGlobs.length === 0) return [];
3601
3739
 
3602
- const changedPaths = parseChangedPathsFromStatus(statusResult.stdout)
3740
+ const normalizedChangedPaths = changedPaths
3603
3741
  .map((entry) => normalizeStagePath(entry))
3604
3742
  .filter((entry): entry is string => Boolean(entry) && entry !== ".");
3605
- if (changedPaths.length === 0) return { warnings: [] };
3743
+ if (normalizedChangedPaths.length === 0) return [];
3606
3744
 
3607
3745
  const forbidden = toStringArray(planning.scope.forbiddenGlobs ?? []);
3608
- const warnings: string[] = [];
3609
- const outOfScope = changedPaths.filter(
3746
+ const issues: string[] = [];
3747
+ const outOfScope = normalizedChangedPaths.filter(
3610
3748
  (path) => !writeGlobs.some((glob) => matchesGlob(path, glob)),
3611
3749
  );
3612
3750
  if (outOfScope.length > 0) {
3613
- warnings.push(`Scope suggestion: modified paths outside writeGlobs: ${outOfScope.join(", ")}`);
3751
+ issues.push(`modified paths outside writeGlobs: ${outOfScope.join(", ")}`);
3614
3752
  }
3615
- const forbiddenTouched = changedPaths.filter((path) =>
3753
+ const forbiddenTouched = normalizedChangedPaths.filter((path) =>
3616
3754
  forbidden.some((glob) => matchesGlob(path, glob)),
3617
3755
  );
3618
3756
  if (forbiddenTouched.length > 0) {
3619
- warnings.push(
3620
- `Scope suggestion: modified paths matching forbiddenGlobs: ${forbiddenTouched.join(", ")}`,
3621
- );
3757
+ issues.push(`modified paths matching forbiddenGlobs: ${forbiddenTouched.join(", ")}`);
3622
3758
  }
3623
- return { warnings };
3759
+ return issues;
3624
3760
  }
3625
3761
 
3626
3762
  function sanitizeTaskExecutePlanningPathHints(value: unknown): unknown {
@@ -3945,7 +4081,7 @@ async function runCodexCriticReview(
3945
4081
  if (!codexPrefix) {
3946
4082
  onLog?.(
3947
4083
  "stderr",
3948
- "[QualityGate] Codex critic: unable to resolve Codex CLI command (workerpals.llm.codex_bin/PATH); skipping.",
4084
+ "[CriticGate] Codex: unable to resolve Codex CLI command (workerpals.llm.codex_bin/PATH); skipping.",
3949
4085
  );
3950
4086
  return null;
3951
4087
  }
@@ -4026,6 +4162,7 @@ async function runCodexCriticReview(
4026
4162
  try {
4027
4163
  const proc = Bun.spawn(cmd, {
4028
4164
  cwd: repo,
4165
+ env: buildWorkerSandboxWritableEnv(repo),
4029
4166
  stdout: "pipe",
4030
4167
  stderr: "pipe",
4031
4168
  stdin: new Blob([criticInstruction]),
@@ -4045,14 +4182,14 @@ async function runCodexCriticReview(
4045
4182
  clearTimeout(timer);
4046
4183
 
4047
4184
  if (timedOut) {
4048
- onLog?.("stderr", "[QualityGate] Codex critic timed out; skipping.");
4185
+ onLog?.("stderr", "[CriticGate] Codex timed out; skipping.");
4049
4186
  return null;
4050
4187
  }
4051
4188
  if (exitCode !== 0) {
4052
4189
  const stderrText = await new Response(proc.stderr).text();
4053
4190
  onLog?.(
4054
4191
  "stderr",
4055
- `[QualityGate] Codex critic exited ${exitCode}: ${toSingleLine(stderrText, 220)}`,
4192
+ `[CriticGate] Codex exited ${exitCode}: ${toSingleLine(stderrText, 220)}`,
4056
4193
  );
4057
4194
  return null;
4058
4195
  }
@@ -4070,7 +4207,7 @@ async function runCodexCriticReview(
4070
4207
  }
4071
4208
 
4072
4209
  if (!lastMessage) {
4073
- onLog?.("stderr", "[QualityGate] Codex critic: no output message captured; skipping.");
4210
+ onLog?.("stderr", "[CriticGate] Codex: no output message captured; skipping.");
4074
4211
  return null;
4075
4212
  }
4076
4213
 
@@ -4078,7 +4215,7 @@ async function runCodexCriticReview(
4078
4215
  if (!reviewObj) {
4079
4216
  onLog?.(
4080
4217
  "stderr",
4081
- `[QualityGate] Codex critic returned non-JSON: ${toSingleLine(lastMessage, 220)}`,
4218
+ `[CriticGate] Codex returned non-JSON: ${toSingleLine(lastMessage, 220)}`,
4082
4219
  );
4083
4220
  return null;
4084
4221
  }
@@ -4094,7 +4231,7 @@ async function runCodexCriticReview(
4094
4231
  const revisionGuidance = String(reviewObj.revision_guidance ?? "")
4095
4232
  .trim()
4096
4233
  .slice(0, 2000);
4097
- onLog?.("stdout", `[QualityGate] Codex critic score: ${score}/10`);
4234
+ onLog?.("stdout", `[CriticGate] Codex score: ${score}/10`);
4098
4235
  return {
4099
4236
  score,
4100
4237
  findings,
@@ -4103,7 +4240,7 @@ async function runCodexCriticReview(
4103
4240
  raw: compactJobOutput(lastMessage, outputPolicyForRuntime(runtimeConfig)),
4104
4241
  };
4105
4242
  } catch (err) {
4106
- onLog?.("stderr", `[QualityGate] Codex critic error: ${toSingleLine(err, 220)} (skipping).`);
4243
+ onLog?.("stderr", `[CriticGate] Codex error: ${toSingleLine(err, 220)} (skipping).`);
4107
4244
  return null;
4108
4245
  }
4109
4246
  }
@@ -4189,12 +4326,25 @@ export async function executeJob(
4189
4326
  const reviewFixContext = extractReviewFixContext(normalizedParams);
4190
4327
  const qualityGatePolicy = deriveQualityGatePolicy(normalizedParams, runtimeConfig);
4191
4328
  const qualityMaxAutoRevisions = qualityGatePolicy.maxAutoRevisions;
4329
+ const qualityValidationMaxAutoRevisions = qualityGatePolicy.validationMaxAutoRevisions;
4330
+ const qualityRevisionLoopMax = Math.max(
4331
+ qualityMaxAutoRevisions,
4332
+ qualityValidationMaxAutoRevisions,
4333
+ );
4192
4334
  const qualitySoftPassOnExhausted = qualityGatePolicy.softPassOnExhausted;
4193
4335
  const qualityCriticMinScore = qualityGatePolicy.criticMinScore;
4194
4336
 
4195
4337
  onLog?.(
4196
4338
  "stdout",
4197
- `[QualityGate] Policy: max_auto_revisions=${qualityMaxAutoRevisions}, soft_pass_on_exhausted=${qualitySoftPassOnExhausted ? "true" : "false"}, critic_min_score=${qualityCriticMinScore}`,
4339
+ `[QualityGate] Policy: max_auto_revisions=${qualityMaxAutoRevisions}, validation_max_auto_revisions=${qualityValidationMaxAutoRevisions}, soft_pass_on_exhausted=${qualitySoftPassOnExhausted ? "true" : "false"}, critic_min_score=${qualityCriticMinScore}`,
4340
+ );
4341
+ onLog?.(
4342
+ "stdout",
4343
+ `[QualityGate] Gates: scope=${qualityGatePolicy.scopeGateEnabled ? "on" : "off"}, validation=${
4344
+ qualityGatePolicy.validationGateEnabled ? "on" : "off"
4345
+ }, critic=${qualityGatePolicy.criticGateEnabled ? "on" : "off"}, publish=${
4346
+ qualityGatePolicy.publishGateEnabled ? "on" : "off"
4347
+ }`,
4198
4348
  );
4199
4349
  if (qualityGatePolicy.mode === "review_fix") {
4200
4350
  const priorScore =
@@ -4218,7 +4368,7 @@ export async function executeJob(
4218
4368
 
4219
4369
  let revisionAttempt = 0;
4220
4370
  let revisionHint = "";
4221
- while (revisionAttempt <= qualityMaxAutoRevisions) {
4371
+ while (revisionAttempt <= qualityRevisionLoopMax) {
4222
4372
  const attemptParams: Record<string, unknown> = { ...normalizedParams };
4223
4373
  if (revisionHint) {
4224
4374
  attemptParams.qualityRevisionHint = revisionHint;
@@ -4306,17 +4456,22 @@ export async function executeJob(
4306
4456
  };
4307
4457
  }
4308
4458
 
4309
- const scopeCheck = await collectWriteScopeWarnings(repo, planning);
4310
- for (const warning of scopeCheck.warnings) {
4311
- onLog?.("stdout", `[TaskExecute] ${warning}`);
4459
+ const quality = await runDeterministicQualityGate(
4460
+ repo,
4461
+ attemptParams,
4462
+ runtimeConfig,
4463
+ qualityGatePolicy,
4464
+ onLog,
4465
+ );
4466
+ const critic =
4467
+ quality.skipped || !qualityGatePolicy.criticGateEnabled
4468
+ ? null
4469
+ : executor === "openai_codex"
4470
+ ? await runCodexCriticReview(repo, attemptParams, quality, runtimeConfig, onLog)
4471
+ : await runTaskCriticReview(repo, attemptParams, quality, runtimeConfig, onLog);
4472
+ if (!qualityGatePolicy.criticGateEnabled) {
4473
+ onLog?.("stdout", "[CriticGate] Disabled by workerpals.quality_critic_gate_enabled=false.");
4312
4474
  }
4313
-
4314
- const quality = await runDeterministicQualityGate(repo, attemptParams, runtimeConfig, onLog);
4315
- const critic = quality.skipped
4316
- ? null
4317
- : executor === "openai_codex"
4318
- ? await runCodexCriticReview(repo, attemptParams, quality, runtimeConfig, onLog)
4319
- : await runTaskCriticReview(repo, attemptParams, quality, runtimeConfig, onLog);
4320
4475
  const effectiveQualityIssues = relaxAdvisoryQualityIssues(
4321
4476
  quality.issues,
4322
4477
  quality.validationRuns,
@@ -4332,22 +4487,60 @@ export async function executeJob(
4332
4487
  const deterministicRequiresRevision =
4333
4488
  effectiveQualityIssues.length > 0 || quality.blocker !== null;
4334
4489
  const criticRequiresRevision = Boolean(critic && critic.score < qualityCriticMinScore);
4490
+ if (
4491
+ !qualityGatePolicy.publishGateEnabled &&
4492
+ (deterministicRequiresRevision || criticRequiresRevision)
4493
+ ) {
4494
+ onLog?.(
4495
+ "stderr",
4496
+ "[PublishGate] Disabled by workerpals.quality_publish_gate_enabled=false; returning worker result despite gate failures.",
4497
+ );
4498
+ return {
4499
+ ...result,
4500
+ summary: `${result.summary} (publish gate disabled; quality gate findings were advisory)`,
4501
+ stderr: truncate(
4502
+ [
4503
+ result.stderr ?? "",
4504
+ ...quality.validationRuns.flatMap((run) => [run.stdout, run.stderr]).filter(Boolean),
4505
+ critic ? `Critic raw: ${critic.raw}` : "",
4506
+ ]
4507
+ .filter(Boolean)
4508
+ .join("\n"),
4509
+ outputPolicyForRuntime(runtimeConfig),
4510
+ ),
4511
+ exitCode: typeof result.exitCode === "number" ? result.exitCode : 0,
4512
+ };
4513
+ }
4335
4514
 
4336
4515
  if (!deterministicRequiresRevision && !criticRequiresRevision) {
4337
4516
  if (critic) {
4338
4517
  onLog?.(
4339
4518
  "stdout",
4340
- `[QualityGate] Critic review score ${critic.score.toFixed(1)}/10 (threshold ${qualityCriticMinScore}).`,
4519
+ `[CriticGate] review score ${critic.score.toFixed(1)}/10 (threshold ${qualityCriticMinScore}).`,
4341
4520
  );
4342
4521
  }
4343
4522
  return result;
4344
4523
  }
4345
4524
 
4525
+ const blockerIssue = quality.blocker
4526
+ ? [
4527
+ `Validation blocker (${quality.blocker.category}): ${toSingleLine(
4528
+ quality.blocker.detail,
4529
+ 240,
4530
+ )}`,
4531
+ ]
4532
+ : [];
4346
4533
  const issues = buildQualityGateRevisionIssues(
4347
- effectiveQualityIssues,
4534
+ [...effectiveQualityIssues, ...blockerIssue],
4348
4535
  critic,
4349
4536
  qualityCriticMinScore,
4350
4537
  );
4538
+ const activeMaxAutoRevisions = revisionLimitForQualityGateFailures({
4539
+ policy: qualityGatePolicy,
4540
+ qualityIssues: effectiveQualityIssues,
4541
+ requiredValidationFailures: quality.requiredValidationFailures,
4542
+ blocker: quality.blocker,
4543
+ });
4351
4544
  const issueSummary = issues.map((entry) => toSingleLine(entry, 180)).join(" | ");
4352
4545
  if (quality.blocker) {
4353
4546
  const blockerSummary = `Quality gate blocked by ${quality.blocker.category} issue: ${quality.blocker.detail}`;
@@ -4358,7 +4551,22 @@ export async function executeJob(
4358
4551
  ].join("\n"),
4359
4552
  outputPolicyForRuntime(runtimeConfig),
4360
4553
  );
4361
- if (quality.requiredValidationFailures.length > 0) {
4554
+ const requiredValidationCanRevise = shouldReviseRequiredValidationBlocker({
4555
+ requiredValidationFailures: quality.requiredValidationFailures,
4556
+ blocker: quality.blocker,
4557
+ revisionAttempt,
4558
+ maxAutoRevisions: qualityValidationMaxAutoRevisions,
4559
+ });
4560
+ if (requiredValidationCanRevise) {
4561
+ onLog?.(
4562
+ "stderr",
4563
+ `[QualityGate] Required vision.md validation hit a repo blocker; requesting revision ${
4564
+ revisionAttempt + 1
4565
+ }/${qualityValidationMaxAutoRevisions} instead of failing immediately: ${quality.requiredValidationFailures.join(
4566
+ "; ",
4567
+ )}`,
4568
+ );
4569
+ } else if (quality.requiredValidationFailures.length > 0) {
4362
4570
  const requiredSummary = `Required vision.md validation blocked publishing: ${quality.requiredValidationFailures.join("; ")}`;
4363
4571
  onLog?.("stderr", `[QualityGate] ${requiredSummary}`);
4364
4572
  return {
@@ -4368,8 +4576,7 @@ export async function executeJob(
4368
4576
  stderr: blockerDiagnostics,
4369
4577
  exitCode: 4,
4370
4578
  };
4371
- }
4372
- if (shouldSoftPassValidationBlocker(qualityGatePolicy, quality.blocker)) {
4579
+ } else if (shouldSoftPassValidationBlocker(qualityGatePolicy, quality.blocker)) {
4373
4580
  onLog?.(
4374
4581
  "stderr",
4375
4582
  `[QualityGate] Soft-pass on ${quality.blocker.category} blocker for publishable ${qualityGatePolicy.mode} job: ${toSingleLine(
@@ -4385,17 +4592,18 @@ export async function executeJob(
4385
4592
  stderr: blockerDiagnostics,
4386
4593
  exitCode: typeof result.exitCode === "number" ? result.exitCode : 0,
4387
4594
  };
4595
+ } else {
4596
+ onLog?.("stderr", `[QualityGate] ${blockerSummary}`);
4597
+ return {
4598
+ ok: false,
4599
+ summary: blockerSummary,
4600
+ stdout: result.stdout,
4601
+ stderr: blockerDiagnostics,
4602
+ exitCode: 4,
4603
+ };
4388
4604
  }
4389
- onLog?.("stderr", `[QualityGate] ${blockerSummary}`);
4390
- return {
4391
- ok: false,
4392
- summary: blockerSummary,
4393
- stdout: result.stdout,
4394
- stderr: blockerDiagnostics,
4395
- exitCode: 4,
4396
- };
4397
4605
  }
4398
- if (revisionAttempt >= qualityMaxAutoRevisions) {
4606
+ if (revisionAttempt >= activeMaxAutoRevisions) {
4399
4607
  if (quality.requiredValidationFailures.length > 0) {
4400
4608
  const diagnostics = truncate(
4401
4609
  [
@@ -4456,10 +4664,17 @@ export async function executeJob(
4456
4664
  }
4457
4665
 
4458
4666
  revisionAttempt += 1;
4459
- revisionHint = buildQualityRevisionHint(issues, critic, planning, reviewFixContext);
4667
+ revisionHint = buildQualityRevisionHint(
4668
+ issues,
4669
+ critic,
4670
+ planning,
4671
+ reviewFixContext,
4672
+ quality.validationRuns,
4673
+ quality.blocker,
4674
+ );
4460
4675
  onLog?.(
4461
4676
  "stderr",
4462
- `[QualityGate] Quality gate requested revision ${revisionAttempt}/${qualityMaxAutoRevisions}: ${toSingleLine(
4677
+ `[QualityGate] Quality gate requested revision ${revisionAttempt}/${activeMaxAutoRevisions}: ${toSingleLine(
4463
4678
  issueSummary,
4464
4679
  260,
4465
4680
  )}`,