@pushpalsdev/cli 1.0.84 → 1.0.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +11 -1
- package/package.json +1 -1
- package/runtime/configs/default.toml +8 -1
- package/runtime/configs/local.example.toml +8 -1
- package/runtime/prompts/remotebuddy/autonomy_ideation_system_prompt.md +2 -0
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +11 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openhands_task_execute.ts +2 -1
- package/runtime/sandbox/apps/workerpals/src/common/generic_python_executor.ts +2 -1
- package/runtime/sandbox/apps/workerpals/src/common/sandbox_env.ts +76 -0
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +643 -146
- package/runtime/sandbox/configs/default.toml +8 -1
- package/runtime/sandbox/configs/local.example.toml +8 -1
- package/runtime/sandbox/packages/shared/src/config.ts +34 -1
|
@@ -3,8 +3,7 @@
|
|
|
3
3
|
* Used by both the host Worker (direct mode) and the Docker job runner.
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
-
import { existsSync,
|
|
7
|
-
import { tmpdir } from "os";
|
|
6
|
+
import { existsSync, readFileSync, rmSync, unlinkSync } from "fs";
|
|
8
7
|
import { resolve } from "path";
|
|
9
8
|
import {
|
|
10
9
|
deriveAutonomyComponentArea,
|
|
@@ -32,6 +31,7 @@ import {
|
|
|
32
31
|
truncate,
|
|
33
32
|
type OutputCompactionPolicy,
|
|
34
33
|
} from "./common/execution_utils.js";
|
|
34
|
+
import { buildWorkerSandboxWritableEnv } from "./common/sandbox_env.js";
|
|
35
35
|
// Re-export shared utilities for backward compatibility with external consumers.
|
|
36
36
|
export { compactJobOutput, truncate, streamLines } from "./common/execution_utils.js";
|
|
37
37
|
export { extractClarificationQuestionFromOutput } from "./backends/openhands_task_execute.js";
|
|
@@ -65,7 +65,7 @@ export interface TaskExecutePlanning {
|
|
|
65
65
|
finalizationBudgetMs: number;
|
|
66
66
|
}
|
|
67
67
|
|
|
68
|
-
interface ValidationExecutionResult {
|
|
68
|
+
export interface ValidationExecutionResult {
|
|
69
69
|
step: string;
|
|
70
70
|
command: string;
|
|
71
71
|
ok: boolean;
|
|
@@ -75,7 +75,7 @@ interface ValidationExecutionResult {
|
|
|
75
75
|
elapsedMs: number;
|
|
76
76
|
}
|
|
77
77
|
|
|
78
|
-
interface ValidationBlocker {
|
|
78
|
+
export interface ValidationBlocker {
|
|
79
79
|
category: "repo" | "environment";
|
|
80
80
|
detail: string;
|
|
81
81
|
}
|
|
@@ -84,11 +84,14 @@ interface DeterministicQualityResult {
|
|
|
84
84
|
ok: boolean;
|
|
85
85
|
skipped: boolean;
|
|
86
86
|
issues: string[];
|
|
87
|
+
scopeIssues: string[];
|
|
88
|
+
validationIssues: string[];
|
|
87
89
|
changedPaths: string[];
|
|
88
90
|
changedTestPaths: string[];
|
|
89
91
|
validationRuns: ValidationExecutionResult[];
|
|
90
92
|
requiredValidationFailures: string[];
|
|
91
93
|
blocker: ValidationBlocker | null;
|
|
94
|
+
validationFailureScope: "none" | "task_scope" | "outside_task_scope";
|
|
92
95
|
}
|
|
93
96
|
|
|
94
97
|
interface CriticReview {
|
|
@@ -112,6 +115,11 @@ export interface ReviewFixContext {
|
|
|
112
115
|
export interface QualityGatePolicy {
|
|
113
116
|
mode: "default" | "review_fix" | "merge_conflict";
|
|
114
117
|
maxAutoRevisions: number;
|
|
118
|
+
validationMaxAutoRevisions: number;
|
|
119
|
+
scopeGateEnabled: boolean;
|
|
120
|
+
validationGateEnabled: boolean;
|
|
121
|
+
criticGateEnabled: boolean;
|
|
122
|
+
publishGateEnabled: boolean;
|
|
115
123
|
softPassOnExhausted: boolean;
|
|
116
124
|
criticMinScore: number;
|
|
117
125
|
}
|
|
@@ -125,6 +133,35 @@ function shouldSoftPassValidationBlocker(
|
|
|
125
133
|
return policy.mode === "review_fix" || policy.mode === "merge_conflict";
|
|
126
134
|
}
|
|
127
135
|
|
|
136
|
+
/**
 * Decides whether a failed required validation should trigger another
 * automatic revision attempt.
 *
 * All of the following must hold:
 * - at least one required validation failed,
 * - the failure was not flagged as lying outside the task's scope,
 * - a blocker was detected and its category is `"repo"`,
 * - `revisionAttempt` is still below `maxAutoRevisions`.
 *
 * @param opts.requiredValidationFailures Descriptions of the failed required validations.
 * @param opts.blocker Detected blocker, or `null` when none was found.
 * @param opts.revisionAttempt Number of revisions already attempted.
 * @param opts.maxAutoRevisions Revision budget to compare against.
 * @param opts.outsideTaskScope When truthy, the failure is never revised.
 * @returns `true` when another revision should be attempted.
 */
export function shouldReviseRequiredValidationBlocker(opts: {
  requiredValidationFailures: string[];
  blocker: ValidationBlocker | null;
  revisionAttempt: number;
  maxAutoRevisions: number;
  outsideTaskScope?: boolean;
}): boolean {
  const { requiredValidationFailures, blocker, revisionAttempt, maxAutoRevisions } = opts;
  // `blocker?.category` is undefined for a missing blocker, so the comparison
  // also covers the "no blocker" case.
  const isRevisableRepoFailure =
    requiredValidationFailures.length > 0 &&
    !opts.outsideTaskScope &&
    blocker?.category === "repo";
  return isRevisableRepoFailure && revisionAttempt < maxAutoRevisions;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Picks which revision budget applies to the current set of quality-gate failures.
 *
 * The validation budget (`policy.validationMaxAutoRevisions`) is used when any
 * of these signals a validation-gate failure: a required validation failed, a
 * blocker is present, or a quality issue starts with `"ValidationGate:"`.
 * Otherwise the general budget (`policy.maxAutoRevisions`) is returned.
 *
 * @returns The maximum number of automatic revisions allowed for these failures.
 */
export function revisionLimitForQualityGateFailures(opts: {
  policy: Pick<QualityGatePolicy, "maxAutoRevisions" | "validationMaxAutoRevisions">;
  qualityIssues: string[];
  requiredValidationFailures: string[];
  blocker: ValidationBlocker | null;
}): number {
  const { policy, qualityIssues, requiredValidationFailures, blocker } = opts;
  if (requiredValidationFailures.length > 0) return policy.validationMaxAutoRevisions;
  if (blocker !== null) return policy.validationMaxAutoRevisions;
  for (const issue of qualityIssues) {
    if (issue.startsWith("ValidationGate:")) return policy.validationMaxAutoRevisions;
  }
  return policy.maxAutoRevisions;
}
|
|
164
|
+
|
|
128
165
|
// ─── Utilities ───────────────────────────────────────────────────────────────
|
|
129
166
|
|
|
130
167
|
export function shouldCommit(
|
|
@@ -228,6 +265,13 @@ export function buildQualityGateRevisionIssues(
|
|
|
228
265
|
// Canonical wording of the advisory "assertion balance" quality issue.
const TEST_ASSERTION_BALANCE_ISSUE =
  "Changed test files do not show both positive and negative assertion coverage (expected both).";

/**
 * Reports whether a quality issue is the assertion-balance advisory.
 *
 * Matches either the canonical message or any issue text containing the
 * phrase "positive and negative assertion coverage", so variants that carry a
 * prefix or different lead-in wording are recognized as well.
 */
function isAssertionBalanceIssue(issue: string): boolean {
  if (issue.includes("positive and negative assertion coverage")) return true;
  return issue === TEST_ASSERTION_BALANCE_ISSUE;
}
|
|
274
|
+
|
|
231
275
|
export function relaxAdvisoryQualityIssues(
|
|
232
276
|
qualityIssues: string[],
|
|
233
277
|
validationRuns: Array<{ ok: boolean }>,
|
|
@@ -245,7 +289,7 @@ export function relaxAdvisoryQualityIssues(
|
|
|
245
289
|
return normalizedQualityIssues;
|
|
246
290
|
}
|
|
247
291
|
|
|
248
|
-
const relaxed = normalizedQualityIssues.filter((issue) => issue
|
|
292
|
+
const relaxed = normalizedQualityIssues.filter((issue) => !isAssertionBalanceIssue(issue));
|
|
249
293
|
return relaxed;
|
|
250
294
|
}
|
|
251
295
|
|
|
@@ -362,13 +406,40 @@ export function deriveQualityGatePolicy(
|
|
|
362
406
|
10,
|
|
363
407
|
Number.isFinite(Number(runtimeConfig.workerpals.qualityMaxAutoRevisions))
|
|
364
408
|
? Math.floor(Number(runtimeConfig.workerpals.qualityMaxAutoRevisions))
|
|
365
|
-
:
|
|
409
|
+
: 3,
|
|
410
|
+
),
|
|
411
|
+
);
|
|
412
|
+
const baseValidationMaxAutoRevisions = Math.max(
|
|
413
|
+
0,
|
|
414
|
+
Math.min(
|
|
415
|
+
10,
|
|
416
|
+
Number.isFinite(Number(runtimeConfig.workerpals.qualityValidationMaxAutoRevisions))
|
|
417
|
+
? Math.floor(Number(runtimeConfig.workerpals.qualityValidationMaxAutoRevisions))
|
|
418
|
+
: 3,
|
|
366
419
|
),
|
|
367
420
|
);
|
|
368
421
|
const baseSoftPassOnExhausted =
|
|
369
422
|
typeof runtimeConfig.workerpals.qualitySoftPassOnExhausted === "boolean"
|
|
370
423
|
? runtimeConfig.workerpals.qualitySoftPassOnExhausted
|
|
371
424
|
: true;
|
|
425
|
+
const gateSwitches = {
|
|
426
|
+
scopeGateEnabled:
|
|
427
|
+
typeof runtimeConfig.workerpals.qualityScopeGateEnabled === "boolean"
|
|
428
|
+
? runtimeConfig.workerpals.qualityScopeGateEnabled
|
|
429
|
+
: true,
|
|
430
|
+
validationGateEnabled:
|
|
431
|
+
typeof runtimeConfig.workerpals.qualityValidationGateEnabled === "boolean"
|
|
432
|
+
? runtimeConfig.workerpals.qualityValidationGateEnabled
|
|
433
|
+
: true,
|
|
434
|
+
criticGateEnabled:
|
|
435
|
+
typeof runtimeConfig.workerpals.qualityCriticGateEnabled === "boolean"
|
|
436
|
+
? runtimeConfig.workerpals.qualityCriticGateEnabled
|
|
437
|
+
: true,
|
|
438
|
+
publishGateEnabled:
|
|
439
|
+
typeof runtimeConfig.workerpals.qualityPublishGateEnabled === "boolean"
|
|
440
|
+
? runtimeConfig.workerpals.qualityPublishGateEnabled
|
|
441
|
+
: true,
|
|
442
|
+
};
|
|
372
443
|
const baseCriticMinScore = (() => {
|
|
373
444
|
const value = Number(runtimeConfig.workerpals.qualityCriticMinScore);
|
|
374
445
|
if (!Number.isFinite(value)) return 8;
|
|
@@ -379,19 +450,23 @@ export function deriveQualityGatePolicy(
|
|
|
379
450
|
const mergeConflict = extractMergeConflictReviewContext(params);
|
|
380
451
|
if (mergeConflict) {
|
|
381
452
|
return {
|
|
382
|
-
|
|
383
|
-
maxAutoRevisions: baseMaxAutoRevisions,
|
|
384
|
-
softPassOnExhausted: baseSoftPassOnExhausted,
|
|
385
|
-
criticMinScore: baseCriticMinScore,
|
|
386
|
-
};
|
|
387
|
-
}
|
|
388
|
-
return {
|
|
389
|
-
mode: "default",
|
|
453
|
+
mode: "merge_conflict",
|
|
390
454
|
maxAutoRevisions: baseMaxAutoRevisions,
|
|
455
|
+
validationMaxAutoRevisions: baseValidationMaxAutoRevisions,
|
|
456
|
+
...gateSwitches,
|
|
391
457
|
softPassOnExhausted: baseSoftPassOnExhausted,
|
|
392
458
|
criticMinScore: baseCriticMinScore,
|
|
393
459
|
};
|
|
394
460
|
}
|
|
461
|
+
return {
|
|
462
|
+
mode: "default",
|
|
463
|
+
maxAutoRevisions: baseMaxAutoRevisions,
|
|
464
|
+
validationMaxAutoRevisions: baseValidationMaxAutoRevisions,
|
|
465
|
+
...gateSwitches,
|
|
466
|
+
softPassOnExhausted: baseSoftPassOnExhausted,
|
|
467
|
+
criticMinScore: baseCriticMinScore,
|
|
468
|
+
};
|
|
469
|
+
}
|
|
395
470
|
const tightenedCriticMinScore =
|
|
396
471
|
reviewFix.reviewThreshold != null
|
|
397
472
|
? Math.max(baseCriticMinScore, Math.max(0, Math.min(10, reviewFix.reviewThreshold - 0.2)))
|
|
@@ -399,6 +474,8 @@ export function deriveQualityGatePolicy(
|
|
|
399
474
|
return {
|
|
400
475
|
mode: "review_fix",
|
|
401
476
|
maxAutoRevisions: Math.max(baseMaxAutoRevisions, 2),
|
|
477
|
+
validationMaxAutoRevisions: baseValidationMaxAutoRevisions,
|
|
478
|
+
...gateSwitches,
|
|
402
479
|
softPassOnExhausted: baseSoftPassOnExhausted,
|
|
403
480
|
criticMinScore: tightenedCriticMinScore,
|
|
404
481
|
};
|
|
@@ -541,7 +618,8 @@ async function runValidationCommand(
|
|
|
541
618
|
timeoutMs: number,
|
|
542
619
|
outputPolicy: Partial<OutputCompactionPolicy>,
|
|
543
620
|
): Promise<ValidationExecutionResult> {
|
|
544
|
-
const
|
|
621
|
+
const env = buildWorkerSandboxWritableEnv(repo);
|
|
622
|
+
const argv = prepareValidationCommandArgv(command, env);
|
|
545
623
|
if (!argv) {
|
|
546
624
|
return {
|
|
547
625
|
step: command,
|
|
@@ -557,7 +635,7 @@ async function runValidationCommand(
|
|
|
557
635
|
const startedAt = Date.now();
|
|
558
636
|
const proc = Bun.spawn(argv, {
|
|
559
637
|
cwd: repo,
|
|
560
|
-
env
|
|
638
|
+
env,
|
|
561
639
|
stdout: "pipe",
|
|
562
640
|
stderr: "pipe",
|
|
563
641
|
});
|
|
@@ -587,41 +665,68 @@ async function runValidationCommand(
|
|
|
587
665
|
ok: !timedOut && exitCode === 0,
|
|
588
666
|
exitCode: timedOut ? 124 : exitCode,
|
|
589
667
|
stdout: compactJobOutput(stdout.trim(), outputPolicy),
|
|
590
|
-
stderr: compactJobOutput(
|
|
668
|
+
stderr: compactJobOutput(
|
|
669
|
+
[
|
|
670
|
+
stderr.trim(),
|
|
671
|
+
timedOut
|
|
672
|
+
? `Validation command timed out after ${Math.max(1_000, timeoutMs)}ms. Captured output is the process output emitted before PushPals terminated the command.`
|
|
673
|
+
: "",
|
|
674
|
+
]
|
|
675
|
+
.filter(Boolean)
|
|
676
|
+
.join("\n"),
|
|
677
|
+
outputPolicy,
|
|
678
|
+
),
|
|
591
679
|
elapsedMs: Math.max(1, Date.now() - startedAt),
|
|
592
680
|
};
|
|
593
681
|
}
|
|
594
682
|
|
|
595
|
-
function
|
|
596
|
-
const
|
|
597
|
-
|
|
598
|
-
const
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
683
|
+
/**
 * Heuristically detects validation commands that drive a browser or web dev
 * server.
 *
 * A command qualifies when any of these is true:
 * - its normalized key names a web e2e/smoke script (`web:e2e`, `smoke:web`, ...),
 * - its lowercased tokens mention `playwright` or `cypress`,
 * - its lowercased tokens mention `expo` together with `web` or `start`.
 *
 * @returns `false` for commands whose normalized key is empty.
 */
export function isLongRunningBrowserValidationCommand(command: string): boolean {
  const commandKey = validationCommandKey(command);
  if (!commandKey) return false;

  const argv = tokenizeValidationCommandArgv(command);
  const loweredArgs = (argv?.map((token) => token.toLowerCase()) ?? []).join(" ");

  if (/\b(web:e2e|e2e:web|browser:e2e|smoke:web|web:smoke|browser:smoke)\b/.test(commandKey)) {
    return true;
  }
  if (/\b(playwright|cypress)\b/.test(loweredArgs)) {
    return true;
  }
  return /\bexpo\b/.test(loweredArgs) && /\b(web|start)\b/.test(loweredArgs);
}
|
|
694
|
+
|
|
695
|
+
/**
 * Computes the timeout to apply to a single validation command.
 *
 * The base timeout is floored and clamped to [1_000, 7_200_000] ms; a
 * non-finite base falls back to 180_000 ms. Commands recognized as
 * long-running browser validations get at least 600_000 ms.
 *
 * @param command Validation command line.
 * @param baseTimeoutMs Configured per-step timeout in milliseconds.
 * @returns Effective timeout in milliseconds.
 */
export function resolveValidationCommandTimeoutMs(command: string, baseTimeoutMs: number): number {
  const requested = Number(baseTimeoutMs);
  let effectiveBase = 180_000;
  if (Number.isFinite(requested)) {
    effectiveBase = Math.max(1_000, Math.min(7_200_000, Math.floor(requested)));
  }
  return isLongRunningBrowserValidationCommand(command)
    ? Math.max(effectiveBase, 600_000)
    : effectiveBase;
}
|
|
702
|
+
|
|
703
|
+
/**
 * Reports whether the argument vector already carries an explicit port,
 * either as a separate `--port` token or in the `--port=<value>` form.
 */
function commandHasPortArg(argv: string[]): boolean {
  for (const token of argv) {
    if (token === "--port") return true;
    if (token.startsWith("--port=")) return true;
  }
  return false;
}
|
|
706
|
+
|
|
707
|
+
/**
 * Decides whether a dev-server port argument should be appended to a
 * validation command.
 *
 * Injection is limited to commands that do not already specify `--port`, are
 * recognized as long-running browser validations, and whose normalized key
 * names one of the web e2e/smoke scripts.
 */
function shouldInjectBrowserValidationPort(command: string, argv: string[]): boolean {
  const isCandidate = !commandHasPortArg(argv) && isLongRunningBrowserValidationCommand(command);
  if (!isCandidate) return false;
  const commandKey = validationCommandKey(command);
  return /\b(web:e2e|e2e:web|browser:e2e|smoke:web|web:smoke|browser:smoke)\b/.test(commandKey);
}
|
|
714
|
+
|
|
715
|
+
/**
 * Tokenizes a validation command and, for browser e2e/smoke scripts, forwards
 * the sandbox dev-server port to the underlying script.
 *
 * The port is read from `env.EXPO_DEV_SERVER_PORT`. It is only injected when it
 * is a decimal integer in the 0-65535 range; an unusable value leaves the
 * command untouched so the script falls back to its own default instead of
 * being handed a port it cannot bind.
 *
 * @param command Validation command line.
 * @param env Environment the command will run with.
 * @returns The argument vector to spawn, or `null` when the command cannot be tokenized.
 */
export function prepareValidationCommandArgv(
  command: string,
  env: Record<string, string>,
): string[] | null {
  const argv = tokenizeValidationCommandArgv(command);
  if (!argv) return null;

  const port = String(env.EXPO_DEV_SERVER_PORT ?? "").trim();
  const isUsablePort = /^\d{1,5}$/.test(port) && Number(port) <= 65_535;
  if (!isUsablePort || !shouldInjectBrowserValidationPort(command, argv)) return argv;

  // When the command already contains a `--` separator, everything after it is
  // forwarded to the script. Adding a second `--` would reach the script as a
  // literal end-of-options marker and turn `--port` into a positional argument.
  return argv.includes("--") ? [...argv, "--port", port] : [...argv, "--", "--port", port];
}
|
|
725
|
+
|
|
726
|
+
/**
 * Reports whether a failure digest contains one of the port, socket, timeout
 * or browser-runtime keywords below. The match is case-insensitive and
 * word-bounded.
 */
function isBrowserValidationInfrastructureDigest(digest: string): boolean {
  const infrastructureSignals =
    /\b(ERR_SOCKET_BAD_PORT|EADDRINUSE|ECONNREFUSED|ECONNRESET|ETIMEDOUT|timed out|timeout|port|browser runtime|playwright install|executable doesn't exist)\b/i;
  return infrastructureSignals.test(digest);
}
|
|
626
731
|
|
|
627
732
|
interface ToolAvailabilityResult {
|
|
@@ -730,6 +835,96 @@ function extractPreparedMergeConflictPaths(params: Record<string, unknown>): str
|
|
|
730
835
|
.filter(Boolean);
|
|
731
836
|
}
|
|
732
837
|
|
|
838
|
+
/**
 * Cleans a raw token taken from validation output into a repo-relative path.
 *
 * Steps: trim, strip leading quote/bracket characters and trailing
 * quote/bracket/punctuation characters, convert backslashes to `/`, drop a
 * leading `./`, and collapse repeated slashes.
 *
 * @returns The normalized path, or `null` when the token is empty, starts with
 *   or contains a `../` segment, has neither `.` nor `/`, or is an
 *   `http(s):`/`file:` URL.
 */
function normalizeValidationPathToken(value: string): string | null {
  let token = value.trim();
  token = token.replace(/^['"`(<[]+/, "");
  token = token.replace(/[>'"`)\],.;:]+$/, "");
  token = token.replace(/\\/g, "/");
  token = token.replace(/^\.\/+/, "");
  token = token.replace(/\/+/g, "/");

  const climbsOutOfRepo = token.startsWith("../") || token.includes("/../");
  if (token === "" || climbsOutOfRepo) return null;

  const looksLikePath = /[./]/.test(token);
  const isUrl = /^(https?|file):/i.test(token);
  return looksLikePath && !isUrl ? token : null;
}
|
|
851
|
+
|
|
852
|
+
/**
 * Collects distinct path-like tokens from validation output.
 *
 * ANSI control sequences are stripped first. Two passes then gather
 * candidates: bare slash-separated paths, and the path that follows a
 * `from`/`in`/`at` keyword (optionally quoted). Each candidate is normalized;
 * rejected and duplicate tokens are dropped. First-seen order is preserved.
 */
function extractPathTokensFromValidationOutput(value: string): string[] {
  // A Set keeps insertion order, which gives de-duplication and ordering in one structure.
  const tokens = new Set<string>();
  const collect = (candidate: string | undefined): void => {
    if (!candidate) return;
    const token = normalizeValidationPathToken(candidate);
    if (token) tokens.add(token);
  };

  const text = stripAnsiControlSequences(value);
  const barePath = /[A-Za-z0-9_.@-]+(?:\/[A-Za-z0-9_.@-]+)+(?:\.[A-Za-z0-9_.-]+)?/g;
  const keywordPath = /(?:from|in|at)\s+['"`]?([^'"`\s]+\/[^'"`\s]+)['"`]?/gi;

  for (const found of text.matchAll(barePath)) {
    collect(found[0]);
  }
  for (const found of text.matchAll(keywordPath)) {
    collect(found[1]);
  }
  return Array.from(tokens);
}
|
|
871
|
+
|
|
872
|
+
/**
 * Extracts the literal (non-glob) leading portion of a scope hint.
 *
 * Everything from the first `*` onward is removed, trailing slashes are
 * trimmed, and the remainder is normalized as a path token.
 *
 * @returns The literal prefix, or `null` when nothing usable remains.
 */
function literalScopePrefix(value: string): string | null {
  const literalPart = value.replace(/\*\*?.*$/, "");
  const withoutTrailingSlash = literalPart.replace(/\/+$/, "");
  const prefix = normalizeValidationPathToken(withoutTrailingSlash);
  return !prefix || prefix === "." ? null : prefix;
}
|
|
877
|
+
|
|
878
|
+
/**
 * Tests whether a path falls under a scope hint.
 *
 * The hint is first tried as a glob. If that does not match, the hint's
 * literal prefix is compared: the path matches when it equals the prefix or
 * lies beneath it.
 */
function pathMatchesScopeHint(path: string, hint: string): boolean {
  const candidate = normalizeValidationPathToken(path);
  const pattern = hint.trim().replace(/\\/g, "/").replace(/^\.\/+/, "");
  if (!(candidate && pattern)) return false;

  if (matchesGlob(candidate, pattern)) return true;

  const prefix = literalScopePrefix(pattern);
  if (!prefix) return false;
  return candidate === prefix || candidate.startsWith(`${prefix}/`);
}
|
|
887
|
+
|
|
888
|
+
/**
 * Classifies whether failed validation runs point at files inside or outside
 * the task's scope.
 *
 * Scope hints come from `targetPath`, `changedPaths`, `planning.targetPaths`
 * and `planning.scope.writeGlobs`. Runs that exited 127 are ignored.
 *
 * @returns
 * - `"none"` when there are no qualifying failures, no scope hints, or no
 *   path tokens left in the output after filtering;
 * - `"task_scope"` when the output mentions a scope hint's literal prefix
 *   (4+ characters) or contains a path matching a hint;
 * - `"outside_task_scope"` when the output names paths but none match a hint.
 */
export function classifyValidationFailureScope(
  runs: ValidationExecutionResult[],
  planning: TaskExecutePlanning,
  changedPaths: string[],
  targetPath?: string,
): "none" | "task_scope" | "outside_task_scope" {
  const failures = runs.filter((run) => !(run.ok || run.exitCode === 127));
  if (failures.length === 0) return "none";

  const scopeHints: string[] = [];
  const rawHints = [
    targetPath ?? "",
    ...changedPaths,
    ...(planning.targetPaths ?? []),
    ...(planning.scope.writeGlobs ?? []),
  ];
  for (const raw of rawHints) {
    const hint = raw.trim().replace(/\\/g, "/");
    if (hint) scopeHints.push(hint);
  }
  if (scopeHints.length === 0) return "none";

  const output = failures
    .flatMap((run) => [run.stdout, run.stderr])
    .filter(Boolean)
    .join("\n");

  // Pass 1: case-insensitive substring search for each hint's literal prefix.
  const haystack = output.toLowerCase().replace(/\\/g, "/");
  const mentionsScopedLiteral = scopeHints.some((hint) => {
    const literal = literalScopePrefix(hint);
    if (!literal) return false;
    return literal.length >= 4 && haystack.includes(literal.toLowerCase());
  });
  if (mentionsScopedLiteral) return "task_scope";

  // Pass 2: compare extracted path tokens against the hints, ignoring
  // tokens under package-manager / dependency directories.
  const dependencyPrefix = /^(node_modules|\.bun|bun|npm|pnpm|yarn)\//i;
  const pathTokens = extractPathTokensFromValidationOutput(output).filter(
    (token) => !dependencyPrefix.test(token),
  );
  if (pathTokens.length === 0) return "none";

  const touchesScope = pathTokens.some((token) =>
    scopeHints.some((hint) => pathMatchesScopeHint(token, hint)),
  );
  return touchesScope ? "task_scope" : "outside_task_scope";
}
|
|
927
|
+
|
|
733
928
|
function detectValidationBlocker(runs: ValidationExecutionResult[]): ValidationBlocker | null {
|
|
734
929
|
const combined = runs
|
|
735
930
|
.flatMap((run) => [run.stdout, run.stderr])
|
|
@@ -910,7 +1105,51 @@ function extractRunnableValidationCommand(step: string): string | null {
|
|
|
910
1105
|
}
|
|
911
1106
|
|
|
912
1107
|
/**
 * Produces a canonical key for a validation command so equivalent spellings
 * compare equal.
 *
 * When the command tokenizes, tokens are trimmed, empty tokens dropped, a
 * leading `bunx` is expanded to `bun x`, and the result is joined with single
 * spaces and lowercased. When it does not tokenize (or yields no tokens), the
 * raw string gets the same whitespace, `bunx` and case normalization.
 */
function validationCommandKey(command: string): string {
  const argv = tokenizeValidationCommandArgv(command);
  if (!argv || argv.length === 0) {
    return command
      .trim()
      .replace(/\s+/g, " ")
      .replace(/^bunx\b/i, "bun x")
      .toLowerCase();
  }

  const parts = argv.map((entry) => entry.trim()).filter(Boolean);
  const [head, ...rest] = parts;
  const expanded = head?.toLowerCase() === "bunx" ? ["bun", "x", ...rest] : parts;
  return expanded.join(" ").replace(/\s+/g, " ").toLowerCase();
}
|
|
1122
|
+
|
|
1123
|
+
/**
 * Distills a failed validation run into a short, single-line reason.
 *
 * stderr and stdout are combined (stderr first) with ANSI sequences stripped.
 * Resolution order:
 * 1. the first matching known failure pattern, tried in priority order
 *    (module resolution errors, bad port, TypeScript errors, generic `Error:`);
 * 2. the first line containing a failure keyword;
 * 3. `"timed out"` (with the elapsed time when finite) for exit code 124;
 * 4. the empty string.
 *
 * Pattern and keyword matches are passed through `toSingleLine(..., 180)`.
 */
export function extractValidationFailureDigest(run: {
  exitCode?: number;
  stdout?: string;
  stderr?: string;
  elapsedMs?: number;
}): string {
  const text = stripAnsiControlSequences([run.stderr, run.stdout].filter(Boolean).join("\n"));

  // Ordered by priority: the first pattern with a match wins, regardless of
  // where in the output that match occurs.
  const knownFailureShapes = [
    /\bCannot find module\s+['"`][^'"`\r\n]+['"`][^\r\n]*/i,
    /\bFailed to resolve import\s+['"`][^'"`\r\n]+['"`][^\r\n]*/i,
    /\bCould not resolve\s+['"`]?[^'"`\r\n]+['"`]?[^\r\n]*/i,
    /\bModule not found[^\r\n]*/i,
    /\bERR_SOCKET_BAD_PORT[^\r\n]*/i,
    /\berror TS\d+:[^\r\n]*/i,
    /\bError:\s+[^\r\n]*/i,
  ];
  for (const shape of knownFailureShapes) {
    const hit = shape.exec(text);
    if (hit?.[0]) return toSingleLine(hit[0], 180);
  }

  const failureKeyword = /\b(error|failed|cannot|could not|timeout|timed out)\b/i;
  for (const rawLine of text.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (failureKeyword.test(line)) return toSingleLine(line, 180);
  }

  if (Number(run.exitCode) !== 124) return "";
  const elapsed = Number.isFinite(Number(run.elapsedMs)) ? ` after ${Number(run.elapsedMs)}ms` : "";
  return `timed out${elapsed}`;
}
|
|
915
1154
|
|
|
916
1155
|
export function collectRequiredValidationFailures(
|
|
@@ -923,7 +1162,8 @@ export function collectRequiredValidationFailures(
|
|
|
923
1162
|
.filter((run) => requiredKeys.has(validationCommandKey(run.command)) && !run.ok)
|
|
924
1163
|
.map((run) => {
|
|
925
1164
|
const exitCode = Number.isFinite(Number(run.exitCode)) ? Number(run.exitCode) : "unknown";
|
|
926
|
-
|
|
1165
|
+
const digest = extractValidationFailureDigest(run);
|
|
1166
|
+
return `${run.command} exited ${exitCode}${digest ? ` (${digest})` : ""}`;
|
|
927
1167
|
});
|
|
928
1168
|
}
|
|
929
1169
|
|
|
@@ -983,7 +1223,7 @@ function dedupeValidationCommands(...groups: string[][]): string[] {
|
|
|
983
1223
|
for (const command of group) {
|
|
984
1224
|
const trimmed = command.trim();
|
|
985
1225
|
if (!trimmed) continue;
|
|
986
|
-
const key = trimmed
|
|
1226
|
+
const key = validationCommandKey(trimmed);
|
|
987
1227
|
if (seen.has(key)) continue;
|
|
988
1228
|
seen.add(key);
|
|
989
1229
|
out.push(trimmed);
|
|
@@ -1088,14 +1328,19 @@ export function inferFallbackValidationCommandsForTestTask(
|
|
|
1088
1328
|
return candidates.slice(0, 4);
|
|
1089
1329
|
}
|
|
1090
1330
|
|
|
1091
|
-
function isTestFocusedTask(
|
|
1331
|
+
export function isTestFocusedTask(
|
|
1092
1332
|
instruction: string,
|
|
1093
1333
|
planning: TaskExecutePlanning,
|
|
1094
1334
|
targetPath?: string,
|
|
1095
1335
|
): boolean {
|
|
1096
1336
|
const lowerInstruction = instruction.toLowerCase();
|
|
1097
1337
|
if (
|
|
1098
|
-
/\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/.test(
|
|
1338
|
+
/\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b.{0,80}\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/.test(
|
|
1339
|
+
lowerInstruction,
|
|
1340
|
+
) ||
|
|
1341
|
+
/\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b.{0,80}\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b/.test(
|
|
1342
|
+
lowerInstruction,
|
|
1343
|
+
)
|
|
1099
1344
|
) {
|
|
1100
1345
|
return true;
|
|
1101
1346
|
}
|
|
@@ -1107,7 +1352,9 @@ function isTestFocusedTask(
|
|
|
1107
1352
|
if (pathHints.some((entry) => isLikelyTestPath(entry))) return true;
|
|
1108
1353
|
if (
|
|
1109
1354
|
planning.acceptanceCriteria.some((entry) =>
|
|
1110
|
-
/\b(test|tests|coverage|unit|integration|
|
|
1355
|
+
/\b(add|write|create|update|extend|expand|harden|improve|refactor|move|extract|fix)\b.{0,80}\b(test|tests|coverage|unit test|integration test|unittest|pytest)\b/i.test(
|
|
1356
|
+
entry,
|
|
1357
|
+
),
|
|
1111
1358
|
)
|
|
1112
1359
|
) {
|
|
1113
1360
|
return true;
|
|
@@ -1143,7 +1390,12 @@ async function runDeterministicQualityGate(
|
|
|
1143
1390
|
repo: string,
|
|
1144
1391
|
params: Record<string, unknown>,
|
|
1145
1392
|
runtimeConfig: WorkerpalsRuntimeConfig,
|
|
1393
|
+
qualityGatePolicy: QualityGatePolicy,
|
|
1146
1394
|
onLog?: (stream: "stdout" | "stderr", line: string) => void,
|
|
1395
|
+
validationRetryState?: {
|
|
1396
|
+
previousFailureDigests?: Map<string, string>;
|
|
1397
|
+
revisionAttempt?: number;
|
|
1398
|
+
},
|
|
1147
1399
|
): Promise<DeterministicQualityResult> {
|
|
1148
1400
|
const instruction = String(params.instruction ?? "");
|
|
1149
1401
|
const targetPath = String(params.targetPath ?? params.path ?? "").trim() || undefined;
|
|
@@ -1154,16 +1406,25 @@ async function runDeterministicQualityGate(
|
|
|
1154
1406
|
}
|
|
1155
1407
|
const isTestTask = isTestFocusedTask(instruction, planning, targetPath);
|
|
1156
1408
|
const hasRequiredValidationCriteria = requiredValidationSteps.length > 0;
|
|
1157
|
-
if (
|
|
1409
|
+
if (
|
|
1410
|
+
!qualityGatePolicy.scopeGateEnabled &&
|
|
1411
|
+
!qualityGatePolicy.validationGateEnabled &&
|
|
1412
|
+
!qualityGatePolicy.criticGateEnabled &&
|
|
1413
|
+
!isTestTask &&
|
|
1414
|
+
!hasRequiredValidationCriteria
|
|
1415
|
+
) {
|
|
1158
1416
|
return {
|
|
1159
1417
|
ok: true,
|
|
1160
1418
|
skipped: true,
|
|
1161
1419
|
issues: [],
|
|
1420
|
+
scopeIssues: [],
|
|
1421
|
+
validationIssues: [],
|
|
1162
1422
|
changedPaths: [],
|
|
1163
1423
|
changedTestPaths: [],
|
|
1164
1424
|
validationRuns: [],
|
|
1165
1425
|
requiredValidationFailures: [],
|
|
1166
1426
|
blocker: null,
|
|
1427
|
+
validationFailureScope: "none",
|
|
1167
1428
|
};
|
|
1168
1429
|
}
|
|
1169
1430
|
|
|
@@ -1176,15 +1437,47 @@ async function runDeterministicQualityGate(
|
|
|
1176
1437
|
),
|
|
1177
1438
|
);
|
|
1178
1439
|
const issues: string[] = [];
|
|
1179
|
-
|
|
1180
|
-
|
|
1440
|
+
const scopeIssues: string[] = [];
|
|
1441
|
+
const validationIssues: string[] = [];
|
|
1442
|
+
const addScopeIssue = (issue: string): void => {
|
|
1443
|
+
scopeIssues.push(issue);
|
|
1444
|
+
issues.push(`ScopeGate: ${issue}`);
|
|
1445
|
+
};
|
|
1446
|
+
const addValidationIssue = (issue: string): void => {
|
|
1447
|
+
validationIssues.push(issue);
|
|
1448
|
+
issues.push(`ValidationGate: ${issue}`);
|
|
1449
|
+
};
|
|
1450
|
+
|
|
1451
|
+
if (qualityGatePolicy.scopeGateEnabled) {
|
|
1452
|
+
if (!statusResult.ok) {
|
|
1453
|
+
addScopeIssue("could not evaluate changed paths from git status.");
|
|
1454
|
+
}
|
|
1455
|
+
for (const issue of collectWriteScopeIssuesFromChangedPaths(changedPaths, planning)) {
|
|
1456
|
+
addScopeIssue(issue);
|
|
1457
|
+
}
|
|
1458
|
+
if (isTestTask && changedTestPaths.length === 0) {
|
|
1459
|
+
addScopeIssue("found no relevant test file modified for this test-focused task.");
|
|
1460
|
+
}
|
|
1461
|
+
if (
|
|
1462
|
+
isTestTask &&
|
|
1463
|
+
changedTestPaths.length > 0 &&
|
|
1464
|
+
!hasBalancedPositiveNegativeAssertions(changedTestPaths, repo)
|
|
1465
|
+
) {
|
|
1466
|
+
addScopeIssue(
|
|
1467
|
+
"found changed test files without both positive and negative assertion coverage (expected both).",
|
|
1468
|
+
);
|
|
1469
|
+
}
|
|
1470
|
+
for (const issue of scopeIssues) {
|
|
1471
|
+
onLog?.("stderr", `[ScopeGate] ${issue}`);
|
|
1472
|
+
}
|
|
1473
|
+
} else {
|
|
1474
|
+
onLog?.("stdout", "[ScopeGate] Disabled by workerpals.quality_scope_gate_enabled=false.");
|
|
1181
1475
|
}
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
"Changed test files do not show both positive and negative assertion coverage (expected both).",
|
|
1476
|
+
|
|
1477
|
+
if (!qualityGatePolicy.validationGateEnabled) {
|
|
1478
|
+
onLog?.(
|
|
1479
|
+
"stdout",
|
|
1480
|
+
"[ValidationGate] Disabled by workerpals.quality_validation_gate_enabled=false.",
|
|
1188
1481
|
);
|
|
1189
1482
|
}
|
|
1190
1483
|
|
|
@@ -1207,28 +1500,30 @@ async function runDeterministicQualityGate(
|
|
|
1207
1500
|
if (!Number.isFinite(value)) return 180_000;
|
|
1208
1501
|
return Math.max(1_000, Math.min(7_200_000, Math.floor(value)));
|
|
1209
1502
|
})();
|
|
1503
|
+
let requiredValidationFailures: string[] = [];
|
|
1504
|
+
if (qualityGatePolicy.validationGateEnabled) {
|
|
1210
1505
|
if (hasRequiredValidationCriteria && requiredRunnableSteps.length === 0) {
|
|
1211
|
-
|
|
1212
|
-
"vision.md testing criteria
|
|
1506
|
+
addValidationIssue(
|
|
1507
|
+
"found vision.md testing criteria, but none contained a runnable validation command.",
|
|
1213
1508
|
);
|
|
1214
1509
|
}
|
|
1215
1510
|
if (commandsToRun.length === 0) {
|
|
1216
|
-
|
|
1511
|
+
addValidationIssue(
|
|
1217
1512
|
hasRequiredValidationCriteria
|
|
1218
|
-
? "
|
|
1219
|
-
: "
|
|
1513
|
+
? "found no runnable validation command from vision.md testing criteria or planning.validationSteps."
|
|
1514
|
+
: "found no runnable validation command in planning.validationSteps (expected at least one test command).",
|
|
1220
1515
|
);
|
|
1221
1516
|
} else {
|
|
1222
1517
|
if (requiredRunnableSteps.length > 0) {
|
|
1223
1518
|
onLog?.(
|
|
1224
1519
|
"stdout",
|
|
1225
|
-
`[
|
|
1520
|
+
`[ValidationGate] Running required vision.md testing criteria: ${requiredRunnableSteps.join(" | ")}`,
|
|
1226
1521
|
);
|
|
1227
1522
|
}
|
|
1228
1523
|
if (isTestTask && plannerRunnableSteps.length === 0 && fallbackValidationSteps.length > 0) {
|
|
1229
1524
|
onLog?.(
|
|
1230
1525
|
"stdout",
|
|
1231
|
-
`[
|
|
1526
|
+
`[ValidationGate] No runnable planning.validationSteps found; using fallback validation command(s): ${commandsToRun.join(" | ")}`,
|
|
1232
1527
|
);
|
|
1233
1528
|
}
|
|
1234
1529
|
const toolchainPlan = buildToolchainPlan({
|
|
@@ -1238,7 +1533,7 @@ async function runDeterministicQualityGate(
|
|
|
1238
1533
|
if (toolchainPlan.requirements.length > 0) {
|
|
1239
1534
|
onLog?.(
|
|
1240
1535
|
"stdout",
|
|
1241
|
-
`[
|
|
1536
|
+
`[ValidationGate] Toolchain preflight: source=${toolchainPlan.environmentSource}, required=${toolchainPlan.requirements
|
|
1242
1537
|
.map((requirement) => requirement.tool)
|
|
1243
1538
|
.join(", ")}`,
|
|
1244
1539
|
);
|
|
@@ -1250,7 +1545,7 @@ async function runDeterministicQualityGate(
|
|
|
1250
1545
|
if (missingToolRequirements.length > 0) {
|
|
1251
1546
|
onLog?.(
|
|
1252
1547
|
"stderr",
|
|
1253
|
-
`[
|
|
1548
|
+
`[ValidationGate] Toolchain preflight blocked dependent validation command(s): ${formatMissingToolRequirements(
|
|
1254
1549
|
missingToolRequirements,
|
|
1255
1550
|
)}`,
|
|
1256
1551
|
);
|
|
@@ -1275,19 +1570,47 @@ async function runDeterministicQualityGate(
|
|
|
1275
1570
|
});
|
|
1276
1571
|
onLog?.(
|
|
1277
1572
|
"stderr",
|
|
1278
|
-
`[
|
|
1573
|
+
`[ValidationGate] Validation skipped (missing toolchain): ${command}`,
|
|
1279
1574
|
);
|
|
1280
1575
|
continue;
|
|
1281
1576
|
}
|
|
1282
|
-
|
|
1577
|
+
const previousDigest = validationRetryState?.previousFailureDigests?.get(
|
|
1578
|
+
validationCommandKey(command),
|
|
1579
|
+
);
|
|
1580
|
+
if (
|
|
1581
|
+
previousDigest &&
|
|
1582
|
+
Number(validationRetryState?.revisionAttempt ?? 0) > 0 &&
|
|
1583
|
+
isLongRunningBrowserValidationCommand(command) &&
|
|
1584
|
+
isBrowserValidationInfrastructureDigest(previousDigest)
|
|
1585
|
+
) {
|
|
1586
|
+
const stderr =
|
|
1587
|
+
`Skipped repeated browser validation after the same command failed in an earlier revision: ${previousDigest}. ` +
|
|
1588
|
+
"Run it once after the underlying blocker changes.";
|
|
1589
|
+
validationRuns.push({
|
|
1590
|
+
step: command,
|
|
1591
|
+
command,
|
|
1592
|
+
ok: false,
|
|
1593
|
+
exitCode: 124,
|
|
1594
|
+
stdout: "",
|
|
1595
|
+
stderr,
|
|
1596
|
+
elapsedMs: 1,
|
|
1597
|
+
});
|
|
1598
|
+
onLog?.(
|
|
1599
|
+
"stderr",
|
|
1600
|
+
`[ValidationGate] Skipped repeated long browser validation: ${command} (${previousDigest})`,
|
|
1601
|
+
);
|
|
1602
|
+
continue;
|
|
1603
|
+
}
|
|
1604
|
+
onLog?.("stdout", `[ValidationGate] Running "${command}"`);
|
|
1283
1605
|
const run = await runValidationCommand(
|
|
1284
1606
|
repo,
|
|
1285
1607
|
command,
|
|
1286
|
-
qualityValidationStepTimeoutMs,
|
|
1608
|
+
resolveValidationCommandTimeoutMs(command, qualityValidationStepTimeoutMs),
|
|
1287
1609
|
outputPolicy,
|
|
1288
1610
|
);
|
|
1289
1611
|
validationRuns.push(run);
|
|
1290
|
-
const
|
|
1612
|
+
const digest = run.ok ? "" : extractValidationFailureDigest(run);
|
|
1613
|
+
const runSummary = `[ValidationGate] ${run.ok ? "Passed" : "Failed"} (${run.elapsedMs}ms, exit ${run.exitCode}): ${command}${digest ? ` - ${digest}` : ""}`;
|
|
1291
1614
|
onLog?.(run.ok ? "stdout" : "stderr", runSummary);
|
|
1292
1615
|
}
|
|
1293
1616
|
// exit 127 = command not found: separate tool-availability issues from real test failures.
|
|
@@ -1297,43 +1620,58 @@ async function runDeterministicQualityGate(
|
|
|
1297
1620
|
const cmds = notFoundRuns.map((run) => run.command).join(", ");
|
|
1298
1621
|
onLog?.(
|
|
1299
1622
|
"stderr",
|
|
1300
|
-
`[
|
|
1623
|
+
`[ValidationGate] Some validation commands not found (exit 127 - wrong tool?): ${cmds}. This project uses Bun: prefer "bun test".`,
|
|
1301
1624
|
);
|
|
1302
1625
|
}
|
|
1303
1626
|
if (executedRuns.length > 0 && executedRuns.every((run) => !run.ok)) {
|
|
1304
|
-
|
|
1627
|
+
addValidationIssue("executed validation commands, but none passed.");
|
|
1305
1628
|
} else if (executedRuns.length === 0 && notFoundRuns.length > 0) {
|
|
1306
|
-
|
|
1307
|
-
'
|
|
1629
|
+
addValidationIssue(
|
|
1630
|
+
'could not run any validation command (command not found). Use "bun test" or another available test runner.',
|
|
1308
1631
|
);
|
|
1309
1632
|
}
|
|
1310
1633
|
if (
|
|
1311
1634
|
isTestTask &&
|
|
1312
1635
|
!validationRuns.some((run) => /\b(test|pytest|coverage|vitest|jest)\b/i.test(run.command))
|
|
1313
1636
|
) {
|
|
1314
|
-
|
|
1637
|
+
addValidationIssue("did not execute a recognizable test command.");
|
|
1315
1638
|
}
|
|
1316
1639
|
}
|
|
1317
|
-
|
|
1640
|
+
requiredValidationFailures = collectRequiredValidationFailures(
|
|
1318
1641
|
requiredRunnableSteps,
|
|
1319
1642
|
validationRuns,
|
|
1320
1643
|
);
|
|
1321
1644
|
if (requiredValidationFailures.length > 0) {
|
|
1322
|
-
|
|
1645
|
+
addValidationIssue(
|
|
1323
1646
|
`Required vision.md validation failed: ${requiredValidationFailures.join("; ")}`,
|
|
1324
1647
|
);
|
|
1325
1648
|
}
|
|
1326
|
-
|
|
1649
|
+
}
|
|
1650
|
+
const blocker = qualityGatePolicy.validationGateEnabled
|
|
1651
|
+
? detectValidationBlocker(validationRuns)
|
|
1652
|
+
: null;
|
|
1653
|
+
const scopedValidationFailure = qualityGatePolicy.validationGateEnabled
|
|
1654
|
+
? classifyValidationFailureScope(validationRuns, planning, changedPaths, targetPath)
|
|
1655
|
+
: "none";
|
|
1656
|
+
if (scopedValidationFailure === "outside_task_scope") {
|
|
1657
|
+
onLog?.(
|
|
1658
|
+
"stderr",
|
|
1659
|
+
"[ValidationGate] Required validation failures appear outside the task write scope; treating them as publish blockers, not repair instructions.",
|
|
1660
|
+
);
|
|
1661
|
+
}
|
|
1327
1662
|
|
|
1328
1663
|
return {
|
|
1329
1664
|
ok: issues.length === 0 && blocker === null,
|
|
1330
1665
|
skipped: false,
|
|
1331
1666
|
issues,
|
|
1667
|
+
scopeIssues,
|
|
1668
|
+
validationIssues,
|
|
1332
1669
|
changedPaths,
|
|
1333
1670
|
changedTestPaths,
|
|
1334
1671
|
validationRuns,
|
|
1335
1672
|
requiredValidationFailures,
|
|
1336
1673
|
blocker,
|
|
1674
|
+
validationFailureScope: scopedValidationFailure,
|
|
1337
1675
|
};
|
|
1338
1676
|
}
|
|
1339
1677
|
|
|
@@ -1456,7 +1794,7 @@ async function runTaskCriticReview(
|
|
|
1456
1794
|
if (lowered.includes("response_format")) {
|
|
1457
1795
|
onLog?.(
|
|
1458
1796
|
"stdout",
|
|
1459
|
-
"[
|
|
1797
|
+
"[CriticGate] fallback: response_format json_object unsupported; retrying without strict response_format.",
|
|
1460
1798
|
);
|
|
1461
1799
|
request = await runCriticRequest(null);
|
|
1462
1800
|
}
|
|
@@ -1464,7 +1802,7 @@ async function runTaskCriticReview(
|
|
|
1464
1802
|
if (!request.response.ok) {
|
|
1465
1803
|
onLog?.(
|
|
1466
1804
|
"stderr",
|
|
1467
|
-
`[
|
|
1805
|
+
`[CriticGate] review request failed (${request.response.status}): ${toSingleLine(request.text, 240)}`,
|
|
1468
1806
|
);
|
|
1469
1807
|
return null;
|
|
1470
1808
|
}
|
|
@@ -1480,7 +1818,7 @@ async function runTaskCriticReview(
|
|
|
1480
1818
|
if (!reviewObj) {
|
|
1481
1819
|
onLog?.(
|
|
1482
1820
|
"stderr",
|
|
1483
|
-
`[
|
|
1821
|
+
`[CriticGate] produced non-JSON content; skipping critic gate. Raw: ${toSingleLine(
|
|
1484
1822
|
content,
|
|
1485
1823
|
220,
|
|
1486
1824
|
)}`,
|
|
@@ -1509,7 +1847,7 @@ async function runTaskCriticReview(
|
|
|
1509
1847
|
} catch (err) {
|
|
1510
1848
|
onLog?.(
|
|
1511
1849
|
"stderr",
|
|
1512
|
-
`[
|
|
1850
|
+
`[CriticGate] review unavailable: ${toSingleLine(err, 220)} (continuing without critic gate).`,
|
|
1513
1851
|
);
|
|
1514
1852
|
return null;
|
|
1515
1853
|
}
|
|
@@ -1520,6 +1858,8 @@ export function buildQualityRevisionHint(
|
|
|
1520
1858
|
critic: CriticReview | null,
|
|
1521
1859
|
planning: TaskExecutePlanning,
|
|
1522
1860
|
reviewFixContext?: ReviewFixContext | null,
|
|
1861
|
+
validationRuns: ValidationExecutionResult[] = [],
|
|
1862
|
+
validationBlocker: ValidationBlocker | null = null,
|
|
1523
1863
|
): string {
|
|
1524
1864
|
const lines: string[] = [];
|
|
1525
1865
|
lines.push("Quality revision required before completion.");
|
|
@@ -1552,6 +1892,26 @@ export function buildQualityRevisionHint(
|
|
|
1552
1892
|
lines.push("Deterministic quality issues:");
|
|
1553
1893
|
for (const issue of issues) lines.push(`- ${issue}`);
|
|
1554
1894
|
}
|
|
1895
|
+
if (validationBlocker) {
|
|
1896
|
+
lines.push(
|
|
1897
|
+
`Validation blocker: ${validationBlocker.category} - ${toSingleLine(
|
|
1898
|
+
validationBlocker.detail,
|
|
1899
|
+
300,
|
|
1900
|
+
)}`,
|
|
1901
|
+
);
|
|
1902
|
+
}
|
|
1903
|
+
const failedValidationRuns = validationRuns.filter((run) => !run.ok);
|
|
1904
|
+
if (failedValidationRuns.length > 0) {
|
|
1905
|
+
lines.push("Validation failure diagnostics:");
|
|
1906
|
+
for (const run of failedValidationRuns.slice(0, 5)) {
|
|
1907
|
+
lines.push(`- ${run.command} failed with exit ${run.exitCode} after ${run.elapsedMs}ms.`);
|
|
1908
|
+
const output = toSingleLine(
|
|
1909
|
+
stripAnsiControlSequences([run.stderr, run.stdout].filter(Boolean).join("\n")),
|
|
1910
|
+
700,
|
|
1911
|
+
);
|
|
1912
|
+
if (output) lines.push(` Output: ${output}`);
|
|
1913
|
+
}
|
|
1914
|
+
}
|
|
1555
1915
|
if (critic) {
|
|
1556
1916
|
lines.push(`Critic score: ${critic.score.toFixed(1)} / 10`);
|
|
1557
1917
|
if (critic.mustFix.length > 0) {
|
|
@@ -3318,9 +3678,10 @@ async function generateCommitMessageFromDiffViaCodex(
|
|
|
3318
3678
|
repo: string,
|
|
3319
3679
|
runtimeConfig: WorkerpalsRuntimeConfig,
|
|
3320
3680
|
): Promise<string | null> {
|
|
3681
|
+
const model = runtimeConfig.workerpals.llm.model.trim();
|
|
3682
|
+
if (!model) return null;
|
|
3321
3683
|
const codexPrefix = await resolveCodexCommandPrefix(repo, runtimeConfig.workerpals.llm.codexBin);
|
|
3322
3684
|
if (!codexPrefix) return null;
|
|
3323
|
-
const model = runtimeConfig.workerpals.llm.model.trim();
|
|
3324
3685
|
const timeoutMs = (() => {
|
|
3325
3686
|
const value = Number(runtimeConfig.workerpals.llm.codexTimeoutMs);
|
|
3326
3687
|
if (!Number.isFinite(value)) return 120_000;
|
|
@@ -3355,6 +3716,7 @@ async function generateCommitMessageFromDiffViaCodex(
|
|
|
3355
3716
|
const stdinText = `${prompt.systemPrompt}\n\n${prompt.userMessage}`;
|
|
3356
3717
|
const proc = Bun.spawn(cmd, {
|
|
3357
3718
|
cwd: repo,
|
|
3719
|
+
env: buildWorkerSandboxWritableEnv(repo),
|
|
3358
3720
|
stdout: "pipe",
|
|
3359
3721
|
stderr: "pipe",
|
|
3360
3722
|
stdin: new Blob([stdinText]),
|
|
@@ -3587,40 +3949,33 @@ function taskExecuteOrigin(params: Record<string, unknown>): "autonomy" | "user"
|
|
|
3587
3949
|
return "user";
|
|
3588
3950
|
}
|
|
3589
3951
|
|
|
3590
|
-
|
|
3591
|
-
|
|
3952
|
+
function collectWriteScopeIssuesFromChangedPaths(
|
|
3953
|
+
changedPaths: string[],
|
|
3592
3954
|
planning: TaskExecutePlanning,
|
|
3593
|
-
):
|
|
3955
|
+
): string[] {
|
|
3594
3956
|
const writeGlobs = toStringArray(planning.scope.writeGlobs ?? []);
|
|
3595
|
-
if (writeGlobs.length === 0) return
|
|
3596
|
-
|
|
3597
|
-
const statusResult = await git(repo, ["status", "--porcelain"]);
|
|
3598
|
-
if (!statusResult.ok) {
|
|
3599
|
-
return { warnings: ["Unable to evaluate changed paths for scope suggestion check."] };
|
|
3600
|
-
}
|
|
3957
|
+
if (writeGlobs.length === 0) return [];
|
|
3601
3958
|
|
|
3602
|
-
const
|
|
3959
|
+
const normalizedChangedPaths = changedPaths
|
|
3603
3960
|
.map((entry) => normalizeStagePath(entry))
|
|
3604
3961
|
.filter((entry): entry is string => Boolean(entry) && entry !== ".");
|
|
3605
|
-
if (
|
|
3962
|
+
if (normalizedChangedPaths.length === 0) return [];
|
|
3606
3963
|
|
|
3607
3964
|
const forbidden = toStringArray(planning.scope.forbiddenGlobs ?? []);
|
|
3608
|
-
const
|
|
3609
|
-
const outOfScope =
|
|
3965
|
+
const issues: string[] = [];
|
|
3966
|
+
const outOfScope = normalizedChangedPaths.filter(
|
|
3610
3967
|
(path) => !writeGlobs.some((glob) => matchesGlob(path, glob)),
|
|
3611
3968
|
);
|
|
3612
3969
|
if (outOfScope.length > 0) {
|
|
3613
|
-
|
|
3970
|
+
issues.push(`modified paths outside writeGlobs: ${outOfScope.join(", ")}`);
|
|
3614
3971
|
}
|
|
3615
|
-
const forbiddenTouched =
|
|
3972
|
+
const forbiddenTouched = normalizedChangedPaths.filter((path) =>
|
|
3616
3973
|
forbidden.some((glob) => matchesGlob(path, glob)),
|
|
3617
3974
|
);
|
|
3618
3975
|
if (forbiddenTouched.length > 0) {
|
|
3619
|
-
|
|
3620
|
-
`Scope suggestion: modified paths matching forbiddenGlobs: ${forbiddenTouched.join(", ")}`,
|
|
3621
|
-
);
|
|
3976
|
+
issues.push(`modified paths matching forbiddenGlobs: ${forbiddenTouched.join(", ")}`);
|
|
3622
3977
|
}
|
|
3623
|
-
return
|
|
3978
|
+
return issues;
|
|
3624
3979
|
}
|
|
3625
3980
|
|
|
3626
3981
|
function sanitizeTaskExecutePlanningPathHints(value: unknown): unknown {
|
|
@@ -3945,7 +4300,7 @@ async function runCodexCriticReview(
|
|
|
3945
4300
|
if (!codexPrefix) {
|
|
3946
4301
|
onLog?.(
|
|
3947
4302
|
"stderr",
|
|
3948
|
-
"[
|
|
4303
|
+
"[CriticGate] Codex: unable to resolve Codex CLI command (workerpals.llm.codex_bin/PATH); skipping.",
|
|
3949
4304
|
);
|
|
3950
4305
|
return null;
|
|
3951
4306
|
}
|
|
@@ -4026,6 +4381,7 @@ async function runCodexCriticReview(
|
|
|
4026
4381
|
try {
|
|
4027
4382
|
const proc = Bun.spawn(cmd, {
|
|
4028
4383
|
cwd: repo,
|
|
4384
|
+
env: buildWorkerSandboxWritableEnv(repo),
|
|
4029
4385
|
stdout: "pipe",
|
|
4030
4386
|
stderr: "pipe",
|
|
4031
4387
|
stdin: new Blob([criticInstruction]),
|
|
@@ -4045,14 +4401,14 @@ async function runCodexCriticReview(
|
|
|
4045
4401
|
clearTimeout(timer);
|
|
4046
4402
|
|
|
4047
4403
|
if (timedOut) {
|
|
4048
|
-
onLog?.("stderr", "[
|
|
4404
|
+
onLog?.("stderr", "[CriticGate] Codex timed out; skipping.");
|
|
4049
4405
|
return null;
|
|
4050
4406
|
}
|
|
4051
4407
|
if (exitCode !== 0) {
|
|
4052
4408
|
const stderrText = await new Response(proc.stderr).text();
|
|
4053
4409
|
onLog?.(
|
|
4054
4410
|
"stderr",
|
|
4055
|
-
`[
|
|
4411
|
+
`[CriticGate] Codex exited ${exitCode}: ${toSingleLine(stderrText, 220)}`,
|
|
4056
4412
|
);
|
|
4057
4413
|
return null;
|
|
4058
4414
|
}
|
|
@@ -4070,7 +4426,7 @@ async function runCodexCriticReview(
|
|
|
4070
4426
|
}
|
|
4071
4427
|
|
|
4072
4428
|
if (!lastMessage) {
|
|
4073
|
-
onLog?.("stderr", "[
|
|
4429
|
+
onLog?.("stderr", "[CriticGate] Codex: no output message captured; skipping.");
|
|
4074
4430
|
return null;
|
|
4075
4431
|
}
|
|
4076
4432
|
|
|
@@ -4078,7 +4434,7 @@ async function runCodexCriticReview(
|
|
|
4078
4434
|
if (!reviewObj) {
|
|
4079
4435
|
onLog?.(
|
|
4080
4436
|
"stderr",
|
|
4081
|
-
`[
|
|
4437
|
+
`[CriticGate] Codex returned non-JSON: ${toSingleLine(lastMessage, 220)}`,
|
|
4082
4438
|
);
|
|
4083
4439
|
return null;
|
|
4084
4440
|
}
|
|
@@ -4094,7 +4450,7 @@ async function runCodexCriticReview(
|
|
|
4094
4450
|
const revisionGuidance = String(reviewObj.revision_guidance ?? "")
|
|
4095
4451
|
.trim()
|
|
4096
4452
|
.slice(0, 2000);
|
|
4097
|
-
onLog?.("stdout", `[
|
|
4453
|
+
onLog?.("stdout", `[CriticGate] Codex score: ${score}/10`);
|
|
4098
4454
|
return {
|
|
4099
4455
|
score,
|
|
4100
4456
|
findings,
|
|
@@ -4103,7 +4459,7 @@ async function runCodexCriticReview(
|
|
|
4103
4459
|
raw: compactJobOutput(lastMessage, outputPolicyForRuntime(runtimeConfig)),
|
|
4104
4460
|
};
|
|
4105
4461
|
} catch (err) {
|
|
4106
|
-
onLog?.("stderr", `[
|
|
4462
|
+
onLog?.("stderr", `[CriticGate] Codex error: ${toSingleLine(err, 220)} (skipping).`);
|
|
4107
4463
|
return null;
|
|
4108
4464
|
}
|
|
4109
4465
|
}
|
|
@@ -4189,12 +4545,25 @@ export async function executeJob(
|
|
|
4189
4545
|
const reviewFixContext = extractReviewFixContext(normalizedParams);
|
|
4190
4546
|
const qualityGatePolicy = deriveQualityGatePolicy(normalizedParams, runtimeConfig);
|
|
4191
4547
|
const qualityMaxAutoRevisions = qualityGatePolicy.maxAutoRevisions;
|
|
4548
|
+
const qualityValidationMaxAutoRevisions = qualityGatePolicy.validationMaxAutoRevisions;
|
|
4549
|
+
const qualityRevisionLoopMax = Math.max(
|
|
4550
|
+
qualityMaxAutoRevisions,
|
|
4551
|
+
qualityValidationMaxAutoRevisions,
|
|
4552
|
+
);
|
|
4192
4553
|
const qualitySoftPassOnExhausted = qualityGatePolicy.softPassOnExhausted;
|
|
4193
4554
|
const qualityCriticMinScore = qualityGatePolicy.criticMinScore;
|
|
4194
4555
|
|
|
4195
4556
|
onLog?.(
|
|
4196
4557
|
"stdout",
|
|
4197
|
-
`[QualityGate] Policy: max_auto_revisions=${qualityMaxAutoRevisions}, soft_pass_on_exhausted=${qualitySoftPassOnExhausted ? "true" : "false"}, critic_min_score=${qualityCriticMinScore}`,
|
|
4558
|
+
`[QualityGate] Policy: max_auto_revisions=${qualityMaxAutoRevisions}, validation_max_auto_revisions=${qualityValidationMaxAutoRevisions}, soft_pass_on_exhausted=${qualitySoftPassOnExhausted ? "true" : "false"}, critic_min_score=${qualityCriticMinScore}`,
|
|
4559
|
+
);
|
|
4560
|
+
onLog?.(
|
|
4561
|
+
"stdout",
|
|
4562
|
+
`[QualityGate] Gates: scope=${qualityGatePolicy.scopeGateEnabled ? "on" : "off"}, validation=${
|
|
4563
|
+
qualityGatePolicy.validationGateEnabled ? "on" : "off"
|
|
4564
|
+
}, critic=${qualityGatePolicy.criticGateEnabled ? "on" : "off"}, publish=${
|
|
4565
|
+
qualityGatePolicy.publishGateEnabled ? "on" : "off"
|
|
4566
|
+
}`,
|
|
4198
4567
|
);
|
|
4199
4568
|
if (qualityGatePolicy.mode === "review_fix") {
|
|
4200
4569
|
const priorScore =
|
|
@@ -4218,7 +4587,8 @@ export async function executeJob(
|
|
|
4218
4587
|
|
|
4219
4588
|
let revisionAttempt = 0;
|
|
4220
4589
|
let revisionHint = "";
|
|
4221
|
-
|
|
4590
|
+
const previousValidationFailureDigests = new Map<string, string>();
|
|
4591
|
+
while (revisionAttempt <= qualityRevisionLoopMax) {
|
|
4222
4592
|
const attemptParams: Record<string, unknown> = { ...normalizedParams };
|
|
4223
4593
|
if (revisionHint) {
|
|
4224
4594
|
attemptParams.qualityRevisionHint = revisionHint;
|
|
@@ -4306,50 +4676,154 @@ export async function executeJob(
|
|
|
4306
4676
|
};
|
|
4307
4677
|
}
|
|
4308
4678
|
|
|
4309
|
-
const
|
|
4310
|
-
|
|
4311
|
-
|
|
4679
|
+
const quality = await runDeterministicQualityGate(
|
|
4680
|
+
repo,
|
|
4681
|
+
attemptParams,
|
|
4682
|
+
runtimeConfig,
|
|
4683
|
+
qualityGatePolicy,
|
|
4684
|
+
onLog,
|
|
4685
|
+
{
|
|
4686
|
+
previousFailureDigests: previousValidationFailureDigests,
|
|
4687
|
+
revisionAttempt,
|
|
4688
|
+
},
|
|
4689
|
+
);
|
|
4690
|
+
for (const run of quality.validationRuns) {
|
|
4691
|
+
if (run.ok) continue;
|
|
4692
|
+
const digest = extractValidationFailureDigest(run);
|
|
4693
|
+
if (digest) previousValidationFailureDigests.set(validationCommandKey(run.command), digest);
|
|
4312
4694
|
}
|
|
4313
|
-
|
|
4314
|
-
|
|
4315
|
-
const
|
|
4316
|
-
?
|
|
4317
|
-
|
|
4318
|
-
|
|
4319
|
-
|
|
4320
|
-
|
|
4695
|
+
const validationOutsideTaskScope =
|
|
4696
|
+
quality.validationFailureScope === "outside_task_scope";
|
|
4697
|
+
const qualityForCritic: DeterministicQualityResult = validationOutsideTaskScope
|
|
4698
|
+
? {
|
|
4699
|
+
...quality,
|
|
4700
|
+
issues: quality.issues.filter((issue) => !issue.startsWith("ValidationGate:")),
|
|
4701
|
+
validationIssues: [],
|
|
4702
|
+
validationRuns: [],
|
|
4703
|
+
blocker: null,
|
|
4704
|
+
}
|
|
4705
|
+
: quality;
|
|
4706
|
+
const critic =
|
|
4707
|
+
quality.skipped || !qualityGatePolicy.criticGateEnabled
|
|
4708
|
+
? null
|
|
4709
|
+
: executor === "openai_codex"
|
|
4710
|
+
? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
|
|
4711
|
+
: await runTaskCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog);
|
|
4712
|
+
if (!qualityGatePolicy.criticGateEnabled) {
|
|
4713
|
+
onLog?.("stdout", "[CriticGate] Disabled by workerpals.quality_critic_gate_enabled=false.");
|
|
4714
|
+
}
|
|
4715
|
+
const advisoryRelaxedQualityIssues = relaxAdvisoryQualityIssues(
|
|
4321
4716
|
quality.issues,
|
|
4322
4717
|
quality.validationRuns,
|
|
4323
4718
|
critic,
|
|
4324
4719
|
qualityCriticMinScore,
|
|
4325
4720
|
);
|
|
4326
|
-
|
|
4721
|
+
let effectiveQualityIssues = advisoryRelaxedQualityIssues;
|
|
4722
|
+
if (validationOutsideTaskScope) {
|
|
4723
|
+
effectiveQualityIssues = effectiveQualityIssues.filter(
|
|
4724
|
+
(issue) => !issue.startsWith("ValidationGate:"),
|
|
4725
|
+
);
|
|
4726
|
+
if (effectiveQualityIssues.length !== quality.issues.length) {
|
|
4727
|
+
onLog?.(
|
|
4728
|
+
"stderr",
|
|
4729
|
+
"[ValidationGate] Validation failures are outside the task scope; they will block publishing but will not drive another code revision.",
|
|
4730
|
+
);
|
|
4731
|
+
}
|
|
4732
|
+
}
|
|
4733
|
+
if (
|
|
4734
|
+
!validationOutsideTaskScope &&
|
|
4735
|
+
advisoryRelaxedQualityIssues.length !== quality.issues.length
|
|
4736
|
+
) {
|
|
4327
4737
|
onLog?.(
|
|
4328
4738
|
"stdout",
|
|
4329
4739
|
"[QualityGate] Assertion-balance heuristic downgraded to advisory because validation passed and critic score met threshold.",
|
|
4330
4740
|
);
|
|
4331
4741
|
}
|
|
4332
4742
|
const deterministicRequiresRevision =
|
|
4333
|
-
effectiveQualityIssues.length > 0 ||
|
|
4743
|
+
effectiveQualityIssues.length > 0 ||
|
|
4744
|
+
(quality.blocker !== null && !validationOutsideTaskScope);
|
|
4334
4745
|
const criticRequiresRevision = Boolean(critic && critic.score < qualityCriticMinScore);
|
|
4746
|
+
if (
|
|
4747
|
+
!qualityGatePolicy.publishGateEnabled &&
|
|
4748
|
+
(deterministicRequiresRevision || criticRequiresRevision)
|
|
4749
|
+
) {
|
|
4750
|
+
onLog?.(
|
|
4751
|
+
"stderr",
|
|
4752
|
+
"[PublishGate] Disabled by workerpals.quality_publish_gate_enabled=false; returning worker result despite gate failures.",
|
|
4753
|
+
);
|
|
4754
|
+
return {
|
|
4755
|
+
...result,
|
|
4756
|
+
summary: `${result.summary} (publish gate disabled; quality gate findings were advisory)`,
|
|
4757
|
+
stderr: truncate(
|
|
4758
|
+
[
|
|
4759
|
+
result.stderr ?? "",
|
|
4760
|
+
...quality.validationRuns.flatMap((run) => [run.stdout, run.stderr]).filter(Boolean),
|
|
4761
|
+
critic ? `Critic raw: ${critic.raw}` : "",
|
|
4762
|
+
]
|
|
4763
|
+
.filter(Boolean)
|
|
4764
|
+
.join("\n"),
|
|
4765
|
+
outputPolicyForRuntime(runtimeConfig),
|
|
4766
|
+
),
|
|
4767
|
+
exitCode: typeof result.exitCode === "number" ? result.exitCode : 0,
|
|
4768
|
+
};
|
|
4769
|
+
}
|
|
4335
4770
|
|
|
4336
4771
|
if (!deterministicRequiresRevision && !criticRequiresRevision) {
|
|
4772
|
+
if (quality.requiredValidationFailures.length > 0) {
|
|
4773
|
+
const requiredSummary = `Required vision.md validation blocked publishing: ${quality.requiredValidationFailures.join("; ")}`;
|
|
4774
|
+
const diagnostics = truncate(
|
|
4775
|
+
[
|
|
4776
|
+
result.stderr ?? "",
|
|
4777
|
+
validationOutsideTaskScope
|
|
4778
|
+
? "Validation failures appear outside the task write scope and are treated as pre-existing repo blockers."
|
|
4779
|
+
: "",
|
|
4780
|
+
...quality.validationRuns.flatMap((run) => [run.stdout, run.stderr]).filter(Boolean),
|
|
4781
|
+
]
|
|
4782
|
+
.filter(Boolean)
|
|
4783
|
+
.join("\n"),
|
|
4784
|
+
outputPolicyForRuntime(runtimeConfig),
|
|
4785
|
+
);
|
|
4786
|
+
onLog?.("stderr", `[QualityGate] ${requiredSummary}`);
|
|
4787
|
+
return {
|
|
4788
|
+
ok: false,
|
|
4789
|
+
summary: requiredSummary,
|
|
4790
|
+
stdout: result.stdout,
|
|
4791
|
+
stderr: diagnostics,
|
|
4792
|
+
exitCode: 4,
|
|
4793
|
+
};
|
|
4794
|
+
}
|
|
4337
4795
|
if (critic) {
|
|
4338
4796
|
onLog?.(
|
|
4339
4797
|
"stdout",
|
|
4340
|
-
`[
|
|
4798
|
+
`[CriticGate] review score ${critic.score.toFixed(1)}/10 (threshold ${qualityCriticMinScore}).`,
|
|
4341
4799
|
);
|
|
4342
4800
|
}
|
|
4343
4801
|
return result;
|
|
4344
4802
|
}
|
|
4345
4803
|
|
|
4804
|
+
const blockerIssue = quality.blocker
|
|
4805
|
+
? [
|
|
4806
|
+
`Validation blocker (${quality.blocker.category}): ${toSingleLine(
|
|
4807
|
+
quality.blocker.detail,
|
|
4808
|
+
240,
|
|
4809
|
+
)}`,
|
|
4810
|
+
]
|
|
4811
|
+
: [];
|
|
4346
4812
|
const issues = buildQualityGateRevisionIssues(
|
|
4347
|
-
effectiveQualityIssues,
|
|
4813
|
+
[...effectiveQualityIssues, ...blockerIssue],
|
|
4348
4814
|
critic,
|
|
4349
4815
|
qualityCriticMinScore,
|
|
4350
4816
|
);
|
|
4817
|
+
const activeMaxAutoRevisions = revisionLimitForQualityGateFailures({
|
|
4818
|
+
policy: qualityGatePolicy,
|
|
4819
|
+
qualityIssues: effectiveQualityIssues,
|
|
4820
|
+
requiredValidationFailures: validationOutsideTaskScope
|
|
4821
|
+
? []
|
|
4822
|
+
: quality.requiredValidationFailures,
|
|
4823
|
+
blocker: validationOutsideTaskScope ? null : quality.blocker,
|
|
4824
|
+
});
|
|
4351
4825
|
const issueSummary = issues.map((entry) => toSingleLine(entry, 180)).join(" | ");
|
|
4352
|
-
if (quality.blocker) {
|
|
4826
|
+
if (quality.blocker && !validationOutsideTaskScope) {
|
|
4353
4827
|
const blockerSummary = `Quality gate blocked by ${quality.blocker.category} issue: ${quality.blocker.detail}`;
|
|
4354
4828
|
const blockerDiagnostics = truncate(
|
|
4355
4829
|
[
|
|
@@ -4358,7 +4832,23 @@ export async function executeJob(
|
|
|
4358
4832
|
].join("\n"),
|
|
4359
4833
|
outputPolicyForRuntime(runtimeConfig),
|
|
4360
4834
|
);
|
|
4361
|
-
|
|
4835
|
+
const requiredValidationCanRevise = shouldReviseRequiredValidationBlocker({
|
|
4836
|
+
requiredValidationFailures: quality.requiredValidationFailures,
|
|
4837
|
+
blocker: quality.blocker,
|
|
4838
|
+
revisionAttempt,
|
|
4839
|
+
maxAutoRevisions: qualityValidationMaxAutoRevisions,
|
|
4840
|
+
outsideTaskScope: validationOutsideTaskScope,
|
|
4841
|
+
});
|
|
4842
|
+
if (requiredValidationCanRevise) {
|
|
4843
|
+
onLog?.(
|
|
4844
|
+
"stderr",
|
|
4845
|
+
`[QualityGate] Required vision.md validation hit a repo blocker; requesting revision ${
|
|
4846
|
+
revisionAttempt + 1
|
|
4847
|
+
}/${qualityValidationMaxAutoRevisions} instead of failing immediately: ${quality.requiredValidationFailures.join(
|
|
4848
|
+
"; ",
|
|
4849
|
+
)}`,
|
|
4850
|
+
);
|
|
4851
|
+
} else if (quality.requiredValidationFailures.length > 0) {
|
|
4362
4852
|
const requiredSummary = `Required vision.md validation blocked publishing: ${quality.requiredValidationFailures.join("; ")}`;
|
|
4363
4853
|
onLog?.("stderr", `[QualityGate] ${requiredSummary}`);
|
|
4364
4854
|
return {
|
|
@@ -4368,8 +4858,7 @@ export async function executeJob(
|
|
|
4368
4858
|
stderr: blockerDiagnostics,
|
|
4369
4859
|
exitCode: 4,
|
|
4370
4860
|
};
|
|
4371
|
-
}
|
|
4372
|
-
if (shouldSoftPassValidationBlocker(qualityGatePolicy, quality.blocker)) {
|
|
4861
|
+
} else if (shouldSoftPassValidationBlocker(qualityGatePolicy, quality.blocker)) {
|
|
4373
4862
|
onLog?.(
|
|
4374
4863
|
"stderr",
|
|
4375
4864
|
`[QualityGate] Soft-pass on ${quality.blocker.category} blocker for publishable ${qualityGatePolicy.mode} job: ${toSingleLine(
|
|
@@ -4385,17 +4874,18 @@ export async function executeJob(
|
|
|
4385
4874
|
stderr: blockerDiagnostics,
|
|
4386
4875
|
exitCode: typeof result.exitCode === "number" ? result.exitCode : 0,
|
|
4387
4876
|
};
|
|
4877
|
+
} else {
|
|
4878
|
+
onLog?.("stderr", `[QualityGate] ${blockerSummary}`);
|
|
4879
|
+
return {
|
|
4880
|
+
ok: false,
|
|
4881
|
+
summary: blockerSummary,
|
|
4882
|
+
stdout: result.stdout,
|
|
4883
|
+
stderr: blockerDiagnostics,
|
|
4884
|
+
exitCode: 4,
|
|
4885
|
+
};
|
|
4388
4886
|
}
|
|
4389
|
-
onLog?.("stderr", `[QualityGate] ${blockerSummary}`);
|
|
4390
|
-
return {
|
|
4391
|
-
ok: false,
|
|
4392
|
-
summary: blockerSummary,
|
|
4393
|
-
stdout: result.stdout,
|
|
4394
|
-
stderr: blockerDiagnostics,
|
|
4395
|
-
exitCode: 4,
|
|
4396
|
-
};
|
|
4397
4887
|
}
|
|
4398
|
-
if (revisionAttempt >=
|
|
4888
|
+
if (revisionAttempt >= activeMaxAutoRevisions) {
|
|
4399
4889
|
if (quality.requiredValidationFailures.length > 0) {
|
|
4400
4890
|
const diagnostics = truncate(
|
|
4401
4891
|
[
|
|
@@ -4456,10 +4946,17 @@ export async function executeJob(
|
|
|
4456
4946
|
}
|
|
4457
4947
|
|
|
4458
4948
|
revisionAttempt += 1;
|
|
4459
|
-
revisionHint = buildQualityRevisionHint(
|
|
4949
|
+
revisionHint = buildQualityRevisionHint(
|
|
4950
|
+
issues,
|
|
4951
|
+
critic,
|
|
4952
|
+
planning,
|
|
4953
|
+
reviewFixContext,
|
|
4954
|
+
validationOutsideTaskScope ? [] : quality.validationRuns,
|
|
4955
|
+
validationOutsideTaskScope ? null : quality.blocker,
|
|
4956
|
+
);
|
|
4460
4957
|
onLog?.(
|
|
4461
4958
|
"stderr",
|
|
4462
|
-
`[QualityGate] Quality gate requested revision ${revisionAttempt}/${
|
|
4959
|
+
`[QualityGate] Quality gate requested revision ${revisionAttempt}/${activeMaxAutoRevisions}: ${toSingleLine(
|
|
4463
4960
|
issueSummary,
|
|
4464
4961
|
260,
|
|
4465
4962
|
)}`,
|