@pushpalsdev/cli 1.0.79 → 1.0.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +66 -3
- package/monitor-ui/+not-found.html +1 -1
- package/monitor-ui/_expo/static/js/web/{entry-c6862f701ea52ccf8692a6c9e749af5c.js → entry-e66b4de45f75e702ac16916082bcc9a5.js} +172 -171
- package/monitor-ui/_expo/static/js/web/{index-6013f9ebc87a963a55bb9137af1a5a06.js → index-ec13ec62e2b37ed3c5f6d324ef6784e1.js} +4 -4
- package/monitor-ui/_sitemap.html +1 -1
- package/monitor-ui/index.html +1 -1
- package/monitor-ui/modal.html +1 -1
- package/package.json +1 -1
- package/runtime/prompts/remotebuddy/autonomy_ideation_system_prompt.md +5 -3
- package/runtime/prompts/workerpals/openai_codex_default_system_prompt.md +1 -0
- package/runtime/prompts/workerpals/openai_codex_runtime_policy_appendix.md +1 -0
- package/runtime/prompts/workerpals/openai_codex_task_execute_system_prompt.md +1 -0
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +560 -23
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +282 -33
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +113 -2
- package/runtime/sandbox/packages/shared/src/client_preflight.ts +2 -0
- package/runtime/sandbox/packages/shared/src/config.ts +1 -6
- package/runtime/sandbox/packages/shared/src/index.ts +19 -0
- package/runtime/sandbox/packages/shared/src/tooling.ts +422 -0
- package/runtime/sandbox/packages/shared/src/vision.ts +12 -0
- package/runtime/sandbox/prompts/workerpals/openai_codex_default_system_prompt.md +1 -0
- package/runtime/sandbox/prompts/workerpals/openai_codex_runtime_policy_appendix.md +1 -0
- package/runtime/sandbox/prompts/workerpals/openai_codex_task_execute_system_prompt.md +1 -0
- package/runtime/vision.example.md +125 -122
|
@@ -11,6 +11,7 @@ import {
|
|
|
11
11
|
explicitSourceControlCommitIdentityFromEnv,
|
|
12
12
|
loadPromptTemplate,
|
|
13
13
|
loadPushPalsConfig,
|
|
14
|
+
extractVisionKeyItems,
|
|
14
15
|
matchesGlob,
|
|
15
16
|
normalizeAutonomyComponentArea,
|
|
16
17
|
normalizeTargetPath,
|
|
@@ -34,7 +35,7 @@ import { extractMergeConflictReviewContext } from "./merge_conflict_job.js";
|
|
|
34
35
|
|
|
35
36
|
const DEFAULT_CONFIG = loadPushPalsConfig();
|
|
36
37
|
|
|
37
|
-
interface TaskExecutePlanning {
|
|
38
|
+
export interface TaskExecutePlanning {
|
|
38
39
|
intent: TaskExecuteIntent;
|
|
39
40
|
riskLevel: TaskExecuteRisk;
|
|
40
41
|
targetPaths?: string[];
|
|
@@ -52,6 +53,7 @@ interface TaskExecutePlanning {
|
|
|
52
53
|
};
|
|
53
54
|
acceptanceCriteria: string[];
|
|
54
55
|
validationSteps: string[];
|
|
56
|
+
requiredValidationSteps?: string[];
|
|
55
57
|
queuePriority: TaskExecutePriority;
|
|
56
58
|
queueWaitBudgetMs: number;
|
|
57
59
|
executionBudgetMs: number;
|
|
@@ -80,6 +82,7 @@ interface DeterministicQualityResult {
|
|
|
80
82
|
changedPaths: string[];
|
|
81
83
|
changedTestPaths: string[];
|
|
82
84
|
validationRuns: ValidationExecutionResult[];
|
|
85
|
+
requiredValidationFailures: string[];
|
|
83
86
|
blocker: ValidationBlocker | null;
|
|
84
87
|
}
|
|
85
88
|
|
|
@@ -728,11 +731,161 @@ function extractRunnableValidationCommand(step: string): string | null {
|
|
|
728
731
|
? trimmed.slice(8).trim()
|
|
729
732
|
: trimmed;
|
|
730
733
|
const firstToken = maybeStripped.split(/\s+/, 1)[0]?.toLowerCase() ?? "";
|
|
731
|
-
const runnable = new Set([
|
|
734
|
+
const runnable = new Set([
|
|
735
|
+
"bun",
|
|
736
|
+
"bunx",
|
|
737
|
+
"git",
|
|
738
|
+
"npm",
|
|
739
|
+
"npx",
|
|
740
|
+
"pnpm",
|
|
741
|
+
"yarn",
|
|
742
|
+
"node",
|
|
743
|
+
"pytest",
|
|
744
|
+
"python",
|
|
745
|
+
"python3",
|
|
746
|
+
"uv",
|
|
747
|
+
"coverage",
|
|
748
|
+
"vitest",
|
|
749
|
+
"jest",
|
|
750
|
+
"tsc",
|
|
751
|
+
"eslint",
|
|
752
|
+
"ruff",
|
|
753
|
+
"mypy",
|
|
754
|
+
"go",
|
|
755
|
+
"cargo",
|
|
756
|
+
"make",
|
|
757
|
+
"docker",
|
|
758
|
+
"pwsh",
|
|
759
|
+
"powershell",
|
|
760
|
+
"sh",
|
|
761
|
+
"bash",
|
|
762
|
+
]);
|
|
732
763
|
if (runnable.has(firstToken)) return maybeStripped;
|
|
733
764
|
return null;
|
|
734
765
|
}
|
|
735
766
|
|
|
767
|
+
/**
 * Builds the comparison key for a validation command: surrounding whitespace
 * is trimmed, internal whitespace runs collapse to a single space, and the
 * result is lower-cased.
 */
function validationCommandKey(command: string): string {
  return command.trim().replace(/\s+/g, " ").toLowerCase();
}

/**
 * Lists the required validation commands whose recorded run did not succeed.
 *
 * @param requiredCommands Commands that must pass. Entries whose normalized
 *   key is empty are ignored.
 * @param validationRuns Recorded runs. A run counts as "required" when its
 *   normalized command key equals the key of any required command.
 * @returns One `"<command> exited <code>"` entry per failed required run, in
 *   run order. `<code>` is `unknown` when the run carries no finite exit code.
 *   Returns an empty array when there are no usable required commands.
 *
 * NOTE(review): a required command with no matching entry in `validationRuns`
 * is not reported here - confirm callers always execute every required command.
 */
export function collectRequiredValidationFailures(
  requiredCommands: string[],
  validationRuns: Array<{ command: string; ok: boolean; exitCode?: number | null }>,
): string[] {
  const requiredKeys = new Set(requiredCommands.map(validationCommandKey).filter(Boolean));
  if (requiredKeys.size === 0) return [];
  return validationRuns
    .filter((run) => !run.ok && requiredKeys.has(validationCommandKey(run.command)))
    .map((run) => {
      // Number(null) is 0, which would misreport a failed run without an exit
      // code as "exited 0"; treat null/undefined as unknown instead.
      const numeric = run.exitCode == null ? Number.NaN : Number(run.exitCode);
      const exitCode = Number.isFinite(numeric) ? numeric : "unknown";
      return `${run.command} exited ${exitCode}`;
    });
}
|
|
784
|
+
|
|
785
|
+
/**
 * Extracts runnable validation commands from the testing criteria found in a
 * vision markdown document.
 *
 * Each testing criterion is passed through `extractRunnableValidationCommand`;
 * criteria that yield no command are skipped. Commands are de-duplicated
 * case-insensitively (first occurrence wins) and collection stops once 12
 * commands have been gathered.
 *
 * @param markdown Raw vision markdown text.
 * @returns Up to 12 distinct runnable commands, in document order.
 */
export function extractRequiredValidationStepsFromVisionMarkdown(markdown: string): string[] {
  // Map keeps insertion order, so values() yields first occurrences in order.
  const commandsByKey = new Map<string, string>();
  const { testingCriteria } = extractVisionKeyItems(markdown);
  for (const criterion of testingCriteria) {
    const command = extractRunnableValidationCommand(String(criterion ?? ""));
    if (!command) continue;
    const key = command.toLowerCase();
    if (commandsByKey.has(key)) continue;
    commandsByKey.set(key, command);
    if (commandsByKey.size >= 12) break;
  }
  return Array.from(commandsByKey.values());
}
|
|
799
|
+
|
|
800
|
+
/**
 * Reads `vision.md` from the given repo directory and returns the runnable
 * validation commands extracted from it.
 *
 * @param repo Directory that `vision.md` is resolved against.
 * @returns The extracted commands, or an empty array when the file does not
 *   exist or reading/extraction throws.
 */
function loadRequiredValidationStepsFromVision(repo: string): string[] {
  const visionPath = resolve(repo, "vision.md");
  let steps: string[] = [];
  if (existsSync(visionPath)) {
    try {
      const markdown = readFileSync(visionPath, "utf8");
      steps = extractRequiredValidationStepsFromVisionMarkdown(markdown);
    } catch {
      // Best effort: an unreadable or unparsable vision file yields no required steps.
      steps = [];
    }
  }
  return steps;
}
|
|
809
|
+
|
|
810
|
+
/**
 * Resolves the required validation commands for a task by combining the
 * runnable commands in `planning.requiredValidationSteps` with those loaded
 * from the repo's `vision.md`.
 *
 * Planning-supplied commands come first; duplicates are removed and the
 * combined list is capped at 12 entries.
 *
 * @param repo Repository directory used to locate `vision.md`.
 * @param planning Task planning payload.
 * @returns At most 12 distinct required commands.
 */
function resolveRequiredValidationSteps(
  repo: string,
  planning: TaskExecutePlanning,
): string[] {
  const fromPlanning = runnableValidationCommandsFromSteps(planning.requiredValidationSteps);
  const fromVision = loadRequiredValidationStepsFromVision(repo);
  const merged = dedupeValidationCommands(fromPlanning, fromVision);
  return merged.slice(0, 12);
}
|
|
819
|
+
|
|
820
|
+
/**
 * Converts free-form validation steps into a list of distinct runnable
 * commands.
 *
 * Every step is passed through `extractRunnableValidationCommand`; steps that
 * yield no command are dropped. Duplicates are detected case-insensitively and
 * the first occurrence is kept.
 *
 * @param steps Validation steps; `undefined` is treated as an empty list.
 * @returns Distinct runnable commands in their original order.
 */
function runnableValidationCommandsFromSteps(steps: string[] | undefined): string[] {
  // Map keeps insertion order, so values() yields first occurrences in order.
  const commandsByKey = new Map<string, string>();
  for (const step of steps ?? []) {
    const command = extractRunnableValidationCommand(String(step ?? ""));
    if (!command) continue;
    const key = command.toLowerCase();
    if (!commandsByKey.has(key)) {
      commandsByKey.set(key, command);
    }
  }
  return Array.from(commandsByKey.values());
}
|
|
833
|
+
|
|
834
|
+
/**
 * Merges several command lists into one list without duplicates.
 *
 * Commands are trimmed and blank entries dropped. Two commands are considered
 * the same when they match after collapsing internal whitespace runs to one
 * space and lower-casing, so commands that differ only in spacing or letter
 * case are kept once. The first occurrence (trimmed, otherwise verbatim) wins,
 * and groups are processed in argument order.
 *
 * @param groups Command lists, highest priority first.
 * @returns The distinct trimmed commands in first-seen order.
 */
function dedupeValidationCommands(...groups: string[][]): string[] {
  const out: string[] = [];
  const seen = new Set<string>();
  for (const group of groups) {
    for (const command of group) {
      const trimmed = command.trim();
      if (!trimmed) continue;
      // Collapse whitespace as well as case so "bun  test" and "bun test"
      // are not both scheduled.
      const key = trimmed.replace(/\s+/g, " ").toLowerCase();
      if (seen.has(key)) continue;
      seen.add(key);
      out.push(trimmed);
    }
  }
  return out;
}
|
|
849
|
+
|
|
850
|
+
/**
 * Assembles the validation commands the quality gate should run.
 *
 * - Required steps: runnable commands from `planning.requiredValidationSteps`, capped at 12.
 * - Planner steps: runnable commands from `planning.validationSteps`, capped at 4.
 * - Fallback steps: inferred via `inferFallbackValidationCommandsForTestTask`,
 *   only for test tasks that have no runnable planner step.
 *
 * The final list is the required steps followed by the planner steps (or the
 * fallback steps when there are no planner steps), de-duplicated and capped at 16.
 *
 * @param params Task instruction, optional target path, planning payload,
 *   changed test paths, and whether the task is test-focused.
 * @returns The commands to run plus each contributing list.
 */
export function collectQualityGateValidationCommands(params: {
  instruction: string;
  targetPath?: string;
  planning: TaskExecutePlanning;
  changedTestPaths: string[];
  isTestTask: boolean;
}): {
  commandsToRun: string[];
  requiredRunnableSteps: string[];
  plannerRunnableSteps: string[];
  fallbackValidationSteps: string[];
} {
  const { planning } = params;
  const requiredRunnableSteps = runnableValidationCommandsFromSteps(
    planning.requiredValidationSteps,
  ).slice(0, 12);
  const plannerRunnableSteps = runnableValidationCommandsFromSteps(
    planning.validationSteps,
  ).slice(0, 4);
  const hasPlannerSteps = plannerRunnableSteps.length > 0;

  let fallbackValidationSteps: string[] = [];
  if (params.isTestTask && !hasPlannerSteps) {
    fallbackValidationSteps = inferFallbackValidationCommandsForTestTask(
      params.instruction,
      params.targetPath,
      planning,
      params.changedTestPaths,
    );
  }

  const optionalSteps = hasPlannerSteps ? plannerRunnableSteps : fallbackValidationSteps;
  const commandsToRun = dedupeValidationCommands(requiredRunnableSteps, optionalSteps).slice(0, 16);

  return {
    commandsToRun,
    requiredRunnableSteps,
    plannerRunnableSteps,
    fallbackValidationSteps,
  };
}
|
|
888
|
+
|
|
736
889
|
export function inferFallbackValidationCommandsForTestTask(
|
|
737
890
|
instruction: string,
|
|
738
891
|
targetPath: string | undefined,
|
|
@@ -807,13 +960,6 @@ function isTestFocusedTask(
|
|
|
807
960
|
...(planning.discovery?.likelyDirs ?? []),
|
|
808
961
|
];
|
|
809
962
|
if (pathHints.some((entry) => isLikelyTestPath(entry))) return true;
|
|
810
|
-
if (
|
|
811
|
-
planning.validationSteps.some((entry) =>
|
|
812
|
-
/\b(test|tests|coverage|pytest|vitest|jest|bun test)\b/i.test(entry),
|
|
813
|
-
)
|
|
814
|
-
) {
|
|
815
|
-
return true;
|
|
816
|
-
}
|
|
817
963
|
if (
|
|
818
964
|
planning.acceptanceCriteria.some((entry) =>
|
|
819
965
|
/\b(test|tests|coverage|unit|integration|negative|invalid|valid)\b/i.test(entry),
|
|
@@ -857,8 +1003,13 @@ async function runDeterministicQualityGate(
|
|
|
857
1003
|
const instruction = String(params.instruction ?? "");
|
|
858
1004
|
const targetPath = String(params.targetPath ?? params.path ?? "").trim() || undefined;
|
|
859
1005
|
const planning = params.planning as TaskExecutePlanning;
|
|
1006
|
+
const requiredValidationSteps = resolveRequiredValidationSteps(repo, planning);
|
|
1007
|
+
if (requiredValidationSteps.length > 0) {
|
|
1008
|
+
planning.requiredValidationSteps = requiredValidationSteps;
|
|
1009
|
+
}
|
|
860
1010
|
const isTestTask = isTestFocusedTask(instruction, planning, targetPath);
|
|
861
|
-
|
|
1011
|
+
const hasRequiredValidationCriteria = requiredValidationSteps.length > 0;
|
|
1012
|
+
if (!isTestTask && !hasRequiredValidationCriteria) {
|
|
862
1013
|
return {
|
|
863
1014
|
ok: true,
|
|
864
1015
|
skipped: true,
|
|
@@ -866,6 +1017,7 @@ async function runDeterministicQualityGate(
|
|
|
866
1017
|
changedPaths: [],
|
|
867
1018
|
changedTestPaths: [],
|
|
868
1019
|
validationRuns: [],
|
|
1020
|
+
requiredValidationFailures: [],
|
|
869
1021
|
blocker: null,
|
|
870
1022
|
};
|
|
871
1023
|
}
|
|
@@ -891,20 +1043,18 @@ async function runDeterministicQualityGate(
|
|
|
891
1043
|
);
|
|
892
1044
|
}
|
|
893
1045
|
|
|
894
|
-
const
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
: [];
|
|
907
|
-
const commandsToRun = runnableSteps.length > 0 ? runnableSteps : fallbackValidationSteps;
|
|
1046
|
+
const {
|
|
1047
|
+
commandsToRun,
|
|
1048
|
+
requiredRunnableSteps,
|
|
1049
|
+
plannerRunnableSteps,
|
|
1050
|
+
fallbackValidationSteps,
|
|
1051
|
+
} = collectQualityGateValidationCommands({
|
|
1052
|
+
instruction,
|
|
1053
|
+
targetPath,
|
|
1054
|
+
planning,
|
|
1055
|
+
changedTestPaths,
|
|
1056
|
+
isTestTask,
|
|
1057
|
+
});
|
|
908
1058
|
const validationRuns: ValidationExecutionResult[] = [];
|
|
909
1059
|
const outputPolicy = outputPolicyForRuntime(runtimeConfig);
|
|
910
1060
|
const qualityValidationStepTimeoutMs = (() => {
|
|
@@ -912,12 +1062,25 @@ async function runDeterministicQualityGate(
|
|
|
912
1062
|
if (!Number.isFinite(value)) return 180_000;
|
|
913
1063
|
return Math.max(1_000, Math.min(7_200_000, Math.floor(value)));
|
|
914
1064
|
})();
|
|
1065
|
+
if (hasRequiredValidationCriteria && requiredRunnableSteps.length === 0) {
|
|
1066
|
+
issues.push(
|
|
1067
|
+
"vision.md testing criteria were provided, but none contained a runnable validation command.",
|
|
1068
|
+
);
|
|
1069
|
+
}
|
|
915
1070
|
if (commandsToRun.length === 0) {
|
|
916
1071
|
issues.push(
|
|
917
|
-
|
|
1072
|
+
hasRequiredValidationCriteria
|
|
1073
|
+
? "No runnable validation command was available from vision.md testing criteria or planning.validationSteps."
|
|
1074
|
+
: "No runnable validation command was provided in planning.validationSteps (expected at least one test command).",
|
|
918
1075
|
);
|
|
919
1076
|
} else {
|
|
920
|
-
if (
|
|
1077
|
+
if (requiredRunnableSteps.length > 0) {
|
|
1078
|
+
onLog?.(
|
|
1079
|
+
"stdout",
|
|
1080
|
+
`[QualityGate] Running required vision.md testing criteria: ${requiredRunnableSteps.join(" | ")}`,
|
|
1081
|
+
);
|
|
1082
|
+
}
|
|
1083
|
+
if (isTestTask && plannerRunnableSteps.length === 0 && fallbackValidationSteps.length > 0) {
|
|
921
1084
|
onLog?.(
|
|
922
1085
|
"stdout",
|
|
923
1086
|
`[QualityGate] No runnable planning.validationSteps found; using fallback validation command(s): ${commandsToRun.join(" | ")}`,
|
|
@@ -953,11 +1116,21 @@ async function runDeterministicQualityGate(
|
|
|
953
1116
|
);
|
|
954
1117
|
}
|
|
955
1118
|
if (
|
|
1119
|
+
isTestTask &&
|
|
956
1120
|
!validationRuns.some((run) => /\b(test|pytest|coverage|vitest|jest)\b/i.test(run.command))
|
|
957
1121
|
) {
|
|
958
1122
|
issues.push("Validation steps did not execute a recognizable test command.");
|
|
959
1123
|
}
|
|
960
1124
|
}
|
|
1125
|
+
const requiredValidationFailures = collectRequiredValidationFailures(
|
|
1126
|
+
requiredRunnableSteps,
|
|
1127
|
+
validationRuns,
|
|
1128
|
+
);
|
|
1129
|
+
if (requiredValidationFailures.length > 0) {
|
|
1130
|
+
issues.push(
|
|
1131
|
+
`Required vision.md validation failed: ${requiredValidationFailures.join("; ")}`,
|
|
1132
|
+
);
|
|
1133
|
+
}
|
|
961
1134
|
const blocker = detectValidationBlocker(validationRuns);
|
|
962
1135
|
|
|
963
1136
|
return {
|
|
@@ -967,6 +1140,7 @@ async function runDeterministicQualityGate(
|
|
|
967
1140
|
changedPaths,
|
|
968
1141
|
changedTestPaths,
|
|
969
1142
|
validationRuns,
|
|
1143
|
+
requiredValidationFailures,
|
|
970
1144
|
blocker,
|
|
971
1145
|
};
|
|
972
1146
|
}
|
|
@@ -1029,7 +1203,14 @@ async function runTaskCriticReview(
|
|
|
1029
1203
|
const acceptanceCriteriaText =
|
|
1030
1204
|
planning.acceptanceCriteria.map((entry) => `- ${entry}`).join("\n") || "- (none)";
|
|
1031
1205
|
const validationStepsText =
|
|
1032
|
-
|
|
1206
|
+
[
|
|
1207
|
+
...planning.validationSteps,
|
|
1208
|
+
...(planning.requiredValidationSteps ?? []).map(
|
|
1209
|
+
(entry) => `${entry} (required by vision.md testing criteria)`,
|
|
1210
|
+
),
|
|
1211
|
+
]
|
|
1212
|
+
.map((entry) => `- ${entry}`)
|
|
1213
|
+
.join("\n") || "- (none)";
|
|
1033
1214
|
const changedPathsText =
|
|
1034
1215
|
quality.changedPaths.map((entry) => `- ${entry}`).join("\n") || "- (none)";
|
|
1035
1216
|
const criticSystem = loadPromptTemplate("workerpals/task_quality_critic_system_prompt.md").trim();
|
|
@@ -1199,6 +1380,10 @@ export function buildQualityRevisionHint(
|
|
|
1199
1380
|
lines.push("Required validation steps:");
|
|
1200
1381
|
for (const step of planning.validationSteps) lines.push(`- ${step}`);
|
|
1201
1382
|
}
|
|
1383
|
+
if ((planning.requiredValidationSteps ?? []).length > 0) {
|
|
1384
|
+
lines.push("Required vision.md testing criteria:");
|
|
1385
|
+
for (const step of planning.requiredValidationSteps ?? []) lines.push(`- ${step}`);
|
|
1386
|
+
}
|
|
1202
1387
|
lines.push("Apply a minimal corrective patch, run focused validation, then finish.");
|
|
1203
1388
|
return lines.join("\n").slice(0, 6000);
|
|
1204
1389
|
}
|
|
@@ -1586,9 +1771,13 @@ export async function createJobCommit(
|
|
|
1586
1771
|
? parseChangedPathsFromNameOnlyOutput(cachedNameOnly.stdout)
|
|
1587
1772
|
: [];
|
|
1588
1773
|
const jobPlanning = job.params?.planning as Record<string, unknown> | undefined;
|
|
1589
|
-
const jobValidationSteps =
|
|
1590
|
-
|
|
1591
|
-
|
|
1774
|
+
const jobValidationSteps = [
|
|
1775
|
+
...toNonEmptyStringArray(job.params?.validationSteps),
|
|
1776
|
+
...toNonEmptyStringArray(job.params?.requiredValidationSteps),
|
|
1777
|
+
...toNonEmptyStringArray(jobPlanning?.validationSteps),
|
|
1778
|
+
...toNonEmptyStringArray(jobPlanning?.requiredValidationSteps),
|
|
1779
|
+
...loadRequiredValidationStepsFromVision(repo),
|
|
1780
|
+
];
|
|
1592
1781
|
const llmCommitMsg = await generateCommitMessageFromDiff(
|
|
1593
1782
|
diff,
|
|
1594
1783
|
{
|
|
@@ -2022,11 +2211,17 @@ export function isTestLikeValidationStep(step: string): boolean {
|
|
|
2022
2211
|
|
|
2023
2212
|
switch (tool) {
|
|
2024
2213
|
case "bun":
|
|
2214
|
+
case "bunx":
|
|
2025
2215
|
case "npm":
|
|
2216
|
+
case "npx":
|
|
2026
2217
|
case "pnpm":
|
|
2027
2218
|
case "yarn": {
|
|
2028
2219
|
// "bun test", "npm test", "yarn test"
|
|
2029
2220
|
if (hasToken("test")) return true;
|
|
2221
|
+
if (["bunx", "npx"].includes(tool)) {
|
|
2222
|
+
const runner = argv[1]?.toLowerCase() ?? "";
|
|
2223
|
+
if (runner === "vitest" || runner === "jest" || runner === "playwright") return true;
|
|
2224
|
+
}
|
|
2030
2225
|
const sub = argv[1]?.toLowerCase() ?? "";
|
|
2031
2226
|
// "bun run test:root", "npm run test:unit", "pnpm run test:integration"
|
|
2032
2227
|
if (sub === "run" && argv[2]?.toLowerCase().startsWith("test")) return true;
|
|
@@ -2045,9 +2240,14 @@ export function isTestLikeValidationStep(step: string): boolean {
|
|
|
2045
2240
|
case "jest":
|
|
2046
2241
|
return true;
|
|
2047
2242
|
case "python":
|
|
2243
|
+
case "python3":
|
|
2048
2244
|
return (
|
|
2049
2245
|
argv.length >= 3 && argv[1].toLowerCase() === "-m" && argv[2].toLowerCase() === "pytest"
|
|
2050
2246
|
);
|
|
2247
|
+
case "go":
|
|
2248
|
+
case "cargo":
|
|
2249
|
+
case "make":
|
|
2250
|
+
return hasToken("test");
|
|
2051
2251
|
case "coverage":
|
|
2052
2252
|
return hasToken("pytest");
|
|
2053
2253
|
default:
|
|
@@ -2069,9 +2269,13 @@ function buildCommitTestsBlock(params?: Record<string, unknown>): string {
|
|
|
2069
2269
|
|
|
2070
2270
|
const candidates = [
|
|
2071
2271
|
...toNonEmptyStringArray(params?.validationSteps),
|
|
2272
|
+
...toNonEmptyStringArray(params?.requiredValidationSteps),
|
|
2072
2273
|
...toNonEmptyStringArray(params?.validation_steps),
|
|
2274
|
+
...toNonEmptyStringArray(params?.required_validation_steps),
|
|
2073
2275
|
...toNonEmptyStringArray(planning?.validationSteps),
|
|
2276
|
+
...toNonEmptyStringArray(planning?.requiredValidationSteps),
|
|
2074
2277
|
...toNonEmptyStringArray(planning?.validation_steps),
|
|
2278
|
+
...toNonEmptyStringArray(planning?.required_validation_steps),
|
|
2075
2279
|
];
|
|
2076
2280
|
|
|
2077
2281
|
const seen = new Set<string>();
|
|
@@ -2499,9 +2703,13 @@ async function createMergeConflictJobCommit(
|
|
|
2499
2703
|
? parseChangedPathsFromNameOnlyOutput(cachedNameOnly.stdout)
|
|
2500
2704
|
: [];
|
|
2501
2705
|
const jobPlanning = job.params?.planning as Record<string, unknown> | undefined;
|
|
2502
|
-
const jobValidationSteps =
|
|
2503
|
-
|
|
2504
|
-
|
|
2706
|
+
const jobValidationSteps = [
|
|
2707
|
+
...toNonEmptyStringArray(job.params?.validationSteps),
|
|
2708
|
+
...toNonEmptyStringArray(job.params?.requiredValidationSteps),
|
|
2709
|
+
...toNonEmptyStringArray(jobPlanning?.validationSteps),
|
|
2710
|
+
...toNonEmptyStringArray(jobPlanning?.requiredValidationSteps),
|
|
2711
|
+
...loadRequiredValidationStepsFromVision(repo),
|
|
2712
|
+
];
|
|
2505
2713
|
const llmCommitMsg = await generateCommitMessageFromDiff(
|
|
2506
2714
|
diff,
|
|
2507
2715
|
{
|
|
@@ -3443,6 +3651,15 @@ function validateTaskExecutePlanning(
|
|
|
3443
3651
|
if (!isStringArray(planning.validationSteps)) {
|
|
3444
3652
|
return { ok: false, message: "task.execute planning.validationSteps must be a string array" };
|
|
3445
3653
|
}
|
|
3654
|
+
if (
|
|
3655
|
+
planning.requiredValidationSteps !== undefined &&
|
|
3656
|
+
!isStringArray(planning.requiredValidationSteps)
|
|
3657
|
+
) {
|
|
3658
|
+
return {
|
|
3659
|
+
ok: false,
|
|
3660
|
+
message: "task.execute planning.requiredValidationSteps must be a string array",
|
|
3661
|
+
};
|
|
3662
|
+
}
|
|
3446
3663
|
if ((planning.acceptanceCriteria as string[]).length === 0) {
|
|
3447
3664
|
return {
|
|
3448
3665
|
ok: false,
|
|
@@ -3949,6 +4166,17 @@ export async function executeJob(
|
|
|
3949
4166
|
].join("\n"),
|
|
3950
4167
|
outputPolicyForRuntime(runtimeConfig),
|
|
3951
4168
|
);
|
|
4169
|
+
if (quality.requiredValidationFailures.length > 0) {
|
|
4170
|
+
const requiredSummary = `Required vision.md validation blocked publishing: ${quality.requiredValidationFailures.join("; ")}`;
|
|
4171
|
+
onLog?.("stderr", `[QualityGate] ${requiredSummary}`);
|
|
4172
|
+
return {
|
|
4173
|
+
ok: false,
|
|
4174
|
+
summary: requiredSummary,
|
|
4175
|
+
stdout: result.stdout,
|
|
4176
|
+
stderr: blockerDiagnostics,
|
|
4177
|
+
exitCode: 4,
|
|
4178
|
+
};
|
|
4179
|
+
}
|
|
3952
4180
|
if (shouldSoftPassValidationBlocker(qualityGatePolicy, quality.blocker)) {
|
|
3953
4181
|
onLog?.(
|
|
3954
4182
|
"stderr",
|
|
@@ -3976,6 +4204,27 @@ export async function executeJob(
|
|
|
3976
4204
|
};
|
|
3977
4205
|
}
|
|
3978
4206
|
if (revisionAttempt >= qualityMaxAutoRevisions) {
|
|
4207
|
+
if (quality.requiredValidationFailures.length > 0) {
|
|
4208
|
+
const diagnostics = truncate(
|
|
4209
|
+
[
|
|
4210
|
+
result.stderr ?? "",
|
|
4211
|
+
...quality.validationRuns.flatMap((run) => [run.stdout, run.stderr]).filter(Boolean),
|
|
4212
|
+
critic ? `Critic raw: ${critic.raw}` : "",
|
|
4213
|
+
]
|
|
4214
|
+
.filter(Boolean)
|
|
4215
|
+
.join("\n"),
|
|
4216
|
+
outputPolicyForRuntime(runtimeConfig),
|
|
4217
|
+
);
|
|
4218
|
+
const requiredSummary = `Required vision.md validation failed after ${revisionAttempt} auto-revision attempt(s): ${quality.requiredValidationFailures.join("; ")}`;
|
|
4219
|
+
onLog?.("stderr", `[QualityGate] ${requiredSummary}`);
|
|
4220
|
+
return {
|
|
4221
|
+
ok: false,
|
|
4222
|
+
summary: requiredSummary,
|
|
4223
|
+
stdout: result.stdout,
|
|
4224
|
+
stderr: diagnostics,
|
|
4225
|
+
exitCode: 4,
|
|
4226
|
+
};
|
|
4227
|
+
}
|
|
3979
4228
|
if (qualitySoftPassOnExhausted) {
|
|
3980
4229
|
const diagnostics = truncate(
|
|
3981
4230
|
[result.stderr ?? "", critic ? `Critic raw: ${critic.raw}` : ""]
|
|
@@ -29,6 +29,7 @@ import {
|
|
|
29
29
|
loadPushPalsConfig,
|
|
30
30
|
resolveLocalServerConnection,
|
|
31
31
|
resolveGitTokenForRemote,
|
|
32
|
+
createToolRunRecordFromFailure,
|
|
32
33
|
} from "shared";
|
|
33
34
|
import { resolveExecutor } from "./common/executor_backend.js";
|
|
34
35
|
import { Logger } from "./common/logger.js";
|
|
@@ -121,6 +122,95 @@ async function postJsonWithTimeout(
|
|
|
121
122
|
}
|
|
122
123
|
}
|
|
123
124
|
|
|
125
|
+
/**
 * Guesses which tool a failed job was running, by pattern-matching the job's
 * summary, stdout, stderr and publish-blocked detail text.
 *
 * Checks run in priority order; the first match wins:
 *  1. `codex --version` or `openai_codex` -> codex (argv/commandLine are only
 *     filled in when the literal `codex --version` appears; when the result
 *     has no exit code, the text `exit 127` yields exit code 127)
 *  2. `git pull --rebase` -> git with that exact invocation
 *  3. `git` together with rebase / cherry-pick / checkout / push -> git
 *  4. `docker` or `docker_engine` -> docker
 *  5. `bun` -> bun
 * When nothing matches only the exit code is returned.
 *
 * @param result The unsuccessful job result.
 * @returns The inferred tool details; `exitCode` is `null` when the result
 *   carries none (and none could be inferred from the text).
 */
function inferFailureToolInvocation(result: JobResult): {
  tool?: string;
  argv?: string[];
  commandLine?: string;
  exitCode?: number | null;
} {
  const combined = [result.summary, result.stdout, result.stderr, result.publishBlocked?.detail]
    .map((part) => String(part ?? ""))
    .join("\n");
  const exitCode = result.exitCode ?? null;

  // Evaluated once and reused for the branch test, argv and commandLine.
  const mentionsCodexVersion = /codex\s+--version/i.test(combined);
  if (mentionsCodexVersion || /openai_codex/i.test(combined)) {
    return {
      tool: "codex",
      argv: mentionsCodexVersion ? ["codex", "--version"] : [],
      commandLine: mentionsCodexVersion ? "codex --version" : undefined,
      exitCode: result.exitCode ?? (/exit\s+127/i.test(combined) ? 127 : null),
    };
  }
  if (/git\s+pull\s+--rebase/i.test(combined)) {
    return {
      tool: "git",
      argv: ["git", "pull", "--rebase"],
      commandLine: "git pull --rebase",
      exitCode,
    };
  }
  if (/\bgit\b/i.test(combined) && /\b(rebase|cherry-pick|checkout|push)\b/i.test(combined)) {
    return { tool: "git", argv: [], exitCode };
  }
  if (/\bdocker\b/i.test(combined) || /docker_engine/i.test(combined)) {
    return { tool: "docker", argv: [], exitCode };
  }
  if (/\bbun\b/i.test(combined)) {
    return { tool: "bun", argv: [], exitCode };
  }
  return { exitCode };
}
|
|
161
|
+
|
|
162
|
+
/**
 * Builds a tool-run failure record for an unsuccessful job and posts it to the
 * server's `/tool-runs` endpoint.
 *
 * Nothing is sent when the record is classified as an `unknown` failure of the
 * `shell` tool. A non-OK response or a thrown error is logged with
 * `console.warn` and never propagated to the caller.
 *
 * @param args.opts Parsed worker options (server URL, worker id, docker flag, repo path).
 * @param args.headers HTTP headers for the request.
 * @param args.job The job that did not succeed.
 * @param args.result The job's result.
 * @param args.durationMs Job duration in milliseconds.
 * @param args.phase Phase label; falls back to the job kind when empty.
 */
async function reportToolRunForUnsuccessfulJob(args: {
  opts: ReturnType<typeof parseArgs>;
  headers: Record<string, string>;
  job: { id: string; kind: string; sessionId?: string | null };
  result: JobResult;
  durationMs: number;
  phase: string;
}): Promise<void> {
  const { opts, headers, job, result, durationMs, phase } = args;
  const { tool, argv, commandLine, exitCode } = inferFailureToolInvocation(result);
  const blocked = result.publishBlocked;

  const record = createToolRunRecordFromFailure({
    id: randomUUID(),
    jobId: job.id,
    workerId: opts.workerId,
    sessionId: job.sessionId ?? null,
    phase: phase || job.kind,
    tool,
    argv,
    commandLine,
    stdout: result.stdout,
    stderr: result.stderr ?? blocked?.detail,
    summary: result.summary,
    detail: blocked?.detail,
    exitCode,
    durationMs,
    finishedAt: new Date().toISOString(),
    envProfile: opts.docker ? "worker-container" : "worker-host",
    cwd: opts.repo,
    metadata: {
      publishBlocked: Boolean(blocked),
      publishStage: blocked?.stage ?? null,
    },
  });

  // Skip records that carry no usable classification.
  const isUnclassifiedShellFailure = record.failureClass === "unknown" && record.tool === "shell";
  if (isUnclassifiedShellFailure) return;

  try {
    const response = await postJsonWithTimeout(`${opts.server}/tool-runs`, headers, record, 5_000);
    if (response.ok) return;
    const detail = await response.text().catch(() => "");
    console.warn(
      `[WorkerPals] Failed to record tool run telemetry for job ${job.id}: ${response.status} ${detail}`,
    );
  } catch (error) {
    // Telemetry is best effort: log and continue.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn(
      `[WorkerPals] Failed to record tool run telemetry for job ${job.id}: ${reason}`,
    );
  }
}
|
|
213
|
+
|
|
124
214
|
function buildWorkerLlmUsageEvent(
|
|
125
215
|
job: {
|
|
126
216
|
kind: string;
|
|
@@ -602,10 +692,15 @@ function inferChangedPaths(params: Record<string, unknown> | undefined): string[
|
|
|
602
692
|
function inferValidationSteps(params: Record<string, unknown> | undefined): string[] {
|
|
603
693
|
if (!params || !params.planning || typeof params.planning !== "object") return [];
|
|
604
694
|
const planning = params.planning as Record<string, unknown>;
|
|
605
|
-
if (!Array.isArray(planning.validationSteps)) return [];
|
|
606
695
|
const out: string[] = [];
|
|
607
696
|
const seen = new Set<string>();
|
|
608
|
-
|
|
697
|
+
const candidates = [
|
|
698
|
+
...(Array.isArray(planning.validationSteps) ? planning.validationSteps : []),
|
|
699
|
+
...(Array.isArray(planning.requiredValidationSteps)
|
|
700
|
+
? planning.requiredValidationSteps.map((step) => `${step} (required by vision.md)`)
|
|
701
|
+
: []),
|
|
702
|
+
];
|
|
703
|
+
for (const raw of candidates) {
|
|
609
704
|
if (typeof raw !== "string") continue;
|
|
610
705
|
const step = sanitizePrText(raw, 200);
|
|
611
706
|
if (!step || seen.has(step)) continue;
|
|
@@ -1421,6 +1516,14 @@ async function workerLoop(
|
|
|
1421
1516
|
|
|
1422
1517
|
let statusPersistedToServer = false;
|
|
1423
1518
|
if (result.publishBlocked) {
|
|
1519
|
+
await reportToolRunForUnsuccessfulJob({
|
|
1520
|
+
opts,
|
|
1521
|
+
headers,
|
|
1522
|
+
job,
|
|
1523
|
+
result,
|
|
1524
|
+
durationMs: jobDurationMs,
|
|
1525
|
+
phase: `publish:${result.publishBlocked.stage}`,
|
|
1526
|
+
});
|
|
1424
1527
|
const response = await postJsonWithTimeout(
|
|
1425
1528
|
`${opts.server}/jobs/${job.id}/publish-blocked`,
|
|
1426
1529
|
headers,
|
|
@@ -1464,6 +1567,14 @@ async function workerLoop(
|
|
|
1464
1567
|
`[WorkerPals] Job ${job.id} completed in ${formatDurationMs(jobDurationMs)}: ${result.summary}`,
|
|
1465
1568
|
);
|
|
1466
1569
|
} else {
|
|
1570
|
+
await reportToolRunForUnsuccessfulJob({
|
|
1571
|
+
opts,
|
|
1572
|
+
headers,
|
|
1573
|
+
job,
|
|
1574
|
+
result,
|
|
1575
|
+
durationMs: jobDurationMs,
|
|
1576
|
+
phase: job.kind,
|
|
1577
|
+
});
|
|
1467
1578
|
const response = await postJsonWithTimeout(`${opts.server}/jobs/${job.id}/fail`, headers, {
|
|
1468
1579
|
message: result.summary,
|
|
1469
1580
|
detail: redactSensitiveText(result.stderr ?? ""),
|
|
@@ -167,6 +167,8 @@ export function evaluateClientRuntimePreflight(
|
|
|
167
167
|
message:
|
|
168
168
|
"Missing required autonomy vision file: vision.md " +
|
|
169
169
|
"(required when remotebuddy.autonomy.enabled=true).",
|
|
170
|
+
detail:
|
|
171
|
+
"Run `pushpals --create_vision_md` to create a starter vision.md, then edit it for this repo.",
|
|
170
172
|
copyCommands: existsSync(visionTemplatePath)
|
|
171
173
|
? buildCopyCommands(projectRoot, visionTemplatePath, visionPath)
|
|
172
174
|
: undefined,
|
|
@@ -869,12 +869,7 @@ export function loadPushPalsConfig(options: LoadOptions = {}): PushPalsConfig {
|
|
|
869
869
|
for (const [rawKey, rawValue] of Object.entries(remoteAutonomyDispatchByComponentRaw)) {
|
|
870
870
|
const canonical = coerceAutonomyComponentConfigKey(rawKey);
|
|
871
871
|
if (!canonical) continue;
|
|
872
|
-
const parsed =
|
|
873
|
-
typeof rawValue === "number"
|
|
874
|
-
? rawValue
|
|
875
|
-
: typeof rawValue === "string"
|
|
876
|
-
? Number.parseInt(rawValue.trim(), 10)
|
|
877
|
-
: Number.NaN;
|
|
872
|
+
const parsed = rawValue;
|
|
878
873
|
remoteAutonomyDispatchByComponent[canonical] = Number.isFinite(parsed)
|
|
879
874
|
? Math.max(0, Math.floor(parsed))
|
|
880
875
|
: 0;
|
|
@@ -54,6 +54,25 @@ export {
|
|
|
54
54
|
type SourceControlCommitIdentitySource,
|
|
55
55
|
type SourceControlProvider,
|
|
56
56
|
} from "./source_control_api.js";
|
|
57
|
+
export {
|
|
58
|
+
DEFAULT_TOOL_REGISTRY,
|
|
59
|
+
TOOL_RUN_TAIL_CHARS,
|
|
60
|
+
classifyToolFailure,
|
|
61
|
+
createToolRunRecordFromFailure,
|
|
62
|
+
inferToolNameFromFailureText,
|
|
63
|
+
normalizeToolName,
|
|
64
|
+
redactToolText,
|
|
65
|
+
resolveToolKind,
|
|
66
|
+
truncateToolText,
|
|
67
|
+
type ToolAdapter,
|
|
68
|
+
type ToolEffect,
|
|
69
|
+
type ToolFailureClass,
|
|
70
|
+
type ToolFailureClassification,
|
|
71
|
+
type ToolFailureInput,
|
|
72
|
+
type ToolKind,
|
|
73
|
+
type ToolRegistry,
|
|
74
|
+
type ToolRunRecord,
|
|
75
|
+
} from "./tooling.js";
|
|
57
76
|
export {
|
|
58
77
|
DEFAULT_WORKERPALS_EXECUTOR,
|
|
59
78
|
invalidatePushPalsConfigCache,
|