bosun 0.41.2 → 0.41.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +1 -1
- package/agent/agent-pool.mjs +9 -2
- package/agent/agent-prompt-catalog.mjs +971 -0
- package/agent/agent-prompts.mjs +2 -970
- package/agent/agent-supervisor.mjs +119 -6
- package/agent/autofix-git.mjs +33 -0
- package/agent/autofix-prompts.mjs +151 -0
- package/agent/autofix.mjs +11 -175
- package/agent/bosun-skills.mjs +3 -2
- package/bosun.config.example.json +17 -0
- package/bosun.schema.json +87 -188
- package/cli.mjs +34 -1
- package/config/config-doctor.mjs +5 -250
- package/config/config-file-names.mjs +5 -0
- package/config/config.mjs +89 -493
- package/config/executor-config.mjs +493 -0
- package/config/repo-root.mjs +1 -2
- package/config/workspace-health.mjs +242 -0
- package/git/git-safety.mjs +15 -0
- package/github/github-oauth-portal.mjs +46 -0
- package/infra/library-manager-utils.mjs +22 -0
- package/infra/library-manager-well-known-sources.mjs +578 -0
- package/infra/library-manager.mjs +512 -1030
- package/infra/monitor.mjs +35 -9
- package/infra/session-tracker.mjs +10 -7
- package/kanban/kanban-adapter.mjs +17 -1
- package/lib/codebase-audit-manifests.mjs +117 -0
- package/lib/codebase-audit.mjs +18 -115
- package/package.json +18 -3
- package/server/setup-web-server.mjs +58 -5
- package/server/ui-server.mjs +1394 -79
- package/shell/codex-config-file.mjs +178 -0
- package/shell/codex-config.mjs +538 -575
- package/task/task-cli.mjs +54 -3
- package/task/task-executor.mjs +143 -13
- package/task/task-store.mjs +409 -1
- package/telegram/telegram-bot.mjs +127 -0
- package/tools/apply-pr-suggestions.mjs +401 -0
- package/tools/syntax-check.mjs +28 -9
- package/ui/app.js +3 -14
- package/ui/components/kanban-board.js +227 -4
- package/ui/components/session-list.js +85 -5
- package/ui/demo-defaults.js +338 -84
- package/ui/demo.html +155 -0
- package/ui/modules/session-api.js +96 -0
- package/ui/modules/settings-schema.js +1 -2
- package/ui/modules/state.js +43 -3
- package/ui/setup.html +4 -5
- package/ui/styles/components.css +58 -4
- package/ui/tabs/agents.js +12 -15
- package/ui/tabs/control.js +1 -0
- package/ui/tabs/library.js +484 -22
- package/ui/tabs/manual-flows.js +105 -29
- package/ui/tabs/tasks.js +848 -141
- package/ui/tabs/telemetry.js +129 -11
- package/ui/tabs/workflow-canvas-utils.mjs +130 -0
- package/ui/tabs/workflows.js +293 -23
- package/voice/voice-tool-definitions.mjs +757 -0
- package/voice/voice-tools.mjs +34 -778
- package/workflow/manual-flow-audit.mjs +165 -0
- package/workflow/manual-flows.mjs +164 -259
- package/workflow/workflow-engine.mjs +147 -58
- package/workflow/workflow-nodes/definitions.mjs +1207 -0
- package/workflow/workflow-nodes/transforms.mjs +612 -0
- package/workflow/workflow-nodes.mjs +358 -63
- package/workflow/workflow-templates.mjs +313 -191
- package/workflow-templates/_helpers.mjs +154 -0
- package/workflow-templates/agents.mjs +61 -4
- package/workflow-templates/code-quality.mjs +7 -7
- package/workflow-templates/github.mjs +20 -10
- package/workflow-templates/task-batch.mjs +44 -11
- package/workflow-templates/task-lifecycle.mjs +31 -6
- package/workspace/worktree-manager.mjs +277 -3
|
@@ -30,6 +30,12 @@
|
|
|
30
30
|
*/
|
|
31
31
|
|
|
32
32
|
const TAG = "[agent-supervisor]";
|
|
33
|
+
const API_ERROR_CONTINUE_COOLDOWNS_MS = Object.freeze([
|
|
34
|
+
3 * 60_000,
|
|
35
|
+
5 * 60_000,
|
|
36
|
+
5 * 60_000,
|
|
37
|
+
]);
|
|
38
|
+
const API_ERROR_RECOVERY_RESET_MS = 15 * 60_000;
|
|
33
39
|
|
|
34
40
|
// ── Situation Types (30+ edge cases) ────────────────────────────────────────
|
|
35
41
|
|
|
@@ -140,7 +146,7 @@ const INTERVENTION_LADDER = {
|
|
|
140
146
|
[SITUATION.PRE_PUSH_FAILURE]: [INTERVENTION.INJECT_PROMPT, INTERVENTION.INJECT_PROMPT, INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.BLOCK_AND_NOTIFY],
|
|
141
147
|
|
|
142
148
|
[SITUATION.RATE_LIMITED]: [INTERVENTION.COOLDOWN, INTERVENTION.COOLDOWN, INTERVENTION.PAUSE_EXECUTOR],
|
|
143
|
-
[SITUATION.API_ERROR]: [INTERVENTION.
|
|
149
|
+
[SITUATION.API_ERROR]: [INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.REDISPATCH_TASK, INTERVENTION.BLOCK_AND_NOTIFY],
|
|
144
150
|
[SITUATION.TOKEN_OVERFLOW]: [INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.BLOCK_AND_NOTIFY],
|
|
145
151
|
[SITUATION.SESSION_EXPIRED]: [INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.BLOCK_AND_NOTIFY],
|
|
146
152
|
[SITUATION.MODEL_ERROR]: [INTERVENTION.BLOCK_AND_NOTIFY], // Not retryable — wrong model name
|
|
@@ -263,9 +269,12 @@ const RECOVERY_PROMPTS = {
|
|
|
263
269
|
[SITUATION.REBASE_SPIRAL]: () =>
|
|
264
270
|
`You're stuck in a rebase loop. STOP rebasing and try:\n` +
|
|
265
271
|
`1. git rebase --abort\n` +
|
|
266
|
-
`2. git
|
|
267
|
-
`3.
|
|
268
|
-
`4.
|
|
272
|
+
`2. git stash (save any local changes)\n` +
|
|
273
|
+
`3. git fetch origin && git merge origin/main --no-edit\n` +
|
|
274
|
+
`4. If merge conflicts, resolve them manually\n` +
|
|
275
|
+
`5. git stash pop (if you stashed changes)\n` +
|
|
276
|
+
`6. Commit and push\n` +
|
|
277
|
+
`WARNING: Do NOT use git checkout -B to reset your branch to origin/main — this destroys all PR changes.`,
|
|
269
278
|
|
|
270
279
|
[SITUATION.THOUGHT_SPINNING]: () =>
|
|
271
280
|
`You've been reasoning without taking action. STOP thinking and START doing:\n` +
|
|
@@ -440,13 +449,15 @@ export class AgentSupervisor {
|
|
|
440
449
|
const signals = this._gatherSignals(taskId, context);
|
|
441
450
|
const situation = this._diagnose(signals, context);
|
|
442
451
|
const healthScore = this._computeHealthScore(signals);
|
|
452
|
+
const recoveryOverride = this._selectRecoveryIntervention(taskId, situation, context, state);
|
|
443
453
|
const attemptIndex = Math.min(
|
|
444
454
|
state.interventionCount,
|
|
445
455
|
(INTERVENTION_LADDER[situation] || [INTERVENTION.NONE]).length - 1,
|
|
446
456
|
);
|
|
447
|
-
const intervention =
|
|
457
|
+
const intervention = recoveryOverride?.intervention
|
|
458
|
+
|| (INTERVENTION_LADDER[situation] || [INTERVENTION.NONE])[attemptIndex];
|
|
448
459
|
const prompt = this._buildPrompt(situation, taskId, context);
|
|
449
|
-
const reason = this._buildReason(situation, signals, context);
|
|
460
|
+
const reason = recoveryOverride?.reason || this._buildReason(situation, signals, context);
|
|
450
461
|
|
|
451
462
|
// Record
|
|
452
463
|
state.situationHistory.push({ situation, ts: Date.now() });
|
|
@@ -482,6 +493,9 @@ export class AgentSupervisor {
|
|
|
482
493
|
break;
|
|
483
494
|
|
|
484
495
|
case INTERVENTION.CONTINUE_SIGNAL:
|
|
496
|
+
if (situation === SITUATION.API_ERROR) {
|
|
497
|
+
this._recordApiErrorContinue(taskId);
|
|
498
|
+
}
|
|
485
499
|
if (this._sendContinueSignal) {
|
|
486
500
|
this._sendContinueSignal(taskId);
|
|
487
501
|
}
|
|
@@ -722,6 +736,12 @@ export class AgentSupervisor {
|
|
|
722
736
|
qualityScore: state.qualityScore,
|
|
723
737
|
reviewVerdict: state.reviewVerdict,
|
|
724
738
|
reviewIssueCount: state.reviewIssues?.length || 0,
|
|
739
|
+
apiErrorRecovery: state.apiErrorRecovery
|
|
740
|
+
? {
|
|
741
|
+
...state.apiErrorRecovery,
|
|
742
|
+
cooldownRemainingMs: Math.max(0, Number(state.apiErrorRecovery.cooldownUntil || 0) - Date.now()),
|
|
743
|
+
}
|
|
744
|
+
: null,
|
|
725
745
|
recentSituations: state.situationHistory.slice(-10),
|
|
726
746
|
};
|
|
727
747
|
}
|
|
@@ -786,11 +806,104 @@ export class AgentSupervisor {
|
|
|
786
806
|
qualityScore: null,
|
|
787
807
|
reviewVerdict: null,
|
|
788
808
|
reviewIssues: null,
|
|
809
|
+
apiErrorRecovery: null,
|
|
789
810
|
});
|
|
790
811
|
}
|
|
791
812
|
return this._taskState.get(taskId);
|
|
792
813
|
}
|
|
793
814
|
|
|
815
|
+
_normalizeApiErrorSignature(context) {
|
|
816
|
+
const raw = String(context?.error || context?.output || "").trim().toLowerCase();
|
|
817
|
+
if (!raw) return "api_error";
|
|
818
|
+
return raw
|
|
819
|
+
.replace(/\s+/g, " ")
|
|
820
|
+
.replace(/\b\d{2,}\b/g, "#")
|
|
821
|
+
.slice(0, 240);
|
|
822
|
+
}
|
|
823
|
+
|
|
824
|
+
_selectRecoveryIntervention(taskId, situation, context, state) {
|
|
825
|
+
if (situation !== SITUATION.API_ERROR) {
|
|
826
|
+
if (state?.apiErrorRecovery) state.apiErrorRecovery = null;
|
|
827
|
+
return null;
|
|
828
|
+
}
|
|
829
|
+
|
|
830
|
+
const now = Date.now();
|
|
831
|
+
const signature = this._normalizeApiErrorSignature(context);
|
|
832
|
+
const current = state.apiErrorRecovery || {
|
|
833
|
+
signature,
|
|
834
|
+
continueAttempts: 0,
|
|
835
|
+
lastErrorAt: 0,
|
|
836
|
+
cooldownUntil: 0,
|
|
837
|
+
};
|
|
838
|
+
|
|
839
|
+
const shouldReset =
|
|
840
|
+
current.signature !== signature ||
|
|
841
|
+
(current.lastErrorAt > 0 && now - current.lastErrorAt > API_ERROR_RECOVERY_RESET_MS);
|
|
842
|
+
|
|
843
|
+
const nextState = shouldReset
|
|
844
|
+
? {
|
|
845
|
+
signature,
|
|
846
|
+
continueAttempts: 0,
|
|
847
|
+
lastErrorAt: now,
|
|
848
|
+
cooldownUntil: 0,
|
|
849
|
+
}
|
|
850
|
+
: {
|
|
851
|
+
...current,
|
|
852
|
+
signature,
|
|
853
|
+
lastErrorAt: now,
|
|
854
|
+
};
|
|
855
|
+
|
|
856
|
+
state.apiErrorRecovery = nextState;
|
|
857
|
+
|
|
858
|
+
if (Number(nextState.cooldownUntil || 0) > now) {
|
|
859
|
+
const remainingMs = Math.max(0, nextState.cooldownUntil - now);
|
|
860
|
+
return {
|
|
861
|
+
intervention: INTERVENTION.COOLDOWN,
|
|
862
|
+
reason: `Transient API failure on cooldown for ${Math.ceil(remainingMs / 60000)} minute(s) before retrying the same thread.`,
|
|
863
|
+
};
|
|
864
|
+
}
|
|
865
|
+
|
|
866
|
+
if (nextState.continueAttempts < API_ERROR_CONTINUE_COOLDOWNS_MS.length) {
|
|
867
|
+
const cooldownMs = API_ERROR_CONTINUE_COOLDOWNS_MS[nextState.continueAttempts];
|
|
868
|
+
return {
|
|
869
|
+
intervention: INTERVENTION.CONTINUE_SIGNAL,
|
|
870
|
+
reason: `Transient API failure — continue the current thread and back off for ${Math.ceil(cooldownMs / 60000)} minute(s) if it repeats.`,
|
|
871
|
+
};
|
|
872
|
+
}
|
|
873
|
+
|
|
874
|
+
const ladder = INTERVENTION_LADDER[SITUATION.API_ERROR] || [INTERVENTION.BLOCK_AND_NOTIFY];
|
|
875
|
+
const escalationIndex = Math.min(
|
|
876
|
+
nextState.continueAttempts - API_ERROR_CONTINUE_COOLDOWNS_MS.length,
|
|
877
|
+
ladder.length - 1,
|
|
878
|
+
);
|
|
879
|
+
const escalation = ladder[escalationIndex];
|
|
880
|
+
const escalationReason = escalation === INTERVENTION.FORCE_NEW_THREAD
|
|
881
|
+
? "Repeated API failures survived 3 continue attempts — forcing a fresh thread."
|
|
882
|
+
: escalation === INTERVENTION.REDISPATCH_TASK
|
|
883
|
+
? "Repeated API failures survived continue attempts and a fresh thread — redispatching the task."
|
|
884
|
+
: "Repeated API failures survived all automated recovery attempts — blocking for human review.";
|
|
885
|
+
return {
|
|
886
|
+
intervention: escalation,
|
|
887
|
+
reason: escalationReason,
|
|
888
|
+
};
|
|
889
|
+
}
|
|
890
|
+
|
|
891
|
+
_recordApiErrorContinue(taskId) {
|
|
892
|
+
const state = this._getTaskState(taskId);
|
|
893
|
+
if (!state?.apiErrorRecovery) return;
|
|
894
|
+
const attemptIndex = Math.min(
|
|
895
|
+
state.apiErrorRecovery.continueAttempts,
|
|
896
|
+
API_ERROR_CONTINUE_COOLDOWNS_MS.length - 1,
|
|
897
|
+
);
|
|
898
|
+
const cooldownMs = API_ERROR_CONTINUE_COOLDOWNS_MS[attemptIndex] || 0;
|
|
899
|
+
state.apiErrorRecovery = {
|
|
900
|
+
...state.apiErrorRecovery,
|
|
901
|
+
continueAttempts: Number(state.apiErrorRecovery.continueAttempts || 0) + 1,
|
|
902
|
+
cooldownUntil: cooldownMs > 0 ? Date.now() + cooldownMs : 0,
|
|
903
|
+
lastErrorAt: Date.now(),
|
|
904
|
+
};
|
|
905
|
+
}
|
|
906
|
+
|
|
794
907
|
_getTaskState(taskId) {
|
|
795
908
|
return this._taskState.get(taskId) || null;
|
|
796
909
|
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { execSync } from "node:child_process";
|
|
2
|
+
|
|
3
|
+
export function detectChangedFiles(repoRoot) {
|
|
4
|
+
try {
|
|
5
|
+
const output = execSync("git diff --name-only", {
|
|
6
|
+
cwd: repoRoot,
|
|
7
|
+
encoding: "utf8",
|
|
8
|
+
timeout: 10_000,
|
|
9
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
10
|
+
});
|
|
11
|
+
return output
|
|
12
|
+
.split(/\r?\n/)
|
|
13
|
+
.map((filePath) => filePath.trim())
|
|
14
|
+
.filter(Boolean);
|
|
15
|
+
} catch {
|
|
16
|
+
return [];
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export function getChangeSummary(repoRoot, files) {
|
|
21
|
+
if (!files.length) return "(no file changes detected)";
|
|
22
|
+
try {
|
|
23
|
+
const diff = execSync("git diff --stat", {
|
|
24
|
+
cwd: repoRoot,
|
|
25
|
+
encoding: "utf8",
|
|
26
|
+
timeout: 10_000,
|
|
27
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
28
|
+
});
|
|
29
|
+
return diff.trim() || files.join(", ");
|
|
30
|
+
} catch {
|
|
31
|
+
return files.join(", ");
|
|
32
|
+
}
|
|
33
|
+
}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import { resolvePromptTemplate } from "./agent-prompts.mjs";
|
|
2
|
+
|
|
3
|
+
function buildRecentMessagesContext(recentMessages) {
|
|
4
|
+
if (!recentMessages || !recentMessages.length) return "";
|
|
5
|
+
const msgs = recentMessages.slice(-15);
|
|
6
|
+
return `
|
|
7
|
+
## Recent monitor notifications (for context — shows what led to this crash)
|
|
8
|
+
${msgs.map((message, index) => `[${index + 1}] ${message}`).join("\n")}
|
|
9
|
+
`;
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
export function buildFixPrompt(
|
|
13
|
+
error,
|
|
14
|
+
sourceContext,
|
|
15
|
+
reason,
|
|
16
|
+
recentMessages,
|
|
17
|
+
promptTemplate = "",
|
|
18
|
+
) {
|
|
19
|
+
const messagesCtx = buildRecentMessagesContext(recentMessages);
|
|
20
|
+
|
|
21
|
+
const fallback = `You are a PowerShell expert fixing a crash in a running orchestrator script.
|
|
22
|
+
|
|
23
|
+
## Error
|
|
24
|
+
Type: ${error.errorType}
|
|
25
|
+
File: ${error.file}
|
|
26
|
+
Line: ${error.line}${error.column ? `\nColumn: ${error.column}` : ""}
|
|
27
|
+
Message: ${error.message}${error.codeLine ? `\nFailing code: ${error.codeLine}` : ""}
|
|
28
|
+
Crash reason: ${reason}
|
|
29
|
+
|
|
30
|
+
## Source context around line ${error.line}
|
|
31
|
+
\`\`\`powershell
|
|
32
|
+
${sourceContext}
|
|
33
|
+
\`\`\`
|
|
34
|
+
${messagesCtx}
|
|
35
|
+
## Instructions
|
|
36
|
+
1. Read the file "${error.file}"
|
|
37
|
+
2. Identify the root cause of the error at line ${error.line}
|
|
38
|
+
3. Fix ONLY the bug — minimal change, don't refactor unrelated code
|
|
39
|
+
4. Common PowerShell pitfalls:
|
|
40
|
+
- \`+=\` on arrays with single items fails — use [List[object]] or @() wrapping
|
|
41
|
+
- \`$a + $b\` on PSObjects fails — iterate and add individually
|
|
42
|
+
- Pipeline output can be a single object, not an array — always wrap with @()
|
|
43
|
+
- \`$null.Method()\` crashes — add null guards
|
|
44
|
+
- Named mutex with "Global\\" prefix fails on non-elevated Windows — use plain names
|
|
45
|
+
- \`$Var:\` is treated as a scope-qualified variable — use \`\${Var}:\` to embed colon in string
|
|
46
|
+
- ParserError: check for syntax issues like unclosed brackets, bad string interpolation
|
|
47
|
+
5. Write the fix to the file. Do NOT create new files or refactor other functions.
|
|
48
|
+
6. Keep all existing functionality intact.`;
|
|
49
|
+
return resolvePromptTemplate(
|
|
50
|
+
promptTemplate,
|
|
51
|
+
{
|
|
52
|
+
ERROR_TYPE: error.errorType,
|
|
53
|
+
ERROR_FILE: error.file,
|
|
54
|
+
ERROR_LINE: error.line,
|
|
55
|
+
ERROR_COLUMN_LINE: error.column ? `Column: ${error.column}` : "",
|
|
56
|
+
ERROR_MESSAGE: error.message,
|
|
57
|
+
ERROR_CODE_LINE: error.codeLine ? `Failing code: ${error.codeLine}` : "",
|
|
58
|
+
CRASH_REASON: reason,
|
|
59
|
+
SOURCE_CONTEXT: sourceContext,
|
|
60
|
+
RECENT_MESSAGES_CONTEXT: messagesCtx,
|
|
61
|
+
},
|
|
62
|
+
fallback,
|
|
63
|
+
);
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
export function buildFallbackPrompt(
|
|
67
|
+
fallback,
|
|
68
|
+
recentMessages,
|
|
69
|
+
promptTemplate = "",
|
|
70
|
+
) {
|
|
71
|
+
const messagesCtx = buildRecentMessagesContext(recentMessages);
|
|
72
|
+
|
|
73
|
+
const defaultPrompt = `You are a PowerShell expert analyzing an orchestrator script crash.
|
|
74
|
+
No structured error was extracted — the process terminated with: ${fallback.reason}
|
|
75
|
+
|
|
76
|
+
## Error indicators from log tail
|
|
77
|
+
${fallback.errorLines.length > 0 ? fallback.errorLines.join("\n") : "(no explicit error lines detected — possible SIGKILL, OOM, or silent crash)"}
|
|
78
|
+
|
|
79
|
+
## Last ${Math.min(80, fallback.lineCount)} lines of crash log
|
|
80
|
+
\`\`\`
|
|
81
|
+
${fallback.tail}
|
|
82
|
+
\`\`\`
|
|
83
|
+
${messagesCtx}
|
|
84
|
+
## Instructions
|
|
85
|
+
1. Analyze the log for the root cause of the crash
|
|
86
|
+
2. The main orchestrator script is: scripts/bosun/ve-orchestrator.ps1
|
|
87
|
+
3. If you can identify a fixable bug, apply a minimal fix to the file
|
|
88
|
+
4. Common crash causes:
|
|
89
|
+
- PowerShell syntax errors (\$Var: treated as scope, missing brackets)
|
|
90
|
+
- Array/object operation errors (+=, +, pipeline single-item issues)
|
|
91
|
+
- Null reference errors on optional API responses
|
|
92
|
+
- Infinite loops or stack overflow from recursive calls
|
|
93
|
+
- Exit code 4294967295 = unsigned overflow from uncaught exception
|
|
94
|
+
5. If the crash is external (SIGKILL, OOM) with no code bug, do nothing
|
|
95
|
+
6. Write any fix directly to the file. Keep existing functionality intact.`;
|
|
96
|
+
return resolvePromptTemplate(
|
|
97
|
+
promptTemplate,
|
|
98
|
+
{
|
|
99
|
+
FALLBACK_REASON: fallback.reason,
|
|
100
|
+
FALLBACK_ERROR_LINES:
|
|
101
|
+
fallback.errorLines.length > 0
|
|
102
|
+
? fallback.errorLines.join("\n")
|
|
103
|
+
: "(no explicit error lines detected — possible SIGKILL, OOM, or silent crash)",
|
|
104
|
+
FALLBACK_LINE_COUNT: Math.min(80, fallback.lineCount),
|
|
105
|
+
FALLBACK_TAIL: fallback.tail,
|
|
106
|
+
RECENT_MESSAGES_CONTEXT: messagesCtx,
|
|
107
|
+
},
|
|
108
|
+
defaultPrompt,
|
|
109
|
+
);
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
export function buildLoopPrompt(
|
|
113
|
+
errorLine,
|
|
114
|
+
repeatCount,
|
|
115
|
+
recentMessages,
|
|
116
|
+
promptTemplate = "",
|
|
117
|
+
) {
|
|
118
|
+
const messagesCtx = buildRecentMessagesContext(recentMessages);
|
|
119
|
+
|
|
120
|
+
const defaultPrompt = `You are a PowerShell expert fixing a loop bug in a running orchestrator script.
|
|
121
|
+
|
|
122
|
+
## Problem
|
|
123
|
+
The following error line is repeating ${repeatCount} times in the orchestrator output,
|
|
124
|
+
indicating an infinite retry loop that needs to be fixed:
|
|
125
|
+
|
|
126
|
+
"${errorLine}"
|
|
127
|
+
|
|
128
|
+
${messagesCtx}
|
|
129
|
+
|
|
130
|
+
## Instructions
|
|
131
|
+
1. The main script is: scripts/bosun/ve-orchestrator.ps1
|
|
132
|
+
2. Search for the code that produces this error message
|
|
133
|
+
3. Identify why it loops (missing break/continue/return, no state change between iterations, etc.)
|
|
134
|
+
4. Fix the loop by adding proper exit conditions, error handling, or state tracking
|
|
135
|
+
5. Common loop-causing patterns in this codebase:
|
|
136
|
+
- PR lifecycle handoff repeatedly retried with no diff between branch and base
|
|
137
|
+
- API calls returning the same error repeatedly with no backoff or give-up logic
|
|
138
|
+
- Status not updated after failure → next cycle tries the same thing
|
|
139
|
+
- Missing \`continue\` or state change in foreach loops over tracked attempts
|
|
140
|
+
6. Apply a minimal fix. Do NOT refactor unrelated code.
|
|
141
|
+
7. Write the fix directly to the file.`;
|
|
142
|
+
return resolvePromptTemplate(
|
|
143
|
+
promptTemplate,
|
|
144
|
+
{
|
|
145
|
+
REPEAT_COUNT: repeatCount,
|
|
146
|
+
ERROR_LINE: errorLine,
|
|
147
|
+
RECENT_MESSAGES_CONTEXT: messagesCtx,
|
|
148
|
+
},
|
|
149
|
+
defaultPrompt,
|
|
150
|
+
);
|
|
151
|
+
}
|
package/agent/autofix.mjs
CHANGED
|
@@ -27,14 +27,19 @@
|
|
|
27
27
|
* - Raw log fallback: when no structured errors found, feeds raw tail to Codex
|
|
28
28
|
*/
|
|
29
29
|
|
|
30
|
-
import { spawn
|
|
30
|
+
import { spawn } from "node:child_process";
|
|
31
31
|
import { existsSync, mkdirSync, createWriteStream, readFileSync } from "node:fs";
|
|
32
32
|
import { readFile, writeFile } from "node:fs/promises";
|
|
33
33
|
import { resolve, dirname } from "node:path";
|
|
34
34
|
import { fileURLToPath } from "node:url";
|
|
35
35
|
import { getConsoleLevel, LogLevel } from "../lib/logger.mjs";
|
|
36
36
|
import { isBenignErrorMention } from "../utils.mjs";
|
|
37
|
-
import {
|
|
37
|
+
import {
|
|
38
|
+
buildFallbackPrompt,
|
|
39
|
+
buildFixPrompt,
|
|
40
|
+
buildLoopPrompt,
|
|
41
|
+
} from "./autofix-prompts.mjs";
|
|
42
|
+
import { detectChangedFiles, getChangeSummary } from "./autofix-git.mjs";
|
|
38
43
|
import { resolveCodexProfileRuntime } from "../shell/codex-model-profiles.mjs";
|
|
39
44
|
|
|
40
45
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
@@ -693,41 +698,6 @@ export function runCodexExec(
|
|
|
693
698
|
* Detect which files were modified by comparing git status before/after.
|
|
694
699
|
* Returns array of changed file paths.
|
|
695
700
|
*/
|
|
696
|
-
function detectChangedFiles(repoRoot) {
|
|
697
|
-
try {
|
|
698
|
-
const output = execSync("git diff --name-only", {
|
|
699
|
-
cwd: repoRoot,
|
|
700
|
-
encoding: "utf8",
|
|
701
|
-
timeout: 10_000,
|
|
702
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
703
|
-
});
|
|
704
|
-
return output
|
|
705
|
-
.split(/\r?\n/)
|
|
706
|
-
.map((f) => f.trim())
|
|
707
|
-
.filter(Boolean);
|
|
708
|
-
} catch {
|
|
709
|
-
return [];
|
|
710
|
-
}
|
|
711
|
-
}
|
|
712
|
-
|
|
713
|
-
/**
|
|
714
|
-
* Get git diff summary for changed files (short, for Telegram).
|
|
715
|
-
*/
|
|
716
|
-
function getChangeSummary(repoRoot, files) {
|
|
717
|
-
if (!files.length) return "(no file changes detected)";
|
|
718
|
-
try {
|
|
719
|
-
const diff = execSync("git diff --stat", {
|
|
720
|
-
cwd: repoRoot,
|
|
721
|
-
encoding: "utf8",
|
|
722
|
-
timeout: 10_000,
|
|
723
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
724
|
-
});
|
|
725
|
-
return diff.trim() || files.join(", ");
|
|
726
|
-
} catch {
|
|
727
|
-
return files.join(", ");
|
|
728
|
-
}
|
|
729
|
-
}
|
|
730
|
-
|
|
731
701
|
/**
|
|
732
702
|
* Attempt to auto-fix errors found in a crash log.
|
|
733
703
|
*
|
|
@@ -1106,113 +1076,6 @@ export async function attemptAutoFix(opts) {
|
|
|
1106
1076
|
};
|
|
1107
1077
|
}
|
|
1108
1078
|
|
|
1109
|
-
// ── Prompt builders ─────────────────────────────────────────────────────────
|
|
1110
|
-
|
|
1111
|
-
function buildRecentMessagesContext(recentMessages) {
|
|
1112
|
-
if (!recentMessages || !recentMessages.length) return "";
|
|
1113
|
-
const msgs = recentMessages.slice(-15);
|
|
1114
|
-
return `
|
|
1115
|
-
## Recent monitor notifications (for context — shows what led to this crash)
|
|
1116
|
-
${msgs.map((m, i) => `[${i + 1}] ${m}`).join("\n")}
|
|
1117
|
-
`;
|
|
1118
|
-
}
|
|
1119
|
-
|
|
1120
|
-
function buildFixPrompt(
|
|
1121
|
-
error,
|
|
1122
|
-
sourceContext,
|
|
1123
|
-
reason,
|
|
1124
|
-
recentMessages,
|
|
1125
|
-
promptTemplate = "",
|
|
1126
|
-
) {
|
|
1127
|
-
const messagesCtx = buildRecentMessagesContext(recentMessages);
|
|
1128
|
-
|
|
1129
|
-
const fallback = `You are a PowerShell expert fixing a crash in a running orchestrator script.
|
|
1130
|
-
|
|
1131
|
-
## Error
|
|
1132
|
-
Type: ${error.errorType}
|
|
1133
|
-
File: ${error.file}
|
|
1134
|
-
Line: ${error.line}${error.column ? `\nColumn: ${error.column}` : ""}
|
|
1135
|
-
Message: ${error.message}${error.codeLine ? `\nFailing code: ${error.codeLine}` : ""}
|
|
1136
|
-
Crash reason: ${reason}
|
|
1137
|
-
|
|
1138
|
-
## Source context around line ${error.line}
|
|
1139
|
-
\`\`\`powershell
|
|
1140
|
-
${sourceContext}
|
|
1141
|
-
\`\`\`
|
|
1142
|
-
${messagesCtx}
|
|
1143
|
-
## Instructions
|
|
1144
|
-
1. Read the file "${error.file}"
|
|
1145
|
-
2. Identify the root cause of the error at line ${error.line}
|
|
1146
|
-
3. Fix ONLY the bug — minimal change, don't refactor unrelated code
|
|
1147
|
-
4. Common PowerShell pitfalls:
|
|
1148
|
-
- \`+=\` on arrays with single items fails — use [List[object]] or @() wrapping
|
|
1149
|
-
- \`$a + $b\` on PSObjects fails — iterate and add individually
|
|
1150
|
-
- Pipeline output can be a single object, not an array — always wrap with @()
|
|
1151
|
-
- \`$null.Method()\` crashes — add null guards
|
|
1152
|
-
- Named mutex with "Global\\\\" prefix fails on non-elevated Windows — use plain names
|
|
1153
|
-
- \`$Var:\` is treated as a scope-qualified variable — use \`\${Var}:\` to embed colon in string
|
|
1154
|
-
- ParserError: check for syntax issues like unclosed brackets, bad string interpolation
|
|
1155
|
-
5. Write the fix to the file. Do NOT create new files or refactor other functions.
|
|
1156
|
-
6. Keep all existing functionality intact.`;
|
|
1157
|
-
return resolvePromptTemplate(
|
|
1158
|
-
promptTemplate,
|
|
1159
|
-
{
|
|
1160
|
-
ERROR_TYPE: error.errorType,
|
|
1161
|
-
ERROR_FILE: error.file,
|
|
1162
|
-
ERROR_LINE: error.line,
|
|
1163
|
-
ERROR_COLUMN_LINE: error.column ? `Column: ${error.column}` : "",
|
|
1164
|
-
ERROR_MESSAGE: error.message,
|
|
1165
|
-
ERROR_CODE_LINE: error.codeLine ? `Failing code: ${error.codeLine}` : "",
|
|
1166
|
-
CRASH_REASON: reason,
|
|
1167
|
-
SOURCE_CONTEXT: sourceContext,
|
|
1168
|
-
RECENT_MESSAGES_CONTEXT: messagesCtx,
|
|
1169
|
-
},
|
|
1170
|
-
fallback,
|
|
1171
|
-
);
|
|
1172
|
-
}
|
|
1173
|
-
|
|
1174
|
-
function buildFallbackPrompt(fallback, recentMessages, promptTemplate = "") {
|
|
1175
|
-
const messagesCtx = buildRecentMessagesContext(recentMessages);
|
|
1176
|
-
|
|
1177
|
-
const defaultPrompt = `You are a PowerShell expert analyzing an orchestrator script crash.
|
|
1178
|
-
No structured error was extracted — the process terminated with: ${fallback.reason}
|
|
1179
|
-
|
|
1180
|
-
## Error indicators from log tail
|
|
1181
|
-
${fallback.errorLines.length > 0 ? fallback.errorLines.join("\n") : "(no explicit error lines detected — possible SIGKILL, OOM, or silent crash)"}
|
|
1182
|
-
|
|
1183
|
-
## Last ${Math.min(80, fallback.lineCount)} lines of crash log
|
|
1184
|
-
\`\`\`
|
|
1185
|
-
${fallback.tail}
|
|
1186
|
-
\`\`\`
|
|
1187
|
-
${messagesCtx}
|
|
1188
|
-
## Instructions
|
|
1189
|
-
1. Analyze the log for the root cause of the crash
|
|
1190
|
-
2. The main orchestrator script is: scripts/bosun/ve-orchestrator.ps1
|
|
1191
|
-
3. If you can identify a fixable bug, apply a minimal fix to the file
|
|
1192
|
-
4. Common crash causes:
|
|
1193
|
-
- PowerShell syntax errors (\$Var: treated as scope, missing brackets)
|
|
1194
|
-
- Array/object operation errors (+=, +, pipeline single-item issues)
|
|
1195
|
-
- Null reference errors on optional API responses
|
|
1196
|
-
- Infinite loops or stack overflow from recursive calls
|
|
1197
|
-
- Exit code 4294967295 = unsigned overflow from uncaught exception
|
|
1198
|
-
5. If the crash is external (SIGKILL, OOM) with no code bug, do nothing
|
|
1199
|
-
6. Write any fix directly to the file. Keep existing functionality intact.`;
|
|
1200
|
-
return resolvePromptTemplate(
|
|
1201
|
-
promptTemplate,
|
|
1202
|
-
{
|
|
1203
|
-
FALLBACK_REASON: fallback.reason,
|
|
1204
|
-
FALLBACK_ERROR_LINES:
|
|
1205
|
-
fallback.errorLines.length > 0
|
|
1206
|
-
? fallback.errorLines.join("\n")
|
|
1207
|
-
: "(no explicit error lines detected — possible SIGKILL, OOM, or silent crash)",
|
|
1208
|
-
FALLBACK_LINE_COUNT: Math.min(80, fallback.lineCount),
|
|
1209
|
-
FALLBACK_TAIL: fallback.tail,
|
|
1210
|
-
RECENT_MESSAGES_CONTEXT: messagesCtx,
|
|
1211
|
-
},
|
|
1212
|
-
defaultPrompt,
|
|
1213
|
-
);
|
|
1214
|
-
}
|
|
1215
|
-
|
|
1216
1079
|
// ── Repeating error (loop) fixer ────────────────────────────────────────────
|
|
1217
1080
|
|
|
1218
1081
|
/**
|
|
@@ -1265,38 +1128,11 @@ export async function fixLoopingError(opts) {
|
|
|
1265
1128
|
);
|
|
1266
1129
|
}
|
|
1267
1130
|
|
|
1268
|
-
const
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
## Problem
|
|
1273
|
-
The following error line is repeating ${repeatCount} times in the orchestrator output,
|
|
1274
|
-
indicating an infinite retry loop that needs to be fixed:
|
|
1275
|
-
|
|
1276
|
-
"${errorLine}"
|
|
1277
|
-
|
|
1278
|
-
${messagesCtx}
|
|
1279
|
-
|
|
1280
|
-
## Instructions
|
|
1281
|
-
1. The main script is: scripts/bosun/ve-orchestrator.ps1
|
|
1282
|
-
2. Search for the code that produces this error message
|
|
1283
|
-
3. Identify why it loops (missing break/continue/return, no state change between iterations, etc.)
|
|
1284
|
-
4. Fix the loop by adding proper exit conditions, error handling, or state tracking
|
|
1285
|
-
5. Common loop-causing patterns in this codebase:
|
|
1286
|
-
- PR lifecycle handoff repeatedly retried with no diff between branch and base
|
|
1287
|
-
- API calls returning the same error repeatedly with no backoff or give-up logic
|
|
1288
|
-
- Status not updated after failure → next cycle tries the same thing
|
|
1289
|
-
- Missing \`continue\` or state change in foreach loops over tracked attempts
|
|
1290
|
-
6. Apply a minimal fix. Do NOT refactor unrelated code.
|
|
1291
|
-
7. Write the fix directly to the file.`;
|
|
1292
|
-
const prompt = resolvePromptTemplate(
|
|
1131
|
+
const prompt = buildLoopPrompt(
|
|
1132
|
+
errorLine,
|
|
1133
|
+
repeatCount,
|
|
1134
|
+
recentMessages,
|
|
1293
1135
|
promptTemplate,
|
|
1294
|
-
{
|
|
1295
|
-
REPEAT_COUNT: repeatCount,
|
|
1296
|
-
ERROR_LINE: errorLine,
|
|
1297
|
-
RECENT_MESSAGES_CONTEXT: messagesCtx,
|
|
1298
|
-
},
|
|
1299
|
-
defaultPrompt,
|
|
1300
1136
|
);
|
|
1301
1137
|
|
|
1302
1138
|
// Audit log
|
package/agent/bosun-skills.mjs
CHANGED
|
@@ -41,9 +41,9 @@ const _SKILL_STREAM_PATH = resolve(
|
|
|
41
41
|
*
|
|
42
42
|
* @param {string} skillName
|
|
43
43
|
* @param {string} [skillTitle]
|
|
44
|
-
* @param {{ taskId?: string, executor?: string }} [opts]
|
|
44
|
+
* @param {{ taskId?: string, executor?: string, source?: string }} [opts]
|
|
45
45
|
*/
|
|
46
|
-
function emitSkillInvokeEvent(skillName, skillTitle, opts = {}) {
|
|
46
|
+
export function emitSkillInvokeEvent(skillName, skillTitle, opts = {}) {
|
|
47
47
|
try {
|
|
48
48
|
const event = {
|
|
49
49
|
timestamp: new Date().toISOString(),
|
|
@@ -51,6 +51,7 @@ function emitSkillInvokeEvent(skillName, skillTitle, opts = {}) {
|
|
|
51
51
|
data: { skill_name: skillName, skill_title: skillTitle || skillName },
|
|
52
52
|
...(opts.taskId ? { task_id: String(opts.taskId) } : {}),
|
|
53
53
|
...(opts.executor ? { executor: String(opts.executor) } : {}),
|
|
54
|
+
...(opts.source ? { source: String(opts.source) } : {}),
|
|
54
55
|
};
|
|
55
56
|
mkdirSync(dirname(_SKILL_STREAM_PATH), { recursive: true });
|
|
56
57
|
appendFileSync(_SKILL_STREAM_PATH, JSON.stringify(event) + "\n", "utf8");
|
|
@@ -5,6 +5,23 @@
|
|
|
5
5
|
|
|
6
6
|
"_comment_workflowFirst": "Set to true to run everything as workflows (task lifecycle, PR management, etc.)",
|
|
7
7
|
"workflowFirst": false,
|
|
8
|
+
"_comment_worktreeRecoveryCooldown": "Minutes to keep a task blocked after a non-retryable worktree failure before Bosun automatically returns it to todo.",
|
|
9
|
+
"workflowWorktreeRecoveryCooldownMin": 15,
|
|
10
|
+
"_comment_worktreeBootstrap": "Optional managed-worktree bootstrap policy. Commands are per detected stack and can be overridden per repo.",
|
|
11
|
+
"worktreeBootstrap": {
|
|
12
|
+
"enabled": true,
|
|
13
|
+
"linkSharedPaths": true,
|
|
14
|
+
"commandTimeoutMs": 600000,
|
|
15
|
+
"commandsByStack": {
|
|
16
|
+
"node": ["npm install"],
|
|
17
|
+
"python": ["poetry install --no-interaction"],
|
|
18
|
+
"dotnet": ["dotnet restore"]
|
|
19
|
+
},
|
|
20
|
+
"sharedPathsByStack": {
|
|
21
|
+
"node": ["node_modules"],
|
|
22
|
+
"php": ["vendor"]
|
|
23
|
+
}
|
|
24
|
+
},
|
|
8
25
|
"workflows": [
|
|
9
26
|
{
|
|
10
27
|
"type": "continuation-loop",
|