bosun 0.41.2 → 0.41.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/.env.example +1 -1
  2. package/agent/agent-pool.mjs +9 -2
  3. package/agent/agent-prompt-catalog.mjs +971 -0
  4. package/agent/agent-prompts.mjs +2 -970
  5. package/agent/agent-supervisor.mjs +119 -6
  6. package/agent/autofix-git.mjs +33 -0
  7. package/agent/autofix-prompts.mjs +151 -0
  8. package/agent/autofix.mjs +11 -175
  9. package/agent/bosun-skills.mjs +3 -2
  10. package/bosun.config.example.json +17 -0
  11. package/bosun.schema.json +87 -188
  12. package/cli.mjs +34 -1
  13. package/config/config-doctor.mjs +5 -250
  14. package/config/config-file-names.mjs +5 -0
  15. package/config/config.mjs +89 -493
  16. package/config/executor-config.mjs +493 -0
  17. package/config/repo-root.mjs +1 -2
  18. package/config/workspace-health.mjs +242 -0
  19. package/git/git-safety.mjs +15 -0
  20. package/github/github-oauth-portal.mjs +46 -0
  21. package/infra/library-manager-utils.mjs +22 -0
  22. package/infra/library-manager-well-known-sources.mjs +578 -0
  23. package/infra/library-manager.mjs +512 -1030
  24. package/infra/monitor.mjs +35 -9
  25. package/infra/session-tracker.mjs +10 -7
  26. package/kanban/kanban-adapter.mjs +17 -1
  27. package/lib/codebase-audit-manifests.mjs +117 -0
  28. package/lib/codebase-audit.mjs +18 -115
  29. package/package.json +18 -3
  30. package/server/setup-web-server.mjs +58 -5
  31. package/server/ui-server.mjs +1394 -79
  32. package/shell/codex-config-file.mjs +178 -0
  33. package/shell/codex-config.mjs +538 -575
  34. package/task/task-cli.mjs +54 -3
  35. package/task/task-executor.mjs +143 -13
  36. package/task/task-store.mjs +409 -1
  37. package/telegram/telegram-bot.mjs +127 -0
  38. package/tools/apply-pr-suggestions.mjs +401 -0
  39. package/tools/syntax-check.mjs +28 -9
  40. package/ui/app.js +3 -14
  41. package/ui/components/kanban-board.js +227 -4
  42. package/ui/components/session-list.js +85 -5
  43. package/ui/demo-defaults.js +338 -84
  44. package/ui/demo.html +155 -0
  45. package/ui/modules/session-api.js +96 -0
  46. package/ui/modules/settings-schema.js +1 -2
  47. package/ui/modules/state.js +43 -3
  48. package/ui/setup.html +4 -5
  49. package/ui/styles/components.css +58 -4
  50. package/ui/tabs/agents.js +12 -15
  51. package/ui/tabs/control.js +1 -0
  52. package/ui/tabs/library.js +484 -22
  53. package/ui/tabs/manual-flows.js +105 -29
  54. package/ui/tabs/tasks.js +848 -141
  55. package/ui/tabs/telemetry.js +129 -11
  56. package/ui/tabs/workflow-canvas-utils.mjs +130 -0
  57. package/ui/tabs/workflows.js +293 -23
  58. package/voice/voice-tool-definitions.mjs +757 -0
  59. package/voice/voice-tools.mjs +34 -778
  60. package/workflow/manual-flow-audit.mjs +165 -0
  61. package/workflow/manual-flows.mjs +164 -259
  62. package/workflow/workflow-engine.mjs +147 -58
  63. package/workflow/workflow-nodes/definitions.mjs +1207 -0
  64. package/workflow/workflow-nodes/transforms.mjs +612 -0
  65. package/workflow/workflow-nodes.mjs +358 -63
  66. package/workflow/workflow-templates.mjs +313 -191
  67. package/workflow-templates/_helpers.mjs +154 -0
  68. package/workflow-templates/agents.mjs +61 -4
  69. package/workflow-templates/code-quality.mjs +7 -7
  70. package/workflow-templates/github.mjs +20 -10
  71. package/workflow-templates/task-batch.mjs +44 -11
  72. package/workflow-templates/task-lifecycle.mjs +31 -6
  73. package/workspace/worktree-manager.mjs +277 -3
@@ -30,6 +30,12 @@
30
30
  */
31
31
 
32
32
  const TAG = "[agent-supervisor]";
33
+ const API_ERROR_CONTINUE_COOLDOWNS_MS = Object.freeze([
34
+ 3 * 60_000,
35
+ 5 * 60_000,
36
+ 5 * 60_000,
37
+ ]);
38
+ const API_ERROR_RECOVERY_RESET_MS = 15 * 60_000;
33
39
 
34
40
  // ── Situation Types (30+ edge cases) ────────────────────────────────────────
35
41
 
@@ -140,7 +146,7 @@ const INTERVENTION_LADDER = {
140
146
  [SITUATION.PRE_PUSH_FAILURE]: [INTERVENTION.INJECT_PROMPT, INTERVENTION.INJECT_PROMPT, INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.BLOCK_AND_NOTIFY],
141
147
 
142
148
  [SITUATION.RATE_LIMITED]: [INTERVENTION.COOLDOWN, INTERVENTION.COOLDOWN, INTERVENTION.PAUSE_EXECUTOR],
143
- [SITUATION.API_ERROR]: [INTERVENTION.COOLDOWN, INTERVENTION.COOLDOWN, INTERVENTION.BLOCK_AND_NOTIFY],
149
+ [SITUATION.API_ERROR]: [INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.REDISPATCH_TASK, INTERVENTION.BLOCK_AND_NOTIFY],
144
150
  [SITUATION.TOKEN_OVERFLOW]: [INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.BLOCK_AND_NOTIFY],
145
151
  [SITUATION.SESSION_EXPIRED]: [INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.BLOCK_AND_NOTIFY],
146
152
  [SITUATION.MODEL_ERROR]: [INTERVENTION.BLOCK_AND_NOTIFY], // Not retryable — wrong model name
@@ -263,9 +269,12 @@ const RECOVERY_PROMPTS = {
263
269
  [SITUATION.REBASE_SPIRAL]: () =>
264
270
  `You're stuck in a rebase loop. STOP rebasing and try:\n` +
265
271
  `1. git rebase --abort\n` +
266
- `2. git checkout -B <your-branch> origin/main\n` +
267
- `3. Re-apply your changes manually\n` +
268
- `4. Commit and push`,
272
+ `2. git stash (save any local changes)\n` +
273
+ `3. git fetch origin && git merge origin/main --no-edit\n` +
274
+ `4. If merge conflicts, resolve them manually\n` +
275
+ `5. git stash pop (if you stashed changes)\n` +
276
+ `6. Commit and push\n` +
277
+ `WARNING: Do NOT use git checkout -B to reset your branch to origin/main — this destroys all PR changes.`,
269
278
 
270
279
  [SITUATION.THOUGHT_SPINNING]: () =>
271
280
  `You've been reasoning without taking action. STOP thinking and START doing:\n` +
@@ -440,13 +449,15 @@ export class AgentSupervisor {
440
449
  const signals = this._gatherSignals(taskId, context);
441
450
  const situation = this._diagnose(signals, context);
442
451
  const healthScore = this._computeHealthScore(signals);
452
+ const recoveryOverride = this._selectRecoveryIntervention(taskId, situation, context, state);
443
453
  const attemptIndex = Math.min(
444
454
  state.interventionCount,
445
455
  (INTERVENTION_LADDER[situation] || [INTERVENTION.NONE]).length - 1,
446
456
  );
447
- const intervention = (INTERVENTION_LADDER[situation] || [INTERVENTION.NONE])[attemptIndex];
457
+ const intervention = recoveryOverride?.intervention
458
+ || (INTERVENTION_LADDER[situation] || [INTERVENTION.NONE])[attemptIndex];
448
459
  const prompt = this._buildPrompt(situation, taskId, context);
449
- const reason = this._buildReason(situation, signals, context);
460
+ const reason = recoveryOverride?.reason || this._buildReason(situation, signals, context);
450
461
 
451
462
  // Record
452
463
  state.situationHistory.push({ situation, ts: Date.now() });
@@ -482,6 +493,9 @@ export class AgentSupervisor {
482
493
  break;
483
494
 
484
495
  case INTERVENTION.CONTINUE_SIGNAL:
496
+ if (situation === SITUATION.API_ERROR) {
497
+ this._recordApiErrorContinue(taskId);
498
+ }
485
499
  if (this._sendContinueSignal) {
486
500
  this._sendContinueSignal(taskId);
487
501
  }
@@ -722,6 +736,12 @@ export class AgentSupervisor {
722
736
  qualityScore: state.qualityScore,
723
737
  reviewVerdict: state.reviewVerdict,
724
738
  reviewIssueCount: state.reviewIssues?.length || 0,
739
+ apiErrorRecovery: state.apiErrorRecovery
740
+ ? {
741
+ ...state.apiErrorRecovery,
742
+ cooldownRemainingMs: Math.max(0, Number(state.apiErrorRecovery.cooldownUntil || 0) - Date.now()),
743
+ }
744
+ : null,
725
745
  recentSituations: state.situationHistory.slice(-10),
726
746
  };
727
747
  }
@@ -786,11 +806,104 @@ export class AgentSupervisor {
786
806
  qualityScore: null,
787
807
  reviewVerdict: null,
788
808
  reviewIssues: null,
809
+ apiErrorRecovery: null,
789
810
  });
790
811
  }
791
812
  return this._taskState.get(taskId);
792
813
  }
793
814
 
815
+ _normalizeApiErrorSignature(context) {
816
+ const raw = String(context?.error || context?.output || "").trim().toLowerCase();
817
+ if (!raw) return "api_error";
818
+ return raw
819
+ .replace(/\s+/g, " ")
820
+ .replace(/\b\d{2,}\b/g, "#")
821
+ .slice(0, 240);
822
+ }
823
+
824
+ _selectRecoveryIntervention(taskId, situation, context, state) {
825
+ if (situation !== SITUATION.API_ERROR) {
826
+ if (state?.apiErrorRecovery) state.apiErrorRecovery = null;
827
+ return null;
828
+ }
829
+
830
+ const now = Date.now();
831
+ const signature = this._normalizeApiErrorSignature(context);
832
+ const current = state.apiErrorRecovery || {
833
+ signature,
834
+ continueAttempts: 0,
835
+ lastErrorAt: 0,
836
+ cooldownUntil: 0,
837
+ };
838
+
839
+ const shouldReset =
840
+ current.signature !== signature ||
841
+ (current.lastErrorAt > 0 && now - current.lastErrorAt > API_ERROR_RECOVERY_RESET_MS);
842
+
843
+ const nextState = shouldReset
844
+ ? {
845
+ signature,
846
+ continueAttempts: 0,
847
+ lastErrorAt: now,
848
+ cooldownUntil: 0,
849
+ }
850
+ : {
851
+ ...current,
852
+ signature,
853
+ lastErrorAt: now,
854
+ };
855
+
856
+ state.apiErrorRecovery = nextState;
857
+
858
+ if (Number(nextState.cooldownUntil || 0) > now) {
859
+ const remainingMs = Math.max(0, nextState.cooldownUntil - now);
860
+ return {
861
+ intervention: INTERVENTION.COOLDOWN,
862
+ reason: `Transient API failure on cooldown for ${Math.ceil(remainingMs / 60000)} minute(s) before retrying the same thread.`,
863
+ };
864
+ }
865
+
866
+ if (nextState.continueAttempts < API_ERROR_CONTINUE_COOLDOWNS_MS.length) {
867
+ const cooldownMs = API_ERROR_CONTINUE_COOLDOWNS_MS[nextState.continueAttempts];
868
+ return {
869
+ intervention: INTERVENTION.CONTINUE_SIGNAL,
870
+ reason: `Transient API failure — continue the current thread and back off for ${Math.ceil(cooldownMs / 60000)} minute(s) if it repeats.`,
871
+ };
872
+ }
873
+
874
+ const ladder = INTERVENTION_LADDER[SITUATION.API_ERROR] || [INTERVENTION.BLOCK_AND_NOTIFY];
875
+ const escalationIndex = Math.min(
876
+ nextState.continueAttempts - API_ERROR_CONTINUE_COOLDOWNS_MS.length,
877
+ ladder.length - 1,
878
+ );
879
+ const escalation = ladder[escalationIndex];
880
+ const escalationReason = escalation === INTERVENTION.FORCE_NEW_THREAD
881
+ ? "Repeated API failures survived 3 continue attempts — forcing a fresh thread."
882
+ : escalation === INTERVENTION.REDISPATCH_TASK
883
+ ? "Repeated API failures survived continue attempts and a fresh thread — redispatching the task."
884
+ : "Repeated API failures survived all automated recovery attempts — blocking for human review.";
885
+ return {
886
+ intervention: escalation,
887
+ reason: escalationReason,
888
+ };
889
+ }
890
+
891
+ _recordApiErrorContinue(taskId) {
892
+ const state = this._getTaskState(taskId);
893
+ if (!state?.apiErrorRecovery) return;
894
+ const attemptIndex = Math.min(
895
+ state.apiErrorRecovery.continueAttempts,
896
+ API_ERROR_CONTINUE_COOLDOWNS_MS.length - 1,
897
+ );
898
+ const cooldownMs = API_ERROR_CONTINUE_COOLDOWNS_MS[attemptIndex] || 0;
899
+ state.apiErrorRecovery = {
900
+ ...state.apiErrorRecovery,
901
+ continueAttempts: Number(state.apiErrorRecovery.continueAttempts || 0) + 1,
902
+ cooldownUntil: cooldownMs > 0 ? Date.now() + cooldownMs : 0,
903
+ lastErrorAt: Date.now(),
904
+ };
905
+ }
906
+
794
907
  _getTaskState(taskId) {
795
908
  return this._taskState.get(taskId) || null;
796
909
  }
@@ -0,0 +1,33 @@
1
+ import { execSync } from "node:child_process";
2
+
3
+ export function detectChangedFiles(repoRoot) {
4
+ try {
5
+ const output = execSync("git diff --name-only", {
6
+ cwd: repoRoot,
7
+ encoding: "utf8",
8
+ timeout: 10_000,
9
+ stdio: ["pipe", "pipe", "pipe"],
10
+ });
11
+ return output
12
+ .split(/\r?\n/)
13
+ .map((filePath) => filePath.trim())
14
+ .filter(Boolean);
15
+ } catch {
16
+ return [];
17
+ }
18
+ }
19
+
20
+ export function getChangeSummary(repoRoot, files) {
21
+ if (!files.length) return "(no file changes detected)";
22
+ try {
23
+ const diff = execSync("git diff --stat", {
24
+ cwd: repoRoot,
25
+ encoding: "utf8",
26
+ timeout: 10_000,
27
+ stdio: ["pipe", "pipe", "pipe"],
28
+ });
29
+ return diff.trim() || files.join(", ");
30
+ } catch {
31
+ return files.join(", ");
32
+ }
33
+ }
@@ -0,0 +1,151 @@
1
+ import { resolvePromptTemplate } from "./agent-prompts.mjs";
2
+
3
+ function buildRecentMessagesContext(recentMessages) {
4
+ if (!recentMessages || !recentMessages.length) return "";
5
+ const msgs = recentMessages.slice(-15);
6
+ return `
7
+ ## Recent monitor notifications (for context — shows what led to this crash)
8
+ ${msgs.map((message, index) => `[${index + 1}] ${message}`).join("\n")}
9
+ `;
10
+ }
11
+
12
+ export function buildFixPrompt(
13
+ error,
14
+ sourceContext,
15
+ reason,
16
+ recentMessages,
17
+ promptTemplate = "",
18
+ ) {
19
+ const messagesCtx = buildRecentMessagesContext(recentMessages);
20
+
21
+ const fallback = `You are a PowerShell expert fixing a crash in a running orchestrator script.
22
+
23
+ ## Error
24
+ Type: ${error.errorType}
25
+ File: ${error.file}
26
+ Line: ${error.line}${error.column ? `\nColumn: ${error.column}` : ""}
27
+ Message: ${error.message}${error.codeLine ? `\nFailing code: ${error.codeLine}` : ""}
28
+ Crash reason: ${reason}
29
+
30
+ ## Source context around line ${error.line}
31
+ \`\`\`powershell
32
+ ${sourceContext}
33
+ \`\`\`
34
+ ${messagesCtx}
35
+ ## Instructions
36
+ 1. Read the file "${error.file}"
37
+ 2. Identify the root cause of the error at line ${error.line}
38
+ 3. Fix ONLY the bug — minimal change, don't refactor unrelated code
39
+ 4. Common PowerShell pitfalls:
40
+ - \`+=\` on arrays with single items fails — use [List[object]] or @() wrapping
41
+ - \`$a + $b\` on PSObjects fails — iterate and add individually
42
+ - Pipeline output can be a single object, not an array — always wrap with @()
43
+ - \`$null.Method()\` crashes — add null guards
44
+ - Named mutex with "Global\\" prefix fails on non-elevated Windows — use plain names
45
+ - \`$Var:\` is treated as a scope-qualified variable — use \`\${Var}:\` to embed colon in string
46
+ - ParserError: check for syntax issues like unclosed brackets, bad string interpolation
47
+ 5. Write the fix to the file. Do NOT create new files or refactor other functions.
48
+ 6. Keep all existing functionality intact.`;
49
+ return resolvePromptTemplate(
50
+ promptTemplate,
51
+ {
52
+ ERROR_TYPE: error.errorType,
53
+ ERROR_FILE: error.file,
54
+ ERROR_LINE: error.line,
55
+ ERROR_COLUMN_LINE: error.column ? `Column: ${error.column}` : "",
56
+ ERROR_MESSAGE: error.message,
57
+ ERROR_CODE_LINE: error.codeLine ? `Failing code: ${error.codeLine}` : "",
58
+ CRASH_REASON: reason,
59
+ SOURCE_CONTEXT: sourceContext,
60
+ RECENT_MESSAGES_CONTEXT: messagesCtx,
61
+ },
62
+ fallback,
63
+ );
64
+ }
65
+
66
+ export function buildFallbackPrompt(
67
+ fallback,
68
+ recentMessages,
69
+ promptTemplate = "",
70
+ ) {
71
+ const messagesCtx = buildRecentMessagesContext(recentMessages);
72
+
73
+ const defaultPrompt = `You are a PowerShell expert analyzing an orchestrator script crash.
74
+ No structured error was extracted — the process terminated with: ${fallback.reason}
75
+
76
+ ## Error indicators from log tail
77
+ ${fallback.errorLines.length > 0 ? fallback.errorLines.join("\n") : "(no explicit error lines detected — possible SIGKILL, OOM, or silent crash)"}
78
+
79
+ ## Last ${Math.min(80, fallback.lineCount)} lines of crash log
80
+ \`\`\`
81
+ ${fallback.tail}
82
+ \`\`\`
83
+ ${messagesCtx}
84
+ ## Instructions
85
+ 1. Analyze the log for the root cause of the crash
86
+ 2. The main orchestrator script is: scripts/bosun/ve-orchestrator.ps1
87
+ 3. If you can identify a fixable bug, apply a minimal fix to the file
88
+ 4. Common crash causes:
89
+ - PowerShell syntax errors (\$Var: treated as scope, missing brackets)
90
+ - Array/object operation errors (+=, +, pipeline single-item issues)
91
+ - Null reference errors on optional API responses
92
+ - Infinite loops or stack overflow from recursive calls
93
+ - Exit code 4294967295 = unsigned overflow from uncaught exception
94
+ 5. If the crash is external (SIGKILL, OOM) with no code bug, do nothing
95
+ 6. Write any fix directly to the file. Keep existing functionality intact.`;
96
+ return resolvePromptTemplate(
97
+ promptTemplate,
98
+ {
99
+ FALLBACK_REASON: fallback.reason,
100
+ FALLBACK_ERROR_LINES:
101
+ fallback.errorLines.length > 0
102
+ ? fallback.errorLines.join("\n")
103
+ : "(no explicit error lines detected — possible SIGKILL, OOM, or silent crash)",
104
+ FALLBACK_LINE_COUNT: Math.min(80, fallback.lineCount),
105
+ FALLBACK_TAIL: fallback.tail,
106
+ RECENT_MESSAGES_CONTEXT: messagesCtx,
107
+ },
108
+ defaultPrompt,
109
+ );
110
+ }
111
+
112
+ export function buildLoopPrompt(
113
+ errorLine,
114
+ repeatCount,
115
+ recentMessages,
116
+ promptTemplate = "",
117
+ ) {
118
+ const messagesCtx = buildRecentMessagesContext(recentMessages);
119
+
120
+ const defaultPrompt = `You are a PowerShell expert fixing a loop bug in a running orchestrator script.
121
+
122
+ ## Problem
123
+ The following error line is repeating ${repeatCount} times in the orchestrator output,
124
+ indicating an infinite retry loop that needs to be fixed:
125
+
126
+ "${errorLine}"
127
+
128
+ ${messagesCtx}
129
+
130
+ ## Instructions
131
+ 1. The main script is: scripts/bosun/ve-orchestrator.ps1
132
+ 2. Search for the code that produces this error message
133
+ 3. Identify why it loops (missing break/continue/return, no state change between iterations, etc.)
134
+ 4. Fix the loop by adding proper exit conditions, error handling, or state tracking
135
+ 5. Common loop-causing patterns in this codebase:
136
+ - PR lifecycle handoff repeatedly retried with no diff between branch and base
137
+ - API calls returning the same error repeatedly with no backoff or give-up logic
138
+ - Status not updated after failure → next cycle tries the same thing
139
+ - Missing \`continue\` or state change in foreach loops over tracked attempts
140
+ 6. Apply a minimal fix. Do NOT refactor unrelated code.
141
+ 7. Write the fix directly to the file.`;
142
+ return resolvePromptTemplate(
143
+ promptTemplate,
144
+ {
145
+ REPEAT_COUNT: repeatCount,
146
+ ERROR_LINE: errorLine,
147
+ RECENT_MESSAGES_CONTEXT: messagesCtx,
148
+ },
149
+ defaultPrompt,
150
+ );
151
+ }
package/agent/autofix.mjs CHANGED
@@ -27,14 +27,19 @@
27
27
  * - Raw log fallback: when no structured errors found, feeds raw tail to Codex
28
28
  */
29
29
 
30
- import { spawn, execSync } from "node:child_process";
30
+ import { spawn } from "node:child_process";
31
31
  import { existsSync, mkdirSync, createWriteStream, readFileSync } from "node:fs";
32
32
  import { readFile, writeFile } from "node:fs/promises";
33
33
  import { resolve, dirname } from "node:path";
34
34
  import { fileURLToPath } from "node:url";
35
35
  import { getConsoleLevel, LogLevel } from "../lib/logger.mjs";
36
36
  import { isBenignErrorMention } from "../utils.mjs";
37
- import { resolvePromptTemplate } from "./agent-prompts.mjs";
37
+ import {
38
+ buildFallbackPrompt,
39
+ buildFixPrompt,
40
+ buildLoopPrompt,
41
+ } from "./autofix-prompts.mjs";
42
+ import { detectChangedFiles, getChangeSummary } from "./autofix-git.mjs";
38
43
  import { resolveCodexProfileRuntime } from "../shell/codex-model-profiles.mjs";
39
44
 
40
45
  const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -693,41 +698,6 @@ export function runCodexExec(
693
698
  * Detect which files were modified by comparing git status before/after.
694
699
  * Returns array of changed file paths.
695
700
  */
696
- function detectChangedFiles(repoRoot) {
697
- try {
698
- const output = execSync("git diff --name-only", {
699
- cwd: repoRoot,
700
- encoding: "utf8",
701
- timeout: 10_000,
702
- stdio: ["pipe", "pipe", "pipe"],
703
- });
704
- return output
705
- .split(/\r?\n/)
706
- .map((f) => f.trim())
707
- .filter(Boolean);
708
- } catch {
709
- return [];
710
- }
711
- }
712
-
713
- /**
714
- * Get git diff summary for changed files (short, for Telegram).
715
- */
716
- function getChangeSummary(repoRoot, files) {
717
- if (!files.length) return "(no file changes detected)";
718
- try {
719
- const diff = execSync("git diff --stat", {
720
- cwd: repoRoot,
721
- encoding: "utf8",
722
- timeout: 10_000,
723
- stdio: ["pipe", "pipe", "pipe"],
724
- });
725
- return diff.trim() || files.join(", ");
726
- } catch {
727
- return files.join(", ");
728
- }
729
- }
730
-
731
701
  /**
732
702
  * Attempt to auto-fix errors found in a crash log.
733
703
  *
@@ -1106,113 +1076,6 @@ export async function attemptAutoFix(opts) {
1106
1076
  };
1107
1077
  }
1108
1078
 
1109
- // ── Prompt builders ─────────────────────────────────────────────────────────
1110
-
1111
- function buildRecentMessagesContext(recentMessages) {
1112
- if (!recentMessages || !recentMessages.length) return "";
1113
- const msgs = recentMessages.slice(-15);
1114
- return `
1115
- ## Recent monitor notifications (for context — shows what led to this crash)
1116
- ${msgs.map((m, i) => `[${i + 1}] ${m}`).join("\n")}
1117
- `;
1118
- }
1119
-
1120
- function buildFixPrompt(
1121
- error,
1122
- sourceContext,
1123
- reason,
1124
- recentMessages,
1125
- promptTemplate = "",
1126
- ) {
1127
- const messagesCtx = buildRecentMessagesContext(recentMessages);
1128
-
1129
- const fallback = `You are a PowerShell expert fixing a crash in a running orchestrator script.
1130
-
1131
- ## Error
1132
- Type: ${error.errorType}
1133
- File: ${error.file}
1134
- Line: ${error.line}${error.column ? `\nColumn: ${error.column}` : ""}
1135
- Message: ${error.message}${error.codeLine ? `\nFailing code: ${error.codeLine}` : ""}
1136
- Crash reason: ${reason}
1137
-
1138
- ## Source context around line ${error.line}
1139
- \`\`\`powershell
1140
- ${sourceContext}
1141
- \`\`\`
1142
- ${messagesCtx}
1143
- ## Instructions
1144
- 1. Read the file "${error.file}"
1145
- 2. Identify the root cause of the error at line ${error.line}
1146
- 3. Fix ONLY the bug — minimal change, don't refactor unrelated code
1147
- 4. Common PowerShell pitfalls:
1148
- - \`+=\` on arrays with single items fails — use [List[object]] or @() wrapping
1149
- - \`$a + $b\` on PSObjects fails — iterate and add individually
1150
- - Pipeline output can be a single object, not an array — always wrap with @()
1151
- - \`$null.Method()\` crashes — add null guards
1152
- - Named mutex with "Global\\\\" prefix fails on non-elevated Windows — use plain names
1153
- - \`$Var:\` is treated as a scope-qualified variable — use \`\${Var}:\` to embed colon in string
1154
- - ParserError: check for syntax issues like unclosed brackets, bad string interpolation
1155
- 5. Write the fix to the file. Do NOT create new files or refactor other functions.
1156
- 6. Keep all existing functionality intact.`;
1157
- return resolvePromptTemplate(
1158
- promptTemplate,
1159
- {
1160
- ERROR_TYPE: error.errorType,
1161
- ERROR_FILE: error.file,
1162
- ERROR_LINE: error.line,
1163
- ERROR_COLUMN_LINE: error.column ? `Column: ${error.column}` : "",
1164
- ERROR_MESSAGE: error.message,
1165
- ERROR_CODE_LINE: error.codeLine ? `Failing code: ${error.codeLine}` : "",
1166
- CRASH_REASON: reason,
1167
- SOURCE_CONTEXT: sourceContext,
1168
- RECENT_MESSAGES_CONTEXT: messagesCtx,
1169
- },
1170
- fallback,
1171
- );
1172
- }
1173
-
1174
- function buildFallbackPrompt(fallback, recentMessages, promptTemplate = "") {
1175
- const messagesCtx = buildRecentMessagesContext(recentMessages);
1176
-
1177
- const defaultPrompt = `You are a PowerShell expert analyzing an orchestrator script crash.
1178
- No structured error was extracted — the process terminated with: ${fallback.reason}
1179
-
1180
- ## Error indicators from log tail
1181
- ${fallback.errorLines.length > 0 ? fallback.errorLines.join("\n") : "(no explicit error lines detected — possible SIGKILL, OOM, or silent crash)"}
1182
-
1183
- ## Last ${Math.min(80, fallback.lineCount)} lines of crash log
1184
- \`\`\`
1185
- ${fallback.tail}
1186
- \`\`\`
1187
- ${messagesCtx}
1188
- ## Instructions
1189
- 1. Analyze the log for the root cause of the crash
1190
- 2. The main orchestrator script is: scripts/bosun/ve-orchestrator.ps1
1191
- 3. If you can identify a fixable bug, apply a minimal fix to the file
1192
- 4. Common crash causes:
1193
- - PowerShell syntax errors (\$Var: treated as scope, missing brackets)
1194
- - Array/object operation errors (+=, +, pipeline single-item issues)
1195
- - Null reference errors on optional API responses
1196
- - Infinite loops or stack overflow from recursive calls
1197
- - Exit code 4294967295 = unsigned overflow from uncaught exception
1198
- 5. If the crash is external (SIGKILL, OOM) with no code bug, do nothing
1199
- 6. Write any fix directly to the file. Keep existing functionality intact.`;
1200
- return resolvePromptTemplate(
1201
- promptTemplate,
1202
- {
1203
- FALLBACK_REASON: fallback.reason,
1204
- FALLBACK_ERROR_LINES:
1205
- fallback.errorLines.length > 0
1206
- ? fallback.errorLines.join("\n")
1207
- : "(no explicit error lines detected — possible SIGKILL, OOM, or silent crash)",
1208
- FALLBACK_LINE_COUNT: Math.min(80, fallback.lineCount),
1209
- FALLBACK_TAIL: fallback.tail,
1210
- RECENT_MESSAGES_CONTEXT: messagesCtx,
1211
- },
1212
- defaultPrompt,
1213
- );
1214
- }
1215
-
1216
1079
  // ── Repeating error (loop) fixer ────────────────────────────────────────────
1217
1080
 
1218
1081
  /**
@@ -1265,38 +1128,11 @@ export async function fixLoopingError(opts) {
1265
1128
  );
1266
1129
  }
1267
1130
 
1268
- const messagesCtx = buildRecentMessagesContext(recentMessages);
1269
-
1270
- const defaultPrompt = `You are a PowerShell expert fixing a loop bug in a running orchestrator script.
1271
-
1272
- ## Problem
1273
- The following error line is repeating ${repeatCount} times in the orchestrator output,
1274
- indicating an infinite retry loop that needs to be fixed:
1275
-
1276
- "${errorLine}"
1277
-
1278
- ${messagesCtx}
1279
-
1280
- ## Instructions
1281
- 1. The main script is: scripts/bosun/ve-orchestrator.ps1
1282
- 2. Search for the code that produces this error message
1283
- 3. Identify why it loops (missing break/continue/return, no state change between iterations, etc.)
1284
- 4. Fix the loop by adding proper exit conditions, error handling, or state tracking
1285
- 5. Common loop-causing patterns in this codebase:
1286
- - PR lifecycle handoff repeatedly retried with no diff between branch and base
1287
- - API calls returning the same error repeatedly with no backoff or give-up logic
1288
- - Status not updated after failure → next cycle tries the same thing
1289
- - Missing \`continue\` or state change in foreach loops over tracked attempts
1290
- 6. Apply a minimal fix. Do NOT refactor unrelated code.
1291
- 7. Write the fix directly to the file.`;
1292
- const prompt = resolvePromptTemplate(
1131
+ const prompt = buildLoopPrompt(
1132
+ errorLine,
1133
+ repeatCount,
1134
+ recentMessages,
1293
1135
  promptTemplate,
1294
- {
1295
- REPEAT_COUNT: repeatCount,
1296
- ERROR_LINE: errorLine,
1297
- RECENT_MESSAGES_CONTEXT: messagesCtx,
1298
- },
1299
- defaultPrompt,
1300
1136
  );
1301
1137
 
1302
1138
  // Audit log
@@ -41,9 +41,9 @@ const _SKILL_STREAM_PATH = resolve(
41
41
  *
42
42
  * @param {string} skillName
43
43
  * @param {string} [skillTitle]
44
- * @param {{ taskId?: string, executor?: string }} [opts]
44
+ * @param {{ taskId?: string, executor?: string, source?: string }} [opts]
45
45
  */
46
- function emitSkillInvokeEvent(skillName, skillTitle, opts = {}) {
46
+ export function emitSkillInvokeEvent(skillName, skillTitle, opts = {}) {
47
47
  try {
48
48
  const event = {
49
49
  timestamp: new Date().toISOString(),
@@ -51,6 +51,7 @@ function emitSkillInvokeEvent(skillName, skillTitle, opts = {}) {
51
51
  data: { skill_name: skillName, skill_title: skillTitle || skillName },
52
52
  ...(opts.taskId ? { task_id: String(opts.taskId) } : {}),
53
53
  ...(opts.executor ? { executor: String(opts.executor) } : {}),
54
+ ...(opts.source ? { source: String(opts.source) } : {}),
54
55
  };
55
56
  mkdirSync(dirname(_SKILL_STREAM_PATH), { recursive: true });
56
57
  appendFileSync(_SKILL_STREAM_PATH, JSON.stringify(event) + "\n", "utf8");
@@ -5,6 +5,23 @@
5
5
 
6
6
  "_comment_workflowFirst": "Set to true to run everything as workflows (task lifecycle, PR management, etc.)",
7
7
  "workflowFirst": false,
8
+ "_comment_worktreeRecoveryCooldown": "Minutes to keep a task blocked after a non-retryable worktree failure before Bosun automatically returns it to todo.",
9
+ "workflowWorktreeRecoveryCooldownMin": 15,
10
+ "_comment_worktreeBootstrap": "Optional managed-worktree bootstrap policy. Commands are per detected stack and can be overridden per repo.",
11
+ "worktreeBootstrap": {
12
+ "enabled": true,
13
+ "linkSharedPaths": true,
14
+ "commandTimeoutMs": 600000,
15
+ "commandsByStack": {
16
+ "node": ["npm install"],
17
+ "python": ["poetry install --no-interaction"],
18
+ "dotnet": ["dotnet restore"]
19
+ },
20
+ "sharedPathsByStack": {
21
+ "node": ["node_modules"],
22
+ "php": ["vendor"]
23
+ }
24
+ },
8
25
  "workflows": [
9
26
  {
10
27
  "type": "continuation-loop",