bosun 0.41.3 → 0.41.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agent/agent-pool.mjs +9 -2
- package/agent/agent-supervisor.mjs +113 -3
- package/infra/monitor.mjs +17 -0
- package/package.json +1 -1
- package/server/setup-web-server.mjs +58 -5
- package/server/ui-server.mjs +200 -0
- package/tools/syntax-check.mjs +8 -1
- package/ui/demo-defaults.js +8 -6
- package/ui/modules/state.js +22 -0
- package/ui/tabs/tasks.js +63 -1
- package/workflow/project-detection.mjs +68 -3
- package/workflow/workflow-engine.mjs +48 -4
- package/workflow/workflow-nodes.mjs +90 -17
- package/workflow/workflow-templates.mjs +39 -9
- package/workflow-templates/task-batch.mjs +24 -2
- package/workflow-templates/task-lifecycle.mjs +2 -2
package/agent/agent-pool.mjs
CHANGED
|
@@ -2846,6 +2846,10 @@ function isPoisonedCodexResumeError(errorValue) {
|
|
|
2846
2846
|
);
|
|
2847
2847
|
}
|
|
2848
2848
|
|
|
2849
|
+
/**
 * Report whether an error value describes a Codex thread-resume timeout.
 *
 * The value is coerced to a string (falsy values become ""), lower-cased and
 * searched for the marker phrase "codex resume timeout".
 *
 * @param {unknown} errorValue - Error message, Error object, or any other value.
 * @returns {boolean} True when the marker phrase is present.
 */
function isCodexResumeTimeoutError(errorValue) {
  const message = String(errorValue || "");
  const normalized = message.toLowerCase();
  return normalized.indexOf("codex resume timeout") !== -1;
}
|
|
2852
|
+
|
|
2849
2853
|
/**
|
|
2850
2854
|
* Resume an existing Codex thread and run a follow-up prompt.
|
|
2851
2855
|
* Uses `codex.resumeThread(threadId)` from @openai/codex-sdk.
|
|
@@ -3017,6 +3021,7 @@ async function resumeCodexThread(threadId, prompt, cwd, timeoutMs, extra = {}) {
|
|
|
3017
3021
|
: `Thread resume error: ${err.message}`,
|
|
3018
3022
|
sdk: "codex",
|
|
3019
3023
|
threadId: null,
|
|
3024
|
+
staleResumeState: isTimeout,
|
|
3020
3025
|
poisonedResumeState:
|
|
3021
3026
|
!isTimeout && isPoisonedCodexResumeError(err.message),
|
|
3022
3027
|
};
|
|
@@ -3194,10 +3199,12 @@ export async function launchOrResumeThread(
|
|
|
3194
3199
|
// Resume failed — fall through to fresh launch
|
|
3195
3200
|
if (
|
|
3196
3201
|
result.poisonedResumeState ||
|
|
3197
|
-
|
|
3202
|
+
result.staleResumeState ||
|
|
3203
|
+
isPoisonedCodexResumeError(result.error) ||
|
|
3204
|
+
isCodexResumeTimeoutError(result.error)
|
|
3198
3205
|
) {
|
|
3199
3206
|
console.warn(
|
|
3200
|
-
`${TAG} resume failed for task "${taskKey}" with corrupted state: ${result.error}. Dropping cached thread metadata and starting fresh.`,
|
|
3207
|
+
`${TAG} resume failed for task "${taskKey}" with stale or corrupted state: ${result.error}. Dropping cached thread metadata and starting fresh.`,
|
|
3201
3208
|
);
|
|
3202
3209
|
threadRegistry.delete(taskKey);
|
|
3203
3210
|
} else {
|
|
package/agent/agent-supervisor.mjs
CHANGED
|
@@ -30,6 +30,12 @@
|
|
|
30
30
|
*/
|
|
31
31
|
|
|
32
32
|
const TAG = "[agent-supervisor]";
|
|
33
|
+
// Cooldown durations in milliseconds (3, 5 and 5 minutes). The table is
// indexed by the number of continue signals already sent for the current
// API-error streak, and its length is the number of continue attempts allowed
// before escalating.
const API_ERROR_CONTINUE_COOLDOWNS_MS = Object.freeze(
  [3, 5, 5].map((minutes) => minutes * 60_000),
);
// An API-error streak whose previous error is older than this (15 minutes)
// is treated as a brand-new streak.
const API_ERROR_RECOVERY_RESET_MS = 15 * 60_000;
|
|
33
39
|
|
|
34
40
|
// ── Situation Types (30+ edge cases) ────────────────────────────────────────
|
|
35
41
|
|
|
@@ -140,7 +146,7 @@ const INTERVENTION_LADDER = {
|
|
|
140
146
|
[SITUATION.PRE_PUSH_FAILURE]: [INTERVENTION.INJECT_PROMPT, INTERVENTION.INJECT_PROMPT, INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.BLOCK_AND_NOTIFY],
|
|
141
147
|
|
|
142
148
|
[SITUATION.RATE_LIMITED]: [INTERVENTION.COOLDOWN, INTERVENTION.COOLDOWN, INTERVENTION.PAUSE_EXECUTOR],
|
|
143
|
-
[SITUATION.API_ERROR]: [INTERVENTION.
|
|
149
|
+
[SITUATION.API_ERROR]: [INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.REDISPATCH_TASK, INTERVENTION.BLOCK_AND_NOTIFY],
|
|
144
150
|
[SITUATION.TOKEN_OVERFLOW]: [INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.BLOCK_AND_NOTIFY],
|
|
145
151
|
[SITUATION.SESSION_EXPIRED]: [INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.FORCE_NEW_THREAD, INTERVENTION.BLOCK_AND_NOTIFY],
|
|
146
152
|
[SITUATION.MODEL_ERROR]: [INTERVENTION.BLOCK_AND_NOTIFY], // Not retryable — wrong model name
|
|
@@ -443,13 +449,15 @@ export class AgentSupervisor {
|
|
|
443
449
|
const signals = this._gatherSignals(taskId, context);
|
|
444
450
|
const situation = this._diagnose(signals, context);
|
|
445
451
|
const healthScore = this._computeHealthScore(signals);
|
|
452
|
+
const recoveryOverride = this._selectRecoveryIntervention(taskId, situation, context, state);
|
|
446
453
|
const attemptIndex = Math.min(
|
|
447
454
|
state.interventionCount,
|
|
448
455
|
(INTERVENTION_LADDER[situation] || [INTERVENTION.NONE]).length - 1,
|
|
449
456
|
);
|
|
450
|
-
const intervention =
|
|
457
|
+
const intervention = recoveryOverride?.intervention
|
|
458
|
+
|| (INTERVENTION_LADDER[situation] || [INTERVENTION.NONE])[attemptIndex];
|
|
451
459
|
const prompt = this._buildPrompt(situation, taskId, context);
|
|
452
|
-
const reason = this._buildReason(situation, signals, context);
|
|
460
|
+
const reason = recoveryOverride?.reason || this._buildReason(situation, signals, context);
|
|
453
461
|
|
|
454
462
|
// Record
|
|
455
463
|
state.situationHistory.push({ situation, ts: Date.now() });
|
|
@@ -485,6 +493,9 @@ export class AgentSupervisor {
|
|
|
485
493
|
break;
|
|
486
494
|
|
|
487
495
|
case INTERVENTION.CONTINUE_SIGNAL:
|
|
496
|
+
if (situation === SITUATION.API_ERROR) {
|
|
497
|
+
this._recordApiErrorContinue(taskId);
|
|
498
|
+
}
|
|
488
499
|
if (this._sendContinueSignal) {
|
|
489
500
|
this._sendContinueSignal(taskId);
|
|
490
501
|
}
|
|
@@ -725,6 +736,12 @@ export class AgentSupervisor {
|
|
|
725
736
|
qualityScore: state.qualityScore,
|
|
726
737
|
reviewVerdict: state.reviewVerdict,
|
|
727
738
|
reviewIssueCount: state.reviewIssues?.length || 0,
|
|
739
|
+
apiErrorRecovery: state.apiErrorRecovery
|
|
740
|
+
? {
|
|
741
|
+
...state.apiErrorRecovery,
|
|
742
|
+
cooldownRemainingMs: Math.max(0, Number(state.apiErrorRecovery.cooldownUntil || 0) - Date.now()),
|
|
743
|
+
}
|
|
744
|
+
: null,
|
|
728
745
|
recentSituations: state.situationHistory.slice(-10),
|
|
729
746
|
};
|
|
730
747
|
}
|
|
@@ -789,11 +806,104 @@ export class AgentSupervisor {
|
|
|
789
806
|
qualityScore: null,
|
|
790
807
|
reviewVerdict: null,
|
|
791
808
|
reviewIssues: null,
|
|
809
|
+
apiErrorRecovery: null,
|
|
792
810
|
});
|
|
793
811
|
}
|
|
794
812
|
return this._taskState.get(taskId);
|
|
795
813
|
}
|
|
796
814
|
|
|
815
|
+
_normalizeApiErrorSignature(context) {
|
|
816
|
+
const raw = String(context?.error || context?.output || "").trim().toLowerCase();
|
|
817
|
+
if (!raw) return "api_error";
|
|
818
|
+
return raw
|
|
819
|
+
.replace(/\s+/g, " ")
|
|
820
|
+
.replace(/\b\d{2,}\b/g, "#")
|
|
821
|
+
.slice(0, 240);
|
|
822
|
+
}
|
|
823
|
+
|
|
824
|
+
_selectRecoveryIntervention(taskId, situation, context, state) {
|
|
825
|
+
if (situation !== SITUATION.API_ERROR) {
|
|
826
|
+
if (state?.apiErrorRecovery) state.apiErrorRecovery = null;
|
|
827
|
+
return null;
|
|
828
|
+
}
|
|
829
|
+
|
|
830
|
+
const now = Date.now();
|
|
831
|
+
const signature = this._normalizeApiErrorSignature(context);
|
|
832
|
+
const current = state.apiErrorRecovery || {
|
|
833
|
+
signature,
|
|
834
|
+
continueAttempts: 0,
|
|
835
|
+
lastErrorAt: 0,
|
|
836
|
+
cooldownUntil: 0,
|
|
837
|
+
};
|
|
838
|
+
|
|
839
|
+
const shouldReset =
|
|
840
|
+
current.signature !== signature ||
|
|
841
|
+
(current.lastErrorAt > 0 && now - current.lastErrorAt > API_ERROR_RECOVERY_RESET_MS);
|
|
842
|
+
|
|
843
|
+
const nextState = shouldReset
|
|
844
|
+
? {
|
|
845
|
+
signature,
|
|
846
|
+
continueAttempts: 0,
|
|
847
|
+
lastErrorAt: now,
|
|
848
|
+
cooldownUntil: 0,
|
|
849
|
+
}
|
|
850
|
+
: {
|
|
851
|
+
...current,
|
|
852
|
+
signature,
|
|
853
|
+
lastErrorAt: now,
|
|
854
|
+
};
|
|
855
|
+
|
|
856
|
+
state.apiErrorRecovery = nextState;
|
|
857
|
+
|
|
858
|
+
if (Number(nextState.cooldownUntil || 0) > now) {
|
|
859
|
+
const remainingMs = Math.max(0, nextState.cooldownUntil - now);
|
|
860
|
+
return {
|
|
861
|
+
intervention: INTERVENTION.COOLDOWN,
|
|
862
|
+
reason: `Transient API failure on cooldown for ${Math.ceil(remainingMs / 60000)} minute(s) before retrying the same thread.`,
|
|
863
|
+
};
|
|
864
|
+
}
|
|
865
|
+
|
|
866
|
+
if (nextState.continueAttempts < API_ERROR_CONTINUE_COOLDOWNS_MS.length) {
|
|
867
|
+
const cooldownMs = API_ERROR_CONTINUE_COOLDOWNS_MS[nextState.continueAttempts];
|
|
868
|
+
return {
|
|
869
|
+
intervention: INTERVENTION.CONTINUE_SIGNAL,
|
|
870
|
+
reason: `Transient API failure — continue the current thread and back off for ${Math.ceil(cooldownMs / 60000)} minute(s) if it repeats.`,
|
|
871
|
+
};
|
|
872
|
+
}
|
|
873
|
+
|
|
874
|
+
const ladder = INTERVENTION_LADDER[SITUATION.API_ERROR] || [INTERVENTION.BLOCK_AND_NOTIFY];
|
|
875
|
+
const escalationIndex = Math.min(
|
|
876
|
+
nextState.continueAttempts - API_ERROR_CONTINUE_COOLDOWNS_MS.length,
|
|
877
|
+
ladder.length - 1,
|
|
878
|
+
);
|
|
879
|
+
const escalation = ladder[escalationIndex];
|
|
880
|
+
const escalationReason = escalation === INTERVENTION.FORCE_NEW_THREAD
|
|
881
|
+
? "Repeated API failures survived 3 continue attempts — forcing a fresh thread."
|
|
882
|
+
: escalation === INTERVENTION.REDISPATCH_TASK
|
|
883
|
+
? "Repeated API failures survived continue attempts and a fresh thread — redispatching the task."
|
|
884
|
+
: "Repeated API failures survived all automated recovery attempts — blocking for human review.";
|
|
885
|
+
return {
|
|
886
|
+
intervention: escalation,
|
|
887
|
+
reason: escalationReason,
|
|
888
|
+
};
|
|
889
|
+
}
|
|
890
|
+
|
|
891
|
+
_recordApiErrorContinue(taskId) {
|
|
892
|
+
const state = this._getTaskState(taskId);
|
|
893
|
+
if (!state?.apiErrorRecovery) return;
|
|
894
|
+
const attemptIndex = Math.min(
|
|
895
|
+
state.apiErrorRecovery.continueAttempts,
|
|
896
|
+
API_ERROR_CONTINUE_COOLDOWNS_MS.length - 1,
|
|
897
|
+
);
|
|
898
|
+
const cooldownMs = API_ERROR_CONTINUE_COOLDOWNS_MS[attemptIndex] || 0;
|
|
899
|
+
state.apiErrorRecovery = {
|
|
900
|
+
...state.apiErrorRecovery,
|
|
901
|
+
continueAttempts: Number(state.apiErrorRecovery.continueAttempts || 0) + 1,
|
|
902
|
+
cooldownUntil: cooldownMs > 0 ? Date.now() + cooldownMs : 0,
|
|
903
|
+
lastErrorAt: Date.now(),
|
|
904
|
+
};
|
|
905
|
+
}
|
|
906
|
+
|
|
797
907
|
_getTaskState(taskId) {
|
|
798
908
|
return this._taskState.get(taskId) || null;
|
|
799
909
|
}
|
package/infra/monitor.mjs
CHANGED
|
@@ -663,6 +663,7 @@ async function ensureWorkflowAutomationEngine() {
|
|
|
663
663
|
"template-task-lifecycle",
|
|
664
664
|
"template-task-finalization-guard",
|
|
665
665
|
"template-agent-session-monitor",
|
|
666
|
+
"template-github-kanban-sync",
|
|
666
667
|
],
|
|
667
668
|
});
|
|
668
669
|
if (Number(reconcile?.autoUpdated || 0) > 0) {
|
|
@@ -673,6 +674,15 @@ async function ensureWorkflowAutomationEngine() {
|
|
|
673
674
|
: ""),
|
|
674
675
|
);
|
|
675
676
|
}
|
|
677
|
+
if (
|
|
678
|
+
typeof engine.load === "function" &&
|
|
679
|
+
(Number(reconcile?.autoUpdated || 0) > 0 ||
|
|
680
|
+
Number(reconcile?.metadataUpdated || 0) > 0 ||
|
|
681
|
+
(Array.isArray(reconcile?.updatedWorkflowIds) &&
|
|
682
|
+
reconcile.updatedWorkflowIds.length > 0))
|
|
683
|
+
) {
|
|
684
|
+
engine.load();
|
|
685
|
+
}
|
|
676
686
|
}
|
|
677
687
|
for (const summary of engine.list?.() || []) {
|
|
678
688
|
const installedFrom = String(summary?.metadata?.installedFrom || "").trim();
|
|
@@ -724,6 +734,13 @@ async function ensureWorkflowAutomationEngine() {
|
|
|
724
734
|
);
|
|
725
735
|
}
|
|
726
736
|
}
|
|
737
|
+
|
|
738
|
+
// Resume runs paused by a previous monitor shutdown after services are wired.
|
|
739
|
+
if (typeof engine.resumeInterruptedRuns === "function") {
|
|
740
|
+
engine.resumeInterruptedRuns().catch((err) => {
|
|
741
|
+
console.warn(`[workflows] Failed to resume interrupted runs: ${err?.message || err}`);
|
|
742
|
+
});
|
|
743
|
+
}
|
|
727
744
|
workflowAutomationInitDone = true;
|
|
728
745
|
return engine;
|
|
729
746
|
} catch (err) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bosun",
|
|
3
|
-
"version": "0.41.3",
|
|
3
|
+
"version": "0.41.5",
|
|
4
4
|
"description": "Bosun Autonomous Engineering — manages AI agent executors with failover, extremely powerful workflow builder, and a massive amount of included default workflow templates for autonomous engineering, creates PRs via Vibe-Kanban API, and sends Telegram notifications. Supports N executors with weighted distribution, multi-repo projects, and auto-setup.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "Apache-2.0",
|
|
package/server/setup-web-server.mjs
CHANGED
|
@@ -39,6 +39,61 @@ function trimTrailingSlashes(value) {
|
|
|
39
39
|
return out;
|
|
40
40
|
}
|
|
41
41
|
|
|
42
|
+
/**
 * Tell whether a URL points at an Azure OpenAI host.
 *
 * Accepts a URL instance or anything that can be stringified and parsed as
 * an absolute URL. The host matches when it is exactly "openai.azure.com" or
 * a subdomain of it (case-insensitive). Unparseable input yields false.
 *
 * @param {URL|string|unknown} value - URL instance or URL text.
 * @returns {boolean} True for Azure OpenAI hosts.
 */
function isAzureOpenAIHost(value) {
  const AZURE_ROOT = "openai.azure.com";
  let url = value;
  if (!(value instanceof URL)) {
    try {
      url = new URL(String(value || "").trim());
    } catch {
      return false;
    }
  }
  const hostname = String(url.hostname || "").toLowerCase();
  if (hostname === AZURE_ROOT) {
    return true;
  }
  return hostname.endsWith(`.${AZURE_ROOT}`);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Build the URL and headers for a "list models" probe request.
 *
 * Resolution rules, applied to the parsed base URL (default
 * "https://api.openai.com"):
 *  - a path already ending in "/models" is used as given;
 *  - Azure OpenAI hosts, or paths under "/openai", use "/openai/models" and
 *    receive a default "api-version" query parameter when none is present;
 *  - a path containing a "/v1" segment is cut after that segment and
 *    "/models" is appended, with the query string dropped;
 *  - any other path gets "/v1/models" appended, with the query string dropped.
 *
 * Azure hosts authenticate with an "api-key" header; everything else uses a
 * bearer "Authorization" header. A base URL that cannot be parsed falls back
 * to plain string concatenation with bearer auth.
 *
 * Path comparisons are case-insensitive, but the rewritten path keeps the
 * caller's original casing because URL paths are case-sensitive.
 *
 * @param {object} [options]
 * @param {string} [options.apiKey=""] - API key; no auth header when empty.
 * @param {string} [options.baseUrl=""] - Provider base URL.
 * @returns {{endpoint: string, headers: Record<string, string>}} Probe request.
 */
function buildModelsProbeRequest({ apiKey = "", baseUrl = "" } = {}) {
  const trimmedBase = String(baseUrl || "").trim();
  const fallbackBase = "https://api.openai.com";
  const headers = { "Content-Type": "application/json" };

  try {
    const parsed = new URL(trimmedBase || fallbackBase);
    const pathname = trimTrailingSlashes(parsed.pathname || "");
    const lowerPath = pathname.toLowerCase();
    const isAzure = isAzureOpenAIHost(parsed);

    if (apiKey) {
      if (isAzure) headers["api-key"] = apiKey;
      else headers.Authorization = `Bearer ${apiKey}`;
    }

    if (lowerPath.endsWith("/models")) {
      return { endpoint: parsed.toString(), headers };
    }

    if (isAzure || lowerPath === "/openai" || lowerPath.startsWith("/openai/")) {
      parsed.pathname = "/openai/models";
      if (!parsed.searchParams.has("api-version")) {
        parsed.searchParams.set("api-version", "2024-10-21");
      }
      return { endpoint: parsed.toString(), headers };
    }

    // Match on the original pathname (case-insensitively) so that the prefix
    // written back keeps its casing, e.g. "/MyGateway/v1".
    const v1Match = pathname.match(/^(.*\/v1)(?:\/.*)?$/i);
    if (v1Match) {
      parsed.pathname = `${v1Match[1]}/models`;
      parsed.search = "";
      return { endpoint: parsed.toString(), headers };
    }

    parsed.pathname = `${pathname || ""}/v1/models`;
    parsed.search = "";
    return { endpoint: parsed.toString(), headers };
  } catch {
    // Unparseable base URL: best-effort concatenation with bearer auth.
    const resolvedBase = trimTrailingSlashes(trimmedBase || fallbackBase);
    if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
    return { endpoint: `${resolvedBase}/v1/models`, headers };
  }
}
|
|
96
|
+
|
|
42
97
|
|
|
43
98
|
// ── Vendor file serving (hoisting-safe) ───────────────────────────────────────────
|
|
44
99
|
// Resolution order:
|
|
@@ -1750,13 +1805,9 @@ async function handleModelsProbe(body) {
|
|
|
1750
1805
|
}
|
|
1751
1806
|
|
|
1752
1807
|
// For OpenAI / compatible endpoints, try GET /v1/models
|
|
1753
|
-
const
|
|
1754
|
-
const endpoint = `${resolvedBase}/v1/models`;
|
|
1808
|
+
const { endpoint, headers } = buildModelsProbeRequest({ apiKey, baseUrl });
|
|
1755
1809
|
|
|
1756
1810
|
try {
|
|
1757
|
-
const headers = { "Content-Type": "application/json" };
|
|
1758
|
-
if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`;
|
|
1759
|
-
|
|
1760
1811
|
const controller = new AbortController();
|
|
1761
1812
|
const timeout = setTimeout(() => controller.abort(), 8000);
|
|
1762
1813
|
|
|
@@ -3008,7 +3059,9 @@ export async function startSetupServer(options = {}) {
|
|
|
3008
3059
|
export {
|
|
3009
3060
|
applyTelegramMiniAppSetupEnv,
|
|
3010
3061
|
applyNonBlockingSetupEnvDefaults,
|
|
3062
|
+
buildModelsProbeRequest,
|
|
3011
3063
|
handleTelegramChatIdLookup,
|
|
3064
|
+
isAzureOpenAIHost,
|
|
3012
3065
|
normalizeWorkflowTemplateOverrides,
|
|
3013
3066
|
normalizeTelegramUiPort,
|
|
3014
3067
|
normalizeRepoConfigEntry,
|
package/server/ui-server.mjs
CHANGED
|
@@ -7032,13 +7032,58 @@ function extractSafeErrorMessage(payload) {
|
|
|
7032
7032
|
return "Internal server error";
|
|
7033
7033
|
}
|
|
7034
7034
|
|
|
7035
|
+
/**
 * Create a fresh request diagnostic identifier.
 *
 * @returns {string} "req_" followed by 12 hexadecimal characters
 *   (6 random bytes).
 */
function createRequestDiagnosticId() {
  const suffix = randomBytes(6).toString("hex");
  return "req_" + suffix;
}
|
|
7038
|
+
|
|
7039
|
+
/**
 * Return the diagnostic id attached to a response object, creating and
 * caching one on `res.__bosunDiagnosticId` when it has none yet.
 *
 * When `res` is not an object, a new id is returned and nothing is cached.
 *
 * @param {object} res - Response object used as the cache holder.
 * @returns {string} Diagnostic id for this response.
 */
function ensureResponseDiagnosticId(res) {
  const canCache = Boolean(res) && typeof res === "object";
  if (!canCache) {
    return createRequestDiagnosticId();
  }
  // Only assigns when no id has been stored so far.
  res.__bosunDiagnosticId ||= createRequestDiagnosticId();
  return res.__bosunDiagnosticId;
}
|
|
7046
|
+
|
|
7047
|
+
/**
 * Convert a failure payload into a plain structure suitable for logging.
 *
 * Error instances become `{ name, message, stack?, code?, cause? }`; the
 * `cause` chain is followed recursively while `depth` is below 3. Any other
 * payload is passed through `makeJsonSafe` with `maxDepth: 6`.
 *
 * @param {unknown} payload - Error or arbitrary response payload.
 * @param {number} [depth=0] - Current depth in the `cause` chain.
 * @returns {unknown} Loggable description of the payload.
 */
function describePayloadForErrorLog(payload, depth = 0) {
  if (!(payload instanceof Error)) {
    return makeJsonSafe(payload, { maxDepth: 6 });
  }

  const summary = {
    name: String(payload.name || "Error"),
    message: String(payload.message || ""),
  };
  if (payload.stack) {
    summary.stack = String(payload.stack);
  }
  if (payload.code !== null && payload.code !== undefined) {
    summary.code = String(payload.code);
  }
  const mayDescend = depth < 3;
  if (mayDescend && payload.cause) {
    summary.cause = describePayloadForErrorLog(payload.cause, depth + 1);
  }
  return summary;
}
|
|
7062
|
+
|
|
7063
|
+
/**
 * Mask the values of credential-like query parameters before logging.
 *
 * The query string is returned unchanged when it contains no parameter whose
 * name looks sensitive, so ordinary log output keeps its exact shape.
 *
 * @param {unknown} query - Raw query string, with or without a leading "?".
 * @returns {string} Query string that is safe to write to logs.
 */
function redactQueryForErrorLog(query) {
  const raw = String(query || "");
  if (!raw) return "";
  const sensitiveName = /token|secret|passw|key|auth|signature/i;
  const params = new URLSearchParams(raw);
  let redacted = false;
  // Snapshot the names first; `set` mutates the collection being iterated.
  for (const name of new Set(params.keys())) {
    if (sensitiveName.test(name)) {
      params.set(name, "REDACTED");
      redacted = true;
    }
  }
  return redacted ? `?${params.toString()}` : raw;
}

/**
 * Write a structured error log entry for a failed (5xx) JSON response.
 *
 * Request details come from `res.__bosunRequestContext` when present. The
 * query string is redacted first because this server accepts session tokens
 * via `?token=...`, and those must not end up in log output.
 *
 * @param {object} res - Response object carrying the request context.
 * @param {number} statusCode - HTTP status being returned.
 * @param {unknown} payload - Original payload or Error that caused the failure.
 * @param {string|null} diagnosticId - Id echoed to the client for correlation.
 * @returns {void}
 */
function logJsonFailure(res, statusCode, payload, diagnosticId) {
  const requestContext = res?.__bosunRequestContext || {};
  console.error("[ui-server] request failed", {
    diagnosticId,
    statusCode,
    method: requestContext.method || null,
    path: requestContext.path || null,
    query: redactQueryForErrorLog(requestContext.query),
    payload: describePayloadForErrorLog(payload),
  });
}
|
|
7074
|
+
|
|
7035
7075
|
function jsonResponse(res, statusCode, payload) {
|
|
7076
|
+
const diagnosticId = statusCode >= 500 ? ensureResponseDiagnosticId(res) : null;
|
|
7077
|
+
if (statusCode >= 500) {
|
|
7078
|
+
logJsonFailure(res, statusCode, payload, diagnosticId);
|
|
7079
|
+
}
|
|
7036
7080
|
const normalizedPayload = normalizeJsonResponsePayload(payload);
|
|
7037
7081
|
const safePayload =
|
|
7038
7082
|
statusCode >= 500
|
|
7039
7083
|
? {
|
|
7040
7084
|
ok: false,
|
|
7041
7085
|
error: extractSafeErrorMessage(normalizedPayload),
|
|
7086
|
+
diagnosticId,
|
|
7042
7087
|
}
|
|
7043
7088
|
: normalizedPayload;
|
|
7044
7089
|
const body = JSON.stringify(safePayload, null, 2);
|
|
@@ -7890,6 +7935,129 @@ function withTaskRuntimeSnapshot(task) {
|
|
|
7890
7935
|
};
|
|
7891
7936
|
}
|
|
7892
7937
|
|
|
7938
|
+
/**
 * Normalize free-form diagnostic text for display and comparison.
 *
 * Falsy values become "", the text is trimmed, and every run of whitespace
 * is collapsed into a single space.
 *
 * @param {unknown} value - Raw text or any value.
 * @returns {string} Single-line, trimmed text (possibly empty).
 */
function normalizeTaskDiagnosticText(value) {
  const trimmed = String(value || "").trim();
  return trimmed.replace(/\s+/g, " ");
}
|
|
7942
|
+
|
|
7943
|
+
/**
 * Classify why a task is failing or blocked into a stable, UI-friendly cause.
 *
 * Checks run in priority order and the first match wins:
 * Codex resume timeout, corrupted Codex resume state, rate limit, token
 * overflow, API error (reported as "api_error_cooldown" while the
 * supervisor's cooldown is still in the future, else "api_error_recovery"),
 * dependency block, generic block, generic runtime error.
 *
 * @param {object} task - Task record; reads `lastError`, `blockedReason`, `errorPattern`.
 * @param {object|null} [supervisorDiagnostics=null] - Supervisor diagnostics;
 *   reads `apiErrorRecovery`.
 * @returns {{code: string, title: string, severity: string, summary: string}|null}
 *   Cause descriptor, or null when nothing indicates a problem.
 */
function buildTaskStableCause(task, supervisorDiagnostics = null) {
  const lastError = normalizeTaskDiagnosticText(task?.lastError || "");
  const blockedReason = normalizeTaskDiagnosticText(task?.blockedReason || "");
  const errorPattern = String(task?.errorPattern || "").trim().toLowerCase();
  const apiErrorRecovery = supervisorDiagnostics?.apiErrorRecovery || null;
  const apiSignature = normalizeTaskDiagnosticText(apiErrorRecovery?.signature || "");
  const lastErrorLower = lastError.toLowerCase();
  const blockedReasonLower = blockedReason.toLowerCase();

  const cause = (code, title, severity, summary) => ({ code, title, severity, summary });
  const errorMentions = (needle) => lastErrorLower.includes(needle);

  if (errorMentions("codex resume timeout")) {
    return cause(
      "codex_resume_timeout",
      "Codex resume timed out",
      "warning",
      "Bosun timed out while resuming a cached Codex thread and will start fresh on the next attempt.",
    );
  }

  const corruptionMarkers = [
    "invalid_encrypted_content",
    "state db missing rollout path",
    "could not be verified",
    "tool_call_id",
  ];
  if (corruptionMarkers.some((marker) => errorMentions(marker))) {
    return cause(
      "codex_resume_corrupted_state",
      "Codex resume state is corrupted",
      "error",
      "Bosun detected poisoned Codex thread metadata and will discard the cached resume state.",
    );
  }

  switch (errorPattern) {
    case "rate_limit":
      return cause(
        "agent_rate_limit",
        "Agent is rate limited",
        "warning",
        "The assigned agent hit a rate limit and Bosun is waiting before retrying.",
      );
    case "token_overflow":
      return cause(
        "token_overflow",
        "Context window exhausted",
        "error",
        "The current task exceeded the model context budget and needs a smaller prompt or a fresh session.",
      );
    default:
      break;
  }

  if (errorPattern === "api_error" || apiErrorRecovery) {
    const coolingDown = Number(apiErrorRecovery?.cooldownUntil || 0) > Date.now();
    return cause(
      coolingDown ? "api_error_cooldown" : "api_error_recovery",
      "Transient API failure",
      "warning",
      "Bosun detected a backend API failure and is applying the task-level recovery ladder before escalating.",
    );
  }

  if (blockedReason) {
    if (blockedReasonLower.includes("dependency")) {
      return cause(
        "dependency_blocked",
        "Dependency is still blocked",
        "warning",
        "Bosun is holding this task until one or more dependencies finish.",
      );
    }
    return cause(
      "task_blocked",
      "Task is blocked",
      "warning",
      "Bosun recorded a blocking condition for this task and will not dispatch it until the condition clears.",
    );
  }

  if (lastError || apiSignature) {
    return cause(
      "agent_runtime_error",
      "Agent runtime error",
      "error",
      "Bosun recorded an agent-side runtime failure for this task.",
    );
  }

  return null;
}
|
|
8025
|
+
|
|
8026
|
+
/**
 * Assemble the diagnostics object attached to a task detail response.
 *
 * Combines the classified stable cause, normalized error/blocking text, the
 * effective cooldown deadline (the task's own, else the supervisor's
 * API-error cooldown) and a JSON-safe summary of the supervisor diagnostics.
 *
 * @param {object} task - Task record.
 * @param {object|null} [supervisorDiagnostics=null] - Supervisor diagnostics, if any.
 * @returns {object|null} Diagnostics object, or null when `task` is not an
 *   object or every field would be empty.
 */
function buildTaskDiagnostics(task, supervisorDiagnostics = null) {
  if (task === null || typeof task !== "object") {
    return null;
  }

  let apiErrorRecovery = null;
  if (supervisorDiagnostics?.apiErrorRecovery) {
    apiErrorRecovery = makeJsonSafe(supervisorDiagnostics.apiErrorRecovery, { maxDepth: 4 });
  }

  const textOrNull = (value) => normalizeTaskDiagnosticText(value || "") || null;

  const stableCause = buildTaskStableCause(task, supervisorDiagnostics);
  const lastError = textOrNull(task?.lastError);
  const errorPattern = textOrNull(task?.errorPattern);
  const blockedReason = textOrNull(task?.blockedReason);
  const cooldownUntil = task?.cooldownUntil || apiErrorRecovery?.cooldownUntil || null;

  let supervisor = null;
  if (supervisorDiagnostics) {
    let lastDecision = null;
    if (supervisorDiagnostics.lastDecision) {
      lastDecision = makeJsonSafe(supervisorDiagnostics.lastDecision, { maxDepth: 3 });
    }
    supervisor = {
      interventionCount: Number(supervisorDiagnostics.interventionCount || 0),
      lastIntervention: supervisorDiagnostics.lastIntervention || null,
      lastDecision,
      apiErrorRecovery,
    };
  }

  const report = {
    stableCause,
    lastError,
    errorPattern,
    blockedReason,
    cooldownUntil,
    supervisor,
  };
  // Nothing worth reporting when every field is empty.
  const hasContent = Object.values(report).some(Boolean);
  return hasContent ? report : null;
}
|
|
8060
|
+
|
|
7893
8061
|
async function maybeStartTaskFromLifecycleAction({
|
|
7894
8062
|
taskId,
|
|
7895
8063
|
updatedTask,
|
|
@@ -11364,6 +11532,12 @@ async function handleApi(req, res, url) {
|
|
|
11364
11532
|
reqUrl: url,
|
|
11365
11533
|
adapter,
|
|
11366
11534
|
});
|
|
11535
|
+
const supervisor = typeof uiDeps.getAgentSupervisor === "function"
|
|
11536
|
+
? uiDeps.getAgentSupervisor()
|
|
11537
|
+
: null;
|
|
11538
|
+
const supervisorDiagnostics = typeof supervisor?.getTaskDiagnostics === "function"
|
|
11539
|
+
? supervisor.getTaskDiagnostics(detailTask.id)
|
|
11540
|
+
: null;
|
|
11367
11541
|
|
|
11368
11542
|
const sprintId = resolveTaskSprintId(detailTask);
|
|
11369
11543
|
const sprintDag = includeDag && sprintId ? await getSprintDagData(sprintId) : null;
|
|
@@ -11373,6 +11547,7 @@ async function handleApi(req, res, url) {
|
|
|
11373
11547
|
workflowRuns: mergedWorkflowRuns,
|
|
11374
11548
|
workspaceDir: workspaceContext?.workspaceDir || repoRoot,
|
|
11375
11549
|
});
|
|
11550
|
+
const diagnostics = buildTaskDiagnostics(detailTask, supervisorDiagnostics);
|
|
11376
11551
|
|
|
11377
11552
|
detailTask.meta = {
|
|
11378
11553
|
...(detailTask.meta || {}),
|
|
@@ -11381,6 +11556,7 @@ async function handleApi(req, res, url) {
|
|
|
11381
11556
|
timelineCount: Array.isArray(detailTask.timeline) ? detailTask.timeline.length : 0,
|
|
11382
11557
|
canStart,
|
|
11383
11558
|
blockedContext,
|
|
11559
|
+
...(diagnostics ? { diagnostics } : {}),
|
|
11384
11560
|
...(sprintId ? { sprintId } : {}),
|
|
11385
11561
|
...(sprintDag ? { sprintDag: sprintDag.data } : {}),
|
|
11386
11562
|
...(globalDag ? { dagOfDags: globalDag.data } : {}),
|
|
@@ -11389,6 +11565,7 @@ async function handleApi(req, res, url) {
|
|
|
11389
11565
|
if (globalDag) detailTask.dagOfDags = globalDag.data;
|
|
11390
11566
|
detailTask.canStart = canStart;
|
|
11391
11567
|
detailTask.blockedContext = blockedContext;
|
|
11568
|
+
if (diagnostics) detailTask.diagnostics = diagnostics;
|
|
11392
11569
|
detailTask = withTaskRuntimeSnapshot(detailTask);
|
|
11393
11570
|
}
|
|
11394
11571
|
jsonResponse(res, 200, { ok: true, data: detailTask });
|
|
@@ -19169,8 +19346,16 @@ export async function startTelegramUiServer(options = {}) {
|
|
|
19169
19346
|
req.url || "/",
|
|
19170
19347
|
`http://${req.headers.host || "localhost"}`,
|
|
19171
19348
|
);
|
|
19349
|
+
res.__bosunRequestContext = {
|
|
19350
|
+
diagnosticId: ensureResponseDiagnosticId(res),
|
|
19351
|
+
method: String(req?.method || "GET").toUpperCase(),
|
|
19352
|
+
path: url.pathname,
|
|
19353
|
+
query: url.search || "",
|
|
19354
|
+
};
|
|
19172
19355
|
const webhookPath = getGitHubWebhookPath();
|
|
19173
19356
|
|
|
19357
|
+
try {
|
|
19358
|
+
|
|
19174
19359
|
// Token exchange: ?token=<hex> → set session cookie and redirect to clean URL
|
|
19175
19360
|
const qToken = url.searchParams.get("token");
|
|
19176
19361
|
if (qToken && sessionToken) {
|
|
@@ -19312,6 +19497,21 @@ export async function startTelegramUiServer(options = {}) {
|
|
|
19312
19497
|
}
|
|
19313
19498
|
}
|
|
19314
19499
|
await handleStatic(req, res, url);
|
|
19500
|
+
} catch (err) {
|
|
19501
|
+
if (res.headersSent) {
|
|
19502
|
+
console.error("[ui-server] unhandled request failure after headers sent", {
|
|
19503
|
+
diagnosticId: ensureResponseDiagnosticId(res),
|
|
19504
|
+
payload: describePayloadForErrorLog(err),
|
|
19505
|
+
});
|
|
19506
|
+
try {
|
|
19507
|
+
res.destroy?.(err);
|
|
19508
|
+
} catch {
|
|
19509
|
+
/* best effort */
|
|
19510
|
+
}
|
|
19511
|
+
return;
|
|
19512
|
+
}
|
|
19513
|
+
jsonResponse(res, 500, err);
|
|
19514
|
+
}
|
|
19315
19515
|
};
|
|
19316
19516
|
|
|
19317
19517
|
try {
|
package/tools/syntax-check.mjs
CHANGED
|
@@ -54,7 +54,14 @@ function validateModuleSyntax(filePath) {
|
|
|
54
54
|
function validateBrowserModuleSyntax(filePath) {
|
|
55
55
|
const source = readFileSync(filePath, "utf8");
|
|
56
56
|
const mod = new vm.SourceTextModule(source, { identifier: filePath });
|
|
57
|
-
|
|
57
|
+
let hasTLA = false;
|
|
58
|
+
const tlaProp = mod.hasTopLevelAwait;
|
|
59
|
+
if (typeof tlaProp === "function") {
|
|
60
|
+
hasTLA = !!tlaProp.call(mod);
|
|
61
|
+
} else if (typeof tlaProp === "boolean") {
|
|
62
|
+
hasTLA = tlaProp;
|
|
63
|
+
}
|
|
64
|
+
if (hasTLA) {
|
|
58
65
|
throw new Error(
|
|
59
66
|
"Top-level await is not allowed in browser-served modules because embedded WebViews can fail with 'Unexpected reserved word'.",
|
|
60
67
|
);
|