bosun 0.42.0 → 0.42.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +12 -0
- package/README.md +2 -0
- package/agent/agent-pool.mjs +34 -1
- package/agent/agent-work-report.mjs +89 -3
- package/agent/analyze-agent-work-helpers.mjs +14 -0
- package/agent/analyze-agent-work.mjs +23 -3
- package/agent/primary-agent.mjs +23 -1
- package/bosun-tui.mjs +4 -3
- package/bosun.schema.json +1 -1
- package/config/config.mjs +58 -0
- package/config/workspace-health.mjs +36 -6
- package/git/diff-stats.mjs +550 -124
- package/github/github-app-auth.mjs +9 -5
- package/infra/maintenance.mjs +13 -6
- package/infra/monitor.mjs +398 -10
- package/infra/runtime-accumulator.mjs +9 -1
- package/infra/session-tracker.mjs +163 -1
- package/infra/tui-bridge.mjs +415 -0
- package/infra/worktree-recovery-state.mjs +159 -0
- package/kanban/kanban-adapter.mjs +41 -8
- package/lib/repo-map.mjs +411 -0
- package/package.json +140 -137
- package/server/ui-server.mjs +953 -59
- package/shell/codex-config.mjs +34 -8
- package/task/task-cli.mjs +93 -19
- package/task/task-executor.mjs +397 -8
- package/task/task-store.mjs +194 -1
- package/telegram/telegram-bot.mjs +267 -18
- package/tools/vitest-runner.mjs +108 -0
- package/tui/app.mjs +252 -148
- package/tui/components/status-header.mjs +88 -131
- package/tui/lib/ws-bridge.mjs +125 -35
- package/tui/screens/agents-screen-helpers.mjs +219 -0
- package/tui/screens/agents.mjs +287 -270
- package/tui/screens/status.mjs +51 -189
- package/tui/screens/tasks.mjs +41 -253
- package/ui/app.js +52 -23
- package/ui/components/chat-view.js +263 -84
- package/ui/components/diff-viewer.js +324 -140
- package/ui/components/kanban-board.js +13 -9
- package/ui/components/session-list.js +111 -41
- package/ui/demo-defaults.js +481 -59
- package/ui/demo.html +32 -0
- package/ui/modules/session-api.js +320 -5
- package/ui/modules/stream-timeline.js +356 -0
- package/ui/modules/telegram.js +5 -2
- package/ui/modules/worktree-recovery.js +85 -0
- package/ui/styles.css +44 -0
- package/ui/tabs/chat.js +19 -4
- package/ui/tabs/dashboard.js +22 -0
- package/ui/tabs/infra.js +25 -0
- package/ui/tabs/tasks.js +119 -11
- package/voice/voice-auth-manager.mjs +10 -5
- package/workflow/workflow-engine.mjs +179 -1
- package/workflow/workflow-nodes.mjs +872 -16
- package/workflow/workflow-templates.mjs +4 -0
- package/workflow-templates/github.mjs +2 -1
- package/workflow-templates/planning.mjs +2 -1
- package/workflow-templates/sub-workflows.mjs +10 -0
- package/workflow-templates/task-batch.mjs +9 -8
- package/workflow-templates/task-execution.mjs +30 -12
- package/workflow-templates/task-lifecycle.mjs +59 -4
- package/workspace/shared-knowledge.mjs +409 -155
|
@@ -384,7 +384,9 @@ export function getAppId() {
|
|
|
384
384
|
|
|
385
385
|
// ── OAuth state persistence ───────────────────────────────────────────────────
|
|
386
386
|
|
|
387
|
-
|
|
387
|
+
/**
 * Builds the path of the persisted GitHub OAuth state file:
 * `<home>/.bosun/github-auth-state.json`.
 *
 * `homedir()` is evaluated on every call rather than once at import time.
 *
 * @returns {string} Path to the auth-state JSON file.
 */
function getAuthStatePath() {
  const bosunDir = join(homedir(), ".bosun");
  return join(bosunDir, "github-auth-state.json");
}
|
|
388
390
|
|
|
389
391
|
/**
|
|
390
392
|
* Saves OAuth user token state to ~/.bosun/github-auth-state.json.
|
|
@@ -392,11 +394,12 @@ const AUTH_STATE_PATH = join(homedir(), ".bosun", "github-auth-state.json");
|
|
|
392
394
|
* @param {{ user: object, accessToken: string, tokenType: string, scope: string, savedAt: string, installationIds: number[] }} state
|
|
393
395
|
*/
|
|
394
396
|
export function saveOAuthState(state) {
  const authStatePath = getAuthStatePath();
  const dir = dirname(authStatePath);
  if (!existsSync(dir)) {
    mkdirSync(dir, { recursive: true });
  }
  // The payload carries an OAuth access token, so the file is created
  // owner-read/write only instead of inheriting the process umask default.
  // `mode` only takes effect when the file is created; an already existing
  // file keeps whatever permissions it has.
  writeFileSync(authStatePath, JSON.stringify(state, null, 2), {
    encoding: "utf8",
    mode: 0o600,
  });
}
|
|
401
404
|
|
|
402
405
|
/**
|
|
@@ -407,8 +410,9 @@ export function saveOAuthState(state) {
|
|
|
407
410
|
*/
|
|
408
411
|
export function loadOAuthState() {
|
|
409
412
|
try {
|
|
410
|
-
|
|
411
|
-
|
|
413
|
+
const authStatePath = getAuthStatePath();
|
|
414
|
+
if (!existsSync(authStatePath)) return null;
|
|
415
|
+
const raw = readFileSync(authStatePath, "utf8");
|
|
412
416
|
const parsed = JSON.parse(raw);
|
|
413
417
|
if (typeof parsed !== "object" || parsed === null) return null;
|
|
414
418
|
return parsed;
|
package/infra/maintenance.mjs
CHANGED
|
@@ -117,11 +117,15 @@ function clearBranchSyncWarning(key) {
|
|
|
117
117
|
function logThrottledBranchSync(
|
|
118
118
|
key,
|
|
119
119
|
message,
|
|
120
|
-
{
|
|
120
|
+
levelOrOptions = {},
|
|
121
|
+
) {
|
|
122
|
+
const options = typeof levelOrOptions === "string"
|
|
123
|
+
? { level: levelOrOptions }
|
|
124
|
+
: (levelOrOptions || {});
|
|
125
|
+
const {
|
|
121
126
|
level = "warn",
|
|
122
127
|
throttleMs = BRANCH_SYNC_LOG_THROTTLE_MS,
|
|
123
|
-
} =
|
|
124
|
-
) {
|
|
128
|
+
} = options;
|
|
125
129
|
const normalizedKey = String(key || "default").trim() || "default";
|
|
126
130
|
const now = Date.now();
|
|
127
131
|
const state = branchSyncLogState.get(normalizedKey) || {
|
|
@@ -147,11 +151,12 @@ function logThrottledBranchSync(
|
|
|
147
151
|
: "";
|
|
148
152
|
const line = `${message}${suffix}`;
|
|
149
153
|
|
|
154
|
+
const isConsoleLogLevel = level === "info" || level === "log";
|
|
150
155
|
if (level === "error") {
|
|
151
156
|
console.error(line);
|
|
152
157
|
} else if (level === "info") {
|
|
153
|
-
console.
|
|
154
|
-
} else if (
|
|
158
|
+
console.log(line);
|
|
159
|
+
} else if (isConsoleLogLevel) {
|
|
155
160
|
console.log(line);
|
|
156
161
|
} else {
|
|
157
162
|
console.warn(line);
|
|
@@ -702,7 +707,7 @@ function parsePidFile(raw) {
|
|
|
702
707
|
return { pid: Number(text), raw: text };
|
|
703
708
|
}
|
|
704
709
|
|
|
705
|
-
function formatPidFileSummary(parsed) {
|
|
710
|
+
export function formatPidFileSummary(parsed) {
|
|
706
711
|
const pid = Number(parsed?.pid);
|
|
707
712
|
if (Number.isFinite(pid) && pid > 0) return String(pid);
|
|
708
713
|
const raw = String(parsed?.raw || "").replace(/\s+/g, " ").trim();
|
|
@@ -1144,6 +1149,7 @@ export function syncLocalTrackingBranches(repoRoot, branches) {
|
|
|
1144
1149
|
windowsHide: true,
|
|
1145
1150
|
});
|
|
1146
1151
|
if (statusCheck.stdout?.trim()) {
|
|
1152
|
+
console.log(`[maintenance] local '${branch}' diverged (${ahead}↑ ${behind}↓) but has uncommitted changes — skipping`);
|
|
1147
1153
|
logThrottledBranchSync(
|
|
1148
1154
|
`sync:${branch}:diverged-dirty`,
|
|
1149
1155
|
`[maintenance] local '${branch}' diverged (${ahead}↑ ${behind}↓) but has uncommitted changes — skipping`,
|
|
@@ -1205,6 +1211,7 @@ export function syncLocalTrackingBranches(repoRoot, branches) {
|
|
|
1205
1211
|
windowsHide: true,
|
|
1206
1212
|
});
|
|
1207
1213
|
if (statusCheck.stdout?.trim()) {
|
|
1214
|
+
console.log(`[maintenance] '${branch}' is checked out with uncommitted changes — skipping pull`);
|
|
1208
1215
|
logThrottledBranchSync(
|
|
1209
1216
|
`sync:${branch}:dirty-pull-skip`,
|
|
1210
1217
|
`[maintenance] '${branch}' is checked out with uncommitted changes — skipping pull`,
|
package/infra/monitor.mjs
CHANGED
|
@@ -58,7 +58,11 @@ import {
|
|
|
58
58
|
import { startAnalyzer, stopAnalyzer } from "../agent/agent-work-analyzer.mjs";
|
|
59
59
|
import {
|
|
60
60
|
generateWeeklyAgentWorkReport,
|
|
61
|
+
getNextWeeklyReportTime,
|
|
62
|
+
getWeeklyReportStatePath,
|
|
63
|
+
readWeeklyReportScheduleState,
|
|
61
64
|
shouldSendWeeklyReport,
|
|
65
|
+
writeWeeklyReportScheduleState,
|
|
62
66
|
} from "../agent/agent-work-report.mjs";
|
|
63
67
|
|
|
64
68
|
import {
|
|
@@ -256,6 +260,9 @@ const ANOMALY_SIGNAL_PATH = resolve(
|
|
|
256
260
|
);
|
|
257
261
|
|
|
258
262
|
const AGENT_ALERT_POLL_MS = 10_000;
|
|
263
|
+
const AGENT_ALERTS_REPLAY_STARTUP = isTruthyFlag(
|
|
264
|
+
process.env.AGENT_ALERTS_REPLAY_STARTUP,
|
|
265
|
+
);
|
|
259
266
|
let agentWorkAnalyzerActive = false;
|
|
260
267
|
let agentAlertsOffset = 0;
|
|
261
268
|
let agentAlertsTimer = null;
|
|
@@ -319,6 +326,25 @@ function saveAgentAlertsState() {
|
|
|
319
326
|
}
|
|
320
327
|
}
|
|
321
328
|
|
|
329
|
+
/**
 * Chooses the starting `agentAlertsOffset` before the alert tailer begins polling.
 *
 * - Replay mode (`AGENT_ALERTS_REPLAY_STARTUP`): offset is reset to 0 and the
 *   in-memory dedup map is emptied, so persisted state is not consulted.
 * - Otherwise: persisted state is loaded first. If that leaves a positive
 *   offset it is kept; if not and the alerts file exists, the offset becomes
 *   the file's current size (0 when the size cannot be read).
 */
function initializeAgentAlertsOffset() {
  if (!AGENT_ALERTS_REPLAY_STARTUP) {
    loadAgentAlertsState();
    if (agentAlertsOffset > 0) return;

    const alertsFile = getAgentAlertsPath();
    if (!existsSync(alertsFile)) return;

    try {
      agentAlertsOffset = statSync(alertsFile).size;
    } catch {
      // Size unavailable: fall back to reading from the start.
      agentAlertsOffset = 0;
    }
    return;
  }

  // In replay mode, start from the beginning and ignore any persisted
  // deduplication state so that replayed alerts are not suppressed.
  agentAlertsOffset = 0;
  agentAlertsDedup.clear();
}
|
|
347
|
+
|
|
322
348
|
function rememberAlert(key) {
|
|
323
349
|
agentAlertsDedup.set(key, Date.now());
|
|
324
350
|
if (agentAlertsDedup.size > 200) {
|
|
@@ -381,6 +407,16 @@ function parseEnvInteger(value, defaultValue, { min = null, max = null } = {}) {
|
|
|
381
407
|
return parsed;
|
|
382
408
|
}
|
|
383
409
|
|
|
410
|
+
/**
 * Parses a (possibly fractional) number from an env-style value.
 *
 * The value is stringified and trimmed, then converted with `Number()`.
 * `defaultValue` is returned when the text is empty, is not a finite number,
 * or lies outside the optional inclusive bounds. Out-of-range input is NOT
 * clamped. A bound is only enforced when it is itself a finite number.
 *
 * @param {unknown} value - Raw value to parse.
 * @param {number} defaultValue - Fallback for empty, invalid or out-of-range input.
 * @param {{ min?: number|null, max?: number|null }} [bounds] - Optional inclusive limits.
 * @returns {number} The parsed number, or `defaultValue`.
 */
function parseEnvNumber(value, defaultValue, { min = null, max = null } = {}) {
  const text = String(value ?? "").trim();
  if (text === "") return defaultValue;

  const numeric = Number(text);
  const tooSmall = Number.isFinite(min) && numeric < min;
  const tooLarge = Number.isFinite(max) && numeric > max;
  const usable = Number.isFinite(numeric) && !tooSmall && !tooLarge;
  return usable ? numeric : defaultValue;
}
|
|
419
|
+
|
|
384
420
|
const DEFAULT_AGENT_ENDPOINT_PORT = 18432;
|
|
385
421
|
const REPO_SCOPED_AGENT_ENDPOINT_PORT_WINDOW = 2048;
|
|
386
422
|
|
|
@@ -491,6 +527,264 @@ function buildWorkflowEventPayload(eventType, eventData = {}) {
|
|
|
491
527
|
return payload;
|
|
492
528
|
}
|
|
493
529
|
|
|
530
|
+
/**
 * Built-in workflow recovery policy, used for any field the configured
 * policy does not supply.
 */
const DEFAULT_WORKFLOW_RECOVERY_POLICY = Object.freeze({
  // Attempt number at which an operation is escalated as terminal.
  maxAttempts: 5,
  // Attempt number at which the one-time warning is logged.
  escalationWarnAfterAttempts: 3,
  // Delay before the first retry; doubled per attempt up to maxBackoffMs.
  baseBackoffMs: 5_000,
  maxBackoffMs: 60_000,
  // Fraction of the delay used as the +/- random jitter window.
  jitterRatio: 0.2,
});

// Active policy; reassigned by applyWorkflowRecoveryPolicy.
let workflowRecoveryPolicy = DEFAULT_WORKFLOW_RECOVERY_POLICY;

// Per-operation retry bookkeeping, keyed by operation name.
const workflowRecoveryState = new Map();
// Pending retry timers, keyed by operation name.
const workflowRecoveryTimers = new Map();
|
|
542
|
+
|
|
543
|
+
/**
 * Writes one structured workflow-recovery telemetry record to stdout.
 *
 * The record is a single `[monitor-telemetry] <json>` line containing an ISO
 * timestamp, the fixed component name, the event name, and then every key of
 * `details`. Because `details` is spread last, its keys win on a name clash.
 *
 * @param {string} eventName - Event identifier stored under `event`.
 * @param {object} [details] - Extra fields merged into the record.
 */
function emitWorkflowRecoveryTelemetry(eventName, details = {}) {
  const serialized = JSON.stringify({
    ts: new Date().toISOString(),
    component: "monitor.workflow-recovery",
    event: eventName,
    ...details,
  });
  console.log(`[monitor-telemetry] ${serialized}`);
}
|
|
552
|
+
|
|
553
|
+
/**
 * Turns a raw (possibly partial or invalid) recovery policy into a frozen,
 * fully populated policy object.
 *
 * Each field is parsed on its own against a fixed range, with
 * DEFAULT_WORKFLOW_RECOVERY_POLICY supplying the fallback:
 *   - maxAttempts:                 integer, 1..20
 *   - escalationWarnAfterAttempts: integer, 1..maxAttempts
 *   - baseBackoffMs:               integer, 50..60000
 *   - maxBackoffMs:                integer, 1000..1800000
 *   - jitterRatio:                 number,  0..0.9
 *
 * @param {object} [candidate] - Raw policy; anything that is not an object is
 *   treated as an empty policy.
 * @returns {Readonly<{maxAttempts: number, escalationWarnAfterAttempts: number,
 *   baseBackoffMs: number, maxBackoffMs: number, jitterRatio: number}>}
 */
function normalizeWorkflowRecoveryPolicy(candidate = {}) {
  const policy =
    candidate && typeof candidate === "object" ? candidate : {};
  const maxAttempts = parseEnvInteger(
    policy.maxAttempts,
    DEFAULT_WORKFLOW_RECOVERY_POLICY.maxAttempts,
    { min: 1, max: 20 },
  );
  // When the field is missing or rejected the parser presumably hands back the
  // default (3), which is not range-checked and can exceed a smaller configured
  // maxAttempts. Clamp so the warning threshold is always reachable before the
  // terminal escalation.
  const escalationWarnAfterAttempts = Math.min(
    parseEnvInteger(
      policy.escalationWarnAfterAttempts,
      DEFAULT_WORKFLOW_RECOVERY_POLICY.escalationWarnAfterAttempts,
      { min: 1, max: maxAttempts },
    ),
    maxAttempts,
  );
  return Object.freeze({
    maxAttempts,
    escalationWarnAfterAttempts,
    baseBackoffMs: parseEnvInteger(
      policy.baseBackoffMs,
      DEFAULT_WORKFLOW_RECOVERY_POLICY.baseBackoffMs,
      { min: 50, max: 60_000 },
    ),
    maxBackoffMs: parseEnvInteger(
      policy.maxBackoffMs,
      DEFAULT_WORKFLOW_RECOVERY_POLICY.maxBackoffMs,
      { min: 1000, max: 30 * 60 * 1000 },
    ),
    jitterRatio: parseEnvNumber(
      policy.jitterRatio,
      DEFAULT_WORKFLOW_RECOVERY_POLICY.jitterRatio,
      { min: 0, max: 0.9 },
    ),
  });
}
|
|
586
|
+
|
|
587
|
+
/**
 * Compares two recovery policies field by field with strict equality.
 *
 * Only the five policy fields are inspected. A missing side (null/undefined)
 * reads as `undefined` for every field, so two missing policies compare equal.
 *
 * @param {object|null|undefined} left
 * @param {object|null|undefined} right
 * @returns {boolean} True when all five fields match.
 */
function isWorkflowRecoveryPolicyEqual(left, right) {
  const comparedFields = [
    "maxAttempts",
    "escalationWarnAfterAttempts",
    "baseBackoffMs",
    "maxBackoffMs",
    "jitterRatio",
  ];
  return comparedFields.every((field) => left?.[field] === right?.[field]);
}
|
|
596
|
+
|
|
597
|
+
/**
 * Installs a new workflow recovery policy.
 *
 * The candidate is normalized and always becomes the active policy. If any of
 * its fields differ from the previous policy, all pending retry timers are
 * cancelled, all per-operation recovery state is dropped, and a
 * `policy_updated` telemetry event is emitted.
 *
 * @param {object} nextPolicy - Raw policy to normalize and apply.
 * @param {string} [reason="startup"] - Label reported in the telemetry event.
 */
function applyWorkflowRecoveryPolicy(nextPolicy, reason = "startup") {
  const normalized = normalizeWorkflowRecoveryPolicy(nextPolicy);
  const unchanged = isWorkflowRecoveryPolicyEqual(
    workflowRecoveryPolicy,
    normalized,
  );
  workflowRecoveryPolicy = normalized;
  if (unchanged) return;

  // Snapshot the keys first: clearing a timer removes its map entry.
  const pendingOperations = Array.from(workflowRecoveryTimers.keys());
  for (const pendingOperation of pendingOperations) {
    clearWorkflowRecoveryTimer(pendingOperation);
  }
  workflowRecoveryState.clear();

  const telemetryDetails = { reason, policy: normalized };
  emitWorkflowRecoveryTelemetry("policy_updated", telemetryDetails);
}
|
|
615
|
+
|
|
616
|
+
/**
 * Cancels the pending retry timer for an operation, if one is registered,
 * and removes it from the timer map.
 *
 * @param {string} operation - Operation name used as the timer key.
 */
function clearWorkflowRecoveryTimer(operation) {
  const pendingTimer = workflowRecoveryTimers.get(operation);
  if (!pendingTimer) return;

  clearTimeout(pendingTimer);
  workflowRecoveryTimers.delete(operation);
}
|
|
623
|
+
|
|
624
|
+
/**
 * Computes the delay before the next retry from the active recovery policy.
 *
 * The delay starts at `baseBackoffMs` (at least 1), doubles for every attempt
 * after the first, and is capped at the larger of the base and `maxBackoffMs`.
 * A random offset within +/- `jitterRatio` of that delay is then added, and
 * the result is clamped to the range 0..cap.
 *
 * @param {number} attempt - 1-based number of the attempt that just failed.
 * @returns {number} Delay in milliseconds.
 */
function computeWorkflowRecoveryBackoffMs(attempt) {
  const { baseBackoffMs, maxBackoffMs, jitterRatio } = workflowRecoveryPolicy;
  const base = Math.max(1, baseBackoffMs);
  const ceiling = Math.max(base, maxBackoffMs);

  const doublings = Math.max(0, attempt - 1);
  const exponentialDelay = Math.min(base * Math.pow(2, doublings), ceiling);

  const jitterSpan = Math.max(0, Math.round(exponentialDelay * jitterRatio));
  let jitterOffset = 0;
  if (jitterSpan > 0) {
    // Uniform integer in [-jitterSpan, +jitterSpan].
    jitterOffset = Math.floor(Math.random() * (jitterSpan * 2 + 1)) - jitterSpan;
  }

  const jittered = Math.round(exponentialDelay + jitterOffset);
  return Math.max(0, Math.min(jittered, ceiling));
}
|
|
642
|
+
|
|
643
|
+
/**
 * Records that a recovery request was skipped and reports it, rate-limited.
 *
 * The per-reason counter on `state` is incremented and the state is stored
 * back under `operation`. A `suppressed` telemetry event is emitted only for
 * the first suppression and for every tenth one after that.
 *
 * @param {string} operation - Operation name.
 * @param {object} state - Recovery state for the operation; mutated in place.
 * @param {string} reason - Why the request was skipped (e.g. "cooldown").
 * @param {object} [metadata] - Caller context, reported under `context`.
 */
function emitWorkflowRecoverySuppressed(operation, state, reason, metadata = {}) {
  if (!(state && reason)) return;

  const tallies = state.suppressedCounters || {};
  const occurrences = Number(tallies[reason] || 0) + 1;
  tallies[reason] = occurrences;
  state.suppressedCounters = tallies;
  workflowRecoveryState.set(operation, state);

  const shouldReport = occurrences === 1 || occurrences % 10 === 0;
  if (!shouldReport) return;

  let nextRetryAt = null;
  if (state.nextRetryAt) {
    nextRetryAt = new Date(state.nextRetryAt).toISOString();
  }
  emitWorkflowRecoveryTelemetry("suppressed", {
    operation,
    reason,
    suppressedCount: occurrences,
    attempts: Number(state.attempts || 0),
    terminalEscalated: state.terminalEscalated === true,
    inFlight: state.inFlight === true,
    nextRetryAt,
    context: metadata || null,
  });
}
|
|
665
|
+
|
|
666
|
+
/**
 * Runs a named recovery operation under the active workflow recovery policy.
 *
 * The call returns immediately; `attemptFn` runs asynchronously.
 *   - Requests are skipped (and counted as suppressed) while the operation is
 *     in flight, after it has been terminally escalated, or while its retry
 *     cooldown is still running.
 *   - On success the operation's state and any pending timer are discarded.
 *   - On failure the attempt is recorded. Once `maxAttempts` is reached the
 *     operation is marked terminal and no further retry is scheduled.
 *     Otherwise a retry timer is armed with the computed backoff.
 *
 * @param {string} operation - Operation name; blank names are ignored.
 * @param {(ctx: {operation: string, attempt: number}) => unknown} attemptFn -
 *   Work to run; a throw or rejection counts as a failed attempt.
 * @param {object} [metadata] - Context copied into telemetry. `trigger`
 *   defaults to "manual" in the attempt event.
 */
function runWorkflowRecoveryWithPolicy(operation, attemptFn, metadata = {}) {
  const op = String(operation || "").trim();
  if (!op || typeof attemptFn !== "function") return;

  const now = Date.now();
  const state = workflowRecoveryState.get(op) || {
    attempts: 0,
    warningEscalated: false,
    terminalEscalated: false,
    inFlight: false,
    nextRetryAt: 0,
    suppressedCounters: {},
  };
  if (state.terminalEscalated || state.inFlight) {
    emitWorkflowRecoverySuppressed(
      op,
      state,
      state.terminalEscalated ? "terminal-escalated" : "in-flight",
      metadata,
    );
    return;
  }
  // The retry timer is what ends the cooldown, so a timer-driven call must not
  // be gated on the wall clock. Timers are not guaranteed to line up with
  // Date.now(); if one fired slightly early (or the clock stepped backwards)
  // the retry would be suppressed as "cooldown" and, since nothing re-arms the
  // timer, the operation would never be retried again.
  const isScheduledRetry = metadata?.trigger === "retry-timer";
  if (!isScheduledRetry && state.nextRetryAt > now) {
    emitWorkflowRecoverySuppressed(op, state, "cooldown", metadata);
    return;
  }

  const attempt = state.attempts + 1;
  state.inFlight = true;
  workflowRecoveryState.set(op, state);
  emitWorkflowRecoveryTelemetry("attempt", {
    operation: op,
    attempt,
    maxAttempts: workflowRecoveryPolicy.maxAttempts,
    escalationWarnAfterAttempts: workflowRecoveryPolicy.escalationWarnAfterAttempts,
    trigger: metadata?.trigger || "manual",
    context: metadata || null,
  });

  const startedAt = Date.now();
  // Deliberately not awaited: callers fire and forget.
  void Promise.resolve()
    .then(() => attemptFn({ operation: op, attempt }))
    .then(() => {
      clearWorkflowRecoveryTimer(op);
      workflowRecoveryState.delete(op);
      emitWorkflowRecoveryTelemetry("success", {
        operation: op,
        attempt,
        durationMs: Date.now() - startedAt,
        context: metadata || null,
      });
    })
    .catch((err) => {
      const errorMessage = formatMonitorError(err);
      const failureAt = Date.now();
      const willWarnEscalate =
        !state.warningEscalated &&
        attempt >= workflowRecoveryPolicy.escalationWarnAfterAttempts;
      const terminalEscalated = attempt >= workflowRecoveryPolicy.maxAttempts;
      state.attempts = attempt;
      state.lastError = errorMessage;
      state.lastFailureAt = failureAt;
      state.inFlight = false;
      if (willWarnEscalate) {
        state.warningEscalated = true;
      }

      if (terminalEscalated) {
        // Out of attempts: mark terminal so later requests are suppressed.
        state.terminalEscalated = true;
        state.nextRetryAt = 0;
        workflowRecoveryState.set(op, state);
        clearWorkflowRecoveryTimer(op);
        emitWorkflowRecoveryTelemetry("escalated", {
          operation: op,
          attempt,
          escalationState: "terminal",
          error: errorMessage,
          durationMs: failureAt - startedAt,
          context: metadata || null,
        });
        console.warn(
          `[workflows] recovery escalated for ${op} after ${attempt} attempt(s): ${errorMessage}`,
        );
        return;
      }

      const backoffMs = computeWorkflowRecoveryBackoffMs(attempt);
      state.nextRetryAt = failureAt + backoffMs;
      workflowRecoveryState.set(op, state);
      emitWorkflowRecoveryTelemetry("retry_scheduled", {
        operation: op,
        attempt,
        error: errorMessage,
        backoffMs,
        nextRetryAt: new Date(state.nextRetryAt).toISOString(),
        escalationState: state.warningEscalated ? "warning" : "none",
        durationMs: failureAt - startedAt,
        context: metadata || null,
      });
      if (willWarnEscalate) {
        console.warn(
          `[workflows] recovery warning for ${op}: ${attempt} consecutive failures (threshold ${workflowRecoveryPolicy.escalationWarnAfterAttempts})`,
        );
      }

      // Replace any earlier timer so at most one retry is pending per operation.
      clearWorkflowRecoveryTimer(op);
      const timer = setTimeout(() => {
        workflowRecoveryTimers.delete(op);
        runWorkflowRecoveryWithPolicy(op, attemptFn, {
          ...metadata,
          trigger: "retry-timer",
        });
      }, backoffMs);
      // Do not keep the process alive just for a pending retry.
      if (typeof timer.unref === "function") timer.unref();
      workflowRecoveryTimers.set(op, timer);
    })
    .finally(() => {
      // Safety net: release the in-flight latch even if a handler above threw.
      const current = workflowRecoveryState.get(op);
      if (current) current.inFlight = false;
    });
}
|
|
787
|
+
|
|
494
788
|
async function ensureWorkflowAutomationEngine() {
|
|
495
789
|
if (!workflowAutomationEnabled || process.env.VITEST) return null;
|
|
496
790
|
if (workflowAutomationEngine) return workflowAutomationEngine;
|
|
@@ -1627,7 +1921,7 @@ function stopAgentWorkAnalyzer() {
|
|
|
1627
1921
|
|
|
1628
1922
|
function startAgentAlertTailer() {
|
|
1629
1923
|
if (agentAlertsTimer) return;
|
|
1630
|
-
|
|
1924
|
+
initializeAgentAlertsOffset();
|
|
1631
1925
|
agentAlertsTimer = setInterval(() => {
|
|
1632
1926
|
runDetached("agent-alerts:poll-interval", pollAgentAlerts);
|
|
1633
1927
|
}, AGENT_ALERT_POLL_MS);
|
|
@@ -1835,9 +2129,12 @@ let {
|
|
|
1835
2129
|
telegramVerbosity,
|
|
1836
2130
|
fleet: fleetConfig,
|
|
1837
2131
|
internalExecutor: internalExecutorConfig,
|
|
2132
|
+
workflowRecovery: configWorkflowRecovery,
|
|
1838
2133
|
executorMode: configExecutorMode,
|
|
1839
2134
|
} = config;
|
|
1840
2135
|
|
|
2136
|
+
applyWorkflowRecoveryPolicy(configWorkflowRecovery, "startup-config");
|
|
2137
|
+
|
|
1841
2138
|
const telegramWeeklyReportEnabled = parseEnvBoolean(
|
|
1842
2139
|
process.env.TELEGRAM_WEEKLY_REPORT_ENABLED,
|
|
1843
2140
|
false,
|
|
@@ -2860,6 +3157,8 @@ try {
|
|
|
2860
3157
|
let telegramNotifierInterval = null;
|
|
2861
3158
|
let telegramNotifierTimeout = null;
|
|
2862
3159
|
let weeklyReportLastSentAt = null;
|
|
3160
|
+
let weeklyReportStateLoaded = false;
|
|
3161
|
+
const weeklyReportStatePath = getWeeklyReportStatePath();
|
|
2863
3162
|
const monitorRestartReason = String(
|
|
2864
3163
|
process.env.BOSUN_MONITOR_RESTART_REASON || "",
|
|
2865
3164
|
)
|
|
@@ -7046,8 +7345,11 @@ function buildEpicMergeBody(tasks, headName, baseName) {
|
|
|
7046
7345
|
const maxList = 25;
|
|
7047
7346
|
const slice = safeTasks.slice(0, maxList);
|
|
7048
7347
|
for (const task of slice) {
|
|
7049
|
-
const
|
|
7050
|
-
const
|
|
7348
|
+
const normalizedTaskId = String(task?.id || "").trim();
|
|
7349
|
+
const title = deriveTaskDisplayTitle(task?.title || task?.name, normalizedTaskId);
|
|
7350
|
+
const shouldOmitIdSuffix =
|
|
7351
|
+
normalizedTaskId && title === `Task ${normalizedTaskId}`;
|
|
7352
|
+
const id = normalizedTaskId && !shouldOmitIdSuffix ? ` (${normalizedTaskId})` : "";
|
|
7051
7353
|
lines.push(`- ${title}${id}`);
|
|
7052
7354
|
}
|
|
7053
7355
|
if (safeTasks.length > maxList) {
|
|
@@ -7059,6 +7361,16 @@ function buildEpicMergeBody(tasks, headName, baseName) {
|
|
|
7059
7361
|
return lines.join("\n");
|
|
7060
7362
|
}
|
|
7061
7363
|
|
|
7364
|
+
/**
 * Picks a human-readable title for a task.
 *
 * A non-empty title is used as-is (trimmed) unless it is the placeholder
 * "untitled task" in any letter case. Otherwise the result is
 * `Task <id>` when an id is available, or "Untitled task" when it is not.
 *
 * @param {unknown} titleValue - Raw task title; falsy values count as empty.
 * @param {unknown} taskId - Task identifier used for the fallback label.
 * @returns {string} Display title.
 */
function deriveTaskDisplayTitle(titleValue, taskId) {
  const candidate = String(titleValue || "").trim();
  const isPlaceholder = candidate.toLowerCase() === "untitled task";
  if (candidate !== "" && !isPlaceholder) {
    return candidate;
  }

  const idText = String(taskId || "").trim();
  return idText === "" ? "Untitled task" : `Task ${idText}`;
}
|
|
7373
|
+
|
|
7062
7374
|
function summarizeEpicBranch(headBranch, baseBranch) {
|
|
7063
7375
|
const headInfo = splitRemoteRef(headBranch, "origin");
|
|
7064
7376
|
const baseInfo = splitRemoteRef(baseBranch, "origin");
|
|
@@ -10103,9 +10415,10 @@ function formatRecentStatusItems(items, timestampField, maxItems = 6) {
|
|
|
10103
10415
|
})
|
|
10104
10416
|
.slice(0, maxItems)
|
|
10105
10417
|
.map((entry) => {
|
|
10106
|
-
const title = entry?.task_title || entry?.title || "Untitled task";
|
|
10107
10418
|
const id = (entry?.task_id || entry?.id || "").toString().slice(0, 8);
|
|
10108
|
-
const
|
|
10419
|
+
const title = deriveTaskDisplayTitle(entry?.task_title || entry?.title, id);
|
|
10420
|
+
const suffix =
|
|
10421
|
+
id && title !== `Task ${id}` ? ` (${id})` : "";
|
|
10109
10422
|
return `- ${title}${suffix}`;
|
|
10110
10423
|
});
|
|
10111
10424
|
}
|
|
@@ -10275,9 +10588,26 @@ async function sendTelegramMessage(text, options = {}) {
|
|
|
10275
10588
|
});
|
|
10276
10589
|
}
|
|
10277
10590
|
|
|
10591
|
+
// In-flight load shared by concurrent callers; null when no load is running.
let weeklyReportStateLoadPromise = null;

/**
 * Loads the persisted weekly-report schedule state once, on first use.
 *
 * `weeklyReportLastSentAt` is set from the stored `lastSentAt`, or to null
 * when nothing is stored or the read fails. A failed read is logged and is
 * not retried on later calls.
 *
 * The "loaded" flag is only set after the read has settled, and concurrent
 * callers await the same in-flight read. Otherwise a second caller could
 * continue with `weeklyReportLastSentAt` still unset, and the late-finishing
 * read could overwrite a newer timestamp written in the meantime.
 *
 * @returns {Promise<void>} Resolves once the state has been loaded (or the
 *   load has failed and been logged); never rejects.
 */
async function ensureWeeklyReportStateLoaded() {
  if (weeklyReportStateLoaded) return;
  if (!weeklyReportStateLoadPromise) {
    weeklyReportStateLoadPromise = (async () => {
      try {
        const state = await readWeeklyReportScheduleState({
          statePath: weeklyReportStatePath,
        });
        weeklyReportLastSentAt = state?.lastSentAt || null;
      } catch (err) {
        console.warn(
          `[monitor] failed loading weekly report state from ${weeklyReportStatePath}: ${err?.message || err}`,
        );
        weeklyReportLastSentAt = null;
      } finally {
        weeklyReportStateLoaded = true;
        // Drop the memo so a later reset of the flag triggers a fresh load.
        weeklyReportStateLoadPromise = null;
      }
    })();
  }
  await weeklyReportStateLoadPromise;
}
|
|
10606
|
+
|
|
10278
10607
|
async function maybeSendWeeklyReport(nowInput = new Date()) {
|
|
10279
10608
|
if (!telegramWeeklyReportEnabled) return;
|
|
10280
10609
|
if (!telegramToken || !telegramChatId) return;
|
|
10610
|
+
await ensureWeeklyReportStateLoaded();
|
|
10281
10611
|
const now = nowInput instanceof Date ? nowInput : new Date(nowInput);
|
|
10282
10612
|
if (!Number.isFinite(now.getTime())) return;
|
|
10283
10613
|
|
|
@@ -10299,6 +10629,9 @@ async function maybeSendWeeklyReport(nowInput = new Date()) {
|
|
|
10299
10629
|
skipDedup: true,
|
|
10300
10630
|
});
|
|
10301
10631
|
weeklyReportLastSentAt = now.toISOString();
|
|
10632
|
+
await writeWeeklyReportScheduleState(weeklyReportLastSentAt, {
|
|
10633
|
+
statePath: weeklyReportStatePath,
|
|
10634
|
+
});
|
|
10302
10635
|
if (Array.isArray(report.warnings) && report.warnings.length > 0) {
|
|
10303
10636
|
console.warn(
|
|
10304
10637
|
`[monitor] weekly report generated with warnings: ${report.warnings.join(" | ")}`,
|
|
@@ -14093,6 +14426,10 @@ function applyConfig(nextConfig, options = {}) {
|
|
|
14093
14426
|
process.env.WORKFLOW_AUTOMATION_ENABLED,
|
|
14094
14427
|
workflowAutomationEnabled,
|
|
14095
14428
|
);
|
|
14429
|
+
applyWorkflowRecoveryPolicy(
|
|
14430
|
+
nextConfig.workflowRecovery,
|
|
14431
|
+
`config-reload:${reason || "unknown"}`,
|
|
14432
|
+
);
|
|
14096
14433
|
{
|
|
14097
14434
|
const dedupMs = Number(
|
|
14098
14435
|
process.env.WORKFLOW_EVENT_DEDUP_WINDOW_MS || workflowEventDedupWindowMs || "15000",
|
|
@@ -14677,7 +15014,12 @@ pollWorkflowSchedulesOnce = async function pollWorkflowSchedulesOnce(
|
|
|
14677
15014
|
) {
|
|
14678
15015
|
try {
|
|
14679
15016
|
const engine = await ensureWorkflowAutomationEngine();
|
|
14680
|
-
if (!engine?.evaluateScheduleTriggers)
|
|
15017
|
+
if (!engine?.evaluateScheduleTriggers) {
|
|
15018
|
+
if (opts?.requireEngine) {
|
|
15019
|
+
throw new Error("workflow automation engine unavailable");
|
|
15020
|
+
}
|
|
15021
|
+
return;
|
|
15022
|
+
}
|
|
14681
15023
|
const includeTaskPoll = opts?.includeTaskPoll !== false;
|
|
14682
15024
|
|
|
14683
15025
|
const triggered = engine.evaluateScheduleTriggers({ configDir: repoRoot });
|
|
@@ -14730,6 +15072,7 @@ pollWorkflowSchedulesOnce = async function pollWorkflowSchedulesOnce(
|
|
|
14730
15072
|
}
|
|
14731
15073
|
} catch (err) {
|
|
14732
15074
|
console.warn(`[workflows] schedule-check error: ${err?.message || err}`);
|
|
15075
|
+
if (opts?.throwOnError) throw err;
|
|
14733
15076
|
}
|
|
14734
15077
|
};
|
|
14735
15078
|
|
|
@@ -14935,10 +15278,15 @@ if (dependabotAutoMerge) {
|
|
|
14935
15278
|
|
|
14936
15279
|
if (telegramWeeklyReportEnabled) {
|
|
14937
15280
|
const weeklyReportPollMs = 60 * 1000;
|
|
15281
|
+
const nextWeeklyReportTime = getNextWeeklyReportTime({
|
|
15282
|
+
now: new Date(),
|
|
15283
|
+
dayOfWeek: telegramWeeklyReportDay,
|
|
15284
|
+
hourUtc: telegramWeeklyReportHour,
|
|
15285
|
+
});
|
|
14938
15286
|
safeSetInterval("telegram-weekly-report", () => maybeSendWeeklyReport(), weeklyReportPollMs);
|
|
14939
15287
|
safeSetTimeout("telegram-weekly-report-initial", () => maybeSendWeeklyReport(), 45 * 1000);
|
|
14940
15288
|
console.log(
|
|
14941
|
-
`[monitor] weekly Telegram report scheduler enabled (day=${telegramWeeklyReportDay}, hourUtc=${telegramWeeklyReportHour}, lookbackDays=${telegramWeeklyReportDays})`,
|
|
15289
|
+
`[monitor] weekly Telegram report scheduler enabled (day=${telegramWeeklyReportDay}, hourUtc=${telegramWeeklyReportHour}, lookbackDays=${telegramWeeklyReportDays}); next scheduled send at ${nextWeeklyReportTime.toISOString()}`,
|
|
14942
15290
|
);
|
|
14943
15291
|
}
|
|
14944
15292
|
|
|
@@ -15118,9 +15466,34 @@ let agentSupervisor = null;
|
|
|
15118
15466
|
if (!isMonitorTestRuntime) {
|
|
15119
15467
|
if (workflowAutomationEnabled) {
|
|
15120
15468
|
await ensureWorkflowAutomationEngine().catch(() => {});
|
|
15121
|
-
|
|
15122
|
-
|
|
15123
|
-
|
|
15469
|
+
runWorkflowRecoveryWithPolicy(
|
|
15470
|
+
"stale-dispatch-unstick",
|
|
15471
|
+
() =>
|
|
15472
|
+
pollWorkflowSchedulesOnce("startup", {
|
|
15473
|
+
includeTaskPoll: false,
|
|
15474
|
+
requireEngine: true,
|
|
15475
|
+
throwOnError: true,
|
|
15476
|
+
}),
|
|
15477
|
+
{
|
|
15478
|
+
trigger: "startup",
|
|
15479
|
+
operationType: "stale-dispatch-unstick",
|
|
15480
|
+
includeTaskPoll: false,
|
|
15481
|
+
},
|
|
15482
|
+
);
|
|
15483
|
+
runWorkflowRecoveryWithPolicy(
|
|
15484
|
+
"workflow-history-unstick",
|
|
15485
|
+
async () => {
|
|
15486
|
+
const engine = await ensureWorkflowAutomationEngine();
|
|
15487
|
+
if (!engine?.resumeInterruptedRuns) {
|
|
15488
|
+
throw new Error("workflow engine resumeInterruptedRuns unavailable");
|
|
15489
|
+
}
|
|
15490
|
+
await engine.resumeInterruptedRuns();
|
|
15491
|
+
},
|
|
15492
|
+
{
|
|
15493
|
+
trigger: "startup",
|
|
15494
|
+
operationType: "workflow-history-unstick",
|
|
15495
|
+
},
|
|
15496
|
+
);
|
|
15124
15497
|
} else {
|
|
15125
15498
|
console.log(
|
|
15126
15499
|
"[workflows] automation disabled (set WORKFLOW_AUTOMATION_ENABLED=true to enable event-driven workflow triggers)",
|
|
@@ -15375,6 +15748,21 @@ if (isExecutorDisabled()) {
|
|
|
15375
15748
|
};
|
|
15376
15749
|
internalTaskExecutor = getTaskExecutor(execOpts);
|
|
15377
15750
|
internalTaskExecutor.start();
|
|
15751
|
+
if (workflowOwnsTaskExecutorLifecycle) {
|
|
15752
|
+
runWorkflowRecoveryWithPolicy(
|
|
15753
|
+
"stale-dispatch-task-poll-unstick",
|
|
15754
|
+
() =>
|
|
15755
|
+
pollWorkflowSchedulesOnce("startup", {
|
|
15756
|
+
requireEngine: true,
|
|
15757
|
+
throwOnError: true,
|
|
15758
|
+
}),
|
|
15759
|
+
{
|
|
15760
|
+
trigger: "startup",
|
|
15761
|
+
operationType: "stale-dispatch-task-poll-unstick",
|
|
15762
|
+
includeTaskPoll: true,
|
|
15763
|
+
},
|
|
15764
|
+
);
|
|
15765
|
+
}
|
|
15378
15766
|
|
|
15379
15767
|
// Write executor slots to status file every 30s for Telegram /tasks
|
|
15380
15768
|
startStatusFileWriter(30000);
|
|
@@ -28,7 +28,15 @@ const DEFAULT_CACHE_DIR = resolve(__dirname, "..", ".cache");
|
|
|
28
28
|
const SNAPSHOT_FILE_NAME = "runtime-accumulator.json";
|
|
29
29
|
const SESSION_LOG_FILE_NAME = "session-accumulator.jsonl";
|
|
30
30
|
const MAX_SESSION_TOKENS = 100;
|
|
31
|
-
const
|
|
31
|
+
// Default and upper limit for MAX_COMPLETED_SESSIONS.
const DEFAULT_MAX_COMPLETED_SESSIONS = 50_000;

/**
 * Effective completed-session limit, resolved once at module load.
 *
 * RUNTIME_MAX_COMPLETED_SESSIONS can lower the limit; unset, non-numeric or
 * non-positive values fall back to the default, and larger values are capped
 * at the default.
 */
const MAX_COMPLETED_SESSIONS = (() => {
  const configured = process.env.RUNTIME_MAX_COMPLETED_SESSIONS;
  const requested = configured ? Number.parseInt(configured, 10) : Number.NaN;
  const isUsable = Number.isFinite(requested) && requested > 0;
  // Hard cap to avoid unbounded memory use even if misconfigured.
  return isUsable
    ? Math.min(requested, DEFAULT_MAX_COMPLETED_SESSIONS)
    : DEFAULT_MAX_COMPLETED_SESSIONS;
})();
|
|
32
40
|
|
|
33
41
|
let _cacheDir = DEFAULT_CACHE_DIR;
|
|
34
42
|
let _runtimeFile = resolve(_cacheDir, SNAPSHOT_FILE_NAME);
|