bosun 0.40.21 → 0.41.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +8 -0
- package/README.md +20 -0
- package/agent/agent-custom-tools.mjs +23 -5
- package/agent/agent-event-bus.mjs +248 -6
- package/agent/agent-pool.mjs +131 -30
- package/agent/agent-work-analyzer.mjs +8 -16
- package/agent/primary-agent.mjs +81 -7
- package/agent/retry-queue.mjs +164 -0
- package/bench/swebench/bosun-swebench.mjs +5 -0
- package/bosun.config.example.json +25 -0
- package/bosun.schema.json +825 -183
- package/cli.mjs +267 -8
- package/config/config-doctor.mjs +51 -2
- package/config/config.mjs +232 -5
- package/github/github-auth-manager.mjs +70 -19
- package/infra/library-manager.mjs +894 -60
- package/infra/monitor.mjs +701 -69
- package/infra/runtime-accumulator.mjs +376 -84
- package/infra/session-tracker.mjs +95 -28
- package/infra/test-runtime.mjs +267 -0
- package/lib/codebase-audit.mjs +133 -18
- package/package.json +30 -8
- package/server/setup-web-server.mjs +29 -1
- package/server/ui-server.mjs +1571 -49
- package/setup.mjs +27 -24
- package/shell/codex-shell.mjs +34 -3
- package/shell/copilot-shell.mjs +50 -8
- package/task/msg-hub.mjs +193 -0
- package/task/pipeline.mjs +544 -0
- package/task/task-claims.mjs +6 -10
- package/task/task-cli.mjs +38 -2
- package/task/task-executor-pipeline.mjs +143 -0
- package/task/task-executor.mjs +36 -27
- package/telegram/get-telegram-chat-id.mjs +57 -47
- package/ui/components/chat-view.js +18 -1
- package/ui/components/workspace-switcher.js +321 -9
- package/ui/demo-defaults.js +17830 -10433
- package/ui/demo.html +9 -1
- package/ui/modules/router.js +1 -1
- package/ui/modules/settings-schema.js +2 -0
- package/ui/modules/state.js +54 -57
- package/ui/modules/voice-client-sdk.js +376 -37
- package/ui/modules/voice-client.js +173 -33
- package/ui/setup.html +68 -2
- package/ui/styles/components.css +571 -1
- package/ui/styles.css +201 -1
- package/ui/tabs/dashboard.js +74 -0
- package/ui/tabs/library.js +410 -55
- package/ui/tabs/logs.js +10 -0
- package/ui/tabs/settings.js +178 -99
- package/ui/tabs/tasks.js +1083 -507
- package/ui/tabs/telemetry.js +34 -0
- package/ui/tabs/workflow-canvas-utils.mjs +38 -1
- package/ui/tabs/workflows.js +1275 -402
- package/voice/voice-agents-sdk.mjs +2 -2
- package/voice/voice-relay.mjs +28 -20
- package/workflow/declarative-workflows.mjs +145 -0
- package/workflow/msg-hub.mjs +237 -0
- package/workflow/pipeline-workflows.mjs +287 -0
- package/workflow/pipeline.mjs +828 -315
- package/workflow/project-detection.mjs +559 -0
- package/workflow/workflow-cli.mjs +128 -0
- package/workflow/workflow-contract.mjs +433 -232
- package/workflow/workflow-engine.mjs +510 -47
- package/workflow/workflow-nodes/custom-loader.mjs +251 -0
- package/workflow/workflow-nodes.mjs +2024 -184
- package/workflow/workflow-templates.mjs +118 -24
- package/workflow-templates/agents.mjs +20 -20
- package/workflow-templates/bosun-native.mjs +212 -2
- package/workflow-templates/code-quality.mjs +20 -14
- package/workflow-templates/continuation-loop.mjs +339 -0
- package/workflow-templates/github.mjs +516 -40
- package/workflow-templates/planning.mjs +446 -17
- package/workflow-templates/reliability.mjs +65 -12
- package/workflow-templates/task-batch.mjs +27 -10
- package/workflow-templates/task-execution.mjs +752 -0
- package/workflow-templates/task-lifecycle.mjs +117 -14
- package/workspace/context-cache.mjs +66 -18
- package/workspace/workspace-manager.mjs +153 -1
- package/workflow-templates/issue-continuation.mjs +0 -243
package/.env.example
CHANGED
|
@@ -393,12 +393,20 @@ VOICE_DELEGATE_EXECUTOR=codex-sdk
|
|
|
393
393
|
# INTERNAL_EXECUTOR_STREAM_RETRY_BASE_MS=2000
|
|
394
394
|
# Stream retry backoff max delay in ms (default: 32000)
|
|
395
395
|
# INTERNAL_EXECUTOR_STREAM_RETRY_MAX_MS=32000
|
|
396
|
+
# Retry queue: route to review workflow once retry count reaches this threshold (default: 3)
|
|
397
|
+
# INTERNAL_EXECUTOR_RETRY_REVIEW_THRESHOLD=3
|
|
398
|
+
# Retry queue: default delay before next retry attempt in ms (default: 15000)
|
|
399
|
+
# INTERNAL_EXECUTOR_RETRY_DELAY_MS=15000
|
|
396
400
|
# Abort/retry turns that emit no stream events within this budget (default: 120000)
|
|
397
401
|
# INTERNAL_EXECUTOR_STREAM_FIRST_EVENT_TIMEOUT_MS=120000
|
|
398
402
|
# Cap number of completed stream items retained per turn (default: 600)
|
|
399
403
|
# INTERNAL_EXECUTOR_STREAM_MAX_ITEMS_PER_TURN=600
|
|
400
404
|
# Truncate oversized item payload strings to this char budget (default: 12000)
|
|
401
405
|
# INTERNAL_EXECUTOR_STREAM_MAX_ITEM_CHARS=12000
|
|
406
|
+
# Prompt cache anchoring mode:
|
|
407
|
+
# default = best-effort split between system/user prompt
|
|
408
|
+
# strict = fail fast if task-specific data leaks into system prompt
|
|
409
|
+
# BOSUN_CACHE_ANCHOR_MODE=default
|
|
402
410
|
# Project requirements profile used by planner/replenishment prompts
|
|
403
411
|
# Allowed: simple-feature | feature | large-feature | system | multi-system
|
|
404
412
|
# PROJECT_REQUIREMENTS_PROFILE=feature
|
package/README.md
CHANGED
|
@@ -149,6 +149,26 @@ Bosun enforces a strict quality pipeline in both local hooks and CI:
|
|
|
149
149
|
- **Demo load smoke test** runs in `npm test` and blocks push if `site/index.html` or `site/ui/demo.html` fails to load required assets.
|
|
150
150
|
- **Prepublish checks** validate package contents and release readiness.
|
|
151
151
|
|
|
152
|
+
### Codebase annotation audit
|
|
153
|
+
|
|
154
|
+
Use `bosun audit` to generate and validate repo-level annotations that help future agents navigate the codebase without extra runtime context:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
# Coverage report for supported source files
|
|
158
|
+
bosun audit scan
|
|
159
|
+
|
|
160
|
+
# Add missing file summaries and risky-function warnings
|
|
161
|
+
bosun audit generate
|
|
162
|
+
bosun audit warn
|
|
163
|
+
|
|
164
|
+
# Rebuild lean manifests and the file responsibility index
|
|
165
|
+
bosun audit manifest
|
|
166
|
+
bosun audit index
|
|
167
|
+
bosun audit trim
|
|
168
|
+
|
|
169
|
+
# CI-safe conformity gate
|
|
170
|
+
bosun audit --ci
|
|
171
|
+
```
|
|
152
172
|
|
|
153
173
|
Notes:
|
|
154
174
|
|
|
package/agent/agent-custom-tools.mjs
CHANGED
|
@@ -561,12 +561,30 @@ export async function invokeCustomTool(rootDir, toolId, args = [], opts = {}) {
|
|
|
561
561
|
timeout,
|
|
562
562
|
maxBuffer: 10 * 1024 * 1024, // 10 MB
|
|
563
563
|
});
|
|
564
|
-
|
|
565
|
-
stderr
|
|
564
|
+
// Node versions/environments may resolve promisified execFile as:
|
|
565
|
+
// - { stdout, stderr } (modern child_process custom promisify)
|
|
566
|
+
// - stdout string/buffer only (legacy/mocked fallback)
|
|
567
|
+
// - [stdout, stderr] tuple (some custom wrappers)
|
|
568
|
+
if (out && typeof out === "object" && !Array.isArray(out)) {
|
|
569
|
+
stdout = String(out.stdout ?? "");
|
|
570
|
+
stderr = String(out.stderr ?? "");
|
|
571
|
+
} else if (Array.isArray(out)) {
|
|
572
|
+
stdout = String(out[0] ?? "");
|
|
573
|
+
stderr = String(out[1] ?? "");
|
|
574
|
+
} else {
|
|
575
|
+
stdout = String(out ?? "");
|
|
576
|
+
stderr = "";
|
|
577
|
+
}
|
|
566
578
|
} catch (err) {
|
|
567
|
-
stdout = err
|
|
568
|
-
stderr = err
|
|
569
|
-
|
|
579
|
+
stdout = String(err?.stdout ?? "");
|
|
580
|
+
stderr = String(err?.stderr ?? err?.message ?? "");
|
|
581
|
+
const numericExit = Number(err?.code);
|
|
582
|
+
const numericStatus = Number(err?.status);
|
|
583
|
+
exitCode = Number.isFinite(numericExit)
|
|
584
|
+
? numericExit
|
|
585
|
+
: Number.isFinite(numericStatus)
|
|
586
|
+
? numericStatus
|
|
587
|
+
: 1;
|
|
570
588
|
}
|
|
571
589
|
|
|
572
590
|
// Record usage non-blocking
|
|
package/agent/agent-event-bus.mjs
CHANGED
|
@@ -20,6 +20,12 @@
|
|
|
20
20
|
* AGENT_EVENT — Frozen enum of all event types
|
|
21
21
|
*/
|
|
22
22
|
|
|
23
|
+
import {
|
|
24
|
+
createRetryQueueState,
|
|
25
|
+
reduceRetryQueue,
|
|
26
|
+
snapshotRetryQueue,
|
|
27
|
+
} from "./retry-queue.mjs";
|
|
28
|
+
|
|
23
29
|
const TAG = "[agent-event-bus]";
|
|
24
30
|
|
|
25
31
|
// ── Event Types ─────────────────────────────────────────────────────────────
|
|
@@ -36,6 +42,7 @@ export const AGENT_EVENT = Object.freeze({
|
|
|
36
42
|
TASK_FAILED: "agent:task-failed",
|
|
37
43
|
TASK_BLOCKED: "agent:task-blocked",
|
|
38
44
|
TASK_STATUS_CHANGE: "agent:task-status-change",
|
|
45
|
+
SESSION_ACCUMULATED: "session-accumulated",
|
|
39
46
|
|
|
40
47
|
// ── Agent self-reports (from agent-endpoint) ──
|
|
41
48
|
AGENT_HEARTBEAT: "agent:heartbeat",
|
|
@@ -48,6 +55,7 @@ export const AGENT_EVENT = Object.freeze({
|
|
|
48
55
|
AUTO_COOLDOWN: "agent:auto-cooldown",
|
|
49
56
|
AUTO_BLOCK: "agent:auto-block",
|
|
50
57
|
AUTO_NEW_SESSION: "agent:auto-new-session",
|
|
58
|
+
RETRY_QUEUE_UPDATED: "agent:retry-queue-updated",
|
|
51
59
|
EXECUTOR_PAUSED: "agent:executor-paused",
|
|
52
60
|
EXECUTOR_RESUMED: "agent:executor-resumed",
|
|
53
61
|
|
|
@@ -80,6 +88,10 @@ const DEFAULTS = {
|
|
|
80
88
|
dedupeWindowMs: 500,
|
|
81
89
|
/** Max auto-action retries before blocking */
|
|
82
90
|
maxAutoRetries: 5,
|
|
91
|
+
/** Retry count threshold before routing to review workflow hook (0 disables) */
|
|
92
|
+
retryReviewThreshold: 0,
|
|
93
|
+
/** Default delay before next retry attempt appears due */
|
|
94
|
+
retryDelayMs: 15_000,
|
|
83
95
|
};
|
|
84
96
|
|
|
85
97
|
// ── AgentEventBus Class ─────────────────────────────────────────────────────
|
|
@@ -112,9 +124,27 @@ export class AgentEventBus {
|
|
|
112
124
|
options.staleThresholdMs || DEFAULTS.staleThresholdMs;
|
|
113
125
|
this._staleCheckIntervalMs =
|
|
114
126
|
options.staleCheckIntervalMs || DEFAULTS.staleCheckIntervalMs;
|
|
115
|
-
this._maxAutoRetries =
|
|
116
|
-
options.maxAutoRetries ?? DEFAULTS.maxAutoRetries;
|
|
127
|
+
this._maxAutoRetries =
|
|
128
|
+
options.maxAutoRetries ?? DEFAULTS.maxAutoRetries;
|
|
117
129
|
this._dedupeWindowMs = options.dedupeWindowMs || DEFAULTS.dedupeWindowMs;
|
|
130
|
+
this._retryReviewThreshold =
|
|
131
|
+
Number.isFinite(Number(options.retryReviewThreshold))
|
|
132
|
+
? Math.max(0, Math.trunc(Number(options.retryReviewThreshold)))
|
|
133
|
+
: DEFAULTS.retryReviewThreshold;
|
|
134
|
+
this._retryDelayMs =
|
|
135
|
+
Number.isFinite(Number(options.retryDelayMs))
|
|
136
|
+
? Math.max(0, Math.trunc(Number(options.retryDelayMs)))
|
|
137
|
+
: DEFAULTS.retryDelayMs;
|
|
138
|
+
this._onRetryThresholdExceeded =
|
|
139
|
+
typeof options.onRetryThresholdExceeded === "function"
|
|
140
|
+
? options.onRetryThresholdExceeded
|
|
141
|
+
: null;
|
|
142
|
+
this._setRetryQueueData =
|
|
143
|
+
typeof options.setRetryQueueData === "function"
|
|
144
|
+
? options.setRetryQueueData
|
|
145
|
+
: (typeof globalThis.__bosun_setRetryQueueData === "function"
|
|
146
|
+
? globalThis.__bosun_setRetryQueueData
|
|
147
|
+
: null);
|
|
118
148
|
|
|
119
149
|
/** @type {Array<{type: string, taskId: string, payload: object, ts: number}>} ring buffer */
|
|
120
150
|
this._eventLog = [];
|
|
@@ -133,6 +163,7 @@ export class AgentEventBus {
|
|
|
133
163
|
|
|
134
164
|
/** @type {Map<string, number>} dedup key → last emit timestamp */
|
|
135
165
|
this._recentEmits = new Map();
|
|
166
|
+
this._retryQueueState = createRetryQueueState();
|
|
136
167
|
|
|
137
168
|
/** @type {ReturnType<typeof setInterval>|null} */
|
|
138
169
|
this._staleCheckTimer = null;
|
|
@@ -257,8 +288,25 @@ export class AgentEventBus {
|
|
|
257
288
|
retryCount: 0,
|
|
258
289
|
lastRetryAt: 0,
|
|
259
290
|
cooldownUntil: 0,
|
|
291
|
+
taskTitle: String(task?.title || "").trim(),
|
|
260
292
|
});
|
|
293
|
+
} else {
|
|
294
|
+
const state = this._autoActionState.get(taskId);
|
|
295
|
+
if (state) {
|
|
296
|
+
if (state.cooldownUntil > 0) {
|
|
297
|
+
state.cooldownUntil = 0;
|
|
298
|
+
}
|
|
299
|
+
const startedTitle = String(task?.title || "").trim();
|
|
300
|
+
if (startedTitle) {
|
|
301
|
+
state.taskTitle = startedTitle;
|
|
302
|
+
}
|
|
303
|
+
this._autoActionState.set(taskId, state);
|
|
304
|
+
}
|
|
261
305
|
}
|
|
306
|
+
this._updateRetryQueue(
|
|
307
|
+
{ type: "remove", taskId },
|
|
308
|
+
{ reason: "task-started", taskId },
|
|
309
|
+
);
|
|
262
310
|
}
|
|
263
311
|
|
|
264
312
|
/**
|
|
@@ -278,6 +326,10 @@ export class AgentEventBus {
|
|
|
278
326
|
prNumber: result?.prNumber || null,
|
|
279
327
|
});
|
|
280
328
|
this._autoActionState.delete(taskId);
|
|
329
|
+
this._updateRetryQueue(
|
|
330
|
+
{ type: "remove", taskId },
|
|
331
|
+
{ reason: "task-completed", taskId },
|
|
332
|
+
);
|
|
281
333
|
|
|
282
334
|
// Auto-review
|
|
283
335
|
if (result?.success && this._reviewAgent) {
|
|
@@ -300,6 +352,17 @@ export class AgentEventBus {
|
|
|
300
352
|
error: errorMsg,
|
|
301
353
|
});
|
|
302
354
|
|
|
355
|
+
const failedTitle = String(task?.title || "").trim();
|
|
356
|
+
if (failedTitle) {
|
|
357
|
+
const state = this._autoActionState.get(taskId) || {
|
|
358
|
+
retryCount: 0,
|
|
359
|
+
lastRetryAt: 0,
|
|
360
|
+
cooldownUntil: 0,
|
|
361
|
+
};
|
|
362
|
+
state.taskTitle = failedTitle;
|
|
363
|
+
this._autoActionState.set(taskId, state);
|
|
364
|
+
}
|
|
365
|
+
|
|
303
366
|
if (this._errorDetector) {
|
|
304
367
|
const cls = this._errorDetector.classify(errorMsg, "");
|
|
305
368
|
this._handleClassification(taskId, cls, errorMsg);
|
|
@@ -390,6 +453,12 @@ export class AgentEventBus {
|
|
|
390
453
|
this._sendTelegram(`:close: Task blocked: "${title}" (source: ${source})`);
|
|
391
454
|
}
|
|
392
455
|
}
|
|
456
|
+
if (newStatus !== "error") {
|
|
457
|
+
this._updateRetryQueue(
|
|
458
|
+
{ type: "remove", taskId },
|
|
459
|
+
{ reason: `status:${newStatus}`, taskId },
|
|
460
|
+
);
|
|
461
|
+
}
|
|
393
462
|
}
|
|
394
463
|
|
|
395
464
|
/**
|
|
@@ -509,6 +578,7 @@ export class AgentEventBus {
|
|
|
509
578
|
* @returns {object}
|
|
510
579
|
*/
|
|
511
580
|
getStatus() {
|
|
581
|
+
const retryQueue = snapshotRetryQueue(this._retryQueueState);
|
|
512
582
|
return {
|
|
513
583
|
started: this._started,
|
|
514
584
|
eventLogSize: this._eventLog.length,
|
|
@@ -516,11 +586,30 @@ export class AgentEventBus {
|
|
|
516
586
|
errorTrackedTasks: this._errorHistory.size,
|
|
517
587
|
autoActionTasks: this._autoActionState.size,
|
|
518
588
|
listenerCount: this._listeners.size,
|
|
589
|
+
retryQueue,
|
|
519
590
|
liveness: this.getAgentLiveness(),
|
|
520
591
|
errorPatterns: this.getErrorPatternSummary(),
|
|
521
592
|
};
|
|
522
593
|
}
|
|
523
594
|
|
|
595
|
+
getRetryQueue() {
|
|
596
|
+
return snapshotRetryQueue(this._retryQueueState);
|
|
597
|
+
}
|
|
598
|
+
|
|
599
|
+
clearRetryQueueTask(taskId, reason = "manual") {
|
|
600
|
+
const id = String(taskId || "").trim();
|
|
601
|
+
if (!id) return;
|
|
602
|
+
const state = this._autoActionState.get(id);
|
|
603
|
+
if (state && state.cooldownUntil > 0) {
|
|
604
|
+
state.cooldownUntil = 0;
|
|
605
|
+
this._autoActionState.set(id, state);
|
|
606
|
+
}
|
|
607
|
+
this._updateRetryQueue(
|
|
608
|
+
{ type: "remove", taskId: id },
|
|
609
|
+
{ reason, taskId: id },
|
|
610
|
+
);
|
|
611
|
+
}
|
|
612
|
+
|
|
524
613
|
// ══════════════════════════════════════════════════════════════════════════
|
|
525
614
|
// INTERNAL
|
|
526
615
|
// ══════════════════════════════════════════════════════════════════════════
|
|
@@ -538,6 +627,39 @@ export class AgentEventBus {
|
|
|
538
627
|
}
|
|
539
628
|
}
|
|
540
629
|
|
|
630
|
+
_updateRetryQueue(action, meta = {}) {
|
|
631
|
+
this._retryQueueState = reduceRetryQueue(this._retryQueueState, action);
|
|
632
|
+
const snapshot = snapshotRetryQueue(this._retryQueueState);
|
|
633
|
+
if (!this._setRetryQueueData && typeof globalThis.__bosun_setRetryQueueData === "function") {
|
|
634
|
+
this._setRetryQueueData = globalThis.__bosun_setRetryQueueData;
|
|
635
|
+
}
|
|
636
|
+
if (typeof this._setRetryQueueData === "function") {
|
|
637
|
+
try {
|
|
638
|
+
this._setRetryQueueData(snapshot);
|
|
639
|
+
} catch {
|
|
640
|
+
/* best effort */
|
|
641
|
+
}
|
|
642
|
+
}
|
|
643
|
+
this.emit(AGENT_EVENT.RETRY_QUEUE_UPDATED, meta.taskId || "system", {
|
|
644
|
+
reason: meta.reason || "retry-queue-updated",
|
|
645
|
+
retryQueue: snapshot,
|
|
646
|
+
});
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
_tryTriggerRetryThresholdReview(payload) {
|
|
650
|
+
if (!this._onRetryThresholdExceeded) return;
|
|
651
|
+
try {
|
|
652
|
+
const maybePromise = this._onRetryThresholdExceeded(payload);
|
|
653
|
+
if (maybePromise && typeof maybePromise.then === "function") {
|
|
654
|
+
maybePromise.catch((err) => {
|
|
655
|
+
console.warn(`${TAG} retry-threshold hook failed:`, err?.message || err);
|
|
656
|
+
});
|
|
657
|
+
}
|
|
658
|
+
} catch (err) {
|
|
659
|
+
console.warn(`${TAG} retry-threshold hook failed:`, err?.message || err);
|
|
660
|
+
}
|
|
661
|
+
}
|
|
662
|
+
|
|
541
663
|
_handleClassification(taskId, classification, rawError) {
|
|
542
664
|
const ts = Date.now();
|
|
543
665
|
if (!this._errorHistory.has(taskId)) {
|
|
@@ -597,6 +719,10 @@ export class AgentEventBus {
|
|
|
597
719
|
cooldownUntil: 0,
|
|
598
720
|
};
|
|
599
721
|
const now = Date.now();
|
|
722
|
+
const task = this._resolveTask(taskId);
|
|
723
|
+
const taskTitle =
|
|
724
|
+
String(task?.title || "").trim()
|
|
725
|
+
|| String(this._autoActionState.get(taskId)?.taskTitle || "").trim();
|
|
600
726
|
|
|
601
727
|
if (state.cooldownUntil > now) {
|
|
602
728
|
console.log(
|
|
@@ -607,6 +733,33 @@ export class AgentEventBus {
|
|
|
607
733
|
|
|
608
734
|
switch (action) {
|
|
609
735
|
case "retry_with_prompt": {
|
|
736
|
+
if (this._retryReviewThreshold > 0 && state.retryCount >= this._retryReviewThreshold) {
|
|
737
|
+
const reason =
|
|
738
|
+
recovery?.reason ||
|
|
739
|
+
`retry threshold reached (${state.retryCount}/${this._retryReviewThreshold})`;
|
|
740
|
+
this._updateRetryQueue(
|
|
741
|
+
{ type: "mark-exhausted", taskId },
|
|
742
|
+
{ reason: "retry-threshold-exceeded", taskId },
|
|
743
|
+
);
|
|
744
|
+
this.emit(AGENT_EVENT.AUTO_REVIEW, taskId, {
|
|
745
|
+
reason,
|
|
746
|
+
retryCount: state.retryCount,
|
|
747
|
+
threshold: this._retryReviewThreshold,
|
|
748
|
+
pattern: classification?.pattern || null,
|
|
749
|
+
source: "retry-threshold",
|
|
750
|
+
});
|
|
751
|
+
this._tryTriggerRetryThresholdReview({
|
|
752
|
+
taskId,
|
|
753
|
+
reason,
|
|
754
|
+
retryCount: state.retryCount,
|
|
755
|
+
threshold: this._retryReviewThreshold,
|
|
756
|
+
classification,
|
|
757
|
+
recovery,
|
|
758
|
+
rawError,
|
|
759
|
+
});
|
|
760
|
+
console.log(`${TAG} ${taskId} reached retry threshold (${state.retryCount}); routed to review workflow`);
|
|
761
|
+
return;
|
|
762
|
+
}
|
|
610
763
|
if (state.retryCount >= this._maxAutoRetries) {
|
|
611
764
|
console.log(`${TAG} ${taskId} exhausted retries (${state.retryCount})`);
|
|
612
765
|
this._executeAutoAction(taskId, "block", recovery, classification, rawError);
|
|
@@ -615,6 +768,29 @@ export class AgentEventBus {
|
|
|
615
768
|
state.retryCount++;
|
|
616
769
|
state.lastRetryAt = now;
|
|
617
770
|
this._autoActionState.set(taskId, state);
|
|
771
|
+
const retryDelayMs =
|
|
772
|
+
Number.isFinite(Number(recovery?.cooldownMs))
|
|
773
|
+
? Math.max(0, Math.trunc(Number(recovery.cooldownMs)))
|
|
774
|
+
: this._retryDelayMs;
|
|
775
|
+
const nextAttemptAt = now + retryDelayMs;
|
|
776
|
+
this._updateRetryQueue(
|
|
777
|
+
{
|
|
778
|
+
type: "bump-count",
|
|
779
|
+
taskId,
|
|
780
|
+
retryCount: state.retryCount,
|
|
781
|
+
item: {
|
|
782
|
+
taskId,
|
|
783
|
+
taskTitle,
|
|
784
|
+
retryCount: state.retryCount,
|
|
785
|
+
maxRetries: this._maxAutoRetries,
|
|
786
|
+
reason: recovery?.reason || "error detected",
|
|
787
|
+
lastError: rawError || recovery?.reason || "",
|
|
788
|
+
nextAttemptAt,
|
|
789
|
+
status: "scheduled",
|
|
790
|
+
},
|
|
791
|
+
},
|
|
792
|
+
{ reason: "auto-retry", taskId },
|
|
793
|
+
);
|
|
618
794
|
|
|
619
795
|
this.emit(AGENT_EVENT.AUTO_RETRY, taskId, {
|
|
620
796
|
retryCount: state.retryCount,
|
|
@@ -622,6 +798,8 @@ export class AgentEventBus {
|
|
|
622
798
|
reason: recovery?.reason || "error detected",
|
|
623
799
|
prompt: recovery?.prompt || null,
|
|
624
800
|
pattern: classification?.pattern,
|
|
801
|
+
nextAttemptAt,
|
|
802
|
+
retryDelayMs,
|
|
625
803
|
});
|
|
626
804
|
console.log(
|
|
627
805
|
`${TAG} auto-retry #${state.retryCount}/${this._maxAutoRetries} for ${taskId} (${classification?.pattern || "?"})`,
|
|
@@ -640,6 +818,22 @@ export class AgentEventBus {
|
|
|
640
818
|
reason: recovery?.reason || "rate limited",
|
|
641
819
|
pattern: classification?.pattern,
|
|
642
820
|
});
|
|
821
|
+
this._updateRetryQueue(
|
|
822
|
+
{
|
|
823
|
+
type: "upsert",
|
|
824
|
+
item: {
|
|
825
|
+
taskId,
|
|
826
|
+
taskTitle,
|
|
827
|
+
retryCount: state.retryCount,
|
|
828
|
+
maxRetries: this._maxAutoRetries,
|
|
829
|
+
reason: recovery?.reason || "rate limited",
|
|
830
|
+
lastError: rawError || recovery?.reason || "",
|
|
831
|
+
nextAttemptAt: state.cooldownUntil,
|
|
832
|
+
status: "cooldown",
|
|
833
|
+
},
|
|
834
|
+
},
|
|
835
|
+
{ reason: "auto-cooldown", taskId },
|
|
836
|
+
);
|
|
643
837
|
console.log(
|
|
644
838
|
`${TAG} cooldown ${cooldownMs}ms for ${taskId} (${classification?.pattern || "?"})`,
|
|
645
839
|
);
|
|
@@ -647,6 +841,10 @@ export class AgentEventBus {
|
|
|
647
841
|
}
|
|
648
842
|
|
|
649
843
|
case "block": {
|
|
844
|
+
this._updateRetryQueue(
|
|
845
|
+
{ type: "mark-exhausted", taskId },
|
|
846
|
+
{ reason: "auto-block", taskId },
|
|
847
|
+
);
|
|
650
848
|
this.emit(AGENT_EVENT.AUTO_BLOCK, taskId, {
|
|
651
849
|
reason: recovery?.reason || "too many errors",
|
|
652
850
|
errorCount: recovery?.errorCount || 0,
|
|
@@ -659,8 +857,7 @@ export class AgentEventBus {
|
|
|
659
857
|
} catch { /* best-effort */ }
|
|
660
858
|
}
|
|
661
859
|
if (this._sendTelegram) {
|
|
662
|
-
const
|
|
663
|
-
const title = task?.title || taskId;
|
|
860
|
+
const title = taskTitle || taskId;
|
|
664
861
|
this._sendTelegram(
|
|
665
862
|
`:close: Auto-blocked: "${title}" — ${recovery?.reason || "too many errors"}`,
|
|
666
863
|
);
|
|
@@ -675,6 +872,24 @@ export class AgentEventBus {
|
|
|
675
872
|
state.retryCount++;
|
|
676
873
|
state.lastRetryAt = now;
|
|
677
874
|
this._autoActionState.set(taskId, state);
|
|
875
|
+
this._updateRetryQueue(
|
|
876
|
+
{
|
|
877
|
+
type: "bump-count",
|
|
878
|
+
taskId,
|
|
879
|
+
retryCount: state.retryCount,
|
|
880
|
+
item: {
|
|
881
|
+
taskId,
|
|
882
|
+
taskTitle,
|
|
883
|
+
retryCount: state.retryCount,
|
|
884
|
+
maxRetries: this._maxAutoRetries,
|
|
885
|
+
reason: recovery?.reason || "new session",
|
|
886
|
+
lastError: rawError || recovery?.reason || "",
|
|
887
|
+
nextAttemptAt: now + this._retryDelayMs,
|
|
888
|
+
status: "new-session",
|
|
889
|
+
},
|
|
890
|
+
},
|
|
891
|
+
{ reason: "auto-new-session", taskId },
|
|
892
|
+
);
|
|
678
893
|
|
|
679
894
|
this.emit(AGENT_EVENT.AUTO_NEW_SESSION, taskId, {
|
|
680
895
|
reason: recovery?.reason || "session expired / token overflow",
|
|
@@ -703,12 +918,15 @@ export class AgentEventBus {
|
|
|
703
918
|
|
|
704
919
|
case "manual":
|
|
705
920
|
default: {
|
|
921
|
+
this._updateRetryQueue(
|
|
922
|
+
{ type: "remove", taskId },
|
|
923
|
+
{ reason: "manual-review", taskId },
|
|
924
|
+
);
|
|
706
925
|
console.log(
|
|
707
926
|
`${TAG} manual review needed for ${taskId}: ${recovery?.reason || rawError}`,
|
|
708
927
|
);
|
|
709
928
|
if (this._sendTelegram && (recovery?.errorCount || 0) >= 3) {
|
|
710
|
-
const
|
|
711
|
-
const title = task?.title || taskId;
|
|
929
|
+
const title = taskTitle || taskId;
|
|
712
930
|
this._sendTelegram(
|
|
713
931
|
`:alert: "${title}" needs manual review: ${recovery?.reason || "repeated errors"}`,
|
|
714
932
|
);
|
|
@@ -763,6 +981,29 @@ export class AgentEventBus {
|
|
|
763
981
|
|
|
764
982
|
_checkStaleAgents() {
|
|
765
983
|
const now = Date.now();
|
|
984
|
+
const beforeSnapshot = snapshotRetryQueue(this._retryQueueState);
|
|
985
|
+
this._retryQueueState = reduceRetryQueue(this._retryQueueState, {
|
|
986
|
+
type: "expire",
|
|
987
|
+
now,
|
|
988
|
+
});
|
|
989
|
+
const afterSnapshot = snapshotRetryQueue(this._retryQueueState);
|
|
990
|
+
const queueChanged =
|
|
991
|
+
beforeSnapshot.count !== afterSnapshot.count ||
|
|
992
|
+
JSON.stringify(beforeSnapshot.items) !== JSON.stringify(afterSnapshot.items) ||
|
|
993
|
+
JSON.stringify(beforeSnapshot.stats) !== JSON.stringify(afterSnapshot.stats);
|
|
994
|
+
if (queueChanged && typeof this._setRetryQueueData === "function") {
|
|
995
|
+
try {
|
|
996
|
+
this._setRetryQueueData(afterSnapshot);
|
|
997
|
+
} catch {
|
|
998
|
+
/* best effort */
|
|
999
|
+
}
|
|
1000
|
+
}
|
|
1001
|
+
if (queueChanged) {
|
|
1002
|
+
this.emit(AGENT_EVENT.RETRY_QUEUE_UPDATED, "system", {
|
|
1003
|
+
reason: "expire",
|
|
1004
|
+
retryQueue: afterSnapshot,
|
|
1005
|
+
});
|
|
1006
|
+
}
|
|
766
1007
|
for (const [taskId, lastHb] of this._heartbeats) {
|
|
767
1008
|
const elapsed = now - lastHb;
|
|
768
1009
|
if (elapsed >= this._staleThresholdMs) {
|
|
@@ -821,3 +1062,4 @@ export class AgentEventBus {
|
|
|
821
1062
|
export function createAgentEventBus(options) {
|
|
822
1063
|
return new AgentEventBus(options);
|
|
823
1064
|
}
|
|
1065
|
+
|