bosun 0.41.0 → 0.41.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/.env.example +8 -0
  2. package/README.md +20 -0
  3. package/agent/agent-event-bus.mjs +248 -6
  4. package/agent/agent-pool.mjs +125 -28
  5. package/agent/agent-work-analyzer.mjs +8 -16
  6. package/agent/retry-queue.mjs +164 -0
  7. package/bosun.config.example.json +25 -0
  8. package/bosun.schema.json +825 -183
  9. package/cli.mjs +59 -5
  10. package/config/config.mjs +130 -3
  11. package/infra/monitor.mjs +693 -67
  12. package/infra/runtime-accumulator.mjs +376 -84
  13. package/infra/session-tracker.mjs +82 -25
  14. package/lib/codebase-audit.mjs +133 -18
  15. package/package.json +23 -4
  16. package/server/setup-web-server.mjs +25 -0
  17. package/server/ui-server.mjs +248 -29
  18. package/setup.mjs +27 -24
  19. package/shell/codex-shell.mjs +34 -3
  20. package/shell/copilot-shell.mjs +50 -8
  21. package/task/msg-hub.mjs +193 -0
  22. package/task/pipeline.mjs +544 -0
  23. package/task/task-cli.mjs +38 -2
  24. package/task/task-executor-pipeline.mjs +143 -0
  25. package/task/task-executor.mjs +36 -27
  26. package/telegram/get-telegram-chat-id.mjs +57 -47
  27. package/ui/components/workspace-switcher.js +7 -7
  28. package/ui/demo-defaults.js +15694 -10573
  29. package/ui/modules/settings-schema.js +2 -0
  30. package/ui/modules/state.js +54 -57
  31. package/ui/modules/voice-client-sdk.js +375 -36
  32. package/ui/modules/voice-client.js +140 -31
  33. package/ui/setup.html +68 -2
  34. package/ui/styles/components.css +57 -0
  35. package/ui/styles.css +201 -1
  36. package/ui/tabs/dashboard.js +74 -0
  37. package/ui/tabs/logs.js +10 -0
  38. package/ui/tabs/settings.js +178 -99
  39. package/ui/tabs/tasks.js +31 -1
  40. package/ui/tabs/telemetry.js +34 -0
  41. package/ui/tabs/workflow-canvas-utils.mjs +8 -1
  42. package/ui/tabs/workflows.js +532 -275
  43. package/voice/voice-agents-sdk.mjs +1 -1
  44. package/voice/voice-relay.mjs +6 -6
  45. package/workflow/declarative-workflows.mjs +145 -0
  46. package/workflow/msg-hub.mjs +237 -0
  47. package/workflow/pipeline-workflows.mjs +287 -0
  48. package/workflow/pipeline.mjs +828 -315
  49. package/workflow/workflow-cli.mjs +128 -0
  50. package/workflow/workflow-engine.mjs +329 -17
  51. package/workflow/workflow-nodes/custom-loader.mjs +250 -0
  52. package/workflow/workflow-nodes.mjs +1955 -223
  53. package/workflow/workflow-templates.mjs +26 -8
  54. package/workflow-templates/agents.mjs +0 -1
  55. package/workflow-templates/bosun-native.mjs +212 -2
  56. package/workflow-templates/continuation-loop.mjs +339 -0
  57. package/workflow-templates/github.mjs +516 -40
  58. package/workflow-templates/planning.mjs +446 -17
  59. package/workflow-templates/reliability.mjs +65 -12
  60. package/workflow-templates/task-batch.mjs +24 -8
  61. package/workflow-templates/task-lifecycle.mjs +83 -6
  62. package/workspace/context-cache.mjs +66 -18
  63. package/workspace/workspace-manager.mjs +2 -1
  64. package/workflow-templates/issue-continuation.mjs +0 -243
@@ -335,6 +335,17 @@ function envFlagEnabled(value) {
335
335
  return ["1", "true", "yes", "on", "y"].includes(raw);
336
336
  }
337
337
 
338
/**
 * Build the environment for a spawned SDK session with Node.js warnings
 * silenced by default.
 *
 * A shallow copy of `runtimeEnv` is always returned; the argument itself is
 * never modified. `NODE_NO_WARNINGS` is set to "1" on the copy unless either
 *   - the parent process has `BOSUN_SUPPRESS_NODE_WARNINGS` set to "0"
 *     (surrounding whitespace ignored), or
 *   - the copy already carries a truthy `NODE_NO_WARNINGS` value.
 *
 * @param {object|null|undefined} runtimeEnv - Environment to derive from.
 * @returns {object} A new environment object.
 */
function applyNodeWarningSuppressionEnv(runtimeEnv) {
  const sessionEnv = Object.assign({}, runtimeEnv || {});

  // The opt-out switch is read from the parent process, not from runtimeEnv.
  const optOutFlag = String(process.env.BOSUN_SUPPRESS_NODE_WARNINGS ?? "").trim();
  const suppressionDisabled = optOutFlag === "0";
  const alreadyConfigured = Boolean(sessionEnv.NODE_NO_WARNINGS);

  if (!suppressionDisabled && !alreadyConfigured) {
    sessionEnv.NODE_NO_WARNINGS = "1";
  }
  return sessionEnv;
}
348
+
338
349
  const GITHUB_TOKEN_CACHE_TTL_MS = 60_000;
339
350
  let cachedGithubSessionToken = null;
340
351
  let cachedGithubSessionTokenAt = 0;
@@ -885,6 +896,7 @@ function applySdkFailureCooldown(name, error, nowMs = Date.now()) {
885
896
  }
886
897
 
887
898
  const MONITOR_MONITOR_TASK_KEY = "monitor-monitor";
899
+ const MONITOR_MONITOR_THREAD_REFRESH_TURNS_REMAINING = parseBoundedNumber(process.env.DEVMODE_MONITOR_MONITOR_THREAD_REFRESH_TURNS_REMAINING, 5, 1, 1000);
888
900
  let monitorMonitorTimeoutBoundsWarningKey = "";
889
901
  let monitorMonitorTimeoutAdjustmentKey = "";
890
902
 
@@ -1087,6 +1099,7 @@ async function launchCodexThread(prompt, cwd, timeoutMs, extra = {}) {
1087
1099
  onThreadReady = null,
1088
1100
  taskKey: steerKey = null,
1089
1101
  envOverrides = null,
1102
+ systemPrompt = "",
1090
1103
  } = extra;
1091
1104
 
1092
1105
  let reportedThreadId = null;
@@ -1130,7 +1143,8 @@ async function launchCodexThread(prompt, cwd, timeoutMs, extra = {}) {
1130
1143
  envOverrides && typeof envOverrides === "object"
1131
1144
  ? { ...process.env, ...envOverrides }
1132
1145
  : process.env;
1133
- const codexOpts = buildCodexSdkOptions(codexRuntimeEnv);
1146
+ const codexSessionEnv = applyNodeWarningSuppressionEnv(codexRuntimeEnv);
1147
+ const codexOpts = buildCodexSdkOptions(codexSessionEnv);
1134
1148
  const modelOverride = String(extra?.model || "").trim();
1135
1149
  if (modelOverride) {
1136
1150
  codexOpts.env = { ...(codexOpts.env || {}), CODEX_MODEL: modelOverride };
@@ -1198,7 +1212,10 @@ async function launchCodexThread(prompt, cwd, timeoutMs, extra = {}) {
1198
1212
  // ── 4. Stream the turn ───────────────────────────────────────────────────
1199
1213
  try {
1200
1214
  const streamSafety = resolveCodexStreamSafety(timeoutMs);
1201
- const safePrompt = sanitizeAndBoundPrompt(`${prompt}${TOOL_OUTPUT_GUARDRAIL}`);
1215
+ const anchoredPrompt = String(systemPrompt || "").trim()
1216
+ ? `${String(systemPrompt).trim()}\n\n---\n\n${prompt}`
1217
+ : prompt;
1218
+ const safePrompt = sanitizeAndBoundPrompt(`${anchoredPrompt}${TOOL_OUTPUT_GUARDRAIL}`);
1202
1219
  const turn = await thread.runStreamed(safePrompt, {
1203
1220
  signal: controller.signal,
1204
1221
  });
@@ -1433,11 +1450,12 @@ async function launchCopilotThread(prompt, cwd, timeoutMs, extra = {}) {
1433
1450
  envOverrides && typeof envOverrides === "object"
1434
1451
  ? { ...process.env, ...envOverrides }
1435
1452
  : process.env;
1453
+ const runtimeSessionEnv = applyNodeWarningSuppressionEnv(runtimeEnv);
1436
1454
  const token =
1437
- runtimeEnv.COPILOT_CLI_TOKEN ||
1438
- runtimeEnv.GITHUB_TOKEN ||
1439
- runtimeEnv.GH_TOKEN ||
1440
- runtimeEnv.GITHUB_PAT ||
1455
+ runtimeSessionEnv.COPILOT_CLI_TOKEN ||
1456
+ runtimeSessionEnv.GITHUB_TOKEN ||
1457
+ runtimeSessionEnv.GH_TOKEN ||
1458
+ runtimeSessionEnv.GITHUB_PAT ||
1441
1459
  undefined;
1442
1460
 
1443
1461
  // ── 3. Create & start ephemeral client (LOCAL mode) ──────────────────────
@@ -1453,15 +1471,16 @@ async function launchCopilotThread(prompt, cwd, timeoutMs, extra = {}) {
1453
1471
 
1454
1472
  let client;
1455
1473
  let unsubscribe = null;
1474
+ let stopCopilotFirstEventWatch = null;
1456
1475
  let finalResponse = "";
1457
1476
  const allItems = [];
1458
1477
  const autoApprovePermissions = shouldAutoApproveCopilotPermissions();
1459
1478
  const clientEnv = autoApprovePermissions
1460
1479
  ? {
1461
- ...runtimeEnv,
1462
- COPILOT_ALLOW_ALL: runtimeEnv.COPILOT_ALLOW_ALL || "true",
1480
+ ...runtimeSessionEnv,
1481
+ COPILOT_ALLOW_ALL: runtimeSessionEnv.COPILOT_ALLOW_ALL || "true",
1463
1482
  }
1464
- : runtimeEnv;
1483
+ : runtimeSessionEnv;
1465
1484
  try {
1466
1485
  await withSanitizedOpenAiEnv(async () => {
1467
1486
  let clientOpts;
@@ -1481,7 +1500,7 @@ async function launchCopilotThread(prompt, cwd, timeoutMs, extra = {}) {
1481
1500
  }
1482
1501
  }
1483
1502
  const cliLaunch = resolveCopilotCliLaunchConfig({
1484
- env: runtimeEnv,
1503
+ env: runtimeSessionEnv,
1485
1504
  repoRoot: REPO_ROOT,
1486
1505
  cliArgs: buildPoolCopilotCliArgs(mcpConfigPath),
1487
1506
  });
@@ -1687,7 +1706,50 @@ async function launchCopilotThread(prompt, cwd, timeoutMs, extra = {}) {
1687
1706
  // Don't let this timer keep the process alive
1688
1707
  if (ht && typeof ht.unref === "function") ht.unref();
1689
1708
  });
1690
- await Promise.race([sendPromise, copilotHardTimeout]);
1709
+ // Some Copilot SDK builds can stall sendAndWait without yielding any
1710
+ // events. Apply an early watchdog so we can fail over before the full
1711
+ // task timeout elapses.
1712
+ let copilotFirstEventTimeoutMs = null;
1713
+ const firstEventWatch =
1714
+ typeof session.on === "function"
1715
+ ? new Promise((_, reject) => {
1716
+ copilotFirstEventTimeoutMs = getFirstEventTimeoutMs(timeoutMs);
1717
+ if (!Number.isFinite(copilotFirstEventTimeoutMs) || copilotFirstEventTimeoutMs <= 0) {
1718
+ return;
1719
+ }
1720
+ let settled = false;
1721
+ let off = null;
1722
+ const timer = setTimeout(() => {
1723
+ settled = true;
1724
+ if (typeof off === "function") off();
1725
+ reject(new Error("timeout_no_events"));
1726
+ }, clampTimerDelayMs(copilotFirstEventTimeoutMs, "copilot-first-event-timeout"));
1727
+ if (timer && typeof timer.unref === "function") timer.unref();
1728
+ off = session.on((event) => {
1729
+ if (settled) return;
1730
+ if (!event || typeof event !== "object") return;
1731
+ const t = String(event.type || "");
1732
+ if (
1733
+ t === "assistant.message" ||
1734
+ t === "assistant.message_delta" ||
1735
+ t === "session.idle" ||
1736
+ t === "session.error"
1737
+ ) {
1738
+ settled = true;
1739
+ clearTimeout(timer);
1740
+ if (typeof off === "function") off();
1741
+ }
1742
+ });
1743
+ stopCopilotFirstEventWatch = () => {
1744
+ settled = true;
1745
+ clearTimeout(timer);
1746
+ if (typeof off === "function") off();
1747
+ };
1748
+ })
1749
+ : null;
1750
+ await Promise.race(
1751
+ [sendPromise, copilotHardTimeout, firstEventWatch].filter(Boolean),
1752
+ );
1691
1753
  }
1692
1754
 
1693
1755
  const output =
@@ -1709,6 +1771,7 @@ async function launchCopilotThread(prompt, cwd, timeoutMs, extra = {}) {
1709
1771
  err?.name === "AbortError" ||
1710
1772
  errMsg === "timeout" ||
1711
1773
  errMsg === "hard_timeout" ||
1774
+ errMsg === "timeout_no_events" ||
1712
1775
  errMsg === "timeout_waiting_for_idle" ||
1713
1776
  isIdleWaitTimeout;
1714
1777
 
@@ -1730,11 +1793,15 @@ async function launchCopilotThread(prompt, cwd, timeoutMs, extra = {}) {
1730
1793
  }
1731
1794
 
1732
1795
  if (isTimeout) {
1796
+ const noEventsSuffix =
1797
+ errMsg === "timeout_no_events"
1798
+ ? ` (no events received within ${getFirstEventTimeoutMs(timeoutMs)}ms)`
1799
+ : "";
1733
1800
  return {
1734
1801
  success: false,
1735
1802
  output: "",
1736
1803
  items: allItems,
1737
- error: `${TAG} copilot timeout after ${timeoutMs}ms${isIdleWaitTimeout ? " waiting for session.idle" : ""}`,
1804
+ error: `${TAG} copilot timeout after ${timeoutMs}ms${isIdleWaitTimeout ? " waiting for session.idle" : noEventsSuffix}`,
1738
1805
  sdk: "copilot",
1739
1806
  threadId: resumeThreadId,
1740
1807
  };
@@ -1766,6 +1833,13 @@ async function launchCopilotThread(prompt, cwd, timeoutMs, extra = {}) {
1766
1833
  threadId: resumeThreadId,
1767
1834
  };
1768
1835
  } finally {
1836
+ try {
1837
+ if (typeof stopCopilotFirstEventWatch === "function") {
1838
+ stopCopilotFirstEventWatch();
1839
+ }
1840
+ } catch {
1841
+ /* best effort */
1842
+ }
1769
1843
  clearAbortScope();
1770
1844
  if (steerKey) unregisterActiveSession(steerKey);
1771
1845
  try {
@@ -1832,6 +1906,7 @@ async function launchClaudeThread(prompt, cwd, timeoutMs, extra = {}) {
1832
1906
  model: requestedModel = null,
1833
1907
  taskKey: steerKey = null,
1834
1908
  envOverrides = null,
1909
+ systemPrompt = "",
1835
1910
  } = extra;
1836
1911
 
1837
1912
  // ── 1. Load the SDK ──────────────────────────────────────────────────────
@@ -1856,10 +1931,11 @@ async function launchClaudeThread(prompt, cwd, timeoutMs, extra = {}) {
1856
1931
  envOverrides && typeof envOverrides === "object"
1857
1932
  ? { ...process.env, ...envOverrides }
1858
1933
  : process.env;
1934
+ const runtimeSessionEnv = applyNodeWarningSuppressionEnv(runtimeEnv);
1859
1935
  const apiKey =
1860
- runtimeEnv.ANTHROPIC_API_KEY ||
1861
- runtimeEnv.CLAUDE_API_KEY ||
1862
- runtimeEnv.CLAUDE_KEY ||
1936
+ runtimeSessionEnv.ANTHROPIC_API_KEY ||
1937
+ runtimeSessionEnv.CLAUDE_API_KEY ||
1938
+ runtimeSessionEnv.CLAUDE_KEY ||
1863
1939
  undefined;
1864
1940
 
1865
1941
  // ── 3. Build message queue ───────────────────────────────────────────────
@@ -1974,7 +2050,10 @@ async function launchClaudeThread(prompt, cwd, timeoutMs, extra = {}) {
1974
2050
  `# ${extractTaskHeading(prompt)}\n\n${prompt}\n\n---\n` +
1975
2051
  'Do NOT respond with "Ready" or ask what to do. EXECUTE this task.';
1976
2052
 
1977
- msgQueue.push(makeUserMessage(formattedPrompt));
2053
+ const anchoredPrompt = String(systemPrompt || "").trim()
2054
+ ? `${String(systemPrompt).trim()}\n\n---\n\n${formattedPrompt}`
2055
+ : formattedPrompt;
2056
+ msgQueue.push(makeUserMessage(anchoredPrompt));
1978
2057
 
1979
2058
  // Register active session for mid-execution steering (Claude uses message queue)
1980
2059
  if (steerKey) {
@@ -1999,28 +2078,28 @@ async function launchClaudeThread(prompt, cwd, timeoutMs, extra = {}) {
1999
2078
  settingSources: ["user", "project"],
2000
2079
  permissionMode:
2001
2080
  claudePermissionMode ||
2002
- runtimeEnv.CLAUDE_PERMISSION_MODE ||
2081
+ runtimeSessionEnv.CLAUDE_PERMISSION_MODE ||
2003
2082
  "bypassPermissions",
2004
2083
  };
2005
2084
  if (apiKey) options.apiKey = apiKey;
2006
2085
  const explicitAllowedTools = normalizeList(claudeAllowedTools);
2007
2086
  const allowedTools = explicitAllowedTools.length
2008
2087
  ? explicitAllowedTools
2009
- : normalizeList(runtimeEnv.CLAUDE_ALLOWED_TOOLS);
2088
+ : normalizeList(runtimeSessionEnv.CLAUDE_ALLOWED_TOOLS);
2010
2089
  if (allowedTools.length) {
2011
2090
  options.allowedTools = allowedTools;
2012
2091
  }
2013
2092
 
2014
2093
  const model = String(
2015
2094
  requestedModel ||
2016
- runtimeEnv.CLAUDE_MODEL ||
2017
- runtimeEnv.CLAUDE_CODE_MODEL ||
2018
- runtimeEnv.ANTHROPIC_MODEL ||
2095
+ runtimeSessionEnv.CLAUDE_MODEL ||
2096
+ runtimeSessionEnv.CLAUDE_CODE_MODEL ||
2097
+ runtimeSessionEnv.ANTHROPIC_MODEL ||
2019
2098
  "",
2020
2099
  ).trim();
2021
2100
  if (model) options.model = model;
2022
2101
 
2023
- const result = await withTemporaryEnv(runtimeEnv, async () =>
2102
+ const result = await withTemporaryEnv(runtimeSessionEnv, async () =>
2024
2103
  queryFn({
2025
2104
  prompt: msgQueue.iterator(),
2026
2105
  options,
@@ -2803,7 +2882,8 @@ async function resumeCodexThread(threadId, prompt, cwd, timeoutMs, extra = {}) {
2803
2882
  envOverrides && typeof envOverrides === "object"
2804
2883
  ? { ...process.env, ...envOverrides }
2805
2884
  : process.env;
2806
- const codexOpts = buildCodexSdkOptions(codexRuntimeEnv);
2885
+ const codexSessionEnv = applyNodeWarningSuppressionEnv(codexRuntimeEnv);
2886
+ const codexOpts = buildCodexSdkOptions(codexSessionEnv);
2807
2887
  const modelOverride = String(extra?.model || "").trim();
2808
2888
  if (modelOverride) {
2809
2889
  codexOpts.env = { ...(codexOpts.env || {}), CODEX_MODEL: modelOverride };
@@ -3017,6 +3097,7 @@ export async function launchOrResumeThread(
3017
3097
  restBaseEnv,
3018
3098
  resolvedGithubToken,
3019
3099
  );
3100
+ restExtra.envOverrides = applyNodeWarningSuppressionEnv(restExtra.envOverrides);
3020
3101
  // Pass taskKey through as steer key so SDK launchers can register active sessions
3021
3102
  restExtra.taskKey = taskKey;
3022
3103
  if (restExtra.sdk) {
@@ -3039,19 +3120,32 @@ export async function launchOrResumeThread(
3039
3120
  // Check registry for existing thread
3040
3121
  const existing = threadRegistry.get(taskKey);
3041
3122
  if (existing && existing.alive && existing.threadId) {
3123
+ const turnsRemaining = MAX_THREAD_TURNS - existing.turnCount;
3124
+ const shouldForceRefreshMonitorMonitorThread =
3125
+ String(taskKey || "").trim() === MONITOR_MONITOR_TASK_KEY &&
3126
+ turnsRemaining <= MONITOR_MONITOR_THREAD_REFRESH_TURNS_REMAINING;
3127
+ if (shouldForceRefreshMonitorMonitorThread) {
3128
+ console.log(
3129
+ `${TAG} proactively refreshing monitor-monitor thread with ${turnsRemaining} turns remaining (threshold=${MONITOR_MONITOR_THREAD_REFRESH_TURNS_REMAINING})`,
3130
+ );
3131
+ existing.alive = false;
3132
+ threadRegistry.set(taskKey, existing);
3133
+ saveThreadRegistry().catch(() => {});
3134
+ }
3135
+
3042
3136
  // Approaching-exhaustion warning (non-blocking — still proceeds with resume)
3043
3137
  if (
3044
3138
  existing.turnCount >= THREAD_EXHAUSTION_WARNING_THRESHOLD &&
3045
- existing.turnCount < MAX_THREAD_TURNS
3139
+ existing.turnCount < MAX_THREAD_TURNS &&
3140
+ existing.alive
3046
3141
  ) {
3047
- const remaining = MAX_THREAD_TURNS - existing.turnCount;
3048
3142
  console.warn(
3049
- `${TAG} :alert: thread for task "${taskKey}" approaching exhaustion: ${existing.turnCount}/${MAX_THREAD_TURNS} turns (${remaining} remaining)`,
3143
+ `${TAG} :alert: thread for task "${taskKey}" approaching exhaustion: ${existing.turnCount}/${MAX_THREAD_TURNS} turns (${turnsRemaining} remaining)`,
3050
3144
  );
3051
3145
  }
3052
3146
 
3053
3147
  // Check if thread has exceeded max turns — force fresh start
3054
- if (existing.turnCount >= MAX_THREAD_TURNS) {
3148
+ if (existing.alive && existing.turnCount >= MAX_THREAD_TURNS) {
3055
3149
  console.warn(
3056
3150
  `${TAG} thread for task "${taskKey}" exceeded ${MAX_THREAD_TURNS} turns (has ${existing.turnCount}) — invalidating and starting fresh`,
3057
3151
  );
@@ -3059,7 +3153,10 @@ export async function launchOrResumeThread(
3059
3153
  threadRegistry.set(taskKey, existing);
3060
3154
  saveThreadRegistry().catch(() => {});
3061
3155
  // Fall through to fresh launch below
3062
- } else if (Date.now() - existing.createdAt > THREAD_MAX_ABSOLUTE_AGE_MS) {
3156
+ } else if (
3157
+ existing.alive &&
3158
+ Date.now() - existing.createdAt > THREAD_MAX_ABSOLUTE_AGE_MS
3159
+ ) {
3063
3160
  console.warn(
3064
3161
  `${TAG} thread for task "${taskKey}" exceeded absolute age limit — invalidating and starting fresh`,
3065
3162
  );
@@ -69,25 +69,17 @@ const ALERT_COOLDOWN_RETENTION_MS = Math.max(
69
69
  FAILED_SESSION_TRANSIENT_ALERT_MIN_COOLDOWN_MS * 3,
70
70
  3 * 60 * 60 * 1000,
71
71
  ); // keep cooldown history bounded
72
- const ALERT_COOLDOWN_REPLAY_MIN_BYTES = 256 * 1024;
73
- const ALERT_COOLDOWN_REPLAY_DEFAULT_MAX_BYTES = 8 * 1024 * 1024;
74
- const ALERT_COOLDOWN_REPLAY_MAX_CAP_BYTES = 64 * 1024 * 1024;
75
-
76
72
  function normalizeReplayMaxBytes(value) {
73
+ const fallbackBytes = 8 * 1024 * 1024;
74
+ const minBytes = 256 * 1024;
75
+ const maxBytes = 64 * 1024 * 1024;
77
76
  const parsed = Number(value);
78
- if (!Number.isFinite(parsed) || parsed <= 0) {
79
- return ALERT_COOLDOWN_REPLAY_DEFAULT_MAX_BYTES;
80
- }
81
- const rounded = Math.floor(parsed);
82
- return Math.min(
83
- ALERT_COOLDOWN_REPLAY_MAX_CAP_BYTES,
84
- Math.max(ALERT_COOLDOWN_REPLAY_MIN_BYTES, rounded),
85
- );
77
+ if (!Number.isFinite(parsed)) return fallbackBytes;
78
+ const rounded = Math.trunc(parsed);
79
+ return Math.min(maxBytes, Math.max(minBytes, rounded));
86
80
  }
87
-
88
- const ALERT_COOLDOWN_REPLAY_MAX_BYTES = Math.max(
89
- ALERT_COOLDOWN_REPLAY_MIN_BYTES,
90
- normalizeReplayMaxBytes(process.env.AGENT_ALERT_COOLDOWN_REPLAY_MAX_BYTES),
81
+ const ALERT_COOLDOWN_REPLAY_MAX_BYTES = normalizeReplayMaxBytes(
82
+ process.env.AGENT_ALERT_COOLDOWN_REPLAY_MAX_BYTES,
91
83
  );
92
84
 
93
85
  function getAlertCooldownMs(alert) {
@@ -0,0 +1,164 @@
1
+ /**
2
+ * retry-queue.mjs
3
+ *
4
+ * Pure reducer utilities for retry queue state.
5
+ */
6
+
7
/**
 * Retention window in milliseconds (24 hours). Added to an item's
 * `nextAttemptAt` to derive `expiresAt` when the item does not supply one.
 */
const DEFAULT_RETENTION_MS = 24 * 60 * 60 * 1000;

/**
 * Format a timestamp as its UTC calendar day (`YYYY-MM-DD`).
 *
 * `Date.prototype.toISOString` throws a RangeError for an invalid date
 * (NaN, or a value outside the representable range of about ±8.64e15 ms).
 * Such input falls back to the current time instead of throwing, so a bad
 * timestamp cannot crash callers.
 *
 * @param {number|string|Date} ts - Timestamp accepted by the `Date` constructor.
 * @returns {string} First ten characters of the ISO-8601 representation.
 */
function isoDayKey(ts) {
  const date = new Date(ts);
  const safeDate = Number.isNaN(date.getTime()) ? new Date() : date;
  return safeDate.toISOString().slice(0, 10);
}
12
+
13
/**
 * Coerce a value to a non-negative integer.
 *
 * `null`, `undefined`, and blank strings count as missing. This matters
 * because `Number(null)` and `Number("")` are both 0 and would otherwise be
 * accepted as a real value.
 *
 * @param {unknown} value - Candidate value.
 * @param {*} fallback - Returned when `value` is missing or not a finite number.
 * @returns {*} Truncated value clamped to >= 0, or `fallback`.
 */
function toNonNegativeInt(value, fallback) {
  if (value == null) return fallback;
  if (typeof value === "string" && value.trim() === "") return fallback;
  const parsed = Number(value);
  return Number.isFinite(parsed) ? Math.max(0, Math.trunc(parsed)) : fallback;
}

/**
 * Normalize a raw retry-queue entry into its canonical shape.
 *
 * The function is idempotent: normalizing an already-normalized item returns
 * an equal item. In particular `maxRetries: null` stays `null`.
 *
 * @param {object|null} [raw] - Untrusted item; `null` / non-objects are treated as empty.
 * @param {number} [now] - Fallback timestamp (ms) for `nextAttemptAt` and `updatedAt`.
 * @returns {object|null} Normalized item, or `null` when `taskId` is blank.
 */
function normalizeItem(raw = {}, now = Date.now()) {
  // Default parameters only cover `undefined`; guard `null` and primitives too.
  const source = raw !== null && typeof raw === "object" ? raw : {};
  const taskId = String(source.taskId || "").trim();
  if (!taskId) return null;

  const nextAttemptAt = toNonNegativeInt(source.nextAttemptAt, now);
  return {
    taskId,
    taskTitle: String(source.taskTitle || "").trim(),
    lastError: String(source.lastError || "").trim(),
    retryCount: toNonNegativeInt(source.retryCount, 0),
    // `null` means no maximum was supplied.
    maxRetries: toNonNegativeInt(source.maxRetries, null),
    nextAttemptAt,
    status: String(source.status || "pending"),
    reason: String(source.reason || "").trim(),
    updatedAt: toNonNegativeInt(source.updatedAt, now),
    // Without an explicit expiry, keep the item for one retention window
    // past its next scheduled attempt.
    expiresAt: toNonNegativeInt(source.expiresAt, nextAttemptAt + DEFAULT_RETENTION_MS),
  };
}
43
+
44
/**
 * Flatten the task map into an array ordered for processing: earliest
 * `nextAttemptAt` first, ties broken by earliest `updatedAt`.
 *
 * @param {Map<string, object>} itemsByTask - Queue entries keyed by task id.
 * @returns {object[]} New sorted array; the map itself is left untouched.
 */
function materialize(itemsByTask) {
  const byScheduleThenAge = (left, right) =>
    left.nextAttemptAt === right.nextAttemptAt
      ? left.updatedAt - right.updatedAt
      : left.nextAttemptAt - right.nextAttemptAt;

  const entries = [...itemsByTask.values()];
  entries.sort(byScheduleThenAge);
  return entries;
}
50
+
51
/**
 * Return the stats object that applies to the day containing `now`.
 *
 * When the stored `dayKey` already matches, the existing stats object is
 * returned as-is. Otherwise a copy is returned with the new `dayKey` and
 * `totalRetriesToday` reset to 0; all other fields carry over.
 *
 * @param {{stats: object}} state - Queue state; only `state.stats` is read.
 * @param {number} now - Current timestamp in milliseconds.
 * @returns {object} Stats for the current day.
 */
function ensureDay(state, now) {
  const today = isoDayKey(now);
  const current = state.stats;
  return current.dayKey === today
    ? current
    : Object.assign({}, current, { dayKey: today, totalRetriesToday: 0 });
}
60
+
61
/**
 * Create an empty retry-queue state.
 *
 * @param {number} [now] - Timestamp (ms) used to stamp the stats day key.
 * @returns {{itemsByTask: Map, stats: object}} State with no queued items,
 *   zeroed counters, and an empty exhausted-task list.
 */
export function createRetryQueueState(now = Date.now()) {
  const itemsByTask = new Map();
  const stats = {
    dayKey: isoDayKey(now),
    totalRetriesToday: 0,
    peakRetryDepth: 0,
    exhaustedTaskIds: [],
  };
  return { itemsByTask, stats };
}
72
+
73
/**
 * Pure reducer for retry-queue state. The input state is never mutated; a new
 * `{ itemsByTask, stats }` pair is returned for every call.
 *
 * Supported `action.type` values (trimmed, case-insensitive):
 *  - "add" / "upsert": normalize `action.item` and store it under its task id,
 *    raising `peakRetryDepth` if the item's retry count exceeds it.
 *  - "remove": delete the entry for `action.taskId`.
 *  - "bump-count": set the retry count to `action.retryCount`, or previous + 1
 *    when none is given, merge `action.item`, and increment `totalRetriesToday`.
 *  - "mark-exhausted": drop the entry and record its id in `exhaustedTaskIds`.
 *  - "expire": drop every entry whose `expiresAt` is at or before `now`.
 * Any other type returns the copied state unchanged.
 *
 * @param {{itemsByTask?: Map, stats?: object}|null|undefined} state - Previous state.
 * @param {object} [action] - Action descriptor; `action.now` (ms) overrides the clock.
 * @returns {{itemsByTask: Map, stats: object}} Next state.
 */
export function reduceRetryQueue(state, action = {}) {
  // Default parameters only cover `undefined`; tolerate `null` / primitives too.
  const act = action !== null && typeof action === "object" ? action : {};

  // Number(null) and Number("") are 0, so treat those as "not provided"
  // rather than as a real timestamp or retry count.
  const toCount = (value) => {
    if (value == null) return null;
    if (typeof value === "string" && value.trim() === "") return null;
    const parsed = Number(value);
    return Number.isFinite(parsed) ? Math.max(0, Math.trunc(parsed)) : null;
  };

  const now = toCount(act.now) ?? Date.now();
  const type = String(act.type || "").trim().toLowerCase();
  const nextItems = new Map(state?.itemsByTask || []);
  let stats = ensureDay(
    state && state.stats ? state : createRetryQueueState(now),
    now,
  );

  switch (type) {
    case "add":
    case "upsert": {
      // `?? undefined` lets a null item fall back to normalizeItem's own default.
      const item = normalizeItem(act.item ?? undefined, now);
      if (!item) break;
      nextItems.set(item.taskId, item);
      // Use the same `|| 0` fallback as "bump-count" so a stats object without
      // peakRetryDepth still records the peak.
      if (item.retryCount > (stats.peakRetryDepth || 0)) {
        stats = { ...stats, peakRetryDepth: item.retryCount };
      }
      break;
    }

    case "remove": {
      const taskId = String(act.taskId || "").trim();
      if (taskId) nextItems.delete(taskId);
      break;
    }

    case "bump-count": {
      const taskId = String(act.taskId || "").trim();
      if (!taskId) break;
      const prev = nextItems.get(taskId) || normalizeItem({ taskId }, now);
      if (!prev) break;
      const nextRetry = toCount(act.retryCount) ?? prev.retryCount + 1;
      const merged = {
        ...prev,
        ...act.item,
        taskId,
        retryCount: nextRetry,
        updatedAt: now,
      };
      // A normalized item stores "no maximum" as null. Pass it on as undefined
      // so re-normalization does not coerce it to 0.
      if (merged.maxRetries === null) merged.maxRetries = undefined;
      nextItems.set(taskId, normalizeItem(merged, now));
      stats = {
        ...stats,
        totalRetriesToday: Math.max(0, (stats.totalRetriesToday || 0) + 1),
        peakRetryDepth: Math.max(stats.peakRetryDepth || 0, nextRetry),
      };
      break;
    }

    case "mark-exhausted": {
      const taskId = String(act.taskId || "").trim();
      if (!taskId) break;
      const exhausted = new Set(stats.exhaustedTaskIds || []);
      exhausted.add(taskId);
      nextItems.delete(taskId);
      stats = { ...stats, exhaustedTaskIds: Array.from(exhausted) };
      break;
    }

    case "expire": {
      // Deleting from a Map while iterating it is safe in JavaScript.
      for (const [taskId, item] of nextItems) {
        if (!item || item.expiresAt <= now) {
          nextItems.delete(taskId);
        }
      }
      break;
    }

    default:
      break;
  }

  return { itemsByTask: nextItems, stats };
}
150
+
151
/**
 * Produce a plain, read-only view of the queue for reporting.
 *
 * @param {{itemsByTask?: Map, stats?: object}|null|undefined} state - Queue state.
 * @returns {{count: number, items: object[], stats: object}} The items as
 *   ordered by `materialize`, their count, and numeric stats. The
 *   `exhaustedTaskIds` list is stringified, trimmed, stripped of blanks and
 *   de-duplicated, keeping first-seen order.
 */
export function snapshotRetryQueue(state) {
  const items = materialize(state?.itemsByTask || new Map());
  const rawStats = state?.stats;

  let exhaustedTaskIds = [];
  if (Array.isArray(rawStats?.exhaustedTaskIds)) {
    const unique = new Set();
    for (const id of rawStats.exhaustedTaskIds) {
      const cleaned = String(id || "").trim();
      if (cleaned) unique.add(cleaned);
    }
    exhaustedTaskIds = [...unique];
  }

  return {
    count: items.length,
    items,
    stats: {
      totalRetriesToday: Number(rawStats?.totalRetriesToday || 0),
      peakRetryDepth: Number(rawStats?.peakRetryDepth || 0),
      exhaustedTaskIds,
    },
  };
}
@@ -5,12 +5,37 @@
5
5
 
6
6
  "_comment_workflowFirst": "Set to true to run everything as workflows (task lifecycle, PR management, etc.)",
7
7
  "workflowFirst": false,
8
+ "workflows": [
9
+ {
10
+ "type": "continuation-loop",
11
+ "enabled": false,
12
+ "taskId": "TASK-123",
13
+ "maxTurns": 8,
14
+ "terminalStates": ["done", "cancelled"],
15
+ "stuckThresholdMs": 300000,
16
+ "onStuck": "retry"
17
+ }
18
+ ],
8
19
  "kanban": {
9
20
  "backend": "vk"
10
21
  },
11
22
  "internalExecutor": {
12
23
  "mode": "vk"
13
24
  },
25
+ "workflows": {
26
+ "code-review": {
27
+ "type": "sequential",
28
+ "stages": ["implement", "test", "review"]
29
+ },
30
+ "parallel-search": {
31
+ "type": "fanout",
32
+ "stages": [
33
+ { "name": "search-codex", "sdk": "codex" },
34
+ { "name": "search-claude", "sdk": "claude" },
35
+ { "name": "search-copilot", "sdk": "copilot" }
36
+ ]
37
+ }
38
+ },
14
39
  "auth": {
15
40
  "copilot": {
16
41
  "sources": ["env", "cli"],