bosun 0.35.2 → 0.35.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -1
- package/agent-hooks.mjs +7 -1
- package/agent-pool.mjs +16 -0
- package/agent-prompts.mjs +190 -4
- package/agent-sdk.mjs +6 -1
- package/agent-work-analyzer.mjs +48 -9
- package/autofix.mjs +32 -18
- package/bosun.schema.json +1 -1
- package/kanban-adapter.mjs +62 -12
- package/monitor.mjs +25 -6
- package/opencode-shell.mjs +881 -0
- package/package.json +5 -2
- package/primary-agent.mjs +43 -0
- package/session-tracker.mjs +55 -1
- package/setup.mjs +33 -4
- package/task-executor.mjs +43 -14
- package/ui/app.js +10 -7
- package/ui/components/chat-view.js +31 -9
- package/ui/components/session-list.js +20 -4
- package/ui/demo.html +49 -0
- package/ui/modules/router.js +2 -0
- package/ui/tabs/agents.js +66 -8
- package/ui/tabs/workflows.js +83 -0
- package/ui-server.mjs +236 -5
- package/workflow-engine.mjs +664 -10
- package/workflow-nodes.mjs +250 -1
- package/workflow-templates/github.mjs +389 -71
- package/workflow-templates/planning.mjs +31 -11
- package/workflow-templates.mjs +219 -2
package/README.md
CHANGED
|
@@ -46,13 +46,26 @@ Requires:
|
|
|
46
46
|
|
|
47
47
|
## What Bosun does
|
|
48
48
|
|
|
49
|
-
- Routes work across Codex, Copilot, and
|
|
49
|
+
- Routes work across Codex, Copilot, Claude, and OpenCode executors
|
|
50
50
|
- Automates retries, failover, and PR lifecycle management
|
|
51
51
|
- Auto-labels attached PRs with `bosun-needs-fix` when CI fails (`Build + Tests`)
|
|
52
|
+
- Merges passing PRs automatically through the **Bosun PR Watchdog** with a mandatory review gate (prevents destructive merges)
|
|
53
|
+
- Persists workflow runs to disk and auto-resumes on restart
|
|
52
54
|
- Monitors runs and recovers from stalled or broken states
|
|
53
55
|
- Provides Telegram control and a Mini App dashboard
|
|
54
56
|
- Integrates with GitHub, Jira, and Vibe-Kanban boards
|
|
55
57
|
|
|
58
|
+
### Executor quick-start
|
|
59
|
+
|
|
60
|
+
| Executor | `primaryAgent` value | Key env vars |
|
|
61
|
+
| ----------------- | -------------------- | ------------------------------------------------------------------------------------- |
|
|
62
|
+
| Codex (OpenAI) | `codex-sdk` | `OPENAI_API_KEY` |
|
|
63
|
+
| Copilot (VS Code) | `copilot-sdk` | VS Code session |
|
|
64
|
+
| Claude | `claude-sdk` | `ANTHROPIC_API_KEY` |
|
|
65
|
+
| OpenCode | `opencode-sdk` | `OPENCODE_MODEL` (e.g. `anthropic/claude-opus-4-5`), `OPENCODE_PORT` (default `4096`) |
|
|
66
|
+
|
|
67
|
+
Set `primaryAgent` in `.bosun/bosun.config.json` or choose an executor preset during `bosun --setup`.
|
|
68
|
+
|
|
56
69
|
---
|
|
57
70
|
|
|
58
71
|
## Telegram weekly report
|
package/agent-hooks.mjs
CHANGED
|
@@ -144,7 +144,7 @@ export const HOOK_EVENTS = Object.freeze([
|
|
|
144
144
|
* Canonical SDK names.
|
|
145
145
|
* @type {readonly string[]}
|
|
146
146
|
*/
|
|
147
|
-
const VALID_SDKS = Object.freeze(["codex", "copilot", "claude"]);
|
|
147
|
+
const VALID_SDKS = Object.freeze(["codex", "copilot", "claude", "opencode"]);
|
|
148
148
|
|
|
149
149
|
/**
|
|
150
150
|
* Wildcard indicating a hook applies to all SDKs.
|
|
@@ -715,6 +715,12 @@ export function registerBuiltinHooks(options = {}) {
|
|
|
715
715
|
});
|
|
716
716
|
}
|
|
717
717
|
|
|
718
|
+
// NOTE: Blind PostPR auto-merge has been intentionally removed.
|
|
719
|
+
// Use the "Bosun PR Watchdog" workflow template (template-bosun-pr-watchdog)
|
|
720
|
+
// to opt-in to automatic merging of bosun-attached PRs after CI passes.
|
|
721
|
+
// This prevents accidental merges in public repos and repos without the
|
|
722
|
+
// required GitHub branch-protection settings for auto-merge.
|
|
723
|
+
|
|
718
724
|
console.log(`${TAG} built-in hooks registered`);
|
|
719
725
|
}
|
|
720
726
|
|
package/agent-pool.mjs
CHANGED
|
@@ -355,6 +355,12 @@ function shouldFallbackForSdkError(error) {
|
|
|
355
355
|
if (message.includes("overloaded") || message.includes("server error")) {
|
|
356
356
|
return true;
|
|
357
357
|
}
|
|
358
|
+
// Spawn failures: binary not found on Windows (.cmd resolution)
|
|
359
|
+
if (message.includes("enoent")) return true;
|
|
360
|
+
if (message.includes("file not found") || message.includes("file specified")) return true;
|
|
361
|
+
if (message.includes("os error 2")) return true;
|
|
362
|
+
if (message.includes("spawn failed")) return true;
|
|
363
|
+
if (message.includes("codex exec exited")) return true;
|
|
358
364
|
return false;
|
|
359
365
|
}
|
|
360
366
|
|
|
@@ -635,6 +641,16 @@ function shouldApplySdkCooldown(error) {
|
|
|
635
641
|
if (message.includes("enotfound")) return true;
|
|
636
642
|
if (message.includes("connection reset")) return true;
|
|
637
643
|
if (message.includes("etimedout")) return true;
|
|
644
|
+
// Spawn failures (binary not found) — apply cooldown so we try fallback SDK
|
|
645
|
+
if (message.includes("enoent")) return true;
|
|
646
|
+
if (message.includes("file not found") || message.includes("file specified")) return true;
|
|
647
|
+
if (message.includes("os error 2")) return true;
|
|
648
|
+
if (message.includes("spawn failed")) return true;
|
|
649
|
+
// Spawn failures: codex binary not found on Windows (.cmd not resolved)
|
|
650
|
+
if (message.includes("enoent")) return true;
|
|
651
|
+
if (message.includes("file not found") || message.includes("file specified")) return true;
|
|
652
|
+
if (message.includes("os error 2")) return true;
|
|
653
|
+
if (message.includes("spawn failed")) return true;
|
|
638
654
|
return false;
|
|
639
655
|
}
|
|
640
656
|
|
package/agent-prompts.mjs
CHANGED
|
@@ -206,11 +206,17 @@ You generate production-grade backlog tasks for autonomous executors.
|
|
|
206
206
|
- Every task title starts with one size label: [xs], [s], [m], [l], [xl], [xxl].
|
|
207
207
|
- Prefer task sets that can run in parallel with low file overlap.
|
|
208
208
|
- Do not call any kanban API, CLI, or external service to create tasks.
|
|
209
|
-
|
|
209
|
+
The workflow will automatically materialize your output into kanban tasks.
|
|
210
|
+
- Output must be machine-parseable JSON — see Output Contract below.
|
|
210
211
|
|
|
211
|
-
## Output Contract (
|
|
212
|
+
## Output Contract (MANDATORY — STRICT)
|
|
212
213
|
|
|
213
|
-
|
|
214
|
+
Your ENTIRE response must be a single fenced JSON block. Do NOT include any
|
|
215
|
+
text, commentary, explanations, or markdown before or after the JSON block.
|
|
216
|
+
The downstream parser extracts JSON from fenced blocks — any deviation causes
|
|
217
|
+
task creation to fail silently.
|
|
218
|
+
|
|
219
|
+
Return exactly this shape:
|
|
214
220
|
|
|
215
221
|
\`\`\`json
|
|
216
222
|
{
|
|
@@ -228,7 +234,8 @@ Return exactly one fenced json block with this shape:
|
|
|
228
234
|
\`\`\`
|
|
229
235
|
|
|
230
236
|
Rules:
|
|
231
|
-
-
|
|
237
|
+
- The \`tasks\` array MUST contain at least the requested task count.
|
|
238
|
+
- Do NOT output partial JSON, truncated arrays, or commentary mixed with JSON.
|
|
232
239
|
- Keep titles unique and specific.
|
|
233
240
|
- Keep file overlap low across tasks to maximize parallel execution.
|
|
234
241
|
- **Module branch routing:** When the task title follows conventional commit format
|
|
@@ -236,6 +243,185 @@ Rules:
|
|
|
236
243
|
This routes the task to the module's dedicated branch for parallel, isolated development.
|
|
237
244
|
Examples: \`feat(veid):\` → \`"base_branch": "origin/veid"\`, \`fix(market):\` → \`"base_branch": "origin/market"\`.
|
|
238
245
|
Omit \`base_branch\` for cross-cutting tasks that span multiple modules.
|
|
246
|
+
`,
|
|
247
|
+
taskManager: `# Bosun Task Manager Agent
|
|
248
|
+
|
|
249
|
+
You are a task management agent for Bosun, an AI orchestrator. You have full CRUD access to the
|
|
250
|
+
task backlog via CLI commands and REST API. Use these tools to create, read, update, and delete tasks.
|
|
251
|
+
|
|
252
|
+
## Available Interfaces
|
|
253
|
+
|
|
254
|
+
You have **three ways** to manage tasks. Use whichever fits your context:
|
|
255
|
+
|
|
256
|
+
### 1. CLI Commands (preferred for agents with shell access)
|
|
257
|
+
|
|
258
|
+
\`\`\`bash
|
|
259
|
+
# List tasks
|
|
260
|
+
bosun task list # all tasks
|
|
261
|
+
bosun task list --status todo --json # filtered, JSON output
|
|
262
|
+
bosun task list --priority high --tag ui # by priority and tag
|
|
263
|
+
bosun task list --search "provider" # text search
|
|
264
|
+
|
|
265
|
+
# Create tasks
|
|
266
|
+
bosun task create --title "[s] fix(cli): Handle exit codes" --priority high --tags "cli,fix"
|
|
267
|
+
bosun task create '{"title":"[m] feat(ui): Dark mode","description":"Add dark mode toggle","tags":["ui"]}'
|
|
268
|
+
|
|
269
|
+
# Bulk create from JSON array
|
|
270
|
+
bosun task create '[{"title":"[s] fix: Bug A"},{"title":"[m] feat: Feature B"}]'
|
|
271
|
+
|
|
272
|
+
# Get task details
|
|
273
|
+
bosun task get <id> # full ID or prefix (e.g. "abc123")
|
|
274
|
+
bosun task get abc123 --json # JSON output
|
|
275
|
+
|
|
276
|
+
# Update tasks
|
|
277
|
+
bosun task update abc123 --status todo --priority critical
|
|
278
|
+
bosun task update abc123 '{"tags":["ui","urgent"],"baseBranch":"origin/ui-rework"}'
|
|
279
|
+
|
|
280
|
+
# Delete tasks
|
|
281
|
+
bosun task delete abc123
|
|
282
|
+
|
|
283
|
+
# Statistics
|
|
284
|
+
bosun task stats
|
|
285
|
+
bosun task stats --json
|
|
286
|
+
|
|
287
|
+
# Bulk import from JSON file
|
|
288
|
+
bosun task import ./backlog.json
|
|
289
|
+
|
|
290
|
+
# Trigger AI task planner
|
|
291
|
+
bosun task plan --count 5 --reason "Sprint planning"
|
|
292
|
+
\`\`\`
|
|
293
|
+
|
|
294
|
+
### 2. REST API (port 18432 — always available when bosun daemon runs)
|
|
295
|
+
|
|
296
|
+
\`\`\`bash
|
|
297
|
+
# List tasks
|
|
298
|
+
curl http://127.0.0.1:18432/api/tasks
|
|
299
|
+
curl "http://127.0.0.1:18432/api/tasks?status=todo"
|
|
300
|
+
|
|
301
|
+
# Get task detail
|
|
302
|
+
curl "http://127.0.0.1:18432/api/tasks/detail?id=<task-id>"
|
|
303
|
+
|
|
304
|
+
# Create task
|
|
305
|
+
curl -X POST http://127.0.0.1:18432/api/tasks/create \\
|
|
306
|
+
-H "Content-Type: application/json" \\
|
|
307
|
+
-d '{"title":"[s] fix(cli): Exit code","priority":"high","tags":["cli"]}'
|
|
308
|
+
|
|
309
|
+
# Update task
|
|
310
|
+
curl -X POST http://127.0.0.1:18432/api/tasks/update \\
|
|
311
|
+
-H "Content-Type: application/json" \\
|
|
312
|
+
-d '{"taskId":"<id>","status":"todo","priority":"critical"}'
|
|
313
|
+
|
|
314
|
+
# Edit task fields
|
|
315
|
+
curl -X POST http://127.0.0.1:18432/api/tasks/edit \\
|
|
316
|
+
-H "Content-Type: application/json" \\
|
|
317
|
+
-d '{"taskId":"<id>","title":"Updated title","description":"Updated desc"}'
|
|
318
|
+
|
|
319
|
+
# Start task execution
|
|
320
|
+
curl -X POST http://127.0.0.1:18432/api/tasks/start \\
|
|
321
|
+
-H "Content-Type: application/json" \\
|
|
322
|
+
-d '{"taskId":"<id>"}'
|
|
323
|
+
\`\`\`
|
|
324
|
+
|
|
325
|
+
### 3. Direct Node.js API (for scripts and other agents)
|
|
326
|
+
|
|
327
|
+
\`\`\`javascript
|
|
328
|
+
import { taskCreate, taskList, taskGet, taskUpdate, taskDelete, taskStats, taskImport } from 'bosun/task-cli.mjs';
|
|
329
|
+
|
|
330
|
+
// Create
|
|
331
|
+
const task = await taskCreate({
|
|
332
|
+
title: "[m] feat(ui): Dark mode",
|
|
333
|
+
description: "Add dark mode toggle to settings panel",
|
|
334
|
+
priority: "high",
|
|
335
|
+
tags: ["ui", "theme"],
|
|
336
|
+
baseBranch: "main"
|
|
337
|
+
});
|
|
338
|
+
|
|
339
|
+
// List with filters
|
|
340
|
+
const todos = await taskList({ status: "todo", priority: "high" });
|
|
341
|
+
|
|
342
|
+
// Update
|
|
343
|
+
await taskUpdate(task.id, { status: "todo", priority: "critical" });
|
|
344
|
+
|
|
345
|
+
// Delete
|
|
346
|
+
await taskDelete(task.id);
|
|
347
|
+
|
|
348
|
+
// Bulk import from file
|
|
349
|
+
const result = await taskImport("./backlog.json");
|
|
350
|
+
\`\`\`
|
|
351
|
+
|
|
352
|
+
## Task Schema
|
|
353
|
+
|
|
354
|
+
Every task has these fields:
|
|
355
|
+
|
|
356
|
+
| Field | Type | Required | Default | Description |
|
|
357
|
+
|-------|------|----------|---------|-------------|
|
|
358
|
+
| \`title\` | string | yes | — | \`[size] type(scope): description\` format |
|
|
359
|
+
| \`description\` | string | — | \`""\` | Full task description (markdown). Primary agent prompt. |
|
|
360
|
+
| \`status\` | string | — | \`"draft"\` | \`draft\` → \`todo\` → \`inprogress\` → \`inreview\` → \`done\` |
|
|
361
|
+
| \`priority\` | string | — | \`"medium"\` | \`low\`, \`medium\`, \`high\`, \`critical\` |
|
|
362
|
+
| \`tags\` | string[] | — | \`[]\` | Lowercase labels for categorization |
|
|
363
|
+
| \`baseBranch\` | string | — | \`"main"\` | Target git branch for this task |
|
|
364
|
+
| \`workspace\` | string | — | cwd | Path to workspace directory |
|
|
365
|
+
| \`repository\` | string | — | \`""\` | Repository identifier (e.g. \`org/repo\`) |
|
|
366
|
+
| \`draft\` | boolean | — | \`true\` | Draft tasks are not picked up by executors |
|
|
367
|
+
|
|
368
|
+
### Structured Description Fields (accepted by create/import)
|
|
369
|
+
|
|
370
|
+
When creating tasks, you can provide structured fields that get formatted into the description:
|
|
371
|
+
|
|
372
|
+
| Field | Type | Description |
|
|
373
|
+
|-------|------|-------------|
|
|
374
|
+
| \`implementation_steps\` | string[] | Ordered steps for the agent to follow |
|
|
375
|
+
| \`acceptance_criteria\` | string[] | Binary pass/fail conditions |
|
|
376
|
+
| \`verification\` | string[] | Commands to run to verify completion |
|
|
377
|
+
|
|
378
|
+
These get appended to the description as markdown sections automatically.
|
|
379
|
+
|
|
380
|
+
### Valid Status Transitions
|
|
381
|
+
|
|
382
|
+
\`\`\`
|
|
383
|
+
draft → todo → inprogress → inreview → done
|
|
384
|
+
↓ ↓
|
|
385
|
+
blocked blocked
|
|
386
|
+
\`\`\`
|
|
387
|
+
|
|
388
|
+
- **draft**: Not yet ready for execution. Agents will not pick these up.
|
|
389
|
+
- **todo**: Ready for execution. Next idle agent will claim it.
|
|
390
|
+
- **inprogress**: Agent is actively working on it.
|
|
391
|
+
- **inreview**: Agent completed, PR created, awaiting review.
|
|
392
|
+
- **done**: Task completed and merged.
|
|
393
|
+
- **blocked**: Stuck on external dependency.
|
|
394
|
+
|
|
395
|
+
## Title Conventions
|
|
396
|
+
|
|
397
|
+
\`\`\`
|
|
398
|
+
[size] type(scope): Concise action-oriented description
|
|
399
|
+
\`\`\`
|
|
400
|
+
|
|
401
|
+
### Size Labels
|
|
402
|
+
| Label | Time | Scope |
|
|
403
|
+
|-------|------|-------|
|
|
404
|
+
| \`[xs]\` | < 30 min | Single-file fix |
|
|
405
|
+
| \`[s]\` | 30 min – 2 hr | Small feature, one module |
|
|
406
|
+
| \`[m]\` | 2 – 6 hr | Multi-file feature |
|
|
407
|
+
| \`[l]\` | 6 – 16 hr | Cross-module work |
|
|
408
|
+
| \`[xl]\` | 1 – 3 days | Major feature |
|
|
409
|
+
|
|
410
|
+
### Conventional Commit Types
|
|
411
|
+
\`feat\`, \`fix\`, \`docs\`, \`style\`, \`refactor\`, \`perf\`, \`test\`, \`build\`, \`ci\`, \`chore\`
|
|
412
|
+
|
|
413
|
+
## Tips for Effective Task Management
|
|
414
|
+
|
|
415
|
+
1. **Match task sizes to project maturity** — If the codebase is still early stage, prioritize [xl] and [l]
|
|
416
|
+
tasks to build core functionality. Switch to [m] and [s] for refinement. Avoid [xs] unless urgent.
|
|
417
|
+
2. **Be specific** — The description is the agent's primary prompt. Include file paths and concrete actions.
|
|
418
|
+
3. **Minimize file overlap** — Tasks editing the same files cause merge conflicts during parallel execution.
|
|
419
|
+
4. **Set baseBranch** — If a task targets a module branch, set \`baseBranch\` to route correctly.
|
|
420
|
+
5. **Use tags** — Tags help filter and organize. Use lowercase, comma-separated.
|
|
421
|
+
6. **Draft first** — Create as \`draft\`, review, then promote to \`todo\` when ready.
|
|
422
|
+
7. **Module branch routing** — When a task title follows conventional commit format
|
|
423
|
+
\`feat(module):\` or \`fix(module):\`, set \`baseBranch\` to \`origin/<module>\` to route the task
|
|
424
|
+
to the module's dedicated branch for parallel, isolated development.
|
|
239
425
|
`,
|
|
240
426
|
monitorMonitor: `# Bosun-Monitor Agent
|
|
241
427
|
|
package/agent-sdk.mjs
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
import { readCodexConfig } from "./codex-config.mjs";
|
|
12
12
|
|
|
13
|
-
const SUPPORTED_PRIMARY = new Set(["codex", "copilot", "claude"]);
|
|
13
|
+
const SUPPORTED_PRIMARY = new Set(["codex", "copilot", "claude", "opencode"]);
|
|
14
14
|
const DEFAULT_PRIMARY = "codex";
|
|
15
15
|
|
|
16
16
|
const DEFAULT_CAPABILITIES_BY_PRIMARY = {
|
|
@@ -29,6 +29,11 @@ const DEFAULT_CAPABILITIES_BY_PRIMARY = {
|
|
|
29
29
|
subagents: true,
|
|
30
30
|
vscodeTools: false,
|
|
31
31
|
},
|
|
32
|
+
opencode: {
|
|
33
|
+
steering: true,
|
|
34
|
+
subagents: true,
|
|
35
|
+
vscodeTools: false,
|
|
36
|
+
},
|
|
32
37
|
};
|
|
33
38
|
|
|
34
39
|
const DEFAULT_CAPABILITIES = {
|
package/agent-work-analyzer.mjs
CHANGED
|
@@ -64,8 +64,9 @@ const activeSessions = new Map();
|
|
|
64
64
|
const alertCooldowns = new Map();
|
|
65
65
|
const ALERT_COOLDOWN_MS = 5 * 60 * 1000; // 5 minutes between same alert
|
|
66
66
|
const FAILED_SESSION_ALERT_MIN_COOLDOWN_MS = 60 * 60 * 1000; // Keep noisy failed-session summaries coarse-grained
|
|
67
|
+
const FAILED_SESSION_TRANSIENT_ALERT_MIN_COOLDOWN_MS = 2 * 60 * 60 * 1000; // Transient API/provider failures should back off longer
|
|
67
68
|
const ALERT_COOLDOWN_RETENTION_MS = Math.max(
|
|
68
|
-
|
|
69
|
+
FAILED_SESSION_TRANSIENT_ALERT_MIN_COOLDOWN_MS * 3,
|
|
69
70
|
3 * 60 * 60 * 1000,
|
|
70
71
|
); // keep cooldown history bounded
|
|
71
72
|
const ALERT_COOLDOWN_REPLAY_MAX_BYTES = Math.max(
|
|
@@ -78,6 +79,9 @@ function getAlertCooldownMs(alert) {
|
|
|
78
79
|
if (type === "failed_session_high_errors") {
|
|
79
80
|
return Math.max(ALERT_COOLDOWN_MS, FAILED_SESSION_ALERT_MIN_COOLDOWN_MS);
|
|
80
81
|
}
|
|
82
|
+
if (type === "failed_session_transient_errors") {
|
|
83
|
+
return Math.max(ALERT_COOLDOWN_MS, FAILED_SESSION_TRANSIENT_ALERT_MIN_COOLDOWN_MS);
|
|
84
|
+
}
|
|
81
85
|
return Math.max(0, ALERT_COOLDOWN_MS);
|
|
82
86
|
}
|
|
83
87
|
|
|
@@ -99,12 +103,35 @@ function deriveAlertScopeId(alert) {
|
|
|
99
103
|
function buildAlertCooldownKey(alert) {
|
|
100
104
|
const type = String(alert?.type || "unknown").trim().toLowerCase() || "unknown";
|
|
101
105
|
const scopeId = deriveAlertScopeId(alert);
|
|
102
|
-
if (
|
|
106
|
+
if (
|
|
107
|
+
scopeId &&
|
|
108
|
+
(
|
|
109
|
+
type === "failed_session_high_errors" ||
|
|
110
|
+
type === "failed_session_transient_errors" ||
|
|
111
|
+
type === "stuck_agent"
|
|
112
|
+
)
|
|
113
|
+
) {
|
|
103
114
|
return `${type}:task:${scopeId}`;
|
|
104
115
|
}
|
|
105
116
|
return `${type}:${String(alert?.attempt_id || "unknown")}`;
|
|
106
117
|
}
|
|
107
118
|
|
|
119
|
+
function isTransientFailureFingerprint(value) {
|
|
120
|
+
const text = String(value || "").toLowerCase();
|
|
121
|
+
if (!text) return false;
|
|
122
|
+
return (
|
|
123
|
+
text.includes("reconnect") ||
|
|
124
|
+
text.includes("stream disconnected") ||
|
|
125
|
+
text.includes("response.failed") ||
|
|
126
|
+
text.includes("rate limit") ||
|
|
127
|
+
text.includes("high demand") ||
|
|
128
|
+
text.includes("provisioned throughput") ||
|
|
129
|
+
text.includes("timeout") ||
|
|
130
|
+
text.includes("econnreset") ||
|
|
131
|
+
text.includes("temporarily unavailable")
|
|
132
|
+
);
|
|
133
|
+
}
|
|
134
|
+
|
|
108
135
|
function pruneStaleAlertCooldowns(nowMs = Date.now()) {
|
|
109
136
|
const now = Number(nowMs) || Date.now();
|
|
110
137
|
const cutoff = now - ALERT_COOLDOWN_RETENTION_MS;
|
|
@@ -124,7 +151,11 @@ async function hydrateAlertCooldownsFromLog() {
|
|
|
124
151
|
const start = Math.max(0, fileStat.size - ALERT_COOLDOWN_REPLAY_MAX_BYTES);
|
|
125
152
|
const stream = createReadStream(ALERTS_LOG, { start, encoding: "utf8" });
|
|
126
153
|
const rl = createInterface({ input: stream, crlfDelay: Infinity });
|
|
127
|
-
const maxCooldownMs = Math.max(
|
|
154
|
+
const maxCooldownMs = Math.max(
|
|
155
|
+
ALERT_COOLDOWN_MS,
|
|
156
|
+
FAILED_SESSION_ALERT_MIN_COOLDOWN_MS,
|
|
157
|
+
FAILED_SESSION_TRANSIENT_ALERT_MIN_COOLDOWN_MS,
|
|
158
|
+
);
|
|
128
159
|
const cutoff = Date.now() - maxCooldownMs;
|
|
129
160
|
for await (const line of rl) {
|
|
130
161
|
const trimmed = String(line || "").trim();
|
|
@@ -514,17 +545,25 @@ async function analyzeSessionEnd(session, event) {
|
|
|
514
545
|
completion_status === "failed" &&
|
|
515
546
|
session.errors.length >= ERROR_LOOP_THRESHOLD
|
|
516
547
|
) {
|
|
548
|
+
const errorFingerprints = [...new Set(session.errors.map((e) => e.fingerprint))];
|
|
549
|
+
const transientErrorCount = errorFingerprints.filter((fp) => isTransientFailureFingerprint(fp)).length;
|
|
550
|
+
const transientOnlySession = transientErrorCount > 0 && transientErrorCount === errorFingerprints.length;
|
|
551
|
+
const alertType = transientOnlySession
|
|
552
|
+
? "failed_session_transient_errors"
|
|
553
|
+
: "failed_session_high_errors";
|
|
554
|
+
const recommendation = transientOnlySession
|
|
555
|
+
? "switch_sdk_or_backoff_retry"
|
|
556
|
+
: "analyze_root_cause";
|
|
557
|
+
|
|
517
558
|
await emitAlert({
|
|
518
|
-
type:
|
|
559
|
+
type: alertType,
|
|
519
560
|
attempt_id: session.attempt_id,
|
|
520
561
|
task_id: session.taskId,
|
|
521
562
|
executor: session.executor,
|
|
522
563
|
error_count: session.errors.length,
|
|
523
|
-
error_fingerprints:
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
recommendation: "analyze_root_cause",
|
|
527
|
-
severity: "high",
|
|
564
|
+
error_fingerprints: errorFingerprints,
|
|
565
|
+
recommendation,
|
|
566
|
+
severity: transientOnlySession ? "medium" : "high",
|
|
528
567
|
});
|
|
529
568
|
}
|
|
530
569
|
}
|
package/autofix.mjs
CHANGED
|
@@ -555,12 +555,14 @@ export function runCodexExec(
|
|
|
555
555
|
env: codexEnv,
|
|
556
556
|
};
|
|
557
557
|
if (process.platform === "win32") {
|
|
558
|
-
// On Windows,
|
|
559
|
-
//
|
|
560
|
-
//
|
|
558
|
+
// On Windows, spawn with shell: true so cmd.exe can resolve .cmd/.ps1
|
|
559
|
+
// shims (e.g. codex.cmd installed by npm). Without shell: true, Node's
|
|
560
|
+
// spawn() looks for a literal "codex" executable which doesn't exist
|
|
561
|
+
// on Windows — only codex.cmd does — causing ENOENT (os error 2).
|
|
562
|
+
// Arguments are passed as an array so shell word-splitting is safe.
|
|
561
563
|
child = spawn("codex", args, {
|
|
562
564
|
...spawnOptions,
|
|
563
|
-
shell:
|
|
565
|
+
shell: true,
|
|
564
566
|
});
|
|
565
567
|
} else {
|
|
566
568
|
child = spawn("codex", args, {
|
|
@@ -620,14 +622,13 @@ export function runCodexExec(
|
|
|
620
622
|
});
|
|
621
623
|
|
|
622
624
|
const timer = setTimeout(() => {
|
|
623
|
-
stream.write(`\n\n## TIMEOUT after ${timeoutMs}ms\n`);
|
|
625
|
+
try { stream.write(`\n\n## TIMEOUT after ${timeoutMs}ms\n`); } catch { /* best effort */ }
|
|
624
626
|
try {
|
|
625
627
|
child.kill("SIGTERM");
|
|
626
628
|
} catch {
|
|
627
629
|
/* best effort */
|
|
628
630
|
}
|
|
629
|
-
|
|
630
|
-
promiseResolve({
|
|
631
|
+
resolveOnce({
|
|
631
632
|
success: false,
|
|
632
633
|
output: stdout,
|
|
633
634
|
error: "timeout after " + timeoutMs + "ms",
|
|
@@ -635,27 +636,40 @@ export function runCodexExec(
|
|
|
635
636
|
});
|
|
636
637
|
}, timeoutMs);
|
|
637
638
|
|
|
638
|
-
|
|
639
|
+
// Guard against double-resolution: on Windows ENOENT spawns fire
|
|
640
|
+
// both "error" and "exit" events — the second promiseResolve is harmless
|
|
641
|
+
// but stream.end() must only be called once.
|
|
642
|
+
let resolved = false;
|
|
643
|
+
function resolveOnce(result) {
|
|
644
|
+
if (resolved) return;
|
|
645
|
+
resolved = true;
|
|
639
646
|
clearTimeout(timer);
|
|
640
|
-
stream.
|
|
641
|
-
|
|
642
|
-
|
|
647
|
+
try { stream.end(); } catch { /* best effort */ }
|
|
648
|
+
promiseResolve(result);
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
child.on("error", (err) => {
|
|
652
|
+
try { stream.write(`\n\n## ERROR: ${err.message}\n`); } catch { /* best effort */ }
|
|
653
|
+
const errorMsg = err.code === "ENOENT"
|
|
654
|
+
? `Codex Exec exited with code 1: Error: The system cannot find the file specified. (os error 2) — is the 'codex' CLI installed and on PATH?`
|
|
655
|
+
: err.message;
|
|
656
|
+
resolveOnce({
|
|
643
657
|
success: false,
|
|
644
658
|
output: stdout,
|
|
645
|
-
error:
|
|
659
|
+
error: errorMsg,
|
|
646
660
|
logPath,
|
|
647
661
|
});
|
|
648
662
|
});
|
|
649
663
|
|
|
650
664
|
child.on("exit", (code) => {
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
665
|
+
try {
|
|
666
|
+
stream.write(`\n\n## Exit code: ${code}\n`);
|
|
667
|
+
stream.write(`\n## stderr:\n${stderr}\n`);
|
|
668
|
+
} catch { /* best effort */ }
|
|
669
|
+
resolveOnce({
|
|
656
670
|
success: code === 0,
|
|
657
671
|
output: stdout + (stderr ? "\n" + stderr : ""),
|
|
658
|
-
error: code !== 0 ? `
|
|
672
|
+
error: code !== 0 ? `Codex Exec exited with code ${code}${stderr ? ": " + stderr.trim().slice(0, 200) : ""}` : null,
|
|
659
673
|
logPath,
|
|
660
674
|
});
|
|
661
675
|
});
|
package/bosun.schema.json
CHANGED
|
@@ -38,7 +38,7 @@
|
|
|
38
38
|
"codexEnabled": { "type": "boolean" },
|
|
39
39
|
"primaryAgent": {
|
|
40
40
|
"type": "string",
|
|
41
|
-
"enum": ["codex-sdk", "copilot-sdk", "claude-sdk"]
|
|
41
|
+
"enum": ["codex-sdk", "copilot-sdk", "claude-sdk", "opencode-sdk"]
|
|
42
42
|
},
|
|
43
43
|
"vkSpawnEnabled": { "type": "boolean" },
|
|
44
44
|
"kanban": {
|
package/kanban-adapter.mjs
CHANGED
|
@@ -293,8 +293,41 @@ function _issueListCacheKey(state, limit) {
|
|
|
293
293
|
}
|
|
294
294
|
|
|
295
295
|
/** Build a cache key for the shared-state cache (per adapter instance). */
|
|
296
|
-
function _sharedStateCacheKey(num) {
|
|
297
|
-
|
|
296
|
+
function _sharedStateCacheKey(num, repoKey = "") {
|
|
297
|
+
const normalizedNum = String(num || "").trim();
|
|
298
|
+
const normalizedRepo = String(repoKey || "").trim().toLowerCase();
|
|
299
|
+
return normalizedRepo ? `${normalizedRepo}#${normalizedNum}` : normalizedNum;
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
function parseIssueLocator(issueNumber, defaultOwner, defaultRepo, issueUrl = "") {
|
|
303
|
+
const urlText = String(issueUrl || issueNumber || "").trim();
|
|
304
|
+
const urlMatch = urlText.match(
|
|
305
|
+
/github\.com\/([^/\s]+)\/([^/\s]+)\/issues\/(\d+)(?:\b|$)/i,
|
|
306
|
+
);
|
|
307
|
+
if (urlMatch) {
|
|
308
|
+
const owner = String(urlMatch[1] || "").trim();
|
|
309
|
+
const repo = String(urlMatch[2] || "")
|
|
310
|
+
.trim()
|
|
311
|
+
.replace(/\.git$/i, "");
|
|
312
|
+
const number = String(urlMatch[3] || "").trim();
|
|
313
|
+
return {
|
|
314
|
+
owner,
|
|
315
|
+
repo,
|
|
316
|
+
number,
|
|
317
|
+
repoKey: `${owner}/${repo}`.toLowerCase(),
|
|
318
|
+
};
|
|
319
|
+
}
|
|
320
|
+
const number = String(issueNumber || "")
|
|
321
|
+
.trim()
|
|
322
|
+
.replace(/^#/, "");
|
|
323
|
+
const owner = String(defaultOwner || "").trim();
|
|
324
|
+
const repo = String(defaultRepo || "").trim();
|
|
325
|
+
return {
|
|
326
|
+
owner,
|
|
327
|
+
repo,
|
|
328
|
+
number,
|
|
329
|
+
repoKey: `${owner}/${repo}`.toLowerCase(),
|
|
330
|
+
};
|
|
298
331
|
}
|
|
299
332
|
|
|
300
333
|
function isGhRateLimitError(text) {
|
|
@@ -2361,7 +2394,9 @@ class GitHubIssuesAdapter {
|
|
|
2361
2394
|
for (const task of filtered) {
|
|
2362
2395
|
try {
|
|
2363
2396
|
const sharedState = normalizeSharedStatePayload(
|
|
2364
|
-
await this.readSharedStateFromIssue(task.id
|
|
2397
|
+
await this.readSharedStateFromIssue(task.id, null, {
|
|
2398
|
+
issueUrl: task?.meta?.url || task?.taskUrl || null,
|
|
2399
|
+
}),
|
|
2365
2400
|
);
|
|
2366
2401
|
if (sharedState) {
|
|
2367
2402
|
task.meta.sharedState = sharedState;
|
|
@@ -3234,8 +3269,15 @@ ${stateJson}
|
|
|
3234
3269
|
* console.log(`Task claimed by ${state.ownerId}`);
|
|
3235
3270
|
* }
|
|
3236
3271
|
*/
|
|
3237
|
-
async readSharedStateFromIssue(issueNumber, cachedComments = null) {
|
|
3238
|
-
const
|
|
3272
|
+
async readSharedStateFromIssue(issueNumber, cachedComments = null, options = {}) {
|
|
3273
|
+
const issueUrl = String(options?.issueUrl || "").trim();
|
|
3274
|
+
const locator = parseIssueLocator(
|
|
3275
|
+
issueNumber,
|
|
3276
|
+
this._owner,
|
|
3277
|
+
this._repo,
|
|
3278
|
+
issueUrl,
|
|
3279
|
+
);
|
|
3280
|
+
const num = locator.number;
|
|
3239
3281
|
if (!/^\d+$/.test(num)) {
|
|
3240
3282
|
throw new Error(`Invalid issue number: ${issueNumber}`);
|
|
3241
3283
|
}
|
|
@@ -3243,7 +3285,7 @@ ${stateJson}
|
|
|
3243
3285
|
// If no pre-fetched comments, check the instance-level shared-state cache
|
|
3244
3286
|
// to avoid a separate API call per issue during bulk listTasks cycles.
|
|
3245
3287
|
if (!cachedComments) {
|
|
3246
|
-
const cacheKey = _sharedStateCacheKey(num);
|
|
3288
|
+
const cacheKey = _sharedStateCacheKey(num, locator.repoKey);
|
|
3247
3289
|
const cached = this._sharedStateCache.get(cacheKey);
|
|
3248
3290
|
if (cached && Date.now() - cached.ts < GH_SHARED_STATE_CACHE_TTL_MS) {
|
|
3249
3291
|
return cached.data;
|
|
@@ -3251,7 +3293,8 @@ ${stateJson}
|
|
|
3251
3293
|
}
|
|
3252
3294
|
|
|
3253
3295
|
try {
|
|
3254
|
-
const comments =
|
|
3296
|
+
const comments =
|
|
3297
|
+
cachedComments ?? await this._getIssueComments(num, { issueUrl });
|
|
3255
3298
|
const stateComment = Array.isArray(comments)
|
|
3256
3299
|
? comments
|
|
3257
3300
|
.slice()
|
|
@@ -3263,7 +3306,7 @@ ${stateJson}
|
|
|
3263
3306
|
// Cache the null result too so repeated calls within the TTL skip the API
|
|
3264
3307
|
if (!cachedComments) {
|
|
3265
3308
|
this._sharedStateCache.set(
|
|
3266
|
-
_sharedStateCacheKey(num),
|
|
3309
|
+
_sharedStateCacheKey(num, locator.repoKey),
|
|
3267
3310
|
{ data: null, ts: Date.now() },
|
|
3268
3311
|
);
|
|
3269
3312
|
}
|
|
@@ -3298,7 +3341,7 @@ ${stateJson}
|
|
|
3298
3341
|
// Cache the result for the TTL window
|
|
3299
3342
|
if (!cachedComments) {
|
|
3300
3343
|
this._sharedStateCache.set(
|
|
3301
|
-
_sharedStateCacheKey(num),
|
|
3344
|
+
_sharedStateCacheKey(num, locator.repoKey),
|
|
3302
3345
|
{ data: state, ts: Date.now() },
|
|
3303
3346
|
);
|
|
3304
3347
|
}
|
|
@@ -3424,18 +3467,25 @@ To re-enable bosun for this task, remove the \`${this._codexLabels.ignore}\` lab
|
|
|
3424
3467
|
* Get all comments for an issue.
|
|
3425
3468
|
* @private
|
|
3426
3469
|
*/
|
|
3427
|
-
async _getIssueComments(issueNumber) {
|
|
3470
|
+
async _getIssueComments(issueNumber, options = {}) {
|
|
3471
|
+
const issueUrl = String(options?.issueUrl || "").trim();
|
|
3472
|
+
const locator = parseIssueLocator(
|
|
3473
|
+
issueNumber,
|
|
3474
|
+
this._owner,
|
|
3475
|
+
this._repo,
|
|
3476
|
+
issueUrl,
|
|
3477
|
+
);
|
|
3428
3478
|
try {
|
|
3429
3479
|
const result = await this._gh([
|
|
3430
3480
|
"api",
|
|
3431
|
-
`/repos/${
|
|
3481
|
+
`/repos/${locator.owner}/${locator.repo}/issues/${locator.number}/comments`,
|
|
3432
3482
|
"--jq",
|
|
3433
3483
|
".",
|
|
3434
3484
|
]);
|
|
3435
3485
|
return Array.isArray(result) ? result : [];
|
|
3436
3486
|
} catch (err) {
|
|
3437
3487
|
console.warn(
|
|
3438
|
-
`[kanban] failed to fetch comments for #${
|
|
3488
|
+
`[kanban] failed to fetch comments for ${locator.owner}/${locator.repo}#${locator.number}: ${err.message}`,
|
|
3439
3489
|
);
|
|
3440
3490
|
return [];
|
|
3441
3491
|
}
|