@desplega.ai/agent-swarm 1.92.0 → 1.92.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/README.md +1 -1
  2. package/openapi.json +276 -3
  3. package/package.json +6 -6
  4. package/plugin/skills/pages/SKILL.md +5 -2
  5. package/src/be/db.ts +416 -20
  6. package/src/be/memory/boot-reembed.ts +85 -0
  7. package/src/be/memory/constants.ts +44 -2
  8. package/src/be/memory/providers/openai-embedding.ts +15 -5
  9. package/src/be/memory/providers/sqlite-store.ts +325 -76
  10. package/src/be/memory/reranker.ts +35 -17
  11. package/src/be/memory/types.ts +43 -0
  12. package/src/be/migrations/084_script_run_journal_duration.sql +5 -0
  13. package/src/be/migrations/085_script_runs_kind.sql +9 -0
  14. package/src/be/migrations/086_pages_default_authed.sql +64 -0
  15. package/src/be/migrations/087_skill_files.sql +19 -0
  16. package/src/be/modelsdev-cache.json +5622 -2543
  17. package/src/be/seed-scripts/catalog/boot-triage.ts +221 -0
  18. package/src/be/seed-scripts/catalog/catalog-report.ts +457 -0
  19. package/src/be/seed-scripts/catalog/compound-insights.ts +465 -0
  20. package/src/be/seed-scripts/catalog/gh-pr-snapshot.ts +1 -1
  21. package/src/be/seed-scripts/catalog/memory-eval.ts +1059 -0
  22. package/src/be/seed-scripts/catalog/ops-catalog-audit.ts +34 -439
  23. package/src/be/seed-scripts/catalog/schedule-health.ts +78 -2
  24. package/src/be/seed-scripts/catalog/task-failure-audit.ts +48 -1
  25. package/src/be/seed-scripts/index.ts +32 -4
  26. package/src/be/seed-skills/index.ts +0 -7
  27. package/src/be/skill-sync.ts +91 -7
  28. package/src/commands/runner.ts +6 -2
  29. package/src/heartbeat/templates.ts +20 -16
  30. package/src/http/index.ts +50 -7
  31. package/src/http/mcp-user.ts +23 -0
  32. package/src/http/mcp.ts +58 -0
  33. package/src/http/memory.ts +62 -0
  34. package/src/http/pages.ts +1 -1
  35. package/src/http/script-runs.ts +2 -0
  36. package/src/http/scripts.ts +39 -2
  37. package/src/http/skills.ts +225 -0
  38. package/src/providers/claude-adapter.ts +56 -24
  39. package/src/script-workflows/workflow-ctx.ts +7 -3
  40. package/src/scripts-runtime/sdk-allowlist.ts +1 -0
  41. package/src/scripts-runtime/swarm-sdk.ts +13 -0
  42. package/src/scripts-runtime/types/stdlib.d.ts +1 -0
  43. package/src/scripts-runtime/types/swarm-sdk.d.ts +1 -0
  44. package/src/server.ts +2 -0
  45. package/src/tasks/worker-follow-up.ts +12 -0
  46. package/src/tests/claude-adapter-binary.test.ts +135 -81
  47. package/src/tests/create-page-tool.test.ts +19 -2
  48. package/src/tests/heartbeat-checklist.test.ts +36 -0
  49. package/src/tests/mcp-transport-gc.test.ts +58 -0
  50. package/src/tests/memory-e2e.test.ts +6 -6
  51. package/src/tests/memory-health-endpoint.test.ts +78 -0
  52. package/src/tests/memory-rater-e2e.test.ts +4 -5
  53. package/src/tests/memory-reranker.test.ts +135 -124
  54. package/src/tests/memory-store.test.ts +221 -1
  55. package/src/tests/memory.test.ts +13 -12
  56. package/src/tests/pages-http.test.ts +20 -2
  57. package/src/tests/pages-storage.test.ts +26 -0
  58. package/src/tests/scripts-mcp-e2e.test.ts +53 -0
  59. package/src/tests/seed-scripts.test.ts +328 -3
  60. package/src/tests/skill-files-http.test.ts +171 -0
  61. package/src/tests/skill-files.test.ts +162 -0
  62. package/src/tests/skill-get-file-tool.test.ts +110 -0
  63. package/src/tests/skill-sync.test.ts +125 -6
  64. package/src/tests/task-cascade-fail.test.ts +304 -0
  65. package/src/tools/create-page.ts +2 -2
  66. package/src/tools/skills/index.ts +1 -0
  67. package/src/tools/skills/skill-get-file.ts +80 -0
  68. package/src/tools/tool-config.ts +2 -1
  69. package/src/types.ts +20 -0
  70. package/src/utils/internal-ai/complete-structured.ts +2 -2
  71. package/templates/schedules/daily-blocker-digest/content.md +68 -54
  72. package/templates/schedules/daily-compounding-reflection/content.md +4 -4
  73. package/templates/schedules/daily-hn-briefing/content.md +5 -5
  74. package/templates/schedules/daily-workflow-health-audit/content.md +6 -6
  75. package/templates/schedules/gtm-weekly-review/content.md +9 -9
  76. package/templates/schedules/weekly-dependabot-triage/content.md +24 -20
  77. package/templates/skills/agentmail-sending/content.md +6 -7
  78. package/templates/skills/desloppify/content.md +8 -9
  79. package/templates/skills/jira-interaction/content.md +25 -33
  80. package/templates/skills/kapso-whatsapp/content.md +29 -30
  81. package/templates/skills/linear-interaction/content.md +8 -9
  82. package/templates/skills/profile-corruption-escalation/content.md +44 -85
  83. package/templates/skills/sprite-cli/content.md +4 -5
  84. package/templates/skills/turso-interaction/content.md +14 -17
  85. package/templates/skills/workflow-iterate/content.md +38 -391
  86. package/templates/skills/x-api-interactions/content.md +4 -6
  87. package/templates/workflows/llm-safe-release-context/config.json +13 -0
  88. package/templates/workflows/llm-safe-release-context/content.md +69 -0
  89. package/templates/skills/scheduled-task-resilience/config.json +0 -14
  90. package/templates/skills/scheduled-task-resilience/content.md +0 -95
@@ -22,6 +22,9 @@ import { getScript, upsertScriptByName } from "../scripts/db";
22
22
  import { extractArgsJsonSchema } from "../scripts/extract-schema";
23
23
  import { typecheckScript } from "../scripts/typecheck";
24
24
  import type { Seeder, SeedItem } from "../seed/types";
25
+ import bootTriageSrc from "./catalog/boot-triage.ts" with { type: "text" };
26
+ // @ts-expect-error Bun text imports return the raw source string for bundling standalone scripts.
27
+ import catalogReportSrc from "./catalog/catalog-report.ts" with { type: "text" };
25
28
  import compoundInsightsSrc from "./catalog/compound-insights.ts" with { type: "text" };
26
29
  import dateResolveSrc from "./catalog/date-resolve.ts" with { type: "text" };
27
30
  import fetchReadableSrc from "./catalog/fetch-readable.ts" with { type: "text" };
@@ -30,6 +33,7 @@ import groupCountSrc from "./catalog/group-count.ts" with { type: "text" };
30
33
  import jsonQuerySrc from "./catalog/json-query.ts" with { type: "text" };
31
34
  import linearIssueSrc from "./catalog/linear-issue.ts" with { type: "text" };
32
35
  import memoryDedupCheckSrc from "./catalog/memory-dedup-check.ts" with { type: "text" };
36
+ import memoryEvalSrc from "./catalog/memory-eval.ts" with { type: "text" };
33
37
  import opsCatalogAuditSrc from "./catalog/ops-catalog-audit.ts" with { type: "text" };
34
38
  import scheduleHealthSrc from "./catalog/schedule-health.ts" with { type: "text" };
35
39
  import slackThreadFlattenSrc from "./catalog/slack-thread-flatten.ts" with { type: "text" };
@@ -50,6 +54,14 @@ export type SeedScript = {
50
54
  // module's default export, so the cast restores the real shape.
51
55
  const asText = (s: unknown): string => s as string;
52
56
 
57
const CATALOG_REPORT_IMPORT_RE = /^import\s+\{[^}]*\}\s+from "\.\/catalog-report";\n\n?/m;

/**
 * Inline the shared catalog-report helper into a seed script's source text.
 *
 * When `source` contains a named import from "./catalog-report", that import
 * statement (plus up to one following blank line) is stripped and the raw
 * helper source is prepended, separated by a blank line. Sources without such
 * an import are returned untouched.
 *
 * @param source Raw text of a catalog script.
 * @returns The bundled script text, or `source` itself when no import matched.
 */
function bundleCatalogReport(source: string): string {
  // The pattern can never match an empty span, so an unchanged string after
  // the replace means the import was not present.
  const withoutImport = source.replace(CATALOG_REPORT_IMPORT_RE, "");
  if (withoutImport === source) {
    return source;
  }

  const helperSource = asText(catalogReportSrc);
  return [helperSource, withoutImport].join("\n\n");
}
64
+
53
65
  export const SEED_SCRIPTS: SeedScript[] = [
54
66
  {
55
67
  name: "gh-pr-snapshot",
@@ -103,7 +115,7 @@ export const SEED_SCRIPTS: SeedScript[] = [
103
115
  "Scan recently failed swarm tasks and cluster them by failure reason, agent or schedule to surface recurring problems.",
104
116
  intent:
105
117
  "Find patterns in swarm task failures — which agent, schedule or error keeps breaking — for a reliability review.",
106
- source: asText(taskFailureAuditSrc),
118
+ source: bundleCatalogReport(asText(taskFailureAuditSrc)),
107
119
  },
108
120
  {
109
121
  name: "memory-dedup-check",
@@ -150,7 +162,7 @@ export const SEED_SCRIPTS: SeedScript[] = [
150
162
  "Per-schedule failure rate check over recent tasks — flags schedules with failure rates above a configurable threshold.",
151
163
  intent:
152
164
  "Find unhealthy schedules that keep failing — for daily compounding, reliability reviews, or ops triage.",
153
- source: asText(scheduleHealthSrc),
165
+ source: bundleCatalogReport(asText(scheduleHealthSrc)),
154
166
  },
155
167
  {
156
168
  name: "tool-usage",
@@ -166,7 +178,15 @@ export const SEED_SCRIPTS: SeedScript[] = [
166
178
  "All-in-one swarm-wide daily ops snapshot: task completion/failure summary, real failure clusters (excludes superseded/cancelled bookkeeping), schedule health flags, tool usage top-25, memory health/pollution stats, seed-script candidate tool triplets, and a per-agent breakdown. Aggregates across ALL agents via direct read-only SQL.",
167
179
  intent:
168
180
  "Single-call daily compounding Phase 0 helper — replaces ~25 raw tool roundtrips with one compressed JSON result covering every agent. For daily evolution, self-scripting candidates, ops reviews, or heartbeat context.",
169
- source: asText(compoundInsightsSrc),
181
+ source: bundleCatalogReport(asText(compoundInsightsSrc)),
182
+ },
183
+ {
184
+ name: "memory-eval",
185
+ description:
186
+ "3-axis memory quality evaluation: carry-forward context (do follow-up tasks retrieve useful memories from prior tasks?), follow preferences (are CLAUDE.md/IDENTITY.md/SOUL.md/TOOLS.md memories retrieved and useful?), and stay current (what fraction of retrieved memories are fresh vs stale?). Outputs a baseline report to agent-fs + a swarm Page.",
187
+ intent:
188
+ "Measure memory system health across OpenAI Dreaming-inspired axes — before/after baseline for architecture changes, blog-post numbers, daily quality monitoring.",
189
+ source: asText(memoryEvalSrc),
170
190
  },
171
191
  {
172
192
  name: "ops-catalog-audit",
@@ -174,7 +194,15 @@ export const SEED_SCRIPTS: SeedScript[] = [
174
194
  "Audit-as-code catalog check for schedules, workflows, and prompt/template drift. Clusters actionable findings by goal and can publish an authed HTML report page.",
175
195
  intent:
176
196
  "Re-run the ops inventory audit in one call: duplicate/dead schedules, code-work routing risks, enabled workflow fixtures, structured-output gate gaps, prompt registry drift, stale hosts, and systemDefault skill duplicates.",
177
- source: asText(opsCatalogAuditSrc),
197
+ source: bundleCatalogReport(asText(opsCatalogAuditSrc)),
198
+ },
199
+ {
200
+ name: "boot-triage",
201
+ description:
202
+ "Post-restart heartbeat triage snapshot: deploy restart PR context, recent real failures, stuck offline-agent work, orphaned tasks, and superseded tasks missing resume children.",
203
+ intent:
204
+ "Run immediately after a swarm restart to gather deterministic boot triage data in one read-only call before the Lead decides what to retry, cancel, or escalate.",
205
+ source: asText(bootTriageSrc),
178
206
  },
179
207
  ];
180
208
 
@@ -25,12 +25,6 @@ import kvStorageContent from "../../../templates/skills/kv-storage/content.md" w
25
25
  };
26
26
  import pagesConfig from "../../../templates/skills/pages/config.json" with { type: "text" };
27
27
  import pagesContent from "../../../templates/skills/pages/content.md" with { type: "text" };
28
- import scheduledTaskResilienceConfig from "../../../templates/skills/scheduled-task-resilience/config.json" with {
29
- type: "text",
30
- };
31
- import scheduledTaskResilienceContent from "../../../templates/skills/scheduled-task-resilience/content.md" with {
32
- type: "text",
33
- };
34
28
  import scriptWorkflowsConfig from "../../../templates/skills/script-workflows/config.json" with {
35
29
  type: "text",
36
30
  };
@@ -77,7 +71,6 @@ const BUILT_IN_SKILL_SOURCES = [
77
71
  { config: artifactsConfig, body: artifactsContent },
78
72
  { config: kvStorageConfig, body: kvStorageContent },
79
73
  { config: pagesConfig, body: pagesContent },
80
- { config: scheduledTaskResilienceConfig, body: scheduledTaskResilienceContent },
81
74
  { config: scriptWorkflowsConfig, body: scriptWorkflowsContent },
82
75
  { config: swarmScriptsConfig, body: swarmScriptsContent },
83
76
  { config: workflowIterateConfig, body: workflowIterateContent },
@@ -8,10 +8,11 @@
8
8
  * This runs on the API side — workers call it via POST /api/skills/sync-filesystem.
9
9
  */
10
10
 
11
+ import type { Dirent } from "node:fs";
11
12
  import { existsSync, mkdirSync, readdirSync, rmSync, writeFileSync } from "node:fs";
12
13
  import { homedir } from "node:os";
13
- import { join } from "node:path";
14
- import { getAgentSkills } from "./db";
14
+ import { dirname, join } from "node:path";
15
+ import { getAgentSkills, getSkillFiles } from "./db";
15
16
 
16
17
  export interface SkillSyncResult {
17
18
  synced: number;
@@ -29,11 +30,68 @@ export interface SkillSyncResult {
29
30
  */
30
31
  const SWARM_MARKER_FILE = ".swarm-managed";
31
32
 
33
/**
 * Delete files from a swarm-managed skill directory that are no longer part of
 * the skill, then drop directories left with nothing to keep.
 *
 * Directories without the SWARM_MARKER_FILE marker are never touched. Inside a
 * managed directory the following are kept: the top-level `SKILL.md`, the
 * top-level marker file, and every file whose "/"-joined path relative to
 * `skillDir` is listed in `currentRelativeFiles`. Any other non-directory
 * entry is removed.
 *
 * @param skillDir Absolute path of the skill's directory on disk.
 * @param currentRelativeFiles Relative paths ("/"-separated) of the files that
 *   should remain.
 * @returns Number of stale files deleted; removed directories are not counted.
 */
function reconcileManagedSkillFiles(skillDir: string, currentRelativeFiles: Set<string>): number {
  if (!existsSync(join(skillDir, SWARM_MARKER_FILE))) return 0;

  let removed = 0;

  // Returns true when `dir` still holds something that must stay on disk.
  const walk = (dir: string, relativeDir = ""): boolean => {
    let entries: Dirent[];
    try {
      entries = readdirSync(dir, { withFileTypes: true });
    } catch {
      // A directory that could not be listed was never inspected, so it must
      // not be reported as empty: the caller would then delete it recursively.
      // Only a directory that has vanished in the meantime counts as empty.
      return existsSync(dir);
    }

    let hasEntries = false;
    for (const entry of entries) {
      const relativePath = relativeDir ? `${relativeDir}/${entry.name}` : entry.name;
      const fullPath = join(dir, entry.name);

      if (entry.isDirectory()) {
        const childHasEntries = walk(fullPath, relativePath);
        if (!childHasEntries) {
          try {
            rmSync(fullPath, { recursive: true, force: true });
          } catch {
            // Removal failed, so the directory is still there.
            hasEntries = true;
          }
        } else {
          hasEntries = true;
        }
        continue;
      }

      // `relativePath` carries the directory prefix, so the SKILL.md and marker
      // comparisons only match entries at the top of the skill directory.
      if (
        relativePath === "SKILL.md" ||
        relativePath === SWARM_MARKER_FILE ||
        currentRelativeFiles.has(relativePath)
      ) {
        hasEntries = true;
        continue;
      }

      try {
        rmSync(fullPath, { force: true });
        removed++;
      } catch {
        // Best effort: a file that cannot be deleted keeps its directory alive.
        hasEntries = true;
      }
    }

    return hasEntries;
  };

  walk(skillDir);
  return removed;
}
88
+
32
89
  /**
33
90
  * Sync agent's installed skills to the filesystem.
34
91
  *
35
92
  * For simple skills (content in DB): writes SKILL.md to ~/.claude/skills/<name>/
36
- * For complex skills (isComplex=true): skipped here (handled by npx in entrypoint)
93
+ * For DB-backed complex skills: writes SKILL.md plus bundled skill_files rows.
94
+ * Legacy complex skills without skill_files remain handled by npx in entrypoint.
37
95
  */
38
96
  export function syncSkillsToFilesystem(
39
97
  agentId: string,
@@ -44,6 +102,7 @@ export function syncSkillsToFilesystem(
44
102
  const home = homeOverride ?? homedir();
45
103
  const errors: string[] = [];
46
104
  let synced = 0;
105
+ let removed = 0;
47
106
 
48
107
  // Directories to write to
49
108
  const skillDirs: string[] = [];
@@ -67,7 +126,8 @@ export function syncSkillsToFilesystem(
67
126
 
68
127
  for (const skill of skills) {
69
128
  if (!skill.isActive || !skill.isEnabled) continue;
70
- if (skill.isComplex) continue; // Complex skills handled by npx
129
+ const bundledFiles = skill.isComplex ? getSkillFiles(skill.id) : [];
130
+ if (skill.isComplex && bundledFiles.length === 0) continue; // Legacy complex skills handled by npx
71
131
  if (!skill.content) continue;
72
132
 
73
133
  // Sanitize skill name to prevent path traversal (strip /, .., and non-safe chars)
@@ -75,6 +135,9 @@ export function syncSkillsToFilesystem(
75
135
  if (!safeName) continue;
76
136
 
77
137
  writtenNames.add(safeName);
138
+ const currentBundledFilePaths = new Set(
139
+ bundledFiles.filter((file) => !file.isBinary).map((file) => file.path),
140
+ );
78
141
 
79
142
  for (const baseDir of skillDirs) {
80
143
  const skillDir = join(baseDir, safeName);
@@ -83,14 +146,36 @@ export function syncSkillsToFilesystem(
83
146
 
84
147
  try {
85
148
  mkdirSync(skillDir, { recursive: true });
149
+ removed += reconcileManagedSkillFiles(skillDir, currentBundledFilePaths);
86
150
  writeFileSync(skillFile, skill.content, "utf-8");
87
151
  writeFileSync(markerFile, "", "utf-8");
88
152
  synced++;
89
153
  } catch (err) {
90
- errors.push(
91
- `${skill.name} -> ${skillDir}: ${err instanceof Error ? err.message : "Unknown error"}`,
154
+ const msg = err instanceof Error ? err.message : "Unknown error";
155
+ errors.push(`${skill.name} -> ${skillDir}: ${msg}`);
156
+ console.error(
157
+ `[skill-sync] Failed to write SKILL.md for ${skill.name} to ${skillDir}: ${msg}`,
92
158
  );
93
159
  }
160
+
161
+ for (const file of bundledFiles) {
162
+ if (file.isBinary) {
163
+ console.log(`[skill-sync] Skipping binary skill file ${skill.name}/${file.path}`);
164
+ continue;
165
+ }
166
+
167
+ const targetPath = join(skillDir, file.path);
168
+ try {
169
+ mkdirSync(dirname(targetPath), { recursive: true });
170
+ writeFileSync(targetPath, file.content, "utf-8");
171
+ } catch (err) {
172
+ const msg = err instanceof Error ? err.message : "Unknown error";
173
+ errors.push(`${skill.name}/${file.path} -> ${targetPath}: ${msg}`);
174
+ console.error(
175
+ `[skill-sync] Failed to write bundled file ${skill.name}/${file.path} to ${targetPath}: ${msg}`,
176
+ );
177
+ }
178
+ }
94
179
  }
95
180
  }
96
181
 
@@ -98,7 +183,6 @@ export function syncSkillsToFilesystem(
98
183
  // present). Leaves user-installed personal skills alone — important on
99
184
  // local dev where ~/.codex/skills holds skills the user installed
100
185
  // outside the swarm.
101
- let removed = 0;
102
186
  for (const baseDir of skillDirs) {
103
187
  if (!existsSync(baseDir)) continue;
104
188
 
@@ -3127,6 +3127,7 @@ async function checkCompletedProcesses(
3127
3127
  state: RunnerState,
3128
3128
  role: string,
3129
3129
  apiConfig?: ApiConfig,
3130
+ cancelledSignaled?: Set<string>,
3130
3131
  ): Promise<void> {
3131
3132
  const completedTasks: Array<{
3132
3133
  taskId: string;
@@ -3161,6 +3162,9 @@ async function checkCompletedProcesses(
3161
3162
  // Remove completed tasks from the map and ensure they're marked as finished
3162
3163
  for (const { taskId, result, cursorUpdates, workingDir, credentialInfo } of completedTasks) {
3163
3164
  state.activeTasks.delete(taskId);
3165
+ vcsDetectedTasks.delete(taskId);
3166
+ vcsCheckTimestamps.delete(taskId);
3167
+ cancelledSignaled?.delete(taskId);
3164
3168
 
3165
3169
  if (apiConfig) {
3166
3170
  removeActiveSession(apiConfig, taskId);
@@ -4122,7 +4126,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
4122
4126
 
4123
4127
  // Wait if at capacity (though unlikely on fresh startup)
4124
4128
  while (state.activeTasks.size >= state.maxConcurrent) {
4125
- await checkCompletedProcesses(state, role, apiConfig);
4129
+ await checkCompletedProcesses(state, role, apiConfig, cancelledSignaled);
4126
4130
  await Bun.sleep(1000);
4127
4131
  }
4128
4132
 
@@ -4341,7 +4345,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
4341
4345
  await pingServer(apiConfig, role);
4342
4346
 
4343
4347
  // Check for completed processes first and ensure tasks are marked as finished
4344
- await checkCompletedProcesses(state, role, apiConfig);
4348
+ await checkCompletedProcesses(state, role, apiConfig, cancelledSignaled);
4345
4349
 
4346
4350
  // Live HARNESS_PROVIDER reconciliation. Re-fetches `swarm_config` (overlaid
4347
4351
  // on env) and swaps the adapter if the resolved provider changed —
@@ -27,17 +27,19 @@ Goal: Review system status and your standing orders, take action if needed.
27
27
 
28
28
  ## Instructions
29
29
  1. **Read your HEARTBEAT.md** — run \`read /workspace/HEARTBEAT.md\` to get the latest standing orders (the snapshot above may be slightly stale).
30
- 2. Review the system status above for anything that needs attention (stalled tasks, idle workers with available work, anomalies).
31
- 3. **CRITICAL Reboot failure triage:** Failures with reason "worker session not found" or "worker session heartbeat is stale" indicate tasks that were INTERRUPTED by a server restart. These are NOT "expected auto-cleanup" they represent work that was lost mid-execution. For each such failure:
30
+ 2. **Prune tracked HEARTBEAT items FIRST.** Active Blockers + Watch Items + Open Discussion combined must stay at ≤10 items; 20 is the absolute max only when genuinely super busy. This cap does not apply to evergreen Standing Orders / Governance / Playbook-index reference sections. Before adding anything, re-check every tracked item against its lift trigger and remove anything resolved, stale, or past its trigger date. Lift incident detail to memory; never keep it inline in HEARTBEAT.md. Every new tracked item must include an explicit lift trigger + date; if it has no removal condition, do not add it. If the tracked list is already at/over cap, prune before (or instead of) adding — the cap is binding.
31
+ 3. **Run seeded heartbeat data-gathering.** Use \`script-run\` with global script \`Heartbeat Audit\` and pass the current HEARTBEAT.md text as \`heartbeatMarkdown\`. It covers Rules #10/#13/#15/#16/#17: resolved stale PRs, pool-target risk schedules, schedule/provider failure clusters, and whether daily-blocker-digest ran today. The Slack thread-reply check (Rule #11) stays Lead-side; the script runtime has no Slack token.
32
+ 4. Review the system status above plus the \`Heartbeat Audit\` result for anything that needs attention (stalled tasks, idle workers with available work, anomalies).
33
+ 5. **CRITICAL — Reboot failure triage:** Failures with reason "worker session not found" or "worker session heartbeat is stale" indicate tasks that were INTERRUPTED by a server restart. These are NOT "expected auto-cleanup" — they represent work that was lost mid-execution. For each such failure:
32
34
  - Check what the task was (via \`get-task-details\` with the task ID from the failure)
33
35
  - If a retry task was auto-created (tagged \`reboot-retry\`), verify it is progressing
34
36
  - If no retry exists and the work is still needed, re-create the task
35
37
  - Do NOT dismiss these as "expected" or "auto-cleanup"
36
- 4. Review your standing orders for any periodic checks or actions.
37
- 5. If something needs attention — take action now using your available tools (create tasks, post to Slack, cancel stuck tasks, etc.).
38
- 6. If everything looks healthy and no standing orders are actionable — complete this task with a brief "All clear" summary. You may NOT say "All clear" if reboot-related failures exist that haven't been triaged.
39
- 7. Do NOT create another heartbeat-checklist task — the system handles scheduling.
40
- 8. **Update your standing orders** After every heartbeat check, edit \`/workspace/HEARTBEAT.md\` directly. Add new patterns you noticed (recurring failures, workers needing attention), remove resolved items. This is your live operational runbook keep it current.`,
38
+ 6. Review your standing orders for any periodic checks or actions.
39
+ 7. If something needs attention — take action now using your available tools (create tasks, post to Slack, cancel stuck tasks, etc.).
40
+ 8. If everything looks healthy and no standing orders are actionable — complete this task with a brief "All clear" summary. You may NOT say "All clear" if reboot-related failures exist that haven't been triaged.
41
+ 9. Do NOT create another heartbeat-checklist task — the system handles scheduling.
42
+ 10. **Update HEARTBEAT.md only after pruning.** Keep it current, but keep tracked items capped: remove resolved items, add only dated lift-triggered items, and lift detail to memory instead of growing the file.`,
41
43
  variables: [
42
44
  {
43
45
  name: "system_status",
@@ -73,22 +75,24 @@ The API server has just restarted (deployment, pod rotation, or crash). An aggre
73
75
  {{heartbeat_content}}
74
76
 
75
77
  ## Instructions
76
- 1. **Triage reboot-interrupted work FIRST.** If the "Reboot-Interrupted Work" section above lists tasks:
78
+ 1. **Prune tracked HEARTBEAT items FIRST.** Active Blockers + Watch Items + Open Discussion combined must stay at ≤10 items; 20 is the absolute max only when genuinely super busy. This cap does not apply to evergreen Standing Orders / Governance / Playbook-index reference sections. Before adding anything, re-check every tracked item against its lift trigger and remove anything resolved, stale, or past its trigger date. Lift incident detail to memory; never keep it inline in HEARTBEAT.md. Every new tracked item must include an explicit lift trigger + date; if it has no removal condition, do not add it. If the tracked list is already at/over cap, prune before (or instead of) adding — the cap is binding.
79
+ 2. **Run seeded boot triage.** Use \`script-run\` with global script \`boot-triage\` to gather deploy-restart PR context, recent real failures, stuck offline-agent work, orphaned pending/offered tasks, and superseded tasks missing resume children in one read-only call.
80
+ 3. **Triage reboot-interrupted work FIRST.** If the "Reboot-Interrupted Work" section above or the \`boot-triage\` result lists tasks:
77
81
  - For each task: verify the retry is progressing via \`get-task-details\` with the retry task ID
78
82
  - If a retry failed or is stuck, re-create the task manually
79
83
  - If the work is no longer needed, cancel the retry task
80
84
  - You MUST address every item — do NOT skip this section
81
- 2. **Verify supersede + resume worked end-to-end.** Worker crashes / OOMs are recovered via supersede (parent → \`superseded\`) + a fresh \`taskType=resume\` child created by the heartbeat sweep (DES-523). Sanity check:
85
+ 4. **Verify supersede + resume worked end-to-end.** Worker crashes / OOMs are recovered via supersede (parent → \`superseded\`) + a fresh \`taskType=resume\` child created by the heartbeat sweep (DES-523). Sanity check:
82
86
  - List recent \`superseded\` tasks: \`list-tasks status=superseded\` (last ~hour).
83
87
  - For each, confirm a child task with \`taskType=resume\` and a non-terminal status exists. If a superseded task is missing its resume child, the work is silently dropped — recreate the task manually.
84
88
  - Look for \`in_progress\` tasks older than 5 min on agents that show as offline — the sweep should have caught them. If any remain, recreate as needed.
85
- 3. **Check orphaned tasks.** If the "Orphaned Tasks" section lists pending/offered tasks assigned to offline workers, re-assign or cancel them.
86
- 4. Review agent status — are all expected workers online? If not, note which are missing.
87
- 5. Review your standing orders for any post-reboot checks.
88
- 6. Take action using your available tools.
89
- 7. Complete this task with a summary of what you found and what actions you took. Include the status of each reboot-interrupted task.
90
- 8. Do NOT create another boot-triage task — this is a one-off event.
91
- 9. **Update your standing orders** If the reboot revealed a pattern worth monitoring (e.g., frequent restarts, specific tasks that keep failing), add a standing order to HEARTBEAT.md via \`update-profile\` with \`heartbeatMd\`.`,
89
+ 5. **Check orphaned tasks.** If the "Orphaned Tasks" section or \`boot-triage\` result lists pending/offered tasks assigned to offline workers, re-assign or cancel them.
90
+ 6. Review agent status — are all expected workers online? If not, note which are missing.
91
+ 7. Review your standing orders for any post-reboot checks.
92
+ 8. Take action using your available tools.
93
+ 9. Complete this task with a summary of what you found and what actions you took. Include the status of each reboot-interrupted task.
94
+ 10. Do NOT create another boot-triage task — this is a one-off event.
95
+ 11. **Update HEARTBEAT.md only after pruning.** If the reboot revealed a pattern worth monitoring, add it only as a dated lift-triggered tracked item and only if the cap still holds after pruning. Lift incident detail to memory instead of growing HEARTBEAT.md.`,
92
96
  variables: [
93
97
  {
94
98
  name: "system_status",
package/src/http/index.ts CHANGED
@@ -43,12 +43,17 @@ import { handleHeartbeat } from "./heartbeat";
43
43
  import { handleInboxState } from "./inbox-state";
44
44
  import { handleIntegrations } from "./integrations";
45
45
  import { handleKv } from "./kv";
46
- import { handleMcp } from "./mcp";
46
+ import {
47
+ closeIdleMcpTransports,
48
+ DEFAULT_MCP_TRANSPORT_IDLE_TIMEOUT_MS,
49
+ handleMcp,
50
+ type McpTransportActivity,
51
+ } from "./mcp";
47
52
  import { handleMcpBridge } from "./mcp-bridge";
48
53
  import { handleMcpOAuth, startMcpOAuthPendingGc, stopMcpOAuthPendingGc } from "./mcp-oauth";
49
54
  import { handleMcpServers } from "./mcp-servers";
50
- import { handleMcpUser } from "./mcp-user";
51
- import { handleMemory } from "./memory";
55
+ import { closeIdleMcpUserTransports, handleMcpUser } from "./mcp-user";
56
+ import { handleMemory, startMemoryGc, stopMemoryGc } from "./memory";
52
57
  import { handleMetrics } from "./metrics";
53
58
  import { handlePageProxy } from "./page-proxy";
54
59
  import { handlePages } from "./pages";
@@ -99,12 +104,15 @@ const globalState = globalThis as typeof globalThis & {
99
104
  __transports?: Record<string, StreamableHTTPServerTransport>;
100
105
  __transportsUser?: Record<string, StreamableHTTPServerTransport>;
101
106
  __sessionUsers?: Record<string, string>;
107
+ __transportActivity?: McpTransportActivity;
108
+ __transportActivityUser?: McpTransportActivity;
102
109
  __sigintRegistered?: boolean;
103
110
  __apiGcInterval?: ReturnType<typeof setInterval>;
104
111
  __runId?: string;
105
112
  };
106
113
 
107
114
  const API_GC_INTERVAL_MS = 5 * 60 * 1000;
115
+ const MCP_TRANSPORT_IDLE_TIMEOUT_MS = DEFAULT_MCP_TRANSPORT_IDLE_TIMEOUT_MS;
108
116
 
109
117
  type GcCapableGlobal = typeof globalThis & { gc?: () => void };
110
118
 
@@ -130,11 +138,25 @@ function startApiGcInterval() {
130
138
 
131
139
  const gc = (globalThis as GcCapableGlobal).gc;
132
140
  if (typeof gc !== "function") {
133
- console.log("[HTTP] Explicit GC unavailable; start API with --expose-gc to enable sweeps");
134
- return;
141
+ console.log("[HTTP] Explicit GC unavailable; idle MCP transport sweeps remain enabled");
135
142
  }
136
143
 
137
144
  const interval = setInterval(() => {
145
+ const closedOwnerTransports = closeIdleMcpTransports(transports, transportActivity, {
146
+ idleTimeoutMs: MCP_TRANSPORT_IDLE_TIMEOUT_MS,
147
+ label: "MCP",
148
+ });
149
+ const closedUserTransports = closeIdleMcpUserTransports(
150
+ transportsUser,
151
+ sessionUsers,
152
+ transportActivityUser,
153
+ { idleTimeoutMs: MCP_TRANSPORT_IDLE_TIMEOUT_MS },
154
+ );
155
+ if (closedOwnerTransports > 0 || closedUserTransports > 0) {
156
+ console.log(
157
+ `[HTTP] Closed ${closedOwnerTransports} owner MCP and ${closedUserTransports} user MCP idle transport(s)`,
158
+ );
159
+ }
138
160
  scheduleApiGc("periodic API sweep");
139
161
  }, API_GC_INTERVAL_MS);
140
162
  interval.unref?.();
@@ -151,6 +173,8 @@ const transports: Record<string, StreamableHTTPServerTransport> = globalState.__
151
173
  const transportsUser: Record<string, StreamableHTTPServerTransport> =
152
174
  globalState.__transportsUser ?? {};
153
175
  const sessionUsers: Record<string, string> = globalState.__sessionUsers ?? {};
176
+ const transportActivity: McpTransportActivity = globalState.__transportActivity ?? {};
177
+ const transportActivityUser: McpTransportActivity = globalState.__transportActivityUser ?? {};
154
178
 
155
179
  const httpServer = createHttpServer(async (req, res) => {
156
180
  const startTime = performance.now();
@@ -291,8 +315,8 @@ const httpServer = createHttpServer(async (req, res) => {
291
315
  () => handleSessions(req, res, pathSegments, queryParams),
292
316
  () => handleInboxState(req, res, pathSegments, queryParams),
293
317
  () => handleTaskTemplates(req, res, pathSegments, queryParams),
294
- () => handleMcp(req, res, transports),
295
- () => handleMcpUser(req, res, transportsUser, sessionUsers),
318
+ () => handleMcp(req, res, transports, transportActivity),
319
+ () => handleMcpUser(req, res, transportsUser, sessionUsers, transportActivityUser),
296
320
  ];
297
321
 
298
322
  try {
@@ -334,6 +358,8 @@ globalState.__httpServer = httpServer;
334
358
  globalState.__transports = transports;
335
359
  globalState.__transportsUser = transportsUser;
336
360
  globalState.__sessionUsers = sessionUsers;
361
+ globalState.__transportActivity = transportActivity;
362
+ globalState.__transportActivityUser = transportActivityUser;
337
363
 
338
364
  async function shutdown() {
339
365
  console.log("Shutting down HTTP server...");
@@ -362,6 +388,9 @@ async function shutdown() {
362
388
  // Stop MCP OAuth pending-session garbage collector
363
389
  stopMcpOAuthPendingGc();
364
390
 
391
+ // Stop memory expired-row garbage collector
392
+ stopMemoryGc();
393
+
365
394
  if (globalState.__apiGcInterval) {
366
395
  clearInterval(globalState.__apiGcInterval);
367
396
  delete globalState.__apiGcInterval;
@@ -372,6 +401,7 @@ async function shutdown() {
372
401
  console.log(`[HTTP] Closing transport ${id}`);
373
402
  transport.close();
374
403
  delete transports[id];
404
+ delete transportActivity[id];
375
405
  }
376
406
 
377
407
  for (const [id, transport] of Object.entries(transportsUser)) {
@@ -379,6 +409,7 @@ async function shutdown() {
379
409
  transport.close();
380
410
  delete transportsUser[id];
381
411
  delete sessionUsers[id];
412
+ delete transportActivityUser[id];
382
413
  }
383
414
 
384
415
  // Close all active connections forcefully
@@ -522,6 +553,18 @@ httpServer
522
553
 
523
554
  // Start MCP OAuth pending-session garbage collector (5-min tick)
524
555
  startMcpOAuthPendingGc();
556
+
557
+ // Start expired-memory garbage collector (1-hour tick, immediate first run)
558
+ startMemoryGc();
559
+
560
+ // Background backfill: re-embed any agent_memory rows with wrong-dimension
561
+ // embeddings (e.g. 1536d instead of 512d). Non-blocking, idempotent, no-op
562
+ // when the DB is clean. See src/be/memory/boot-reembed.ts.
563
+ import("../be/memory/boot-reembed")
564
+ .then(({ runBootReembed }) => runBootReembed())
565
+ .catch((err) => {
566
+ console.error("[boot-reembed] startup backfill failed (non-fatal):", err);
567
+ });
525
568
  })
526
569
  .on("error", (err) => {
527
570
  console.error("HTTP Server Error:", err);
@@ -5,6 +5,7 @@ import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
5
5
  import { resolveUserByToken } from "@/be/users";
6
6
  import { createUserServer } from "@/server-user";
7
7
  import type { User } from "@/types";
8
+ import { closeIdleMcpTransports, type McpTransportActivity, markMcpTransportActivity } from "./mcp";
8
9
 
9
10
  function unauthorized(res: ServerResponse): true {
10
11
  res.writeHead(401, { "Content-Type": "application/json" });
@@ -32,6 +33,7 @@ export async function handleMcpUser(
32
33
  res: ServerResponse,
33
34
  transports: Record<string, StreamableHTTPServerTransport>,
34
35
  sessionUsers: Record<string, string>,
36
+ sessionActivity: McpTransportActivity = {},
35
37
  ): Promise<boolean> {
36
38
  const sessionId = req.headers["mcp-session-id"] as string | undefined;
37
39
 
@@ -57,16 +59,19 @@ export async function handleMcpUser(
57
59
 
58
60
  if (sessionId && transports[sessionId]) {
59
61
  transport = transports[sessionId];
62
+ markMcpTransportActivity(sessionActivity, sessionId);
60
63
  } else if (!sessionId && isInitializeRequest(body)) {
61
64
  transport = new StreamableHTTPServerTransport({
62
65
  sessionIdGenerator: () => randomUUID(),
63
66
  onsessioninitialized: (id) => {
64
67
  transports[id] = transport;
65
68
  sessionUsers[id] = user.id;
69
+ markMcpTransportActivity(sessionActivity, id);
66
70
  },
67
71
  onsessionclosed: (id) => {
68
72
  delete transports[id];
69
73
  delete sessionUsers[id];
74
+ delete sessionActivity[id];
70
75
  },
71
76
  });
72
77
 
@@ -74,6 +79,7 @@ export async function handleMcpUser(
74
79
  if (transport.sessionId) {
75
80
  delete transports[transport.sessionId];
76
81
  delete sessionUsers[transport.sessionId];
82
+ delete sessionActivity[transport.sessionId];
77
83
  }
78
84
  };
79
85
 
@@ -92,11 +98,13 @@ export async function handleMcpUser(
92
98
  }
93
99
 
94
100
  await transport.handleRequest(req, res, body);
101
+ markMcpTransportActivity(sessionActivity, transport.sessionId);
95
102
  return true;
96
103
  }
97
104
 
98
105
  if (req.method === "GET" || req.method === "DELETE") {
99
106
  if (sessionId && transports[sessionId]) {
107
+ markMcpTransportActivity(sessionActivity, sessionId);
100
108
  await transports[sessionId].handleRequest(req, res);
101
109
  return true;
102
110
  }
@@ -109,3 +117,18 @@ export async function handleMcpUser(
109
117
  res.end("Method not allowed");
110
118
  return true;
111
119
  }
120
+
121
+ export function closeIdleMcpUserTransports(
122
+ transports: Record<string, StreamableHTTPServerTransport>,
123
+ sessionUsers: Record<string, string>,
124
+ sessionActivity: McpTransportActivity,
125
+ options: { now?: number; idleTimeoutMs?: number } = {},
126
+ ): number {
127
+ return closeIdleMcpTransports(transports, sessionActivity, {
128
+ ...options,
129
+ label: "user MCP",
130
+ onClose: (id) => {
131
+ delete sessionUsers[id];
132
+ },
133
+ });
134
+ }