npm - @desplega.ai/agent-swarm - Versions diffs - 1.92.0 → 1.92.2 - Mend

@desplega.ai/agent-swarm 1.92.0 → 1.92.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

package/README.md +1 -1
package/openapi.json +276 -3
package/package.json +6 -6
package/plugin/skills/pages/SKILL.md +5 -2
package/src/be/db.ts +416 -20
package/src/be/memory/boot-reembed.ts +85 -0
package/src/be/memory/constants.ts +44 -2
package/src/be/memory/providers/openai-embedding.ts +15 -5
package/src/be/memory/providers/sqlite-store.ts +325 -76
package/src/be/memory/reranker.ts +35 -17
package/src/be/memory/types.ts +43 -0
package/src/be/migrations/084_script_run_journal_duration.sql +5 -0
package/src/be/migrations/085_script_runs_kind.sql +9 -0
package/src/be/migrations/086_pages_default_authed.sql +64 -0
package/src/be/migrations/087_skill_files.sql +19 -0
package/src/be/modelsdev-cache.json +5622 -2543
package/src/be/seed-scripts/catalog/boot-triage.ts +221 -0
package/src/be/seed-scripts/catalog/catalog-report.ts +457 -0
package/src/be/seed-scripts/catalog/compound-insights.ts +465 -0
package/src/be/seed-scripts/catalog/gh-pr-snapshot.ts +1 -1
package/src/be/seed-scripts/catalog/memory-eval.ts +1059 -0
package/src/be/seed-scripts/catalog/ops-catalog-audit.ts +34 -439
package/src/be/seed-scripts/catalog/schedule-health.ts +78 -2
package/src/be/seed-scripts/catalog/task-failure-audit.ts +48 -1
package/src/be/seed-scripts/index.ts +32 -4
package/src/be/seed-skills/index.ts +0 -7
package/src/be/skill-sync.ts +91 -7
package/src/commands/runner.ts +6 -2
package/src/heartbeat/templates.ts +20 -16
package/src/http/index.ts +50 -7
package/src/http/mcp-user.ts +23 -0
package/src/http/mcp.ts +58 -0
package/src/http/memory.ts +62 -0
package/src/http/pages.ts +1 -1
package/src/http/script-runs.ts +2 -0
package/src/http/scripts.ts +39 -2
package/src/http/skills.ts +225 -0
package/src/providers/claude-adapter.ts +56 -24
package/src/script-workflows/workflow-ctx.ts +7 -3
package/src/scripts-runtime/sdk-allowlist.ts +1 -0
package/src/scripts-runtime/swarm-sdk.ts +13 -0
package/src/scripts-runtime/types/stdlib.d.ts +1 -0
package/src/scripts-runtime/types/swarm-sdk.d.ts +1 -0
package/src/server.ts +2 -0
package/src/tasks/worker-follow-up.ts +12 -0
package/src/tests/claude-adapter-binary.test.ts +135 -81
package/src/tests/create-page-tool.test.ts +19 -2
package/src/tests/heartbeat-checklist.test.ts +36 -0
package/src/tests/mcp-transport-gc.test.ts +58 -0
package/src/tests/memory-e2e.test.ts +6 -6
package/src/tests/memory-health-endpoint.test.ts +78 -0
package/src/tests/memory-rater-e2e.test.ts +4 -5
package/src/tests/memory-reranker.test.ts +135 -124
package/src/tests/memory-store.test.ts +221 -1
package/src/tests/memory.test.ts +13 -12
package/src/tests/pages-http.test.ts +20 -2
package/src/tests/pages-storage.test.ts +26 -0
package/src/tests/scripts-mcp-e2e.test.ts +53 -0
package/src/tests/seed-scripts.test.ts +328 -3
package/src/tests/skill-files-http.test.ts +171 -0
package/src/tests/skill-files.test.ts +162 -0
package/src/tests/skill-get-file-tool.test.ts +110 -0
package/src/tests/skill-sync.test.ts +125 -6
package/src/tests/task-cascade-fail.test.ts +304 -0
package/src/tools/create-page.ts +2 -2
package/src/tools/skills/index.ts +1 -0
package/src/tools/skills/skill-get-file.ts +80 -0
package/src/tools/tool-config.ts +2 -1
package/src/types.ts +20 -0
package/src/utils/internal-ai/complete-structured.ts +2 -2
package/templates/schedules/daily-blocker-digest/content.md +68 -54
package/templates/schedules/daily-compounding-reflection/content.md +4 -4
package/templates/schedules/daily-hn-briefing/content.md +5 -5
package/templates/schedules/daily-workflow-health-audit/content.md +6 -6
package/templates/schedules/gtm-weekly-review/content.md +9 -9
package/templates/schedules/weekly-dependabot-triage/content.md +24 -20
package/templates/skills/agentmail-sending/content.md +6 -7
package/templates/skills/desloppify/content.md +8 -9
package/templates/skills/jira-interaction/content.md +25 -33
package/templates/skills/kapso-whatsapp/content.md +29 -30
package/templates/skills/linear-interaction/content.md +8 -9
package/templates/skills/profile-corruption-escalation/content.md +44 -85
package/templates/skills/sprite-cli/content.md +4 -5
package/templates/skills/turso-interaction/content.md +14 -17
package/templates/skills/workflow-iterate/content.md +38 -391
package/templates/skills/x-api-interactions/content.md +4 -6
package/templates/workflows/llm-safe-release-context/config.json +13 -0
package/templates/workflows/llm-safe-release-context/content.md +69 -0
package/templates/skills/scheduled-task-resilience/config.json +0 -14
package/templates/skills/scheduled-task-resilience/content.md +0 -95

package/src/be/seed-scripts/index.ts CHANGED Viewed

@@ -22,6 +22,9 @@ import { getScript, upsertScriptByName } from "../scripts/db";
 import { extractArgsJsonSchema } from "../scripts/extract-schema";
 import { typecheckScript } from "../scripts/typecheck";
 import type { Seeder, SeedItem } from "../seed/types";
+import bootTriageSrc from "./catalog/boot-triage.ts" with { type: "text" };
+// @ts-expect-error Bun text imports return the raw source string for bundling standalone scripts.
+import catalogReportSrc from "./catalog/catalog-report.ts" with { type: "text" };
 import compoundInsightsSrc from "./catalog/compound-insights.ts" with { type: "text" };
 import dateResolveSrc from "./catalog/date-resolve.ts" with { type: "text" };
 import fetchReadableSrc from "./catalog/fetch-readable.ts" with { type: "text" };
@@ -30,6 +33,7 @@ import groupCountSrc from "./catalog/group-count.ts" with { type: "text" };
 import jsonQuerySrc from "./catalog/json-query.ts" with { type: "text" };
 import linearIssueSrc from "./catalog/linear-issue.ts" with { type: "text" };
 import memoryDedupCheckSrc from "./catalog/memory-dedup-check.ts" with { type: "text" };
+import memoryEvalSrc from "./catalog/memory-eval.ts" with { type: "text" };
 import opsCatalogAuditSrc from "./catalog/ops-catalog-audit.ts" with { type: "text" };
 import scheduleHealthSrc from "./catalog/schedule-health.ts" with { type: "text" };
 import slackThreadFlattenSrc from "./catalog/slack-thread-flatten.ts" with { type: "text" };
@@ -50,6 +54,14 @@ export type SeedScript = {
 // module's default export, so the cast restores the real shape.
 const asText = (s: unknown): string => s as string;
+const CATALOG_REPORT_IMPORT_RE = /^import\s+\{[^}]*\}\s+from "\.\/catalog-report";\n\n?/m; // Matches a named-import statement from "./catalog-report" at the start of any line, plus the newline after it and one optional blank line.
+function bundleCatalogReport(source: string): string { // Inlines the catalog-report helper source ahead of a script that imports it; a script without that import is returned unchanged.
+  const helper = asText(catalogReportSrc);
+  if (!CATALOG_REPORT_IMPORT_RE.test(source)) return source; // Nothing to bundle when the script does not import the helper.
+  return `${helper}\n\n${source.replace(CATALOG_REPORT_IMPORT_RE, "")}`; // The regex is non-global, so only the first matching import statement is stripped.
+}
 export const SEED_SCRIPTS: SeedScript[] = [
   {
     name: "gh-pr-snapshot",
@@ -103,7 +115,7 @@ export const SEED_SCRIPTS: SeedScript[] = [
       "Scan recently failed swarm tasks and cluster them by failure reason, agent or schedule to surface recurring problems.",
     intent:
       "Find patterns in swarm task failures — which agent, schedule or error keeps breaking — for a reliability review.",
-    source: asText(taskFailureAuditSrc),
+    source: bundleCatalogReport(asText(taskFailureAuditSrc)),
   },
   {
     name: "memory-dedup-check",
@@ -150,7 +162,7 @@ export const SEED_SCRIPTS: SeedScript[] = [
       "Per-schedule failure rate check over recent tasks — flags schedules with failure rates above a configurable threshold.",
     intent:
       "Find unhealthy schedules that keep failing — for daily compounding, reliability reviews, or ops triage.",
-    source: asText(scheduleHealthSrc),
+    source: bundleCatalogReport(asText(scheduleHealthSrc)),
   },
   {
     name: "tool-usage",
@@ -166,7 +178,15 @@ export const SEED_SCRIPTS: SeedScript[] = [
       "All-in-one swarm-wide daily ops snapshot: task completion/failure summary, real failure clusters (excludes superseded/cancelled bookkeeping), schedule health flags, tool usage top-25, memory health/pollution stats, seed-script candidate tool triplets, and a per-agent breakdown. Aggregates across ALL agents via direct read-only SQL.",
     intent:
       "Single-call daily compounding Phase 0 helper — replaces ~25 raw tool roundtrips with one compressed JSON result covering every agent. For daily evolution, self-scripting candidates, ops reviews, or heartbeat context.",
-    source: asText(compoundInsightsSrc),
+    source: bundleCatalogReport(asText(compoundInsightsSrc)),
+  },
+  {
+    name: "memory-eval",
+    description:
+      "3-axis memory quality evaluation: carry-forward context (do follow-up tasks retrieve useful memories from prior tasks?), follow preferences (are CLAUDE.md/IDENTITY.md/SOUL.md/TOOLS.md memories retrieved and useful?), and stay current (what fraction of retrieved memories are fresh vs stale?). Outputs a baseline report to agent-fs + a swarm Page.",
+    intent:
+      "Measure memory system health across OpenAI Dreaming-inspired axes — before/after baseline for architecture changes, blog-post numbers, daily quality monitoring.",
+    source: asText(memoryEvalSrc),
   },
   {
     name: "ops-catalog-audit",
@@ -174,7 +194,15 @@ export const SEED_SCRIPTS: SeedScript[] = [
       "Audit-as-code catalog check for schedules, workflows, and prompt/template drift. Clusters actionable findings by goal and can publish an authed HTML report page.",
     intent:
       "Re-run the ops inventory audit in one call: duplicate/dead schedules, code-work routing risks, enabled workflow fixtures, structured-output gate gaps, prompt registry drift, stale hosts, and systemDefault skill duplicates.",
-    source: asText(opsCatalogAuditSrc),
+    source: bundleCatalogReport(asText(opsCatalogAuditSrc)),
+  },
+  {
+    name: "boot-triage",
+    description:
+      "Post-restart heartbeat triage snapshot: deploy restart PR context, recent real failures, stuck offline-agent work, orphaned tasks, and superseded tasks missing resume children.",
+    intent:
+      "Run immediately after a swarm restart to gather deterministic boot triage data in one read-only call before the Lead decides what to retry, cancel, or escalate.",
+    source: asText(bootTriageSrc),
   },
 ];

package/src/be/seed-skills/index.ts CHANGED Viewed

@@ -25,12 +25,6 @@ import kvStorageContent from "../../../templates/skills/kv-storage/content.md" w
 };
 import pagesConfig from "../../../templates/skills/pages/config.json" with { type: "text" };
 import pagesContent from "../../../templates/skills/pages/content.md" with { type: "text" };
-import scheduledTaskResilienceConfig from "../../../templates/skills/scheduled-task-resilience/config.json" with {
-  type: "text",
-};
-import scheduledTaskResilienceContent from "../../../templates/skills/scheduled-task-resilience/content.md" with {
-  type: "text",
-};
 import scriptWorkflowsConfig from "../../../templates/skills/script-workflows/config.json" with {
   type: "text",
 };
@@ -77,7 +71,6 @@ const BUILT_IN_SKILL_SOURCES = [
   { config: artifactsConfig, body: artifactsContent },
   { config: kvStorageConfig, body: kvStorageContent },
   { config: pagesConfig, body: pagesContent },
-  { config: scheduledTaskResilienceConfig, body: scheduledTaskResilienceContent },
   { config: scriptWorkflowsConfig, body: scriptWorkflowsContent },
   { config: swarmScriptsConfig, body: swarmScriptsContent },
   { config: workflowIterateConfig, body: workflowIterateContent },

package/src/be/skill-sync.ts CHANGED Viewed

@@ -8,10 +8,11 @@
  * This runs on the API side — workers call it via POST /api/skills/sync-filesystem.
  */
+import type { Dirent } from "node:fs";
 import { existsSync, mkdirSync, readdirSync, rmSync, writeFileSync } from "node:fs";
 import { homedir } from "node:os";
-import { join } from "node:path";
-import { getAgentSkills } from "./db";
+import { dirname, join } from "node:path";
+import { getAgentSkills, getSkillFiles } from "./db";
 export interface SkillSyncResult {
   synced: number;
@@ -29,11 +30,68 @@ export interface SkillSyncResult {
  */
 const SWARM_MARKER_FILE = ".swarm-managed";
+function reconcileManagedSkillFiles(skillDir: string, currentRelativeFiles: Set<string>): number { // Deletes files under skillDir that are not SKILL.md, the marker file, or listed in currentRelativeFiles; returns how many files were deleted.
+  if (!existsSync(join(skillDir, SWARM_MARKER_FILE))) return 0; // No marker file in skillDir: nothing is touched.
+  let removed = 0;
+  const walk = (dir: string, relativeDir = ""): boolean => { // Returns true when dir still holds at least one kept (or undeletable) entry after the sweep.
+    let entries: Dirent[];
+    try {
+      entries = readdirSync(dir, { withFileTypes: true });
+    } catch {
+      return false; // NOTE(review): an unreadable directory is reported as empty, so for a subdirectory the caller below attempts a recursive forced delete of it — confirm this is intended.
+    }
+    let hasEntries = false;
+    for (const entry of entries) {
+      const relativePath = relativeDir ? `${relativeDir}/${entry.name}` : entry.name; // Built with "/" separators; this is the form compared against currentRelativeFiles below.
+      const fullPath = join(dir, entry.name);
+      if (entry.isDirectory()) {
+        const childHasEntries = walk(fullPath, relativePath);
+        if (!childHasEntries) {
+          try {
+            rmSync(fullPath, { recursive: true, force: true }); // Child kept nothing: remove the whole subdirectory (directories are not counted in removed).
+          } catch {
+            hasEntries = true; // Deletion failed: treat the subdirectory as still present.
+          }
+        } else {
+          hasEntries = true;
+        }
+        continue;
+      }
+      if (
+        relativePath === "SKILL.md" ||
+        relativePath === SWARM_MARKER_FILE ||
+        currentRelativeFiles.has(relativePath)
+      ) {
+        hasEntries = true; // Kept entry: top-level SKILL.md, the top-level marker, or a currently bundled file.
+        continue;
+      }
+      try {
+        rmSync(fullPath, { force: true });
+        removed++; // Only individual non-directory deletions are counted.
+      } catch {
+        hasEntries = true; // Could not delete: the entry still exists, so the directory is not empty.
+      }
+    }
+    return hasEntries;
+  };
+  walk(skillDir);
+  return removed;
+}
 /**
  * Sync agent's installed skills to the filesystem.
  *
  * For simple skills (content in DB): writes SKILL.md to ~/.claude/skills/<name>/
- * For complex skills (isComplex=true): skipped here (handled by npx in entrypoint)
+ * For DB-backed complex skills: writes SKILL.md plus bundled skill_files rows.
+ * Legacy complex skills without skill_files remain handled by npx in entrypoint.
  */
 export function syncSkillsToFilesystem(
   agentId: string,
@@ -44,6 +102,7 @@ export function syncSkillsToFilesystem(
   const home = homeOverride ?? homedir();
   const errors: string[] = [];
   let synced = 0;
+  let removed = 0;
   // Directories to write to
   const skillDirs: string[] = [];
@@ -67,7 +126,8 @@ export function syncSkillsToFilesystem(
   for (const skill of skills) {
     if (!skill.isActive || !skill.isEnabled) continue;
-    if (skill.isComplex) continue; // Complex skills handled by npx
+    const bundledFiles = skill.isComplex ? getSkillFiles(skill.id) : [];
+    if (skill.isComplex && bundledFiles.length === 0) continue; // Legacy complex skills handled by npx
     if (!skill.content) continue;
     // Sanitize skill name to prevent path traversal (strip /, .., and non-safe chars)
@@ -75,6 +135,9 @@ export function syncSkillsToFilesystem(
     if (!safeName) continue;
     writtenNames.add(safeName);
+    const currentBundledFilePaths = new Set(
+      bundledFiles.filter((file) => !file.isBinary).map((file) => file.path),
+    );
     for (const baseDir of skillDirs) {
       const skillDir = join(baseDir, safeName);
@@ -83,14 +146,36 @@ export function syncSkillsToFilesystem(
       try {
         mkdirSync(skillDir, { recursive: true });
+        removed += reconcileManagedSkillFiles(skillDir, currentBundledFilePaths);
         writeFileSync(skillFile, skill.content, "utf-8");
         writeFileSync(markerFile, "", "utf-8");
         synced++;
       } catch (err) {
-        errors.push(
-          `${skill.name} -> ${skillDir}: ${err instanceof Error ? err.message : "Unknown error"}`,
+        const msg = err instanceof Error ? err.message : "Unknown error";
+        errors.push(`${skill.name} -> ${skillDir}: ${msg}`);
+        console.error(
+          `[skill-sync] Failed to write SKILL.md for ${skill.name} to ${skillDir}: ${msg}`,
         );
       }
+      for (const file of bundledFiles) {
+        if (file.isBinary) {
+          console.log(`[skill-sync] Skipping binary skill file ${skill.name}/${file.path}`);
+          continue;
+        }
+        const targetPath = join(skillDir, file.path);
+        try {
+          mkdirSync(dirname(targetPath), { recursive: true });
+          writeFileSync(targetPath, file.content, "utf-8");
+        } catch (err) {
+          const msg = err instanceof Error ? err.message : "Unknown error";
+          errors.push(`${skill.name}/${file.path} -> ${targetPath}: ${msg}`);
+          console.error(
+            `[skill-sync] Failed to write bundled file ${skill.name}/${file.path} to ${targetPath}: ${msg}`,
+          );
+        }
+      }
     }
   }
@@ -98,7 +183,6 @@ export function syncSkillsToFilesystem(
   // present). Leaves user-installed personal skills alone — important on
   // local dev where ~/.codex/skills holds skills the user installed
   // outside the swarm.
-  let removed = 0;
   for (const baseDir of skillDirs) {
     if (!existsSync(baseDir)) continue;

package/src/commands/runner.ts CHANGED Viewed

@@ -3127,6 +3127,7 @@ async function checkCompletedProcesses(
   state: RunnerState,
   role: string,
   apiConfig?: ApiConfig,
+  cancelledSignaled?: Set<string>,
 ): Promise<void> {
   const completedTasks: Array<{
     taskId: string;
@@ -3161,6 +3162,9 @@ async function checkCompletedProcesses(
   // Remove completed tasks from the map and ensure they're marked as finished
   for (const { taskId, result, cursorUpdates, workingDir, credentialInfo } of completedTasks) {
     state.activeTasks.delete(taskId);
+    vcsDetectedTasks.delete(taskId);
+    vcsCheckTimestamps.delete(taskId);
+    cancelledSignaled?.delete(taskId);
     if (apiConfig) {
       removeActiveSession(apiConfig, taskId);
@@ -4122,7 +4126,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
         // Wait if at capacity (though unlikely on fresh startup)
         while (state.activeTasks.size >= state.maxConcurrent) {
-          await checkCompletedProcesses(state, role, apiConfig);
+          await checkCompletedProcesses(state, role, apiConfig, cancelledSignaled);
           await Bun.sleep(1000);
         }
@@ -4341,7 +4345,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
     await pingServer(apiConfig, role);
     // Check for completed processes first and ensure tasks are marked as finished
-    await checkCompletedProcesses(state, role, apiConfig);
+    await checkCompletedProcesses(state, role, apiConfig, cancelledSignaled);
     // Live HARNESS_PROVIDER reconciliation. Re-fetches `swarm_config` (overlaid
     // on env) and swaps the adapter if the resolved provider changed —

package/src/heartbeat/templates.ts CHANGED Viewed

@@ -27,17 +27,19 @@ Goal: Review system status and your standing orders, take action if needed.
 ## Instructions
 1. **Read your HEARTBEAT.md** — run \`read /workspace/HEARTBEAT.md\` to get the latest standing orders (the snapshot above may be slightly stale).
-2. Review the system status above for anything that needs attention (stalled tasks, idle workers with available work, anomalies).
-3. **CRITICAL — Reboot failure triage:** Failures with reason "worker session not found" or "worker session heartbeat is stale" indicate tasks that were INTERRUPTED by a server restart. These are NOT "expected auto-cleanup" — they represent work that was lost mid-execution. For each such failure:
+2. **Prune tracked HEARTBEAT items FIRST.** Active Blockers + Watch Items + Open Discussion combined must stay at ≤10 items; 20 is the absolute max only when genuinely super busy. This cap does not apply to evergreen Standing Orders / Governance / Playbook-index reference sections. Before adding anything, re-check every tracked item against its lift trigger and remove anything resolved, stale, or past its trigger date. Lift incident detail to memory; never keep it inline in HEARTBEAT.md. Every new tracked item must include an explicit lift trigger + date; if it has no removal condition, do not add it. If the tracked list is already at/over cap, prune before (or instead of) adding — the cap is binding.
+3. **Run seeded heartbeat data-gathering.** Use \`script-run\` with global script \`Heartbeat Audit\` and pass the current HEARTBEAT.md text as \`heartbeatMarkdown\`. It covers Rules #10/#13/#15/#16/#17: resolved stale PRs, pool-target risk schedules, schedule/provider failure clusters, and whether daily-blocker-digest ran today. The Slack thread-reply check (Rule #11) stays Lead-side; the script runtime has no Slack token.
+4. Review the system status above plus the \`Heartbeat Audit\` result for anything that needs attention (stalled tasks, idle workers with available work, anomalies).
+5. **CRITICAL — Reboot failure triage:** Failures with reason "worker session not found" or "worker session heartbeat is stale" indicate tasks that were INTERRUPTED by a server restart. These are NOT "expected auto-cleanup" — they represent work that was lost mid-execution. For each such failure:
    - Check what the task was (via \`get-task-details\` with the task ID from the failure)
    - If a retry task was auto-created (tagged \`reboot-retry\`), verify it is progressing
    - If no retry exists and the work is still needed, re-create the task
    - Do NOT dismiss these as "expected" or "auto-cleanup"
-4. Review your standing orders for any periodic checks or actions.
-5. If something needs attention — take action now using your available tools (create tasks, post to Slack, cancel stuck tasks, etc.).
-6. If everything looks healthy and no standing orders are actionable — complete this task with a brief "All clear" summary. You may NOT say "All clear" if reboot-related failures exist that haven't been triaged.
-7. Do NOT create another heartbeat-checklist task — the system handles scheduling.
-8. **Update your standing orders** — After every heartbeat check, edit \`/workspace/HEARTBEAT.md\` directly. Add new patterns you noticed (recurring failures, workers needing attention), remove resolved items. This is your live operational runbook — keep it current.`,
+6. Review your standing orders for any periodic checks or actions.
+7. If something needs attention — take action now using your available tools (create tasks, post to Slack, cancel stuck tasks, etc.).
+8. If everything looks healthy and no standing orders are actionable — complete this task with a brief "All clear" summary. You may NOT say "All clear" if reboot-related failures exist that haven't been triaged.
+9. Do NOT create another heartbeat-checklist task — the system handles scheduling.
+10. **Update HEARTBEAT.md only after pruning.** Keep it current, but keep tracked items capped: remove resolved items, add only dated lift-triggered items, and lift detail to memory instead of growing the file.`,
   variables: [
     {
       name: "system_status",
@@ -73,22 +75,24 @@ The API server has just restarted (deployment, pod rotation, or crash). An aggre
 {{heartbeat_content}}
 ## Instructions
-1. **Triage reboot-interrupted work FIRST.** If the "Reboot-Interrupted Work" section above lists tasks:
+1. **Prune tracked HEARTBEAT items FIRST.** Active Blockers + Watch Items + Open Discussion combined must stay at ≤10 items; 20 is the absolute max only when genuinely super busy. This cap does not apply to evergreen Standing Orders / Governance / Playbook-index reference sections. Before adding anything, re-check every tracked item against its lift trigger and remove anything resolved, stale, or past its trigger date. Lift incident detail to memory; never keep it inline in HEARTBEAT.md. Every new tracked item must include an explicit lift trigger + date; if it has no removal condition, do not add it. If the tracked list is already at/over cap, prune before (or instead of) adding — the cap is binding.
+2. **Run seeded boot triage.** Use \`script-run\` with global script \`boot-triage\` to gather deploy-restart PR context, recent real failures, stuck offline-agent work, orphaned pending/offered tasks, and superseded tasks missing resume children in one read-only call.
+3. **Triage reboot-interrupted work FIRST.** If the "Reboot-Interrupted Work" section above or the \`boot-triage\` result lists tasks:
    - For each task: verify the retry is progressing via \`get-task-details\` with the retry task ID
    - If a retry failed or is stuck, re-create the task manually
    - If the work is no longer needed, cancel the retry task
    - You MUST address every item — do NOT skip this section
-2. **Verify supersede + resume worked end-to-end.** Worker crashes / OOMs are recovered via supersede (parent → \`superseded\`) + a fresh \`taskType=resume\` child created by the heartbeat sweep (DES-523). Sanity check:
+4. **Verify supersede + resume worked end-to-end.** Worker crashes / OOMs are recovered via supersede (parent → \`superseded\`) + a fresh \`taskType=resume\` child created by the heartbeat sweep (DES-523). Sanity check:
    - List recent \`superseded\` tasks: \`list-tasks status=superseded\` (last ~hour).
    - For each, confirm a child task with \`taskType=resume\` and a non-terminal status exists. If a superseded task is missing its resume child, the work is silently dropped — recreate the task manually.
    - Look for \`in_progress\` tasks older than 5 min on agents that show as offline — the sweep should have caught them. If any remain, recreate as needed.
-3. **Check orphaned tasks.** If the "Orphaned Tasks" section lists pending/offered tasks assigned to offline workers, re-assign or cancel them.
-4. Review agent status — are all expected workers online? If not, note which are missing.
-5. Review your standing orders for any post-reboot checks.
-6. Take action using your available tools.
-7. Complete this task with a summary of what you found and what actions you took. Include the status of each reboot-interrupted task.
-8. Do NOT create another boot-triage task — this is a one-off event.
-9. **Update your standing orders** — If the reboot revealed a pattern worth monitoring (e.g., frequent restarts, specific tasks that keep failing), add a standing order to HEARTBEAT.md via \`update-profile\` with \`heartbeatMd\`.`,
+5. **Check orphaned tasks.** If the "Orphaned Tasks" section or \`boot-triage\` result lists pending/offered tasks assigned to offline workers, re-assign or cancel them.
+6. Review agent status — are all expected workers online? If not, note which are missing.
+7. Review your standing orders for any post-reboot checks.
+8. Take action using your available tools.
+9. Complete this task with a summary of what you found and what actions you took. Include the status of each reboot-interrupted task.
+10. Do NOT create another boot-triage task — this is a one-off event.
+11. **Update HEARTBEAT.md only after pruning.** If the reboot revealed a pattern worth monitoring, add it only as a dated lift-triggered tracked item and only if the cap still holds after pruning. Lift incident detail to memory instead of growing HEARTBEAT.md.`,
   variables: [
     {
       name: "system_status",

package/src/http/index.ts CHANGED Viewed

@@ -43,12 +43,17 @@ import { handleHeartbeat } from "./heartbeat";
 import { handleInboxState } from "./inbox-state";
 import { handleIntegrations } from "./integrations";
 import { handleKv } from "./kv";
-import { handleMcp } from "./mcp";
+import {
+  closeIdleMcpTransports,
+  DEFAULT_MCP_TRANSPORT_IDLE_TIMEOUT_MS,
+  handleMcp,
+  type McpTransportActivity,
+} from "./mcp";
 import { handleMcpBridge } from "./mcp-bridge";
 import { handleMcpOAuth, startMcpOAuthPendingGc, stopMcpOAuthPendingGc } from "./mcp-oauth";
 import { handleMcpServers } from "./mcp-servers";
-import { handleMcpUser } from "./mcp-user";
-import { handleMemory } from "./memory";
+import { closeIdleMcpUserTransports, handleMcpUser } from "./mcp-user";
+import { handleMemory, startMemoryGc, stopMemoryGc } from "./memory";
 import { handleMetrics } from "./metrics";
 import { handlePageProxy } from "./page-proxy";
 import { handlePages } from "./pages";
@@ -99,12 +104,15 @@ const globalState = globalThis as typeof globalThis & {
   __transports?: Record<string, StreamableHTTPServerTransport>;
   __transportsUser?: Record<string, StreamableHTTPServerTransport>;
   __sessionUsers?: Record<string, string>;
+  __transportActivity?: McpTransportActivity;
+  __transportActivityUser?: McpTransportActivity;
   __sigintRegistered?: boolean;
   __apiGcInterval?: ReturnType<typeof setInterval>;
   __runId?: string;
 };
 const API_GC_INTERVAL_MS = 5 * 60 * 1000;
+const MCP_TRANSPORT_IDLE_TIMEOUT_MS = DEFAULT_MCP_TRANSPORT_IDLE_TIMEOUT_MS;
 type GcCapableGlobal = typeof globalThis & { gc?: () => void };
@@ -130,11 +138,25 @@ function startApiGcInterval() {
   const gc = (globalThis as GcCapableGlobal).gc;
   if (typeof gc !== "function") {
-    console.log("[HTTP] Explicit GC unavailable; start API with --expose-gc to enable sweeps");
-    return;
+    console.log("[HTTP] Explicit GC unavailable; idle MCP transport sweeps remain enabled");
   }
   const interval = setInterval(() => {
+    const closedOwnerTransports = closeIdleMcpTransports(transports, transportActivity, {
+      idleTimeoutMs: MCP_TRANSPORT_IDLE_TIMEOUT_MS,
+      label: "MCP",
+    });
+    const closedUserTransports = closeIdleMcpUserTransports(
+      transportsUser,
+      sessionUsers,
+      transportActivityUser,
+      { idleTimeoutMs: MCP_TRANSPORT_IDLE_TIMEOUT_MS },
+    );
+    if (closedOwnerTransports > 0 || closedUserTransports > 0) {
+      console.log(
+        `[HTTP] Closed ${closedOwnerTransports} owner MCP and ${closedUserTransports} user MCP idle transport(s)`,
+      );
+    }
     scheduleApiGc("periodic API sweep");
   }, API_GC_INTERVAL_MS);
   interval.unref?.();
@@ -151,6 +173,8 @@ const transports: Record<string, StreamableHTTPServerTransport> = globalState.__
 const transportsUser: Record<string, StreamableHTTPServerTransport> =
   globalState.__transportsUser ?? {};
 const sessionUsers: Record<string, string> = globalState.__sessionUsers ?? {};
+const transportActivity: McpTransportActivity = globalState.__transportActivity ?? {};
+const transportActivityUser: McpTransportActivity = globalState.__transportActivityUser ?? {};
 const httpServer = createHttpServer(async (req, res) => {
   const startTime = performance.now();
@@ -291,8 +315,8 @@ const httpServer = createHttpServer(async (req, res) => {
         () => handleSessions(req, res, pathSegments, queryParams),
         () => handleInboxState(req, res, pathSegments, queryParams),
         () => handleTaskTemplates(req, res, pathSegments, queryParams),
-        () => handleMcp(req, res, transports),
-        () => handleMcpUser(req, res, transportsUser, sessionUsers),
+        () => handleMcp(req, res, transports, transportActivity),
+        () => handleMcpUser(req, res, transportsUser, sessionUsers, transportActivityUser),
       ];
       try {
@@ -334,6 +358,8 @@ globalState.__httpServer = httpServer;
 globalState.__transports = transports;
 globalState.__transportsUser = transportsUser;
 globalState.__sessionUsers = sessionUsers;
+globalState.__transportActivity = transportActivity;
+globalState.__transportActivityUser = transportActivityUser;
 async function shutdown() {
   console.log("Shutting down HTTP server...");
@@ -362,6 +388,9 @@ async function shutdown() {
   // Stop MCP OAuth pending-session garbage collector
   stopMcpOAuthPendingGc();
+  // Stop memory expired-row garbage collector
+  stopMemoryGc();
   if (globalState.__apiGcInterval) {
     clearInterval(globalState.__apiGcInterval);
     delete globalState.__apiGcInterval;
@@ -372,6 +401,7 @@ async function shutdown() {
     console.log(`[HTTP] Closing transport ${id}`);
     transport.close();
     delete transports[id];
+    delete transportActivity[id];
   }
   for (const [id, transport] of Object.entries(transportsUser)) {
@@ -379,6 +409,7 @@ async function shutdown() {
     transport.close();
     delete transportsUser[id];
     delete sessionUsers[id];
+    delete transportActivityUser[id];
   }
   // Close all active connections forcefully
@@ -522,6 +553,18 @@ httpServer
     // Start MCP OAuth pending-session garbage collector (5-min tick)
     startMcpOAuthPendingGc();
+    // Start expired-memory garbage collector (1-hour tick, immediate first run)
+    startMemoryGc();
+    // Background backfill: re-embed any agent_memory rows with wrong-dimension
+    // embeddings (e.g. 1536d instead of 512d). Non-blocking, idempotent, no-op
+    // when the DB is clean. See src/be/memory/boot-reembed.ts.
+    import("../be/memory/boot-reembed")
+      .then(({ runBootReembed }) => runBootReembed())
+      .catch((err) => {
+        console.error("[boot-reembed] startup backfill failed (non-fatal):", err);
+      });
   })
   .on("error", (err) => {
     console.error("HTTP Server Error:", err);

package/src/http/mcp-user.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
 import { resolveUserByToken } from "@/be/users";
 import { createUserServer } from "@/server-user";
 import type { User } from "@/types";
+import { closeIdleMcpTransports, type McpTransportActivity, markMcpTransportActivity } from "./mcp";
 function unauthorized(res: ServerResponse): true {
   res.writeHead(401, { "Content-Type": "application/json" });
@@ -32,6 +33,7 @@ export async function handleMcpUser(
   res: ServerResponse,
   transports: Record<string, StreamableHTTPServerTransport>,
   sessionUsers: Record<string, string>,
+  sessionActivity: McpTransportActivity = {},
 ): Promise<boolean> {
   const sessionId = req.headers["mcp-session-id"] as string | undefined;
@@ -57,16 +59,19 @@ export async function handleMcpUser(
     if (sessionId && transports[sessionId]) {
       transport = transports[sessionId];
+      markMcpTransportActivity(sessionActivity, sessionId);
     } else if (!sessionId && isInitializeRequest(body)) {
       transport = new StreamableHTTPServerTransport({
         sessionIdGenerator: () => randomUUID(),
         onsessioninitialized: (id) => {
           transports[id] = transport;
           sessionUsers[id] = user.id;
+          markMcpTransportActivity(sessionActivity, id);
         },
         onsessionclosed: (id) => {
           delete transports[id];
           delete sessionUsers[id];
+          delete sessionActivity[id];
         },
       });
@@ -74,6 +79,7 @@ export async function handleMcpUser(
         if (transport.sessionId) {
           delete transports[transport.sessionId];
           delete sessionUsers[transport.sessionId];
+          delete sessionActivity[transport.sessionId];
         }
       };
@@ -92,11 +98,13 @@ export async function handleMcpUser(
     }
     await transport.handleRequest(req, res, body);
+    markMcpTransportActivity(sessionActivity, transport.sessionId);
     return true;
   }
   if (req.method === "GET" || req.method === "DELETE") {
     if (sessionId && transports[sessionId]) {
+      markMcpTransportActivity(sessionActivity, sessionId);
       await transports[sessionId].handleRequest(req, res);
       return true;
     }
@@ -109,3 +117,18 @@ export async function handleMcpUser(
   res.end("Method not allowed");
   return true;
 }
+export function closeIdleMcpUserTransports( // User-MCP wrapper around closeIdleMcpTransports that also drops the sessionUsers entry for each id passed to onClose; returns that helper's result.
+  transports: Record<string, StreamableHTTPServerTransport>,
+  sessionUsers: Record<string, string>,
+  sessionActivity: McpTransportActivity,
+  options: { now?: number; idleTimeoutMs?: number } = {},
+): number {
+  return closeIdleMcpTransports(transports, sessionActivity, {
+    ...options, // Spread first so the label and onClose set below are always the ones used.
+    label: "user MCP",
+    onClose: (id) => { // presumably invoked once per closed session id — confirm against closeIdleMcpTransports
+      delete sessionUsers[id];
+    },
+  });
+}