npm - @desplega.ai/agent-swarm - Versions diffs - 1.100.2 → 1.100.4 - Mend

@desplega.ai/agent-swarm 1.100.2 → 1.100.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/openapi.json +1 -1
package/package.json +1 -1
package/src/be/db.ts +131 -4
package/src/be/memory/raters/retrieval.ts +6 -3
package/src/be/migrations/097_memory_retrieval_grouping.sql +10 -0
package/src/github/handlers.ts +84 -7
package/src/github/templates.ts +6 -2
package/src/heartbeat/heartbeat.ts +191 -5
package/src/providers/claude-adapter.ts +41 -4
package/src/slack/assistant.ts +28 -0
package/src/slack/channel-join.ts +38 -3
package/src/slack/handlers.ts +4 -1
package/src/tasks/worker-follow-up.ts +181 -20
package/src/tests/claude-adapter-binary.test.ts +74 -0
package/src/tests/github-handlers-inline-comments.test.ts +308 -0
package/src/tests/heartbeat-reroute-decision.test.ts +570 -0
package/src/tests/heartbeat-supersede-resume.test.ts +137 -0
package/src/tests/heartbeat.test.ts +4 -2
package/src/tests/memory-rater-implicit-citation.test.ts +31 -0
package/src/tests/prompt-template-remaining.test.ts +2 -1
package/src/tests/slack-assistant-comention-production.test.ts +319 -0
package/src/tests/slack-assistant-comention.test.ts +139 -0
package/src/tests/slack-channel-join.test.ts +150 -16
package/src/tests/workflow-swarm-script.test.ts +225 -0
package/src/tests/workflow-template.test.ts +17 -0
package/src/tools/send-task.ts +51 -1
package/src/tools/templates.ts +61 -0
package/src/workflows/engine.ts +22 -1
package/src/workflows/retry-poller.ts +2 -3
package/src/workflows/template.ts +48 -0

package/openapi.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "openapi": "3.1.0",
   "info": {
     "title": "Agent Swarm API",
-    "version": "1.100.2",
+    "version": "1.100.4",
     "description": "Multi-agent orchestration API for Claude Code, Codex, and Gemini CLI. Enables task distribution, agent communication, and service discovery.\n\nMCP tools are documented separately in [MCP.md](./MCP.md)."
   },
   "servers": [

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@desplega.ai/agent-swarm",
-  "version": "1.100.2",
+  "version": "1.100.4",
   "description": "Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants",
   "license": "MIT",
   "author": "desplega.sh <contact@desplega.sh>",

package/src/be/db.ts CHANGED Viewed

@@ -753,8 +753,13 @@ export function getAllAgents(opts?: { slim?: boolean }): Agent[] {
 }
 export function getLeadAgent(): Agent | null {
-  const agents = getAllAgents();
-  return agents.find((a) => a.isLead) ?? null;
+  const leads = getAllAgents().filter((a) => a.isLead);
+  // Prefer a usable (non-offline) lead so callers route to one that can actually
+  // poll — e.g. an old offline lead must not shadow a live replacement. Falls
+  // back to any lead (incl. offline) so existing "is there a lead at all?"
+  // semantics are preserved; callers that require a live lead must check
+  // `status` themselves (see escalateUnreclaimedResumes).
+  return leads.find((a) => a.status !== "offline") ?? leads[0] ?? null;
 }
 export function updateAgentStatus(id: string, status: AgentStatus): Agent | null {
@@ -1444,6 +1449,31 @@ export function hasNonTerminalResumeChild(parentId: string): boolean {
   return row !== undefined && row !== null;
 }
+/**
+ * True when a non-terminal `reroute-decision` child exists for `parentId`.
+ *
+ * Mirrors {@link hasNonTerminalResumeChild} but filters on
+ * `taskType = 'reroute-decision'` — the Lead-owned re-delegation decision
+ * created when a pinned crash-recovery resume is never reclaimed (DES-523).
+ * Makes escalation idempotent: a later heartbeat sweep must not create a second
+ * decision for the same original task. We filter on the taskType marker
+ * specifically (not any child) so ordinary delegation / completion follow-up
+ * children of the original cannot suppress a needed decision, and nothing else
+ * is mistaken for one.
+ */
+export function hasNonTerminalRerouteDecisionChild(parentId: string): boolean {
+  const row = getDb()
+    .prepare(
+      `SELECT 1 FROM agent_tasks
+       WHERE parentTaskId = ?
+         AND taskType = 'reroute-decision'
+         AND status NOT IN ('completed', 'failed', 'cancelled', 'superseded')
+       LIMIT 1`,
+    )
+    .get(parentId);
+  return row !== undefined && row !== null;
+}
 export function updateTaskClaudeSessionId(
   taskId: string,
   claudeSessionId: string,
@@ -2949,6 +2979,14 @@ export interface CreateTaskOptions {
    * a schema'd task should be defensive about JSON parsing.
    */
   outputSchema?: Record<string, unknown>;
+  /**
+   * When a `parentTaskId` is set, the child inherits the parent's `outputSchema`
+   * by default. Set this to `false` to opt out — used by control-plane children
+   * (e.g. the Lead `reroute-decision` task) that must inherit Slack/VCS context
+   * from the parent but must NOT be forced to satisfy the original work's output
+   * contract on completion (which would block the control task — DES-523).
+   */
+  inheritParentOutputSchema?: boolean;
   followUpConfig?: FollowUpConfig;
   requestedByUserId?: string;
   contextKey?: string;
@@ -3101,8 +3139,15 @@ export function createTaskExtended(task: string, options?: CreateTaskOptions): A
       // Contract (schema validation) — `store-progress` validates completion
       // output against `outputSchema`, runner injects structured-output
-      // instructions only when it's present.
-      if (parent.outputSchema && !options.outputSchema) {
+      // instructions only when it's present. Opt-out via
+      // `inheritParentOutputSchema: false` for control-plane children (e.g. the
+      // Lead reroute-decision) that must not be held to the original work's
+      // output contract.
+      if (
+        parent.outputSchema &&
+        !options.outputSchema &&
+        options.inheritParentOutputSchema !== false
+      ) {
         options.outputSchema = parent.outputSchema;
       }
@@ -6506,6 +6551,88 @@ export function getStalledInProgressTasks(thresholdMinutes: number = 30): AgentT
     .map(rowToAgentTask);
 }
+/**
+ * Genuine same-agent crash-recovery PINS (tagged `crash-recovery-pin`, DES-523
+ * Phase 1) that are still `pending` `graceMin` minutes after creation — the
+ * heartbeat reaper escalates these to a Lead reroute-decision.
+ *
+ * Three scoping clauses, each load-bearing:
+ *  - `tags LIKE '%"crash-recovery-pin"%'` — restricts to resumes actually pinned
+ *    to their original agent on the crash path. Without it, a *pooled* resume
+ *    that `autoAssignPoolTasks` flips to `pending` earlier in the SAME sweep
+ *    (keeping its old `createdAt`) would be reaped and cancelled before the
+ *    assigned worker polls; it also keeps `context_limits` / `manual_supersede`
+ *    pins from being escalated under a `crash_recovery` label. (Literal must
+ *    match `CRASH_RECOVERY_PIN_TAG` in src/tasks/worker-follow-up.ts.)
+ *  - `status = 'pending'` — the "currently unreclaimed" discriminator: when the
+ *    agent reclaims via the normal poll path, `startTask` flips the row to
+ *    `in_progress` and it drops out of this set. (A reclaimed resume whose
+ *    session later orphans can be flipped back to `pending` by
+ *    `resetOrphanedInProgressTasksForAgent`, re-entering this set on a later
+ *    sweep — re-escalating genuinely re-stalled work, which is fine.) We do NOT
+ *    gate on `lastActivityAt` — it is stale for a returned-but-idle agent.
+ *  - `createdAt < cutoff` — `createdAt` is the resume's creation = crash-DETECTION
+ *    time, so the grace window is measured from detection.
+ *
+ * Keys only on reboot-durable columns, so a pending pin survives a server reboot
+ * and is caught on the first post-reboot sweep.
+ */
+export function getStalePinnedResumes(graceMin: number): AgentTask[] {
+  const cutoff = new Date(Date.now() - graceMin * 60 * 1000).toISOString();
+  return getDb()
+    .prepare<AgentTaskRow, [string]>(
+      `SELECT * FROM agent_tasks
+       WHERE taskType = 'resume' AND status = 'pending'
+         AND tags LIKE '%"crash-recovery-pin"%'
+         AND createdAt < ?
+       ORDER BY createdAt ASC`,
+    )
+    .all(cutoff)
+    .map(rowToAgentTask);
+}
+/**
+ * Atomically terminalize a pinned resume ONLY if it is still `pending`, in one
+ * `UPDATE … RETURNING`. Returns the row when the transition fired, or `null`
+ * when it did not (the agent reclaimed it in the gap → `startTask` already
+ * flipped it to `in_progress`). The heartbeat reaper escalates to the Lead ONLY
+ * when this returns a row, closing the TOCTOU window between reading the resume
+ * as `pending` and writing.
+ *
+ * Deliberately NOT `failTask`: `failTask`'s backing SQL is keyed on `id` with no
+ * status precondition, so it would terminalize an `in_progress` resume the
+ * worker just started. The `AND status = 'pending'` here is the guard.
+ */
+export function failPendingResumeIfUnclaimed(
+  taskId: string,
+  status: "cancelled" | "failed",
+  failureReason: string,
+): AgentTask | null {
+  const now = new Date().toISOString();
+  const scrubbedReason = scrubSecrets(failureReason);
+  const row = getDb()
+    .prepare<AgentTaskRow, [string, string, string, string, string]>(
+      `UPDATE agent_tasks SET status = ?, failureReason = ?, finishedAt = ?, lastUpdatedAt = ?
+       WHERE id = ? AND status = 'pending' RETURNING *`,
+    )
+    .get(status, scrubbedReason, now, now, taskId);
+  if (row) {
+    try {
+      createLogEntry({
+        eventType: "task_status_change",
+        taskId,
+        agentId: row.agentId ?? undefined,
+        oldValue: "pending",
+        newValue: status,
+        metadata: { reason: scrubbedReason, reaper: "pin_unreclaimed" },
+      });
+    } catch {}
+  }
+  return row ? rowToAgentTask(row) : null;
+}
 /**
  * Get idle, non-lead, non-offline agents that have capacity for more tasks.
  * Used by the heartbeat for auto-assignment of pool tasks.

package/src/be/memory/raters/retrieval.ts CHANGED Viewed

@@ -42,16 +42,17 @@ export function recordRetrievals(
   const db = getDb();
   const insert = db.prepare(
     `INSERT INTO memory_retrieval
-       (id, taskId, agentId, sessionId, memoryId, similarity, retrievedAt, contextKey, intent, eventType)
-     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+       (id, taskId, agentId, sessionId, memoryId, similarity, retrievedAt, contextKey, intent, eventType, retrievalId, rank)
+     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
   );
   const now = new Date().toISOString();
+  const retrievalId = crypto.randomUUID();
   const contextKey = extras?.contextKey ?? null;
   const intent = extras?.intent ?? null;
   const eventType = extras?.eventType ?? "search";
   db.transaction(() => {
-    for (const r of results) {
+    for (const [rank, r] of results.entries()) {
       insert.run(
         crypto.randomUUID(),
         taskId,
@@ -63,6 +64,8 @@ export function recordRetrievals(
         contextKey,
         intent,
         eventType,
+        retrievalId,
+        rank,
       );
     }
   })();

package/src/be/migrations/097_memory_retrieval_grouping.sql ADDED Viewed

@@ -0,0 +1,10 @@
+-- Add explicit per-search grouping to memory_retrieval.
+--
+-- `recordRetrievals()` writes one row per returned memory. A single
+-- retrievalId groups all rows from the same search/get call, and rank
+-- preserves the result order within that call for precision@k/MRR analysis.
+ALTER TABLE memory_retrieval ADD COLUMN retrievalId TEXT;
+ALTER TABLE memory_retrieval ADD COLUMN rank INTEGER;
+CREATE INDEX idx_memret_retrieval_id ON memory_retrieval(retrievalId);

package/src/github/handlers.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import { findUserByExternalId } from "../be/users";
 import { resolveTemplate } from "../prompts/resolver";
 import { githubContextKey } from "../tasks/context-key";
 import { createTaskWithSiblingAwareness } from "../tasks/sibling-awareness";
+import { getInstallationToken } from "./app";
 import {
   detectMention,
   extractMentionContext,
@@ -936,6 +937,69 @@ export async function handleComment(
   return { created: true, taskId: task.id };
 }
+interface ReviewInlineComment {
+  id: number;
+  path: string;
+  line: number | null;
+  body: string;
+  html_url: string;
+  diff_hunk: string;
+}
+function parseNextPageLink(linkHeader: string | null): string | null {
+  if (!linkHeader) return null;
+  const match = linkHeader.match(/<([^>]+)>;\s*rel="next"/);
+  return match ? (match[1] ?? null) : null;
+}
+async function fetchReviewComments(
+  repo: string,
+  prNumber: number,
+  reviewId: number,
+  installationId: number,
+): Promise<ReviewInlineComment[]> {
+  const token = await getInstallationToken(installationId);
+  if (!token) {
+    return [];
+  }
+  const headers = {
+    Accept: "application/vnd.github+json",
+    Authorization: `Bearer ${token}`,
+    "X-GitHub-Api-Version": "2022-11-28",
+  };
+  const allComments: ReviewInlineComment[] = [];
+  let url: string | null =
+    `https://api.github.com/repos/${repo}/pulls/${prNumber}/reviews/${reviewId}/comments?per_page=100`;
+  try {
+    while (url) {
+      const response = await fetch(url, { headers });
+      if (!response.ok) {
+        console.error(`[GitHub] Failed to fetch review inline comments: ${response.status}`);
+        return allComments;
+      }
+      const page = (await response.json()) as ReviewInlineComment[];
+      if (Array.isArray(page)) {
+        allComments.push(...page);
+      }
+      url = parseNextPageLink(response.headers.get("link"));
+    }
+    return allComments;
+  } catch (error) {
+    console.error("[GitHub] Error fetching review inline comments:", error);
+    return allComments;
+  }
+}
+function buildInlineCommentsSection(comments: ReviewInlineComment[]): string {
+  if (comments.length === 0) return "";
+  const items = comments.map((c) => {
+    const loc = c.line ? `${c.path}:${c.line}` : c.path;
+    const hunk = c.diff_hunk ? `\n\`\`\`diff\n${c.diff_hunk.slice(0, 300)}\n\`\`\`` : "";
+    return `- **${loc}**${hunk}\n  > ${c.body}`;
+  });
+  return `\n\n## Inline review comments (${comments.length})\n\n${items.join("\n\n")}`;
+}
 /**
  * Handle pull_request_review events (submitted, edited, dismissed)
  *
@@ -963,15 +1027,21 @@ export async function handlePullRequestReview(
     return { created: false };
   }
-  // Skip "commented" reviews that are empty - these are often just line comments
-  // without an overall review body
-  if (review.state === "commented" && !review.body) {
+  // Deduplicate before making any API calls
+  const eventKey = `pr-review:${repository.full_name}:${pr.number}:${review.id}`;
+  if (isDuplicate(eventKey)) {
     return { created: false };
   }
-  // Deduplicate
-  const eventKey = `pr-review:${repository.full_name}:${pr.number}:${review.id}`;
-  if (isDuplicate(eventKey)) {
+  // Fetch inline comments now so we can decide whether to skip and include them in the task.
+  // Returns [] when no installation credentials are available (graceful degradation).
+  const inlineComments = installation?.id
+    ? await fetchReviewComments(repository.full_name, pr.number, review.id, installation.id)
+    : [];
+  // Skip "commented" reviews only when there is neither an overall body nor any inline
+  // comments — a body-less review with inline comments carries real reviewer feedback.
+  if (review.state === "commented" && !review.body && inlineComments.length === 0) {
     return { created: false };
   }
@@ -992,15 +1062,21 @@ export async function handlePullRequestReview(
   // Build task description
   const reviewBodySection = review.body ? `\n\nReview Comment:\n${review.body}` : "";
+  const inlineCommentsSection = buildInlineCommentsSection(inlineComments);
   const relatedTaskSection = existingTask
     ? `Related task: ${existingTask.id}\n🔀 Consider routing to the same agent working on the related task.\n`
     : "";
-  const reviewSuggestions =
+  const hasInlineComments = inlineComments.length > 0;
+  const baseReviewSuggestion =
     review.state === "approved"
       ? "💡 Suggested: Merge the PR or wait for additional reviews"
       : review.state === "changes_requested"
         ? "💡 Suggested: Address the requested changes and update the PR"
         : "💡 Suggested: Review the feedback and respond if needed";
+  const reviewSuggestions = hasInlineComments
+    ? `${baseReviewSuggestion}\n💬 Address EVERY inline comment. After pushing fixes, reply to and resolve each inline review thread on GitHub so the reviewer sees visible confirmation.`
+    : baseReviewSuggestion;
   const result = resolveTemplate(
     "github.pull_request.review_submitted",
@@ -1013,6 +1089,7 @@ export async function handlePullRequestReview(
       repo_full_name: repository.full_name,
       review_url: review.html_url,
       review_body_section: reviewBodySection,
+      inline_comments_section: inlineCommentsSection,
       related_task_section: relatedTaskSection,
       review_suggestions: reviewSuggestions,
     },

package/src/github/templates.ts CHANGED Viewed

@@ -350,7 +350,7 @@ registerTemplate({
   defaultBody: `PR: {{pr_title}}
 Reviewer: {{sender_login}}
 Repo: {{repo_full_name}}
-URL: {{review_url}}{{review_body_section}}
+URL: {{review_url}}{{review_body_section}}{{inline_comments_section}}
 ---
 {{related_task_section}}{{@template[common.delegation_instruction]}}
@@ -363,7 +363,11 @@ URL: {{review_url}}{{review_body_section}}
     { name: "sender_login", description: "Reviewer login" },
     { name: "repo_full_name", description: "Repository full name (owner/repo)" },
     { name: "review_url", description: "Review HTML URL" },
-    { name: "review_body_section", description: "Review comment section or empty string" },
+    { name: "review_body_section", description: "Review overall body section or empty string" },
+    {
+      name: "inline_comments_section",
+      description: "Formatted inline review comments section or empty string",
+    },
     { name: "related_task_section", description: "Related task info or empty string" },
     { name: "review_suggestions", description: "Context-appropriate review suggestion" },
   ],

package/src/heartbeat/heartbeat.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import {
   cleanupStaleSessions,
   createTaskExtended,
   deleteActiveSession,
+  failPendingResumeIfUnclaimed,
   failTask,
   getActiveSessionForTask,
   getActiveTaskCount,
@@ -14,7 +15,9 @@ import {
   getRecentCompletedCount,
   getRecentFailedCount,
   getRecentFailedTasks,
+  getStalePinnedResumes,
   getStalledInProgressTasks,
+  getTaskById,
   getTaskStats,
   getTasksByStatus,
   getUnassignedPoolTasks,
@@ -25,8 +28,14 @@ import {
   supersedeTask,
   updateAgentStatus,
 } from "../be/db";
+import { repointTrackerSyncBySwarmId } from "../be/db-queries/tracker";
 import { resolveTemplate } from "../prompts/resolver";
-import { createResumeFollowUp, getNextResumeGeneration } from "../tasks/worker-follow-up";
+import {
+  createRerouteDecisionTask,
+  createResumeFollowUp,
+  getNextResumeGeneration,
+  getResumeGeneration,
+} from "../tasks/worker-follow-up";
 import type { AgentTask } from "../types";
 import { getExecutorRegistry } from "../workflows";
 import { recoverIncompleteRuns } from "../workflows/recovery";
@@ -36,8 +45,20 @@ import "./templates";
 /**
  * System tasks that must NOT be auto-resumed — mirrors `runRebootSweep`'s exclusion list
  * to prevent infinite retry loops on the heartbeat/triage system tasks themselves.
+ *
+ * `reroute-decision` is included (DES-523): it is a control-plane Lead task, not
+ * user work. If a Lead crashed while holding one, auto-resuming it would create a
+ * crash-recovery pin for the decision; reaping that pin would then treat the
+ * decision as the `original`, producing nested reroute-decisions ABOUT the control
+ * prompt instead of recovering the real work. So a crashed decision is failed, not
+ * resumed (the original work was already superseded; its recovery chain is separate).
  */
-const SKIP_AUTO_RESUME_TYPES = new Set(["heartbeat-checklist", "boot-triage", "heartbeat"]);
+const SKIP_AUTO_RESUME_TYPES = new Set([
+  "heartbeat-checklist",
+  "boot-triage",
+  "heartbeat",
+  "reroute-decision",
+]);
 // ============================================================================
 // Configuration (env var overrides)
@@ -66,6 +87,29 @@ export const MAX_RESUME_GENERATIONS = Number(process.env.HEARTBEAT_MAX_RESUME_GE
 export const RESUME_BUDGET_EXHAUSTED_REASON = "resume_budget_exhausted";
+/**
+ * Grace window (minutes) a crash-recovery resume pinned to its original agent
+ * (DES-523 Phase 1) waits to be reclaimed before the reaper concludes the agent
+ * is gone and escalates to a Lead re-delegation decision. Generous enough for a
+ * slow container restart / image pull, short enough that a genuinely-gone
+ * agent's work reaches the Lead promptly. Measured from the resume's `createdAt`
+ * (= crash-detection time), so worst-case crash→escalation latency is
+ * ~`STALL_THRESHOLD_NO_SESSION_MIN` + this. Set to `0` to disable the reaper.
+ *
+ * Parsed with an explicit `undefined` check (not `|| 10`) so an explicit `0` is
+ * honored as "reaper off" rather than coerced back to the default.
+ */
+export const HEARTBEAT_RESUME_PIN_GRACE_MIN = (() => {
+  const raw = process.env.HEARTBEAT_RESUME_PIN_GRACE_MIN;
+  if (raw === undefined) return 10;
+  const parsed = Number(raw);
+  // Honor an explicit `0` (reaper off), but fall back to the default on a
+  // non-finite value (e.g. a typo'd `abc` → NaN). Without this guard, NaN passes
+  // the `<= 0` disable check, reaches getStalePinnedResumes(NaN), and throws in
+  // `new Date(NaN).toISOString()` — breaking cleanup on every sweep.
+  return Number.isFinite(parsed) ? parsed : 10;
+})();
 /** Heartbeat checklist interval: how often to check HEARTBEAT.md (default: 30 min) */
 const HEARTBEAT_CHECKLIST_INTERVAL_MS =
   Number(process.env.HEARTBEAT_CHECKLIST_INTERVAL_MS) || 30 * 60 * 1000;
@@ -86,6 +130,17 @@ export interface HeartbeatFindings {
     agentId: string;
     reason: string;
   }>;
+  /**
+   * Crash-recovery resumes pinned back to their original (stable-ID) agent
+   * instead of being released to the role-blind unassigned pool (DES-523). A
+   * subset of `autoResumedTasks`: the resume `taskId` + the agent it pinned to.
+   */
+  pinnedResumes: Array<{ taskId: string; agentId: string }>;
+  /**
+   * Pinned crash-recovery resumes that were never reclaimed within the grace
+   * window and were escalated to a Lead re-delegation decision (DES-523 Phase 3).
+   */
+  escalatedReroutes: Array<{ originalTaskId: string; decisionTaskId: string }>;
   workerHealthFixes: Array<{ agentId: string; oldStatus: string; newStatus: string }>;
   autoAssigned: Array<{ taskId: string; agentId: string }>;
   staleCleanup: {
@@ -157,6 +212,8 @@ export async function codeLevelTriage(): Promise<HeartbeatFindings> {
     stalledTasks: [],
     autoFailedTasks: [],
     autoResumedTasks: [],
+    pinnedResumes: [],
+    escalatedReroutes: [],
     workerHealthFixes: [],
     autoAssigned: [],
     staleCleanup: {
@@ -353,9 +410,20 @@ function remediateCrashedWorkerTask(
       agentId: task.agentId,
       reason: opts.supersedeReason,
     });
-    console.log(
-      `[Heartbeat] Auto-superseded task ${task.id.slice(0, 8)} — created resume ${resume.task.id.slice(0, 8)} (${opts.shortLabel})`,
-    );
+    // Phase 1 (DES-523): when the resume pinned back to the original
+    // (stable-ID) agent, record it so the sweep summary surfaces the pin
+    // rather than a silent pool fallback. `createResumeFollowUp` sets the
+    // resume's `agentId` to the original only on the crash_recovery pin path.
+    if (resume.task.agentId === task.agentId) {
+      findings.pinnedResumes.push({ taskId: resume.task.id, agentId: task.agentId });
+      console.log(
+        `[Heartbeat] Auto-superseded task ${task.id.slice(0, 8)} — pinned resume ${resume.task.id.slice(0, 8)} to original agent ${task.agentId.slice(0, 8)} (${opts.shortLabel})`,
+      );
+    } else {
+      console.log(
+        `[Heartbeat] Auto-superseded task ${task.id.slice(0, 8)} — created resume ${resume.task.id.slice(0, 8)} in unassigned pool (${opts.shortLabel})`,
+      );
+    }
   } else {
     const reason =
       resume.kind === "skipped"
@@ -558,6 +626,113 @@ function autoAssignPoolTasks(findings: HeartbeatFindings): void {
   })();
 }
+/**
+ * Reaper (DES-523 Phase 3): escalate crash-recovery resumes that were pinned to
+ * their original agent (Phase 1) but never reclaimed within
+ * `HEARTBEAT_RESUME_PIN_GRACE_MIN`. This is the ONLY path to the Lead decision —
+ * "gone" can't be told from "restarting" at crash-detection time, so Phase 1
+ * pins optimistically and this reaper decides "gone" once a pin demonstrably
+ * fails to be reclaimed. After this runs, the heartbeat crash path never touches
+ * the unassigned pool.
+ *
+ * Wired into `cleanupStaleResources`, so it runs on every sweep — including the
+ * cleanup-only preflight-bail path and the first post-reboot sweep — and a
+ * pending pin is reaped even when the system otherwise looks idle.
+ */
+function escalateUnreclaimedResumes(findings: HeartbeatFindings): void {
+  // Grace 0 = reaper disabled (rollback switch).
+  if (HEARTBEAT_RESUME_PIN_GRACE_MIN <= 0) return;
+  const stale = getStalePinnedResumes(HEARTBEAT_RESUME_PIN_GRACE_MIN);
+  if (stale.length === 0) return;
+  // A non-offline Lead is required to re-delegate. Without one (none registered,
+  // or the only lead is `offline` after POST /close), leave escalation candidates
+  // `pending` rather than cancel the pin and hand the decision to an agent that
+  // can't poll it (which would strand the work). The budget-exhaustion path below
+  // is independent of the Lead and still runs. `getLeadAgent` already prefers a
+  // non-offline lead, so this also guards the createRerouteDecisionTask assignment.
+  const lead = getLeadAgent();
+  const hasLead = lead != null && lead.status !== "offline";
+  for (const resume of stale) {
+    if (!resume.parentTaskId) continue; // Defensive — resumes always have a parent.
+    // Budget guard: a resume already at the generation cap must NOT spawn another
+    // Lead re-delegation (send-task does not enforce the generation tag, so a
+    // flapping task could loop forever). Terminalize and stop. Atomic, so we
+    // never kill a resume the agent just reclaimed in the gap.
+    if (getResumeGeneration(resume) >= MAX_RESUME_GENERATIONS) {
+      const failed = failPendingResumeIfUnclaimed(
+        resume.id,
+        "failed",
+        RESUME_BUDGET_EXHAUSTED_REASON,
+      );
+      if (failed) {
+        console.warn(
+          `[Heartbeat] Unreclaimed pinned resume ${resume.id.slice(0, 8)} hit the resume-generation cap — terminalized, no Lead decision`,
+        );
+      }
+      continue;
+    }
+    if (!hasLead) continue; // No lead → leave the pin pending; nothing to escalate to.
+    const original = getTaskById(resume.parentTaskId);
+    if (!original) continue; // Parent gone — nothing to escalate against.
+    // Escalate atomically: terminalize the pin + repoint the tracker link
+    // (original → R1 at pin time; R1 is now dead, so move it back so the Lead's
+    // re-delegated resume inherits it via send-task) + create the Lead decision,
+    // all in ONE transaction. A mid-sequence process death therefore can't leave
+    // the pin cancelled with no Lead signal (which would orphan the work — it is
+    // invisible to both the stall detector and this reaper afterward).
+    //  - The conditional terminalize still returns null if the agent reclaimed
+    //    the pin in the gap → abort with no writes and skip (TOCTOU guard).
+    //  - If the decision can't be created (unexpected — hasLead is checked and a
+    //    still-`pending` pin implies no prior decision), throw to roll back the
+    //    cancel so the pin is retried next sweep instead of being stranded.
+    let escalation: { decisionTaskId: string } | null = null;
+    try {
+      escalation = getDb().transaction(() => {
+        const terminalized = failPendingResumeIfUnclaimed(
+          resume.id,
+          "cancelled",
+          "pin_unreclaimed_escalated",
+        );
+        if (!terminalized) return null; // reclaimed in the gap — no writes made
+        repointTrackerSyncBySwarmId(resume.id, original.id);
+        const decision = createRerouteDecisionTask({
+          original,
+          staleResume: resume,
+          reason: "crash_recovery",
+          maxGenerations: MAX_RESUME_GENERATIONS,
+        });
+        if (decision.kind !== "created") {
+          throw new Error(`reroute-decision not created: ${decision.reason}`);
+        }
+        return { decisionTaskId: decision.task.id };
+      })();
+    } catch (err) {
+      console.warn(
+        `[Heartbeat] Reroute escalation rolled back for resume ${resume.id.slice(0, 8)} — ${
+          err instanceof Error ? err.message : String(err)
+        }; pin left pending for the next sweep`,
+      );
+      continue;
+    }
+    if (!escalation) continue; // agent reclaimed the pin in the gap
+    findings.escalatedReroutes.push({
+      originalTaskId: original.id,
+      decisionTaskId: escalation.decisionTaskId,
+    });
+    console.log(
+      `[Heartbeat] Escalated unreclaimed pinned resume ${resume.id.slice(0, 8)} → Lead reroute-decision ${escalation.decisionTaskId.slice(0, 8)} (original ${original.id.slice(0, 8)})`,
+    );
+  }
+}
 /**
  * Call existing stale resource cleanup functions.
  */
@@ -572,6 +747,9 @@ async function cleanupStaleResources(findings: HeartbeatFindings): Promise<void>
   findings.staleCleanup.inboxProcessing = releaseStaleProcessingInbox(
     STALE_CLEANUP_THRESHOLD_MINUTES,
   );
+  // DES-523 Phase 3: escalate pinned crash-recovery resumes that were never
+  // reclaimed within the grace window to a Lead re-delegation decision.
+  escalateUnreclaimedResumes(findings);
   try {
     findings.staleCleanup.workflowRuns = await recoverIncompleteRuns(getExecutorRegistry());
   } catch {
@@ -854,6 +1032,8 @@ export async function runHeartbeatSweep(): Promise<void> {
         stalledTasks: [],
         autoFailedTasks: [],
         autoResumedTasks: [],
+        pinnedResumes: [],
+        escalatedReroutes: [],
         workerHealthFixes: [],
         autoAssigned: [],
         staleCleanup: {
@@ -891,6 +1071,12 @@ function logFindings(findings: HeartbeatFindings): void {
   if (findings.autoResumedTasks.length > 0) {
     parts.push(`auto_resumed=${findings.autoResumedTasks.length}`);
   }
+  if (findings.pinnedResumes.length > 0) {
+    parts.push(`pinned_resumes=${findings.pinnedResumes.length}`);
+  }
+  if (findings.escalatedReroutes.length > 0) {
+    parts.push(`escalated_reroutes=${findings.escalatedReroutes.length}`);
+  }
   if (findings.stalledTasks.length > 0) {
     parts.push(`stalled=${findings.stalledTasks.length}`);
   }