npm - @cleocode/core - Versions diffs - 2026.3.58 → 2026.3.60 - Mend

@cleocode/core 2026.3.58 → 2026.3.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (153) hide show

package/dist/agents/agent-registry.d.ts +206 -0
package/dist/agents/agent-registry.d.ts.map +1 -0
package/dist/agents/agent-registry.js +288 -0
package/dist/agents/agent-registry.js.map +1 -0
package/dist/agents/agent-schema.js +5 -0
package/dist/agents/agent-schema.js.map +1 -1
package/dist/agents/execution-learning.js +474 -0
package/dist/agents/execution-learning.js.map +1 -0
package/dist/agents/health-monitor.d.ts +161 -0
package/dist/agents/health-monitor.d.ts.map +1 -0
package/dist/agents/health-monitor.js +217 -0
package/dist/agents/health-monitor.js.map +1 -0
package/dist/agents/index.d.ts +3 -1
package/dist/agents/index.d.ts.map +1 -1
package/dist/agents/index.js +9 -1
package/dist/agents/index.js.map +1 -1
package/dist/agents/retry.d.ts +57 -4
package/dist/agents/retry.d.ts.map +1 -1
package/dist/agents/retry.js +57 -4
package/dist/agents/retry.js.map +1 -1
package/dist/backfill/index.d.ts +27 -0
package/dist/backfill/index.d.ts.map +1 -1
package/dist/backfill/index.js +229 -0
package/dist/backfill/index.js.map +1 -0
package/dist/bootstrap.d.ts +2 -1
package/dist/bootstrap.d.ts.map +1 -1
package/dist/bootstrap.js +135 -28
package/dist/bootstrap.js.map +1 -1
package/dist/cleo.d.ts +40 -0
package/dist/cleo.d.ts.map +1 -1
package/dist/config.js +83 -0
package/dist/config.js.map +1 -1
package/dist/index.d.ts +1 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1036 -536
package/dist/index.js.map +4 -4
package/dist/intelligence/adaptive-validation.js +497 -0
package/dist/intelligence/adaptive-validation.js.map +1 -0
package/dist/intelligence/impact.d.ts +34 -1
package/dist/intelligence/impact.d.ts.map +1 -1
package/dist/intelligence/impact.js +176 -0
package/dist/intelligence/impact.js.map +1 -1
package/dist/intelligence/index.d.ts +2 -2
package/dist/intelligence/index.d.ts.map +1 -1
package/dist/intelligence/index.js +6 -1
package/dist/intelligence/index.js.map +1 -1
package/dist/intelligence/types.d.ts +60 -0
package/dist/intelligence/types.d.ts.map +1 -1
package/dist/internal.d.ts +5 -4
package/dist/internal.d.ts.map +1 -1
package/dist/internal.js +11 -2
package/dist/internal.js.map +1 -1
package/dist/lib/index.d.ts +10 -0
package/dist/lib/index.d.ts.map +1 -0
package/dist/lib/index.js +10 -0
package/dist/lib/index.js.map +1 -0
package/dist/lib/retry.d.ts +128 -0
package/dist/lib/retry.d.ts.map +1 -0
package/dist/lib/retry.js +152 -0
package/dist/lib/retry.js.map +1 -0
package/dist/nexus/sharing/index.d.ts +48 -2
package/dist/nexus/sharing/index.d.ts.map +1 -1
package/dist/nexus/sharing/index.js +110 -1
package/dist/nexus/sharing/index.js.map +1 -1
package/dist/scaffold.d.ts.map +1 -1
package/dist/scaffold.js +22 -2
package/dist/scaffold.js.map +1 -1
package/dist/sessions/session-enforcement.js +4 -0
package/dist/sessions/session-enforcement.js.map +1 -1
package/dist/stats/index.js +2 -0
package/dist/stats/index.js.map +1 -1
package/dist/stats/workflow-telemetry.d.ts +15 -0
package/dist/stats/workflow-telemetry.d.ts.map +1 -1
package/dist/stats/workflow-telemetry.js +400 -0
package/dist/stats/workflow-telemetry.js.map +1 -0
package/dist/store/brain-schema.js +4 -1
package/dist/store/brain-schema.js.map +1 -1
package/dist/store/converters.js +2 -0
package/dist/store/converters.js.map +1 -1
package/dist/store/cross-db-cleanup.d.ts +35 -0
package/dist/store/cross-db-cleanup.d.ts.map +1 -1
package/dist/store/cross-db-cleanup.js +169 -0
package/dist/store/cross-db-cleanup.js.map +1 -0
package/dist/store/db-helpers.js +2 -0
package/dist/store/db-helpers.js.map +1 -1
package/dist/store/migration-sqlite.js +5 -0
package/dist/store/migration-sqlite.js.map +1 -1
package/dist/store/sqlite-data-accessor.js +20 -28
package/dist/store/sqlite-data-accessor.js.map +1 -1
package/dist/store/sqlite.js +13 -2
package/dist/store/sqlite.js.map +1 -1
package/dist/store/task-store.js +4 -0
package/dist/store/task-store.js.map +1 -1
package/dist/store/tasks-schema.js +50 -20
package/dist/store/tasks-schema.js.map +1 -1
package/dist/tasks/add.js +87 -3
package/dist/tasks/add.js.map +1 -1
package/dist/tasks/complete.d.ts.map +1 -1
package/dist/tasks/complete.js +15 -4
package/dist/tasks/complete.js.map +1 -1
package/dist/tasks/enforcement.d.ts.map +1 -1
package/dist/tasks/enforcement.js +8 -1
package/dist/tasks/enforcement.js.map +1 -1
package/dist/tasks/epic-enforcement.d.ts +61 -0
package/dist/tasks/epic-enforcement.d.ts.map +1 -1
package/dist/tasks/epic-enforcement.js +294 -0
package/dist/tasks/epic-enforcement.js.map +1 -0
package/dist/tasks/index.js +1 -1
package/dist/tasks/index.js.map +1 -1
package/dist/tasks/pipeline-stage.d.ts +70 -1
package/dist/tasks/pipeline-stage.d.ts.map +1 -1
package/dist/tasks/pipeline-stage.js +248 -0
package/dist/tasks/pipeline-stage.js.map +1 -0
package/dist/tasks/update.js +28 -0
package/dist/tasks/update.js.map +1 -1
package/package.json +5 -5
package/schemas/config.schema.json +37 -1547
package/src/__tests__/sharing.test.ts +24 -0
package/src/agents/__tests__/agent-registry.test.ts +351 -0
package/src/agents/__tests__/health-monitor.test.ts +332 -0
package/src/agents/agent-registry.ts +394 -0
package/src/agents/health-monitor.ts +279 -0
package/src/agents/index.ts +24 -1
package/src/agents/retry.ts +57 -4
package/src/backfill/index.ts +27 -0
package/src/bootstrap.ts +171 -30
package/src/cleo.ts +103 -2
package/src/config.ts +3 -3
package/src/index.ts +1 -0
package/src/intelligence/__tests__/impact.test.ts +165 -1
package/src/intelligence/impact.ts +203 -0
package/src/intelligence/index.ts +3 -0
package/src/intelligence/types.ts +76 -0
package/src/internal.ts +20 -0
package/src/lib/__tests__/retry.test.ts +321 -0
package/src/lib/index.ts +16 -0
package/src/lib/retry.ts +224 -0
package/src/nexus/sharing/index.ts +142 -2
package/src/scaffold.ts +24 -2
package/src/stats/workflow-telemetry.ts +15 -0
package/src/store/__tests__/session-store.test.ts +43 -7
package/src/store/__tests__/task-store.test.ts +1 -1
package/src/store/__tests__/test-db-helper.ts +7 -3
package/src/store/cross-db-cleanup.ts +35 -0
package/src/tasks/__tests__/epic-enforcement.test.ts +9 -4
package/src/tasks/__tests__/minimal-test.test.ts +2 -2
package/src/tasks/__tests__/update.test.ts +25 -25
package/src/tasks/complete.ts +11 -6
package/src/tasks/enforcement.ts +6 -3
package/src/tasks/epic-enforcement.ts +61 -0
package/src/tasks/pipeline-stage.ts +70 -1
package/templates/config.template.json +5 -116
package/templates/global-config.template.json +2 -44

package/src/agents/agent-registry.ts ADDED Viewed

@@ -0,0 +1,394 @@
+/**
+ * Agent registry with capacity tracking for load balancing.
+ *
+ * Provides task-count-based capacity queries, specialization lookup,
+ * and performance recording on top of the existing `agent_instances` schema.
+ *
+ * Capacity model: each agent has a maximum of {@link MAX_TASKS_PER_AGENT}
+ * concurrent tasks. "Remaining capacity" is that constant minus the number of
+ * tasks currently assigned to an active agent instance.
+ *
+ * Specializations are stored as a `specializations` array inside the agent's
+ * `metadata_json` column. Use {@link updateAgentSpecializations} to write them.
+ *
+ * Performance recording delegates to the existing `recordAgentExecution`
+ * function in `execution-learning.ts` and wraps it with a simpler metrics
+ * interface suited for load-balancer callers.
+ *
+ * @module agents/agent-registry
+ * @task T041
+ * @epic T038
+ */
+import { and, eq, inArray } from 'drizzle-orm';
+import { getDb } from '../store/sqlite.js';
+import { type AgentInstanceRow, type AgentType, agentInstances } from './agent-schema.js';
+import {
+  type AgentExecutionEvent,
+  type AgentExecutionOutcome,
+  recordAgentExecution,
+} from './execution-learning.js';
+import { listAgentInstances } from './registry.js';
+// ============================================================================
+// Constants
+// ============================================================================
+/**
+ * Maximum number of tasks that can be concurrently assigned to one agent.
+ *
+ * Used as the upper bound for task-count-based capacity calculation:
+ * `getAgentCapacity` reports it as `maxCapacity` and subtracts the active
+ * task count from it to obtain `remainingCapacity`.
+ */
+export const MAX_TASKS_PER_AGENT = 5;
+// ============================================================================
+// Types
+// ============================================================================
+/**
+ * Task-count-based capacity snapshot for a single agent instance, as
+ * produced by `getAgentCapacity`.
+ */
+export interface AgentCapacity {
+  /** Agent instance ID. */
+  agentId: string;
+  /** Agent type classification. */
+  agentType: AgentType;
+  /** Current status of the agent. */
+  status: AgentInstanceRow['status'];
+  /**
+   * Number of tasks currently counted against this agent: 1 for its own
+   * `taskId` (when set) plus one per non-terminal child agent instance.
+   * Always 0 for agents in a terminal state (`stopped`, `crashed`).
+   */
+  activeTasks: number;
+  /** Number of additional tasks this agent can accept (max - active, never below 0). */
+  remainingCapacity: number;
+  /** Maximum tasks this agent can hold ({@link MAX_TASKS_PER_AGENT}). */
+  maxCapacity: number;
+  /** Whether this agent can accept new tasks (true exactly when `remainingCapacity > 0`). */
+  available: boolean;
+}
+/**
+ * Metrics provided when recording agent performance.
+ *
+ * `recordAgentPerformance` copies these fields into an `AgentExecutionEvent`
+ * and adds the agent ID and agent type itself.
+ */
+export interface AgentPerformanceMetrics {
+  /** Task ID that was processed. */
+  taskId: string;
+  /** Task type label (e.g. "epic", "task", "subtask"). */
+  taskType: string;
+  /** Outcome of the agent's work on the task. */
+  outcome: AgentExecutionOutcome;
+  /** Optional task labels for richer pattern classification. */
+  taskLabels?: string[];
+  /**
+   * Session ID the agent was operating under. When omitted,
+   * `recordAgentPerformance` falls back to the session ID stored on the
+   * agent's `agent_instances` row, if any.
+   */
+  sessionId?: string;
+  /** Duration of execution in milliseconds. */
+  durationMs?: number;
+  /** Error message if outcome is "failure". */
+  errorMessage?: string;
+  /** Error classification if outcome is "failure". */
+  errorType?: 'retriable' | 'permanent' | 'unknown';
+}
+// ============================================================================
+// Capacity queries
+// ============================================================================
+/**
+ * Compute the task-count-based capacity of one agent instance.
+ *
+ * The "active tasks" figure is the sum of:
+ * - 1 when the agent's own `task_id` is set, and
+ * - the number of child agent rows (rows whose `parent_agent_id` equals this
+ *   agent's ID) that are in one of the statuses `starting`, `active`, `idle`
+ *   or `error`.
+ *
+ * Remaining capacity is {@link MAX_TASKS_PER_AGENT} minus that figure,
+ * clamped so it never drops below zero.
+ *
+ * @remarks
+ * Agents in a terminal state (`stopped`, `crashed`) are reported with zero
+ * active tasks, zero remaining capacity and `available: false`; no child
+ * lookup is performed for them.
+ *
+ * @param agentId - Agent instance ID (agt_...) to check
+ * @param cwd - Working directory used to resolve tasks.db path
+ * @returns Capacity breakdown, or null if no agent row has this ID
+ *
+ * @example
+ * ```ts
+ * const cap = await getAgentCapacity('agt_20260321120000_ab12cd', '/project');
+ * if (cap && cap.available) {
+ *   console.log(`Agent can take ${cap.remainingCapacity} more tasks`);
+ * }
+ * ```
+ */
+export async function getAgentCapacity(
+  agentId: string,
+  cwd?: string,
+): Promise<AgentCapacity | null> {
+  const db = await getDb(cwd);
+  const row = await db.select().from(agentInstances).where(eq(agentInstances.id, agentId)).get();
+  if (!row) return null;
+  // Terminal agents keep the zero defaults below; only live agents are counted.
+  let activeTasks = 0;
+  let remainingCapacity = 0;
+  if (row.status !== 'stopped' && row.status !== 'crashed') {
+    // Child agents (work delegated by this agent) that have not reached a terminal state
+    const liveChildren = await db
+      .select({ id: agentInstances.id })
+      .from(agentInstances)
+      .where(
+        and(
+          eq(agentInstances.parentAgentId, agentId),
+          inArray(agentInstances.status, ['starting', 'active', 'idle', 'error']),
+        ),
+      )
+      .all();
+    // The agent's own assignment occupies one slot when a task is attached
+    const ownSlot = row.taskId != null ? 1 : 0;
+    activeTasks = ownSlot + liveChildren.length;
+    remainingCapacity = Math.max(0, MAX_TASKS_PER_AGENT - activeTasks);
+  }
+  return {
+    agentId: row.id,
+    agentType: row.agentType,
+    status: row.status,
+    activeTasks,
+    remainingCapacity,
+    maxCapacity: MAX_TASKS_PER_AGENT,
+    available: remainingCapacity > 0,
+  };
+}
+/**
+ * List `active` and `idle` agents ordered by remaining task capacity,
+ * largest first.
+ *
+ * The least-loaded agent therefore sits at index 0, which lets callers pick
+ * a target for new work by taking the first entry.
+ *
+ * @remarks
+ * The status filter passed to `listAgentInstances` is `['active', 'idle']`,
+ * so agents in `starting`, `error`, `stopped` or `crashed` state are never
+ * queried. Capacity for each listed agent is resolved through
+ * {@link getAgentCapacity}; agents that can no longer be found at that point
+ * are dropped from the result.
+ *
+ * @param agentType - Optional filter to limit results to one agent type
+ * @param cwd - Working directory used to resolve tasks.db path
+ * @returns Array of capacity entries sorted highest remaining capacity first
+ *
+ * @example
+ * ```ts
+ * const agents = await getAgentsByCapacity('executor', '/project');
+ * const best = agents[0]; // most available slots
+ * if (best && best.available) {
+ *   await assignTask(best.agentId, taskId);
+ * }
+ * ```
+ */
+export async function getAgentsByCapacity(
+  agentType?: AgentType,
+  cwd?: string,
+): Promise<AgentCapacity[]> {
+  const readyStatuses: ('active' | 'idle')[] = ['active', 'idle'];
+  // Only attach the agentType key when a type was actually requested
+  const query: Parameters<typeof listAgentInstances>[0] = {
+    status: readyStatuses,
+    ...(agentType ? { agentType } : {}),
+  };
+  const candidates = await listAgentInstances(query, cwd);
+  const resolved = await Promise.all(candidates.map(({ id }) => getAgentCapacity(id, cwd)));
+  const present: AgentCapacity[] = [];
+  for (const entry of resolved) {
+    if (entry !== null) present.push(entry);
+  }
+  present.sort((x, y) => y.remainingCapacity - x.remainingCapacity);
+  return present;
+}
+// ============================================================================
+// Specializations
+// ============================================================================
+/**
+ * Metadata shape stored in the agent_instances.metadata_json column.
+ * Only the subset relevant to specializations is typed here; every other
+ * key is carried through as `unknown` by the index signature.
+ *
+ * @internal
+ */
+interface AgentMetadata {
+  specializations?: string[];
+  [key: string]: unknown;
+}
+/**
+ * Read the specialization/skills list recorded for an agent.
+ *
+ * The list lives under the `specializations` key of the JSON document in the
+ * agent's `metadata_json` column. Non-string entries in that list are
+ * silently dropped.
+ *
+ * @remarks
+ * An empty array is returned in every "nothing usable" case: the agent row
+ * does not exist, the metadata cannot be parsed as JSON, the parsed value has
+ * no readable `specializations` property, or that property is not an array.
+ * Use {@link updateAgentSpecializations} to write the list.
+ *
+ * @param agentId - Agent instance ID (agt_...)
+ * @param cwd - Working directory used to resolve tasks.db path
+ * @returns Array of specialization strings (empty if none recorded)
+ *
+ * @example
+ * ```ts
+ * const skills = await getAgentSpecializations('agt_20260321120000_ab12cd', '/project');
+ * // ['typescript', 'testing', 'documentation']
+ * if (skills.includes('typescript')) {
+ *   console.log('Agent can handle TypeScript tasks');
+ * }
+ * ```
+ */
+export async function getAgentSpecializations(agentId: string, cwd?: string): Promise<string[]> {
+  const db = await getDb(cwd);
+  const row = await db
+    .select({ metadataJson: agentInstances.metadataJson })
+    .from(agentInstances)
+    .where(eq(agentInstances.id, agentId))
+    .get();
+  if (!row) return [];
+  try {
+    // Destructuring stays inside the try so a JSON `null` document is also treated as "no data"
+    const { specializations } = JSON.parse(row.metadataJson ?? '{}') as AgentMetadata;
+    return Array.isArray(specializations)
+      ? specializations.filter((entry): entry is string => typeof entry === 'string')
+      : [];
+  } catch {
+    return [];
+  }
+}
+/**
+ * Update the specializations list stored in an agent's metadata.
+ *
+ * Merges the new list into the existing `metadata_json` object, preserving
+ * any other keys already present. Returns the specializations list that was
+ * written, or null if the agent was not found.
+ *
+ * @remarks
+ * This is a write-side companion to {@link getAgentSpecializations}.
+ *
+ * Existing metadata is only merged when it parses to a plain JSON object.
+ * If the column holds unparseable text, or valid JSON that is not an object
+ * (an array, string, number, boolean or `null`), the stored value is replaced
+ * by an object containing just `specializations`. Spreading an array or a
+ * string would otherwise leak numeric index keys ("0", "1", ...) into the
+ * metadata document.
+ *
+ * The row is read and then written in two separate statements; no
+ * transaction is opened here.
+ *
+ * @param agentId - Agent instance ID (agt_...)
+ * @param specializations - New specializations list (replaces existing)
+ * @param cwd - Working directory used to resolve tasks.db path
+ * @returns Updated specializations list, or null if agent not found
+ *
+ * @example
+ * ```ts
+ * await updateAgentSpecializations(
+ *   'agt_20260321120000_ab12cd',
+ *   ['typescript', 'testing'],
+ *   '/project',
+ * );
+ * ```
+ */
+export async function updateAgentSpecializations(
+  agentId: string,
+  specializations: string[],
+  cwd?: string,
+): Promise<string[] | null> {
+  const db = await getDb(cwd);
+  const agent = await db
+    .select({ metadataJson: agentInstances.metadataJson })
+    .from(agentInstances)
+    .where(eq(agentInstances.id, agentId))
+    .get();
+  if (!agent) return null;
+  let existing: AgentMetadata = {};
+  try {
+    const parsed: unknown = JSON.parse(agent.metadataJson ?? '{}');
+    // Only a plain object is safe to spread; arrays and strings would contribute index keys
+    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
+      existing = parsed as AgentMetadata;
+    }
+  } catch {
+    // Proceed with empty object if metadata is unparseable
+  }
+  const updated: AgentMetadata = { ...existing, specializations };
+  await db
+    .update(agentInstances)
+    .set({ metadataJson: JSON.stringify(updated) })
+    .where(eq(agentInstances.id, agentId));
+  return specializations;
+}
+// ============================================================================
+// Performance recording
+// ============================================================================
+/**
+ * Record agent performance metrics via the execution-learning module.
+ *
+ * Looks up the agent's type and stored session ID in `agent_instances`,
+ * combines them with the supplied {@link AgentPerformanceMetrics} into an
+ * {@link AgentExecutionEvent}, and hands that event to
+ * {@link recordAgentExecution}. Callers therefore only need the agent ID and
+ * the per-task metrics.
+ *
+ * @remarks
+ * The session ID on the event is `metrics.sessionId` when given, otherwise
+ * the session ID stored on the agent row, otherwise `undefined`.
+ *
+ * NOTE(review): the original documentation states that recording is
+ * best-effort and that brain.db failures are swallowed; that behavior would
+ * live inside `recordAgentExecution`, which is not visible here — confirm.
+ * This function itself adds no error handling.
+ *
+ * @param agentId - Agent instance ID whose performance is being recorded
+ * @param metrics - Performance metrics for the task that was processed
+ * @param cwd - Working directory used to resolve tasks.db and brain.db paths
+ * @returns The ID of the recorded decision, or null when the agent is not
+ *   found or `recordAgentExecution` yields no decision/ID
+ *
+ * @example
+ * ```ts
+ * const decisionId = await recordAgentPerformance('agt_20260321120000_ab12cd', {
+ *   taskId: 'T041',
+ *   taskType: 'task',
+ *   outcome: 'success',
+ *   durationMs: 4200,
+ *   sessionId: 'ses_20260321_abc',
+ * }, '/project');
+ * ```
+ */
+export async function recordAgentPerformance(
+  agentId: string,
+  metrics: AgentPerformanceMetrics,
+  cwd?: string,
+): Promise<string | null> {
+  const db = await getDb(cwd);
+  const row = await db
+    .select({ agentType: agentInstances.agentType, sessionId: agentInstances.sessionId })
+    .from(agentInstances)
+    .where(eq(agentInstances.id, agentId))
+    .get();
+  if (!row) return null;
+  const { taskId, taskType, outcome, taskLabels, sessionId, durationMs, errorMessage, errorType } =
+    metrics;
+  const event: AgentExecutionEvent = {
+    agentId,
+    agentType: row.agentType,
+    taskId,
+    taskType,
+    outcome,
+    taskLabels,
+    // Caller-supplied session wins; fall back to the one stored on the agent row
+    sessionId: sessionId ?? row.sessionId ?? undefined,
+    durationMs,
+    errorMessage,
+    errorType,
+  };
+  const recorded = await recordAgentExecution(event, cwd);
+  return recorded?.id ?? null;
+}

package/src/agents/health-monitor.ts ADDED Viewed

@@ -0,0 +1,279 @@
+/**
+ * Agent Health Monitoring -- Heartbeat and crash detection for live agent instances.
+ *
+ * Provides the public-facing health API specified by T039:
+ *   - `recordHeartbeat`    — update last_heartbeat for a live agent
+ *   - `checkAgentHealth`   — check health of a specific agent by ID
+ *   - `detectStaleAgents`  — find agents whose heartbeat is older than threshold
+ *   - `detectCrashedAgents` — find active agents with no heartbeat for >3 min and mark them crashed
+ *
+ * These functions delegate to the lower-level `registry.ts` primitives
+ * (`heartbeat`, `listAgentInstances`, `markCrashed`) and add the
+ * named, task-spec-aligned surface required for T039.
+ *
+ * @module agents/health-monitor
+ * @task T039
+ * @epic T038
+ */
+import type { AgentInstanceRow, AgentInstanceStatus } from './agent-schema.js';
+import { heartbeat, listAgentInstances, markCrashed } from './registry.js';
+// ============================================================================
+// Constants
+// ============================================================================
+/** Default heartbeat interval (30 seconds) per BRAIN spec. Not read by any function in this module; exported for callers that schedule heartbeats. */
+export const HEARTBEAT_INTERVAL_MS = 30_000;
+/** Default staleness threshold: 3 minutes (180 000 ms) without a heartbeat. Default `thresholdMs` for the check/detect functions in this module. */
+export const STALE_THRESHOLD_MS = 3 * 60_000;
+/** Statuses considered "alive" for health-check purposes; only agents in one of these can be reported as stale or healthy. */
+const ALIVE_STATUSES: AgentInstanceStatus[] = ['starting', 'active', 'idle'];
+// ============================================================================
+// Types
+// ============================================================================
+/**
+ * Health status of a specific agent instance, computed from its database
+ * row and a staleness threshold at call time.
+ */
+export interface AgentHealthStatus {
+  /** Agent instance ID. */
+  agentId: string;
+  /** Current DB status. */
+  status: AgentInstanceStatus;
+  /** Timestamp of the last recorded heartbeat, copied verbatim from the agent row. */
+  lastHeartbeat: string;
+  /** Milliseconds since the last heartbeat (`Date.now()` minus the parsed `lastHeartbeat`, at call time). */
+  heartbeatAgeMs: number;
+  /** Whether the agent is considered healthy: it has an alive status (`starting`, `active`, `idle`) and is not stale. */
+  healthy: boolean;
+  /** Whether the agent is considered stale: it has an alive status and its heartbeat age is strictly greater than the threshold. */
+  stale: boolean;
+  /** Threshold used for staleness determination (ms). */
+  thresholdMs: number;
+}
+// ============================================================================
+// recordHeartbeat
+// ============================================================================
+/**
+ * Record a heartbeat for an agent instance.
+ *
+ * Thin, task-spec-named wrapper: the call is forwarded unchanged to
+ * `heartbeat` from `registry.js` and its result is returned as-is.
+ *
+ * Long-running agents are expected to call this every
+ * {@link HEARTBEAT_INTERVAL_MS} (30 s) to signal liveness.
+ *
+ * @param agentId - The agent instance ID (e.g. `agt_20260322120000_a1b2c3`)
+ * @param cwd - Working directory used to resolve the tasks.db path (optional)
+ * @returns The agent's status as reported by `heartbeat`, or `null`
+ *
+ * @remarks
+ * NOTE(review): earlier documentation for this function was contradictory
+ * about terminal agents (`stopped`, `crashed`) — one sentence said `null` is
+ * returned, another said the existing status is returned without updating
+ * the heartbeat. The actual behavior is decided inside `heartbeat` in
+ * `registry.js`, which is not visible here; confirm against that function.
+ * The example below handles both possibilities.
+ *
+ * @example
+ * ```ts
+ * // Inside the agent's main loop:
+ * const heartbeatTimer = setInterval(async () => {
+ *   const status = await recordHeartbeat(agentId);
+ *   if (status === 'stopped' || status === null) {
+ *     // Orchestrator shut us down — exit cleanly
+ *     clearInterval(heartbeatTimer);
+ *     process.exit(0);
+ *   }
+ * }, HEARTBEAT_INTERVAL_MS);
+ * ```
+ */
+export async function recordHeartbeat(
+  agentId: string,
+  cwd?: string,
+): Promise<AgentInstanceStatus | null> {
+  const reportedStatus = await heartbeat(agentId, cwd);
+  return reportedStatus;
+}
+// ============================================================================
+// checkAgentHealth
+// ============================================================================
+/**
+ * Check the health of a specific agent instance by ID.
+ *
+ * Fetches the unfiltered agent list via `listAgentInstances`, picks the
+ * first row whose ID matches, and converts it into an
+ * {@link AgentHealthStatus} relative to `thresholdMs`.
+ *
+ * @param agentId - The agent instance ID to check
+ * @param thresholdMs - Staleness threshold in milliseconds (default: 3 minutes)
+ * @param cwd - Working directory used to resolve the tasks.db path (optional)
+ * @returns {@link AgentHealthStatus}, or `null` if no listed agent has this ID
+ *
+ * @remarks
+ * Agents in any status are looked up. For agents whose status is not one of
+ * `starting`, `active` or `idle`, both `stale` and `healthy` are `false`.
+ *
+ * @example
+ * ```ts
+ * const health = await checkAgentHealth('agt_20260322120000_a1b2c3');
+ * if (health && health.stale) {
+ *   console.log(`Agent stale for ${health.heartbeatAgeMs}ms — presumed crashed`);
+ * }
+ * ```
+ */
+export async function checkAgentHealth(
+  agentId: string,
+  thresholdMs: number = STALE_THRESHOLD_MS,
+  cwd?: string,
+): Promise<AgentHealthStatus | null> {
+  const instances = await listAgentInstances(undefined, cwd);
+  for (const candidate of instances) {
+    if (candidate.id === agentId) {
+      return buildHealthStatus(candidate, thresholdMs);
+    }
+  }
+  return null;
+}
+// ============================================================================
+// detectStaleAgents
+// ============================================================================
+/**
+ * Find all alive agents whose last heartbeat is older than `thresholdMs`.
+ *
+ * Only agents with status `starting`, `active` or `idle` are queried. An
+ * agent is reported when its heartbeat age is strictly greater than the
+ * threshold. This function is read-only; nothing is written to the database
+ * here.
+ *
+ * @param thresholdMs - Staleness threshold in ms (default: 3 minutes / 180 000 ms)
+ * @param cwd - Working directory used to resolve the tasks.db path (optional)
+ * @returns Array of {@link AgentHealthStatus} for each stale agent, sorted by
+ *   heartbeat age descending (most-stale first)
+ *
+ * @remarks
+ * Agents with status `stopped` or `crashed` are never considered because
+ * they are excluded by the status filter.
+ *
+ * @example
+ * ```ts
+ * const stale = await detectStaleAgents();
+ * for (const s of stale) {
+ *   console.log(`${s.agentId} has been stale for ${s.heartbeatAgeMs / 1000}s`);
+ * }
+ * ```
+ */
+export async function detectStaleAgents(
+  thresholdMs: number = STALE_THRESHOLD_MS,
+  cwd?: string,
+): Promise<AgentHealthStatus[]> {
+  const aliveAgents = await listAgentInstances({ status: ALIVE_STATUSES }, cwd);
+  const staleReports: AgentHealthStatus[] = [];
+  for (const instance of aliveAgents) {
+    const report = buildHealthStatus(instance, thresholdMs);
+    if (report.stale) staleReports.push(report);
+  }
+  // Largest heartbeat age first
+  staleReports.sort((x, y) => y.heartbeatAgeMs - x.heartbeatAgeMs);
+  return staleReports;
+}
+// ============================================================================
+// detectCrashedAgents
+// ============================================================================
+/**
+ * Find `active` agents whose heartbeat has been silent for longer than
+ * `thresholdMs` and mark each of them as crashed via `markCrashed`.
+ *
+ * Only agents with status `active` are examined; `idle`, `starting`,
+ * `stopped` and `crashed` agents are not queried. An agent qualifies when its
+ * `lastHeartbeat` string sorts before the ISO-8601 cutoff timestamp
+ * (now minus `thresholdMs`).
+ *
+ * @param thresholdMs - Crash threshold in ms (default: 3 minutes / 180 000 ms)
+ * @param cwd - Working directory used to resolve the tasks.db path (optional)
+ * @returns The rows returned by `markCrashed` for each agent that was just
+ *   marked, sorted by last heartbeat ascending (oldest first). Agents for
+ *   which `markCrashed` returns a falsy value are omitted.
+ *
+ * @remarks
+ * This function is WRITE-side: it calls `markCrashed` once per detected
+ * agent, one after another. For a read-only view, use
+ * {@link detectStaleAgents} instead.
+ *
+ * NOTE(review): the cutoff check is a plain string comparison, which assumes
+ * `lastHeartbeat` is stored in the same UTC ISO-8601 format that
+ * `Date.prototype.toISOString()` produces — confirm against the writer in
+ * `registry.js`. What `markCrashed` records (error count, error log entry) is
+ * defined there as well and is not visible here.
+ *
+ * @example
+ * ```ts
+ * // Inside an orchestrator health watchdog:
+ * const crashed = await detectCrashedAgents();
+ * if (crashed.length > 0) {
+ *   logger.warn({ crashed: crashed.map(a => a.id) }, 'Agents marked crashed');
+ * }
+ * ```
+ */
+export async function detectCrashedAgents(
+  thresholdMs: number = STALE_THRESHOLD_MS,
+  cwd?: string,
+): Promise<AgentInstanceRow[]> {
+  // Restricted to 'active' agents: idle/starting agents may not yet have
+  // established a regular heartbeat interval.
+  const candidates = await listAgentInstances({ status: 'active' }, cwd);
+  const cutoffIso = new Date(Date.now() - thresholdMs).toISOString();
+  const reason = `Heartbeat timeout — no heartbeat for >${Math.round(thresholdMs / 1000)}s`;
+  const marked: AgentInstanceRow[] = [];
+  for (const candidate of candidates) {
+    if (!(candidate.lastHeartbeat < cutoffIso)) continue;
+    const row = await markCrashed(candidate.id, reason, cwd);
+    if (row) marked.push(row);
+  }
+  // Oldest heartbeat first (most severely stale)
+  marked.sort((x, y) => {
+    const left = x.lastHeartbeat ?? '';
+    const right = y.lastHeartbeat ?? '';
+    if (left < right) return -1;
+    if (left > right) return 1;
+    return 0;
+  });
+  return marked;
+}
+// ============================================================================
+// Internal helpers
+// ============================================================================
+/**
+ * Build an {@link AgentHealthStatus} from a raw agent row.
+ *
+ * Heartbeat age is measured against the current clock. An agent is `stale`
+ * only when it has an alive status and its age is strictly greater than
+ * `thresholdMs`; it is `healthy` when it has an alive status and is not
+ * stale. Rows with any other status get `false` for both flags.
+ *
+ * @param agent - Agent row whose `status` and `lastHeartbeat` are evaluated
+ * @param thresholdMs - Staleness threshold in milliseconds
+ * @returns The computed health status
+ */
+function buildHealthStatus(agent: AgentInstanceRow, thresholdMs: number): AgentHealthStatus {
+  const status = agent.status as AgentInstanceStatus;
+  const heartbeatAt = new Date(agent.lastHeartbeat).getTime();
+  const heartbeatAgeMs = Date.now() - heartbeatAt;
+  const alive = ALIVE_STATUSES.includes(status);
+  const stale = alive && heartbeatAgeMs > thresholdMs;
+  const healthy = alive && !stale;
+  return {
+    agentId: agent.id,
+    status,
+    lastHeartbeat: agent.lastHeartbeat,
+    heartbeatAgeMs,
+    healthy,
+    stale,
+    thresholdMs,
+  };
+}