npm - clementine-agent - Versions diffs - 1.18.189 → 1.18.190 - Mend

clementine-agent 1.18.189 → 1.18.190

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/agent/background-tasks.d.ts +9 -0
package/dist/agent/background-tasks.js +14 -0
package/dist/agent/bg-orchestrator.d.ts +103 -0
package/dist/agent/bg-orchestrator.js +323 -0
package/dist/agent/bg-planner.d.ts +142 -0
package/dist/agent/bg-planner.js +321 -0
package/dist/gateway/cron-scheduler.d.ts +17 -0
package/dist/gateway/cron-scheduler.js +148 -1
package/dist/gateway/router.js +23 -7
package/dist/types.d.ts +10 -0
package/package.json +1 -1

package/dist/agent/background-tasks.d.ts CHANGED Viewed

@@ -26,12 +26,21 @@ export interface BackgroundTaskOptions {
 /**
  * Create a new pending task on disk and return it. Caller (the MCP tool)
  * doesn't await execution — the daemon picks the task up asynchronously.
+ *
+ * 1.18.190 — accepts the new chain fields (kind / chainId / planId /
+ * stepIndex / parentTaskId) for planner-orchestrator chained tasks. All
+ * are optional; callers that pass none get the legacy monolithic shape.
  */
 export declare function createBackgroundTask(input: {
     fromAgent: string;
     prompt: string;
     maxMinutes: number;
     sessionKey?: string;
+    kind?: BackgroundTask['kind'];
+    chainId?: string;
+    planId?: string;
+    stepIndex?: number;
+    parentTaskId?: string;
 }, opts?: BackgroundTaskOptions): BackgroundTask;
 /** Load a task by id, or null if not found / malformed. */
 export declare function loadBackgroundTask(id: string, opts?: BackgroundTaskOptions): BackgroundTask | null;

package/dist/agent/background-tasks.js CHANGED Viewed

@@ -45,6 +45,10 @@ function safeWrite(file, task) {
 /**
  * Create a new pending task on disk and return it. Caller (the MCP tool)
  * doesn't await execution — the daemon picks the task up asynchronously.
+ *
+ * 1.18.190 — accepts the new chain fields (kind / chainId / planId /
+ * stepIndex / parentTaskId) for planner-orchestrator chained tasks. All
+ * are optional; callers that pass none get the legacy monolithic shape.
  */
 export function createBackgroundTask(input, opts) {
     const now = new Date();
@@ -58,6 +62,16 @@ export function createBackgroundTask(input, opts) {
     };
     if (input.sessionKey)
         task.sessionKey = input.sessionKey;
+    if (input.kind)
+        task.kind = input.kind;
+    if (input.chainId)
+        task.chainId = input.chainId;
+    if (input.planId)
+        task.planId = input.planId;
+    if (typeof input.stepIndex === 'number')
+        task.stepIndex = input.stepIndex;
+    if (input.parentTaskId)
+        task.parentTaskId = input.parentTaskId;
     safeWrite(pathFor(task.id, opts), task);
     return task;
 }

package/dist/agent/bg-orchestrator.d.ts ADDED Viewed

@@ -0,0 +1,103 @@
+/**
+ * bg-orchestrator — drive a Plan from start to finish by queuing one
+ * bg-task per PlanStep, advancing the chain as each step completes.
+ *
+ * Why this exists (1.18.190)
+ * ──────────────────────────
+ * The bg-planner produces a Plan with 3-7 PlanSteps. This module is
+ * the runtime that executes that Plan, one step at a time, with each
+ * step getting its own fresh bg-task + worker context. The chain only
+ * advances when the previous step completed successfully — failures
+ * pause the chain and notify the owner.
+ *
+ * Architectural role:
+ *   bg-planner.ts → Plan (data)
+ *   bg-orchestrator.ts → drives the Plan (this module)
+ *   background-tasks.ts → bg-task persistence (filesystem)
+ *   run-agent-cron.ts → runs the actual SDK call for each step
+ *
+ * The orchestrator is NEVER the thing reading files or calling APIs.
+ * It's a state machine: read plan → queue next step → wait for step
+ * to finish → repeat. The state machine lives across daemon restarts
+ * because both Plans and BackgroundTasks are filesystem-persisted.
+ *
+ * Why this prevents the autocompact thrash that motivated 1.18.190:
+ *   - each step gets a FRESH bg-task with a FRESH 200K worker window
+ *   - state flows between steps via the project's STATUS.md and the
+ *     plan's `deliverable` fields, NOT via accumulated SDK context
+ *   - no single worker has to do more than ~2-6 tool calls before
+ *     completing its scoped deliverable
+ *   - the model's compaction pressure resets between steps
+ */
+import { createBackgroundTask } from './background-tasks.js';
+import { loadPlan, savePlan } from './bg-planner.js';
+import type { Plan, PlanStep } from './bg-planner.js';
+import type { BackgroundTask } from '../types.js';
+/**
+ * Queue the first step of a freshly-planned chain. Returns the
+ * BackgroundTask created for step 0.
+ *
+ * Caller responsibility: the Plan must already be persisted to disk
+ * (via savePlan) before calling this — the dispatched step task
+ * carries a planId that will be loaded back at execution time.
+ *
+ * Side effects: `plan` is mutated in place (step 0 receives the new
+ * task's id and status 'running'; the plan becomes 'in_progress') and
+ * is saved again after the task has been created. Step 0's current
+ * status is not inspected, so calling this twice queues two tasks.
+ *
+ * @param plan Plan whose first step should be queued.
+ * @returns The background task created for `plan.steps[0]`.
+ * @throws Error when `plan.steps` is empty.
+ */
+export declare function dispatchChain(plan: Plan): BackgroundTask;
+/**
+ * Called by the bg-task framework when a chained step completes.
+ * Updates the plan's step status, then either:
+ *   - queues the next step (chain continues),
+ *   - marks the plan completed (no more steps), or
+ *   - pauses the chain (step failed; owner notification surfaces elsewhere).
+ *
+ * Returns the next BackgroundTask if one was queued, or null otherwise.
+ * null is also returned when the task is not a chain step (no planId or
+ * no numeric stepIndex), when the plan cannot be loaded, or when the
+ * step index is out of range.
+ *
+ * Any task status other than 'done' is recorded as a step failure and
+ * pauses the plan.
+ *
+ * Safe to call multiple times for the same completed task (idempotent
+ * via the step's status check).
+ */
+export declare function advanceChain(opts: {
+    completedTask: BackgroundTask;
+    /** Optional override for tests; defaults to filesystem. */
+    loadPlanFn?: typeof loadPlan;
+    savePlanFn?: typeof savePlan;
+    createTaskFn?: typeof createBackgroundTask;
+}): BackgroundTask | null;
+/**
+ * Pause a chain explicitly (e.g., owner intervention). The current
+ * running step is left alone — caller can mark it however the
+ * downstream cancellation flow does.
+ *
+ * Sets the plan's status to 'paused' and saves it. When `reason` is
+ * given, `[paused] <reason>` is appended to the plan's notes on a new
+ * line. Does nothing (silently) when the plan cannot be loaded.
+ *
+ * @param planId Id of the plan to pause.
+ * @param projectPath Project path handed to loadPlan; null is treated as undefined.
+ * @param reason Optional explanation recorded in the plan's notes.
+ */
+export declare function pauseChain(planId: string, projectPath?: string | null, reason?: string): void;
+/**
+ * Resume a chain by dispatching its first step that is still 'pending'.
+ * Returns the dispatched task, or null when nothing was dispatched.
+ *
+ * Behavior as implemented:
+ *   - Returns null when the plan cannot be loaded or is already 'completed'.
+ *   - The plan is not required to be 'paused'; any non-completed plan is accepted.
+ *   - Only 'pending' steps are considered: a 'failed' step is not retried
+ *     and a step still marked 'running' is not waited for.
+ *   - When no step is 'pending' the plan is marked 'completed', even if
+ *     some steps are 'failed' or 'running'.
+ *
+ * @param planId Id of the plan to resume.
+ * @param projectPath Project path handed to loadPlan; null is treated as undefined.
+ * @returns The task queued for the next pending step, or null.
+ */
+export declare function resumeChain(planId: string, projectPath?: string | null): BackgroundTask | null;
+/**
+ * Build the focused prompt for one chained worker. Designed to be SMALL
+ * (~1-2KB) and ANCHORED to the step's deliverable so the worker has a
+ * clear stopping condition. Key elements:
+ *   - The original user request (for context, not for re-doing it)
+ *   - The plan summary (what's been done, what's next)
+ *   - THIS step's scope + expected tools + deliverable
+ *   - Posture: "do ONLY this step. Don't overshoot. State your deliverable
+ *     in your final response."
+ *
+ * State that the next step might need is read by that step from the
+ * project STATUS.md or the prior step's deliverable file — NOT from
+ * this step's response text. Result text is for the orchestrator's
+ * advancement decision; deliverables are for the work itself.
+ *
+ * The size target is a design goal only; the prompt length is not
+ * capped by this function.
+ *
+ * @param plan Plan the step belongs to (request, project path, all steps).
+ * @param step The step the worker should execute.
+ * @returns The prompt as newline-joined Markdown text.
+ */
+export declare function buildStepPrompt(plan: Plan, step: PlanStep): string;
+/**
+ * Return the `projectPath` recorded on the plan that a chain task
+ * belongs to. Used by run-agent-cron to set the SDK's `cwd` and
+ * `additionalDirectories` to the project root for the step.
+ *
+ * @param task Chain step task; its `planId` selects the plan to load.
+ * @returns The plan's project path, or undefined when the task has no
+ *   planId, the plan cannot be loaded, or the plan has no project path.
+ */
+export declare function projectDirForChainTask(task: BackgroundTask): string | undefined;
+/**
+ * Format a status line for posting to the originating chat after each
+ * step completes — gives the owner a real-time view of chain progress.
+ *
+ * The headline always reads "Step i/total done" — the step's own status
+ * is not consulted, so a failed step is worded the same way.
+ * NOTE(review): confirm callers only pass successfully completed steps.
+ *
+ * @param plan Plan in its current (already updated) state.
+ * @param justCompletedStep Step to report on.
+ * @returns One to three newline-joined lines of Markdown.
+ */
+export declare function formatChainStatusUpdate(plan: Plan, justCompletedStep: PlanStep): string;
+//# sourceMappingURL=bg-orchestrator.d.ts.map

package/dist/agent/bg-orchestrator.js ADDED Viewed

@@ -0,0 +1,323 @@
+/**
+ * bg-orchestrator — drive a Plan from start to finish by queuing one
+ * bg-task per PlanStep, advancing the chain as each step completes.
+ *
+ * Why this exists (1.18.190)
+ * ──────────────────────────
+ * The bg-planner produces a Plan with 3-7 PlanSteps. This module is
+ * the runtime that executes that Plan, one step at a time, with each
+ * step getting its own fresh bg-task + worker context. The chain only
+ * advances when the previous step completed successfully — failures
+ * pause the chain and notify the owner.
+ *
+ * Architectural role:
+ *   bg-planner.ts → Plan (data)
+ *   bg-orchestrator.ts → drives the Plan (this module)
+ *   background-tasks.ts → bg-task persistence (filesystem)
+ *   run-agent-cron.ts → runs the actual SDK call for each step
+ *
+ * The orchestrator is NEVER the thing reading files or calling APIs.
+ * It's a state machine: read plan → queue next step → wait for step
+ * to finish → repeat. The state machine lives across daemon restarts
+ * because both Plans and BackgroundTasks are filesystem-persisted.
+ *
+ * Why this prevents the autocompact thrash that motivated 1.18.190:
+ *   - each step gets a FRESH bg-task with a FRESH 200K worker window
+ *   - state flows between steps via the project's STATUS.md and the
+ *     plan's `deliverable` fields, NOT via accumulated SDK context
+ *   - no single worker has to do more than ~2-6 tool calls before
+ *     completing its scoped deliverable
+ *   - the model's compaction pressure resets between steps
+ */
+import path from 'node:path';
+import pino from 'pino';
+import { createBackgroundTask } from './background-tasks.js';
+import { loadPlan, savePlan } from './bg-planner.js';
+const logger = pino({ name: 'clementine.bg-orchestrator' });
+// ── Public API ───────────────────────────────────────────────────────
+/**
+ * Queue the first step of a freshly-planned chain. Returns the
+ * BackgroundTask created for step 0.
+ *
+ * Caller responsibility: the Plan must already be persisted to disk
+ * (via savePlan) before calling this — the dispatched step task
+ * carries a planId that will be loaded back at execution time.
+ *
+ * Side effects: `plan` is mutated in place (step 0 receives the new
+ * task's id and status 'running'; the plan becomes 'in_progress') and
+ * is saved again after the task has been created. Step 0's current
+ * status is not inspected, so calling this twice queues two tasks.
+ *
+ * @param plan Plan whose first step should be queued.
+ * @returns The background task created for `plan.steps[0]`.
+ * @throws Error when `plan.steps` is empty.
+ */
+export function dispatchChain(plan) {
+    if (!plan.steps.length) {
+        throw new Error(`Cannot dispatch chain: plan ${plan.id} has zero steps`);
+    }
+    const firstStep = plan.steps[0];
+    const task = createBackgroundTask({
+        fromAgent: 'clementine',
+        prompt: buildStepPrompt(plan, firstStep),
+        maxMinutes: 30, // generous per-step; the step itself decides how long it needs
+        ...(plan.originatingSessionKey ? { sessionKey: plan.originatingSessionKey } : {}),
+        kind: 'step',
+        chainId: plan.chainId,
+        planId: plan.id,
+        stepIndex: 0,
+    });
+    // Stamp the step with its taskId + mark plan as in_progress so future
+    // resumes don't try to re-dispatch the same step.
+    firstStep.taskId = task.id;
+    firstStep.status = 'running';
+    plan.status = 'in_progress';
+    savePlan(plan, plan.projectPath);
+    logger.info({
+        planId: plan.id,
+        chainId: plan.chainId,
+        stepIndex: 0,
+        stepTitle: firstStep.title,
+        taskId: task.id,
+    }, 'dispatchChain: queued step 0');
+    return task;
+}
+/**
+ * Called by the bg-task framework when a chained step completes.
+ * Updates the plan's step status, then either:
+ *   - queues the next step (chain continues),
+ *   - marks the plan completed (no more steps), or
+ *   - pauses the chain (step failed; owner notification surfaces elsewhere).
+ *
+ * Returns the next BackgroundTask if one was queued, or null otherwise.
+ *
+ * Safe to call multiple times for the same completed task (idempotent
+ * via the step's status check).
+ *
+ * Details worth knowing:
+ *   - Tasks without a `planId` or a numeric `stepIndex` are ignored (null).
+ *   - null is also returned when the plan cannot be loaded or the step
+ *     index is out of range; both cases are logged as warnings.
+ *   - Any task status other than 'done' is recorded as a step failure and
+ *     pauses the plan.
+ *   - The step is located purely by `completedTask.stepIndex`; the task's
+ *     id is not compared against the step's recorded `taskId`.
+ *   - The step's `resultPreview` is capped at 400 characters.
+ *
+ * @param opts.completedTask The finished step task.
+ * @param opts.loadPlanFn Optional replacement for loadPlan.
+ * @param opts.savePlanFn Optional replacement for savePlan.
+ * @param opts.createTaskFn Optional replacement for createBackgroundTask.
+ * @returns The task queued for the next step, or null.
+ */
+export function advanceChain(opts) {
+    const { completedTask } = opts;
+    if (!completedTask.planId || typeof completedTask.stepIndex !== 'number') {
+        logger.debug({ taskId: completedTask.id }, 'advanceChain: task has no plan id/step index — not a chain step');
+        return null;
+    }
+    const loadPlanImpl = opts.loadPlanFn ?? loadPlan;
+    const savePlanImpl = opts.savePlanFn ?? savePlan;
+    const createTaskImpl = opts.createTaskFn ?? createBackgroundTask;
+    // The task carries the planId but not the project path, so the plan
+    // is looked up with projectPath left undefined here.
+    // NOTE(review): confirm loadPlan can still find a plan that was saved
+    // under a project's own plans dir when no projectPath is supplied.
+    const plan = loadPlanImpl(completedTask.planId, undefined);
+    if (!plan) {
+        logger.warn({ planId: completedTask.planId, taskId: completedTask.id }, 'advanceChain: plan not found — cannot advance');
+        return null;
+    }
+    const step = plan.steps[completedTask.stepIndex];
+    if (!step) {
+        logger.warn({ planId: plan.id, stepIndex: completedTask.stepIndex }, 'advanceChain: step index out of range');
+        return null;
+    }
+    // Idempotency: if the step has already been marked terminal, don't
+    // advance again. Protects against duplicate completion callbacks.
+    if (step.status === 'done' || step.status === 'failed' || step.status === 'skipped') {
+        logger.debug({ planId: plan.id, stepIndex: step.index, status: step.status }, 'advanceChain: step already terminal — skipping');
+        return null;
+    }
+    // Reflect the task's terminal status onto the plan step.
+    if (completedTask.status === 'done') {
+        step.status = 'done';
+        step.completedAt = completedTask.completedAt ?? new Date().toISOString();
+        step.resultPreview = (completedTask.result ?? '').slice(0, 400);
+    }
+    else {
+        // failed | aborted | interrupted (and anything else that is not 'done') — all map to plan-step failure for now
+        step.status = 'failed';
+        step.completedAt = completedTask.completedAt ?? new Date().toISOString();
+        step.resultPreview = (completedTask.error ?? completedTask.result ?? '').slice(0, 400);
+        plan.status = 'paused';
+        savePlanImpl(plan, plan.projectPath);
+        logger.warn({
+            planId: plan.id,
+            chainId: plan.chainId,
+            stepIndex: step.index,
+            stepTitle: step.title,
+            taskStatus: completedTask.status,
+            error: completedTask.error,
+        }, 'advanceChain: step failed — chain paused');
+        return null;
+    }
+    // Look for the next pending step (compares array position with step.index, so assumes the two agree).
+    const nextStep = plan.steps.find((s, i) => i > step.index && s.status === 'pending');
+    if (!nextStep) {
+        // Chain complete!
+        plan.status = 'completed';
+        savePlanImpl(plan, plan.projectPath);
+        logger.info({
+            planId: plan.id,
+            chainId: plan.chainId,
+            stepCount: plan.steps.length,
+        }, 'advanceChain: chain completed');
+        return null;
+    }
+    // Queue the next step.
+    const nextTask = createTaskImpl({
+        fromAgent: 'clementine',
+        prompt: buildStepPrompt(plan, nextStep),
+        maxMinutes: 30,
+        ...(plan.originatingSessionKey ? { sessionKey: plan.originatingSessionKey } : {}),
+        kind: 'step',
+        chainId: plan.chainId,
+        planId: plan.id,
+        stepIndex: nextStep.index,
+        parentTaskId: completedTask.id,
+    });
+    nextStep.taskId = nextTask.id;
+    nextStep.status = 'running';
+    savePlanImpl(plan, plan.projectPath);
+    logger.info({
+        planId: plan.id,
+        chainId: plan.chainId,
+        stepIndex: nextStep.index,
+        stepTitle: nextStep.title,
+        taskId: nextTask.id,
+        parentTaskId: completedTask.id,
+    }, 'advanceChain: queued next step');
+    return nextTask;
+}
+/**
+ * Pause a chain explicitly (e.g., owner intervention). The current
+ * running step is left alone — caller can mark it however the
+ * downstream cancellation flow does.
+ *
+ * Sets the plan's status to 'paused' and saves it. When `reason` is
+ * given, `[paused] <reason>` is appended to the plan's notes on a new
+ * line. Does nothing (silently) when the plan cannot be loaded.
+ *
+ * @param planId Id of the plan to pause.
+ * @param projectPath Project path handed to loadPlan; null is treated as undefined.
+ * @param reason Optional explanation recorded in the plan's notes.
+ */
+export function pauseChain(planId, projectPath, reason) {
+    const plan = loadPlan(planId, projectPath ?? undefined);
+    if (!plan)
+        return;
+    plan.status = 'paused';
+    if (reason)
+        plan.notes = `${plan.notes ? plan.notes + '\n' : ''}[paused] ${reason}`;
+    savePlan(plan, plan.projectPath);
+    logger.info({ planId, reason }, 'pauseChain: chain paused');
+}
+/**
+ * Resume a chain by dispatching its first step that is still 'pending'.
+ * Returns the dispatched task, or null when nothing was dispatched.
+ *
+ * Behavior as implemented:
+ *   - Returns null when the plan cannot be loaded or is already 'completed'.
+ *   - The plan is not required to be 'paused'; any non-completed plan is accepted.
+ *   - Only 'pending' steps are considered: a 'failed' step is not retried
+ *     and a step still marked 'running' is not waited for.
+ *   - When no step is 'pending' the plan is marked 'completed', even if
+ *     some steps are 'failed' or 'running'.
+ *   - The queued task is created without a parentTaskId.
+ *
+ * @param planId Id of the plan to resume.
+ * @param projectPath Project path handed to loadPlan; null is treated as undefined.
+ * @returns The task queued for the next pending step, or null.
+ */
+export function resumeChain(planId, projectPath) {
+    const plan = loadPlan(planId, projectPath ?? undefined);
+    if (!plan)
+        return null;
+    if (plan.status === 'completed')
+        return null;
+    const nextStep = plan.steps.find((s) => s.status === 'pending');
+    if (!nextStep) {
+        plan.status = 'completed';
+        savePlan(plan, plan.projectPath);
+        return null;
+    }
+    const task = createBackgroundTask({
+        fromAgent: 'clementine',
+        prompt: buildStepPrompt(plan, nextStep),
+        maxMinutes: 30,
+        ...(plan.originatingSessionKey ? { sessionKey: plan.originatingSessionKey } : {}),
+        kind: 'step',
+        chainId: plan.chainId,
+        planId: plan.id,
+        stepIndex: nextStep.index,
+    });
+    nextStep.taskId = task.id;
+    nextStep.status = 'running';
+    plan.status = 'in_progress';
+    savePlan(plan, plan.projectPath);
+    logger.info({ planId, stepIndex: nextStep.index, taskId: task.id }, 'resumeChain: dispatched next step');
+    return task;
+}
+// ── Step prompt construction ─────────────────────────────────────────
+/**
+ * Build the focused prompt for one chained worker. Designed to be SMALL
+ * (~1-2KB) and ANCHORED to the step's deliverable so the worker has a
+ * clear stopping condition. Key elements:
+ *   - The original user request (for context, not for re-doing it)
+ *   - The plan summary (what's been done, what's next)
+ *   - THIS step's scope + expected tools + deliverable
+ *   - Posture: "do ONLY this step. Don't overshoot. State your deliverable
+ *     in your final response."
+ *
+ * State that the next step might need is read by that step from the
+ * project STATUS.md or the prior step's deliverable file — NOT from
+ * this step's response text. Result text is for the orchestrator's
+ * advancement decision; deliverables are for the work itself.
+ *
+ * Layout of the generated Markdown:
+ *   - heading "Chained step N of M" (N is `step.index + 1`)
+ *   - "Original user request" with `plan.userRequest` verbatim
+ *   - "Active project" only when `plan.projectPath` is set
+ *   - "Plan summary": one line per step, marked ✓ (done), ✗ (failed),
+ *     → (the step being prompted) or · (anything else); the status
+ *     markers take precedence over →. A deliverable is shown only for
+ *     steps that are already done.
+ *   - "Your step": title, scope, expected tools (when non-empty) and
+ *     deliverable (when set)
+ *   - "Step posture": fixed instructions
+ *
+ * The size target is a design goal only; nothing here caps the length.
+ *
+ * @param plan Plan the step belongs to.
+ * @param step The step the worker should execute.
+ * @returns The prompt, with lines joined by '\n'.
+ */
+export function buildStepPrompt(plan, step) {
+    const lines = [];
+    lines.push(`# Chained step ${step.index + 1} of ${plan.steps.length}`);
+    lines.push('');
+    lines.push(`## Original user request`);
+    lines.push(plan.userRequest);
+    lines.push('');
+    if (plan.projectPath) {
+        lines.push(`## Active project`);
+        lines.push(`Path: \`${plan.projectPath}\``);
+        lines.push('Your cwd is set to this project. Read sources from there, write outputs to `output/`.');
+        lines.push('');
+    }
+    // Concise plan summary — JUST what's been done and what's next.
+    // Don't include full step bodies; that's noise.
+    lines.push(`## Plan summary`);
+    for (const s of plan.steps) {
+        const marker = s.status === 'done' ? '✓' : s.status === 'failed' ? '✗' : s.index === step.index ? '→' : '·';
+        const detail = s.status === 'done' && s.deliverable ? ` (→ ${s.deliverable})` : '';
+        lines.push(`  ${marker} ${s.index + 1}. ${s.title}${detail}`);
+    }
+    lines.push('');
+    lines.push(`## Your step (the → above)`);
+    lines.push(`**Title**: ${step.title}`);
+    lines.push(`**Scope**: ${step.scope}`);
+    if (step.expectedTools.length > 0) {
+        lines.push(`**Expected tool calls**: ${step.expectedTools.join(', ')}`);
+    }
+    if (step.deliverable) {
+        lines.push(`**Deliverable**: ${step.deliverable}`);
+    }
+    lines.push('');
+    lines.push(`## Step posture`);
+    lines.push('Do ONLY this step. Don\'t start the next one — the orchestrator handles that. ' +
+        'When you\'re done, state your deliverable concretely in your final response ' +
+        '(file path, URL, confirmation) so the orchestrator can advance the chain. ' +
+        'If you hit a blocker (missing info, ambiguous scope, tool failure), say so explicitly ' +
+        'and stop — don\'t guess.');
+    return lines.join('\n');
+}
+// ── Convenience helpers used by run-agent-cron ───────────────────────
+/**
+ * Return the `projectPath` recorded on the plan that a chain task
+ * belongs to. Used by run-agent-cron to set the SDK's `cwd` and
+ * `additionalDirectories` to the project root for the step.
+ *
+ * The plan is loaded by `task.planId` alone; no project path is passed
+ * to loadPlan. NOTE(review): confirm loadPlan can locate plans stored
+ * under a project's own plans dir when called this way.
+ *
+ * @param task Chain step task; its `planId` selects the plan to load.
+ * @returns The plan's project path, or undefined when the task has no
+ *   planId, the plan cannot be loaded, or the plan has no project path.
+ */
+export function projectDirForChainTask(task) {
+    if (!task.planId)
+        return undefined;
+    const plan = loadPlan(task.planId);
+    return plan?.projectPath ?? undefined;
+}
+/**
+ * Format a status line for posting to the originating chat after each
+ * step completes — gives the owner a real-time view of chain progress.
+ *
+ * Output, line by line:
+ *   1. "**Step i/total done**: title" — always worded "done"; the step's
+ *      own status is not consulted, so a failed step reads the same way.
+ *      NOTE(review): confirm callers only pass successfully completed steps.
+ *   2. "→ preview" with the first 200 characters of the step's
+ *      resultPreview, when it has one.
+ *   3. One of: "Next: <title>" (plan 'in_progress', not every step done,
+ *      and a 'pending' step exists), "Chain complete (done/total steps)."
+ *      (plan 'completed'), "Chain paused. Tell me how to proceed."
+ *      (plan 'paused'), or nothing.
+ *
+ * @param plan Plan in its current (already updated) state.
+ * @param justCompletedStep Step to report on.
+ * @returns The lines joined by '\n'.
+ */
+export function formatChainStatusUpdate(plan, justCompletedStep) {
+    const total = plan.steps.length;
+    const done = plan.steps.filter((s) => s.status === 'done').length;
+    const lines = [];
+    lines.push(`**Step ${justCompletedStep.index + 1}/${total} done**: ${justCompletedStep.title}`);
+    if (justCompletedStep.resultPreview) {
+        lines.push(`→ ${justCompletedStep.resultPreview.slice(0, 200)}`);
+    }
+    if (done < total && plan.status === 'in_progress') {
+        const nextStep = plan.steps.find((s) => s.status === 'pending');
+        if (nextStep)
+            lines.push(`Next: ${nextStep.title}`);
+    }
+    else if (plan.status === 'completed') {
+        lines.push(`Chain complete (${done}/${total} steps).`);
+    }
+    else if (plan.status === 'paused') {
+        lines.push(`Chain paused. Tell me how to proceed.`);
+    }
+    return lines.join('\n');
+}
+// `path` is imported at the top of this module but nothing here calls it; the `void path`
+// below only references the binding so unused-import lint stays quiet. NOTE(review): consider dropping the import.
+void path;
+//# sourceMappingURL=bg-orchestrator.js.map

package/dist/agent/bg-planner.d.ts ADDED Viewed

@@ -0,0 +1,142 @@
+/**
+ * bg-planner — decompose a multi-step user request into a chain of
+ * focused PlanSteps that the orchestrator can dispatch one at a time.
+ *
+ * Why this exists (1.18.190)
+ * ──────────────────────────
+ * Before this, a complex multi-step user ask ("find the coaches project,
+ * build me an HTML report, deploy it to Netlify, verify the URL") got
+ * handed to a single monolithic bg-task worker. The worker had its own
+ * 200K context but still autocompact-thrashed because:
+ *   - tool outputs accumulated across all 5-6 phases of the work
+ *   - the model lost fidelity to its own past tool outputs as the
+ *     output-guard tightened from 30KB → 4KB
+ *   - one bad turn (huge file read, big Glob) poisoned the rest
+ *
+ * The decomposition pattern this module enables:
+ *   1. Planner runs ONCE with Sonnet (not Haiku — plans need real
+ *      reasoning, see "Model choice" below)
+ *   2. Emits a Plan: 3-7 PlanSteps, each with title + scope + expected
+ *      tool calls + deliverable artifact path
+ *   3. Plan persists to <project>/.clementine/plans/<planId>.json
+ *      (or BASE_DIR/plans/<planId>.json if no active project)
+ *   4. Orchestrator (bg-orchestrator.ts) queues one bg-task per step,
+ *      each with a tight scope and a fresh 200K worker window
+ *   5. State flows between steps via STATUS.md + the plan ledger;
+ *      no step accumulates context from prior steps
+ *
+ * Model choice: Sonnet, NOT Haiku
+ * ────────────────────────────────
+ * Planning is a reasoning task, not a transformation. A poorly
+ * decomposed plan costs $5+ in downstream worker thrash; a well-
+ * decomposed plan saves multiples of that. The marginal cost of
+ * Sonnet over Haiku (~$0.05-0.15 vs ~$0.01 per plan) is trivial
+ * compared to the downstream cost of bad decomposition. Haiku is for
+ * mechanical tasks (extraction, classification, routing); decomposing
+ * a multi-domain ask into proper steps is not mechanical.
+ *
+ * If you're tempted to "save tokens" by flipping this to Haiku, read
+ * the 2026-05-12 root-cause plan first
+ * (~/.claude/plans/look-at-the-last-vivid-rossum.md). The whole point
+ * of this ship is to NOT cut corners on the decomposition layer.
+ */
+import type { ProjectMeta } from './assistant.js';
+export interface PlanStep {
+    /** 0-indexed position. */
+    index: number;
+    /** Short imperative title (e.g., "Find the coaches project"). */
+    title: string;
+    /** What this step does, in 1-2 sentences. The chained worker sees this. */
+    scope: string;
+    /** Tools the step is expected to call. The chained worker sees this as
+     *  guidance, not enforcement — overshooting is allowed, just not
+     *  preferred. */
+    expectedTools: string[];
+    /** Where the step's output goes (file path, deploy URL, etc.) — used
+     *  by claim-verification + by the next step to find prior work. */
+    deliverable?: string;
+    /** Step status — orchestrator updates this. */
+    status: 'pending' | 'running' | 'done' | 'failed' | 'skipped';
+    /** Set by orchestrator after dispatch. */
+    taskId?: string;
+    /** Worker's final result text (for visibility, capped). */
+    resultPreview?: string;
+    /** Set on completion. */
+    completedAt?: string;
+}
+export interface Plan {
+    /** Unique plan id — also the filename basename. */
+    id: string;
+    /** Chain id — shared by the planner task and all step tasks for one user request. */
+    chainId: string;
+    /** Original user request the planner decomposed. */
+    userRequest: string;
+    /** Resolved project path (if any) when the planner ran. */
+    projectPath?: string;
+    /** Session key of the originating chat — for delivering the final result. */
+    originatingSessionKey?: string;
+    /** ISO when the planner emitted this. */
+    createdAt: string;
+    /** Steps in execution order. */
+    steps: PlanStep[];
+    /** Overall chain status. Derived from steps; persisted for cheap reads. */
+    status: 'pending' | 'in_progress' | 'completed' | 'paused' | 'failed';
+    /** Total estimated cost (USD) for this plan if every step's expectedTools fire as-projected.
+     *  Informational only — not enforced. */
+    estimatedCostUsd?: number;
+    /** Free-form notes from the planner: known risks, assumptions, etc. */
+    notes?: string;
+}
+/**
+ * Where plans live. If `projectPath` is set, plans go inside that
+ * project's `.clementine/plans/` so they travel with the project; if
+ * no project, plans go under `BASE_DIR/plans/` (global).
+ */
+export declare function plansDir(projectPath?: string | null): string;
+export declare function planFile(planId: string, projectPath?: string | null): string;
+export declare function savePlan(plan: Plan, projectPath?: string | null): string;
+export declare function loadPlan(planId: string, projectPath?: string | null): Plan | null;
+export interface PlanRequestOptions {
+    userRequest: string;
+    originatingSessionKey?: string;
+    project?: ProjectMeta | null;
+    /** Optional override; defaults to Sonnet. NEVER pass Haiku here. */
+    model?: string;
+    /** Override the SDK query function for tests. */
+    llmCall?: (prompt: string, systemPrompt: string, model: string) => Promise<string>;
+}
+/**
+ * Decompose a user request into a Plan. Pure async function — no side
+ * effects except the optional LLM call. Caller decides whether to
+ * persist the result via `savePlan`.
+ *
+ * Behavior:
+ *   - Builds a system prompt describing the decomposition contract
+ *   - Asks the model to emit a JSON object matching the Plan schema
+ *   - Validates the response against the schema; logs and retries once on parse failure
+ *   - Returns a Plan ready for orchestrator dispatch
+ *
+ * Failure modes:
+ *   - LLM returns non-JSON → throws PlanGenerationError
+ *   - LLM returns empty steps → throws PlanGenerationError
+ *   - LLM returns >12 steps → trimmed to first 12 with a warning
+ */
+export declare function planRequest(opts: PlanRequestOptions): Promise<Plan>;
+export declare class PlanGenerationError extends Error {
+    constructor(message: string);
+}
+interface RawPlannerResponse {
+    steps?: Array<{
+        title?: unknown;
+        scope?: unknown;
+        expectedTools?: unknown;
+        deliverable?: unknown;
+    }>;
+    estimatedCostUsd?: number;
+    notes?: unknown;
+}
+/** Defensive JSON parse — strips common LLM wrappers (markdown fences,
+ *  leading/trailing prose) before parsing. */
+export declare function parsePlannerResponse(raw: string): RawPlannerResponse | null;
+export {};
+//# sourceMappingURL=bg-planner.d.ts.map

package/dist/agent/bg-planner.js ADDED Viewed

@@ -0,0 +1,321 @@
+/**
+ * bg-planner — decompose a multi-step user request into a chain of
+ * focused PlanSteps that the orchestrator can dispatch one at a time.
+ *
+ * Why this exists (1.18.190)
+ * ──────────────────────────
+ * Before this, a complex multi-step user ask ("find the coaches project,
+ * build me an HTML report, deploy it to Netlify, verify the URL") got
+ * handed to a single monolithic bg-task worker. The worker had its own
+ * 200K context but still autocompact-thrashed because:
+ *   - tool outputs accumulated across all 5-6 phases of the work
+ *   - the model lost fidelity to its own past tool outputs as the
+ *     output-guard tightened from 30KB → 4KB
+ *   - one bad turn (huge file read, big Glob) poisoned the rest
+ *
+ * The decomposition pattern this module enables:
+ *   1. Planner runs ONCE with Sonnet (not Haiku — plans need real
+ *      reasoning, see "Model choice" below)
+ *   2. Emits a Plan: 3-7 PlanSteps, each with title + scope + expected
+ *      tool calls + deliverable artifact path
+ *   3. Plan persists to <project>/.clementine/plans/<planId>.json
+ *      (or BASE_DIR/plans/<planId>.json if no active project)
+ *   4. Orchestrator (bg-orchestrator.ts) queues one bg-task per step,
+ *      each with a tight scope and a fresh 200K worker window
+ *   5. State flows between steps via STATUS.md + the plan ledger;
+ *      no step accumulates context from prior steps
+ *
+ * Model choice: Sonnet, NOT Haiku
+ * ────────────────────────────────
+ * Planning is a reasoning task, not a transformation. A poorly
+ * decomposed plan costs $5+ in downstream worker thrash; a well-
+ * decomposed plan saves multiples of that. The marginal cost of
+ * Sonnet over Haiku (~$0.05-0.15 vs ~$0.01 per plan) is trivial
+ * compared to the downstream cost of bad decomposition. Haiku is for
+ * mechanical tasks (extraction, classification, routing); decomposing
+ * a multi-domain ask into proper steps is not mechanical.
+ *
+ * If you're tempted to "save tokens" by flipping this to Haiku, read
+ * the 2026-05-12 root-cause plan first
+ * (~/.claude/plans/look-at-the-last-vivid-rossum.md). The whole point
+ * of this ship is to NOT cut corners on the decomposition layer.
+ */
+import fs from 'node:fs';
+import path from 'node:path';
+import { randomUUID } from 'node:crypto';
+import pino from 'pino';
+import { BASE_DIR, MODELS, applyOneMillionContextRecovery, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext } from '../config.js';
+const logger = pino({ name: 'clementine.bg-planner' });
+// ── Persistence ──────────────────────────────────────────────────────
+/**
+ * Where plans live. If `projectPath` is set, plans go inside that
+ * project's `.clementine/plans/` so they travel with the project; if
+ * no project, plans go under `BASE_DIR/plans/` (global).
+ */
+export function plansDir(projectPath) {
+    if (projectPath)
+        return path.join(projectPath, '.clementine', 'plans');
+    return path.join(BASE_DIR, 'plans');
+}
+export function planFile(planId, projectPath) {
+    const safe = String(planId).replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, 96);
+    return path.join(plansDir(projectPath), `${safe}.json`);
+}
+export function savePlan(plan, projectPath) {
+    const dir = plansDir(projectPath);
+    fs.mkdirSync(dir, { recursive: true });
+    const file = planFile(plan.id, projectPath);
+    fs.writeFileSync(file, JSON.stringify(plan, null, 2));
+    return file;
+}
+export function loadPlan(planId, projectPath) {
+    const file = planFile(planId, projectPath);
+    if (!fs.existsSync(file)) {
+        // Fallback: if the project-scoped path is missing, try the global
+        // dir. Common when the project was added AFTER the plan was created.
+        if (projectPath) {
+            const fallback = planFile(planId);
+            if (fs.existsSync(fallback)) {
+                try {
+                    return JSON.parse(fs.readFileSync(fallback, 'utf-8'));
+                }
+                catch {
+                    return null;
+                }
+            }
+        }
+        return null;
+    }
+    try {
+        return JSON.parse(fs.readFileSync(file, 'utf-8'));
+    }
+    catch (err) {
+        logger.warn({ err, planId }, 'plan parse failed');
+        return null;
+    }
+}
+/**
+ * Decompose a user request into a Plan. Pure async function — no side
+ * effects except the optional LLM call. Caller decides whether to
+ * persist the result via `savePlan`.
+ *
+ * Behavior:
+ *   - Builds a system prompt describing the decomposition contract
+ *   - Asks the model to emit a JSON object matching the Plan schema
+ *   - Validates the response against the schema; logs and retries once on parse failure
+ *   - Returns a Plan ready for orchestrator dispatch
+ *
+ * Failure modes:
+ *   - LLM returns non-JSON → throws PlanGenerationError
+ *   - LLM returns empty steps → throws PlanGenerationError
+ *   - LLM returns >12 steps → trimmed to first 12 with a warning
+ */
+export async function planRequest(opts) {
+    const model = opts.model ?? MODELS.sonnet ?? 'claude-sonnet-4-6';
+    const chainId = `chain-${randomUUID().slice(0, 12)}`;
+    const planId = `plan-${randomUUID().slice(0, 12)}`;
+    const systemPrompt = buildPlannerSystemPrompt();
+    const userPrompt = buildPlannerUserPrompt(opts);
+    const text = opts.llmCall
+        ? await opts.llmCall(userPrompt, systemPrompt, model)
+        : await runPlannerLlm(userPrompt, systemPrompt, model);
+    const parsed = parsePlannerResponse(text);
+    if (!parsed || !Array.isArray(parsed.steps) || parsed.steps.length === 0) {
+        throw new PlanGenerationError(`Planner returned no steps (raw response head: ${text.slice(0, 200)})`);
+    }
+    // Cap at 12 steps. Real multi-step work fits in 3-7; >12 is almost
+    // always over-decomposition by the model. Trim to keep chains manageable.
+    const rawSteps = parsed.steps.slice(0, 12);
+    const steps = rawSteps.map((raw, i) => ({
+        index: i,
+        title: String(raw.title ?? `Step ${i + 1}`).slice(0, 160),
+        scope: String(raw.scope ?? '').slice(0, 800),
+        expectedTools: Array.isArray(raw.expectedTools)
+            ? raw.expectedTools.map((t) => String(t)).filter(Boolean).slice(0, 8)
+            : [],
+        ...(raw.deliverable ? { deliverable: String(raw.deliverable).slice(0, 400) } : {}),
+        status: 'pending',
+    }));
+    const plan = {
+        id: planId,
+        chainId,
+        userRequest: opts.userRequest,
+        ...(opts.project?.path ? { projectPath: opts.project.path } : {}),
+        ...(opts.originatingSessionKey ? { originatingSessionKey: opts.originatingSessionKey } : {}),
+        createdAt: new Date().toISOString(),
+        steps,
+        status: 'pending',
+        ...(typeof parsed.estimatedCostUsd === 'number' ? { estimatedCostUsd: parsed.estimatedCostUsd } : {}),
+        ...(parsed.notes ? { notes: String(parsed.notes).slice(0, 600) } : {}),
+    };
+    logger.info({
+        planId,
+        chainId,
+        stepCount: steps.length,
+        model,
+        project: opts.project?.path,
+    }, 'planRequest: emitted plan');
+    return plan;
+}
+/**
+ * Raised by `planRequest` when the planner's reply yields no steps.
+ * Callers can distinguish it from other errors via `instanceof` or `name`.
+ */
+export class PlanGenerationError extends Error {
+    constructor(message) {
+        super(message);
+        // Replace the inherited "Error" label so logs identify planner failures.
+        this.name = 'PlanGenerationError';
+    }
+}
+// ── Internals: prompt construction + SDK call ────────────────────────
+/**
+ * Build the fixed system prompt for the planner call. It explains why
+ * decomposition matters, lists the decomposition principles, specifies
+ * the JSON output shape, and names the worker tools most relevant to
+ * planning. Takes no arguments; the lines are joined with newlines.
+ */
+function buildPlannerSystemPrompt() {
+    return [
+        'You are a planning assistant for Clementine, a personal AI agent.',
+        'Your one job: take a multi-step user request and decompose it into 3-7 focused',
+        'subtasks that an execution worker can run one at a time, each in its own fresh',
+        'context window.',
+        '',
+        '## Why decomposition matters',
+        '',
+        'The execution worker has a 200K context budget per step. If a single step',
+        'tries to do too much (read a 10MB CSV + build HTML + deploy + verify), it',
+        'fills its window with tool outputs, the SDK compacts, fidelity degrades, and',
+        'the worker thrashes. Your job is to keep each step BOUNDED so this can\'t',
+        'happen.',
+        '',
+        '## Decomposition principles',
+        '',
+        '1. **One verb per step.** Each step does ONE thing: find, read, build,',
+        '   write, deploy, verify. Compound verbs ("build and deploy") = bad step.',
+        '2. **State flows through disk, not context.** If step 3 needs data from',
+        '   step 1, step 1 writes to a file; step 3 reads it. Don\'t carry data in',
+        '   the chain itself.',
+        '3. **Each step has ONE deliverable.** A file path, a URL, a confirmation.',
+        '   Steps without a clear deliverable are signal that the decomposition is',
+        '   off.',
+        '4. **Estimate tool calls per step.** A step expected to make >10 tool calls',
+        '   probably needs to be split. Aim for 2-6 tool calls per step.',
+        '5. **Match steps to the actual user ask.** Don\'t add steps the user didn\'t',
+        '   request (e.g., don\'t add a "send confirmation email" step unless they',
+        '   asked). Don\'t skip steps they DID ask for.',
+        '',
+        '## Output format — STRICT JSON only',
+        '',
+        'Return ONLY a JSON object with this shape. No markdown fences, no prose:',
+        '',
+        // NOTE(review): the sample below contains a `// ...` line although the
+        // prompt demands strict JSON, and parsePlannerResponse relies on
+        // JSON.parse — confirm models do not echo that comment back.
+        '{',
+        '  "steps": [',
+        '    {',
+        '      "title": "<short imperative title, e.g. \'Find the coaches project\'>",',
+        '      "scope": "<1-2 sentences describing exactly what this step does>",',
+        '      "expectedTools": ["tool_name_1", "tool_name_2"],',
+        '      "deliverable": "<file path | URL | description of the artifact>"',
+        '    }',
+        '    // ... 2-11 more steps',
+        '  ],',
+        '  "estimatedCostUsd": 0.50,',
+        '  "notes": "<known risks or assumptions, optional>"',
+        '}',
+        '',
+        'Available tools the worker can call (these are the most relevant for',
+        'decomposition; full list is bigger):',
+        '- project_discover, project_link, project_deploy (Clementine project tools)',
+        '- Read, Write, Edit (file I/O)',
+        '- Bash (any shell command — prefer `head/awk/jq` over Read for big files)',
+        '- Glob, Grep (search)',
+        '- memory_search, memory_write (Clementine memory)',
+        '- WebFetch, WebSearch (web)',
+        '',
+        'Sample expected-tool sequences:',
+        '- "Find a project" → [project_discover, project_link]',
+        '- "Read source data" → [Bash (head/wc), Read]',
+        '- "Build artifact" → [Read, Write]',
+        '- "Deploy" → [project_deploy]',
+        '- "Verify deploy" → [Bash (curl)]',
+    ].join('\n');
+}
+function buildPlannerUserPrompt(opts) {
+    const lines = [];
+    lines.push('## User request');
+    lines.push(opts.userRequest);
+    if (opts.project) {
+        lines.push('');
+        lines.push('## Active project');
+        lines.push(`Path: ${opts.project.path}`);
+        if (opts.project.description)
+            lines.push(`Description: ${opts.project.description}`);
+        if (opts.project.keywords?.length)
+            lines.push(`Keywords: ${opts.project.keywords.join(', ')}`);
+        // Surface STATUS.md preview if present — it carries state from prior chains.
+        try {
+            const statusPath = path.join(opts.project.path, '.clementine', 'STATUS.md');
+            if (fs.existsSync(statusPath)) {
+                const status = fs.readFileSync(statusPath, 'utf-8').trim();
+                if (status) {
+                    lines.push('');
+                    lines.push('## Project STATUS.md (current state)');
+                    lines.push(status.slice(0, 1500));
+                }
+            }
+        }
+        catch { /* non-fatal */ }
+    }
+    else {
+        lines.push('');
+        lines.push('## Active project');
+        lines.push('(none — if this request implies a project, your first step should be project_discover + project_link to resolve it before doing other work)');
+    }
+    lines.push('');
+    lines.push('Decompose the request into a Plan. Return strict JSON, no prose.');
+    return lines.join('\n');
+}
+async function runPlannerLlm(userPrompt, systemPrompt, model) {
+    const { query } = await import('@anthropic-ai/claude-agent-sdk');
+    let text = '';
+    const stream = query({
+        prompt: userPrompt,
+        options: normalizeClaudeSdkOptionsForOneMillionContext({
+            model,
+            maxTurns: 1, // single shot — emit JSON, done
+            systemPrompt,
+        }),
+    });
+    for await (const msg of stream) {
+        if (msg.type === 'result') {
+            // SDK 'result' message carries the final text.
+            const m = msg;
+            if (m.is_error) {
+                const errorText = Array.isArray(m.errors) ? m.errors.join('; ') : String(m.result ?? '');
+                if (looksLikeClaudeOneMillionContextError(errorText))
+                    applyOneMillionContextRecovery();
+                throw new Error(errorText || 'Planner SDK call failed');
+            }
+            text = m.result ?? '';
+        }
+    }
+    return text;
+}
+/** Defensive JSON parse — strips common LLM wrappers (markdown fences,
+ *  leading/trailing prose) before parsing. */
+export function parsePlannerResponse(raw) {
+    if (!raw || !raw.trim())
+        return null;
+    // Strip ```json ... ``` or ``` ... ``` fences if present.
+    let text = raw.trim();
+    const fenceMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
+    if (fenceMatch)
+        text = fenceMatch[1].trim();
+    // If still not pure JSON, try to extract the first {...} block.
+    if (!text.startsWith('{')) {
+        const objMatch = text.match(/\{[\s\S]*\}/);
+        if (objMatch)
+            text = objMatch[0];
+    }
+    try {
+        const parsed = JSON.parse(text);
+        if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
+            return parsed;
+        }
+        return null;
+    }
+    catch {
+        return null;
+    }
+}
+//# sourceMappingURL=bg-planner.js.map

package/dist/gateway/cron-scheduler.d.ts CHANGED Viewed

@@ -266,6 +266,23 @@ export declare class CronScheduler {
      * out of the trigger interval.
      */
     private processBackgroundTasks;
+    /**
+     * 1.18.190 — execute a planner bg-task. The task's `prompt` is the
+     * original user request that caused chat overflow. Decompose via
+     * `planRequest` (Sonnet), persist the Plan, mark this planner task
+     * done with a summary, send the kickoff message, then dispatch
+     * step 0 via `dispatchChain`.
+     *
+     * Failure mode: if planRequest (or any later step of the above)
+     * throws, the planner task is marked failed and a "Planning failed"
+     * message is surfaced to the originating chat.
+     */
+    private runPlannerBackgroundTask;
+    /**
+     * 1.18.190 — after a chained step completes (success or failure),
+     * advance the chain. Success queues the next step; failure pauses
+     * the chain and notifies the owner. The owner gets a per-step
+     * update message so they see progress instead of one final dump.
+     * Errors raised while advancing are logged, not rethrown.
+     */
+    private advanceChainAfterStep;
     /** Process any pending trigger files and run the corresponding jobs. */
     private processTriggers;
     /** Process any pending goal work trigger files. Routes through the execution advisor. */

package/dist/gateway/cron-scheduler.js CHANGED Viewed

@@ -1978,7 +1978,23 @@ export class CronScheduler {
             const started = markBgTaskRunning(task.id, undefined, { jobName });
             if (!started)
                 continue;
-            logger.info({ id: started.id, fromAgent: started.fromAgent, maxMinutes: started.maxMinutes }, 'Background task picked up');
+            logger.info({
+                id: started.id,
+                fromAgent: started.fromAgent,
+                maxMinutes: started.maxMinutes,
+                kind: started.kind ?? 'monolithic',
+                chainId: started.chainId,
+                planId: started.planId,
+                stepIndex: started.stepIndex,
+            }, 'Background task picked up');
+            // 1.18.190 — planner tasks don't run through the worker pattern.
+            // They make a single LLM call (Sonnet) that decomposes the user
+            // request into a Plan, persist it, then dispatch step 0.
+            if (started.kind === 'planner') {
+                this.runPlannerBackgroundTask(started)
+                    .catch((err) => logger.warn({ err, id: started.id }, 'Planner background task failed at top level'));
+                continue;
+            }
             updateBackgroundTask(started.id, {
                 lastNotifiedAt: new Date().toISOString(),
                 progressMessageCount: 0,
@@ -2031,6 +2047,14 @@ export class CronScheduler {
                 }
                 // Dispatch the deliverable to the originating agent's channel.
                 const completed = loadBackgroundTask(started.id) ?? started;
+                // 1.18.190 — chained step completion. If this task was a step in
+                // a Plan, advance the chain instead of treating completion as the
+                // end of the job. The orchestrator queues the next step and the
+                // owner sees a step-by-step update rather than one final dump.
+                if (completed.kind === 'step') {
+                    this.advanceChainAfterStep(completed).catch((err) => logger.warn({ err, id: completed.id }, 'Failed to advance chain after step completion'));
+                    return;
+                }
                 const deliveryHead = `**Background task ${started.id} done** — ${started.prompt.slice(0, 100).replace(/\s+/g, ' ')}${started.prompt.length > 100 ? '...' : ''}\n\n`;
                 const body = (result ?? '').slice(0, 1500);
                 const deliveryMessage = deliveryHead + body;
@@ -2053,6 +2077,13 @@ export class CronScheduler {
                     logger.warn({ err: saveErr, id: started.id }, 'Failed to mark background task failed');
                 }
                 const failed = loadBackgroundTask(started.id) ?? started;
+                // 1.18.190 — chained step failure pauses the chain. The owner
+                // gets a "chain paused at step N" message instead of just "task
+                // failed" so they can retry/edit/abandon.
+                if (failed.kind === 'step') {
+                    this.advanceChainAfterStep(failed).catch((advanceErr) => logger.warn({ err: advanceErr, id: failed.id }, 'Failed to pause chain after step failure'));
+                    return;
+                }
                 const failMessage = `**Background task ${started.id} failed** — ${errStr.slice(0, 200)}`;
                 this.dispatcher
                     .send(failMessage, this.dispatchContextForBackgroundTask(failed))
@@ -2063,6 +2094,122 @@ export class CronScheduler {
             });
         }
     }
+    /**
+     * 1.18.190 — execute a planner bg-task. The task's `prompt` is the
+     * original user request that caused chat overflow. Decompose via
+     * `planRequest` (Sonnet), persist the Plan, mark this planner task
+     * done with a summary, send the kickoff message, then dispatch
+     * step 0 via `dispatchChain`.
+     *
+     * Failure mode: any error thrown inside the try block (not only from
+     * planRequest) marks the planner task failed and surfaces a
+     * "Planning failed" message to the originating chat.
+     *
+     * @param task The planner background task; `id`, `prompt` and
+     *   `sessionKey` are read here.
+     */
+    async runPlannerBackgroundTask(task) {
+        try {
+            const { planRequest, savePlan } = await import('../agent/bg-planner.js');
+            const { dispatchChain } = await import('../agent/bg-orchestrator.js');
+            // Resolve the active project for this session, if any. The chat
+            // overflow path already set this when the user mentioned a
+            // project name; if none was set, the planner runs without
+            // project context and its first step is typically a
+            // project_discover.
+            const project = task.sessionKey
+                ? (this.gateway.getSessionProject?.(task.sessionKey) ?? null)
+                : null;
+            logger.info({
+                id: task.id,
+                sessionKey: task.sessionKey,
+                projectPath: project?.path,
+            }, 'Planner bg-task: decomposing user request');
+            const plan = await planRequest({
+                userRequest: task.prompt,
+                ...(task.sessionKey ? { originatingSessionKey: task.sessionKey } : {}),
+                ...(project ? { project } : {}),
+            });
+            // Persist next to the project when the plan has a projectPath,
+            // otherwise in the global plans directory.
+            savePlan(plan, plan.projectPath);
+            // Mark the planner task done with a concise summary so the
+            // dashboard + recall show what happened.
+            const planSummary = [
+                `Planned ${plan.steps.length} steps for: ${task.prompt.slice(0, 80)}${task.prompt.length > 80 ? '...' : ''}`,
+                '',
+                ...plan.steps.map((s, i) => `${i + 1}. ${s.title}`),
+                '',
+                `Chain id: ${plan.chainId}`,
+                `Plan id: ${plan.id}`,
+            ].join('\n');
+            try {
+                markBgTaskDone(task.id, planSummary);
+            }
+            catch (err) {
+                // Bookkeeping failure is logged only; the chain still starts.
+                logger.warn({ err, id: task.id }, 'Failed to mark planner task done');
+            }
+            // Send a kickoff message to the originating chat — "I planned X
+            // steps, starting step 1 now" — so the owner sees the
+            // decomposition before watching steps land one at a time.
+            const completed = loadBackgroundTask(task.id) ?? task;
+            const kickoffHeader = `**Plan ready (${plan.steps.length} steps)** — starting step 1: ${plan.steps[0]?.title ?? '(unknown)'}\n\n`;
+            const kickoffBody = plan.steps.map((s, i) => `${i + 1}. ${s.title}`).join('\n');
+            this.dispatcher
+                .send(kickoffHeader + kickoffBody, this.dispatchContextForBackgroundTask(completed))
+                .catch((err) => logger.debug({ err, id: task.id }, 'Failed to dispatch plan kickoff'));
+            this.mirrorBackgroundTaskToChat(completed.sessionKey, `[Plan ${plan.id} created with ${plan.steps.length} steps for: ${task.prompt.slice(0, 200)}]`, kickoffHeader + kickoffBody, completed.id);
+            // Dispatch the first step. The orchestrator persists task ids
+            // back into the plan so subsequent advancement is idempotent.
+            // NOTE(review): by this point markBgTaskDone has run and the
+            // kickoff message was sent; if dispatchChain throws, the catch
+            // below also calls markBgTaskFailed and sends "Planning failed"
+            // — confirm that done-then-failed sequence is acceptable.
+            dispatchChain(plan);
+        }
+        catch (err) {
+            const errStr = String(err).slice(0, 500);
+            logger.warn({ err, id: task.id }, 'Planner bg-task failed');
+            try {
+                markBgTaskFailed(task.id, errStr, 'failed');
+            }
+            catch (saveErr) {
+                logger.warn({ err: saveErr, id: task.id }, 'Failed to mark planner task failed');
+            }
+            const failed = loadBackgroundTask(task.id) ?? task;
+            const failMessage = `**Planning failed** — ${errStr.slice(0, 300)}\n\nThe request didn't decompose into chained steps. Try rephrasing or breaking it up manually.`;
+            // Best-effort notification; a delivery failure is deliberately ignored.
+            this.dispatcher
+                .send(failMessage, this.dispatchContextForBackgroundTask(failed))
+                .catch(() => { });
+        }
+    }
+    /**
+     * 1.18.190 — after a chained step completes (success or failure),
+     * advance the chain. Success queues the next step; failure pauses
+     * the chain and notifies the owner. The owner gets a per-step
+     * update message so they see progress instead of one final dump.
+     */
+    async advanceChainAfterStep(completed) {
+        try {
+            const { advanceChain, formatChainStatusUpdate } = await import('../agent/bg-orchestrator.js');
+            const { loadPlan } = await import('../agent/bg-planner.js');
+            const next = advanceChain({ completedTask: completed });
+            const plan = completed.planId ? loadPlan(completed.planId) : null;
+            // Send a step-level update to the originating chat.
+            if (plan && typeof completed.stepIndex === 'number') {
+                const step = plan.steps[completed.stepIndex];
+                if (step) {
+                    const statusMessage = formatChainStatusUpdate(plan, step);
+                    this.dispatcher
+                        .send(statusMessage, this.dispatchContextForBackgroundTask(completed))
+                        .catch((err) => logger.debug({ err, id: completed.id }, 'Failed to dispatch chain step update'));
+                    this.mirrorBackgroundTaskToChat(completed.sessionKey, `[Chain ${plan.chainId} step ${completed.stepIndex + 1}/${plan.steps.length}: ${step.status}]`, statusMessage, completed.id);
+                }
+            }
+            // If no next step was queued (chain complete or paused), nothing
+            // more to do here — the formatChainStatusUpdate already covered
+            // the user-facing summary.
+            if (!next) {
+                logger.info({
+                    completedTaskId: completed.id,
+                    planId: completed.planId,
+                    stepIndex: completed.stepIndex,
+                }, 'Chain advancement: no next step (complete or paused)');
+            }
+        }
+        catch (err) {
+            logger.warn({ err, id: completed.id }, 'advanceChainAfterStep: failed');
+        }
+    }
     /** Process any pending trigger files and run the corresponding jobs. */
     processTriggers() {
         if (!existsSync(this.triggerDir))

package/dist/gateway/router.js CHANGED Viewed

@@ -430,26 +430,42 @@ export class Gateway {
         ].join('\n');
     }
     queueBackgroundTaskAfterContextOverflow(sessionKey, prompt) {
-        const recommendation = detectComplexTaskForBackground(prompt);
+        // 1.18.190 — the chat-overflow recovery path is now the canonical
+        // entry to the planner-orchestrator chain. Instead of queuing a
+        // monolithic bg-task that tries to do everything in one worker
+        // (which thrashed when the worker's context filled), we queue a
+        // planner task. The planner is a tiny Sonnet LLM call that
+        // decomposes the user's request into 3-7 PlanSteps; the
+        // orchestrator then dispatches one step at a time, each with
+        // its own fresh 200K worker window. See agent/bg-planner.ts +
+        // agent/bg-orchestrator.ts for the full pattern.
+        //
+        // The legacy `detectComplexTaskForBackground` heuristic is no
+        // longer used here — the planner itself decides how to decompose,
+        // and per-step maxMinutes is governed by orchestrator settings.
+        // Planner tasks get a tight 5-minute cap; total chain wall-clock
+        // is the sum of each step's own maxMinutes.
         const task = createBackgroundTask({
             fromAgent: this.backgroundAgentForSession(sessionKey),
             prompt,
-            maxMinutes: recommendation?.suggestedMaxMinutes ?? 60,
+            maxMinutes: 5, // planner needs minutes, not hours
             sessionKey,
+            kind: 'planner',
         });
         logger.warn({
             taskId: task.id,
             sessionKey,
             fromAgent: task.fromAgent,
-            maxMinutes: task.maxMinutes,
-        }, 'Queued background task after repeated chat context overflow');
+            kind: 'planner',
+        }, 'Queued planner task after repeated chat context overflow');
         return {
             task,
             response: [
-                `The live chat context hit the limit, so I moved this into background task **${task.id}** and kept your request attached.`,
+                `The live chat context hit the limit, so I'm decomposing your request into chained steps via background task **${task.id}**.`,
                 '',
-                `It will run as **${task.fromAgent}** in a fresh task session with a ${task.maxMinutes} minute cap.`,
-                `Use \`status ${task.id}\` or check the dashboard Background Tasks panel for progress.`,
+                `Step 1: a Sonnet planner reads the request and emits a plan (~30 seconds).`,
+                `Then each step runs as its own fresh task — you'll see step-by-step updates rather than one big "done" at the end.`,
+                `Use \`status ${task.id}\` or the dashboard Background Tasks panel for progress.`,
             ].join('\n'),
         };
     }

package/dist/types.d.ts CHANGED Viewed

@@ -298,6 +298,16 @@ export interface BackgroundTask {
     mirroredAt?: string;
     verificationFlag?: 'claimed-without-evidence';
     verificationDetails?: string;
+    /** Discriminator: planner / step / monolithic (default). */
+    kind?: 'planner' | 'step' | 'monolithic';
+    /** Chain identifier shared by the planner task and all its step tasks. */
+    chainId?: string;
+    /** Plan identifier (file basename in .clementine/plans/<planId>.json). */
+    planId?: string;
+    /** For kind='step': 0-indexed position within the plan's steps array. */
+    stepIndex?: number;
+    /** For kind='step': the task that queued this one (planner or prev step). */
+    parentTaskId?: string;
 }
 /**
  * State for one specialist agent's heartbeat scheduler. Persisted at

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.18.189",
+  "version": "1.18.190",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",