clementine-agent 1.18.189 → 1.18.190

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,12 +26,21 @@ export interface BackgroundTaskOptions {
26
26
  /**
27
27
  * Create a new pending task on disk and return it. Caller (the MCP tool)
28
28
  * doesn't await execution — the daemon picks the task up asynchronously.
29
+ *
30
+ * 1.18.190 — accepts the new chain fields (kind / chainId / planId /
31
+ * stepIndex / parentTaskId) for planner-orchestrator chained tasks. All
32
+ * are optional; callers that pass none get the legacy monolithic shape.
29
33
  */
30
34
  export declare function createBackgroundTask(input: {
31
35
  fromAgent: string;
32
36
  prompt: string;
33
37
  maxMinutes: number;
34
38
  sessionKey?: string;
39
+ kind?: BackgroundTask['kind'];
40
+ chainId?: string;
41
+ planId?: string;
42
+ stepIndex?: number;
43
+ parentTaskId?: string;
35
44
  }, opts?: BackgroundTaskOptions): BackgroundTask;
36
45
  /** Load a task by id, or null if not found / malformed. */
37
46
  export declare function loadBackgroundTask(id: string, opts?: BackgroundTaskOptions): BackgroundTask | null;
@@ -45,6 +45,10 @@ function safeWrite(file, task) {
45
45
  /**
46
46
  * Create a new pending task on disk and return it. Caller (the MCP tool)
47
47
  * doesn't await execution — the daemon picks the task up asynchronously.
48
+ *
49
+ * 1.18.190 — accepts the new chain fields (kind / chainId / planId /
50
+ * stepIndex / parentTaskId) for planner-orchestrator chained tasks. All
51
+ * are optional; callers that pass none get the legacy monolithic shape.
48
52
  */
49
53
  export function createBackgroundTask(input, opts) {
50
54
  const now = new Date();
@@ -58,6 +62,16 @@ export function createBackgroundTask(input, opts) {
58
62
  };
59
63
  if (input.sessionKey)
60
64
  task.sessionKey = input.sessionKey;
65
+ if (input.kind)
66
+ task.kind = input.kind;
67
+ if (input.chainId)
68
+ task.chainId = input.chainId;
69
+ if (input.planId)
70
+ task.planId = input.planId;
71
+ if (typeof input.stepIndex === 'number')
72
+ task.stepIndex = input.stepIndex;
73
+ if (input.parentTaskId)
74
+ task.parentTaskId = input.parentTaskId;
61
75
  safeWrite(pathFor(task.id, opts), task);
62
76
  return task;
63
77
  }
@@ -0,0 +1,103 @@
1
+ /**
2
+ * bg-orchestrator — drive a Plan from start to finish by queuing one
3
+ * bg-task per PlanStep, advancing the chain as each step completes.
4
+ *
5
+ * Why this exists (1.18.190)
6
+ * ──────────────────────────
7
+ * The bg-planner produces a Plan with 3-7 PlanSteps. This module is
8
+ * the runtime that executes that Plan, one step at a time, with each
9
+ * step getting its own fresh bg-task + worker context. The chain only
10
+ * advances when the previous step completed successfully — failures
11
+ * pause the chain and notify the owner.
12
+ *
13
+ * Architectural role:
14
+ * bg-planner.ts → Plan (data)
15
+ * bg-orchestrator.ts → drives the Plan (this module)
16
+ * background-tasks.ts → bg-task persistence (filesystem)
17
+ * run-agent-cron.ts → runs the actual SDK call for each step
18
+ *
19
+ * The orchestrator is NEVER the thing reading files or calling APIs.
20
+ * It's a state machine: read plan → queue next step → wait for step
21
+ * to finish → repeat. The state machine lives across daemon restarts
22
+ * because both Plans and BackgroundTasks are filesystem-persisted.
23
+ *
24
+ * Why this prevents the autocompact thrash that motivated 1.18.190:
25
+ * - each step gets a FRESH bg-task with a FRESH 200K worker window
26
+ * - state flows between steps via the project's STATUS.md and the
27
+ * plan's `deliverable` fields, NOT via accumulated SDK context
28
+ * - no single worker has to do more than ~2-6 tool calls before
29
+ * completing its scoped deliverable
30
+ * - the model's compaction pressure resets between steps
31
+ */
32
+ import { createBackgroundTask } from './background-tasks.js';
33
+ import { loadPlan, savePlan } from './bg-planner.js';
34
+ import type { Plan, PlanStep } from './bg-planner.js';
35
+ import type { BackgroundTask } from '../types.js';
36
+ /**
37
+ * Queue the first step of a freshly-planned chain. Returns the
38
+ * BackgroundTask created for step 0.
39
+ *
40
+ * Caller responsibility: the Plan must already be persisted to disk
41
+ * (via savePlan) before calling this — the dispatched step task
42
+ * carries a planId that will be loaded back at execution time.
43
+ */
44
+ export declare function dispatchChain(plan: Plan): BackgroundTask;
45
+ /**
46
+ * Called by the bg-task framework when a chained step completes.
47
+ * Updates the plan's step status, then either:
48
+ * - queues the next step (chain continues),
49
+ * - marks the plan completed (no more steps), or
50
+ * - pauses the chain (step failed; owner notification surfaces elsewhere).
51
+ *
52
+ * Returns the next BackgroundTask if one was queued, or null otherwise.
53
+ *
54
+ * Safe to call multiple times for the same completed task (idempotent
55
+ * via the step's status check).
56
+ */
57
+ export declare function advanceChain(opts: {
58
+ completedTask: BackgroundTask;
59
+ /** Optional override for tests; defaults to filesystem. */
60
+ loadPlanFn?: typeof loadPlan;
61
+ savePlanFn?: typeof savePlan;
62
+ createTaskFn?: typeof createBackgroundTask;
63
+ }): BackgroundTask | null;
64
+ /**
65
+ * Pause a chain explicitly (e.g., owner intervention). The current
66
+ * running step is left alone — caller can mark it however the
67
+ * downstream cancellation flow does.
68
+ */
69
+ export declare function pauseChain(planId: string, projectPath?: string | null, reason?: string): void;
70
+ /**
71
+ * Resume a paused chain by dispatching its next pending step. If
72
+ * all steps are terminal, marks the plan completed. Returns the
73
+ * dispatched task, or null when nothing to dispatch.
74
+ */
75
+ export declare function resumeChain(planId: string, projectPath?: string | null): BackgroundTask | null;
76
+ /**
77
+ * Build the focused prompt for one chained worker. Designed to be SMALL
78
+ * (~1-2KB) and ANCHORED to the step's deliverable so the worker has a
79
+ * clear stopping condition. Key elements:
80
+ * - The original user request (for context, not for re-doing it)
81
+ * - The plan summary (what's been done, what's next)
82
+ * - THIS step's scope + expected tools + deliverable
83
+ * - Posture: "do ONLY this step. Don't overshoot. State your deliverable
84
+ * in your final response."
85
+ *
86
+ * State that the next step might need is read by that step from the
87
+ * project STATUS.md or the prior step's deliverable file — NOT from
88
+ * this step's response text. Result text is for the orchestrator's
89
+ * advancement decision; deliverables are for the work itself.
90
+ */
91
+ export declare function buildStepPrompt(plan: Plan, step: PlanStep): string;
92
+ /**
93
+ * Given a chain step's taskId, derive the directory where the plan
94
+ * lives. Used by run-agent-cron to set the SDK's `cwd` and
95
+ * `additionalDirectories` to the project root for the step.
96
+ */
97
+ export declare function projectDirForChainTask(task: BackgroundTask): string | undefined;
98
+ /**
99
+ * Format a status line for posting to the originating chat after each
100
+ * step completes — gives the owner a real-time view of chain progress.
101
+ */
102
+ export declare function formatChainStatusUpdate(plan: Plan, justCompletedStep: PlanStep): string;
103
+ //# sourceMappingURL=bg-orchestrator.d.ts.map
@@ -0,0 +1,323 @@
1
+ /**
2
+ * bg-orchestrator — drive a Plan from start to finish by queuing one
3
+ * bg-task per PlanStep, advancing the chain as each step completes.
4
+ *
5
+ * Why this exists (1.18.190)
6
+ * ──────────────────────────
7
+ * The bg-planner produces a Plan with 3-7 PlanSteps. This module is
8
+ * the runtime that executes that Plan, one step at a time, with each
9
+ * step getting its own fresh bg-task + worker context. The chain only
10
+ * advances when the previous step completed successfully — failures
11
+ * pause the chain and notify the owner.
12
+ *
13
+ * Architectural role:
14
+ * bg-planner.ts → Plan (data)
15
+ * bg-orchestrator.ts → drives the Plan (this module)
16
+ * background-tasks.ts → bg-task persistence (filesystem)
17
+ * run-agent-cron.ts → runs the actual SDK call for each step
18
+ *
19
+ * The orchestrator is NEVER the thing reading files or calling APIs.
20
+ * It's a state machine: read plan → queue next step → wait for step
21
+ * to finish → repeat. The state machine lives across daemon restarts
22
+ * because both Plans and BackgroundTasks are filesystem-persisted.
23
+ *
24
+ * Why this prevents the autocompact thrash that motivated 1.18.190:
25
+ * - each step gets a FRESH bg-task with a FRESH 200K worker window
26
+ * - state flows between steps via the project's STATUS.md and the
27
+ * plan's `deliverable` fields, NOT via accumulated SDK context
28
+ * - no single worker has to do more than ~2-6 tool calls before
29
+ * completing its scoped deliverable
30
+ * - the model's compaction pressure resets between steps
31
+ */
32
+ import path from 'node:path';
33
+ import pino from 'pino';
34
+ import { createBackgroundTask } from './background-tasks.js';
35
+ import { loadPlan, savePlan } from './bg-planner.js';
36
+ const logger = pino({ name: 'clementine.bg-orchestrator' });
37
+ // ── Public API ───────────────────────────────────────────────────────
38
/**
 * Start a freshly planned chain: queue a background task for the plan's
 * first step and hand that task back to the caller.
 *
 * The plan should already have been written to disk (via savePlan) before
 * this is called, because the queued task only carries the plan's id and
 * the plan is loaded again from that id when the step runs.
 *
 * Side effects: the first step is stamped with the new task id and marked
 * `running`, the plan is marked `in_progress`, and the plan is saved again.
 *
 * @param plan The plan whose first step should be queued.
 * @returns The BackgroundTask created for step 0.
 * @throws Error when the plan has no steps.
 */
export function dispatchChain(plan) {
    const stepCount = plan.steps.length;
    if (!stepCount) {
        throw new Error(`Cannot dispatch chain: plan ${plan.id} has zero steps`);
    }
    const [openingStep] = plan.steps;
    // Only forward a session key when the plan actually recorded one.
    const sessionFields = plan.originatingSessionKey
        ? { sessionKey: plan.originatingSessionKey }
        : {};
    const queuedTask = createBackgroundTask({
        fromAgent: 'clementine',
        prompt: buildStepPrompt(plan, openingStep),
        // Generous per-step budget; the step itself decides how long it needs.
        maxMinutes: 30,
        ...sessionFields,
        kind: 'step',
        chainId: plan.chainId,
        planId: plan.id,
        stepIndex: 0,
    });
    // Record the dispatch on the plan so a later resume does not queue the
    // same step a second time.
    openingStep.taskId = queuedTask.id;
    openingStep.status = 'running';
    plan.status = 'in_progress';
    savePlan(plan, plan.projectPath);
    const logFields = {
        planId: plan.id,
        chainId: plan.chainId,
        stepIndex: 0,
        stepTitle: openingStep.title,
        taskId: queuedTask.id,
    };
    logger.info(logFields, 'dispatchChain: queued step 0');
    return queuedTask;
}
76
+ /**
77
+ * Called by the bg-task framework when a chained step completes.
78
+ * Updates the plan's step status, then either:
79
+ * - queues the next step (chain continues),
80
+ * - marks the plan completed (no more steps), or
81
+ * - pauses the chain (step failed; owner notification surfaces elsewhere).
82
+ *
83
+ * Returns the next BackgroundTask if one was queued, or null otherwise.
84
+ *
85
+ * Safe to call multiple times for the same completed task (idempotent
86
+ * via the step's status check).
87
+ */
88
+ export function advanceChain(opts) {
89
+ const { completedTask } = opts;
90
+ if (!completedTask.planId || typeof completedTask.stepIndex !== 'number') {
91
+ logger.debug({ taskId: completedTask.id }, 'advanceChain: task has no plan id/step index — not a chain step');
92
+ return null;
93
+ }
94
+ const loadPlanImpl = opts.loadPlanFn ?? loadPlan;
95
+ const savePlanImpl = opts.savePlanFn ?? savePlan;
96
+ const createTaskImpl = opts.createTaskFn ?? createBackgroundTask;
97
+ // Plans live alongside the project when one's set; the task carries
98
+ // the planId but not the project path. Try the project path first
99
+ // (fast path), then fall back to the global plans dir inside
100
+ // loadPlan itself.
101
+ const plan = loadPlanImpl(completedTask.planId, undefined);
102
+ if (!plan) {
103
+ logger.warn({ planId: completedTask.planId, taskId: completedTask.id }, 'advanceChain: plan not found — cannot advance');
104
+ return null;
105
+ }
106
+ const step = plan.steps[completedTask.stepIndex];
107
+ if (!step) {
108
+ logger.warn({ planId: plan.id, stepIndex: completedTask.stepIndex }, 'advanceChain: step index out of range');
109
+ return null;
110
+ }
111
+ // Idempotency: if the step has already been marked terminal, don't
112
+ // advance again. Protects against duplicate completion callbacks.
113
+ if (step.status === 'done' || step.status === 'failed' || step.status === 'skipped') {
114
+ logger.debug({ planId: plan.id, stepIndex: step.index, status: step.status }, 'advanceChain: step already terminal — skipping');
115
+ return null;
116
+ }
117
+ // Reflect the task's terminal status onto the plan step.
118
+ if (completedTask.status === 'done') {
119
+ step.status = 'done';
120
+ step.completedAt = completedTask.completedAt ?? new Date().toISOString();
121
+ step.resultPreview = (completedTask.result ?? '').slice(0, 400);
122
+ }
123
+ else {
124
+ // failed | aborted | interrupted — all map to plan-step failure for now
125
+ step.status = 'failed';
126
+ step.completedAt = completedTask.completedAt ?? new Date().toISOString();
127
+ step.resultPreview = (completedTask.error ?? completedTask.result ?? '').slice(0, 400);
128
+ plan.status = 'paused';
129
+ savePlanImpl(plan, plan.projectPath);
130
+ logger.warn({
131
+ planId: plan.id,
132
+ chainId: plan.chainId,
133
+ stepIndex: step.index,
134
+ stepTitle: step.title,
135
+ taskStatus: completedTask.status,
136
+ error: completedTask.error,
137
+ }, 'advanceChain: step failed — chain paused');
138
+ return null;
139
+ }
140
+ // Look for the next pending step.
141
+ const nextStep = plan.steps.find((s, i) => i > step.index && s.status === 'pending');
142
+ if (!nextStep) {
143
+ // Chain complete!
144
+ plan.status = 'completed';
145
+ savePlanImpl(plan, plan.projectPath);
146
+ logger.info({
147
+ planId: plan.id,
148
+ chainId: plan.chainId,
149
+ stepCount: plan.steps.length,
150
+ }, 'advanceChain: chain completed');
151
+ return null;
152
+ }
153
+ // Queue the next step.
154
+ const nextTask = createTaskImpl({
155
+ fromAgent: 'clementine',
156
+ prompt: buildStepPrompt(plan, nextStep),
157
+ maxMinutes: 30,
158
+ ...(plan.originatingSessionKey ? { sessionKey: plan.originatingSessionKey } : {}),
159
+ kind: 'step',
160
+ chainId: plan.chainId,
161
+ planId: plan.id,
162
+ stepIndex: nextStep.index,
163
+ parentTaskId: completedTask.id,
164
+ });
165
+ nextStep.taskId = nextTask.id;
166
+ nextStep.status = 'running';
167
+ savePlanImpl(plan, plan.projectPath);
168
+ logger.info({
169
+ planId: plan.id,
170
+ chainId: plan.chainId,
171
+ stepIndex: nextStep.index,
172
+ stepTitle: nextStep.title,
173
+ taskId: nextTask.id,
174
+ parentTaskId: completedTask.id,
175
+ }, 'advanceChain: queued next step');
176
+ return nextTask;
177
+ }
178
/**
 * Explicitly pause a chain, for example when the owner intervenes.
 *
 * Loads the plan, sets its status to `paused`, appends a
 * `[paused] <reason>` line to the plan's notes when a reason is supplied,
 * and saves the plan. Step statuses are not modified here, so a step that
 * is currently running is left for the caller's cancellation flow to
 * handle. Does nothing when the plan cannot be loaded.
 *
 * @param planId Id of the plan to pause.
 * @param projectPath Optional project path passed through to loadPlan.
 * @param reason Optional human-readable reason recorded in the plan notes.
 */
export function pauseChain(planId, projectPath, reason) {
    const plan = loadPlan(planId, projectPath ?? undefined);
    if (plan) {
        plan.status = 'paused';
        if (reason) {
            // Keep earlier notes and put the pause reason on its own line.
            const earlierNotes = plan.notes ? plan.notes + '\n' : '';
            plan.notes = `${earlierNotes}[paused] ${reason}`;
        }
        savePlan(plan, plan.projectPath);
        logger.info({ planId, reason }, 'pauseChain: chain paused');
    }
}
193
+ /**
194
+ * Resume a paused chain by dispatching its next pending step. If
195
+ * all steps are terminal, marks the plan completed. Returns the
196
+ * dispatched task, or null when nothing to dispatch.
197
+ */
198
+ export function resumeChain(planId, projectPath) {
199
+ const plan = loadPlan(planId, projectPath ?? undefined);
200
+ if (!plan)
201
+ return null;
202
+ if (plan.status === 'completed')
203
+ return null;
204
+ const nextStep = plan.steps.find((s) => s.status === 'pending');
205
+ if (!nextStep) {
206
+ plan.status = 'completed';
207
+ savePlan(plan, plan.projectPath);
208
+ return null;
209
+ }
210
+ const task = createBackgroundTask({
211
+ fromAgent: 'clementine',
212
+ prompt: buildStepPrompt(plan, nextStep),
213
+ maxMinutes: 30,
214
+ ...(plan.originatingSessionKey ? { sessionKey: plan.originatingSessionKey } : {}),
215
+ kind: 'step',
216
+ chainId: plan.chainId,
217
+ planId: plan.id,
218
+ stepIndex: nextStep.index,
219
+ });
220
+ nextStep.taskId = task.id;
221
+ nextStep.status = 'running';
222
+ plan.status = 'in_progress';
223
+ savePlan(plan, plan.projectPath);
224
+ logger.info({ planId, stepIndex: nextStep.index, taskId: task.id }, 'resumeChain: dispatched next step');
225
+ return task;
226
+ }
227
+ // ── Step prompt construction ─────────────────────────────────────────
228
+ /**
229
+ * Build the focused prompt for one chained worker. Designed to be SMALL
230
+ * (~1-2KB) and ANCHORED to the step's deliverable so the worker has a
231
+ * clear stopping condition. Key elements:
232
+ * - The original user request (for context, not for re-doing it)
233
+ * - The plan summary (what's been done, what's next)
234
+ * - THIS step's scope + expected tools + deliverable
235
+ * - Posture: "do ONLY this step. Don't overshoot. State your deliverable
236
+ * in your final response."
237
+ *
238
+ * State that the next step might need is read by that step from the
239
+ * project STATUS.md or the prior step's deliverable file — NOT from
240
+ * this step's response text. Result text is for the orchestrator's
241
+ * advancement decision; deliverables are for the work itself.
242
+ */
243
+ export function buildStepPrompt(plan, step) {
244
+ const lines = [];
245
+ lines.push(`# Chained step ${step.index + 1} of ${plan.steps.length}`);
246
+ lines.push('');
247
+ lines.push(`## Original user request`);
248
+ lines.push(plan.userRequest);
249
+ lines.push('');
250
+ if (plan.projectPath) {
251
+ lines.push(`## Active project`);
252
+ lines.push(`Path: \`${plan.projectPath}\``);
253
+ lines.push('Your cwd is set to this project. Read sources from there, write outputs to `output/`.');
254
+ lines.push('');
255
+ }
256
+ // Concise plan summary — JUST what's been done and what's next.
257
+ // Don't include full step bodies; that's noise.
258
+ lines.push(`## Plan summary`);
259
+ for (const s of plan.steps) {
260
+ const marker = s.status === 'done' ? '✓' : s.status === 'failed' ? '✗' : s.index === step.index ? '→' : '·';
261
+ const detail = s.status === 'done' && s.deliverable ? ` (→ ${s.deliverable})` : '';
262
+ lines.push(` ${marker} ${s.index + 1}. ${s.title}${detail}`);
263
+ }
264
+ lines.push('');
265
+ lines.push(`## Your step (the → above)`);
266
+ lines.push(`**Title**: ${step.title}`);
267
+ lines.push(`**Scope**: ${step.scope}`);
268
+ if (step.expectedTools.length > 0) {
269
+ lines.push(`**Expected tool calls**: ${step.expectedTools.join(', ')}`);
270
+ }
271
+ if (step.deliverable) {
272
+ lines.push(`**Deliverable**: ${step.deliverable}`);
273
+ }
274
+ lines.push('');
275
+ lines.push(`## Step posture`);
276
+ lines.push('Do ONLY this step. Don\'t start the next one — the orchestrator handles that. ' +
277
+ 'When you\'re done, state your deliverable concretely in your final response ' +
278
+ '(file path, URL, confirmation) so the orchestrator can advance the chain. ' +
279
+ 'If you hit a blocker (missing info, ambiguous scope, tool failure), say so explicitly ' +
280
+ 'and stop — don\'t guess.');
281
+ return lines.join('\n');
282
+ }
283
+ // ── Convenience helpers used by run-agent-cron ───────────────────────
284
/**
 * Resolve the project directory recorded on the plan that a chain step
 * task belongs to.
 *
 * The plan is loaded by the task's `planId` (no project path is passed to
 * loadPlan) and its `projectPath` is returned. Per the original note, the
 * caller in run-agent-cron uses the result for the SDK's `cwd` and
 * `additionalDirectories`.
 *
 * @param task A background task; only its `planId` is read.
 * @returns The plan's project path, or undefined when the task has no
 *   planId, the plan cannot be loaded, or the plan has no project path.
 */
export function projectDirForChainTask(task) {
    const { planId } = task;
    if (planId) {
        const owningPlan = loadPlan(planId);
        return owningPlan?.projectPath ?? undefined;
    }
    return undefined;
}
295
+ /**
296
+ * Format a status line for posting to the originating chat after each
297
+ * step completes — gives the owner a real-time view of chain progress.
298
+ */
299
+ export function formatChainStatusUpdate(plan, justCompletedStep) {
300
+ const total = plan.steps.length;
301
+ const done = plan.steps.filter((s) => s.status === 'done').length;
302
+ const lines = [];
303
+ lines.push(`**Step ${justCompletedStep.index + 1}/${total} done**: ${justCompletedStep.title}`);
304
+ if (justCompletedStep.resultPreview) {
305
+ lines.push(`→ ${justCompletedStep.resultPreview.slice(0, 200)}`);
306
+ }
307
+ if (done < total && plan.status === 'in_progress') {
308
+ const nextStep = plan.steps.find((s) => s.status === 'pending');
309
+ if (nextStep)
310
+ lines.push(`Next: ${nextStep.title}`);
311
+ }
312
+ else if (plan.status === 'completed') {
313
+ lines.push(`Chain complete (${done}/${total} steps).`);
314
+ }
315
+ else if (plan.status === 'paused') {
316
+ lines.push(`Chain paused. Tell me how to proceed.`);
317
+ }
318
+ return lines.join('\n');
319
+ }
320
+ // `path` is imported but never referenced directly in this module; the `void path;` statement below exists only to silence the unused-import lint warning.
321
+ // (Path handling happens inside loadPlan/savePlan; this note keeps the reason for the otherwise-unused import obvious to future readers.)
322
+ void path;
323
+ //# sourceMappingURL=bg-orchestrator.js.map
@@ -0,0 +1,142 @@
1
+ /**
2
+ * bg-planner — decompose a multi-step user request into a chain of
3
+ * focused PlanSteps that the orchestrator can dispatch one at a time.
4
+ *
5
+ * Why this exists (1.18.190)
6
+ * ──────────────────────────
7
+ * Before this, a complex multi-step user ask ("find the coaches project,
8
+ * build me an HTML report, deploy it to Netlify, verify the URL") got
9
+ * handed to a single monolithic bg-task worker. The worker had its own
10
+ * 200K context but still autocompact-thrashed because:
11
+ * - tool outputs accumulated across all 5-6 phases of the work
12
+ * - the model lost fidelity to its own past tool outputs as the
13
+ * output-guard tightened from 30KB → 4KB
14
+ * - one bad turn (huge file read, big Glob) poisoned the rest
15
+ *
16
+ * The decomposition pattern this module enables:
17
+ * 1. Planner runs ONCE with Sonnet (not Haiku — plans need real
18
+ * reasoning, see "Model choice" below)
19
+ * 2. Emits a Plan: 3-7 PlanSteps, each with title + scope + expected
20
+ * tool calls + deliverable artifact path
21
+ * 3. Plan persists to <project>/.clementine/plans/<planId>.json
22
+ * (or BASE_DIR/plans/<planId>.json if no active project)
23
+ * 4. Orchestrator (bg-orchestrator.ts) queues one bg-task per step,
24
+ * each with a tight scope and a fresh 200K worker window
25
+ * 5. State flows between steps via STATUS.md + the plan ledger;
26
+ * no step accumulates context from prior steps
27
+ *
28
+ * Model choice: Sonnet, NOT Haiku
29
+ * ────────────────────────────────
30
+ * Planning is a reasoning task, not a transformation. A poorly
31
+ * decomposed plan costs $5+ in downstream worker thrash; a well-
32
+ * decomposed plan saves multiples of that. The marginal cost of
33
+ * Sonnet over Haiku (~$0.05-0.15 vs ~$0.01 per plan) is trivial
34
+ * compared to the downstream cost of bad decomposition. Haiku is for
35
+ * mechanical tasks (extraction, classification, routing); decomposing
36
+ * a multi-domain ask into proper steps is not mechanical.
37
+ *
38
+ * If you're tempted to "save tokens" by flipping this to Haiku, read
39
+ * the 2026-05-12 root-cause plan first
40
+ * (~/.claude/plans/look-at-the-last-vivid-rossum.md). The whole point
41
+ * of this ship is to NOT cut corners on the decomposition layer.
42
+ */
43
+ import type { ProjectMeta } from './assistant.js';
44
+ export interface PlanStep {
45
+ /** 0-indexed position. */
46
+ index: number;
47
+ /** Short imperative title (e.g., "Find the coaches project"). */
48
+ title: string;
49
+ /** What this step does, in 1-2 sentences. The chained worker sees this. */
50
+ scope: string;
51
+ /** Tools the step is expected to call. The chained worker sees this as
52
+ * guidance, not enforcement — overshooting is allowed, just not
53
+ * preferred. */
54
+ expectedTools: string[];
55
+ /** Where the step's output goes (file path, deploy URL, etc.) — used
56
+ * by claim-verification + by the next step to find prior work. */
57
+ deliverable?: string;
58
+ /** Step status — orchestrator updates this. */
59
+ status: 'pending' | 'running' | 'done' | 'failed' | 'skipped';
60
+ /** Set by orchestrator after dispatch. */
61
+ taskId?: string;
62
+ /** Worker's final result text (for visibility, capped). */
63
+ resultPreview?: string;
64
+ /** Set on completion. */
65
+ completedAt?: string;
66
+ }
67
+ export interface Plan {
68
+ /** Unique plan id — also the filename basename. */
69
+ id: string;
70
+ /** Chain id — shared by the planner task and all step tasks for one user request. */
71
+ chainId: string;
72
+ /** Original user request the planner decomposed. */
73
+ userRequest: string;
74
+ /** Resolved project path (if any) when the planner ran. */
75
+ projectPath?: string;
76
+ /** Session key of the originating chat — for delivering the final result. */
77
+ originatingSessionKey?: string;
78
+ /** ISO when the planner emitted this. */
79
+ createdAt: string;
80
+ /** Steps in execution order. */
81
+ steps: PlanStep[];
82
+ /** Overall chain status. Derived from steps; persisted for cheap reads. */
83
+ status: 'pending' | 'in_progress' | 'completed' | 'paused' | 'failed';
84
+ /** Total estimated cost (USD) for this plan if every step's expectedTools fire as-projected.
85
+ * Informational only — not enforced. */
86
+ estimatedCostUsd?: number;
87
+ /** Free-form notes from the planner: known risks, assumptions, etc. */
88
+ notes?: string;
89
+ }
90
+ /**
91
+ * Where plans live. If `projectPath` is set, plans go inside that
92
+ * project's `.clementine/plans/` so they travel with the project; if
93
+ * no project, plans go under `BASE_DIR/plans/` (global).
94
+ */
95
+ export declare function plansDir(projectPath?: string | null): string;
96
+ export declare function planFile(planId: string, projectPath?: string | null): string;
97
+ export declare function savePlan(plan: Plan, projectPath?: string | null): string;
98
+ export declare function loadPlan(planId: string, projectPath?: string | null): Plan | null;
99
+ export interface PlanRequestOptions {
100
+ userRequest: string;
101
+ originatingSessionKey?: string;
102
+ project?: ProjectMeta | null;
103
+ /** Optional override; defaults to Sonnet. NEVER pass Haiku here. */
104
+ model?: string;
105
+ /** Override the SDK query function for tests. */
106
+ llmCall?: (prompt: string, systemPrompt: string, model: string) => Promise<string>;
107
+ }
108
+ /**
109
+ * Decompose a user request into a Plan. Pure async function — no side
110
+ * effects except the optional LLM call. Caller decides whether to
111
+ * persist the result via `savePlan`.
112
+ *
113
+ * Behavior:
114
+ * - Builds a system prompt describing the decomposition contract
115
+ * - Asks the model to emit a JSON object matching the Plan schema
116
+ * - Validates the response against the schema; logs and retries once on parse failure
117
+ * - Returns a Plan ready for orchestrator dispatch
118
+ *
119
+ * Failure modes:
120
+ * - LLM returns non-JSON → throws PlanGenerationError
121
+ * - LLM returns empty steps → throws PlanGenerationError
122
+ * - LLM returns >12 steps → trimmed to first 12 with a warning
123
+ */
124
+ export declare function planRequest(opts: PlanRequestOptions): Promise<Plan>;
125
+ export declare class PlanGenerationError extends Error {
126
+ constructor(message: string);
127
+ }
128
+ interface RawPlannerResponse {
129
+ steps?: Array<{
130
+ title?: unknown;
131
+ scope?: unknown;
132
+ expectedTools?: unknown;
133
+ deliverable?: unknown;
134
+ }>;
135
+ estimatedCostUsd?: number;
136
+ notes?: unknown;
137
+ }
138
+ /** Defensive JSON parse — strips common LLM wrappers (markdown fences,
139
+ * leading/trailing prose) before parsing. */
140
+ export declare function parsePlannerResponse(raw: string): RawPlannerResponse | null;
141
+ export {};
142
+ //# sourceMappingURL=bg-planner.d.ts.map
@@ -0,0 +1,321 @@
1
+ /**
2
+ * bg-planner — decompose a multi-step user request into a chain of
3
+ * focused PlanSteps that the orchestrator can dispatch one at a time.
4
+ *
5
+ * Why this exists (1.18.190)
6
+ * ──────────────────────────
7
+ * Before this, a complex multi-step user ask ("find the coaches project,
8
+ * build me an HTML report, deploy it to Netlify, verify the URL") got
9
+ * handed to a single monolithic bg-task worker. The worker had its own
10
+ * 200K context but still autocompact-thrashed because:
11
+ * - tool outputs accumulated across all 5-6 phases of the work
12
+ * - the model lost fidelity to its own past tool outputs as the
13
+ * output-guard tightened from 30KB → 4KB
14
+ * - one bad turn (huge file read, big Glob) poisoned the rest
15
+ *
16
+ * The decomposition pattern this module enables:
17
+ * 1. Planner runs ONCE with Sonnet (not Haiku — plans need real
18
+ * reasoning, see "Model choice" below)
19
+ * 2. Emits a Plan: 3-7 PlanSteps, each with title + scope + expected
20
+ * tool calls + deliverable artifact path
21
+ * 3. Plan persists to <project>/.clementine/plans/<planId>.json
22
+ * (or BASE_DIR/plans/<planId>.json if no active project)
23
+ * 4. Orchestrator (bg-orchestrator.ts) queues one bg-task per step,
24
+ * each with a tight scope and a fresh 200K worker window
25
+ * 5. State flows between steps via STATUS.md + the plan ledger;
26
+ * no step accumulates context from prior steps
27
+ *
28
+ * Model choice: Sonnet, NOT Haiku
29
+ * ────────────────────────────────
30
+ * Planning is a reasoning task, not a transformation. A poorly
31
+ * decomposed plan costs $5+ in downstream worker thrash; a well-
32
+ * decomposed plan saves multiples of that. The marginal cost of
33
+ * Sonnet over Haiku (~$0.05-0.15 vs ~$0.01 per plan) is trivial
34
+ * compared to the downstream cost of bad decomposition. Haiku is for
35
+ * mechanical tasks (extraction, classification, routing); decomposing
36
+ * a multi-domain ask into proper steps is not mechanical.
37
+ *
38
+ * If you're tempted to "save tokens" by flipping this to Haiku, read
39
+ * the 2026-05-12 root-cause plan first
40
+ * (~/.claude/plans/look-at-the-last-vivid-rossum.md). The whole point
41
+ * of this ship is to NOT cut corners on the decomposition layer.
42
+ */
43
+ import fs from 'node:fs';
44
+ import path from 'node:path';
45
+ import { randomUUID } from 'node:crypto';
46
+ import pino from 'pino';
47
+ import { BASE_DIR, MODELS, applyOneMillionContextRecovery, looksLikeClaudeOneMillionContextError, normalizeClaudeSdkOptionsForOneMillionContext } from '../config.js';
48
+ const logger = pino({ name: 'clementine.bg-planner' });
49
+ // ── Persistence ──────────────────────────────────────────────────────
50
/**
 * Resolve the directory that holds persisted plans.
 *
 * @param projectPath Root of the active project, if any.
 * @returns `<projectPath>/.clementine/plans` when a project path is given
 *          (so plans travel with the project); otherwise the global
 *          `BASE_DIR/plans` directory.
 */
export function plansDir(projectPath) {
    return projectPath
        ? path.join(projectPath, '.clementine', 'plans')
        : path.join(BASE_DIR, 'plans');
}
60
/**
 * Build the on-disk path of a plan's JSON file.
 *
 * The id is coerced to a string, every character outside `[a-zA-Z0-9_-]`
 * is replaced with `_`, and the result is capped at 96 characters before
 * `.json` is appended, so the id cannot introduce path separators.
 *
 * @param planId Plan identifier.
 * @param projectPath Optional project root; forwarded to `plansDir`.
 * @returns Path of the plan file inside the resolved plans directory.
 */
export function planFile(planId, projectPath) {
    const sanitizedId = String(planId)
        .replace(/[^a-zA-Z0-9_-]/g, '_')
        .slice(0, 96);
    const fileName = sanitizedId + '.json';
    return path.join(plansDir(projectPath), fileName);
}
64
/**
 * Persist a plan as pretty-printed JSON (2-space indent).
 *
 * Creates the plans directory (recursively) if it does not exist yet.
 *
 * @param plan Plan to write; `plan.id` determines the file name.
 * @param projectPath Optional project root; forwarded to `plansDir`/`planFile`.
 * @returns The path of the file that was written.
 */
export function savePlan(plan, projectPath) {
    fs.mkdirSync(plansDir(projectPath), { recursive: true });
    const destination = planFile(plan.id, projectPath);
    const serialized = JSON.stringify(plan, null, 2);
    fs.writeFileSync(destination, serialized);
    return destination;
}
71
/**
 * Load a persisted plan by id.
 *
 * Lookup order:
 *   1. the path resolved from `projectPath` (project-scoped when set,
 *      global otherwise);
 *   2. when `projectPath` was given and that file is missing, the global
 *      plans directory — common when the project was added AFTER the plan
 *      was created.
 *
 * The first file that exists wins. If it cannot be read or parsed the
 * failure is logged and `null` is returned; no further location is tried.
 *
 * @param planId Plan identifier.
 * @param projectPath Optional project root.
 * @returns The parsed plan, or `null` when no file exists or the file is
 *          unreadable / not valid JSON.
 */
export function loadPlan(planId, projectPath) {
    const candidates = [planFile(planId, projectPath)];
    if (projectPath)
        candidates.push(planFile(planId));
    for (const file of candidates) {
        if (!fs.existsSync(file))
            continue;
        try {
            return JSON.parse(fs.readFileSync(file, 'utf-8'));
        }
        catch (err) {
            // Both locations report failures the same way; previously the
            // fallback location failed silently.
            logger.warn({ err, planId, file }, 'plan parse failed');
            return null;
        }
    }
    return null;
}
97
/**
 * Decompose a user request into a Plan. Nothing is persisted here — the
 * caller decides whether to store the result via `savePlan`. The only
 * external effects are the LLM call(s) and, when a project is supplied,
 * the user-prompt builder reading that project's STATUS.md.
 *
 * Behavior:
 *   - Builds a system prompt describing the decomposition contract
 *   - Asks the model to emit a JSON object matching the Plan schema
 *   - If the response cannot be parsed as a JSON object, logs a warning
 *     and retries the call once
 *   - Normalizes each step (string coercion + length caps) and returns a
 *     Plan whose steps are all `pending`
 *
 * Failure modes:
 *   - LLM returns non-JSON on both attempts → throws PlanGenerationError
 *   - LLM returns no usable steps → throws PlanGenerationError
 *   - LLM returns >12 steps → trimmed to first 12 with a warning
 *   - Step entries that are not objects are dropped with a warning
 *
 * @param opts Request options; `opts.llmCall` replaces the SDK call (tests).
 * @returns The generated Plan.
 * @throws PlanGenerationError when no usable steps could be obtained.
 *         Errors thrown by the LLM call itself propagate unchanged.
 */
export async function planRequest(opts) {
    // Real multi-step work fits in 3-7 steps; >12 is almost always
    // over-decomposition by the model. Trim to keep chains manageable.
    const MAX_STEPS = 12;
    // Initial call plus one retry when the output is not parseable.
    const MAX_ATTEMPTS = 2;
    const model = opts.model ?? MODELS.sonnet ?? 'claude-sonnet-4-6';
    const chainId = `chain-${randomUUID().slice(0, 12)}`;
    const planId = `plan-${randomUUID().slice(0, 12)}`;
    const systemPrompt = buildPlannerSystemPrompt();
    const userPrompt = buildPlannerUserPrompt(opts);
    let text = '';
    let parsed = null;
    for (let attempt = 1; attempt <= MAX_ATTEMPTS && !parsed; attempt++) {
        text = opts.llmCall
            ? await opts.llmCall(userPrompt, systemPrompt, model)
            : await runPlannerLlm(userPrompt, systemPrompt, model);
        parsed = parsePlannerResponse(text);
        if (!parsed) {
            logger.warn({
                planId,
                attempt,
                maxAttempts: MAX_ATTEMPTS,
                head: String(text ?? '').slice(0, 200),
            }, 'planRequest: planner response was not parseable JSON');
        }
    }
    const returnedSteps = parsed && Array.isArray(parsed.steps) ? parsed.steps : [];
    // Guard against null / primitive / array entries: reading `.title` off
    // `null` would otherwise escape as a TypeError instead of a
    // PlanGenerationError.
    const usableSteps = returnedSteps.filter((entry) => entry !== null && typeof entry === 'object' && !Array.isArray(entry));
    if (usableSteps.length === 0) {
        throw new PlanGenerationError(`Planner returned no steps (raw response head: ${String(text ?? '').slice(0, 200)})`);
    }
    if (usableSteps.length !== returnedSteps.length) {
        logger.warn({ planId, returned: returnedSteps.length, usable: usableSteps.length }, 'planRequest: dropped malformed step entries');
    }
    if (usableSteps.length > MAX_STEPS) {
        logger.warn({ planId, returned: usableSteps.length, kept: MAX_STEPS }, 'planRequest: planner returned too many steps; trimming');
    }
    const steps = usableSteps.slice(0, MAX_STEPS).map((raw, i) => ({
        index: i,
        title: String(raw.title ?? `Step ${i + 1}`).slice(0, 160),
        scope: String(raw.scope ?? '').slice(0, 800),
        expectedTools: Array.isArray(raw.expectedTools)
            ? raw.expectedTools.map((t) => String(t)).filter(Boolean).slice(0, 8)
            : [],
        ...(raw.deliverable ? { deliverable: String(raw.deliverable).slice(0, 400) } : {}),
        status: 'pending',
    }));
    const plan = {
        id: planId,
        chainId,
        userRequest: opts.userRequest,
        ...(opts.project?.path ? { projectPath: opts.project.path } : {}),
        ...(opts.originatingSessionKey ? { originatingSessionKey: opts.originatingSessionKey } : {}),
        createdAt: new Date().toISOString(),
        steps,
        status: 'pending',
        ...(typeof parsed.estimatedCostUsd === 'number' ? { estimatedCostUsd: parsed.estimatedCostUsd } : {}),
        ...(parsed.notes ? { notes: String(parsed.notes).slice(0, 600) } : {}),
    };
    logger.info({
        planId,
        chainId,
        stepCount: steps.length,
        model,
        project: opts.project?.path,
    }, 'planRequest: emitted plan');
    return plan;
}
160
/**
 * Raised by `planRequest` when the planner's output cannot be turned into
 * a Plan. `name` is set to `'PlanGenerationError'` so the error is
 * identifiable in logs and string output.
 */
export class PlanGenerationError extends Error {
    constructor(message) {
        super(message);
        Object.assign(this, { name: 'PlanGenerationError' });
    }
}
166
+ // ── Internals: prompt construction + SDK call ────────────────────────
167
+ function buildPlannerSystemPrompt() {
168
+ return [
169
+ 'You are a planning assistant for Clementine, a personal AI agent.',
170
+ 'Your one job: take a multi-step user request and decompose it into 3-7 focused',
171
+ 'subtasks that an execution worker can run one at a time, each in its own fresh',
172
+ 'context window.',
173
+ '',
174
+ '## Why decomposition matters',
175
+ '',
176
+ 'The execution worker has a 200K context budget per step. If a single step',
177
+ 'tries to do too much (read a 10MB CSV + build HTML + deploy + verify), it',
178
+ 'fills its window with tool outputs, the SDK compacts, fidelity degrades, and',
179
+ 'the worker thrashes. Your job is to keep each step BOUNDED so this can\'t',
180
+ 'happen.',
181
+ '',
182
+ '## Decomposition principles',
183
+ '',
184
+ '1. **One verb per step.** Each step does ONE thing: find, read, build,',
185
+ ' write, deploy, verify. Compound verbs ("build and deploy") = bad step.',
186
+ '2. **State flows through disk, not context.** If step 3 needs data from',
187
+ ' step 1, step 1 writes to a file; step 3 reads it. Don\'t carry data in',
188
+ ' the chain itself.',
189
+ '3. **Each step has ONE deliverable.** A file path, a URL, a confirmation.',
190
+ ' Steps without a clear deliverable are signal that the decomposition is',
191
+ ' off.',
192
+ '4. **Estimate tool calls per step.** A step expected to make >10 tool calls',
193
+ ' probably needs to be split. Aim for 2-6 tool calls per step.',
194
+ '5. **Match steps to the actual user ask.** Don\'t add steps the user didn\'t',
195
+ ' request (e.g., don\'t add a "send confirmation email" step unless they',
196
+ ' asked). Don\'t skip steps they DID ask for.',
197
+ '',
198
+ '## Output format — STRICT JSON only',
199
+ '',
200
+ 'Return ONLY a JSON object with this shape. No markdown fences, no prose:',
201
+ '',
202
+ '{',
203
+ ' "steps": [',
204
+ ' {',
205
+ ' "title": "<short imperative title, e.g. \'Find the coaches project\'>",',
206
+ ' "scope": "<1-2 sentences describing exactly what this step does>",',
207
+ ' "expectedTools": ["tool_name_1", "tool_name_2"],',
208
+ ' "deliverable": "<file path | URL | description of the artifact>"',
209
+ ' }',
210
+ ' // ... 2-11 more steps',
211
+ ' ],',
212
+ ' "estimatedCostUsd": 0.50,',
213
+ ' "notes": "<known risks or assumptions, optional>"',
214
+ '}',
215
+ '',
216
+ 'Available tools the worker can call (these are the most relevant for',
217
+ 'decomposition; full list is bigger):',
218
+ '- project_discover, project_link, project_deploy (Clementine project tools)',
219
+ '- Read, Write, Edit (file I/O)',
220
+ '- Bash (any shell command — prefer `head/awk/jq` over Read for big files)',
221
+ '- Glob, Grep (search)',
222
+ '- memory_search, memory_write (Clementine memory)',
223
+ '- WebFetch, WebSearch (web)',
224
+ '',
225
+ 'Sample expected-tool sequences:',
226
+ '- "Find a project" → [project_discover, project_link]',
227
+ '- "Read source data" → [Bash (head/wc), Read]',
228
+ '- "Build artifact" → [Read, Write]',
229
+ '- "Deploy" → [project_deploy]',
230
+ '- "Verify deploy" → [Bash (curl)]',
231
+ ].join('\n');
232
+ }
233
/**
 * Build the per-request user prompt for the planner model.
 *
 * Layout: the user's request, then an "Active project" section, then the
 * closing instruction. With a project, the section lists its path plus
 * description / keywords when present, and appends up to the first 1500
 * characters of `<project>/.clementine/STATUS.md` if that file exists and
 * is non-empty. Without a project, the section tells the planner to
 * resolve one first if the request implies it.
 *
 * @param opts Planner request options (`userRequest`, optional `project`).
 * @returns The prompt text, lines joined with `\n`.
 */
function buildPlannerUserPrompt(opts) {
    const { project, userRequest } = opts;
    const out = ['## User request', userRequest, '', '## Active project'];
    if (!project) {
        out.push('(none — if this request implies a project, your first step should be project_discover + project_link to resolve it before doing other work)');
    }
    else {
        out.push(`Path: ${project.path}`);
        if (project.description)
            out.push(`Description: ${project.description}`);
        if (project.keywords?.length)
            out.push(`Keywords: ${project.keywords.join(', ')}`);
        // STATUS.md carries state from prior chains; failing to read it
        // must never block planning.
        let statusPreview = '';
        try {
            const statusFile = path.join(project.path, '.clementine', 'STATUS.md');
            if (fs.existsSync(statusFile))
                statusPreview = fs.readFileSync(statusFile, 'utf-8').trim();
        }
        catch { /* non-fatal */ }
        if (statusPreview) {
            out.push('', '## Project STATUS.md (current state)', statusPreview.slice(0, 1500));
        }
    }
    out.push('', 'Decompose the request into a Plan. Return strict JSON, no prose.');
    return out.join('\n');
}
268
/**
 * Run the planner prompt through the Claude Agent SDK as a single-turn
 * query and return the text of the final `result` message.
 *
 * The SDK is imported lazily. Messages of any type other than `result`
 * are ignored. If a `result` message is flagged `is_error`, its error text
 * is checked for the one-million-context failure (triggering the recovery
 * hook when it matches) and then thrown.
 *
 * @param userPrompt Prompt sent as the query input.
 * @param systemPrompt System prompt for the model.
 * @param model Model identifier.
 * @returns The last result text seen, or `''` if none arrived.
 * @throws Error when the SDK reports an error result.
 */
async function runPlannerLlm(userPrompt, systemPrompt, model) {
    const { query } = await import('@anthropic-ai/claude-agent-sdk');
    const events = query({
        prompt: userPrompt,
        options: normalizeClaudeSdkOptionsForOneMillionContext({
            model,
            maxTurns: 1, // single shot — emit JSON, done
            systemPrompt,
        }),
    });
    let finalText = '';
    for await (const event of events) {
        if (event.type !== 'result')
            continue;
        // SDK 'result' message carries the final text.
        if (!event.is_error) {
            finalText = event.result ?? '';
            continue;
        }
        const failure = Array.isArray(event.errors)
            ? event.errors.join('; ')
            : String(event.result ?? '');
        if (looksLikeClaudeOneMillionContextError(failure))
            applyOneMillionContextRecovery();
        throw new Error(failure || 'Planner SDK call failed');
    }
    return finalText;
}
294
/** Parse `text` as JSON and return it only if it is a plain (non-array)
 * object; any parse failure or other JSON value yields null. */
function tryParseJsonObject(text) {
    try {
        const value = JSON.parse(text);
        return value && typeof value === 'object' && !Array.isArray(value) ? value : null;
    }
    catch {
        return null;
    }
}
/** Return the first brace-balanced `{...}` span in `text`, ignoring braces
 * inside double-quoted JSON strings; null if no balanced span exists. */
function sliceFirstBalancedObject(text) {
    const start = text.indexOf('{');
    if (start === -1)
        return null;
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < text.length; i++) {
        const ch = text[i];
        if (inString) {
            if (escaped)
                escaped = false;
            else if (ch === '\\')
                escaped = true;
            else if (ch === '"')
                inString = false;
            continue;
        }
        if (ch === '"') {
            inString = true;
        }
        else if (ch === '{') {
            depth++;
        }
        else if (ch === '}') {
            depth--;
            if (depth === 0)
                return text.slice(start, i + 1);
        }
    }
    return null;
}
/**
 * Defensive JSON parse — strips common LLM wrappers (markdown fences,
 * leading/trailing prose) before parsing.
 *
 * Candidates are tried in order and the first one that parses to a plain
 * object wins:
 *   1. the trimmed response as-is (so valid JSON whose strings happen to
 *      contain backticks is never mangled by fence stripping);
 *   2. the contents of the first ``` / ```json fence;
 *   3. for the fence contents, then for the whole response: the span from
 *      the first `{` to the last `}`, then the first brace-balanced object
 *      (handles trailing prose, including prose that contains `}`).
 *
 * @param raw Raw model output.
 * @returns The parsed object, or null when the input is empty or no
 *          candidate parses to a non-array object.
 */
export function parsePlannerResponse(raw) {
    if (!raw || !raw.trim())
        return null;
    const trimmed = raw.trim();
    const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
    const bases = fenceMatch ? [fenceMatch[1].trim(), trimmed] : [trimmed];
    const candidates = [trimmed];
    for (const base of bases) {
        candidates.push(base);
        const greedy = base.match(/\{[\s\S]*\}/);
        if (greedy)
            candidates.push(greedy[0]);
        const balanced = sliceFirstBalancedObject(base);
        if (balanced)
            candidates.push(balanced);
    }
    for (const candidate of candidates) {
        const parsed = tryParseJsonObject(candidate);
        if (parsed)
            return parsed;
    }
    return null;
}
321
+ //# sourceMappingURL=bg-planner.js.map
@@ -266,6 +266,23 @@ export declare class CronScheduler {
266
266
  * out of the trigger interval.
267
267
  */
268
268
  private processBackgroundTasks;
269
+ /**
270
+ * 1.18.190 — execute a planner bg-task. The task's `prompt` is the
271
+ * original user request that caused chat overflow. Decompose via
272
+ * `planRequest` (Sonnet), persist the Plan, dispatch step 0 via
273
+ * `dispatchChain`, then mark this planner task done with a summary.
274
+ *
275
+ * Failure mode: if planRequest throws, mark planner task failed
276
+ * and surface to the originating chat — no chain ever starts.
277
+ */
278
+ private runPlannerBackgroundTask;
279
+ /**
280
+ * 1.18.190 — after a chained step completes (success or failure),
281
+ * advance the chain. Success queues the next step; failure pauses
282
+ * the chain and notifies the owner. The owner gets a per-step
283
+ * update message so they see progress instead of one final dump.
284
+ */
285
+ private advanceChainAfterStep;
269
286
  /** Process any pending trigger files and run the corresponding jobs. */
270
287
  private processTriggers;
271
288
  /** Process any pending goal work trigger files. Routes through the execution advisor. */
@@ -1978,7 +1978,23 @@ export class CronScheduler {
1978
1978
  const started = markBgTaskRunning(task.id, undefined, { jobName });
1979
1979
  if (!started)
1980
1980
  continue;
1981
- logger.info({ id: started.id, fromAgent: started.fromAgent, maxMinutes: started.maxMinutes }, 'Background task picked up');
1981
+ logger.info({
1982
+ id: started.id,
1983
+ fromAgent: started.fromAgent,
1984
+ maxMinutes: started.maxMinutes,
1985
+ kind: started.kind ?? 'monolithic',
1986
+ chainId: started.chainId,
1987
+ planId: started.planId,
1988
+ stepIndex: started.stepIndex,
1989
+ }, 'Background task picked up');
1990
+ // 1.18.190 — planner tasks don't run through the worker pattern.
1991
+ // They make a single LLM call (Sonnet) that decomposes the user
1992
+ // request into a Plan, persist it, then dispatch step 0.
1993
+ if (started.kind === 'planner') {
1994
+ this.runPlannerBackgroundTask(started)
1995
+ .catch((err) => logger.warn({ err, id: started.id }, 'Planner background task failed at top level'));
1996
+ continue;
1997
+ }
1982
1998
  updateBackgroundTask(started.id, {
1983
1999
  lastNotifiedAt: new Date().toISOString(),
1984
2000
  progressMessageCount: 0,
@@ -2031,6 +2047,14 @@ export class CronScheduler {
2031
2047
  }
2032
2048
  // Dispatch the deliverable to the originating agent's channel.
2033
2049
  const completed = loadBackgroundTask(started.id) ?? started;
2050
+ // 1.18.190 — chained step completion. If this task was a step in
2051
+ // a Plan, advance the chain instead of treating completion as the
2052
+ // end of the job. The orchestrator queues the next step and the
2053
+ // owner sees a step-by-step update rather than one final dump.
2054
+ if (completed.kind === 'step') {
2055
+ this.advanceChainAfterStep(completed).catch((err) => logger.warn({ err, id: completed.id }, 'Failed to advance chain after step completion'));
2056
+ return;
2057
+ }
2034
2058
  const deliveryHead = `**Background task ${started.id} done** — ${started.prompt.slice(0, 100).replace(/\s+/g, ' ')}${started.prompt.length > 100 ? '...' : ''}\n\n`;
2035
2059
  const body = (result ?? '').slice(0, 1500);
2036
2060
  const deliveryMessage = deliveryHead + body;
@@ -2053,6 +2077,13 @@ export class CronScheduler {
2053
2077
  logger.warn({ err: saveErr, id: started.id }, 'Failed to mark background task failed');
2054
2078
  }
2055
2079
  const failed = loadBackgroundTask(started.id) ?? started;
2080
+ // 1.18.190 — chained step failure pauses the chain. The owner
2081
+ // gets a "chain paused at step N" message instead of just "task
2082
+ // failed" so they can retry/edit/abandon.
2083
+ if (failed.kind === 'step') {
2084
+ this.advanceChainAfterStep(failed).catch((advanceErr) => logger.warn({ err: advanceErr, id: failed.id }, 'Failed to pause chain after step failure'));
2085
+ return;
2086
+ }
2056
2087
  const failMessage = `**Background task ${started.id} failed** — ${errStr.slice(0, 200)}`;
2057
2088
  this.dispatcher
2058
2089
  .send(failMessage, this.dispatchContextForBackgroundTask(failed))
@@ -2063,6 +2094,122 @@ export class CronScheduler {
2063
2094
  });
2064
2095
  }
2065
2096
  }
2097
+ /**
2098
+ * 1.18.190 — execute a planner bg-task. The task's `prompt` is the
2099
+ * original user request that caused chat overflow. Decompose via
2100
+ * `planRequest` (Sonnet), persist the Plan, dispatch step 0 via
2101
+ * `dispatchChain`, then mark this planner task done with a summary.
2102
+ *
2103
+ * Failure mode: if planRequest throws, mark planner task failed
2104
+ * and surface to the originating chat — no chain ever starts.
2105
+ */
2106
+ async runPlannerBackgroundTask(task) {
2107
+ try {
2108
+ const { planRequest, savePlan } = await import('../agent/bg-planner.js');
2109
+ const { dispatchChain } = await import('../agent/bg-orchestrator.js');
2110
+ // Resolve the active project for this session, if any. The chat
2111
+ // overflow path already set this when the user mentioned a
2112
+ // project name; if none was set, the planner runs without
2113
+ // project context and its first step is typically a
2114
+ // project_discover.
2115
+ const project = task.sessionKey
2116
+ ? (this.gateway.getSessionProject?.(task.sessionKey) ?? null)
2117
+ : null;
2118
+ logger.info({
2119
+ id: task.id,
2120
+ sessionKey: task.sessionKey,
2121
+ projectPath: project?.path,
2122
+ }, 'Planner bg-task: decomposing user request');
2123
+ const plan = await planRequest({
2124
+ userRequest: task.prompt,
2125
+ ...(task.sessionKey ? { originatingSessionKey: task.sessionKey } : {}),
2126
+ ...(project ? { project } : {}),
2127
+ });
2128
+ savePlan(plan, plan.projectPath);
2129
+ // Mark the planner task done with a concise summary so the
2130
+ // dashboard + recall show what happened.
2131
+ const planSummary = [
2132
+ `Planned ${plan.steps.length} steps for: ${task.prompt.slice(0, 80)}${task.prompt.length > 80 ? '...' : ''}`,
2133
+ '',
2134
+ ...plan.steps.map((s, i) => `${i + 1}. ${s.title}`),
2135
+ '',
2136
+ `Chain id: ${plan.chainId}`,
2137
+ `Plan id: ${plan.id}`,
2138
+ ].join('\n');
2139
+ try {
2140
+ markBgTaskDone(task.id, planSummary);
2141
+ }
2142
+ catch (err) {
2143
+ logger.warn({ err, id: task.id }, 'Failed to mark planner task done');
2144
+ }
2145
+ // Send a kickoff message to the originating chat — "I planned X
2146
+ // steps, starting step 1 now" — so the owner sees the
2147
+ // decomposition before watching steps land one at a time.
2148
+ const completed = loadBackgroundTask(task.id) ?? task;
2149
+ const kickoffHeader = `**Plan ready (${plan.steps.length} steps)** — starting step 1: ${plan.steps[0]?.title ?? '(unknown)'}\n\n`;
2150
+ const kickoffBody = plan.steps.map((s, i) => `${i + 1}. ${s.title}`).join('\n');
2151
+ this.dispatcher
2152
+ .send(kickoffHeader + kickoffBody, this.dispatchContextForBackgroundTask(completed))
2153
+ .catch((err) => logger.debug({ err, id: task.id }, 'Failed to dispatch plan kickoff'));
2154
+ this.mirrorBackgroundTaskToChat(completed.sessionKey, `[Plan ${plan.id} created with ${plan.steps.length} steps for: ${task.prompt.slice(0, 200)}]`, kickoffHeader + kickoffBody, completed.id);
2155
+ // Dispatch the first step. The orchestrator persists task ids
2156
+ // back into the plan so subsequent advancement is idempotent.
2157
+ dispatchChain(plan);
2158
+ }
2159
+ catch (err) {
2160
+ const errStr = String(err).slice(0, 500);
2161
+ logger.warn({ err, id: task.id }, 'Planner bg-task failed');
2162
+ try {
2163
+ markBgTaskFailed(task.id, errStr, 'failed');
2164
+ }
2165
+ catch (saveErr) {
2166
+ logger.warn({ err: saveErr, id: task.id }, 'Failed to mark planner task failed');
2167
+ }
2168
+ const failed = loadBackgroundTask(task.id) ?? task;
2169
+ const failMessage = `**Planning failed** — ${errStr.slice(0, 300)}\n\nThe request didn't decompose into chained steps. Try rephrasing or breaking it up manually.`;
2170
+ this.dispatcher
2171
+ .send(failMessage, this.dispatchContextForBackgroundTask(failed))
2172
+ .catch(() => { });
2173
+ }
2174
+ }
2175
+ /**
2176
+ * 1.18.190 — after a chained step completes (success or failure),
2177
+ * advance the chain. Success queues the next step; failure pauses
2178
+ * the chain and notifies the owner. The owner gets a per-step
2179
+ * update message so they see progress instead of one final dump.
2180
+ */
2181
+ async advanceChainAfterStep(completed) {
2182
+ try {
2183
+ const { advanceChain, formatChainStatusUpdate } = await import('../agent/bg-orchestrator.js');
2184
+ const { loadPlan } = await import('../agent/bg-planner.js');
2185
+ const next = advanceChain({ completedTask: completed });
2186
+ const plan = completed.planId ? loadPlan(completed.planId) : null;
2187
+ // Send a step-level update to the originating chat.
2188
+ if (plan && typeof completed.stepIndex === 'number') {
2189
+ const step = plan.steps[completed.stepIndex];
2190
+ if (step) {
2191
+ const statusMessage = formatChainStatusUpdate(plan, step);
2192
+ this.dispatcher
2193
+ .send(statusMessage, this.dispatchContextForBackgroundTask(completed))
2194
+ .catch((err) => logger.debug({ err, id: completed.id }, 'Failed to dispatch chain step update'));
2195
+ this.mirrorBackgroundTaskToChat(completed.sessionKey, `[Chain ${plan.chainId} step ${completed.stepIndex + 1}/${plan.steps.length}: ${step.status}]`, statusMessage, completed.id);
2196
+ }
2197
+ }
2198
+ // If no next step was queued (chain complete or paused), nothing
2199
+ // more to do here — the formatChainStatusUpdate already covered
2200
+ // the user-facing summary.
2201
+ if (!next) {
2202
+ logger.info({
2203
+ completedTaskId: completed.id,
2204
+ planId: completed.planId,
2205
+ stepIndex: completed.stepIndex,
2206
+ }, 'Chain advancement: no next step (complete or paused)');
2207
+ }
2208
+ }
2209
+ catch (err) {
2210
+ logger.warn({ err, id: completed.id }, 'advanceChainAfterStep: failed');
2211
+ }
2212
+ }
2066
2213
  /** Process any pending trigger files and run the corresponding jobs. */
2067
2214
  processTriggers() {
2068
2215
  if (!existsSync(this.triggerDir))
@@ -430,26 +430,42 @@ export class Gateway {
430
430
  ].join('\n');
431
431
  }
432
432
  queueBackgroundTaskAfterContextOverflow(sessionKey, prompt) {
433
- const recommendation = detectComplexTaskForBackground(prompt);
433
+ // 1.18.190 — the chat-overflow recovery path is now the canonical
434
+ // entry to the planner-orchestrator chain. Instead of queuing a
435
+ // monolithic bg-task that tries to do everything in one worker
436
+ // (which thrashed when the worker's context filled), we queue a
437
+ // planner task. The planner is a tiny Sonnet LLM call that
438
+ // decomposes the user's request into 3-7 PlanSteps; the
439
+ // orchestrator then dispatches one step at a time, each with
440
+ // its own fresh 200K worker window. See agent/bg-planner.ts +
441
+ // agent/bg-orchestrator.ts for the full pattern.
442
+ //
443
+ // The legacy `detectComplexTaskForBackground` heuristic is no
444
+ // longer used here — the planner itself decides how to decompose,
445
+ // and per-step maxMinutes is governed by orchestrator settings.
446
+ // Planner tasks get a tight 5-minute cap; total chain wall-clock
447
+ // is the sum of each step's own maxMinutes.
434
448
  const task = createBackgroundTask({
435
449
  fromAgent: this.backgroundAgentForSession(sessionKey),
436
450
  prompt,
437
- maxMinutes: recommendation?.suggestedMaxMinutes ?? 60,
451
+ maxMinutes: 5, // planner needs minutes, not hours
438
452
  sessionKey,
453
+ kind: 'planner',
439
454
  });
440
455
  logger.warn({
441
456
  taskId: task.id,
442
457
  sessionKey,
443
458
  fromAgent: task.fromAgent,
444
- maxMinutes: task.maxMinutes,
445
- }, 'Queued background task after repeated chat context overflow');
459
+ kind: 'planner',
460
+ }, 'Queued planner task after repeated chat context overflow');
446
461
  return {
447
462
  task,
448
463
  response: [
449
- `The live chat context hit the limit, so I moved this into background task **${task.id}** and kept your request attached.`,
464
+ `The live chat context hit the limit, so I'm decomposing your request into chained steps via background task **${task.id}**.`,
450
465
  '',
451
- `It will run as **${task.fromAgent}** in a fresh task session with a ${task.maxMinutes} minute cap.`,
452
- `Use \`status ${task.id}\` or check the dashboard Background Tasks panel for progress.`,
466
+ `Step 1: a Sonnet planner reads the request and emits a plan (~30 seconds).`,
467
+ `Then each step runs as its own fresh task you'll see step-by-step updates rather than one big "done" at the end.`,
468
+ `Use \`status ${task.id}\` or the dashboard Background Tasks panel for progress.`,
453
469
  ].join('\n'),
454
470
  };
455
471
  }
package/dist/types.d.ts CHANGED
@@ -298,6 +298,16 @@ export interface BackgroundTask {
298
298
  mirroredAt?: string;
299
299
  verificationFlag?: 'claimed-without-evidence';
300
300
  verificationDetails?: string;
301
+ /** Discriminator: planner / step / monolithic (default). */
302
+ kind?: 'planner' | 'step' | 'monolithic';
303
+ /** Chain identifier shared by the planner task and all its step tasks. */
304
+ chainId?: string;
305
+ /** Plan identifier (file basename in .clementine/plans/<planId>.json). */
306
+ planId?: string;
307
+ /** For kind='step': 0-indexed position within the plan's steps array. */
308
+ stepIndex?: number;
309
+ /** For kind='step': the task that queued this one (planner or prev step). */
310
+ parentTaskId?: string;
301
311
  }
302
312
  /**
303
313
  * State for one specialist agent's heartbeat scheduler. Persisted at
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.18.189",
3
+ "version": "1.18.190",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",