clementine-agent 1.18.188 → 1.18.190

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,12 +26,21 @@ export interface BackgroundTaskOptions {
26
26
  /**
27
27
  * Create a new pending task on disk and return it. Caller (the MCP tool)
28
28
  * doesn't await execution — the daemon picks the task up asynchronously.
29
+ *
30
+ * 1.18.190 — accepts the new chain fields (kind / chainId / planId /
31
+ * stepIndex / parentTaskId) for planner-orchestrator chained tasks. All
32
+ * are optional; callers that pass none get the legacy monolithic shape.
29
33
  */
30
34
  export declare function createBackgroundTask(input: {
31
35
  fromAgent: string;
32
36
  prompt: string;
33
37
  maxMinutes: number;
34
38
  sessionKey?: string;
39
+ kind?: BackgroundTask['kind'];
40
+ chainId?: string;
41
+ planId?: string;
42
+ stepIndex?: number;
43
+ parentTaskId?: string;
35
44
  }, opts?: BackgroundTaskOptions): BackgroundTask;
36
45
  /** Load a task by id, or null if not found / malformed. */
37
46
  export declare function loadBackgroundTask(id: string, opts?: BackgroundTaskOptions): BackgroundTask | null;
@@ -45,6 +45,10 @@ function safeWrite(file, task) {
45
45
  /**
46
46
  * Create a new pending task on disk and return it. Caller (the MCP tool)
47
47
  * doesn't await execution — the daemon picks the task up asynchronously.
48
+ *
49
+ * 1.18.190 — accepts the new chain fields (kind / chainId / planId /
50
+ * stepIndex / parentTaskId) for planner-orchestrator chained tasks. All
51
+ * are optional; callers that pass none get the legacy monolithic shape.
48
52
  */
49
53
  export function createBackgroundTask(input, opts) {
50
54
  const now = new Date();
@@ -58,6 +62,16 @@ export function createBackgroundTask(input, opts) {
58
62
  };
59
63
  if (input.sessionKey)
60
64
  task.sessionKey = input.sessionKey;
65
+ if (input.kind)
66
+ task.kind = input.kind;
67
+ if (input.chainId)
68
+ task.chainId = input.chainId;
69
+ if (input.planId)
70
+ task.planId = input.planId;
71
+ if (typeof input.stepIndex === 'number')
72
+ task.stepIndex = input.stepIndex;
73
+ if (input.parentTaskId)
74
+ task.parentTaskId = input.parentTaskId;
61
75
  safeWrite(pathFor(task.id, opts), task);
62
76
  return task;
63
77
  }
@@ -0,0 +1,103 @@
1
+ /**
2
+ * bg-orchestrator — drive a Plan from start to finish by queuing one
3
+ * bg-task per PlanStep, advancing the chain as each step completes.
4
+ *
5
+ * Why this exists (1.18.190)
6
+ * ──────────────────────────
7
+ * The bg-planner produces a Plan with 3-7 PlanSteps. This module is
8
+ * the runtime that executes that Plan, one step at a time, with each
9
+ * step getting its own fresh bg-task + worker context. The chain only
10
+ * advances when the previous step completed successfully — failures
11
+ * pause the chain and notify the owner.
12
+ *
13
+ * Architectural role:
14
+ * bg-planner.ts → Plan (data)
15
+ * bg-orchestrator.ts → drives the Plan (this module)
16
+ * background-tasks.ts → bg-task persistence (filesystem)
17
+ * run-agent-cron.ts → runs the actual SDK call for each step
18
+ *
19
+ * The orchestrator is NEVER the thing reading files or calling APIs.
20
+ * It's a state machine: read plan → queue next step → wait for step
21
+ * to finish → repeat. The state machine lives across daemon restarts
22
+ * because both Plans and BackgroundTasks are filesystem-persisted.
23
+ *
24
+ * Why this prevents the autocompact thrash that motivated 1.18.190:
25
+ * - each step gets a FRESH bg-task with a FRESH 200K worker window
26
+ * - state flows between steps via the project's STATUS.md and the
27
+ * plan's `deliverable` fields, NOT via accumulated SDK context
28
+ * - no single worker has to do more than ~2-6 tool calls before
29
+ * completing its scoped deliverable
30
+ * - the model's compaction pressure resets between steps
31
+ */
32
+ import { createBackgroundTask } from './background-tasks.js';
33
+ import { loadPlan, savePlan } from './bg-planner.js';
34
+ import type { Plan, PlanStep } from './bg-planner.js';
35
+ import type { BackgroundTask } from '../types.js';
36
+ /**
37
+ * Queue the first step of a freshly-planned chain. Returns the
38
+ * BackgroundTask created for step 0.
39
+ *
40
+ * Caller responsibility: the Plan must already be persisted to disk
41
+ * (via savePlan) before calling this — the dispatched step task
42
+ * carries a planId that will be loaded back at execution time.
43
+ */
44
+ export declare function dispatchChain(plan: Plan): BackgroundTask;
45
+ /**
46
+ * Called by the bg-task framework when a chained step completes.
47
+ * Updates the plan's step status, then either:
48
+ * - queues the next step (chain continues),
49
+ * - marks the plan completed (no more steps), or
50
+ * - pauses the chain (step failed; owner notification surfaces elsewhere).
51
+ *
52
+ * Returns the next BackgroundTask if one was queued, or null otherwise.
53
+ *
54
+ * Safe to call multiple times for the same completed task (idempotent
55
+ * via the step's status check).
56
+ */
57
+ export declare function advanceChain(opts: {
58
+ completedTask: BackgroundTask;
59
+ /** Optional override for tests; defaults to filesystem. */
60
+ loadPlanFn?: typeof loadPlan;
61
+ savePlanFn?: typeof savePlan;
62
+ createTaskFn?: typeof createBackgroundTask;
63
+ }): BackgroundTask | null;
64
+ /**
65
+ * Pause a chain explicitly (e.g., owner intervention). The current
66
+ * running step is left alone — caller can mark it however the
67
+ * downstream cancellation flow does.
68
+ */
69
+ export declare function pauseChain(planId: string, projectPath?: string | null, reason?: string): void;
70
+ /**
71
+ * Resume a paused chain by dispatching its next pending step. If
72
+ * all steps are terminal, marks the plan completed. Returns the
73
+ * dispatched task, or null when nothing to dispatch.
74
+ */
75
+ export declare function resumeChain(planId: string, projectPath?: string | null): BackgroundTask | null;
76
+ /**
77
+ * Build the focused prompt for one chained worker. Designed to be SMALL
78
+ * (~1-2KB) and ANCHORED to the step's deliverable so the worker has a
79
+ * clear stopping condition. Key elements:
80
+ * - The original user request (for context, not for re-doing it)
81
+ * - The plan summary (what's been done, what's next)
82
+ * - THIS step's scope + expected tools + deliverable
83
+ * - Posture: "do ONLY this step. Don't overshoot. State your deliverable
84
+ * in your final response."
85
+ *
86
+ * State that the next step might need is read by that step from the
87
+ * project STATUS.md or the prior step's deliverable file — NOT from
88
+ * this step's response text. Result text is for the orchestrator's
89
+ * advancement decision; deliverables are for the work itself.
90
+ */
91
+ export declare function buildStepPrompt(plan: Plan, step: PlanStep): string;
92
+ /**
93
+ * Given a chain step's task, load its plan and return the plan's
94
+ * project path (undefined when there is none). Used by run-agent-cron to set the SDK's `cwd` and
95
+ * `additionalDirectories` to the project root for the step.
96
+ */
97
+ export declare function projectDirForChainTask(task: BackgroundTask): string | undefined;
98
+ /**
99
+ * Format a status line for posting to the originating chat after each
100
+ * step completes — gives the owner a real-time view of chain progress.
101
+ */
102
+ export declare function formatChainStatusUpdate(plan: Plan, justCompletedStep: PlanStep): string;
103
+ //# sourceMappingURL=bg-orchestrator.d.ts.map
@@ -0,0 +1,323 @@
1
+ /**
2
+ * bg-orchestrator — drive a Plan from start to finish by queuing one
3
+ * bg-task per PlanStep, advancing the chain as each step completes.
4
+ *
5
+ * Why this exists (1.18.190)
6
+ * ──────────────────────────
7
+ * The bg-planner produces a Plan with 3-7 PlanSteps. This module is
8
+ * the runtime that executes that Plan, one step at a time, with each
9
+ * step getting its own fresh bg-task + worker context. The chain only
10
+ * advances when the previous step completed successfully — failures
11
+ * pause the chain and notify the owner.
12
+ *
13
+ * Architectural role:
14
+ * bg-planner.ts → Plan (data)
15
+ * bg-orchestrator.ts → drives the Plan (this module)
16
+ * background-tasks.ts → bg-task persistence (filesystem)
17
+ * run-agent-cron.ts → runs the actual SDK call for each step
18
+ *
19
+ * The orchestrator is NEVER the thing reading files or calling APIs.
20
+ * It's a state machine: read plan → queue next step → wait for step
21
+ * to finish → repeat. The state machine lives across daemon restarts
22
+ * because both Plans and BackgroundTasks are filesystem-persisted.
23
+ *
24
+ * Why this prevents the autocompact thrash that motivated 1.18.190:
25
+ * - each step gets a FRESH bg-task with a FRESH 200K worker window
26
+ * - state flows between steps via the project's STATUS.md and the
27
+ * plan's `deliverable` fields, NOT via accumulated SDK context
28
+ * - no single worker has to do more than ~2-6 tool calls before
29
+ * completing its scoped deliverable
30
+ * - the model's compaction pressure resets between steps
31
+ */
32
+ import path from 'node:path';
33
+ import pino from 'pino';
34
+ import { createBackgroundTask } from './background-tasks.js';
35
+ import { loadPlan, savePlan } from './bg-planner.js';
36
+ const logger = pino({ name: 'clementine.bg-orchestrator' });
37
+ // ── Public API ───────────────────────────────────────────────────────
38
/**
 * Start a chain by queuing a background task for the plan's first step.
 *
 * The plan is expected to be on disk already (the queued task only
 * carries the plan id). After the task is created, the first step is
 * stamped with the task id and marked `running`, the plan is marked
 * `in_progress`, and the plan is saved again.
 *
 * @param plan The plan to start; must contain at least one step.
 * @returns The BackgroundTask created for step 0.
 * @throws Error when the plan has no steps.
 */
export function dispatchChain(plan) {
    if (!plan.steps.length) {
        throw new Error(`Cannot dispatch chain: plan ${plan.id} has zero steps`);
    }
    const [stepZero] = plan.steps;
    // Only forward a session key when the plan actually has one.
    const sessionFields = plan.originatingSessionKey
        ? { sessionKey: plan.originatingSessionKey }
        : {};
    const queued = createBackgroundTask({
        fromAgent: 'clementine',
        prompt: buildStepPrompt(plan, stepZero),
        maxMinutes: 30, // generous per-step budget
        ...sessionFields,
        kind: 'step',
        chainId: plan.chainId,
        planId: plan.id,
        stepIndex: 0,
    });
    // Record the dispatch on the plan so a later resume does not queue
    // the same step a second time.
    stepZero.taskId = queued.id;
    stepZero.status = 'running';
    plan.status = 'in_progress';
    savePlan(plan, plan.projectPath);
    const logFields = {
        planId: plan.id,
        chainId: plan.chainId,
        stepIndex: 0,
        stepTitle: stepZero.title,
        taskId: queued.id,
    };
    logger.info(logFields, 'dispatchChain: queued step 0');
    return queued;
}
76
/**
 * Advance a chain after one of its step tasks has finished.
 *
 * Copies the task's outcome onto the matching plan step, then:
 *   - step did not finish as `done` → step marked `failed`, plan `paused`;
 *   - no later pending step         → plan marked `completed`;
 *   - plan is `paused`              → outcome saved, nothing queued;
 *   - otherwise                     → the next pending step is queued.
 *
 * Calling this again for a step that is already `done`, `failed` or
 * `skipped` is a no-op.
 *
 * @param opts.completedTask The finished task; ignored unless it carries
 *   both `planId` and a numeric `stepIndex`.
 * @param opts.loadPlanFn   Optional replacement for loadPlan (tests).
 * @param opts.savePlanFn   Optional replacement for savePlan (tests).
 * @param opts.createTaskFn Optional replacement for createBackgroundTask (tests).
 * @returns The newly queued BackgroundTask, or null when nothing was queued.
 */
export function advanceChain(opts) {
    const { completedTask } = opts;
    if (!completedTask.planId || typeof completedTask.stepIndex !== 'number') {
        logger.debug({ taskId: completedTask.id }, 'advanceChain: task has no plan id/step index — not a chain step');
        return null;
    }
    const loadPlanImpl = opts.loadPlanFn ?? loadPlan;
    const savePlanImpl = opts.savePlanFn ?? savePlan;
    const createTaskImpl = opts.createTaskFn ?? createBackgroundTask;
    // The task carries the planId but not the project path, so the plan
    // is looked up without one.
    // NOTE(review): this relies on loadPlan being able to locate
    // project-scoped plans from the id alone — confirm in bg-planner.
    const plan = loadPlanImpl(completedTask.planId, undefined);
    if (!plan) {
        logger.warn({ planId: completedTask.planId, taskId: completedTask.id }, 'advanceChain: plan not found — cannot advance');
        return null;
    }
    const step = plan.steps[completedTask.stepIndex];
    if (!step) {
        logger.warn({ planId: plan.id, stepIndex: completedTask.stepIndex }, 'advanceChain: step index out of range');
        return null;
    }
    // Idempotency: a step that is already terminal has been handled by
    // an earlier callback, so a duplicate completion must not advance
    // the chain again.
    if (step.status === 'done' || step.status === 'failed' || step.status === 'skipped') {
        logger.debug({ planId: plan.id, stepIndex: step.index, status: step.status }, 'advanceChain: step already terminal — skipping');
        return null;
    }
    // Reflect the task's terminal status onto the plan step.
    if (completedTask.status === 'done') {
        step.status = 'done';
        step.completedAt = completedTask.completedAt ?? new Date().toISOString();
        step.resultPreview = (completedTask.result ?? '').slice(0, 400);
    }
    else {
        // Any non-`done` task status is treated as a plan-step failure.
        step.status = 'failed';
        step.completedAt = completedTask.completedAt ?? new Date().toISOString();
        step.resultPreview = (completedTask.error ?? completedTask.result ?? '').slice(0, 400);
        plan.status = 'paused';
        savePlanImpl(plan, plan.projectPath);
        logger.warn({
            planId: plan.id,
            chainId: plan.chainId,
            stepIndex: step.index,
            stepTitle: step.title,
            taskStatus: completedTask.status,
            error: completedTask.error,
        }, 'advanceChain: step failed — chain paused');
        return null;
    }
    // Look for the next pending step after the one that just finished.
    const nextStep = plan.steps.find((s, i) => i > step.index && s.status === 'pending');
    if (!nextStep) {
        // Nothing left to run: the chain is complete.
        plan.status = 'completed';
        savePlanImpl(plan, plan.projectPath);
        logger.info({
            planId: plan.id,
            chainId: plan.chainId,
            stepCount: plan.steps.length,
        }, 'advanceChain: chain completed');
        return null;
    }
    // Respect an explicit pause: pauseChain leaves the running step
    // alone, so its completion lands here. Persist the step outcome but
    // do not queue more work; resumeChain dispatches the next step.
    if (plan.status === 'paused') {
        savePlanImpl(plan, plan.projectPath);
        logger.info({
            planId: plan.id,
            chainId: plan.chainId,
            stepIndex: step.index,
            nextStepIndex: nextStep.index,
        }, 'advanceChain: chain paused — next step not queued');
        return null;
    }
    // Queue the next step.
    const nextTask = createTaskImpl({
        fromAgent: 'clementine',
        prompt: buildStepPrompt(plan, nextStep),
        maxMinutes: 30,
        ...(plan.originatingSessionKey ? { sessionKey: plan.originatingSessionKey } : {}),
        kind: 'step',
        chainId: plan.chainId,
        planId: plan.id,
        stepIndex: nextStep.index,
        parentTaskId: completedTask.id,
    });
    nextStep.taskId = nextTask.id;
    nextStep.status = 'running';
    savePlanImpl(plan, plan.projectPath);
    logger.info({
        planId: plan.id,
        chainId: plan.chainId,
        stepIndex: nextStep.index,
        stepTitle: nextStep.title,
        taskId: nextTask.id,
        parentTaskId: completedTask.id,
    }, 'advanceChain: queued next step');
    return nextTask;
}
178
/**
 * Mark a plan as paused (for example on owner intervention).
 *
 * Only the plan's status and notes are touched; step statuses are not
 * modified here. When a reason is given it is appended to the plan's
 * notes on its own line, prefixed with `[paused]`. Does nothing when
 * the plan cannot be loaded.
 *
 * @param planId      Id of the plan to pause.
 * @param projectPath Optional project path used to locate the plan.
 * @param reason      Optional human-readable reason recorded in the notes.
 */
export function pauseChain(planId, projectPath, reason) {
    const plan = loadPlan(planId, projectPath ?? undefined);
    if (plan) {
        plan.status = 'paused';
        if (reason) {
            const earlierNotes = plan.notes ? plan.notes + '\n' : '';
            plan.notes = `${earlierNotes}[paused] ${reason}`;
        }
        savePlan(plan, plan.projectPath);
        logger.info({ planId, reason }, 'pauseChain: chain paused');
    }
}
193
/**
 * Resume a chain by dispatching its first pending step.
 *
 * Outcomes:
 *   - plan missing or already `completed` → null, nothing changes;
 *   - a step is still `running`           → plan set back to
 *     `in_progress`, nothing dispatched (see below);
 *   - no pending step left                → plan marked `completed`, null;
 *   - otherwise                           → the first pending step is
 *     queued, marked `running`, and the plan is set to `in_progress`.
 *
 * @param planId      Id of the plan to resume.
 * @param projectPath Optional project path used to locate the plan.
 * @returns The dispatched BackgroundTask, or null when nothing was dispatched.
 */
export function resumeChain(planId, projectPath) {
    const plan = loadPlan(planId, projectPath ?? undefined);
    if (!plan)
        return null;
    if (plan.status === 'completed')
        return null;
    // pauseChain leaves the running step alone, so a step may still be
    // in flight here. Dispatching another one would run two steps at
    // once; instead un-pause the plan and let that step's completion
    // drive the chain forward.
    // NOTE(review): a step left `running` by a task that never reports
    // completion will block resume — confirm how such tasks are reaped.
    const inFlight = plan.steps.find((s) => s.status === 'running');
    if (inFlight) {
        if (plan.status !== 'in_progress') {
            plan.status = 'in_progress';
            savePlan(plan, plan.projectPath);
        }
        logger.info({ planId, stepIndex: inFlight.index, taskId: inFlight.taskId }, 'resumeChain: step still running — not dispatching another');
        return null;
    }
    const nextStep = plan.steps.find((s) => s.status === 'pending');
    if (!nextStep) {
        // Nothing pending and nothing running: the chain is finished.
        plan.status = 'completed';
        savePlan(plan, plan.projectPath);
        return null;
    }
    const task = createBackgroundTask({
        fromAgent: 'clementine',
        prompt: buildStepPrompt(plan, nextStep),
        maxMinutes: 30,
        ...(plan.originatingSessionKey ? { sessionKey: plan.originatingSessionKey } : {}),
        kind: 'step',
        chainId: plan.chainId,
        planId: plan.id,
        stepIndex: nextStep.index,
    });
    nextStep.taskId = task.id;
    nextStep.status = 'running';
    plan.status = 'in_progress';
    savePlan(plan, plan.projectPath);
    logger.info({ planId, stepIndex: nextStep.index, taskId: task.id }, 'resumeChain: dispatched next step');
    return task;
}
227
+ // ── Step prompt construction ─────────────────────────────────────────
228
+ /**
229
+ * Build the focused prompt for one chained worker. Designed to be SMALL
230
+ * (~1-2KB) and ANCHORED to the step's deliverable so the worker has a
231
+ * clear stopping condition. Key elements:
232
+ * - The original user request (for context, not for re-doing it)
233
+ * - The plan summary (what's been done, what's next)
234
+ * - THIS step's scope + expected tools + deliverable
235
+ * - Posture: "do ONLY this step. Don't overshoot. State your deliverable
236
+ * in your final response."
237
+ *
238
+ * State that the next step might need is read by that step from the
239
+ * project STATUS.md or the prior step's deliverable file — NOT from
240
+ * this step's response text. Result text is for the orchestrator's
241
+ * advancement decision; deliverables are for the work itself.
242
+ */
243
+ export function buildStepPrompt(plan, step) {
244
+ const lines = [];
245
+ lines.push(`# Chained step ${step.index + 1} of ${plan.steps.length}`);
246
+ lines.push('');
247
+ lines.push(`## Original user request`);
248
+ lines.push(plan.userRequest);
249
+ lines.push('');
250
+ if (plan.projectPath) {
251
+ lines.push(`## Active project`);
252
+ lines.push(`Path: \`${plan.projectPath}\``);
253
+ lines.push('Your cwd is set to this project. Read sources from there, write outputs to `output/`.');
254
+ lines.push('');
255
+ }
256
+ // Concise plan summary — JUST what's been done and what's next.
257
+ // Don't include full step bodies; that's noise.
258
+ lines.push(`## Plan summary`);
259
+ for (const s of plan.steps) {
260
+ const marker = s.status === 'done' ? '✓' : s.status === 'failed' ? '✗' : s.index === step.index ? '→' : '·';
261
+ const detail = s.status === 'done' && s.deliverable ? ` (→ ${s.deliverable})` : '';
262
+ lines.push(` ${marker} ${s.index + 1}. ${s.title}${detail}`);
263
+ }
264
+ lines.push('');
265
+ lines.push(`## Your step (the → above)`);
266
+ lines.push(`**Title**: ${step.title}`);
267
+ lines.push(`**Scope**: ${step.scope}`);
268
+ if (step.expectedTools.length > 0) {
269
+ lines.push(`**Expected tool calls**: ${step.expectedTools.join(', ')}`);
270
+ }
271
+ if (step.deliverable) {
272
+ lines.push(`**Deliverable**: ${step.deliverable}`);
273
+ }
274
+ lines.push('');
275
+ lines.push(`## Step posture`);
276
+ lines.push('Do ONLY this step. Don\'t start the next one — the orchestrator handles that. ' +
277
+ 'When you\'re done, state your deliverable concretely in your final response ' +
278
+ '(file path, URL, confirmation) so the orchestrator can advance the chain. ' +
279
+ 'If you hit a blocker (missing info, ambiguous scope, tool failure), say so explicitly ' +
280
+ 'and stop — don\'t guess.');
281
+ return lines.join('\n');
282
+ }
283
+ // ── Convenience helpers used by run-agent-cron ───────────────────────
284
/**
 * Look up the project path of the plan a chain task belongs to.
 *
 * @param task A background task; only its `planId` is read.
 * @returns The plan's `projectPath`, or undefined when the task has no
 *   planId, the plan cannot be loaded, or the plan has no project path.
 */
export function projectDirForChainTask(task) {
    const { planId } = task;
    if (!planId) {
        return undefined;
    }
    const owningPlan = loadPlan(planId);
    const projectPath = owningPlan?.projectPath;
    // Normalise a null project path to undefined.
    return projectPath ?? undefined;
}
295
+ /**
296
+ * Format a status line for posting to the originating chat after each
297
+ * step completes — gives the owner a real-time view of chain progress.
298
+ */
299
+ export function formatChainStatusUpdate(plan, justCompletedStep) {
300
+ const total = plan.steps.length;
301
+ const done = plan.steps.filter((s) => s.status === 'done').length;
302
+ const lines = [];
303
+ lines.push(`**Step ${justCompletedStep.index + 1}/${total} done**: ${justCompletedStep.title}`);
304
+ if (justCompletedStep.resultPreview) {
305
+ lines.push(`→ ${justCompletedStep.resultPreview.slice(0, 200)}`);
306
+ }
307
+ if (done < total && plan.status === 'in_progress') {
308
+ const nextStep = plan.steps.find((s) => s.status === 'pending');
309
+ if (nextStep)
310
+ lines.push(`Next: ${nextStep.title}`);
311
+ }
312
+ else if (plan.status === 'completed') {
313
+ lines.push(`Chain complete (${done}/${total} steps).`);
314
+ }
315
+ else if (plan.status === 'paused') {
316
+ lines.push(`Chain paused. Tell me how to proceed.`);
317
+ }
318
+ return lines.join('\n');
319
+ }
320
+ // path is imported but lint warns when unused — use it once just to keep import meaningful.
321
+ // (orchestrator uses path indirectly via loadPlan/savePlan; this comment keeps the import obvious to future readers)
322
+ void path;
323
+ //# sourceMappingURL=bg-orchestrator.js.map
@@ -0,0 +1,142 @@
1
+ /**
2
+ * bg-planner — decompose a multi-step user request into a chain of
3
+ * focused PlanSteps that the orchestrator can dispatch one at a time.
4
+ *
5
+ * Why this exists (1.18.190)
6
+ * ──────────────────────────
7
+ * Before this, a complex multi-step user ask ("find the coaches project,
8
+ * build me an HTML report, deploy it to Netlify, verify the URL") got
9
+ * handed to a single monolithic bg-task worker. The worker had its own
10
+ * 200K context but still autocompact-thrashed because:
11
+ * - tool outputs accumulated across all 5-6 phases of the work
12
+ * - the model lost fidelity to its own past tool outputs as the
13
+ * output-guard tightened from 30KB → 4KB
14
+ * - one bad turn (huge file read, big Glob) poisoned the rest
15
+ *
16
+ * The decomposition pattern this module enables:
17
+ * 1. Planner runs ONCE with Sonnet (not Haiku — plans need real
18
+ * reasoning, see "Model choice" below)
19
+ * 2. Emits a Plan: 3-7 PlanSteps, each with title + scope + expected
20
+ * tool calls + deliverable artifact path
21
+ * 3. Plan persists to <project>/.clementine/plans/<planId>.json
22
+ * (or BASE_DIR/plans/<planId>.json if no active project)
23
+ * 4. Orchestrator (bg-orchestrator.ts) queues one bg-task per step,
24
+ * each with a tight scope and a fresh 200K worker window
25
+ * 5. State flows between steps via STATUS.md + the plan ledger;
26
+ * no step accumulates context from prior steps
27
+ *
28
+ * Model choice: Sonnet, NOT Haiku
29
+ * ────────────────────────────────
30
+ * Planning is a reasoning task, not a transformation. A poorly
31
+ * decomposed plan costs $5+ in downstream worker thrash; a well-
32
+ * decomposed plan saves multiples of that. The marginal cost of
33
+ * Sonnet over Haiku (~$0.05-0.15 vs ~$0.01 per plan) is trivial
34
+ * compared to the downstream cost of bad decomposition. Haiku is for
35
+ * mechanical tasks (extraction, classification, routing); decomposing
36
+ * a multi-domain ask into proper steps is not mechanical.
37
+ *
38
+ * If you're tempted to "save tokens" by flipping this to Haiku, read
39
+ * the 2026-05-12 root-cause plan first
40
+ * (~/.claude/plans/look-at-the-last-vivid-rossum.md). The whole point
41
+ * of this ship is to NOT cut corners on the decomposition layer.
42
+ */
43
+ import type { ProjectMeta } from './assistant.js';
44
+ export interface PlanStep {
45
+ /** 0-indexed position. */
46
+ index: number;
47
+ /** Short imperative title (e.g., "Find the coaches project"). */
48
+ title: string;
49
+ /** What this step does, in 1-2 sentences. The chained worker sees this. */
50
+ scope: string;
51
+ /** Tools the step is expected to call. The chained worker sees this as
52
+ * guidance, not enforcement — overshooting is allowed, just not
53
+ * preferred. */
54
+ expectedTools: string[];
55
+ /** Where the step's output goes (file path, deploy URL, etc.) — used
56
+ * by claim-verification + by the next step to find prior work. */
57
+ deliverable?: string;
58
+ /** Step status — orchestrator updates this. */
59
+ status: 'pending' | 'running' | 'done' | 'failed' | 'skipped';
60
+ /** Set by orchestrator after dispatch. */
61
+ taskId?: string;
62
+ /** Worker's final result text (for visibility, capped). */
63
+ resultPreview?: string;
64
+ /** Set on completion. */
65
+ completedAt?: string;
66
+ }
67
+ export interface Plan {
68
+ /** Unique plan id — also the filename basename. */
69
+ id: string;
70
+ /** Chain id — shared by the planner task and all step tasks for one user request. */
71
+ chainId: string;
72
+ /** Original user request the planner decomposed. */
73
+ userRequest: string;
74
+ /** Resolved project path (if any) when the planner ran. */
75
+ projectPath?: string;
76
+ /** Session key of the originating chat — for delivering the final result. */
77
+ originatingSessionKey?: string;
78
+ /** ISO when the planner emitted this. */
79
+ createdAt: string;
80
+ /** Steps in execution order. */
81
+ steps: PlanStep[];
82
+ /** Overall chain status. Derived from steps; persisted for cheap reads. */
83
+ status: 'pending' | 'in_progress' | 'completed' | 'paused' | 'failed';
84
+ /** Total estimated cost (USD) for this plan if every step's expectedTools fire as-projected.
85
+ * Informational only — not enforced. */
86
+ estimatedCostUsd?: number;
87
+ /** Free-form notes from the planner: known risks, assumptions, etc. */
88
+ notes?: string;
89
+ }
90
+ /**
91
+ * Where plans live. If `projectPath` is set, plans go inside that
92
+ * project's `.clementine/plans/` so they travel with the project; if
93
+ * no project, plans go under `BASE_DIR/plans/` (global).
94
+ */
95
+ export declare function plansDir(projectPath?: string | null): string;
96
+ export declare function planFile(planId: string, projectPath?: string | null): string;
97
+ export declare function savePlan(plan: Plan, projectPath?: string | null): string;
98
+ export declare function loadPlan(planId: string, projectPath?: string | null): Plan | null;
99
+ export interface PlanRequestOptions {
100
+ userRequest: string;
101
+ originatingSessionKey?: string;
102
+ project?: ProjectMeta | null;
103
+ /** Optional override; defaults to Sonnet. NEVER pass Haiku here. */
104
+ model?: string;
105
+ /** Override the SDK query function for tests. */
106
+ llmCall?: (prompt: string, systemPrompt: string, model: string) => Promise<string>;
107
+ }
108
+ /**
109
+ * Decompose a user request into a Plan. Pure async function — no side
110
+ * effects except the optional LLM call. Caller decides whether to
111
+ * persist the result via `savePlan`.
112
+ *
113
+ * Behavior:
114
+ * - Builds a system prompt describing the decomposition contract
115
+ * - Asks the model to emit a JSON object matching the Plan schema
116
+ * - Validates the response against the schema; logs and retries once on parse failure
117
+ * - Returns a Plan ready for orchestrator dispatch
118
+ *
119
+ * Failure modes:
120
+ * - LLM returns non-JSON → throws PlanGenerationError
121
+ * - LLM returns empty steps → throws PlanGenerationError
122
+ * - LLM returns >12 steps → trimmed to first 12 with a warning
123
+ */
124
+ export declare function planRequest(opts: PlanRequestOptions): Promise<Plan>;
125
+ export declare class PlanGenerationError extends Error {
126
+ constructor(message: string);
127
+ }
128
+ interface RawPlannerResponse {
129
+ steps?: Array<{
130
+ title?: unknown;
131
+ scope?: unknown;
132
+ expectedTools?: unknown;
133
+ deliverable?: unknown;
134
+ }>;
135
+ estimatedCostUsd?: number;
136
+ notes?: unknown;
137
+ }
138
+ /** Defensive JSON parse — strips common LLM wrappers (markdown fences,
139
+ * leading/trailing prose) before parsing. */
140
+ export declare function parsePlannerResponse(raw: string): RawPlannerResponse | null;
141
+ export {};
142
+ //# sourceMappingURL=bg-planner.d.ts.map