clementine-agent 1.18.188 → 1.18.190
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/background-tasks.d.ts +9 -0
- package/dist/agent/background-tasks.js +14 -0
- package/dist/agent/bg-orchestrator.d.ts +103 -0
- package/dist/agent/bg-orchestrator.js +323 -0
- package/dist/agent/bg-planner.d.ts +142 -0
- package/dist/agent/bg-planner.js +321 -0
- package/dist/agent/project-resolver.d.ts +9 -0
- package/dist/agent/project-resolver.js +85 -31
- package/dist/agent/run-agent-cron.js +13 -0
- package/dist/gateway/cron-scheduler.d.ts +17 -0
- package/dist/gateway/cron-scheduler.js +148 -1
- package/dist/gateway/router.js +32 -9
- package/dist/types.d.ts +10 -0
- package/package.json +1 -1
|
@@ -26,12 +26,21 @@ export interface BackgroundTaskOptions {
|
|
|
26
26
|
/**
|
|
27
27
|
* Create a new pending task on disk and return it. Caller (the MCP tool)
|
|
28
28
|
* doesn't await execution — the daemon picks the task up asynchronously.
|
|
29
|
+
*
|
|
30
|
+
* 1.18.190 — accepts the new chain fields (kind / chainId / planId /
|
|
31
|
+
* stepIndex / parentTaskId) for planner-orchestrator chained tasks. All
|
|
32
|
+
* are optional; callers that pass none get the legacy monolithic shape.
|
|
29
33
|
*/
|
|
30
34
|
export declare function createBackgroundTask(input: {
|
|
31
35
|
fromAgent: string;
|
|
32
36
|
prompt: string;
|
|
33
37
|
maxMinutes: number;
|
|
34
38
|
sessionKey?: string;
|
|
39
|
+
kind?: BackgroundTask['kind'];
|
|
40
|
+
chainId?: string;
|
|
41
|
+
planId?: string;
|
|
42
|
+
stepIndex?: number;
|
|
43
|
+
parentTaskId?: string;
|
|
35
44
|
}, opts?: BackgroundTaskOptions): BackgroundTask;
|
|
36
45
|
/** Load a task by id, or null if not found / malformed. */
|
|
37
46
|
export declare function loadBackgroundTask(id: string, opts?: BackgroundTaskOptions): BackgroundTask | null;
|
|
@@ -45,6 +45,10 @@ function safeWrite(file, task) {
|
|
|
45
45
|
/**
|
|
46
46
|
* Create a new pending task on disk and return it. Caller (the MCP tool)
|
|
47
47
|
* doesn't await execution — the daemon picks the task up asynchronously.
|
|
48
|
+
*
|
|
49
|
+
* 1.18.190 — accepts the new chain fields (kind / chainId / planId /
|
|
50
|
+
* stepIndex / parentTaskId) for planner-orchestrator chained tasks. All
|
|
51
|
+
* are optional; callers that pass none get the legacy monolithic shape.
|
|
48
52
|
*/
|
|
49
53
|
export function createBackgroundTask(input, opts) {
|
|
50
54
|
const now = new Date();
|
|
@@ -58,6 +62,16 @@ export function createBackgroundTask(input, opts) {
|
|
|
58
62
|
};
|
|
59
63
|
if (input.sessionKey)
|
|
60
64
|
task.sessionKey = input.sessionKey;
|
|
65
|
+
if (input.kind)
|
|
66
|
+
task.kind = input.kind;
|
|
67
|
+
if (input.chainId)
|
|
68
|
+
task.chainId = input.chainId;
|
|
69
|
+
if (input.planId)
|
|
70
|
+
task.planId = input.planId;
|
|
71
|
+
if (typeof input.stepIndex === 'number')
|
|
72
|
+
task.stepIndex = input.stepIndex;
|
|
73
|
+
if (input.parentTaskId)
|
|
74
|
+
task.parentTaskId = input.parentTaskId;
|
|
61
75
|
safeWrite(pathFor(task.id, opts), task);
|
|
62
76
|
return task;
|
|
63
77
|
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* bg-orchestrator — drive a Plan from start to finish by queuing one
|
|
3
|
+
* bg-task per PlanStep, advancing the chain as each step completes.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists (1.18.190)
|
|
6
|
+
* ──────────────────────────
|
|
7
|
+
* The bg-planner produces a Plan with 3-7 PlanSteps. This module is
|
|
8
|
+
* the runtime that executes that Plan, one step at a time, with each
|
|
9
|
+
* step getting its own fresh bg-task + worker context. The chain only
|
|
10
|
+
* advances when the previous step completed successfully — failures
|
|
11
|
+
* pause the chain and notify the owner.
|
|
12
|
+
*
|
|
13
|
+
* Architectural role:
|
|
14
|
+
* bg-planner.ts → Plan (data)
|
|
15
|
+
* bg-orchestrator.ts → drives the Plan (this module)
|
|
16
|
+
* background-tasks.ts → bg-task persistence (filesystem)
|
|
17
|
+
* run-agent-cron.ts → runs the actual SDK call for each step
|
|
18
|
+
*
|
|
19
|
+
* The orchestrator is NEVER the thing reading files or calling APIs.
|
|
20
|
+
* It's a state machine: read plan → queue next step → wait for step
|
|
21
|
+
* to finish → repeat. The state machine lives across daemon restarts
|
|
22
|
+
* because both Plans and BackgroundTasks are filesystem-persisted.
|
|
23
|
+
*
|
|
24
|
+
* Why this prevents the autocompact thrash that motivated 1.18.190:
|
|
25
|
+
* - each step gets a FRESH bg-task with a FRESH 200K worker window
|
|
26
|
+
* - state flows between steps via the project's STATUS.md and the
|
|
27
|
+
* plan's `deliverable` fields, NOT via accumulated SDK context
|
|
28
|
+
* - no single worker has to do more than ~2-6 tool calls before
|
|
29
|
+
* completing its scoped deliverable
|
|
30
|
+
* - the model's compaction pressure resets between steps
|
|
31
|
+
*/
|
|
32
|
+
import { createBackgroundTask } from './background-tasks.js';
|
|
33
|
+
import { loadPlan, savePlan } from './bg-planner.js';
|
|
34
|
+
import type { Plan, PlanStep } from './bg-planner.js';
|
|
35
|
+
import type { BackgroundTask } from '../types.js';
|
|
36
|
+
/**
|
|
37
|
+
* Queue the first step of a freshly-planned chain. Returns the
|
|
38
|
+
* BackgroundTask created for step 0.
|
|
39
|
+
*
|
|
40
|
+
* Caller responsibility: the Plan must already be persisted to disk
|
|
41
|
+
* (via savePlan) before calling this — the dispatched step task
|
|
42
|
+
* carries a planId that will be loaded back at execution time.
|
|
43
|
+
*/
|
|
44
|
+
export declare function dispatchChain(plan: Plan): BackgroundTask;
|
|
45
|
+
/**
|
|
46
|
+
* Called by the bg-task framework when a chained step completes.
|
|
47
|
+
* Updates the plan's step status, then either:
|
|
48
|
+
* - queues the next step (chain continues),
|
|
49
|
+
* - marks the plan completed (no more steps), or
|
|
50
|
+
* - pauses the chain (step failed; owner notification surfaces elsewhere).
|
|
51
|
+
*
|
|
52
|
+
* Returns the next BackgroundTask if one was queued, or null otherwise.
|
|
53
|
+
*
|
|
54
|
+
* Safe to call multiple times for the same completed task (idempotent
|
|
55
|
+
* via the step's status check).
|
|
56
|
+
*/
|
|
57
|
+
export declare function advanceChain(opts: {
|
|
58
|
+
completedTask: BackgroundTask;
|
|
59
|
+
/** Optional override for tests; defaults to filesystem. */
|
|
60
|
+
loadPlanFn?: typeof loadPlan;
|
|
61
|
+
savePlanFn?: typeof savePlan;
|
|
62
|
+
createTaskFn?: typeof createBackgroundTask;
|
|
63
|
+
}): BackgroundTask | null;
|
|
64
|
+
/**
|
|
65
|
+
* Pause a chain explicitly (e.g., owner intervention). The current
|
|
66
|
+
* running step is left alone — caller can mark it however the
|
|
67
|
+
* downstream cancellation flow does.
|
|
68
|
+
*/
|
|
69
|
+
export declare function pauseChain(planId: string, projectPath?: string | null, reason?: string): void;
|
|
70
|
+
/**
|
|
71
|
+
* Resume a paused chain by dispatching its next pending step. If
|
|
72
|
+
* all steps are terminal, marks the plan completed. Returns the
|
|
73
|
+
* dispatched task, or null when nothing to dispatch.
|
|
74
|
+
*/
|
|
75
|
+
export declare function resumeChain(planId: string, projectPath?: string | null): BackgroundTask | null;
|
|
76
|
+
/**
|
|
77
|
+
* Build the focused prompt for one chained worker. Designed to be SMALL
|
|
78
|
+
* (~1-2KB) and ANCHORED to the step's deliverable so the worker has a
|
|
79
|
+
* clear stopping condition. Key elements:
|
|
80
|
+
* - The original user request (for context, not for re-doing it)
|
|
81
|
+
* - The plan summary (what's been done, what's next)
|
|
82
|
+
* - THIS step's scope + expected tools + deliverable
|
|
83
|
+
* - Posture: "do ONLY this step. Don't overshoot. State your deliverable
|
|
84
|
+
* in your final response."
|
|
85
|
+
*
|
|
86
|
+
* State that the next step might need is read by that step from the
|
|
87
|
+
* project STATUS.md or the prior step's deliverable file — NOT from
|
|
88
|
+
* this step's response text. Result text is for the orchestrator's
|
|
89
|
+
* advancement decision; deliverables are for the work itself.
|
|
90
|
+
*/
|
|
91
|
+
export declare function buildStepPrompt(plan: Plan, step: PlanStep): string;
|
|
92
|
+
/**
|
|
93
|
+
* Given a chain step's task, load its plan (via the task's planId) and
|
|
94
|
+
* return the plan's project path, or undefined if there is none. Used by run-agent-cron to set the SDK's `cwd` and
|
|
95
|
+
* `additionalDirectories` to the project root for the step.
|
|
96
|
+
*/
|
|
97
|
+
export declare function projectDirForChainTask(task: BackgroundTask): string | undefined;
|
|
98
|
+
/**
|
|
99
|
+
* Format a status line for posting to the originating chat after each
|
|
100
|
+
* step completes — gives the owner a real-time view of chain progress.
|
|
101
|
+
*/
|
|
102
|
+
export declare function formatChainStatusUpdate(plan: Plan, justCompletedStep: PlanStep): string;
|
|
103
|
+
//# sourceMappingURL=bg-orchestrator.d.ts.map
|
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* bg-orchestrator — drive a Plan from start to finish by queuing one
|
|
3
|
+
* bg-task per PlanStep, advancing the chain as each step completes.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists (1.18.190)
|
|
6
|
+
* ──────────────────────────
|
|
7
|
+
* The bg-planner produces a Plan with 3-7 PlanSteps. This module is
|
|
8
|
+
* the runtime that executes that Plan, one step at a time, with each
|
|
9
|
+
* step getting its own fresh bg-task + worker context. The chain only
|
|
10
|
+
* advances when the previous step completed successfully — failures
|
|
11
|
+
* pause the chain and notify the owner.
|
|
12
|
+
*
|
|
13
|
+
* Architectural role:
|
|
14
|
+
* bg-planner.ts → Plan (data)
|
|
15
|
+
* bg-orchestrator.ts → drives the Plan (this module)
|
|
16
|
+
* background-tasks.ts → bg-task persistence (filesystem)
|
|
17
|
+
* run-agent-cron.ts → runs the actual SDK call for each step
|
|
18
|
+
*
|
|
19
|
+
* The orchestrator is NEVER the thing reading files or calling APIs.
|
|
20
|
+
* It's a state machine: read plan → queue next step → wait for step
|
|
21
|
+
* to finish → repeat. The state machine lives across daemon restarts
|
|
22
|
+
* because both Plans and BackgroundTasks are filesystem-persisted.
|
|
23
|
+
*
|
|
24
|
+
* Why this prevents the autocompact thrash that motivated 1.18.190:
|
|
25
|
+
* - each step gets a FRESH bg-task with a FRESH 200K worker window
|
|
26
|
+
* - state flows between steps via the project's STATUS.md and the
|
|
27
|
+
* plan's `deliverable` fields, NOT via accumulated SDK context
|
|
28
|
+
* - no single worker has to do more than ~2-6 tool calls before
|
|
29
|
+
* completing its scoped deliverable
|
|
30
|
+
* - the model's compaction pressure resets between steps
|
|
31
|
+
*/
|
|
32
|
+
import path from 'node:path';
|
|
33
|
+
import pino from 'pino';
|
|
34
|
+
import { createBackgroundTask } from './background-tasks.js';
|
|
35
|
+
import { loadPlan, savePlan } from './bg-planner.js';
|
|
36
|
+
const logger = pino({ name: 'clementine.bg-orchestrator' });
|
|
37
|
+
// ── Public API ───────────────────────────────────────────────────────
|
|
38
|
+
/**
 * Kick off a freshly-planned chain by queuing a background task for its
 * first step.
 *
 * The plan is expected to be on disk already (via savePlan): the queued
 * task only carries the planId, and the plan is loaded back from it when
 * the step completes.
 *
 * Side effects: creates one background task, stamps step 0 with that
 * task's id, flips step 0 to 'running' and the plan to 'in_progress',
 * then persists the plan.
 *
 * @param plan The plan to start. Must contain at least one step.
 * @returns The BackgroundTask created for step 0.
 * @throws Error when the plan has no steps.
 */
export function dispatchChain(plan) {
    if (!plan.steps.length) {
        throw new Error(`Cannot dispatch chain: plan ${plan.id} has zero steps`);
    }
    const [firstStep] = plan.steps;
    // Only forward a session key when the plan actually has one.
    const sessionFields = plan.originatingSessionKey
        ? { sessionKey: plan.originatingSessionKey }
        : {};
    const task = createBackgroundTask({
        fromAgent: 'clementine',
        prompt: buildStepPrompt(plan, firstStep),
        maxMinutes: 30, // generous per-step; the step itself decides how long it needs
        ...sessionFields,
        kind: 'step',
        chainId: plan.chainId,
        planId: plan.id,
        stepIndex: 0,
    });
    // Record the dispatch on the plan so a later resume does not queue
    // the same step a second time.
    Object.assign(firstStep, { taskId: task.id, status: 'running' });
    plan.status = 'in_progress';
    savePlan(plan, plan.projectPath);
    const logFields = {
        planId: plan.id,
        chainId: plan.chainId,
        stepIndex: 0,
        stepTitle: firstStep.title,
        taskId: task.id,
    };
    logger.info(logFields, 'dispatchChain: queued step 0');
    return task;
}
|
|
76
|
+
/**
 * Called by the bg-task framework when a chained step's task reaches a
 * terminal state. Records the outcome on the plan step, then does one of:
 *   - queues the next pending step (chain continues),
 *   - marks the plan completed (no pending steps remain),
 *   - pauses the chain (the step did not finish with status 'done'), or
 *   - leaves the chain paused (the plan was paused while the step ran).
 *
 * Calling it again for a step that is already 'done' / 'failed' /
 * 'skipped' is a no-op, which protects against duplicate completion
 * callbacks.
 *
 * @param opts.completedTask The finished task. Ignored (returns null)
 *   unless it carries both a planId and a numeric stepIndex.
 * @param opts.loadPlanFn Optional replacement for loadPlan (tests).
 * @param opts.savePlanFn Optional replacement for savePlan (tests).
 * @param opts.createTaskFn Optional replacement for createBackgroundTask (tests).
 * @returns The newly queued BackgroundTask, or null when nothing was queued.
 */
export function advanceChain(opts) {
    const { completedTask } = opts;
    if (!completedTask.planId || typeof completedTask.stepIndex !== 'number') {
        logger.debug({ taskId: completedTask.id }, 'advanceChain: task has no plan id/step index — not a chain step');
        return null;
    }
    const loadPlanImpl = opts.loadPlanFn ?? loadPlan;
    const savePlanImpl = opts.savePlanFn ?? savePlan;
    const createTaskImpl = opts.createTaskFn ?? createBackgroundTask;
    // The task carries the planId but not the project path, so no path is
    // passed here.
    // NOTE(review): this relies on loadPlan being able to locate a
    // project-scoped plan without a path — confirm against bg-planner.
    const plan = loadPlanImpl(completedTask.planId, undefined);
    if (!plan) {
        logger.warn({ planId: completedTask.planId, taskId: completedTask.id }, 'advanceChain: plan not found — cannot advance');
        return null;
    }
    const step = plan.steps[completedTask.stepIndex];
    if (!step) {
        logger.warn({ planId: plan.id, stepIndex: completedTask.stepIndex }, 'advanceChain: step index out of range');
        return null;
    }
    // Idempotency: if the step has already been marked terminal, don't
    // advance again. Protects against duplicate completion callbacks.
    if (step.status === 'done' || step.status === 'failed' || step.status === 'skipped') {
        logger.debug({ planId: plan.id, stepIndex: step.index, status: step.status }, 'advanceChain: step already terminal — skipping');
        return null;
    }
    // Reflect the task's terminal status onto the plan step.
    if (completedTask.status === 'done') {
        step.status = 'done';
        step.completedAt = completedTask.completedAt ?? new Date().toISOString();
        step.resultPreview = (completedTask.result ?? '').slice(0, 400);
    }
    else {
        // failed | aborted | interrupted — all map to plan-step failure for now
        step.status = 'failed';
        step.completedAt = completedTask.completedAt ?? new Date().toISOString();
        step.resultPreview = (completedTask.error ?? completedTask.result ?? '').slice(0, 400);
        plan.status = 'paused';
        savePlanImpl(plan, plan.projectPath);
        logger.warn({
            planId: plan.id,
            chainId: plan.chainId,
            stepIndex: step.index,
            stepTitle: step.title,
            taskStatus: completedTask.status,
            error: completedTask.error,
        }, 'advanceChain: step failed — chain paused');
        return null;
    }
    // Look for the next pending step.
    const nextStep = plan.steps.find((s, i) => i > step.index && s.status === 'pending');
    if (!nextStep) {
        // Chain complete! Nothing is left to hold back, so this applies
        // even if the plan was paused while the final step ran.
        plan.status = 'completed';
        savePlanImpl(plan, plan.projectPath);
        logger.info({
            planId: plan.id,
            chainId: plan.chainId,
            stepCount: plan.steps.length,
        }, 'advanceChain: chain completed');
        return null;
    }
    // Honor an explicit pause: pauseChain leaves the running step alone,
    // so its completion lands here with the plan already 'paused'.
    // Persist the step's outcome but do not queue more work; resumeChain
    // dispatches the next pending step when the owner resumes.
    if (plan.status === 'paused') {
        savePlanImpl(plan, plan.projectPath);
        logger.info({
            planId: plan.id,
            chainId: plan.chainId,
            stepIndex: step.index,
            stepTitle: step.title,
        }, 'advanceChain: chain is paused — not queuing next step');
        return null;
    }
    // Queue the next step.
    const nextTask = createTaskImpl({
        fromAgent: 'clementine',
        prompt: buildStepPrompt(plan, nextStep),
        maxMinutes: 30,
        ...(plan.originatingSessionKey ? { sessionKey: plan.originatingSessionKey } : {}),
        kind: 'step',
        chainId: plan.chainId,
        planId: plan.id,
        stepIndex: nextStep.index,
        parentTaskId: completedTask.id,
    });
    nextStep.taskId = nextTask.id;
    nextStep.status = 'running';
    savePlanImpl(plan, plan.projectPath);
    logger.info({
        planId: plan.id,
        chainId: plan.chainId,
        stepIndex: nextStep.index,
        stepTitle: nextStep.title,
        taskId: nextTask.id,
        parentTaskId: completedTask.id,
    }, 'advanceChain: queued next step');
    return nextTask;
}
|
|
178
|
+
/**
 * Explicitly pause a chain (e.g., on owner intervention).
 *
 * Loads the plan, sets its status to 'paused', optionally appends a
 * `[paused] <reason>` line to the plan notes, and saves it. Step
 * statuses are not touched: a step that is currently running is left
 * as-is for the caller's cancellation flow to deal with. Does nothing
 * when the plan cannot be loaded.
 *
 * @param planId Id of the plan to pause.
 * @param projectPath Project the plan belongs to; null/undefined are both
 *   passed to loadPlan as undefined.
 * @param reason Optional human-readable reason recorded in the notes.
 */
export function pauseChain(planId, projectPath, reason) {
    const plan = loadPlan(planId, projectPath ?? undefined);
    if (plan) {
        plan.status = 'paused';
        if (reason) {
            // Append to existing notes on a new line rather than overwriting.
            const existingNotes = plan.notes ? plan.notes + '\n' : '';
            plan.notes = `${existingNotes}[paused] ${reason}`;
        }
        savePlan(plan, plan.projectPath);
        logger.info({ planId, reason }, 'pauseChain: chain paused');
    }
}
|
|
193
|
+
/**
 * Resume a paused chain by dispatching its next pending step.
 *
 * Outcomes:
 *   - plan not found, or already 'completed' → returns null, no changes;
 *   - a step is still 'running' → the plan is put back to 'in_progress'
 *     but nothing new is dispatched, so two steps never run at once
 *     (e.g. pause → resume before the running step finishes, or a
 *     repeated resume);
 *   - no pending step remains → plan is marked 'completed', returns null;
 *   - otherwise → the first pending step is queued as a new task, marked
 *     'running', and the plan is saved as 'in_progress'.
 *
 * NOTE(review): a step left in 'running' whose task no longer exists
 * would block resume here — confirm the task framework always reports a
 * terminal status for interrupted step tasks.
 *
 * @param planId Id of the plan to resume.
 * @param projectPath Project the plan belongs to; null/undefined are both
 *   passed to loadPlan as undefined.
 * @returns The dispatched BackgroundTask, or null when nothing was dispatched.
 */
export function resumeChain(planId, projectPath) {
    const plan = loadPlan(planId, projectPath ?? undefined);
    if (!plan)
        return null;
    if (plan.status === 'completed')
        return null;
    // Never dispatch alongside an in-flight step: its completion will
    // advance the chain through advanceChain.
    const runningStep = plan.steps.find((s) => s.status === 'running');
    if (runningStep) {
        if (plan.status !== 'in_progress') {
            plan.status = 'in_progress';
            savePlan(plan, plan.projectPath);
        }
        logger.info({ planId, stepIndex: runningStep.index, taskId: runningStep.taskId }, 'resumeChain: step still running — not dispatching another');
        return null;
    }
    const nextStep = plan.steps.find((s) => s.status === 'pending');
    if (!nextStep) {
        plan.status = 'completed';
        savePlan(plan, plan.projectPath);
        return null;
    }
    const task = createBackgroundTask({
        fromAgent: 'clementine',
        prompt: buildStepPrompt(plan, nextStep),
        maxMinutes: 30,
        ...(plan.originatingSessionKey ? { sessionKey: plan.originatingSessionKey } : {}),
        kind: 'step',
        chainId: plan.chainId,
        planId: plan.id,
        stepIndex: nextStep.index,
    });
    nextStep.taskId = task.id;
    nextStep.status = 'running';
    plan.status = 'in_progress';
    savePlan(plan, plan.projectPath);
    logger.info({ planId, stepIndex: nextStep.index, taskId: task.id }, 'resumeChain: dispatched next step');
    return task;
}
|
|
227
|
+
// ── Step prompt construction ─────────────────────────────────────────
|
|
228
|
+
/**
 * Build the focused prompt for one chained worker.
 *
 * The prompt is a small markdown document with these sections, in order:
 *   - a heading "Chained step N of M",
 *   - the original user request (context only),
 *   - the active project path, when the plan has one,
 *   - a one-line-per-step plan summary (✓ done, ✗ failed, → this step,
 *     · anything else; done steps also show their deliverable),
 *   - this step's title, scope, expected tools and deliverable,
 *   - a fixed "step posture" paragraph telling the worker to do only
 *     this step and to state its deliverable in the final response.
 *
 * @param plan The plan the step belongs to.
 * @param step The step to build a prompt for.
 * @returns The prompt text, lines joined with '\n'.
 */
export function buildStepPrompt(plan, step) {
    const lines = [];
    lines.push(`# Chained step ${step.index + 1} of ${plan.steps.length}`);
    lines.push('');
    lines.push(`## Original user request`);
    lines.push(plan.userRequest);
    lines.push('');
    if (plan.projectPath) {
        lines.push(`## Active project`);
        lines.push(`Path: \`${plan.projectPath}\``);
        lines.push('Your cwd is set to this project. Read sources from there, write outputs to `output/`.');
        lines.push('');
    }
    // Concise plan summary — JUST what's been done and what's next.
    // Don't include full step bodies; that's noise.
    lines.push(`## Plan summary`);
    for (const s of plan.steps) {
        // Status wins over position: a done/failed step keeps its own
        // marker even when it is the step being prompted for.
        const marker = s.status === 'done' ? '✓' : s.status === 'failed' ? '✗' : s.index === step.index ? '→' : '·';
        const detail = s.status === 'done' && s.deliverable ? ` (→ ${s.deliverable})` : '';
        lines.push(` ${marker} ${s.index + 1}. ${s.title}${detail}`);
    }
    lines.push('');
    lines.push(`## Your step (the → above)`);
    lines.push(`**Title**: ${step.title}`);
    lines.push(`**Scope**: ${step.scope}`);
    // Plans are reloaded from JSON on disk, so a step may arrive without a
    // usable expectedTools array; treat that as "no expected tools"
    // instead of throwing while building the prompt.
    const expectedTools = Array.isArray(step.expectedTools) ? step.expectedTools : [];
    if (expectedTools.length > 0) {
        lines.push(`**Expected tool calls**: ${expectedTools.join(', ')}`);
    }
    if (step.deliverable) {
        lines.push(`**Deliverable**: ${step.deliverable}`);
    }
    lines.push('');
    lines.push(`## Step posture`);
    lines.push('Do ONLY this step. Don\'t start the next one — the orchestrator handles that. ' +
        'When you\'re done, state your deliverable concretely in your final response ' +
        '(file path, URL, confirmation) so the orchestrator can advance the chain. ' +
        'If you hit a blocker (missing info, ambiguous scope, tool failure), say so explicitly ' +
        'and stop — don\'t guess.');
    return lines.join('\n');
}
|
|
283
|
+
// ── Convenience helpers used by run-agent-cron ───────────────────────
|
|
284
|
+
/**
 * Resolve the project directory for a chain step's task.
 *
 * Loads the plan referenced by the task's planId and returns that plan's
 * projectPath. Used by run-agent-cron to set the SDK's `cwd` and
 * `additionalDirectories` to the project root for the step.
 *
 * @param task The background task for a chain step.
 * @returns The plan's project path, or undefined when the task has no
 *   planId, the plan cannot be loaded, or the plan has no project path.
 */
export function projectDirForChainTask(task) {
    // Tasks outside a chain carry no planId, so there is nothing to load.
    const plan = task.planId ? loadPlan(task.planId) : null;
    return plan?.projectPath ?? undefined;
}
|
|
295
|
+
/**
 * Format a short status message for the originating chat after a step
 * finishes, giving the owner a running view of chain progress.
 *
 * Lines produced, in order:
 *   - "**Step N/M done**: <title>", or "**Step N/M failed**: <title>"
 *     when the step's status is 'failed', so a failure is never
 *     announced as done;
 *   - "→ <first 200 chars of resultPreview>" when a preview exists;
 *   - one trailer depending on plan state: "Next: <title>" of the first
 *     pending step while in progress with steps left, "Chain complete
 *     (d/M steps)." when completed, or "Chain paused. Tell me how to
 *     proceed." when paused.
 *
 * @param plan The plan, read as-is (not modified).
 * @param justCompletedStep The step that just reached a terminal state.
 * @returns The message text, lines joined with '\n'.
 */
export function formatChainStatusUpdate(plan, justCompletedStep) {
    const total = plan.steps.length;
    const done = plan.steps.filter((s) => s.status === 'done').length;
    // Failed steps also flow through here (they pause the chain), so the
    // headline must reflect the real outcome.
    const outcome = justCompletedStep.status === 'failed' ? 'failed' : 'done';
    const lines = [];
    lines.push(`**Step ${justCompletedStep.index + 1}/${total} ${outcome}**: ${justCompletedStep.title}`);
    if (justCompletedStep.resultPreview) {
        lines.push(`→ ${justCompletedStep.resultPreview.slice(0, 200)}`);
    }
    if (done < total && plan.status === 'in_progress') {
        const nextStep = plan.steps.find((s) => s.status === 'pending');
        if (nextStep)
            lines.push(`Next: ${nextStep.title}`);
    }
    else if (plan.status === 'completed') {
        lines.push(`Chain complete (${done}/${total} steps).`);
    }
    else if (plan.status === 'paused') {
        lines.push(`Chain paused. Tell me how to proceed.`);
    }
    return lines.join('\n');
}
|
|
320
|
+
// path is imported but lint warns when unused — use it once just to keep import meaningful.
|
|
321
|
+
// (orchestrator uses path indirectly via loadPlan/savePlan; this comment keeps the import obvious to future readers)
|
|
322
|
+
void path;
|
|
323
|
+
//# sourceMappingURL=bg-orchestrator.js.map
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* bg-planner — decompose a multi-step user request into a chain of
|
|
3
|
+
* focused PlanSteps that the orchestrator can dispatch one at a time.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists (1.18.190)
|
|
6
|
+
* ──────────────────────────
|
|
7
|
+
* Before this, a complex multi-step user ask ("find the coaches project,
|
|
8
|
+
* build me an HTML report, deploy it to Netlify, verify the URL") got
|
|
9
|
+
* handed to a single monolithic bg-task worker. The worker had its own
|
|
10
|
+
* 200K context but still autocompact-thrashed because:
|
|
11
|
+
* - tool outputs accumulated across all 5-6 phases of the work
|
|
12
|
+
* - the model lost fidelity to its own past tool outputs as the
|
|
13
|
+
* output-guard tightened from 30KB → 4KB
|
|
14
|
+
* - one bad turn (huge file read, big Glob) poisoned the rest
|
|
15
|
+
*
|
|
16
|
+
* The decomposition pattern this module enables:
|
|
17
|
+
* 1. Planner runs ONCE with Sonnet (not Haiku — plans need real
|
|
18
|
+
* reasoning, see "Model choice" below)
|
|
19
|
+
* 2. Emits a Plan: 3-7 PlanSteps, each with title + scope + expected
|
|
20
|
+
* tool calls + deliverable artifact path
|
|
21
|
+
* 3. Plan persists to <project>/.clementine/plans/<planId>.json
|
|
22
|
+
* (or BASE_DIR/plans/<planId>.json if no active project)
|
|
23
|
+
* 4. Orchestrator (bg-orchestrator.ts) queues one bg-task per step,
|
|
24
|
+
* each with a tight scope and a fresh 200K worker window
|
|
25
|
+
* 5. State flows between steps via STATUS.md + the plan ledger;
|
|
26
|
+
* no step accumulates context from prior steps
|
|
27
|
+
*
|
|
28
|
+
* Model choice: Sonnet, NOT Haiku
|
|
29
|
+
* ────────────────────────────────
|
|
30
|
+
* Planning is a reasoning task, not a transformation. A poorly
|
|
31
|
+
* decomposed plan costs $5+ in downstream worker thrash; a well-
|
|
32
|
+
* decomposed plan saves multiples of that. The marginal cost of
|
|
33
|
+
* Sonnet over Haiku (~$0.05-0.15 vs ~$0.01 per plan) is trivial
|
|
34
|
+
* compared to the downstream cost of bad decomposition. Haiku is for
|
|
35
|
+
* mechanical tasks (extraction, classification, routing); decomposing
|
|
36
|
+
* a multi-domain ask into proper steps is not mechanical.
|
|
37
|
+
*
|
|
38
|
+
* If you're tempted to "save tokens" by flipping this to Haiku, read
|
|
39
|
+
* the 2026-05-12 root-cause plan first
|
|
40
|
+
* (~/.claude/plans/look-at-the-last-vivid-rossum.md). The whole point
|
|
41
|
+
* of this ship is to NOT cut corners on the decomposition layer.
|
|
42
|
+
*/
|
|
43
|
+
import type { ProjectMeta } from './assistant.js';
|
|
44
|
+
export interface PlanStep {
|
|
45
|
+
/** 0-indexed position. */
|
|
46
|
+
index: number;
|
|
47
|
+
/** Short imperative title (e.g., "Find the coaches project"). */
|
|
48
|
+
title: string;
|
|
49
|
+
/** What this step does, in 1-2 sentences. The chained worker sees this. */
|
|
50
|
+
scope: string;
|
|
51
|
+
/** Tools the step is expected to call. The chained worker sees this as
|
|
52
|
+
* guidance, not enforcement — overshooting is allowed, just not
|
|
53
|
+
* preferred. */
|
|
54
|
+
expectedTools: string[];
|
|
55
|
+
/** Where the step's output goes (file path, deploy URL, etc.) — used
|
|
56
|
+
* by claim-verification + by the next step to find prior work. */
|
|
57
|
+
deliverable?: string;
|
|
58
|
+
/** Step status — orchestrator updates this. */
|
|
59
|
+
status: 'pending' | 'running' | 'done' | 'failed' | 'skipped';
|
|
60
|
+
/** Set by orchestrator after dispatch. */
|
|
61
|
+
taskId?: string;
|
|
62
|
+
/** Worker's final result text (for visibility, capped). */
|
|
63
|
+
resultPreview?: string;
|
|
64
|
+
/** Set on completion. */
|
|
65
|
+
completedAt?: string;
|
|
66
|
+
}
|
|
67
|
+
export interface Plan {
|
|
68
|
+
/** Unique plan id — also the filename basename. */
|
|
69
|
+
id: string;
|
|
70
|
+
/** Chain id — shared by the planner task and all step tasks for one user request. */
|
|
71
|
+
chainId: string;
|
|
72
|
+
/** Original user request the planner decomposed. */
|
|
73
|
+
userRequest: string;
|
|
74
|
+
/** Resolved project path (if any) when the planner ran. */
|
|
75
|
+
projectPath?: string;
|
|
76
|
+
/** Session key of the originating chat — for delivering the final result. */
|
|
77
|
+
originatingSessionKey?: string;
|
|
78
|
+
/** ISO when the planner emitted this. */
|
|
79
|
+
createdAt: string;
|
|
80
|
+
/** Steps in execution order. */
|
|
81
|
+
steps: PlanStep[];
|
|
82
|
+
/** Overall chain status. Derived from steps; persisted for cheap reads. */
|
|
83
|
+
status: 'pending' | 'in_progress' | 'completed' | 'paused' | 'failed';
|
|
84
|
+
/** Total estimated cost (USD) for this plan if every step's expectedTools fire as-projected.
|
|
85
|
+
* Informational only — not enforced. */
|
|
86
|
+
estimatedCostUsd?: number;
|
|
87
|
+
/** Free-form notes from the planner: known risks, assumptions, etc. */
|
|
88
|
+
notes?: string;
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* Where plans live. If `projectPath` is set, plans go inside that
|
|
92
|
+
* project's `.clementine/plans/` so they travel with the project; if
|
|
93
|
+
* no project, plans go under `BASE_DIR/plans/` (global).
|
|
94
|
+
*/
|
|
95
|
+
export declare function plansDir(projectPath?: string | null): string;
|
|
96
|
+
export declare function planFile(planId: string, projectPath?: string | null): string;
|
|
97
|
+
export declare function savePlan(plan: Plan, projectPath?: string | null): string;
|
|
98
|
+
export declare function loadPlan(planId: string, projectPath?: string | null): Plan | null;
|
|
99
|
+
export interface PlanRequestOptions {
|
|
100
|
+
userRequest: string;
|
|
101
|
+
originatingSessionKey?: string;
|
|
102
|
+
project?: ProjectMeta | null;
|
|
103
|
+
/** Optional override; defaults to Sonnet. NEVER pass Haiku here. */
|
|
104
|
+
model?: string;
|
|
105
|
+
/** Override the SDK query function for tests. */
|
|
106
|
+
llmCall?: (prompt: string, systemPrompt: string, model: string) => Promise<string>;
|
|
107
|
+
}
|
|
108
|
+
/**
|
|
109
|
+
* Decompose a user request into a Plan. Pure async function — no side
|
|
110
|
+
* effects except the optional LLM call. Caller decides whether to
|
|
111
|
+
* persist the result via `savePlan`.
|
|
112
|
+
*
|
|
113
|
+
* Behavior:
|
|
114
|
+
* - Builds a system prompt describing the decomposition contract
|
|
115
|
+
* - Asks the model to emit a JSON object matching the Plan schema
|
|
116
|
+
* - Validates the response against the schema; logs and retries once on parse failure
|
|
117
|
+
* - Returns a Plan ready for orchestrator dispatch
|
|
118
|
+
*
|
|
119
|
+
* Failure modes:
|
|
120
|
+
* - LLM returns non-JSON → throws PlanGenerationError
|
|
121
|
+
* - LLM returns empty steps → throws PlanGenerationError
|
|
122
|
+
* - LLM returns >12 steps → trimmed to first 12 with a warning
|
|
123
|
+
*/
|
|
124
|
+
export declare function planRequest(opts: PlanRequestOptions): Promise<Plan>;
|
|
125
|
+
export declare class PlanGenerationError extends Error {
|
|
126
|
+
constructor(message: string);
|
|
127
|
+
}
|
|
128
|
+
interface RawPlannerResponse {
|
|
129
|
+
steps?: Array<{
|
|
130
|
+
title?: unknown;
|
|
131
|
+
scope?: unknown;
|
|
132
|
+
expectedTools?: unknown;
|
|
133
|
+
deliverable?: unknown;
|
|
134
|
+
}>;
|
|
135
|
+
estimatedCostUsd?: number;
|
|
136
|
+
notes?: unknown;
|
|
137
|
+
}
|
|
138
|
+
/** Defensive JSON parse — strips common LLM wrappers (markdown fences,
|
|
139
|
+
* leading/trailing prose) before parsing. */
|
|
140
|
+
export declare function parsePlannerResponse(raw: string): RawPlannerResponse | null;
|
|
141
|
+
export {};
|
|
142
|
+
//# sourceMappingURL=bg-planner.d.ts.map
|