@doingdev/opencode-claude-manager-plugin 0.1.59 → 0.1.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/manager/team-orchestrator.d.ts +10 -1
- package/dist/manager/team-orchestrator.js +77 -1
- package/dist/plugin/agents/common.d.ts +2 -2
- package/dist/plugin/agents/common.js +2 -0
- package/dist/plugin/claude-manager.plugin.js +43 -0
- package/dist/prompts/registry.js +22 -4
- package/dist/src/manager/team-orchestrator.d.ts +10 -1
- package/dist/src/manager/team-orchestrator.js +77 -1
- package/dist/src/plugin/agents/common.d.ts +2 -2
- package/dist/src/plugin/agents/common.js +2 -0
- package/dist/src/plugin/claude-manager.plugin.js +43 -0
- package/dist/src/prompts/registry.js +22 -4
- package/dist/src/types/contracts.d.ts +19 -0
- package/dist/test/claude-manager.plugin.test.js +172 -1
- package/dist/test/prompt-registry.test.js +52 -0
- package/dist/test/team-orchestrator.test.js +158 -2
- package/dist/types/contracts.d.ts +19 -0
- package/package.json +1 -1
|
@@ -2,7 +2,7 @@ import type { ClaudeSessionEventHandler } from '../claude/claude-agent-sdk-adapt
|
|
|
2
2
|
import type { ClaudeSessionService } from '../claude/claude-session.service.js';
|
|
3
3
|
import type { TeamStateStore } from '../state/team-state-store.js';
|
|
4
4
|
import type { TranscriptStore } from '../state/transcript-store.js';
|
|
5
|
-
import type { EngineerFailureResult, EngineerName, EngineerTaskResult, EngineerWorkMode, SynthesizedPlanResult, TeamRecord, WorkerCapabilities } from '../types/contracts.js';
|
|
5
|
+
import type { ActivePlan, EngineerFailureResult, EngineerName, EngineerTaskResult, EngineerWorkMode, SynthesizedPlanResult, TaskSize, TeamRecord, WorkerCapabilities } from '../types/contracts.js';
|
|
6
6
|
interface DispatchEngineerInput {
|
|
7
7
|
teamId: string;
|
|
8
8
|
cwd: string;
|
|
@@ -59,6 +59,15 @@ export declare class TeamOrchestrator {
|
|
|
59
59
|
lead: EngineerName;
|
|
60
60
|
challenger: EngineerName;
|
|
61
61
|
}>;
|
|
62
|
+
getActivePlan(cwd: string, teamId: string): Promise<ActivePlan | null>;
|
|
63
|
+
setActivePlan(cwd: string, teamId: string, plan: {
|
|
64
|
+
summary: string;
|
|
65
|
+
taskSize: TaskSize;
|
|
66
|
+
slices: string[];
|
|
67
|
+
preAuthorized: boolean;
|
|
68
|
+
}): Promise<ActivePlan>;
|
|
69
|
+
clearActivePlan(cwd: string, teamId: string): Promise<void>;
|
|
70
|
+
updateActivePlanSlice(cwd: string, teamId: string, sliceIndex: number, status: 'in_progress' | 'done' | 'skipped'): Promise<void>;
|
|
62
71
|
private buildSessionSystemPrompt;
|
|
63
72
|
private buildEngineerPrompt;
|
|
64
73
|
}
|
|
@@ -383,6 +383,81 @@ export class TeamOrchestrator {
|
|
|
383
383
|
}
|
|
384
384
|
return { lead, challenger };
|
|
385
385
|
}
|
|
386
|
+
async getActivePlan(cwd, teamId) {
|
|
387
|
+
const team = await this.getOrCreateTeam(cwd, teamId);
|
|
388
|
+
return team.activePlan ?? null;
|
|
389
|
+
}
|
|
390
|
+
async setActivePlan(cwd, teamId, plan) {
|
|
391
|
+
await this.getOrCreateTeam(cwd, teamId);
|
|
392
|
+
const now = new Date().toISOString();
|
|
393
|
+
const slices = plan.slices.map((description, index) => ({
|
|
394
|
+
index,
|
|
395
|
+
description,
|
|
396
|
+
status: 'pending',
|
|
397
|
+
}));
|
|
398
|
+
const activePlan = {
|
|
399
|
+
id: `plan-${Date.now()}`,
|
|
400
|
+
summary: plan.summary,
|
|
401
|
+
taskSize: plan.taskSize,
|
|
402
|
+
createdAt: now,
|
|
403
|
+
confirmedAt: now,
|
|
404
|
+
preAuthorized: plan.preAuthorized,
|
|
405
|
+
slices,
|
|
406
|
+
currentSliceIndex: slices.length > 0 ? 0 : null,
|
|
407
|
+
};
|
|
408
|
+
await this.teamStore.updateTeam(cwd, teamId, (team) => ({
|
|
409
|
+
...team,
|
|
410
|
+
updatedAt: now,
|
|
411
|
+
activePlan,
|
|
412
|
+
}));
|
|
413
|
+
return activePlan;
|
|
414
|
+
}
|
|
415
|
+
async clearActivePlan(cwd, teamId) {
|
|
416
|
+
await this.getOrCreateTeam(cwd, teamId);
|
|
417
|
+
const now = new Date().toISOString();
|
|
418
|
+
await this.teamStore.updateTeam(cwd, teamId, (team) => ({
|
|
419
|
+
...team,
|
|
420
|
+
updatedAt: now,
|
|
421
|
+
activePlan: undefined,
|
|
422
|
+
}));
|
|
423
|
+
}
|
|
424
|
+
async updateActivePlanSlice(cwd, teamId, sliceIndex, status) {
|
|
425
|
+
await this.getOrCreateTeam(cwd, teamId);
|
|
426
|
+
const now = new Date().toISOString();
|
|
427
|
+
await this.teamStore.updateTeam(cwd, teamId, (team) => {
|
|
428
|
+
if (!team.activePlan) {
|
|
429
|
+
throw new Error(`Cannot update slice: team "${teamId}" has no active plan. Call confirm_plan first.`);
|
|
430
|
+
}
|
|
431
|
+
const sliceExists = team.activePlan.slices.some((s) => s.index === sliceIndex);
|
|
432
|
+
if (!sliceExists) {
|
|
433
|
+
const sliceCount = team.activePlan.slices.length;
|
|
434
|
+
const rangeMsg = sliceCount === 0 ? 'plan has no slices' : `valid range: 0–${sliceCount - 1}`;
|
|
435
|
+
throw new Error(`Cannot update slice: slice index ${sliceIndex} does not exist in active plan "${team.activePlan.id}" (${rangeMsg}).`);
|
|
436
|
+
}
|
|
437
|
+
const slices = team.activePlan.slices.map((s) => s.index === sliceIndex
|
|
438
|
+
? {
|
|
439
|
+
...s,
|
|
440
|
+
status,
|
|
441
|
+
...(status === 'done' || status === 'skipped' ? { completedAt: now } : {}),
|
|
442
|
+
}
|
|
443
|
+
: s);
|
|
444
|
+
const isLastSlice = sliceIndex === team.activePlan.slices.length - 1;
|
|
445
|
+
const nextIndex = status === 'done' || status === 'skipped'
|
|
446
|
+
? isLastSlice
|
|
447
|
+
? null
|
|
448
|
+
: sliceIndex + 1
|
|
449
|
+
: team.activePlan.currentSliceIndex;
|
|
450
|
+
return {
|
|
451
|
+
...team,
|
|
452
|
+
updatedAt: now,
|
|
453
|
+
activePlan: {
|
|
454
|
+
...team.activePlan,
|
|
455
|
+
slices,
|
|
456
|
+
currentSliceIndex: nextIndex,
|
|
457
|
+
},
|
|
458
|
+
};
|
|
459
|
+
});
|
|
460
|
+
}
|
|
386
461
|
buildSessionSystemPrompt(engineer, mode) {
|
|
387
462
|
const specialistPrompt = this.workerCapabilities[engineer]?.sessionPrompt;
|
|
388
463
|
if (specialistPrompt) {
|
|
@@ -416,7 +491,8 @@ function buildModeInstruction(mode) {
|
|
|
416
491
|
case 'implement':
|
|
417
492
|
return [
|
|
418
493
|
'Implementation mode.',
|
|
419
|
-
'
|
|
494
|
+
'Before making any edits, state a brief implementation plan: which files you will change, what each change does, and why.',
|
|
495
|
+
'Then make the changes, run the most relevant verification (tests, lint, typecheck), and report what changed and what you verified.',
|
|
420
496
|
'Before reporting done, review your own diff for issues that pass tests but break in production.',
|
|
421
497
|
].join(' ');
|
|
422
498
|
case 'verify':
|
|
@@ -11,9 +11,9 @@ export declare const ENGINEER_AGENT_IDS: {
|
|
|
11
11
|
};
|
|
12
12
|
/** General named engineers only (Tom/John/Maya/Sara/Alex). BrowserQA is a specialist registered separately. */
|
|
13
13
|
export declare const ENGINEER_AGENT_NAMES: readonly ["Tom", "John", "Maya", "Sara", "Alex"];
|
|
14
|
-
export declare const CTO_ONLY_TOOL_IDS: readonly ["team_status", "reset_engineer", "list_transcripts", "list_history", "git_diff", "git_commit", "git_reset", "git_status", "git_log", "approval_policy", "approval_decisions", "approval_update"];
|
|
14
|
+
export declare const CTO_ONLY_TOOL_IDS: readonly ["team_status", "reset_engineer", "confirm_plan", "advance_slice", "list_transcripts", "list_history", "git_diff", "git_commit", "git_reset", "git_status", "git_log", "approval_policy", "approval_decisions", "approval_update"];
|
|
15
15
|
export declare const ENGINEER_TOOL_IDS: readonly ["claude"];
|
|
16
|
-
export declare const ALL_RESTRICTED_TOOL_IDS: readonly ["team_status", "reset_engineer", "list_transcripts", "list_history", "git_diff", "git_commit", "git_reset", "git_status", "git_log", "approval_policy", "approval_decisions", "approval_update", "claude"];
|
|
16
|
+
export declare const ALL_RESTRICTED_TOOL_IDS: readonly ["team_status", "reset_engineer", "confirm_plan", "advance_slice", "list_transcripts", "list_history", "git_diff", "git_commit", "git_reset", "git_status", "git_log", "approval_policy", "approval_decisions", "approval_update", "claude"];
|
|
17
17
|
export type ToolPermission = 'allow' | 'ask' | 'deny';
|
|
18
18
|
export type AgentPermission = {
|
|
19
19
|
'*'?: ToolPermission;
|
|
@@ -206,6 +206,49 @@ export const ClaudeManagerPlugin = async ({ worktree, client }) => {
|
|
|
206
206
|
}, null, 2);
|
|
207
207
|
},
|
|
208
208
|
}),
|
|
209
|
+
confirm_plan: tool({
|
|
210
|
+
description: 'Persist plan confirmation and optional slice metadata after the user confirms a plan. For large tasks, provide a slice list to enable per-slice progress tracking. Set preAuthorized to true only when the user has explicitly said to proceed through all slices without further confirmation.',
|
|
211
|
+
args: {
|
|
212
|
+
summary: tool.schema.string().min(1),
|
|
213
|
+
taskSize: tool.schema.enum(['trivial', 'simple', 'large']),
|
|
214
|
+
slices: tool.schema.string().array().optional(),
|
|
215
|
+
preAuthorized: tool.schema.boolean().optional(),
|
|
216
|
+
},
|
|
217
|
+
async execute(args, context) {
|
|
218
|
+
const teamId = context.sessionID;
|
|
219
|
+
annotateToolRun(context, 'Persisting confirmed plan', {
|
|
220
|
+
teamId,
|
|
221
|
+
taskSize: args.taskSize,
|
|
222
|
+
sliceCount: args.slices?.length ?? 0,
|
|
223
|
+
});
|
|
224
|
+
const activePlan = await services.orchestrator.setActivePlan(context.worktree, teamId, {
|
|
225
|
+
summary: args.summary,
|
|
226
|
+
taskSize: args.taskSize,
|
|
227
|
+
slices: args.slices ?? [],
|
|
228
|
+
preAuthorized: args.preAuthorized ?? false,
|
|
229
|
+
});
|
|
230
|
+
return JSON.stringify(activePlan, null, 2);
|
|
231
|
+
},
|
|
232
|
+
}),
|
|
233
|
+
advance_slice: tool({
|
|
234
|
+
description: 'Mark a plan slice as done (or skipped) and advance to the next one. Use this after each slice completes to track large-task progress.',
|
|
235
|
+
args: {
|
|
236
|
+
sliceIndex: tool.schema.number(),
|
|
237
|
+
status: tool.schema.enum(['done', 'skipped']).optional(),
|
|
238
|
+
},
|
|
239
|
+
async execute(args, context) {
|
|
240
|
+
const teamId = context.sessionID;
|
|
241
|
+
const status = args.status ?? 'done';
|
|
242
|
+
annotateToolRun(context, `Advancing slice ${args.sliceIndex} → ${status}`, {
|
|
243
|
+
teamId,
|
|
244
|
+
sliceIndex: args.sliceIndex,
|
|
245
|
+
status,
|
|
246
|
+
});
|
|
247
|
+
await services.orchestrator.updateActivePlanSlice(context.worktree, teamId, args.sliceIndex, status);
|
|
248
|
+
const team = await services.orchestrator.getOrCreateTeam(context.worktree, teamId);
|
|
249
|
+
return JSON.stringify({ activePlan: team.activePlan ?? null }, null, 2);
|
|
250
|
+
},
|
|
251
|
+
}),
|
|
209
252
|
reset_engineer: tool({
|
|
210
253
|
description: 'Reset a stuck or corrupted engineer. Clears the busy flag. Optionally clears the Claude session (starts fresh) and/or wrapper history.',
|
|
211
254
|
args: {
|
package/dist/prompts/registry.js
CHANGED
|
@@ -4,7 +4,7 @@ export const managerPromptRegistry = {
|
|
|
4
4
|
'Your role is to decompose work, delegate precisely, review diffs for production risks, and verify outcomes.',
|
|
5
5
|
'You do not write code. All edits go through engineers. You multiply output by coordinating parallel work and catching issues others miss.',
|
|
6
6
|
'',
|
|
7
|
-
'# Operating Loop: Orient → Classify → Plan → Delegate → Review → Verify → Close',
|
|
7
|
+
'# Operating Loop: Orient → Classify → Plan → Confirm → Delegate → Review → Verify → Close',
|
|
8
8
|
'',
|
|
9
9
|
'## Orient: Understand the request',
|
|
10
10
|
'- Extract what you can from the user message, codebase (read/grep/glob/codesearch), prior engineer results, and `websearch`/`webfetch` when relevant.',
|
|
@@ -15,6 +15,7 @@ export const managerPromptRegistry = {
|
|
|
15
15
|
'',
|
|
16
16
|
'## Classify: Frame the work',
|
|
17
17
|
'- Is this a bug fix, feature, refactor, or something else?',
|
|
18
|
+
'- Task size: classify as trivial (single-line fix, unambiguous, no side effects), simple (one focused task, clear scope, 1–2 files), or large (multiple steps, cross-cutting changes, requires vertical slicing).',
|
|
18
19
|
'- What could go wrong? Is it reversible or irreversible? Can it fail in prod?',
|
|
19
20
|
'- Does it require careful rollout, data migration, observability, or backwards compatibility handling?',
|
|
20
21
|
'- Are there decisions the user has not explicitly made (architecture, scope, deployment strategy)?',
|
|
@@ -24,16 +25,29 @@ export const managerPromptRegistry = {
|
|
|
24
25
|
"- For medium or large tasks: use `task(subagent_type: 'team-planner', ...)` for dual-engineer exploration and plan synthesis.",
|
|
25
26
|
' - Team-planner automatically selects two non-overlapping engineers by availability and context; you may optionally specify lead and challenger.',
|
|
26
27
|
' - Challenger engineer identifies missing decisions, risks, and scope gaps before implementation.',
|
|
28
|
+
'- For large tasks: break into vertical slices before delegating. Each slice must deliver end-to-end, user-testable value independently (e.g., "user can register and receive a confirmation email", "user can view billing history"). Horizontal layers (e.g., "just types", "just tests") are not vertical slices. Document slices when calling `confirm_plan`.',
|
|
27
29
|
'- Break work into independent pieces that can run in parallel. Two engineers exploring then synthesizing beats one engineer doing everything sequentially.',
|
|
28
30
|
'- Before delegating, state your success criteria, not just the task. What done looks like. How you will verify it.',
|
|
29
31
|
'',
|
|
32
|
+
'## Confirm: Get user buy-in before implementing',
|
|
33
|
+
'- After planning but before dispatching any engineer in implement mode, present the plan to the user with the `question` tool.',
|
|
34
|
+
'- State what will be built or changed, which files or systems are affected, what success looks like, and any risks or open decisions.',
|
|
35
|
+
'- If team-planner synthesis surfaced a recommendedQuestion, include it here as part of the confirmation question.',
|
|
36
|
+
'- Do not proceed to implementation until the user confirms the plan.',
|
|
37
|
+
'- After the user confirms, call `confirm_plan` with a summary, taskSize, and (for large tasks) the slice list. Set preAuthorized: true only if the user explicitly says to proceed through all slices without further confirmation.',
|
|
38
|
+
'- For large tasks not preAuthorized: confirm each slice with the user before dispatching it.',
|
|
39
|
+
'- Skip `question` only when: the user has explicitly said "proceed" or "just do it", the change is a trivial fix with no ambiguity, or the task is purely exploratory (no edits).',
|
|
40
|
+
'- If the user refines or rejects the plan, revise it and re-confirm before implementing.',
|
|
41
|
+
'',
|
|
30
42
|
'## Delegate: Send precise assignments',
|
|
31
43
|
"- For single-engineer work: use `task(subagent_type: 'tom'|'john'|'maya'|'sara'|'alex', ...)` and structure the prompt with goal, acceptance criteria, relevant files, constraints, and verification.",
|
|
32
44
|
"- For dual-engineer planning: use `task(subagent_type: 'team-planner', ...)` which will lead + challenger synthesis.",
|
|
33
45
|
"- For browser/UI verification: use `task(subagent_type: 'browser-qa', ...)` with a clear verification goal. BrowserQA uses the Playwright skill to verify in a real browser and can run safe bash when needed.",
|
|
34
46
|
'- Each assignment includes: goal, acceptance criteria, relevant context, constraints, and verification method.',
|
|
47
|
+
'- For large tasks: after each slice completes, call `advance_slice` to record progress, then confirm the next slice with the user before dispatching (unless preAuthorized).',
|
|
35
48
|
'- Reuse the same engineer when follow-up work builds on their prior context.',
|
|
36
49
|
'- Only one implementing engineer modifies the worktree at a time. Parallelize exploration, research, and browser verification freely.',
|
|
50
|
+
'- Context warnings (moderate/high/critical) are informational only. Do NOT reset an engineer session in response to a context warning. Sessions auto-reset only on an actual contextExhausted error.',
|
|
37
51
|
'',
|
|
38
52
|
'## Review: Inspect diffs for production safety',
|
|
39
53
|
'- After an engineer reports implementation done, review the diff with `git_diff` before declaring it complete.',
|
|
@@ -66,6 +80,7 @@ export const managerPromptRegistry = {
|
|
|
66
80
|
'- Questions: Use the `question` tool when a decision will materially affect scope, architecture, or how you verify the outcome. Name the decision, offer 2–3 concrete options, state your recommendation, and say what breaks if the user picks differently. One high-leverage question at a time.',
|
|
67
81
|
'- Reframing: Before planning, ask what the user is actually trying to achieve, not just what they asked for. If the request sounds like a feature, ask what job-to-be-done it serves.',
|
|
68
82
|
'- Engineer selection: When assigning to a single engineer, prefer lower context pressure and less-recently-used engineers. Reuse if follow-up work builds on prior context.',
|
|
83
|
+
'- Context warnings: At moderate/high/critical context levels the system surfaces a warning. These are advisory — do not force session reset. Reserve reset for actual contextExhausted errors only.',
|
|
69
84
|
'- Failure handling:',
|
|
70
85
|
" - contextExhausted: The engineer's session ran out of tokens. The system automatically resets and retries once with the same task on a fresh session.",
|
|
71
86
|
' - sdkError or toolDenied: The underlying SDK failed or a tool call was denied. Investigate the error, adjust constraints, and retry.',
|
|
@@ -92,6 +107,7 @@ export const managerPromptRegistry = {
|
|
|
92
107
|
'',
|
|
93
108
|
'Your wrapper context from prior turns is reloaded automatically. Use it to avoid repeating work or re-explaining context that Claude Code already knows.',
|
|
94
109
|
"Return the tool result directly. Add your own commentary only when something was unexpected or needs the CTO's attention.",
|
|
110
|
+
'If you discover during implementation that the agreed approach is not viable (unexpected constraints, wrong files, missing context), stop immediately and surface the deviation to the CTO before proceeding with a different approach. Do not silently implement something different from what was confirmed.',
|
|
95
111
|
].join('\n'),
|
|
96
112
|
engineerSessionPrompt: [
|
|
97
113
|
'You are an expert software engineer working inside Claude Code.',
|
|
@@ -149,6 +165,7 @@ export const managerPromptRegistry = {
|
|
|
149
165
|
'- If lead and challenger engineer names are both specified, use them.',
|
|
150
166
|
'- If either name is missing, `plan_with_team` will auto-select two non-overlapping engineers based on availability and context.',
|
|
151
167
|
'Do not attempt any planning or analysis yourself. Delegate entirely to `plan_with_team`.',
|
|
168
|
+
'After `plan_with_team` returns, pass the full result back to the CTO unchanged. Do not modify, summarize, or act on the synthesis; the CTO will present it to the user for confirmation.',
|
|
152
169
|
].join('\n'),
|
|
153
170
|
browserQaAgentPrompt: [
|
|
154
171
|
"You are the browser QA specialist on the CTO's team.",
|
|
@@ -165,6 +182,7 @@ export const managerPromptRegistry = {
|
|
|
165
182
|
'- Never simulate or fabricate test results.',
|
|
166
183
|
'- If the Playwright tool is not available, the result will start with PLAYWRIGHT_UNAVAILABLE:.',
|
|
167
184
|
'- Your persistent Claude Code session remembers prior verification runs.',
|
|
185
|
+
'- If the verification scope changes unexpectedly (feature absent, URL wrong, task cannot be completed as specified), stop and report the scope mismatch rather than silently verifying something else.',
|
|
168
186
|
].join('\n'),
|
|
169
187
|
browserQaSessionPrompt: [
|
|
170
188
|
'You are a browser QA specialist. Your job is to verify web features and user flows using the Playwright skill/command.',
|
|
@@ -185,8 +203,8 @@ export const managerPromptRegistry = {
|
|
|
185
203
|
'Allowed tools: Playwright skill/command, safe bash, read-only tools (Read, Grep, Glob). No file editing or code modifications.',
|
|
186
204
|
].join('\n'),
|
|
187
205
|
contextWarnings: {
|
|
188
|
-
moderate: 'Engineer context is
|
|
189
|
-
high: 'Engineer context is
|
|
190
|
-
critical: 'Engineer context is near capacity ({percent}% estimated).
|
|
206
|
+
moderate: 'Engineer context is at {percent}% estimated. Session is healthy; keep the next task focused.',
|
|
207
|
+
high: 'Engineer context is at {percent}% estimated ({turns} turns, ${cost}). Session continues — prefer a narrowly scoped follow-up.',
|
|
208
|
+
critical: 'Engineer context is near capacity ({percent}% estimated). Warn only — do not force a reset; avoid large new tasks in this session.',
|
|
191
209
|
},
|
|
192
210
|
};
|
|
@@ -2,7 +2,7 @@ import type { ClaudeSessionEventHandler } from '../claude/claude-agent-sdk-adapt
|
|
|
2
2
|
import type { ClaudeSessionService } from '../claude/claude-session.service.js';
|
|
3
3
|
import type { TeamStateStore } from '../state/team-state-store.js';
|
|
4
4
|
import type { TranscriptStore } from '../state/transcript-store.js';
|
|
5
|
-
import type { EngineerFailureResult, EngineerName, EngineerTaskResult, EngineerWorkMode, SynthesizedPlanResult, TeamRecord, WorkerCapabilities } from '../types/contracts.js';
|
|
5
|
+
import type { ActivePlan, EngineerFailureResult, EngineerName, EngineerTaskResult, EngineerWorkMode, SynthesizedPlanResult, TaskSize, TeamRecord, WorkerCapabilities } from '../types/contracts.js';
|
|
6
6
|
interface DispatchEngineerInput {
|
|
7
7
|
teamId: string;
|
|
8
8
|
cwd: string;
|
|
@@ -59,6 +59,15 @@ export declare class TeamOrchestrator {
|
|
|
59
59
|
lead: EngineerName;
|
|
60
60
|
challenger: EngineerName;
|
|
61
61
|
}>;
|
|
62
|
+
getActivePlan(cwd: string, teamId: string): Promise<ActivePlan | null>;
|
|
63
|
+
setActivePlan(cwd: string, teamId: string, plan: {
|
|
64
|
+
summary: string;
|
|
65
|
+
taskSize: TaskSize;
|
|
66
|
+
slices: string[];
|
|
67
|
+
preAuthorized: boolean;
|
|
68
|
+
}): Promise<ActivePlan>;
|
|
69
|
+
clearActivePlan(cwd: string, teamId: string): Promise<void>;
|
|
70
|
+
updateActivePlanSlice(cwd: string, teamId: string, sliceIndex: number, status: 'in_progress' | 'done' | 'skipped'): Promise<void>;
|
|
62
71
|
private buildSessionSystemPrompt;
|
|
63
72
|
private buildEngineerPrompt;
|
|
64
73
|
}
|
|
@@ -383,6 +383,81 @@ export class TeamOrchestrator {
|
|
|
383
383
|
}
|
|
384
384
|
return { lead, challenger };
|
|
385
385
|
}
|
|
386
|
+
async getActivePlan(cwd, teamId) {
|
|
387
|
+
const team = await this.getOrCreateTeam(cwd, teamId);
|
|
388
|
+
return team.activePlan ?? null;
|
|
389
|
+
}
|
|
390
|
+
async setActivePlan(cwd, teamId, plan) {
|
|
391
|
+
await this.getOrCreateTeam(cwd, teamId);
|
|
392
|
+
const now = new Date().toISOString();
|
|
393
|
+
const slices = plan.slices.map((description, index) => ({
|
|
394
|
+
index,
|
|
395
|
+
description,
|
|
396
|
+
status: 'pending',
|
|
397
|
+
}));
|
|
398
|
+
const activePlan = {
|
|
399
|
+
id: `plan-${Date.now()}`,
|
|
400
|
+
summary: plan.summary,
|
|
401
|
+
taskSize: plan.taskSize,
|
|
402
|
+
createdAt: now,
|
|
403
|
+
confirmedAt: now,
|
|
404
|
+
preAuthorized: plan.preAuthorized,
|
|
405
|
+
slices,
|
|
406
|
+
currentSliceIndex: slices.length > 0 ? 0 : null,
|
|
407
|
+
};
|
|
408
|
+
await this.teamStore.updateTeam(cwd, teamId, (team) => ({
|
|
409
|
+
...team,
|
|
410
|
+
updatedAt: now,
|
|
411
|
+
activePlan,
|
|
412
|
+
}));
|
|
413
|
+
return activePlan;
|
|
414
|
+
}
|
|
415
|
+
async clearActivePlan(cwd, teamId) {
|
|
416
|
+
await this.getOrCreateTeam(cwd, teamId);
|
|
417
|
+
const now = new Date().toISOString();
|
|
418
|
+
await this.teamStore.updateTeam(cwd, teamId, (team) => ({
|
|
419
|
+
...team,
|
|
420
|
+
updatedAt: now,
|
|
421
|
+
activePlan: undefined,
|
|
422
|
+
}));
|
|
423
|
+
}
|
|
424
|
+
async updateActivePlanSlice(cwd, teamId, sliceIndex, status) {
|
|
425
|
+
await this.getOrCreateTeam(cwd, teamId);
|
|
426
|
+
const now = new Date().toISOString();
|
|
427
|
+
await this.teamStore.updateTeam(cwd, teamId, (team) => {
|
|
428
|
+
if (!team.activePlan) {
|
|
429
|
+
throw new Error(`Cannot update slice: team "${teamId}" has no active plan. Call confirm_plan first.`);
|
|
430
|
+
}
|
|
431
|
+
const sliceExists = team.activePlan.slices.some((s) => s.index === sliceIndex);
|
|
432
|
+
if (!sliceExists) {
|
|
433
|
+
const sliceCount = team.activePlan.slices.length;
|
|
434
|
+
const rangeMsg = sliceCount === 0 ? 'plan has no slices' : `valid range: 0–${sliceCount - 1}`;
|
|
435
|
+
throw new Error(`Cannot update slice: slice index ${sliceIndex} does not exist in active plan "${team.activePlan.id}" (${rangeMsg}).`);
|
|
436
|
+
}
|
|
437
|
+
const slices = team.activePlan.slices.map((s) => s.index === sliceIndex
|
|
438
|
+
? {
|
|
439
|
+
...s,
|
|
440
|
+
status,
|
|
441
|
+
...(status === 'done' || status === 'skipped' ? { completedAt: now } : {}),
|
|
442
|
+
}
|
|
443
|
+
: s);
|
|
444
|
+
const isLastSlice = sliceIndex === team.activePlan.slices.length - 1;
|
|
445
|
+
const nextIndex = status === 'done' || status === 'skipped'
|
|
446
|
+
? isLastSlice
|
|
447
|
+
? null
|
|
448
|
+
: sliceIndex + 1
|
|
449
|
+
: team.activePlan.currentSliceIndex;
|
|
450
|
+
return {
|
|
451
|
+
...team,
|
|
452
|
+
updatedAt: now,
|
|
453
|
+
activePlan: {
|
|
454
|
+
...team.activePlan,
|
|
455
|
+
slices,
|
|
456
|
+
currentSliceIndex: nextIndex,
|
|
457
|
+
},
|
|
458
|
+
};
|
|
459
|
+
});
|
|
460
|
+
}
|
|
386
461
|
buildSessionSystemPrompt(engineer, mode) {
|
|
387
462
|
const specialistPrompt = this.workerCapabilities[engineer]?.sessionPrompt;
|
|
388
463
|
if (specialistPrompt) {
|
|
@@ -416,7 +491,8 @@ function buildModeInstruction(mode) {
|
|
|
416
491
|
case 'implement':
|
|
417
492
|
return [
|
|
418
493
|
'Implementation mode.',
|
|
419
|
-
'
|
|
494
|
+
'Before making any edits, state a brief implementation plan: which files you will change, what each change does, and why.',
|
|
495
|
+
'Then make the changes, run the most relevant verification (tests, lint, typecheck), and report what changed and what you verified.',
|
|
420
496
|
'Before reporting done, review your own diff for issues that pass tests but break in production.',
|
|
421
497
|
].join(' ');
|
|
422
498
|
case 'verify':
|
|
@@ -11,9 +11,9 @@ export declare const ENGINEER_AGENT_IDS: {
|
|
|
11
11
|
};
|
|
12
12
|
/** General named engineers only (Tom/John/Maya/Sara/Alex). BrowserQA is a specialist registered separately. */
|
|
13
13
|
export declare const ENGINEER_AGENT_NAMES: readonly ["Tom", "John", "Maya", "Sara", "Alex"];
|
|
14
|
-
export declare const CTO_ONLY_TOOL_IDS: readonly ["team_status", "reset_engineer", "list_transcripts", "list_history", "git_diff", "git_commit", "git_reset", "git_status", "git_log", "approval_policy", "approval_decisions", "approval_update"];
|
|
14
|
+
export declare const CTO_ONLY_TOOL_IDS: readonly ["team_status", "reset_engineer", "confirm_plan", "advance_slice", "list_transcripts", "list_history", "git_diff", "git_commit", "git_reset", "git_status", "git_log", "approval_policy", "approval_decisions", "approval_update"];
|
|
15
15
|
export declare const ENGINEER_TOOL_IDS: readonly ["claude"];
|
|
16
|
-
export declare const ALL_RESTRICTED_TOOL_IDS: readonly ["team_status", "reset_engineer", "list_transcripts", "list_history", "git_diff", "git_commit", "git_reset", "git_status", "git_log", "approval_policy", "approval_decisions", "approval_update", "claude"];
|
|
16
|
+
export declare const ALL_RESTRICTED_TOOL_IDS: readonly ["team_status", "reset_engineer", "confirm_plan", "advance_slice", "list_transcripts", "list_history", "git_diff", "git_commit", "git_reset", "git_status", "git_log", "approval_policy", "approval_decisions", "approval_update", "claude"];
|
|
17
17
|
export type ToolPermission = 'allow' | 'ask' | 'deny';
|
|
18
18
|
export type AgentPermission = {
|
|
19
19
|
'*'?: ToolPermission;
|
|
@@ -206,6 +206,49 @@ export const ClaudeManagerPlugin = async ({ worktree, client }) => {
|
|
|
206
206
|
}, null, 2);
|
|
207
207
|
},
|
|
208
208
|
}),
|
|
209
|
+
confirm_plan: tool({
|
|
210
|
+
description: 'Persist plan confirmation and optional slice metadata after the user confirms a plan. For large tasks, provide a slice list to enable per-slice progress tracking. Set preAuthorized to true only when the user has explicitly said to proceed through all slices without further confirmation.',
|
|
211
|
+
args: {
|
|
212
|
+
summary: tool.schema.string().min(1),
|
|
213
|
+
taskSize: tool.schema.enum(['trivial', 'simple', 'large']),
|
|
214
|
+
slices: tool.schema.string().array().optional(),
|
|
215
|
+
preAuthorized: tool.schema.boolean().optional(),
|
|
216
|
+
},
|
|
217
|
+
async execute(args, context) {
|
|
218
|
+
const teamId = context.sessionID;
|
|
219
|
+
annotateToolRun(context, 'Persisting confirmed plan', {
|
|
220
|
+
teamId,
|
|
221
|
+
taskSize: args.taskSize,
|
|
222
|
+
sliceCount: args.slices?.length ?? 0,
|
|
223
|
+
});
|
|
224
|
+
const activePlan = await services.orchestrator.setActivePlan(context.worktree, teamId, {
|
|
225
|
+
summary: args.summary,
|
|
226
|
+
taskSize: args.taskSize,
|
|
227
|
+
slices: args.slices ?? [],
|
|
228
|
+
preAuthorized: args.preAuthorized ?? false,
|
|
229
|
+
});
|
|
230
|
+
return JSON.stringify(activePlan, null, 2);
|
|
231
|
+
},
|
|
232
|
+
}),
|
|
233
|
+
advance_slice: tool({
|
|
234
|
+
description: 'Mark a plan slice as done (or skipped) and advance to the next one. Use this after each slice completes to track large-task progress.',
|
|
235
|
+
args: {
|
|
236
|
+
sliceIndex: tool.schema.number(),
|
|
237
|
+
status: tool.schema.enum(['done', 'skipped']).optional(),
|
|
238
|
+
},
|
|
239
|
+
async execute(args, context) {
|
|
240
|
+
const teamId = context.sessionID;
|
|
241
|
+
const status = args.status ?? 'done';
|
|
242
|
+
annotateToolRun(context, `Advancing slice ${args.sliceIndex} → ${status}`, {
|
|
243
|
+
teamId,
|
|
244
|
+
sliceIndex: args.sliceIndex,
|
|
245
|
+
status,
|
|
246
|
+
});
|
|
247
|
+
await services.orchestrator.updateActivePlanSlice(context.worktree, teamId, args.sliceIndex, status);
|
|
248
|
+
const team = await services.orchestrator.getOrCreateTeam(context.worktree, teamId);
|
|
249
|
+
return JSON.stringify({ activePlan: team.activePlan ?? null }, null, 2);
|
|
250
|
+
},
|
|
251
|
+
}),
|
|
209
252
|
reset_engineer: tool({
|
|
210
253
|
description: 'Reset a stuck or corrupted engineer. Clears the busy flag. Optionally clears the Claude session (starts fresh) and/or wrapper history.',
|
|
211
254
|
args: {
|
|
@@ -4,7 +4,7 @@ export const managerPromptRegistry = {
|
|
|
4
4
|
'Your role is to decompose work, delegate precisely, review diffs for production risks, and verify outcomes.',
|
|
5
5
|
'You do not write code. All edits go through engineers. You multiply output by coordinating parallel work and catching issues others miss.',
|
|
6
6
|
'',
|
|
7
|
-
'# Operating Loop: Orient → Classify → Plan → Delegate → Review → Verify → Close',
|
|
7
|
+
'# Operating Loop: Orient → Classify → Plan → Confirm → Delegate → Review → Verify → Close',
|
|
8
8
|
'',
|
|
9
9
|
'## Orient: Understand the request',
|
|
10
10
|
'- Extract what you can from the user message, codebase (read/grep/glob/codesearch), prior engineer results, and `websearch`/`webfetch` when relevant.',
|
|
@@ -15,6 +15,7 @@ export const managerPromptRegistry = {
|
|
|
15
15
|
'',
|
|
16
16
|
'## Classify: Frame the work',
|
|
17
17
|
'- Is this a bug fix, feature, refactor, or something else?',
|
|
18
|
+
'- Task size: classify as trivial (single-line fix, unambiguous, no side effects), simple (one focused task, clear scope, 1–2 files), or large (multiple steps, cross-cutting changes, requires vertical slicing).',
|
|
18
19
|
'- What could go wrong? Is it reversible or irreversible? Can it fail in prod?',
|
|
19
20
|
'- Does it require careful rollout, data migration, observability, or backwards compatibility handling?',
|
|
20
21
|
'- Are there decisions the user has not explicitly made (architecture, scope, deployment strategy)?',
|
|
@@ -24,16 +25,29 @@ export const managerPromptRegistry = {
|
|
|
24
25
|
"- For medium or large tasks: use `task(subagent_type: 'team-planner', ...)` for dual-engineer exploration and plan synthesis.",
|
|
25
26
|
' - Team-planner automatically selects two non-overlapping engineers by availability and context; you may optionally specify lead and challenger.',
|
|
26
27
|
' - Challenger engineer identifies missing decisions, risks, and scope gaps before implementation.',
|
|
28
|
+
'- For large tasks: break into vertical slices before delegating. Each slice must deliver end-to-end, user-testable value independently (e.g., "user can register and receive a confirmation email", "user can view billing history"). Horizontal layers (e.g., "just types", "just tests") are not vertical slices. Document slices when calling `confirm_plan`.',
|
|
27
29
|
'- Break work into independent pieces that can run in parallel. Two engineers exploring then synthesizing beats one engineer doing everything sequentially.',
|
|
28
30
|
'- Before delegating, state your success criteria, not just the task. What done looks like. How you will verify it.',
|
|
29
31
|
'',
|
|
32
|
+
'## Confirm: Get user buy-in before implementing',
|
|
33
|
+
'- After planning but before dispatching any engineer in implement mode, present the plan to the user with the `question` tool.',
|
|
34
|
+
'- State what will be built or changed, which files or systems are affected, what success looks like, and any risks or open decisions.',
|
|
35
|
+
'- If team-planner synthesis surfaced a recommendedQuestion, include it here as part of the confirmation question.',
|
|
36
|
+
'- Do not proceed to implementation until the user confirms the plan.',
|
|
37
|
+
'- After the user confirms, call `confirm_plan` with a summary, taskSize, and (for large tasks) the slice list. Set preAuthorized: true only if the user explicitly says to proceed through all slices without further confirmation.',
|
|
38
|
+
'- For large tasks not preAuthorized: confirm each slice with the user before dispatching it.',
|
|
39
|
+
'- Skip `question` only when: the user has explicitly said "proceed" or "just do it", the change is a trivial fix with no ambiguity, or the task is purely exploratory (no edits).',
|
|
40
|
+
'- If the user refines or rejects the plan, revise it and re-confirm before implementing.',
|
|
41
|
+
'',
|
|
30
42
|
'## Delegate: Send precise assignments',
|
|
31
43
|
"- For single-engineer work: use `task(subagent_type: 'tom'|'john'|'maya'|'sara'|'alex', ...)` and structure the prompt with goal, acceptance criteria, relevant files, constraints, and verification.",
|
|
32
44
|
"- For dual-engineer planning: use `task(subagent_type: 'team-planner', ...)` which will lead + challenger synthesis.",
|
|
33
45
|
"- For browser/UI verification: use `task(subagent_type: 'browser-qa', ...)` with a clear verification goal. BrowserQA uses the Playwright skill to verify in a real browser and can run safe bash when needed.",
|
|
34
46
|
'- Each assignment includes: goal, acceptance criteria, relevant context, constraints, and verification method.',
|
|
47
|
+
'- For large tasks: after each slice completes, call `advance_slice` to record progress, then confirm the next slice with the user before dispatching (unless preAuthorized).',
|
|
35
48
|
'- Reuse the same engineer when follow-up work builds on their prior context.',
|
|
36
49
|
'- Only one implementing engineer modifies the worktree at a time. Parallelize exploration, research, and browser verification freely.',
|
|
50
|
+
'- Context warnings (moderate/high/critical) are informational only. Do NOT reset an engineer session in response to a context warning. Sessions auto-reset only on an actual contextExhausted error.',
|
|
37
51
|
'',
|
|
38
52
|
'## Review: Inspect diffs for production safety',
|
|
39
53
|
'- After an engineer reports implementation done, review the diff with `git_diff` before declaring it complete.',
|
|
@@ -66,6 +80,7 @@ export const managerPromptRegistry = {
|
|
|
66
80
|
'- Questions: Use the `question` tool when a decision will materially affect scope, architecture, or how you verify the outcome. Name the decision, offer 2–3 concrete options, state your recommendation, and say what breaks if the user picks differently. One high-leverage question at a time.',
|
|
67
81
|
'- Reframing: Before planning, ask what the user is actually trying to achieve, not just what they asked for. If the request sounds like a feature, ask what job-to-be-done it serves.',
|
|
68
82
|
'- Engineer selection: When assigning to a single engineer, prefer lower context pressure and less-recently-used engineers. Reuse if follow-up work builds on prior context.',
|
|
83
|
+
'- Context warnings: At moderate/high/critical context levels the system surfaces a warning. These are advisory — do not force session reset. Reserve reset for actual contextExhausted errors only.',
|
|
69
84
|
'- Failure handling:',
|
|
70
85
|
" - contextExhausted: The engineer's session ran out of tokens. The system automatically resets and retries once with the same task on a fresh session.",
|
|
71
86
|
' - sdkError or toolDenied: The underlying SDK failed or a tool call was denied. Investigate the error, adjust constraints, and retry.',
|
|
@@ -92,6 +107,7 @@ export const managerPromptRegistry = {
|
|
|
92
107
|
'',
|
|
93
108
|
'Your wrapper context from prior turns is reloaded automatically. Use it to avoid repeating work or re-explaining context that Claude Code already knows.',
|
|
94
109
|
"Return the tool result directly. Add your own commentary only when something was unexpected or needs the CTO's attention.",
|
|
110
|
+
'If you discover during implementation that the agreed approach is not viable (unexpected constraints, wrong files, missing context), stop immediately and surface the deviation to the CTO before proceeding with a different approach. Do not silently implement something different from what was confirmed.',
|
|
95
111
|
].join('\n'),
|
|
96
112
|
engineerSessionPrompt: [
|
|
97
113
|
'You are an expert software engineer working inside Claude Code.',
|
|
@@ -149,6 +165,7 @@ export const managerPromptRegistry = {
|
|
|
149
165
|
'- If lead and challenger engineer names are both specified, use them.',
|
|
150
166
|
'- If either name is missing, `plan_with_team` will auto-select two non-overlapping engineers based on availability and context.',
|
|
151
167
|
'Do not attempt any planning or analysis yourself. Delegate entirely to `plan_with_team`.',
|
|
168
|
+
'After `plan_with_team` returns, pass the full result back to the CTO unchanged. Do not modify, summarize, or act on the synthesis; the CTO will present it to the user for confirmation.',
|
|
152
169
|
].join('\n'),
|
|
153
170
|
browserQaAgentPrompt: [
|
|
154
171
|
"You are the browser QA specialist on the CTO's team.",
|
|
@@ -165,6 +182,7 @@ export const managerPromptRegistry = {
|
|
|
165
182
|
'- Never simulate or fabricate test results.',
|
|
166
183
|
'- If the Playwright tool is not available, the result will start with PLAYWRIGHT_UNAVAILABLE:.',
|
|
167
184
|
'- Your persistent Claude Code session remembers prior verification runs.',
|
|
185
|
+
'- If the verification scope changes unexpectedly (feature absent, URL wrong, task cannot be completed as specified), stop and report the scope mismatch rather than silently verifying something else.',
|
|
168
186
|
].join('\n'),
|
|
169
187
|
browserQaSessionPrompt: [
|
|
170
188
|
'You are a browser QA specialist. Your job is to verify web features and user flows using the Playwright skill/command.',
|
|
@@ -185,8 +203,8 @@ export const managerPromptRegistry = {
|
|
|
185
203
|
'Allowed tools: Playwright skill/command, safe bash, read-only tools (Read, Grep, Glob). No file editing or code modifications.',
|
|
186
204
|
].join('\n'),
|
|
187
205
|
contextWarnings: {
|
|
188
|
-
moderate: 'Engineer context is
|
|
189
|
-
high: 'Engineer context is
|
|
190
|
-
critical: 'Engineer context is near capacity ({percent}% estimated).
|
|
206
|
+
moderate: 'Engineer context is at {percent}% estimated. Session is healthy; keep the next task focused.',
|
|
207
|
+
high: 'Engineer context is at {percent}% estimated ({turns} turns, ${cost}). Session continues — prefer a narrowly scoped follow-up.',
|
|
208
|
+
critical: 'Engineer context is near capacity ({percent}% estimated). Warn only — do not force a reset; avoid large new tasks in this session.',
|
|
191
209
|
},
|
|
192
210
|
};
|
|
@@ -116,6 +116,24 @@ export interface SessionContextSnapshot {
|
|
|
116
116
|
warningLevel: ContextWarningLevel;
|
|
117
117
|
compactionCount: number;
|
|
118
118
|
}
|
|
119
|
+
/**
 * Size classification the CTO assigns to a task when confirming a plan.
 * Per the CTO prompt: `trivial` = single-line, unambiguous fix with no side effects;
 * `simple` = one focused task with clear scope (1–2 files);
 * `large` = multi-step, cross-cutting work that is broken into vertical slices.
 */
export type TaskSize = 'trivial' | 'simple' | 'large';
|
|
120
|
+
/** One slice of an {@link ActivePlan}, tracked individually as the plan progresses. */
export interface PlanSlice {
|
|
121
|
+
/** Zero-based position of this slice within the plan's `slices` array. */
index: number;
|
|
122
|
+
/** Human-readable description of what the slice delivers. */
description: string;
|
|
123
|
+
/** Lifecycle state; slices start out as `pending` when a plan is set. */
status: 'pending' | 'in_progress' | 'done' | 'skipped';
|
|
124
|
+
/** Set once the slice has been advanced; presumably an ISO timestamp string — TODO confirm against the orchestrator. */
completedAt?: string;
|
|
125
|
+
}
|
|
126
|
+
/** The user-confirmed plan currently attached to a team (see `TeamRecord.activePlan`). */
export interface ActivePlan {
|
|
127
|
+
/** Plan identifier; generated ids start with `plan-`. */
id: string;
|
|
128
|
+
/** Short summary of what the plan will build or change. */
summary: string;
|
|
129
|
+
/** Size classification chosen when the plan was confirmed. */
taskSize: TaskSize;
|
|
130
|
+
/** When the plan record was created (string-encoded; presumably ISO 8601 — TODO confirm). */
createdAt: string;
|
|
131
|
+
/** When the plan was confirmed, or null if it has not been confirmed. */
confirmedAt: string | null;
|
|
132
|
+
/** True when the user explicitly authorized proceeding through all slices without per-slice confirmation. */
preAuthorized: boolean;
|
|
133
|
+
/** Ordered slices of the plan; empty for plans that are not sliced. */
slices: PlanSlice[];
|
|
134
|
+
/** Index of the slice to work on next. Null when the plan has no slices (trivial/simple tasks) or once the final slice has been advanced. */
|
|
135
|
+
currentSliceIndex: number | null;
|
|
136
|
+
}
|
|
119
137
|
export interface TeamEngineerRecord {
|
|
120
138
|
name: EngineerName;
|
|
121
139
|
wrapperSessionId: string | null;
|
|
@@ -142,6 +160,7 @@ export interface TeamRecord {
|
|
|
142
160
|
createdAt: string;
|
|
143
161
|
updatedAt: string;
|
|
144
162
|
engineers: TeamEngineerRecord[];
|
|
163
|
+
activePlan?: ActivePlan;
|
|
145
164
|
}
|
|
146
165
|
export interface EngineerTaskResult {
|
|
147
166
|
teamId: string;
|
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
import { describe, expect, it } from 'vitest';
|
|
1
|
+
import { afterEach, describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { mkdtemp, rm } from 'node:fs/promises';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
2
5
|
import { ClaudeManagerPlugin } from '../src/plugin/claude-manager.plugin.js';
|
|
3
6
|
import { AGENT_CTO, AGENT_TEAM_PLANNER, ENGINEER_AGENT_IDS, ENGINEER_AGENT_NAMES, } from '../src/plugin/agent-hierarchy.js';
|
|
7
|
+
import { clearPluginServices } from '../src/plugin/service-factory.js';
|
|
4
8
|
describe('ClaudeManagerPlugin', () => {
|
|
5
9
|
it('configures CTO with orchestration tools and question access', async () => {
|
|
6
10
|
const plugin = await ClaudeManagerPlugin({
|
|
@@ -27,6 +31,8 @@ describe('ClaudeManagerPlugin', () => {
|
|
|
27
31
|
question: 'allow',
|
|
28
32
|
team_status: 'allow',
|
|
29
33
|
reset_engineer: 'allow',
|
|
34
|
+
confirm_plan: 'allow',
|
|
35
|
+
advance_slice: 'allow',
|
|
30
36
|
git_diff: 'allow',
|
|
31
37
|
git_commit: 'allow',
|
|
32
38
|
git_reset: 'allow',
|
|
@@ -121,6 +127,8 @@ describe('ClaudeManagerPlugin', () => {
|
|
|
121
127
|
expect(tools['claude']).toBeDefined();
|
|
122
128
|
expect(tools['team_status']).toBeDefined();
|
|
123
129
|
expect(tools['plan_with_team']).toBeDefined();
|
|
130
|
+
expect(tools['confirm_plan']).toBeDefined();
|
|
131
|
+
expect(tools['advance_slice']).toBeDefined();
|
|
124
132
|
expect(tools['reset_engineer']).toBeDefined();
|
|
125
133
|
expect(tools['assign_engineer']).toBeUndefined();
|
|
126
134
|
});
|
|
@@ -144,6 +152,63 @@ describe('ClaudeManagerPlugin', () => {
|
|
|
144
152
|
expect(modelSchema.safeParse(undefined).success).toBe(true);
|
|
145
153
|
expect(modelSchema.safeParse('claude-haiku-4-5').success).toBe(false);
|
|
146
154
|
});
|
|
155
|
+
it('confirm_plan tool validates taskSize enum and requires summary', async () => {
    // Build the plugin and pull the argument schemas off the registered tool.
    const { tool: registeredTools } = await ClaudeManagerPlugin({ worktree: '/tmp/project' });
    const confirmPlanTool = registeredTools['confirm_plan'];
    expect(confirmPlanTool).toBeDefined();
    const { summary, taskSize, slices, preAuthorized } = confirmPlanTool.args;
    // Small helper: does the schema accept this candidate value?
    const accepts = (schema, candidate) => schema.safeParse(candidate).success;
    // summary is required and must be non-empty
    expect(accepts(summary, 'Billing refactor')).toBe(true);
    expect(accepts(summary, '')).toBe(false);
    // taskSize only admits the three known classifications
    for (const size of ['trivial', 'simple', 'large']) {
        expect(accepts(taskSize, size)).toBe(true);
    }
    for (const size of ['medium', 'huge']) {
        expect(accepts(taskSize, size)).toBe(false);
    }
    // slices is optional — absent and array both valid
    for (const candidate of [undefined, ['slice A', 'slice B']]) {
        expect(accepts(slices, candidate)).toBe(true);
    }
    // preAuthorized is optional boolean
    for (const flag of [true, false, undefined]) {
        expect(accepts(preAuthorized, flag)).toBe(true);
    }
});
|
|
181
|
+
it('advance_slice tool validates sliceIndex and optional status enum', async () => {
    // Build the plugin and pull the argument schemas off the registered tool.
    const { tool: registeredTools } = await ClaudeManagerPlugin({ worktree: '/tmp/project' });
    const advanceSliceTool = registeredTools['advance_slice'];
    expect(advanceSliceTool).toBeDefined();
    const { sliceIndex, status } = advanceSliceTool.args;
    const accepts = (schema, candidate) => schema.safeParse(candidate).success;
    // sliceIndex must be a number, not a numeric string
    for (const index of [0, 2]) {
        expect(accepts(sliceIndex, index)).toBe(true);
    }
    expect(accepts(sliceIndex, '0')).toBe(false);
    // status may be omitted, but when present only terminal states are allowed
    for (const candidate of ['done', 'skipped', undefined]) {
        expect(accepts(status, candidate)).toBe(true);
    }
    expect(accepts(status, 'in_progress')).toBe(false);
});
|
|
198
|
+
it('confirm_plan and advance_slice are denied for engineers', async () => {
    const plugin = await ClaudeManagerPlugin({ worktree: '/tmp/project' });
    // Let the plugin populate an empty config, then inspect the agent entries it wrote.
    const config = {};
    await plugin.config?.(config);
    const agentsById = config.agent ?? {};
    for (const engineerName of ENGINEER_AGENT_NAMES) {
        const { permission } = agentsById[ENGINEER_AGENT_IDS[engineerName]];
        // Plan lifecycle tools belong to the CTO only.
        expect(permission['confirm_plan']).toBe('deny');
        expect(permission['advance_slice']).toBe('deny');
    }
});
|
|
147
212
|
it('exposes hooks for CTO team tracking and wrapper memory injection', async () => {
|
|
148
213
|
const plugin = await ClaudeManagerPlugin({
|
|
149
214
|
worktree: '/tmp/project',
|
|
@@ -270,3 +335,109 @@ describe('Agent ID normalization and lookup helpers', () => {
|
|
|
270
335
|
expect(agents['browser-qa']).toBeDefined();
|
|
271
336
|
});
|
|
272
337
|
});
|
|
338
|
+
describe('confirm_plan and advance_slice tool execution', () => {
    // Temp worktree for the current test; removed again in afterEach.
    let tempRoot;
    /**
     * Creates a fresh temp worktree, boots the plugin against it, and returns the
     * registered tools together with a minimal CTO tool context for `sessionID`.
     */
    const bootPluginFor = async (sessionID) => {
        tempRoot = await mkdtemp(join(tmpdir(), 'plugin-exec-'));
        const { tool: tools } = await ClaudeManagerPlugin({ worktree: tempRoot });
        const context = {
            sessionID,
            worktree: tempRoot,
            agent: AGENT_CTO,
            metadata: vi.fn(),
        };
        return { tools, context };
    };
    afterEach(async () => {
        clearPluginServices();
        if (tempRoot) {
            await rm(tempRoot, { recursive: true, force: true });
        }
    });
    it('confirm_plan persists an active plan and returns it as JSON', async () => {
        const { tools, context } = await bootPluginFor('cto-sess-confirm');
        const planArgs = {
            summary: 'Add billing history',
            taskSize: 'large',
            slices: ['user can view invoices', 'user can update payment method'],
            preAuthorized: false,
        };
        const activePlan = JSON.parse(await tools['confirm_plan'].execute(planArgs, context));
        expect(activePlan['summary']).toBe('Add billing history');
        expect(activePlan['taskSize']).toBe('large');
        expect(activePlan['currentSliceIndex']).toBe(0);
        expect(activePlan['preAuthorized']).toBe(false);
        expect(activePlan['slices'].length).toBe(2);
    });
    it('advance_slice marks a slice done and returns updated plan state', async () => {
        const { tools, context } = await bootPluginFor('cto-sess-advance');
        // Seed a plan with two slices
        await tools['confirm_plan'].execute({
            summary: 'Two-slice task',
            taskSize: 'large',
            slices: ['user can log in', 'user can log out'],
            preAuthorized: false,
        }, context);
        // Completing slice 0 (not the last one) should move the cursor to slice 1
        const raw = await tools['advance_slice'].execute({ sliceIndex: 0, status: 'done' }, context);
        const { activePlan } = JSON.parse(raw);
        const planSlices = activePlan['slices'];
        expect(planSlices[0]['status']).toBe('done');
        expect(activePlan['currentSliceIndex']).toBe(1);
    });
    it('advance_slice sets currentSliceIndex to null when completing the final slice', async () => {
        const { tools, context } = await bootPluginFor('cto-sess-final');
        await tools['confirm_plan'].execute({
            summary: 'Single-slice task',
            taskSize: 'large',
            slices: ['ship the feature'],
            preAuthorized: true,
        }, context);
        const raw = await tools['advance_slice'].execute({ sliceIndex: 0, status: 'done' }, context);
        const { activePlan } = JSON.parse(raw);
        expect(activePlan['currentSliceIndex']).toBeNull();
    });
    it('advance_slice throws when there is no active plan', async () => {
        const { tools, context } = await bootPluginFor('cto-sess-no-plan');
        const attempt = tools['advance_slice'].execute({ sliceIndex: 0 }, context);
        await expect(attempt).rejects.toThrow('has no active plan');
    });
    it('advance_slice throws when the slice index is invalid', async () => {
        const { tools, context } = await bootPluginFor('cto-sess-bad-idx');
        await tools['confirm_plan'].execute({
            summary: 'Two-slice task',
            taskSize: 'large',
            slices: ['slice A', 'slice B'],
            preAuthorized: false,
        }, context);
        const attempt = tools['advance_slice'].execute({ sliceIndex: 99 }, context);
        await expect(attempt).rejects.toThrow('slice index 99 does not exist');
    });
});
|
|
@@ -45,6 +45,26 @@ describe('managerPromptRegistry', () => {
|
|
|
45
45
|
expect(managerPromptRegistry.teamPlannerPrompt).toContain('auto-select');
|
|
46
46
|
expect(managerPromptRegistry.teamPlannerPrompt).toContain('engineer');
|
|
47
47
|
});
|
|
48
|
+
it('ctoSystemPrompt includes Confirm step in operating loop before Delegate', () => {
    const { ctoSystemPrompt: prompt } = managerPromptRegistry;
    const requiredPhrases = [
        'Orient → Classify → Plan → Confirm → Delegate',
        'Confirm: Get user buy-in before implementing',
        'recommendedQuestion',
    ];
    for (const phrase of requiredPhrases) {
        expect(prompt).toContain(phrase);
    }
    // The Confirm section has to exist and precede the Delegate section
    const confirmPos = prompt.indexOf('## Confirm:');
    const delegatePos = prompt.indexOf('## Delegate:');
    expect(confirmPos).toBeGreaterThan(-1);
    expect(confirmPos).toBeLessThan(delegatePos);
});
|
|
58
|
+
it('engineerAgentPrompt instructs engineers to surface plan deviations', () => {
    const { engineerAgentPrompt } = managerPromptRegistry;
    // Both keywords of the "surface the deviation" instruction must be present
    for (const keyword of ['deviation', 'surface']) {
        expect(engineerAgentPrompt).toContain(keyword);
    }
});
|
|
62
|
+
it('browserQaAgentPrompt instructs browser-qa to report scope mismatches', () => {
    // The prompt must tell browser-qa to report, not silently work around, a scope mismatch
    const { browserQaAgentPrompt } = managerPromptRegistry;
    expect(browserQaAgentPrompt).toContain('scope mismatch');
});
|
|
65
|
+
it('teamPlannerPrompt instructs planner to pass synthesis back unchanged', () => {
    // The planner is a pass-through: its prompt must say the result goes back unchanged
    const { teamPlannerPrompt } = managerPromptRegistry;
    expect(teamPlannerPrompt).toContain('unchanged');
});
|
|
48
68
|
it('ctoSystemPrompt delegates single work to named engineers via task() and dual work to team-planner', () => {
|
|
49
69
|
expect(managerPromptRegistry.ctoSystemPrompt).toContain('task(subagent_type:');
|
|
50
70
|
expect(managerPromptRegistry.ctoSystemPrompt).toContain('single-engineer');
|
|
@@ -66,4 +86,36 @@ describe('managerPromptRegistry', () => {
|
|
|
66
86
|
expect(managerPromptRegistry.browserQaSessionPrompt).not.toContain('implement');
|
|
67
87
|
expect(managerPromptRegistry.browserQaSessionPrompt).not.toContain('write code');
|
|
68
88
|
});
|
|
89
|
+
it('ctoSystemPrompt encodes task size classification (trivial/simple/large)', () => {
    const { ctoSystemPrompt } = managerPromptRegistry;
    // All three size labels plus the "Task size" heading text must appear
    for (const term of ['trivial', 'simple', 'large', 'Task size']) {
        expect(ctoSystemPrompt).toContain(term);
    }
});
|
|
95
|
+
it('ctoSystemPrompt mentions confirm_plan and advance_slice for large task lifecycle', () => {
    const { ctoSystemPrompt } = managerPromptRegistry;
    // Tool names and key concepts of the large-task workflow
    const lifecycleTerms = ['confirm_plan', 'advance_slice', 'preAuthorized', 'vertical slice'];
    for (const term of lifecycleTerms) {
        expect(ctoSystemPrompt).toContain(term);
    }
});
|
|
101
|
+
it('ctoSystemPrompt encodes warn-only context policy', () => {
    const { ctoSystemPrompt } = managerPromptRegistry;
    // Warnings are advisory; only contextExhausted is named as a reset trigger
    for (const term of ['Context warnings', 'advisory', 'contextExhausted']) {
        expect(ctoSystemPrompt).toContain(term);
    }
});
|
|
106
|
+
it('contextWarnings reflect warn-only policy for critical level', () => {
    // Even the most severe warning text must stay advisory
    const { critical: criticalWarning } = managerPromptRegistry.contextWarnings;
    expect(criticalWarning).toContain('near capacity');
    expect(criticalWarning).toContain('Warn only');
});
|
|
110
|
+
it('ctoSystemPrompt uses genuinely vertical slice examples, not horizontal layers', () => {
    const { ctoSystemPrompt } = managerPromptRegistry;
    // Examples that describe internal plumbing only (horizontal layers) must be absent
    for (const horizontalExample of ['"types + contracts"', '"core logic"', '"plugin tools"']) {
        expect(ctoSystemPrompt).not.toContain(horizontalExample);
    }
    // A slice has to be described as end-to-end and user-testable
    for (const property of ['end-to-end', 'user-testable']) {
        expect(ctoSystemPrompt).toContain(property);
    }
    // ...and horizontal layers have to be named explicitly as the wrong shape
    expect(ctoSystemPrompt).toContain('Horizontal layer');
});
|
|
69
121
|
});
|
|
@@ -44,7 +44,8 @@ describe('TeamOrchestrator', () => {
|
|
|
44
44
|
outputTokens: 300,
|
|
45
45
|
contextWindowSize: 200_000,
|
|
46
46
|
});
|
|
47
|
-
const
|
|
47
|
+
const store = new TeamStateStore('.state');
|
|
48
|
+
const orchestrator = new TeamOrchestrator({ runTask }, store, { appendEvents: vi.fn(async () => { }) }, 'Base engineer prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
|
|
48
49
|
const first = await orchestrator.dispatchEngineer({
|
|
49
50
|
teamId: 'team-1',
|
|
50
51
|
cwd: tempRoot,
|
|
@@ -78,6 +79,8 @@ describe('TeamOrchestrator', () => {
|
|
|
78
79
|
expect(runTask.mock.calls[1]?.[0].systemPrompt).toBeUndefined();
|
|
79
80
|
expect(runTask.mock.calls[1]?.[0].prompt).not.toContain('Assigned engineer: Tom.');
|
|
80
81
|
expect(runTask.mock.calls[1]?.[0].prompt).toContain('Implement the chosen fix');
|
|
82
|
+
// Hybrid workflow: implement mode must include a pre-implementation plan step
|
|
83
|
+
expect(runTask.mock.calls[1]?.[0].prompt).toContain('state a brief implementation plan');
|
|
81
84
|
const team = await orchestrator.getOrCreateTeam(tempRoot, 'team-1');
|
|
82
85
|
expect(team.engineers.find((engineer) => engineer.name === 'Tom')).toMatchObject({
|
|
83
86
|
claudeSessionId: 'ses_tom',
|
|
@@ -301,7 +304,8 @@ describe('TeamOrchestrator', () => {
|
|
|
301
304
|
contextWindowSize: 200_000,
|
|
302
305
|
};
|
|
303
306
|
});
|
|
304
|
-
const
|
|
307
|
+
const store = new TeamStateStore('.state');
|
|
308
|
+
const orchestrator = new TeamOrchestrator({ runTask }, store, { appendEvents: vi.fn(async () => { }) }, 'Base engineer prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
|
|
305
309
|
const allEvents = [];
|
|
306
310
|
const result = await orchestrator.dispatchEngineer({
|
|
307
311
|
teamId: 'team-1',
|
|
@@ -339,6 +343,158 @@ describe('TeamOrchestrator', () => {
|
|
|
339
343
|
expect(error.message).toContain('BrowserQA is a browser QA specialist');
|
|
340
344
|
expect(error.message).toContain('does not support implement mode');
|
|
341
345
|
});
|
|
346
|
+
it('setActivePlan persists plan with slices on TeamRecord', async () => {
    tempRoot = await mkdtemp(join(tmpdir(), 'team-orchestrator-'));
    // Stub collaborators: no engineer is dispatched in this test.
    const sessionService = { runTask: vi.fn() };
    const transcriptStore = { appendEvents: vi.fn(async () => { }) };
    const orchestrator = new TeamOrchestrator(sessionService, new TeamStateStore('.state'), transcriptStore, 'Base engineer prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
    await orchestrator.getOrCreateTeam(tempRoot, 'team-1');
    const planSummary = 'Implement billing refactor in three slices';
    const sliceDescriptions = ['types + contracts', 'core logic', 'tests'];
    const activePlan = await orchestrator.setActivePlan(tempRoot, 'team-1', {
        summary: planSummary,
        taskSize: 'large',
        slices: [...sliceDescriptions],
        preAuthorized: false,
    });
    // Plan-level fields
    expect(activePlan.id).toMatch(/^plan-/);
    expect(activePlan.summary).toBe(planSummary);
    expect(activePlan.taskSize).toBe('large');
    expect(activePlan.preAuthorized).toBe(false);
    // Every description becomes a pending slice at the matching index
    expect(activePlan.slices).toHaveLength(3);
    sliceDescriptions.forEach((description, index) => {
        expect(activePlan.slices[index]).toMatchObject({
            index,
            description,
            status: 'pending',
        });
    });
    expect(activePlan.currentSliceIndex).toBe(0);
    expect(activePlan.confirmedAt).not.toBeNull();
    // The plan is readable back through the orchestrator
    const persisted = await orchestrator.getActivePlan(tempRoot, 'team-1');
    expect(persisted).toMatchObject({ id: activePlan.id, taskSize: 'large' });
});
|
|
381
|
+
it('clearActivePlan removes activePlan from TeamRecord', async () => {
    tempRoot = await mkdtemp(join(tmpdir(), 'team-orchestrator-'));
    const sessionService = { runTask: vi.fn() };
    const transcriptStore = { appendEvents: vi.fn(async () => { }) };
    const orchestrator = new TeamOrchestrator(sessionService, new TeamStateStore('.state'), transcriptStore, 'Base engineer prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
    await orchestrator.getOrCreateTeam(tempRoot, 'team-1');
    const unslicedPlan = {
        summary: 'Small task',
        taskSize: 'simple',
        slices: [],
        preAuthorized: false,
    };
    await orchestrator.setActivePlan(tempRoot, 'team-1', unslicedPlan);
    // A plan is present before clearing...
    expect(await orchestrator.getActivePlan(tempRoot, 'team-1')).not.toBeNull();
    await orchestrator.clearActivePlan(tempRoot, 'team-1');
    // ...and reads back as null afterwards
    expect(await orchestrator.getActivePlan(tempRoot, 'team-1')).toBeNull();
});
|
|
397
|
+
it('updateActivePlanSlice marks a slice done and advances currentSliceIndex', async () => {
    tempRoot = await mkdtemp(join(tmpdir(), 'team-orchestrator-'));
    const sessionService = { runTask: vi.fn() };
    const transcriptStore = { appendEvents: vi.fn(async () => { }) };
    const orchestrator = new TeamOrchestrator(sessionService, new TeamStateStore('.state'), transcriptStore, 'Base engineer prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
    await orchestrator.getOrCreateTeam(tempRoot, 'team-1');
    await orchestrator.setActivePlan(tempRoot, 'team-1', {
        summary: 'Two-slice task',
        taskSize: 'large',
        slices: ['slice A', 'slice B'],
        preAuthorized: true,
    });
    // Finish the first of the two slices
    await orchestrator.updateActivePlanSlice(tempRoot, 'team-1', 0, 'done');
    const plan = await orchestrator.getActivePlan(tempRoot, 'team-1');
    expect(plan).not.toBeNull();
    const [firstSlice, secondSlice] = plan.slices;
    expect(firstSlice).toMatchObject({ index: 0, status: 'done' });
    expect(firstSlice.completedAt).toBeDefined();
    // The second slice is untouched and becomes the current one
    expect(secondSlice).toMatchObject({ index: 1, status: 'pending' });
    expect(plan.currentSliceIndex).toBe(1);
});
|
|
415
|
+
it('updateActivePlanSlice sets currentSliceIndex to null when the final slice is completed', async () => {
    tempRoot = await mkdtemp(join(tmpdir(), 'team-orchestrator-'));
    const sessionService = { runTask: vi.fn() };
    const transcriptStore = { appendEvents: vi.fn(async () => { }) };
    const orchestrator = new TeamOrchestrator(sessionService, new TeamStateStore('.state'), transcriptStore, 'Base engineer prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
    await orchestrator.getOrCreateTeam(tempRoot, 'team-1');
    await orchestrator.setActivePlan(tempRoot, 'team-1', {
        summary: 'Single-slice task',
        taskSize: 'large',
        slices: ['ship the feature'],
        preAuthorized: true,
    });
    // Index 0 is both the first and the last slice of this plan
    await orchestrator.updateActivePlanSlice(tempRoot, 'team-1', 0, 'done');
    const plan = await orchestrator.getActivePlan(tempRoot, 'team-1');
    expect(plan).not.toBeNull();
    const [onlySlice] = plan.slices;
    expect(onlySlice).toMatchObject({ index: 0, status: 'done' });
    // Nothing left to work on
    expect(plan.currentSliceIndex).toBeNull();
});
|
|
432
|
+
it('setActivePlan sets currentSliceIndex to null when no slices are provided', async () => {
|
|
433
|
+
tempRoot = await mkdtemp(join(tmpdir(), 'team-orchestrator-'));
|
|
434
|
+
const orchestrator = new TeamOrchestrator({ runTask: vi.fn() }, new TeamStateStore('.state'), { appendEvents: vi.fn(async () => { }) }, 'Base engineer prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
|
|
435
|
+
await orchestrator.getOrCreateTeam(tempRoot, 'team-1');
|
|
436
|
+
const plan = await orchestrator.setActivePlan(tempRoot, 'team-1', {
|
|
437
|
+
summary: 'Simple no-slice task',
|
|
438
|
+
taskSize: 'simple',
|
|
439
|
+
slices: [],
|
|
440
|
+
preAuthorized: false,
|
|
441
|
+
});
|
|
442
|
+
expect(plan.currentSliceIndex).toBeNull();
|
|
443
|
+
expect(plan.slices).toHaveLength(0);
|
|
444
|
+
});
|
|
445
|
+
it('updateActivePlanSlice throws when team has no active plan', async () => {
|
|
446
|
+
tempRoot = await mkdtemp(join(tmpdir(), 'team-orchestrator-'));
|
|
447
|
+
const orchestrator = new TeamOrchestrator({ runTask: vi.fn() }, new TeamStateStore('.state'), { appendEvents: vi.fn(async () => { }) }, 'Base engineer prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
|
|
448
|
+
await orchestrator.getOrCreateTeam(tempRoot, 'team-1');
|
|
449
|
+
await expect(orchestrator.updateActivePlanSlice(tempRoot, 'team-1', 0, 'done')).rejects.toThrow('has no active plan');
|
|
450
|
+
});
|
|
451
|
+
it('updateActivePlanSlice throws when slice index does not exist', async () => {
|
|
452
|
+
tempRoot = await mkdtemp(join(tmpdir(), 'team-orchestrator-'));
|
|
453
|
+
const orchestrator = new TeamOrchestrator({ runTask: vi.fn() }, new TeamStateStore('.state'), { appendEvents: vi.fn(async () => { }) }, 'Base engineer prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
|
|
454
|
+
await orchestrator.getOrCreateTeam(tempRoot, 'team-1');
|
|
455
|
+
await orchestrator.setActivePlan(tempRoot, 'team-1', {
|
|
456
|
+
summary: 'Two-slice task',
|
|
457
|
+
taskSize: 'large',
|
|
458
|
+
slices: ['slice A', 'slice B'],
|
|
459
|
+
preAuthorized: false,
|
|
460
|
+
});
|
|
461
|
+
// Index 5 does not exist (only 0 and 1 exist)
|
|
462
|
+
await expect(orchestrator.updateActivePlanSlice(tempRoot, 'team-1', 5, 'done')).rejects.toThrow('slice index 5 does not exist');
|
|
463
|
+
});
|
|
464
|
+
it('updateActivePlanSlice throws when active plan has no slices', async () => {
|
|
465
|
+
tempRoot = await mkdtemp(join(tmpdir(), 'team-orchestrator-'));
|
|
466
|
+
const orchestrator = new TeamOrchestrator({ runTask: vi.fn() }, new TeamStateStore('.state'), { appendEvents: vi.fn(async () => { }) }, 'Base engineer prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
|
|
467
|
+
await orchestrator.getOrCreateTeam(tempRoot, 'team-1');
|
|
468
|
+
await orchestrator.setActivePlan(tempRoot, 'team-1', {
|
|
469
|
+
summary: 'No-slice plan',
|
|
470
|
+
taskSize: 'simple',
|
|
471
|
+
slices: [],
|
|
472
|
+
preAuthorized: false,
|
|
473
|
+
});
|
|
474
|
+
await expect(orchestrator.updateActivePlanSlice(tempRoot, 'team-1', 0, 'done')).rejects.toThrow('plan has no slices');
|
|
475
|
+
});
|
|
476
|
+
it('getActivePlan returns null for a new team with no active plan', async () => {
|
|
477
|
+
tempRoot = await mkdtemp(join(tmpdir(), 'team-orchestrator-'));
|
|
478
|
+
const orchestrator = new TeamOrchestrator({ runTask: vi.fn() }, new TeamStateStore('.state'), { appendEvents: vi.fn(async () => { }) }, 'Base engineer prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
|
|
479
|
+
const plan = await orchestrator.getActivePlan(tempRoot, 'team-1');
|
|
480
|
+
expect(plan).toBeNull();
|
|
481
|
+
});
|
|
482
|
+
it('normalizeTeamRecord preserves activePlan from persisted records', async () => {
|
|
483
|
+
tempRoot = await mkdtemp(join(tmpdir(), 'team-orchestrator-'));
|
|
484
|
+
const store = new TeamStateStore('.state');
|
|
485
|
+
const orchestrator = new TeamOrchestrator({ runTask: vi.fn() }, store, { appendEvents: vi.fn(async () => { }) }, 'Base engineer prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
|
|
486
|
+
await orchestrator.getOrCreateTeam(tempRoot, 'team-1');
|
|
487
|
+
await orchestrator.setActivePlan(tempRoot, 'team-1', {
|
|
488
|
+
summary: 'Persist test',
|
|
489
|
+
taskSize: 'simple',
|
|
490
|
+
slices: [],
|
|
491
|
+
preAuthorized: false,
|
|
492
|
+
});
|
|
493
|
+
// Re-read via getOrCreateTeam (triggers normalizeTeamRecord)
|
|
494
|
+
const team = await orchestrator.getOrCreateTeam(tempRoot, 'team-1');
|
|
495
|
+
expect(team.activePlan).toBeDefined();
|
|
496
|
+
expect(team.activePlan.summary).toBe('Persist test');
|
|
497
|
+
});
|
|
342
498
|
it('selectPlanEngineers excludes BrowserQA from planner selection', async () => {
|
|
343
499
|
tempRoot = await mkdtemp(join(tmpdir(), 'planner-exclude-browserqa-'));
|
|
344
500
|
const orchestrator = new TeamOrchestrator({ runTask: vi.fn() }, new TeamStateStore('.state'), { appendEvents: vi.fn(async () => { }) }, 'Engineer prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
|
|
@@ -116,6 +116,24 @@ export interface SessionContextSnapshot {
|
|
|
116
116
|
warningLevel: ContextWarningLevel;
|
|
117
117
|
compactionCount: number;
|
|
118
118
|
}
|
|
119
|
+
export type TaskSize = 'trivial' | 'simple' | 'large';
|
|
120
|
+
export interface PlanSlice {
|
|
121
|
+
index: number;
|
|
122
|
+
description: string;
|
|
123
|
+
status: 'pending' | 'in_progress' | 'done' | 'skipped';
|
|
124
|
+
completedAt?: string;
|
|
125
|
+
}
|
|
126
|
+
export interface ActivePlan {
|
|
127
|
+
id: string;
|
|
128
|
+
summary: string;
|
|
129
|
+
taskSize: TaskSize;
|
|
130
|
+
createdAt: string;
|
|
131
|
+
confirmedAt: string | null;
|
|
132
|
+
preAuthorized: boolean;
|
|
133
|
+
slices: PlanSlice[];
|
|
134
|
+
/** Null when the plan has no slices (trivial/simple tasks). */
|
|
135
|
+
currentSliceIndex: number | null;
|
|
136
|
+
}
|
|
119
137
|
export interface TeamEngineerRecord {
|
|
120
138
|
name: EngineerName;
|
|
121
139
|
wrapperSessionId: string | null;
|
|
@@ -142,6 +160,7 @@ export interface TeamRecord {
|
|
|
142
160
|
createdAt: string;
|
|
143
161
|
updatedAt: string;
|
|
144
162
|
engineers: TeamEngineerRecord[];
|
|
163
|
+
activePlan?: ActivePlan;
|
|
145
164
|
}
|
|
146
165
|
export interface EngineerTaskResult {
|
|
147
166
|
teamId: string;
|