@doingdev/opencode-claude-manager-plugin 0.1.59 → 0.1.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/dist/claude/claude-agent-sdk-adapter.d.ts +3 -1
- package/dist/claude/claude-agent-sdk-adapter.js +57 -6
- package/dist/manager/team-orchestrator.d.ts +10 -1
- package/dist/manager/team-orchestrator.js +86 -4
- package/dist/plugin/agents/team-planner.js +1 -1
- package/dist/plugin/claude-manager.plugin.js +14 -0
- package/dist/plugin/service-factory.d.ts +1 -0
- package/dist/plugin/service-factory.js +3 -1
- package/dist/prompts/registry.js +38 -20
- package/dist/src/claude/claude-agent-sdk-adapter.d.ts +3 -1
- package/dist/src/claude/claude-agent-sdk-adapter.js +57 -6
- package/dist/src/manager/team-orchestrator.d.ts +10 -1
- package/dist/src/manager/team-orchestrator.js +86 -4
- package/dist/src/plugin/agents/team-planner.js +1 -1
- package/dist/src/plugin/claude-manager.plugin.js +14 -0
- package/dist/src/plugin/service-factory.d.ts +1 -0
- package/dist/src/plugin/service-factory.js +3 -1
- package/dist/src/prompts/registry.js +38 -20
- package/dist/src/types/contracts.d.ts +22 -3
- package/dist/src/util/fs-helpers.d.ts +6 -0
- package/dist/src/util/fs-helpers.js +11 -0
- package/dist/test/claude-agent-sdk-adapter.test.js +118 -1
- package/dist/test/claude-manager.plugin.test.js +47 -3
- package/dist/test/fs-helpers.test.d.ts +1 -0
- package/dist/test/fs-helpers.test.js +56 -0
- package/dist/test/prompt-registry.test.js +54 -6
- package/dist/test/team-orchestrator.test.js +176 -2
- package/dist/types/contracts.d.ts +22 -3
- package/dist/util/fs-helpers.d.ts +6 -0
- package/dist/util/fs-helpers.js +11 -0
- package/package.json +1 -1
|
@@ -219,6 +219,9 @@ export class TeamOrchestrator {
|
|
|
219
219
|
else if (message.includes('context') || message.includes('token limit')) {
|
|
220
220
|
failureKind = 'contextExhausted';
|
|
221
221
|
}
|
|
222
|
+
else if (message.includes('does not support implement mode')) {
|
|
223
|
+
failureKind = 'modeNotSupported';
|
|
224
|
+
}
|
|
222
225
|
else if (message.includes('denied') || message.includes('not allowed')) {
|
|
223
226
|
failureKind = 'toolDenied';
|
|
224
227
|
}
|
|
@@ -383,6 +386,81 @@ export class TeamOrchestrator {
|
|
|
383
386
|
}
|
|
384
387
|
return { lead, challenger };
|
|
385
388
|
}
|
|
389
|
+
async getActivePlan(cwd, teamId) {
|
|
390
|
+
const team = await this.getOrCreateTeam(cwd, teamId);
|
|
391
|
+
return team.activePlan ?? null;
|
|
392
|
+
}
|
|
393
|
+
async setActivePlan(cwd, teamId, plan) {
|
|
394
|
+
await this.getOrCreateTeam(cwd, teamId);
|
|
395
|
+
const now = new Date().toISOString();
|
|
396
|
+
const slices = plan.slices.map((description, index) => ({
|
|
397
|
+
index,
|
|
398
|
+
description,
|
|
399
|
+
status: 'pending',
|
|
400
|
+
}));
|
|
401
|
+
const activePlan = {
|
|
402
|
+
id: `plan-${Date.now()}`,
|
|
403
|
+
summary: plan.summary,
|
|
404
|
+
taskSize: plan.taskSize,
|
|
405
|
+
createdAt: now,
|
|
406
|
+
confirmedAt: now,
|
|
407
|
+
preAuthorized: plan.preAuthorized,
|
|
408
|
+
slices,
|
|
409
|
+
currentSliceIndex: slices.length > 0 ? 0 : null,
|
|
410
|
+
};
|
|
411
|
+
await this.teamStore.updateTeam(cwd, teamId, (team) => ({
|
|
412
|
+
...team,
|
|
413
|
+
updatedAt: now,
|
|
414
|
+
activePlan,
|
|
415
|
+
}));
|
|
416
|
+
return activePlan;
|
|
417
|
+
}
|
|
418
|
+
async clearActivePlan(cwd, teamId) {
|
|
419
|
+
await this.getOrCreateTeam(cwd, teamId);
|
|
420
|
+
const now = new Date().toISOString();
|
|
421
|
+
await this.teamStore.updateTeam(cwd, teamId, (team) => ({
|
|
422
|
+
...team,
|
|
423
|
+
updatedAt: now,
|
|
424
|
+
activePlan: undefined,
|
|
425
|
+
}));
|
|
426
|
+
}
|
|
427
|
+
async updateActivePlanSlice(cwd, teamId, sliceIndex, status) {
|
|
428
|
+
await this.getOrCreateTeam(cwd, teamId);
|
|
429
|
+
const now = new Date().toISOString();
|
|
430
|
+
await this.teamStore.updateTeam(cwd, teamId, (team) => {
|
|
431
|
+
if (!team.activePlan) {
|
|
432
|
+
throw new Error(`Cannot update slice: team "${teamId}" has no active plan. Persist an active plan before updating slices.`);
|
|
433
|
+
}
|
|
434
|
+
const sliceExists = team.activePlan.slices.some((s) => s.index === sliceIndex);
|
|
435
|
+
if (!sliceExists) {
|
|
436
|
+
const sliceCount = team.activePlan.slices.length;
|
|
437
|
+
const rangeMsg = sliceCount === 0 ? 'plan has no slices' : `valid range: 0–${sliceCount - 1}`;
|
|
438
|
+
throw new Error(`Cannot update slice: slice index ${sliceIndex} does not exist in active plan "${team.activePlan.id}" (${rangeMsg}).`);
|
|
439
|
+
}
|
|
440
|
+
const slices = team.activePlan.slices.map((s) => s.index === sliceIndex
|
|
441
|
+
? {
|
|
442
|
+
...s,
|
|
443
|
+
status,
|
|
444
|
+
...(status === 'done' || status === 'skipped' ? { completedAt: now } : {}),
|
|
445
|
+
}
|
|
446
|
+
: s);
|
|
447
|
+
const isLastSlice = sliceIndex === team.activePlan.slices.length - 1;
|
|
448
|
+
const nextIndex = status === 'done' || status === 'skipped'
|
|
449
|
+
? isLastSlice
|
|
450
|
+
? null
|
|
451
|
+
: sliceIndex + 1
|
|
452
|
+
: team.activePlan.currentSliceIndex;
|
|
453
|
+
return {
|
|
454
|
+
...team,
|
|
455
|
+
updatedAt: now,
|
|
456
|
+
activePlan: {
|
|
457
|
+
...team.activePlan,
|
|
458
|
+
slices,
|
|
459
|
+
currentSliceIndex: nextIndex,
|
|
460
|
+
},
|
|
461
|
+
};
|
|
462
|
+
});
|
|
463
|
+
}
|
|
386
464
|
buildSessionSystemPrompt(engineer, mode) {
|
|
387
465
|
const specialistPrompt = this.workerCapabilities[engineer]?.sessionPrompt;
|
|
388
466
|
if (specialistPrompt) {
|
|
@@ -408,15 +486,17 @@ function buildModeInstruction(mode) {
|
|
|
408
486
|
switch (mode) {
|
|
409
487
|
case 'explore':
|
|
410
488
|
return [
|
|
411
|
-
'
|
|
412
|
-
'Read, search, and reason about the codebase.',
|
|
413
|
-
'
|
|
489
|
+
'Exploration mode.',
|
|
490
|
+
'Read, search, and reason about the codebase without editing files.',
|
|
491
|
+
'The caller should specify the desired output for this exploration task, such as root cause, findings, affected files, options, risk review, or a concrete plan.',
|
|
492
|
+
'If the caller does not specify the output shape, return concise findings, relevant file paths, open questions, and the recommended next step.',
|
|
414
493
|
'Do not create or edit files.',
|
|
415
494
|
].join(' ');
|
|
416
495
|
case 'implement':
|
|
417
496
|
return [
|
|
418
497
|
'Implementation mode.',
|
|
419
|
-
'
|
|
498
|
+
'Before making any edits, state a brief implementation plan: which files you will change, what each change does, and why.',
|
|
499
|
+
'Then make the changes, run the most relevant verification (tests, lint, typecheck), and report what changed and what you verified.',
|
|
420
500
|
'Before reporting done, review your own diff for issues that pass tests but break in production.',
|
|
421
501
|
].join(' ');
|
|
422
502
|
case 'verify':
|
|
@@ -502,6 +582,8 @@ export function getFailureGuidanceText(failureKind) {
|
|
|
502
582
|
return 'This engineer is currently working on another assignment. Wait for them to finish, choose a different engineer, or try again shortly.';
|
|
503
583
|
case 'toolDenied':
|
|
504
584
|
return 'A tool permission was denied during the assignment. Check the approval policy and tool permissions, then retry.';
|
|
585
|
+
case 'modeNotSupported':
|
|
586
|
+
return 'This engineer does not support the requested work mode. BrowserQA only supports explore and verify modes — use a general engineer (Tom, John, Maya, Sara, Alex) for implement tasks.';
|
|
505
587
|
case 'aborted':
|
|
506
588
|
return 'The assignment was cancelled by the user or an abort signal was triggered. Review the request and try again.';
|
|
507
589
|
case 'sdkError':
|
|
@@ -13,7 +13,7 @@ function buildTeamPlannerPermissions() {
|
|
|
13
13
|
}
|
|
14
14
|
export function buildTeamPlannerAgentConfig(prompts) {
|
|
15
15
|
return {
|
|
16
|
-
description: '
|
|
16
|
+
description: 'Thin planning wrapper that calls plan_with_team for dual-engineer synthesis with live UI activity.',
|
|
17
17
|
mode: 'subagent',
|
|
18
18
|
hidden: false,
|
|
19
19
|
color: '#D97757',
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { tool } from '@opencode-ai/plugin';
|
|
2
2
|
import { managerPromptRegistry } from '../prompts/registry.js';
|
|
3
|
+
import { appendDebugLog } from '../util/fs-helpers.js';
|
|
3
4
|
import { isEngineerName } from '../team/roster.js';
|
|
4
5
|
import { TeamOrchestrator, createActionableError, getFailureGuidanceText, } from '../manager/team-orchestrator.js';
|
|
5
6
|
import { AGENT_BROWSER_QA, AGENT_CTO, AGENT_TEAM_PLANNER, buildBrowserQaAgentConfig, buildCtoAgentConfig, buildEngineerAgentConfig, buildTeamPlannerAgentConfig, denyRestrictedToolsGlobally, ENGINEER_AGENT_IDS, ENGINEER_AGENT_NAMES, } from './agents/index.js';
|
|
@@ -456,6 +457,19 @@ async function runEngineerAssignment(input, context) {
|
|
|
456
457
|
guidance,
|
|
457
458
|
},
|
|
458
459
|
});
|
|
460
|
+
try {
|
|
461
|
+
await appendDebugLog(services.debugLogPath, {
|
|
462
|
+
type: 'engineer_failure',
|
|
463
|
+
engineer: failure.engineer,
|
|
464
|
+
teamId: failure.teamId,
|
|
465
|
+
mode: failure.mode,
|
|
466
|
+
failureKind: failure.failureKind,
|
|
467
|
+
message: failure.message.slice(0, 300),
|
|
468
|
+
});
|
|
469
|
+
}
|
|
470
|
+
catch {
|
|
471
|
+
// Log write failures must not mask the original error.
|
|
472
|
+
}
|
|
459
473
|
throw createActionableError(failure, error);
|
|
460
474
|
}
|
|
461
475
|
await services.orchestrator.recordWrapperExchange(context.worktree, input.teamId, input.engineer, context.sessionID, input.mode, input.message, result.finalText);
|
|
@@ -11,6 +11,7 @@ interface ClaudeManagerPluginServices {
|
|
|
11
11
|
teamStore: TeamStateStore;
|
|
12
12
|
orchestrator: TeamOrchestrator;
|
|
13
13
|
workerCapabilities: Partial<Record<EngineerName, WorkerCapabilities>>;
|
|
14
|
+
debugLogPath: string;
|
|
14
15
|
}
|
|
15
16
|
export declare function getOrCreatePluginServices(worktree: string): ClaudeManagerPluginServices;
|
|
16
17
|
export declare function clearPluginServices(): void;
|
|
@@ -21,8 +21,9 @@ export function getOrCreatePluginServices(worktree) {
|
|
|
21
21
|
return existing;
|
|
22
22
|
}
|
|
23
23
|
const approvalPolicyPath = path.join(worktree, '.claude-manager', 'approval-policy.json');
|
|
24
|
+
const debugLogPath = path.join(worktree, '.claude-manager', 'debug.log');
|
|
24
25
|
const approvalManager = new ToolApprovalManager(undefined, undefined, approvalPolicyPath);
|
|
25
|
-
const sdkAdapter = new ClaudeAgentSdkAdapter(undefined, approvalManager);
|
|
26
|
+
const sdkAdapter = new ClaudeAgentSdkAdapter(undefined, approvalManager, debugLogPath);
|
|
26
27
|
const sessionService = new ClaudeSessionService(sdkAdapter);
|
|
27
28
|
const gitOps = new GitOperations(worktree);
|
|
28
29
|
const teamStore = new TeamStateStore();
|
|
@@ -37,6 +38,7 @@ export function getOrCreatePluginServices(worktree) {
|
|
|
37
38
|
teamStore,
|
|
38
39
|
orchestrator,
|
|
39
40
|
workerCapabilities,
|
|
41
|
+
debugLogPath,
|
|
40
42
|
};
|
|
41
43
|
serviceRegistry.set(worktree, services);
|
|
42
44
|
return services;
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
export const managerPromptRegistry = {
|
|
2
2
|
ctoSystemPrompt: [
|
|
3
3
|
'You are a principal engineer orchestrating a team of AI-powered engineers.',
|
|
4
|
-
'Your role is to
|
|
4
|
+
'Your role is to investigate first, delegate precisely, review diffs for production risks, and verify outcomes.',
|
|
5
5
|
'You do not write code. All edits go through engineers. You multiply output by coordinating parallel work and catching issues others miss.',
|
|
6
6
|
'',
|
|
7
|
-
'# Operating Loop: Orient →
|
|
7
|
+
'# Operating Loop: Orient → Investigate → Decide → Delegate → Review → Verify → Close',
|
|
8
|
+
'Treat this loop as adaptive, not rigid. You may revisit earlier steps, skip unnecessary steps, or improvise when the work demands it—as long as you stay explicit about why.',
|
|
8
9
|
'',
|
|
9
10
|
'## Orient: Understand the request',
|
|
10
11
|
'- Extract what you can from the user message, codebase (read/grep/glob/codesearch), prior engineer results, and `websearch`/`webfetch` when relevant.',
|
|
@@ -13,27 +14,36 @@ export const managerPromptRegistry = {
|
|
|
13
14
|
'- If requirements are vague or architecture is unclear, use `question` tool with 2–3 concrete options, your recommendation, and what breaks if user picks differently.',
|
|
14
15
|
'- Only ask when the decision will materially change scope, architecture, risk, or how you verify—and you cannot resolve it from context.',
|
|
15
16
|
'',
|
|
16
|
-
'##
|
|
17
|
+
'## Investigate: Reduce uncertainty before choosing a path',
|
|
18
|
+
'- Start with the smallest useful investigation. For a bug, get to root cause. For a feature, inspect the existing surface area before inventing a plan.',
|
|
19
|
+
'- You may investigate yourself with read-only tools or delegate exploration to one engineer when that is faster or gives better continuity.',
|
|
20
|
+
'- When delegating exploration, explicitly say what artifact you want back: root cause, findings, affected files, options, risk review, file map, or a concrete plan.',
|
|
21
|
+
'- Do not default exploration to planning. Use planning only when the task is genuinely complex, ambiguous, cross-cutting, or risky.',
|
|
22
|
+
'',
|
|
23
|
+
'## Decide: Choose the lightest process that fits the work',
|
|
17
24
|
'- Is this a bug fix, feature, refactor, or something else?',
|
|
25
|
+
'- Task size: classify as trivial (single-line fix, unambiguous, no side effects), simple (one focused task, clear scope, 1–2 files), or large (multiple steps, cross-cutting changes, requires vertical slicing).',
|
|
18
26
|
'- What could go wrong? Is it reversible or irreversible? Can it fail in prod?',
|
|
19
27
|
'- Does it require careful rollout, data migration, observability, or backwards compatibility handling?',
|
|
20
28
|
'- Are there decisions the user has not explicitly made (architecture, scope, deployment strategy)?',
|
|
21
|
-
'',
|
|
22
|
-
'
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
' - Team-planner automatically selects two non-overlapping engineers by availability and context; you may optionally specify lead and challenger.',
|
|
26
|
-
' - Challenger engineer identifies missing decisions, risks, and scope gaps before implementation.',
|
|
27
|
-
'- Break work into independent pieces that can run in parallel. Two engineers exploring then synthesizing beats one engineer doing everything sequentially.',
|
|
28
|
-
'- Before delegating, state your success criteria, not just the task. What done looks like. How you will verify it.',
|
|
29
|
+
'- For trivial or simple work with clear scope: delegate directly to one engineer.',
|
|
30
|
+
'- For bugs or unclear requests: investigate first, then decide whether implementation is now straightforward.',
|
|
31
|
+
"- For complex or cross-cutting work: use `task(subagent_type: 'team-planner', ...)` so the wrapper can sharpen the request and run `plan_with_team` with live UI activity.",
|
|
32
|
+
'- Ask the user to confirm only when the decision materially changes scope, risk, rollout, or architecture. Do not force confirmation for every non-trivial task.',
|
|
29
33
|
'',
|
|
30
34
|
'## Delegate: Send precise assignments',
|
|
31
|
-
"- For single-engineer work: use `task(subagent_type: 'tom'|'john'|'maya'|'sara'|'alex', ...)` and structure the prompt with goal, acceptance criteria, relevant
|
|
32
|
-
"- For
|
|
35
|
+
"- For single-engineer work: use `task(subagent_type: 'tom'|'john'|'maya'|'sara'|'alex', ...)` and structure the prompt with goal, mode, expected deliverable, acceptance criteria, relevant context, constraints, and verification.",
|
|
36
|
+
"- For complex planning work: use `task(subagent_type: 'team-planner', ...)`. The wrapper preserves live activity in the UI while it inspects context lightly and runs `plan_with_team`.",
|
|
33
37
|
"- For browser/UI verification: use `task(subagent_type: 'browser-qa', ...)` with a clear verification goal. BrowserQA uses the Playwright skill to verify in a real browser and can run safe bash when needed.",
|
|
34
|
-
'-
|
|
38
|
+
'- For large tasks: break work into genuine vertical slices before implementation. Each slice must deliver end-to-end, user-testable value independently (e.g., "user can register and receive a confirmation email", "user can view billing history"). Horizontal layers (e.g., "just types", "just tests") are not vertical slices.',
|
|
39
|
+
'- Break work into independent pieces that can run in parallel. Two engineers exploring and then synthesizing is often better than one engineer guessing alone.',
|
|
40
|
+
'- Before delegating, state success criteria and expected output shape, not just the task. Say what done looks like and how you will verify it.',
|
|
41
|
+
'- If planning surfaced a recommendedQuestion or the work is risky enough to need confirmation, use the `question` tool before implementation. Otherwise, delegate directly.',
|
|
42
|
+
'',
|
|
43
|
+
'- Each assignment includes: goal, mode, expected deliverable, acceptance criteria, relevant context, constraints, and verification method.',
|
|
35
44
|
'- Reuse the same engineer when follow-up work builds on their prior context.',
|
|
36
45
|
'- Only one implementing engineer modifies the worktree at a time. Parallelize exploration, research, and browser verification freely.',
|
|
46
|
+
'- Context warnings (moderate/high/critical) are informational only. Do NOT reset an engineer session in response to a context warning. Sessions auto-reset only on an actual contextExhausted error.',
|
|
37
47
|
'',
|
|
38
48
|
'## Review: Inspect diffs for production safety',
|
|
39
49
|
'- After an engineer reports implementation done, review the diff with `git_diff` before declaring it complete.',
|
|
@@ -65,7 +75,9 @@ export const managerPromptRegistry = {
|
|
|
65
75
|
'',
|
|
66
76
|
'- Questions: Use the `question` tool when a decision will materially affect scope, architecture, or how you verify the outcome. Name the decision, offer 2–3 concrete options, state your recommendation, and say what breaks if the user picks differently. One high-leverage question at a time.',
|
|
67
77
|
'- Reframing: Before planning, ask what the user is actually trying to achieve, not just what they asked for. If the request sounds like a feature, ask what job-to-be-done it serves.',
|
|
78
|
+
'- Exploration outputs: when you send an engineer in explore mode, specify the expected output explicitly. Examples: root cause, findings, affected files, options, risk review, or implementation plan.',
|
|
68
79
|
'- Engineer selection: When assigning to a single engineer, prefer lower context pressure and less-recently-used engineers. Reuse if follow-up work builds on prior context.',
|
|
80
|
+
'- Context warnings: At moderate/high/critical context levels the system surfaces a warning. These are advisory — do not force session reset. Reserve reset for actual contextExhausted errors only.',
|
|
69
81
|
'- Failure handling:',
|
|
70
82
|
" - contextExhausted: The engineer's session ran out of tokens. The system automatically resets and retries once with the same task on a fresh session.",
|
|
71
83
|
' - sdkError or toolDenied: The underlying SDK failed or a tool call was denied. Investigate the error, adjust constraints, and retry.',
|
|
@@ -92,6 +104,8 @@ export const managerPromptRegistry = {
|
|
|
92
104
|
'',
|
|
93
105
|
'Your wrapper context from prior turns is reloaded automatically. Use it to avoid repeating work or re-explaining context that Claude Code already knows.',
|
|
94
106
|
"Return the tool result directly. Add your own commentary only when something was unexpected or needs the CTO's attention.",
|
|
107
|
+
'Explore mode is caller-directed. Follow the requested output shape instead of defaulting to a plan. If the assignment does not specify the output, return findings, relevant files, open questions, and the recommended next step.',
|
|
108
|
+
'If you discover during implementation that the agreed approach is not viable (unexpected constraints, wrong files, missing context), stop immediately and surface the deviation to the CTO before proceeding with a different approach. Do not silently implement something different from what was confirmed.',
|
|
95
109
|
].join('\n'),
|
|
96
110
|
engineerSessionPrompt: [
|
|
97
111
|
'You are an expert software engineer working inside Claude Code.',
|
|
@@ -101,6 +115,7 @@ export const managerPromptRegistry = {
|
|
|
101
115
|
'When investigating bugs:',
|
|
102
116
|
'- Always explore the root cause before implementing a fix. Do not assume; verify.',
|
|
103
117
|
'- If three fix attempts fail, question the architecture, not the hypothesis.',
|
|
118
|
+
'- In explore mode, return the artifact the caller asked for. Do not default to a plan unless the caller explicitly asks for one.',
|
|
104
119
|
'',
|
|
105
120
|
'When writing code:',
|
|
106
121
|
'- Consider rollout/migration/observability implications: Will this require staged rollout, data migration, new metrics, or log/trace points?',
|
|
@@ -142,13 +157,15 @@ export const managerPromptRegistry = {
|
|
|
142
157
|
'<answer or NONE>',
|
|
143
158
|
].join('\n'),
|
|
144
159
|
teamPlannerPrompt: [
|
|
145
|
-
'You are the team
|
|
160
|
+
'You are the team-planner wrapper. Your job is to help the CTO get a stronger plan for complex work while preserving live activity in the UI.',
|
|
146
161
|
'`plan_with_team` dispatches two engineers in parallel (lead + challenger) then synthesizes their plans.',
|
|
147
162
|
'',
|
|
148
|
-
'Call `plan_with_team`
|
|
163
|
+
'Call `plan_with_team` with the task and any engineer names provided.',
|
|
149
164
|
'- If lead and challenger engineer names are both specified, use them.',
|
|
150
165
|
'- If either name is missing, `plan_with_team` will auto-select two non-overlapping engineers based on availability and context.',
|
|
151
|
-
'Do not
|
|
166
|
+
'- Keep the wrapper thin. Do not do your own repo investigation or solo planning.',
|
|
167
|
+
'- If the request is blocked by a missing decision, ask one focused question with a recommendation instead of guessing.',
|
|
168
|
+
'After `plan_with_team` returns, pass the full result back to the CTO unchanged.',
|
|
152
169
|
].join('\n'),
|
|
153
170
|
browserQaAgentPrompt: [
|
|
154
171
|
"You are the browser QA specialist on the CTO's team.",
|
|
@@ -165,6 +182,7 @@ export const managerPromptRegistry = {
|
|
|
165
182
|
'- Never simulate or fabricate test results.',
|
|
166
183
|
'- If the Playwright tool is not available, the result will start with PLAYWRIGHT_UNAVAILABLE:.',
|
|
167
184
|
'- Your persistent Claude Code session remembers prior verification runs.',
|
|
185
|
+
'- If the verification scope changes unexpectedly (feature absent, URL wrong, task cannot be completed as specified), stop and report the scope mismatch rather than silently verifying something else.',
|
|
168
186
|
].join('\n'),
|
|
169
187
|
browserQaSessionPrompt: [
|
|
170
188
|
'You are a browser QA specialist. Your job is to verify web features and user flows using the Playwright skill/command.',
|
|
@@ -185,8 +203,8 @@ export const managerPromptRegistry = {
|
|
|
185
203
|
'Allowed tools: Playwright skill/command, safe bash, read-only tools (Read, Grep, Glob). No file editing or code modifications.',
|
|
186
204
|
].join('\n'),
|
|
187
205
|
contextWarnings: {
|
|
188
|
-
moderate: 'Engineer context is
|
|
189
|
-
high: 'Engineer context is
|
|
190
|
-
critical: 'Engineer context is near capacity ({percent}% estimated).
|
|
206
|
+
moderate: 'Engineer context is at {percent}% estimated. Session is healthy; keep the next task focused.',
|
|
207
|
+
high: 'Engineer context is at {percent}% estimated ({turns} turns, ${cost}). Session continues — prefer a narrowly scoped follow-up.',
|
|
208
|
+
critical: 'Engineer context is near capacity ({percent}% estimated). Warn only — do not force a reset; avoid large new tasks in this session.',
|
|
191
209
|
},
|
|
192
210
|
};
|
|
@@ -4,9 +4,9 @@ export interface ManagerPromptRegistry {
|
|
|
4
4
|
engineerSessionPrompt: string;
|
|
5
5
|
/** Prompt prepended to the user prompt of the synthesis runTask call inside plan_with_team. */
|
|
6
6
|
planSynthesisPrompt: string;
|
|
7
|
-
/** Visible subagent prompt for teamPlanner — thin
|
|
7
|
+
/** Visible subagent prompt for teamPlanner — thin wrapper that calls plan_with_team. */
|
|
8
8
|
teamPlannerPrompt: string;
|
|
9
|
-
/** Visible subagent prompt for browserQa — thin
|
|
9
|
+
/** Visible subagent prompt for browserQa — thin wrapper that calls claude tool for browser verification. */
|
|
10
10
|
browserQaAgentPrompt: string;
|
|
11
11
|
/** Prompt prepended to browser verification task prompts in Claude Code sessions. */
|
|
12
12
|
browserQaSessionPrompt: string;
|
|
@@ -116,6 +116,24 @@ export interface SessionContextSnapshot {
|
|
|
116
116
|
warningLevel: ContextWarningLevel;
|
|
117
117
|
compactionCount: number;
|
|
118
118
|
}
|
|
119
|
+
export type TaskSize = 'trivial' | 'simple' | 'large';
|
|
120
|
+
export interface PlanSlice {
|
|
121
|
+
index: number;
|
|
122
|
+
description: string;
|
|
123
|
+
status: 'pending' | 'in_progress' | 'done' | 'skipped';
|
|
124
|
+
completedAt?: string;
|
|
125
|
+
}
|
|
126
|
+
export interface ActivePlan {
|
|
127
|
+
id: string;
|
|
128
|
+
summary: string;
|
|
129
|
+
taskSize: TaskSize;
|
|
130
|
+
createdAt: string;
|
|
131
|
+
confirmedAt: string | null;
|
|
132
|
+
preAuthorized: boolean;
|
|
133
|
+
slices: PlanSlice[];
|
|
134
|
+
/** Null when the plan has no slices (trivial/simple tasks). */
|
|
135
|
+
currentSliceIndex: number | null;
|
|
136
|
+
}
|
|
119
137
|
export interface TeamEngineerRecord {
|
|
120
138
|
name: EngineerName;
|
|
121
139
|
wrapperSessionId: string | null;
|
|
@@ -128,7 +146,7 @@ export interface TeamEngineerRecord {
|
|
|
128
146
|
wrapperHistory: WrapperHistoryEntry[];
|
|
129
147
|
context: SessionContextSnapshot;
|
|
130
148
|
}
|
|
131
|
-
export type EngineerFailureKind = 'sdkError' | 'contextExhausted' | 'toolDenied' | 'aborted' | 'engineerBusy' | 'unknown';
|
|
149
|
+
export type EngineerFailureKind = 'sdkError' | 'contextExhausted' | 'toolDenied' | 'modeNotSupported' | 'aborted' | 'engineerBusy' | 'unknown';
|
|
132
150
|
export interface EngineerFailureResult {
|
|
133
151
|
teamId: string;
|
|
134
152
|
engineer: EngineerName;
|
|
@@ -142,6 +160,7 @@ export interface TeamRecord {
|
|
|
142
160
|
createdAt: string;
|
|
143
161
|
updatedAt: string;
|
|
144
162
|
engineers: TeamEngineerRecord[];
|
|
163
|
+
activePlan?: ActivePlan;
|
|
145
164
|
}
|
|
146
165
|
export interface EngineerTaskResult {
|
|
147
166
|
teamId: string;
|
|
@@ -1,2 +1,8 @@
|
|
|
1
1
|
export declare function writeJsonAtomically(filePath: string, data: unknown): Promise<void>;
|
|
2
2
|
export declare function isFileNotFoundError(error: unknown): error is NodeJS.ErrnoException;
|
|
3
|
+
/**
|
|
4
|
+
* Appends a single NDJSON line to a debug log file.
|
|
5
|
+
* Creates the parent directory if it does not exist.
|
|
6
|
+
* A `ts` (ISO timestamp) field is injected automatically.
|
|
7
|
+
*/
|
|
8
|
+
export declare function appendDebugLog(logPath: string, entry: Record<string, unknown>): Promise<void>;
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { randomUUID } from 'node:crypto';
|
|
2
2
|
import { promises as fs } from 'node:fs';
|
|
3
|
+
import path from 'node:path';
|
|
3
4
|
export async function writeJsonAtomically(filePath, data) {
|
|
4
5
|
const tempPath = `${filePath}.${randomUUID()}.tmp`;
|
|
5
6
|
await fs.writeFile(tempPath, `${JSON.stringify(data, null, 2)}\n`, 'utf8');
|
|
@@ -8,3 +9,13 @@ export async function writeJsonAtomically(filePath, data) {
|
|
|
8
9
|
export function isFileNotFoundError(error) {
|
|
9
10
|
return (error instanceof Error && 'code' in error && error.code === 'ENOENT');
|
|
10
11
|
}
|
|
12
|
+
/**
|
|
13
|
+
* Appends a single NDJSON line to a debug log file.
|
|
14
|
+
* Creates the parent directory if it does not exist.
|
|
15
|
+
* A `ts` (ISO timestamp) field is injected automatically.
|
|
16
|
+
*/
|
|
17
|
+
export async function appendDebugLog(logPath, entry) {
|
|
18
|
+
const line = JSON.stringify({ ...entry, ts: new Date().toISOString() }) + '\n';
|
|
19
|
+
await fs.mkdir(path.dirname(logPath), { recursive: true });
|
|
20
|
+
await fs.appendFile(logPath, line, 'utf8');
|
|
21
|
+
}
|
|
@@ -1,5 +1,9 @@
|
|
|
1
|
-
import { describe, expect, it } from 'vitest';
|
|
1
|
+
import { afterEach, describe, expect, it } from 'vitest';
|
|
2
|
+
import { mkdtemp, readFile, rm } from 'node:fs/promises';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
2
5
|
import { ClaudeAgentSdkAdapter } from '../src/claude/claude-agent-sdk-adapter.js';
|
|
6
|
+
import { ToolApprovalManager } from '../src/claude/tool-approval-manager.js';
|
|
3
7
|
function createFakeQuery(messages) {
|
|
4
8
|
return {
|
|
5
9
|
async *[Symbol.asyncIterator]() {
|
|
@@ -548,4 +552,117 @@ describe('ClaudeAgentSdkAdapter', () => {
|
|
|
548
552
|
});
|
|
549
553
|
expect(capturedPermissionMode).toBe('plan');
|
|
550
554
|
});
|
|
555
|
+
describe('debug log', () => {
|
|
556
|
+
let tmpDir;
|
|
557
|
+
afterEach(async () => {
|
|
558
|
+
if (tmpDir) {
|
|
559
|
+
await rm(tmpDir, { recursive: true, force: true });
|
|
560
|
+
}
|
|
561
|
+
});
|
|
562
|
+
it('appends a tool_denied entry when restrictWriteTools denies a write tool', async () => {
|
|
563
|
+
tmpDir = await mkdtemp(join(tmpdir(), 'adapter-log-'));
|
|
564
|
+
const logPath = join(tmpDir, '.claude-manager', 'debug.log');
|
|
565
|
+
let capturedCanUseTool;
|
|
566
|
+
const adapter = new ClaudeAgentSdkAdapter({
|
|
567
|
+
query: (params) => {
|
|
568
|
+
capturedCanUseTool = params.options?.canUseTool;
|
|
569
|
+
return createFakeQuery([
|
|
570
|
+
{
|
|
571
|
+
type: 'result',
|
|
572
|
+
subtype: 'success',
|
|
573
|
+
session_id: 'ses_log',
|
|
574
|
+
is_error: false,
|
|
575
|
+
result: 'ok',
|
|
576
|
+
num_turns: 1,
|
|
577
|
+
total_cost_usd: 0,
|
|
578
|
+
},
|
|
579
|
+
]);
|
|
580
|
+
},
|
|
581
|
+
listSessions: async () => [],
|
|
582
|
+
getSessionMessages: async () => [],
|
|
583
|
+
}, undefined, logPath);
|
|
584
|
+
await adapter.runSession({ cwd: '/tmp/project', prompt: 'Test', restrictWriteTools: true });
|
|
585
|
+
expect(capturedCanUseTool).toBeDefined();
|
|
586
|
+
const result = await capturedCanUseTool('Edit', { file_path: 'x.ts' }, {});
|
|
587
|
+
expect(result.behavior).toBe('deny');
|
|
588
|
+
const content = await readFile(logPath, 'utf8');
|
|
589
|
+
const entry = JSON.parse(content.trim().split('\n')[0]);
|
|
590
|
+
expect(entry.type).toBe('tool_denied');
|
|
591
|
+
expect(entry.toolName).toBe('Edit');
|
|
592
|
+
expect(entry.reason).toBe('restrictWriteTools');
|
|
593
|
+
expect(typeof entry.ts).toBe('string');
|
|
594
|
+
});
|
|
595
|
+
it('appends a tool_denied entry when the approval manager denies a tool', async () => {
|
|
596
|
+
tmpDir = await mkdtemp(join(tmpdir(), 'adapter-log-policy-'));
|
|
597
|
+
const logPath = join(tmpDir, '.claude-manager', 'debug.log');
|
|
598
|
+
const approvalManager = new ToolApprovalManager({
|
|
599
|
+
rules: [
|
|
600
|
+
{
|
|
601
|
+
id: 'deny-bash',
|
|
602
|
+
toolPattern: 'Bash',
|
|
603
|
+
inputPattern: 'rm -rf /',
|
|
604
|
+
action: 'deny',
|
|
605
|
+
denyMessage: 'rm -rf / is not allowed',
|
|
606
|
+
},
|
|
607
|
+
],
|
|
608
|
+
enabled: true,
|
|
609
|
+
});
|
|
610
|
+
let capturedCanUseTool;
|
|
611
|
+
const adapter = new ClaudeAgentSdkAdapter({
|
|
612
|
+
query: (params) => {
|
|
613
|
+
capturedCanUseTool = params.options?.canUseTool;
|
|
614
|
+
return createFakeQuery([
|
|
615
|
+
{
|
|
616
|
+
type: 'result',
|
|
617
|
+
subtype: 'success',
|
|
618
|
+
session_id: 'ses_policy',
|
|
619
|
+
is_error: false,
|
|
620
|
+
result: 'ok',
|
|
621
|
+
num_turns: 1,
|
|
622
|
+
total_cost_usd: 0,
|
|
623
|
+
},
|
|
624
|
+
]);
|
|
625
|
+
},
|
|
626
|
+
listSessions: async () => [],
|
|
627
|
+
getSessionMessages: async () => [],
|
|
628
|
+
}, approvalManager, logPath);
|
|
629
|
+
await adapter.runSession({ cwd: '/tmp/project', prompt: 'Test' });
|
|
630
|
+
expect(capturedCanUseTool).toBeDefined();
|
|
631
|
+
const result = await capturedCanUseTool('Bash', { command: 'rm -rf /' }, {});
|
|
632
|
+
expect(result.behavior).toBe('deny');
|
|
633
|
+
const content = await readFile(logPath, 'utf8');
|
|
634
|
+
const entry = JSON.parse(content.trim().split('\n')[0]);
|
|
635
|
+
expect(entry.type).toBe('tool_denied');
|
|
636
|
+
expect(entry.toolName).toBe('Bash');
|
|
637
|
+
expect(entry.reason).toBe('approvalPolicy');
|
|
638
|
+
expect(typeof entry.ts).toBe('string');
|
|
639
|
+
});
|
|
640
|
+
it('does not create a log file when no debugLogPath is provided', async () => {
|
|
641
|
+
tmpDir = await mkdtemp(join(tmpdir(), 'adapter-no-log-'));
|
|
642
|
+
let capturedCanUseTool;
|
|
643
|
+
const adapter = new ClaudeAgentSdkAdapter({
|
|
644
|
+
query: (params) => {
|
|
645
|
+
capturedCanUseTool = params.options?.canUseTool;
|
|
646
|
+
return createFakeQuery([
|
|
647
|
+
{
|
|
648
|
+
type: 'result',
|
|
649
|
+
subtype: 'success',
|
|
650
|
+
session_id: 'ses_nolog',
|
|
651
|
+
is_error: false,
|
|
652
|
+
result: 'ok',
|
|
653
|
+
num_turns: 1,
|
|
654
|
+
total_cost_usd: 0,
|
|
655
|
+
},
|
|
656
|
+
]);
|
|
657
|
+
},
|
|
658
|
+
listSessions: async () => [],
|
|
659
|
+
getSessionMessages: async () => [],
|
|
660
|
+
});
|
|
661
|
+
await adapter.runSession({ cwd: '/tmp/project', prompt: 'Test', restrictWriteTools: true });
|
|
662
|
+
const result = await capturedCanUseTool('Edit', { file_path: 'x.ts' }, {});
|
|
663
|
+
expect(result.behavior).toBe('deny');
|
|
664
|
+
// No log file should exist
|
|
665
|
+
await expect(readFile(join(tmpDir, 'debug.log'), 'utf8')).rejects.toThrow();
|
|
666
|
+
});
|
|
667
|
+
});
|
|
551
668
|
});
|
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
import { describe, expect, it } from 'vitest';
|
|
1
|
+
import { afterEach, describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { mkdtemp, readFile, rm } from 'node:fs/promises';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
2
5
|
import { ClaudeManagerPlugin } from '../src/plugin/claude-manager.plugin.js';
|
|
3
|
-
import { AGENT_CTO, AGENT_TEAM_PLANNER, ENGINEER_AGENT_IDS, ENGINEER_AGENT_NAMES, } from '../src/plugin/agent-hierarchy.js';
|
|
6
|
+
import { AGENT_BROWSER_QA, AGENT_CTO, AGENT_TEAM_PLANNER, ENGINEER_AGENT_IDS, ENGINEER_AGENT_NAMES, } from '../src/plugin/agent-hierarchy.js';
|
|
7
|
+
import { clearPluginServices } from '../src/plugin/service-factory.js';
|
|
4
8
|
describe('ClaudeManagerPlugin', () => {
|
|
5
9
|
it('configures CTO with orchestration tools and question access', async () => {
|
|
6
10
|
const plugin = await ClaudeManagerPlugin({
|
|
@@ -93,7 +97,7 @@ describe('ClaudeManagerPlugin', () => {
|
|
|
93
97
|
expect(agent.permission).not.toHaveProperty('grep');
|
|
94
98
|
}
|
|
95
99
|
});
|
|
96
|
-
it('configures team-planner as a planning
|
|
100
|
+
it('configures team-planner as a thin planning wrapper subagent', async () => {
|
|
97
101
|
const plugin = await ClaudeManagerPlugin({
|
|
98
102
|
worktree: '/tmp/project',
|
|
99
103
|
});
|
|
@@ -112,6 +116,7 @@ describe('ClaudeManagerPlugin', () => {
|
|
|
112
116
|
});
|
|
113
117
|
expect(teamPlanner.permission).not.toHaveProperty('read');
|
|
114
118
|
expect(teamPlanner.permission).not.toHaveProperty('grep');
|
|
119
|
+
expect(teamPlanner.permission).not.toHaveProperty('glob');
|
|
115
120
|
});
|
|
116
121
|
it('registers the named engineer bridge and team status tools', async () => {
|
|
117
122
|
const plugin = await ClaudeManagerPlugin({
|
|
@@ -144,6 +149,14 @@ describe('ClaudeManagerPlugin', () => {
|
|
|
144
149
|
expect(modelSchema.safeParse(undefined).success).toBe(true);
|
|
145
150
|
expect(modelSchema.safeParse('claude-haiku-4-5').success).toBe(false);
|
|
146
151
|
});
|
|
152
|
+
it('does not expose explicit plan tracking tools', async () => {
|
|
153
|
+
const plugin = await ClaudeManagerPlugin({
|
|
154
|
+
worktree: '/tmp/project',
|
|
155
|
+
});
|
|
156
|
+
const tools = plugin.tool;
|
|
157
|
+
expect(tools['confirm_plan']).toBeUndefined();
|
|
158
|
+
expect(tools['advance_slice']).toBeUndefined();
|
|
159
|
+
});
|
|
147
160
|
it('exposes hooks for CTO team tracking and wrapper memory injection', async () => {
|
|
148
161
|
const plugin = await ClaudeManagerPlugin({
|
|
149
162
|
worktree: '/tmp/project',
|
|
@@ -270,3 +283,34 @@ describe('Agent ID normalization and lookup helpers', () => {
|
|
|
270
283
|
expect(agents['browser-qa']).toBeDefined();
|
|
271
284
|
});
|
|
272
285
|
});
|
|
286
|
+
describe('claude tool — engineer failure debug log', () => {
|
|
287
|
+
let tempRoot;
|
|
288
|
+
afterEach(async () => {
|
|
289
|
+
clearPluginServices();
|
|
290
|
+
if (tempRoot) {
|
|
291
|
+
await rm(tempRoot, { recursive: true, force: true });
|
|
292
|
+
}
|
|
293
|
+
});
|
|
294
|
+
it('appends an engineer_failure entry to debug.log when dispatchEngineer throws', async () => {
|
|
295
|
+
tempRoot = await mkdtemp(join(tmpdir(), 'plugin-failure-log-'));
|
|
296
|
+
const plugin = await ClaudeManagerPlugin({ worktree: tempRoot });
|
|
297
|
+
const tools = plugin.tool;
|
|
298
|
+
const context = {
|
|
299
|
+
sessionID: 'wrapper-browserqa-fail',
|
|
300
|
+
worktree: tempRoot,
|
|
301
|
+
agent: AGENT_BROWSER_QA,
|
|
302
|
+
metadata: vi.fn(),
|
|
303
|
+
};
|
|
304
|
+
// BrowserQA in implement mode throws synchronously before running a session
|
|
305
|
+
await expect(tools['claude'].execute({ mode: 'implement', message: 'Write a feature' }, context)).rejects.toThrow('modeNotSupported');
|
|
306
|
+
const logPath = join(tempRoot, '.claude-manager', 'debug.log');
|
|
307
|
+
const content = await readFile(logPath, 'utf8');
|
|
308
|
+
const entry = JSON.parse(content.trim().split('\n')[0]);
|
|
309
|
+
expect(entry.type).toBe('engineer_failure');
|
|
310
|
+
expect(entry.engineer).toBe('BrowserQA');
|
|
311
|
+
expect(entry.mode).toBe('implement');
|
|
312
|
+
expect(entry.failureKind).toBe('modeNotSupported');
|
|
313
|
+
expect(typeof entry.message).toBe('string');
|
|
314
|
+
expect(typeof entry.ts).toBe('string');
|
|
315
|
+
});
|
|
316
|
+
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|