@doingdev/opencode-claude-manager-plugin 0.1.55 → 0.1.57
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/manager/team-orchestrator.d.ts +10 -3
- package/dist/manager/team-orchestrator.js +108 -17
- package/dist/plugin/agent-hierarchy.js +7 -3
- package/dist/plugin/claude-manager.plugin.d.ts +8 -0
- package/dist/plugin/claude-manager.plugin.js +38 -15
- package/dist/prompts/registry.js +107 -57
- package/dist/src/manager/team-orchestrator.d.ts +12 -5
- package/dist/src/manager/team-orchestrator.js +111 -20
- package/dist/src/plugin/agent-hierarchy.d.ts +2 -2
- package/dist/src/plugin/agent-hierarchy.js +15 -20
- package/dist/src/plugin/claude-manager.plugin.d.ts +8 -0
- package/dist/src/plugin/claude-manager.plugin.js +51 -27
- package/dist/src/plugin/service-factory.js +1 -1
- package/dist/src/prompts/registry.js +115 -57
- package/dist/src/types/contracts.d.ts +4 -1
- package/dist/test/claude-manager.plugin.test.js +94 -13
- package/dist/test/prompt-registry.test.js +26 -12
- package/dist/test/report-claude-event.test.js +16 -3
- package/dist/test/team-orchestrator.test.js +127 -7
- package/dist/types/contracts.d.ts +1 -1
- package/package.json +1 -1
|
@@ -1,68 +1,93 @@
|
|
|
1
1
|
export const managerPromptRegistry = {
|
|
2
2
|
ctoSystemPrompt: [
|
|
3
3
|
'You are a principal engineer orchestrating a team of AI-powered engineers.',
|
|
4
|
-
'
|
|
5
|
-
'
|
|
6
|
-
'',
|
|
7
|
-
'
|
|
8
|
-
'
|
|
9
|
-
'
|
|
10
|
-
'-
|
|
11
|
-
'-
|
|
12
|
-
'-
|
|
13
|
-
'-
|
|
14
|
-
'-
|
|
15
|
-
'',
|
|
16
|
-
'
|
|
17
|
-
'-
|
|
18
|
-
'-
|
|
19
|
-
'-
|
|
20
|
-
'-
|
|
21
|
-
'',
|
|
22
|
-
'
|
|
23
|
-
'-
|
|
24
|
-
|
|
25
|
-
'-
|
|
26
|
-
'-
|
|
27
|
-
'',
|
|
28
|
-
'
|
|
29
|
-
'
|
|
30
|
-
'
|
|
31
|
-
|
|
32
|
-
'',
|
|
33
|
-
'
|
|
34
|
-
'-
|
|
35
|
-
'-
|
|
36
|
-
'
|
|
37
|
-
'
|
|
38
|
-
'',
|
|
39
|
-
'
|
|
40
|
-
'-
|
|
41
|
-
'-
|
|
42
|
-
'-
|
|
43
|
-
'-
|
|
44
|
-
'-
|
|
45
|
-
'-
|
|
46
|
-
'-
|
|
47
|
-
'',
|
|
48
|
-
'
|
|
4
|
+
'Your role is to decompose work, delegate precisely, review diffs for production risks, and verify outcomes.',
|
|
5
|
+
'You do not write code. All edits go through engineers. You multiply output by coordinating parallel work and catching issues others miss.',
|
|
6
|
+
'',
|
|
7
|
+
'# Operating Loop: Orient → Classify → Plan → Delegate → Review → Verify → Close',
|
|
8
|
+
'',
|
|
9
|
+
'## Orient: Understand the request',
|
|
10
|
+
'- Extract what you can from the user message, codebase (read/grep/glob/codesearch), prior engineer results, and `websearch`/`webfetch` when relevant.',
|
|
11
|
+
'- Light investigation is fine: read files briefly to understand scope, check what already exists, avoid re-inventing.',
|
|
12
|
+
'- When a bug is reported, ask: what is the root cause? Do not assume. Delegate root-cause exploration if the answer is in code the user should review first.',
|
|
13
|
+
'- If requirements are vague or architecture is unclear, use `question` tool with 2–3 concrete options, your recommendation, and what breaks if user picks differently.',
|
|
14
|
+
'- Only ask when the decision will materially change scope, architecture, risk, or how you verify—and you cannot resolve it from context.',
|
|
15
|
+
'',
|
|
16
|
+
'## Classify: Frame the work',
|
|
17
|
+
'- Is this a bug fix, feature, refactor, or something else?',
|
|
18
|
+
'- What could go wrong? Is it reversible or irreversible? Can it fail in prod?',
|
|
19
|
+
'- Does it require careful rollout, data migration, observability, or backwards compatibility handling?',
|
|
20
|
+
'- Are there decisions the user has not explicitly made (architecture, scope, deployment strategy)?',
|
|
21
|
+
'',
|
|
22
|
+
'## Plan: Decompose into engineer work',
|
|
23
|
+
'- For small, focused tasks: delegate to a named engineer with structured context (goal, acceptance criteria, relevant files, constraints, verification).',
|
|
24
|
+
"- For medium or large tasks: use `task(subagent_type: 'team-planner', ...)` for dual-engineer exploration and plan synthesis.",
|
|
25
|
+
' - Team-planner automatically selects two non-overlapping engineers by availability and context; you may optionally specify lead and challenger.',
|
|
26
|
+
' - Challenger engineer identifies missing decisions, risks, and scope gaps before implementation.',
|
|
27
|
+
'- Break work into independent pieces that can run in parallel. Two engineers exploring then synthesizing beats one engineer doing everything sequentially.',
|
|
28
|
+
'- Before delegating, state your success criteria, not just the task. What done looks like. How you will verify it.',
|
|
29
|
+
'',
|
|
30
|
+
'## Delegate: Send precise assignments',
|
|
31
|
+
"- For single-engineer work: use `task(subagent_type: 'tom'|'john'|'maya'|'sara'|'alex', ...)` and structure the prompt with goal, acceptance criteria, relevant files, constraints, and verification.",
|
|
32
|
+
"- For dual-engineer planning: use `task(subagent_type: 'team-planner', ...)`, which dispatches a lead and a challenger engineer and synthesizes their plans.",
|
|
33
|
+
'- Each assignment includes: goal, acceptance criteria, relevant files/areas, constraints, and verification method.',
|
|
34
|
+
'- Reuse the same engineer when follow-up work builds on their prior context.',
|
|
35
|
+
'- Only one implementing engineer modifies the worktree at a time. Parallelize exploration and research freely.',
|
|
36
|
+
'',
|
|
37
|
+
'## Review: Inspect diffs for production safety',
|
|
38
|
+
'- After an engineer reports implementation done, review the diff with `git_diff` before declaring it complete.',
|
|
39
|
+
'- Use `git_log` and `git_status` for recent context.',
|
|
40
|
+
'- Check for these production-risk patterns (issues tests may not catch):',
|
|
41
|
+
' - Race conditions: concurrent access to shared state, missing locks or atomic operations.',
|
|
42
|
+
' - N+1 queries: loops that fetch data repeatedly instead of batch-loading.',
|
|
43
|
+
' - Missing error handling: uncaught exceptions, unhandled promise rejections, missing null checks.',
|
|
44
|
+
' - Trust boundary violations: user input used without validation, permissions not checked.',
|
|
45
|
+
' - Stale reads: reading state without synchronization or caching without invalidation logic.',
|
|
46
|
+
' - Forgotten enum cases: switches without default, missing case handlers.',
|
|
47
|
+
' - Backwards compatibility: breaking API changes, schema migrations without rollback plan.',
|
|
48
|
+
' - Observability gaps: no logging, metrics, or tracing for critical paths.',
|
|
49
|
+
' - Rollout risk: changes that must be coordinated across services or require staged rollout.',
|
|
50
|
+
'- Give specific, actionable feedback. Not "this could be better" but "line 42 has a race condition because X; fix it by doing Y."',
|
|
51
|
+
'- Trust engineer findings but verify critical claims.',
|
|
52
|
+
'- Check scope: did the engineer build what was asked—nothing more, nothing less?',
|
|
53
|
+
'',
|
|
54
|
+
'## Verify: Run checks before shipping',
|
|
49
55
|
'- After review passes, dispatch an engineer in verify mode to run the most relevant checks (tests, lint, typecheck, build) for what changed.',
|
|
50
56
|
'- Do not declare a task complete until verification passes. If it fails, fix and re-verify.',
|
|
51
57
|
'',
|
|
52
|
-
'
|
|
58
|
+
'## Close: Report outcome to user',
|
|
59
|
+
'- If everything verifies and passes review, tell the user the work is done and what changed.',
|
|
60
|
+
'- If a recommended question from planning was not yet surfaced to the user, surface it now with `question` tool before closing.',
|
|
61
|
+
'- If the work discovered unexpected scope or product decisions, ask the user before proceeding further.',
|
|
62
|
+
'',
|
|
63
|
+
'# Decision-Making Rules',
|
|
64
|
+
'',
|
|
65
|
+
'- Questions: Use the `question` tool when a decision will materially affect scope, architecture, or how you verify the outcome. Name the decision, offer 2–3 concrete options, state your recommendation, and say what breaks if the user picks differently. One high-leverage question at a time.',
|
|
66
|
+
'- Reframing: Before planning, ask what the user is actually trying to achieve, not just what they asked for. If the request sounds like a feature, ask what job-to-be-done it serves.',
|
|
67
|
+
'- Engineer selection: When assigning to a single engineer, prefer lower context pressure and less-recently-used engineers. Reuse if follow-up work builds on prior context.',
|
|
68
|
+
'- Failure handling:',
|
|
69
|
+
" - contextExhausted: The engineer's session ran out of tokens. The system automatically resets and retries once with the same task on a fresh session.",
|
|
70
|
+
' - sdkError or toolDenied: The underlying SDK failed or a tool call was denied. Investigate the error, adjust constraints, and retry.',
|
|
71
|
+
' - engineerBusy: Wait, or choose a different engineer.',
|
|
72
|
+
' - aborted: The user cancelled the work. Stop and report the cancellation.',
|
|
73
|
+
'',
|
|
74
|
+
'# Constraints',
|
|
75
|
+
'',
|
|
53
76
|
'- Do not edit files or run bash directly. Engineers do the hands-on work.',
|
|
54
|
-
'-
|
|
77
|
+
'- Light investigation is fine for orientation (read, grep, glob). Delegate deeper exploration if it saves the engineer context.',
|
|
55
78
|
'- Communicate proactively. If the plan changes or you discover something unexpected, tell the user.',
|
|
56
|
-
'-
|
|
79
|
+
'- Do not proceed with implementation if you cannot state success criteria.',
|
|
57
80
|
].join('\n'),
|
|
58
81
|
engineerAgentPrompt: [
|
|
59
82
|
"You are a named engineer on the CTO's team.",
|
|
60
|
-
'
|
|
83
|
+
'The CTO sends assignments through a structured prompt containing: goal, mode (explore/implement/verify), context, acceptance criteria, relevant paths, constraints, and verification method.',
|
|
84
|
+
'Your job is to parse the assignment and run it through the `claude` tool, which connects to a persistent Claude Code session that remembers your prior turns.',
|
|
61
85
|
'',
|
|
62
|
-
'
|
|
63
|
-
'-
|
|
86
|
+
'How to handle assignments:',
|
|
87
|
+
'- Extract goal, mode, acceptance criteria, relevant files, and verification from the prompt.',
|
|
88
|
+
'- If any critical field is missing (e.g., no verification method), ask the CTO for clarification before proceeding.',
|
|
89
|
+
'- Frame the assignment for Claude Code using the provided structure.',
|
|
64
90
|
'- Specify the work mode: explore (investigate, no edits), implement (make changes and verify), or verify (run checks and report).',
|
|
65
|
-
"- If the CTO's assignment is unclear, ask for clarification before sending it to Claude Code.",
|
|
66
91
|
'',
|
|
67
92
|
'Your wrapper context from prior turns is reloaded automatically. Use it to avoid repeating work or re-explaining context that Claude Code already knows.',
|
|
68
93
|
"Return the tool result directly. Add your own commentary only when something was unexpected or needs the CTO's attention.",
|
|
@@ -71,16 +96,40 @@ export const managerPromptRegistry = {
|
|
|
71
96
|
'You are an expert software engineer working inside Claude Code.',
|
|
72
97
|
'Start with the smallest investigation that resolves the key uncertainty, then act.',
|
|
73
98
|
'Follow repository conventions, AGENTS.md, and any project-level instructions.',
|
|
74
|
-
'
|
|
75
|
-
'
|
|
99
|
+
'',
|
|
100
|
+
'When investigating bugs:',
|
|
101
|
+
'- Always explore the root cause before implementing a fix. Do not assume; verify.',
|
|
102
|
+
'- If three fix attempts fail, question the architecture, not the hypothesis.',
|
|
103
|
+
'',
|
|
104
|
+
'When writing code:',
|
|
105
|
+
'- Consider rollout/migration/observability implications: Will this require staged rollout, data migration, new metrics, or log/trace points?',
|
|
106
|
+
'- Check for backwards compatibility: Will this change break existing APIs, integrations, or data formats?',
|
|
107
|
+
'- Think about failure modes: What happens if this code fails? Is it recoverable? Is there an audit trail?',
|
|
108
|
+
'',
|
|
109
|
+
'Verify your work before reporting done:',
|
|
110
|
+
'- Run the most relevant check (test, lint, typecheck, build) for what you changed.',
|
|
111
|
+
'- Review your own diff. Look for these issues tests may not catch:',
|
|
112
|
+
' - Race conditions (concurrent access, missing locks).',
|
|
113
|
+
' - N+1 queries or similar performance patterns.',
|
|
114
|
+
' - Missing error handling or unhandled edge cases.',
|
|
115
|
+
' - Hardcoded values that should be configurable.',
|
|
116
|
+
' - Incomplete enum handling (missing cases).',
|
|
117
|
+
' - Trust boundary violations (user input not validated).',
|
|
118
|
+
' - Stale reads or cache invalidation bugs.',
|
|
119
|
+
'',
|
|
76
120
|
'Report blockers immediately with exact error output. Do not retry silently more than once.',
|
|
77
121
|
'Do not run git commit, git push, git reset, git checkout, or git stash.',
|
|
78
122
|
].join('\n'),
|
|
79
|
-
|
|
80
|
-
'You are
|
|
123
|
+
planSynthesisPrompt: [
|
|
124
|
+
'You are synthesizing two independent engineering plans into one stronger, unified plan.',
|
|
81
125
|
'Compare the lead and challenger plans on clarity, feasibility, risk, and fit to the user request.',
|
|
82
126
|
'Prefer the simplest path that fully addresses the goal. Surface tradeoffs honestly.',
|
|
83
|
-
'
|
|
127
|
+
'',
|
|
128
|
+
'Identify the single most important decision the user must make to execute this plan safely and correctly.',
|
|
129
|
+
'- Look for disagreements between plans, scope boundaries, deployment/rollout strategy, backwards compatibility, or architectural tradeoffs.',
|
|
130
|
+
'- The user may have stated preferences in their request; check if anything is still unresolved.',
|
|
131
|
+
'Write it as Recommended Question and Recommended Answer. Only write NONE if no external decision is genuinely required.',
|
|
132
|
+
'',
|
|
84
133
|
'Do not editorialize or over-explain. Be direct and concise.',
|
|
85
134
|
'',
|
|
86
135
|
'Use this output format exactly:',
|
|
@@ -91,6 +140,15 @@ export const managerPromptRegistry = {
|
|
|
91
140
|
'## Recommended Answer',
|
|
92
141
|
'<answer or NONE>',
|
|
93
142
|
].join('\n'),
|
|
143
|
+
teamPlannerPrompt: [
|
|
144
|
+
'You are the team planner. Your only job is to invoke `plan_with_team`.',
|
|
145
|
+
'`plan_with_team` dispatches two engineers in parallel (lead + challenger) then synthesizes their plans.',
|
|
146
|
+
'',
|
|
147
|
+
'Call `plan_with_team` immediately with the task and any engineer names provided.',
|
|
148
|
+
'- If lead and challenger engineer names are both specified, use them.',
|
|
149
|
+
'- If either name is missing, `plan_with_team` will auto-select two non-overlapping engineers based on availability and context.',
|
|
150
|
+
'Do not attempt any planning or analysis yourself. Delegate entirely to `plan_with_team`.',
|
|
151
|
+
].join('\n'),
|
|
94
152
|
contextWarnings: {
|
|
95
153
|
moderate: 'Engineer context is getting full ({percent}% estimated). Reuse is still fine, but keep the next prompt focused.',
|
|
96
154
|
high: 'Engineer context is heavy ({percent}% estimated, {turns} turns, ${cost}). Prefer a narrowly scoped follow-up or internal compaction.',
|
|
@@ -2,7 +2,10 @@ export interface ManagerPromptRegistry {
|
|
|
2
2
|
ctoSystemPrompt: string;
|
|
3
3
|
engineerAgentPrompt: string;
|
|
4
4
|
engineerSessionPrompt: string;
|
|
5
|
-
|
|
5
|
+
/** Prompt prepended to the user prompt of the synthesis runTask call inside plan_with_team. */
|
|
6
|
+
planSynthesisPrompt: string;
|
|
7
|
+
/** Visible subagent prompt for teamPlanner — thin bridge that calls plan_with_team. */
|
|
8
|
+
teamPlannerPrompt: string;
|
|
6
9
|
contextWarnings: {
|
|
7
10
|
moderate: string;
|
|
8
11
|
high: string;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { describe, expect, it } from 'vitest';
|
|
2
2
|
import { ClaudeManagerPlugin } from '../src/plugin/claude-manager.plugin.js';
|
|
3
|
-
import { AGENT_CTO,
|
|
3
|
+
import { AGENT_CTO, AGENT_TEAM_PLANNER, ENGINEER_AGENT_IDS, ENGINEER_AGENT_NAMES, } from '../src/plugin/agent-hierarchy.js';
|
|
4
4
|
describe('ClaudeManagerPlugin', () => {
|
|
5
5
|
it('configures CTO with orchestration tools and question access', async () => {
|
|
6
6
|
const plugin = await ClaudeManagerPlugin({
|
|
@@ -34,16 +34,38 @@ describe('ClaudeManagerPlugin', () => {
|
|
|
34
34
|
git_log: 'allow',
|
|
35
35
|
claude: 'deny',
|
|
36
36
|
});
|
|
37
|
+
// Task permissions should include both capitalized (user-friendly, e.g. 'Tom') and lowercase (canonical, e.g. 'tom') agent IDs.
|
|
37
38
|
expect(cto.permission.task).toEqual({
|
|
38
39
|
'*': 'deny',
|
|
40
|
+
Tom: 'allow',
|
|
39
41
|
tom: 'allow',
|
|
42
|
+
John: 'allow',
|
|
40
43
|
john: 'allow',
|
|
44
|
+
Maya: 'allow',
|
|
41
45
|
maya: 'allow',
|
|
46
|
+
Sara: 'allow',
|
|
42
47
|
sara: 'allow',
|
|
48
|
+
Alex: 'allow',
|
|
43
49
|
alex: 'allow',
|
|
44
|
-
|
|
50
|
+
'team-planner': 'allow',
|
|
45
51
|
});
|
|
46
52
|
});
|
|
53
|
+
it('allows CTO to delegate to engineers using both uppercase and lowercase agent IDs', async () => {
|
|
54
|
+
const plugin = await ClaudeManagerPlugin({
|
|
55
|
+
worktree: '/tmp/project',
|
|
56
|
+
});
|
|
57
|
+
const config = {};
|
|
58
|
+
await plugin.config?.(config);
|
|
59
|
+
const agents = (config.agent ?? {});
|
|
60
|
+
const cto = agents[AGENT_CTO];
|
|
61
|
+
const taskPerms = cto.permission.task;
|
|
62
|
+
// Verify that both capitalized and lowercase agent IDs can be used for delegation.
|
|
63
|
+
// This prevents delegation failures when users write task(subagent_type: 'Maya') vs task(subagent_type: 'maya').
|
|
64
|
+
expect(taskPerms.Tom).toBe('allow');
|
|
65
|
+
expect(taskPerms.tom).toBe('allow');
|
|
66
|
+
expect(taskPerms.Maya).toBe('allow');
|
|
67
|
+
expect(taskPerms.maya).toBe('allow');
|
|
68
|
+
});
|
|
47
69
|
it('configures every named engineer with only the claude bridge tool', async () => {
|
|
48
70
|
const plugin = await ClaudeManagerPlugin({
|
|
49
71
|
worktree: '/tmp/project',
|
|
@@ -69,26 +91,25 @@ describe('ClaudeManagerPlugin', () => {
|
|
|
69
91
|
expect(agent.permission).not.toHaveProperty('grep');
|
|
70
92
|
}
|
|
71
93
|
});
|
|
72
|
-
it('configures
|
|
94
|
+
it('configures team-planner as a planning-bridge subagent', async () => {
|
|
73
95
|
const plugin = await ClaudeManagerPlugin({
|
|
74
96
|
worktree: '/tmp/project',
|
|
75
97
|
});
|
|
76
98
|
const config = {};
|
|
77
99
|
await plugin.config?.(config);
|
|
78
100
|
const agents = (config.agent ?? {});
|
|
79
|
-
const
|
|
80
|
-
expect(
|
|
81
|
-
expect(
|
|
82
|
-
expect(
|
|
83
|
-
expect(
|
|
101
|
+
const teamPlanner = agents[AGENT_TEAM_PLANNER];
|
|
102
|
+
expect(teamPlanner).toBeDefined();
|
|
103
|
+
expect(teamPlanner.mode).toBe('subagent');
|
|
104
|
+
expect(teamPlanner.description.toLowerCase()).toContain('plan');
|
|
105
|
+
expect(teamPlanner.permission).toMatchObject({
|
|
84
106
|
'*': 'deny',
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
glob: 'allow',
|
|
88
|
-
list: 'allow',
|
|
89
|
-
codesearch: 'allow',
|
|
107
|
+
plan_with_team: 'allow',
|
|
108
|
+
question: 'allow',
|
|
90
109
|
claude: 'deny',
|
|
91
110
|
});
|
|
111
|
+
expect(teamPlanner.permission).not.toHaveProperty('read');
|
|
112
|
+
expect(teamPlanner.permission).not.toHaveProperty('grep');
|
|
92
113
|
});
|
|
93
114
|
it('registers the named engineer bridge and team status tools', async () => {
|
|
94
115
|
const plugin = await ClaudeManagerPlugin({
|
|
@@ -129,3 +150,63 @@ describe('ClaudeManagerPlugin', () => {
|
|
|
129
150
|
expect(plugin['experimental.chat.system.transform']).toBeTypeOf('function');
|
|
130
151
|
});
|
|
131
152
|
});
|
|
153
|
+
describe('Agent ID normalization and lookup helpers', () => {
|
|
154
|
+
it('normalizeAgentId converts mixed-case agent IDs to lowercase', async () => {
|
|
155
|
+
const { normalizeAgentId } = await import('../src/plugin/claude-manager.plugin.js');
|
|
156
|
+
expect(normalizeAgentId('Tom')).toBe('tom');
|
|
157
|
+
expect(normalizeAgentId('MAYA')).toBe('maya');
|
|
158
|
+
expect(normalizeAgentId('john')).toBe('john');
|
|
159
|
+
expect(normalizeAgentId('JoHn')).toBe('john');
|
|
160
|
+
});
|
|
161
|
+
it('engineerFromAgent resolves both uppercase and lowercase agent IDs', async () => {
|
|
162
|
+
const { engineerFromAgent } = await import('../src/plugin/claude-manager.plugin.js');
|
|
163
|
+
// Lowercase (canonical)
|
|
164
|
+
expect(engineerFromAgent('tom')).toBe('Tom');
|
|
165
|
+
expect(engineerFromAgent('maya')).toBe('Maya');
|
|
166
|
+
expect(engineerFromAgent('john')).toBe('John');
|
|
167
|
+
// Capitalized (normalized)
|
|
168
|
+
expect(engineerFromAgent('Tom')).toBe('Tom');
|
|
169
|
+
expect(engineerFromAgent('Maya')).toBe('Maya');
|
|
170
|
+
expect(engineerFromAgent('John')).toBe('John');
|
|
171
|
+
// Mixed case
|
|
172
|
+
expect(engineerFromAgent('JoHn')).toBe('John');
|
|
173
|
+
expect(engineerFromAgent('mAyA')).toBe('Maya');
|
|
174
|
+
});
|
|
175
|
+
it('engineerFromAgent throws on invalid agent IDs', async () => {
|
|
176
|
+
const { engineerFromAgent } = await import('../src/plugin/claude-manager.plugin.js');
|
|
177
|
+
expect(() => engineerFromAgent('invalid')).toThrow('The claude tool can only be used from a named engineer agent');
|
|
178
|
+
expect(() => engineerFromAgent('TomInvalid')).toThrow('The claude tool can only be used from a named engineer agent');
|
|
179
|
+
});
|
|
180
|
+
it('isEngineerAgent identifies both uppercase and lowercase agent IDs', async () => {
|
|
181
|
+
const { isEngineerAgent } = await import('../src/plugin/claude-manager.plugin.js');
|
|
182
|
+
// Lowercase (canonical)
|
|
183
|
+
expect(isEngineerAgent('tom')).toBe(true);
|
|
184
|
+
expect(isEngineerAgent('maya')).toBe(true);
|
|
185
|
+
expect(isEngineerAgent('john')).toBe(true);
|
|
186
|
+
expect(isEngineerAgent('sara')).toBe(true);
|
|
187
|
+
expect(isEngineerAgent('alex')).toBe(true);
|
|
188
|
+
// Capitalized (normalized)
|
|
189
|
+
expect(isEngineerAgent('Tom')).toBe(true);
|
|
190
|
+
expect(isEngineerAgent('Maya')).toBe(true);
|
|
191
|
+
expect(isEngineerAgent('John')).toBe(true);
|
|
192
|
+
expect(isEngineerAgent('Sara')).toBe(true);
|
|
193
|
+
expect(isEngineerAgent('Alex')).toBe(true);
|
|
194
|
+
// Mixed case
|
|
195
|
+
expect(isEngineerAgent('JoHn')).toBe(true);
|
|
196
|
+
expect(isEngineerAgent('mAyA')).toBe(true);
|
|
197
|
+
// Invalid
|
|
198
|
+
expect(isEngineerAgent('invalid')).toBe(false);
|
|
199
|
+
expect(isEngineerAgent('cto')).toBe(false);
|
|
200
|
+
expect(isEngineerAgent('team-planner')).toBe(false);
|
|
201
|
+
});
|
|
202
|
+
it('CTO agent config does not have direct assign_engineer access (delegates to named engineers)', async () => {
|
|
203
|
+
const { buildCtoAgentConfig } = await import('../src/plugin/agent-hierarchy.js');
|
|
204
|
+
const { managerPromptRegistry } = await import('../src/prompts/registry.js');
|
|
205
|
+
const ctoConfig = buildCtoAgentConfig(managerPromptRegistry);
|
|
206
|
+
const ctoPermissions = ctoConfig.permission;
|
|
207
|
+
// CTO should NOT have direct access to assign_engineer (uses task() to named engineers instead)
|
|
208
|
+
expect(ctoPermissions['assign_engineer']).not.toBe('allow');
|
|
209
|
+
// CTO should NOT have direct access to plan_with_team (must delegate to team-planner)
|
|
210
|
+
expect(ctoPermissions['plan_with_team']).not.toBe('allow');
|
|
211
|
+
});
|
|
212
|
+
});
|
|
@@ -3,11 +3,13 @@ import { managerPromptRegistry } from '../src/prompts/registry.js';
|
|
|
3
3
|
describe('managerPromptRegistry', () => {
|
|
4
4
|
it('gives the CTO explicit orchestration guidance', () => {
|
|
5
5
|
expect(managerPromptRegistry.ctoSystemPrompt).toContain('You are a principal engineer orchestrating a team of AI-powered engineers');
|
|
6
|
-
expect(managerPromptRegistry.ctoSystemPrompt).toContain('
|
|
7
|
-
expect(managerPromptRegistry.ctoSystemPrompt).toContain('
|
|
8
|
-
expect(managerPromptRegistry.ctoSystemPrompt).toContain('
|
|
6
|
+
expect(managerPromptRegistry.ctoSystemPrompt).toContain('Operating Loop');
|
|
7
|
+
expect(managerPromptRegistry.ctoSystemPrompt).toContain('named engineer');
|
|
8
|
+
expect(managerPromptRegistry.ctoSystemPrompt).toContain('team-planner');
|
|
9
9
|
expect(managerPromptRegistry.ctoSystemPrompt).toContain('question');
|
|
10
|
-
expect(managerPromptRegistry.ctoSystemPrompt).toContain('
|
|
10
|
+
expect(managerPromptRegistry.ctoSystemPrompt).toContain('Review: Inspect diffs for production safety');
|
|
11
|
+
expect(managerPromptRegistry.ctoSystemPrompt).toContain('race condition');
|
|
12
|
+
expect(managerPromptRegistry.ctoSystemPrompt).toContain('contextExhausted');
|
|
11
13
|
expect(managerPromptRegistry.ctoSystemPrompt).not.toContain('clear_session');
|
|
12
14
|
expect(managerPromptRegistry.ctoSystemPrompt).not.toContain('freshSession');
|
|
13
15
|
});
|
|
@@ -21,20 +23,32 @@ describe('managerPromptRegistry', () => {
|
|
|
21
23
|
it('keeps the engineer session prompt direct and repo-aware', () => {
|
|
22
24
|
expect(managerPromptRegistry.engineerSessionPrompt).toContain('expert software engineer');
|
|
23
25
|
expect(managerPromptRegistry.engineerSessionPrompt).toContain('Start with the smallest investigation that resolves the key uncertainty');
|
|
24
|
-
expect(managerPromptRegistry.engineerSessionPrompt).toContain('Verify your
|
|
26
|
+
expect(managerPromptRegistry.engineerSessionPrompt).toContain('Verify your work before reporting done');
|
|
25
27
|
expect(managerPromptRegistry.engineerSessionPrompt).toContain('Do not run git commit');
|
|
28
|
+
expect(managerPromptRegistry.engineerSessionPrompt).toContain('rollout');
|
|
29
|
+
expect(managerPromptRegistry.engineerSessionPrompt).toContain('backwards compatibility');
|
|
26
30
|
});
|
|
27
31
|
it('keeps context warnings available for engineer sessions', () => {
|
|
28
32
|
expect(managerPromptRegistry.contextWarnings.moderate).toContain('{percent}');
|
|
29
33
|
expect(managerPromptRegistry.contextWarnings.high).toContain('{turns}');
|
|
30
34
|
expect(managerPromptRegistry.contextWarnings.critical).toContain('near capacity');
|
|
31
35
|
});
|
|
32
|
-
it('
|
|
33
|
-
expect(managerPromptRegistry.
|
|
34
|
-
expect(managerPromptRegistry.
|
|
35
|
-
expect(managerPromptRegistry.
|
|
36
|
-
expect(managerPromptRegistry.
|
|
37
|
-
expect(managerPromptRegistry.
|
|
38
|
-
|
|
36
|
+
it('planSynthesisPrompt contains synthesis guidance and complete output format', () => {
|
|
37
|
+
expect(managerPromptRegistry.planSynthesisPrompt).toContain('synthesiz');
|
|
38
|
+
expect(managerPromptRegistry.planSynthesisPrompt).toContain('two independent');
|
|
39
|
+
expect(managerPromptRegistry.planSynthesisPrompt).toContain('## Synthesis');
|
|
40
|
+
expect(managerPromptRegistry.planSynthesisPrompt).toContain('## Recommended Question');
|
|
41
|
+
expect(managerPromptRegistry.planSynthesisPrompt).toContain('## Recommended Answer');
|
|
42
|
+
});
|
|
43
|
+
it('teamPlannerPrompt directs the agent to call plan_with_team with autonomous engineer selection', () => {
|
|
44
|
+
expect(managerPromptRegistry.teamPlannerPrompt).toContain('plan_with_team');
|
|
45
|
+
expect(managerPromptRegistry.teamPlannerPrompt).toContain('auto-select');
|
|
46
|
+
expect(managerPromptRegistry.teamPlannerPrompt).toContain('engineer');
|
|
47
|
+
});
|
|
48
|
+
it('ctoSystemPrompt delegates single work to named engineers via task() and dual work to team-planner', () => {
|
|
49
|
+
expect(managerPromptRegistry.ctoSystemPrompt).toContain('task(subagent_type:');
|
|
50
|
+
expect(managerPromptRegistry.ctoSystemPrompt).toContain('single-engineer');
|
|
51
|
+
expect(managerPromptRegistry.ctoSystemPrompt).toContain('team-planner');
|
|
52
|
+
expect(managerPromptRegistry.ctoSystemPrompt).toContain('automatically selects');
|
|
39
53
|
});
|
|
40
54
|
});
|
|
@@ -163,6 +163,19 @@ describe('reportClaudeEvent — via plugin onEvent chain', () => {
|
|
|
163
163
|
expect(call.title).toBe('⚡ Maya → git_status');
|
|
164
164
|
expect(call.metadata.toolArgs).toEqual({});
|
|
165
165
|
});
|
|
166
|
+
it('surfaces status event as visible metadata', async () => {
|
|
167
|
+
const event = {
|
|
168
|
+
type: 'status',
|
|
169
|
+
text: 'Context exhausted; resetting session and retrying once with a fresh session.',
|
|
170
|
+
};
|
|
171
|
+
const { plugin } = await setupPlugin([event]);
|
|
172
|
+
const { metadata, ctx } = makeContext(tempRoot, ENGINEER_AGENT_IDS.Sara, 'wrapper-6');
|
|
173
|
+
await executeClaude(plugin, ctx);
|
|
174
|
+
const statusCall = metadata.mock.calls.find(([c]) => c?.title?.includes('ℹ️'))?.[0];
|
|
175
|
+
expect(statusCall).toBeDefined();
|
|
176
|
+
expect(statusCall.title).toBe('ℹ️ Sara: Context exhausted; resetting session and retrying once with a fresh session.');
|
|
177
|
+
expect(statusCall.metadata.status).toBe('Context exhausted; resetting session and retrying once with a fresh session.');
|
|
178
|
+
});
|
|
166
179
|
});
|
|
167
180
|
// ── Second-invocation continuity ─────────────────────────────────────────────
|
|
168
181
|
describe('second invocation continuity', () => {
|
|
@@ -180,7 +193,7 @@ describe('second invocation continuity', () => {
|
|
|
180
193
|
// ── Phase 1: first task via orchestrator (no real SDK needed) ──────────
|
|
181
194
|
const store = new TeamStateStore();
|
|
182
195
|
await store.setActiveTeam(tempRoot, 'cto-1');
|
|
183
|
-
const orchestrator = new TeamOrchestrator({ runTask: vi.fn() }, store, { appendEvents: vi.fn(async () => undefined) }, 'Base prompt', '
|
|
196
|
+
const orchestrator = new TeamOrchestrator({ runTask: vi.fn() }, store, { appendEvents: vi.fn(async () => undefined) }, 'Base prompt', 'Synthesis prompt');
|
|
184
197
|
await orchestrator.recordWrapperSession(tempRoot, 'cto-1', 'Tom', 'wrapper-tom-1');
|
|
185
198
|
await orchestrator.recordWrapperExchange(tempRoot, 'cto-1', 'Tom', 'wrapper-tom-1', 'explore', 'Investigate the auth flow', 'Found two race conditions in the token refresh path.');
|
|
186
199
|
// ── Phase 2: process restart ───────────────────────────────────────────
|
|
@@ -206,7 +219,7 @@ describe('second invocation continuity', () => {
|
|
|
206
219
|
// ── Phase 1: pre-seed Tom with a claudeSessionId ───────────────────────
|
|
207
220
|
const store = new TeamStateStore();
|
|
208
221
|
await store.setActiveTeam(tempRoot, 'cto-1');
|
|
209
|
-
const orchestrator = new TeamOrchestrator({ runTask: vi.fn() }, store, { appendEvents: vi.fn(async () => undefined) }, 'Base prompt', '
|
|
222
|
+
const orchestrator = new TeamOrchestrator({ runTask: vi.fn() }, store, { appendEvents: vi.fn(async () => undefined) }, 'Base prompt', 'Synthesis prompt');
|
|
210
223
|
await orchestrator.getOrCreateTeam(tempRoot, 'cto-1');
|
|
211
224
|
await store.updateTeam(tempRoot, 'cto-1', (team) => ({
|
|
212
225
|
...team,
|
|
@@ -240,7 +253,7 @@ describe('second invocation continuity', () => {
|
|
|
240
253
|
expect(runTask).toHaveBeenCalledOnce();
|
|
241
254
|
expect(runTask.mock.calls[0]?.[0]).toMatchObject({
|
|
242
255
|
resumeSessionId: 'ses-tom-persisted',
|
|
243
|
-
systemPrompt: undefined, // no new system prompt when resuming
|
|
244
256
|
});
|
|
257
|
+
expect(runTask.mock.calls[0]?.[0].systemPrompt).toBeUndefined(); // no system prompt when resuming
|
|
245
258
|
});
|
|
246
259
|
});
|