@doingdev/opencode-claude-manager-plugin 0.1.58 → 0.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,10 @@
1
- import { describe, expect, it } from 'vitest';
1
+ import { afterEach, describe, expect, it, vi } from 'vitest';
2
+ import { mkdtemp, rm } from 'node:fs/promises';
3
+ import { join } from 'node:path';
4
+ import { tmpdir } from 'node:os';
2
5
  import { ClaudeManagerPlugin } from '../src/plugin/claude-manager.plugin.js';
3
6
  import { AGENT_CTO, AGENT_TEAM_PLANNER, ENGINEER_AGENT_IDS, ENGINEER_AGENT_NAMES, } from '../src/plugin/agent-hierarchy.js';
7
+ import { clearPluginServices } from '../src/plugin/service-factory.js';
4
8
  describe('ClaudeManagerPlugin', () => {
5
9
  it('configures CTO with orchestration tools and question access', async () => {
6
10
  const plugin = await ClaudeManagerPlugin({
@@ -27,6 +31,8 @@ describe('ClaudeManagerPlugin', () => {
27
31
  question: 'allow',
28
32
  team_status: 'allow',
29
33
  reset_engineer: 'allow',
34
+ confirm_plan: 'allow',
35
+ advance_slice: 'allow',
30
36
  git_diff: 'allow',
31
37
  git_commit: 'allow',
32
38
  git_reset: 'allow',
@@ -121,6 +127,8 @@ describe('ClaudeManagerPlugin', () => {
121
127
  expect(tools['claude']).toBeDefined();
122
128
  expect(tools['team_status']).toBeDefined();
123
129
  expect(tools['plan_with_team']).toBeDefined();
130
+ expect(tools['confirm_plan']).toBeDefined();
131
+ expect(tools['advance_slice']).toBeDefined();
124
132
  expect(tools['reset_engineer']).toBeDefined();
125
133
  expect(tools['assign_engineer']).toBeUndefined();
126
134
  });
@@ -144,6 +152,63 @@ describe('ClaudeManagerPlugin', () => {
144
152
  expect(modelSchema.safeParse(undefined).success).toBe(true);
145
153
  expect(modelSchema.safeParse('claude-haiku-4-5').success).toBe(false);
146
154
  });
155
+ it('confirm_plan tool validates taskSize enum and requires summary', async () => {
156
+ const plugin = await ClaudeManagerPlugin({
157
+ worktree: '/tmp/project',
158
+ });
159
+ const tools = plugin.tool;
160
+ const confirmPlan = tools['confirm_plan'];
161
+ expect(confirmPlan).toBeDefined();
162
+ const summarySchema = confirmPlan.args.summary;
163
+ const taskSizeSchema = confirmPlan.args.taskSize;
164
+ const slicesSchema = confirmPlan.args.slices;
165
+ const preAuthorizedSchema = confirmPlan.args.preAuthorized;
166
+ expect(summarySchema.safeParse('Billing refactor').success).toBe(true);
167
+ expect(summarySchema.safeParse('').success).toBe(false);
168
+ expect(taskSizeSchema.safeParse('trivial').success).toBe(true);
169
+ expect(taskSizeSchema.safeParse('simple').success).toBe(true);
170
+ expect(taskSizeSchema.safeParse('large').success).toBe(true);
171
+ expect(taskSizeSchema.safeParse('medium').success).toBe(false);
172
+ expect(taskSizeSchema.safeParse('huge').success).toBe(false);
173
+ // slices is optional — absent and array both valid
174
+ expect(slicesSchema.safeParse(undefined).success).toBe(true);
175
+ expect(slicesSchema.safeParse(['slice A', 'slice B']).success).toBe(true);
176
+ // preAuthorized is optional boolean
177
+ expect(preAuthorizedSchema.safeParse(true).success).toBe(true);
178
+ expect(preAuthorizedSchema.safeParse(false).success).toBe(true);
179
+ expect(preAuthorizedSchema.safeParse(undefined).success).toBe(true);
180
+ });
181
+ it('advance_slice tool validates sliceIndex and optional status enum', async () => {
182
+ const plugin = await ClaudeManagerPlugin({
183
+ worktree: '/tmp/project',
184
+ });
185
+ const tools = plugin.tool;
186
+ const advanceSlice = tools['advance_slice'];
187
+ expect(advanceSlice).toBeDefined();
188
+ const sliceIndexSchema = advanceSlice.args.sliceIndex;
189
+ const statusSchema = advanceSlice.args.status;
190
+ expect(sliceIndexSchema.safeParse(0).success).toBe(true);
191
+ expect(sliceIndexSchema.safeParse(2).success).toBe(true);
192
+ expect(sliceIndexSchema.safeParse('0').success).toBe(false);
193
+ expect(statusSchema.safeParse('done').success).toBe(true);
194
+ expect(statusSchema.safeParse('skipped').success).toBe(true);
195
+ expect(statusSchema.safeParse(undefined).success).toBe(true);
196
+ expect(statusSchema.safeParse('in_progress').success).toBe(false);
197
+ });
198
+ it('confirm_plan and advance_slice are denied for engineers', async () => {
199
+ const plugin = await ClaudeManagerPlugin({
200
+ worktree: '/tmp/project',
201
+ });
202
+ const config = {};
203
+ await plugin.config?.(config);
204
+ const agents = (config.agent ?? {});
205
+ for (const engineer of ENGINEER_AGENT_NAMES) {
206
+ const agentId = ENGINEER_AGENT_IDS[engineer];
207
+ const agent = agents[agentId];
208
+ expect(agent.permission['confirm_plan']).toBe('deny');
209
+ expect(agent.permission['advance_slice']).toBe('deny');
210
+ }
211
+ });
147
212
  it('exposes hooks for CTO team tracking and wrapper memory injection', async () => {
148
213
  const plugin = await ClaudeManagerPlugin({
149
214
  worktree: '/tmp/project',
@@ -270,3 +335,109 @@ describe('Agent ID normalization and lookup helpers', () => {
270
335
  expect(agents['browser-qa']).toBeDefined();
271
336
  });
272
337
  });
338
+ describe('confirm_plan and advance_slice tool execution', () => {
339
+ let tempRoot;
340
+ afterEach(async () => {
341
+ clearPluginServices();
342
+ if (tempRoot) {
343
+ await rm(tempRoot, { recursive: true, force: true });
344
+ }
345
+ });
346
+ it('confirm_plan persists an active plan and returns it as JSON', async () => {
347
+ tempRoot = await mkdtemp(join(tmpdir(), 'plugin-exec-'));
348
+ const plugin = await ClaudeManagerPlugin({ worktree: tempRoot });
349
+ const tools = plugin.tool;
350
+ const context = {
351
+ sessionID: 'cto-sess-confirm',
352
+ worktree: tempRoot,
353
+ agent: AGENT_CTO,
354
+ metadata: vi.fn(),
355
+ };
356
+ const result = await tools['confirm_plan'].execute({
357
+ summary: 'Add billing history',
358
+ taskSize: 'large',
359
+ slices: ['user can view invoices', 'user can update payment method'],
360
+ preAuthorized: false,
361
+ }, context);
362
+ const activePlan = JSON.parse(result);
363
+ expect(activePlan['summary']).toBe('Add billing history');
364
+ expect(activePlan['taskSize']).toBe('large');
365
+ expect(activePlan['currentSliceIndex']).toBe(0);
366
+ expect(activePlan['preAuthorized']).toBe(false);
367
+ expect(activePlan['slices'].length).toBe(2);
368
+ });
369
+ it('advance_slice marks a slice done and returns updated plan state', async () => {
370
+ tempRoot = await mkdtemp(join(tmpdir(), 'plugin-exec-'));
371
+ const plugin = await ClaudeManagerPlugin({ worktree: tempRoot });
372
+ const tools = plugin.tool;
373
+ const context = {
374
+ sessionID: 'cto-sess-advance',
375
+ worktree: tempRoot,
376
+ agent: AGENT_CTO,
377
+ metadata: vi.fn(),
378
+ };
379
+ // Set up plan with two slices
380
+ await tools['confirm_plan'].execute({
381
+ summary: 'Two-slice task',
382
+ taskSize: 'large',
383
+ slices: ['user can log in', 'user can log out'],
384
+ preAuthorized: false,
385
+ }, context);
386
+ // Advance non-final slice 0
387
+ const result = await tools['advance_slice'].execute({ sliceIndex: 0, status: 'done' }, context);
388
+ const payload = JSON.parse(result);
389
+ const slices = payload.activePlan['slices'];
390
+ expect(slices[0]['status']).toBe('done');
391
+ expect(payload.activePlan['currentSliceIndex']).toBe(1);
392
+ });
393
+ it('advance_slice sets currentSliceIndex to null when completing the final slice', async () => {
394
+ tempRoot = await mkdtemp(join(tmpdir(), 'plugin-exec-'));
395
+ const plugin = await ClaudeManagerPlugin({ worktree: tempRoot });
396
+ const tools = plugin.tool;
397
+ const context = {
398
+ sessionID: 'cto-sess-final',
399
+ worktree: tempRoot,
400
+ agent: AGENT_CTO,
401
+ metadata: vi.fn(),
402
+ };
403
+ await tools['confirm_plan'].execute({
404
+ summary: 'Single-slice task',
405
+ taskSize: 'large',
406
+ slices: ['ship the feature'],
407
+ preAuthorized: true,
408
+ }, context);
409
+ const result = await tools['advance_slice'].execute({ sliceIndex: 0, status: 'done' }, context);
410
+ const payload = JSON.parse(result);
411
+ expect(payload.activePlan['currentSliceIndex']).toBeNull();
412
+ });
413
+ it('advance_slice throws when there is no active plan', async () => {
414
+ tempRoot = await mkdtemp(join(tmpdir(), 'plugin-exec-'));
415
+ const plugin = await ClaudeManagerPlugin({ worktree: tempRoot });
416
+ const tools = plugin.tool;
417
+ const context = {
418
+ sessionID: 'cto-sess-no-plan',
419
+ worktree: tempRoot,
420
+ agent: AGENT_CTO,
421
+ metadata: vi.fn(),
422
+ };
423
+ await expect(tools['advance_slice'].execute({ sliceIndex: 0 }, context)).rejects.toThrow('has no active plan');
424
+ });
425
+ it('advance_slice throws when the slice index is invalid', async () => {
426
+ tempRoot = await mkdtemp(join(tmpdir(), 'plugin-exec-'));
427
+ const plugin = await ClaudeManagerPlugin({ worktree: tempRoot });
428
+ const tools = plugin.tool;
429
+ const context = {
430
+ sessionID: 'cto-sess-bad-idx',
431
+ worktree: tempRoot,
432
+ agent: AGENT_CTO,
433
+ metadata: vi.fn(),
434
+ };
435
+ await tools['confirm_plan'].execute({
436
+ summary: 'Two-slice task',
437
+ taskSize: 'large',
438
+ slices: ['slice A', 'slice B'],
439
+ preAuthorized: false,
440
+ }, context);
441
+ await expect(tools['advance_slice'].execute({ sliceIndex: 99 }, context)).rejects.toThrow('slice index 99 does not exist');
442
+ });
443
+ });
@@ -1,12 +1,19 @@
1
- import { afterEach, beforeEach, describe, expect, it } from 'vitest';
1
+ /**
2
+ * Tests for the session-per-team CTO model:
3
+ * - Each CTO session ID is its own team ID (no shared repo-global state).
4
+ * - Same CTO session ID recovers the same team context across restarts.
5
+ * - A different CTO session ID in the same worktree creates an independent team.
6
+ * - Engineers spawned within a CTO session resolve to that CTO's team.
7
+ * - CTO task permissions do not allow self-delegation.
8
+ */
9
+ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
2
10
  import { mkdtemp, rm } from 'node:fs/promises';
3
11
  import { join } from 'node:path';
4
12
  import { tmpdir } from 'node:os';
5
13
  import { ClaudeManagerPlugin } from '../src/plugin/claude-manager.plugin.js';
6
- import { clearPluginServices, getActiveTeamSession } from '../src/plugin/service-factory.js';
7
- import { AGENT_CTO } from '../src/plugin/agent-hierarchy.js';
8
- import { TeamStateStore } from '../src/state/team-state-store.js';
9
- describe('CTO chat.message — persisted active team', () => {
14
+ import { clearPluginServices, getOrCreatePluginServices, getSessionTeam, getWrapperSessionMapping, registerParentSession, } from '../src/plugin/service-factory.js';
15
+ import { AGENT_CTO, ENGINEER_AGENT_IDS } from '../src/plugin/agents/index.js';
16
+ describe('CTO chat.message session-per-team model', () => {
10
17
  let tempRoot;
11
18
  beforeEach(async () => {
12
19
  tempRoot = await mkdtemp(join(tmpdir(), 'cto-team-'));
@@ -18,35 +25,175 @@ describe('CTO chat.message — persisted active team', () => {
18
25
  await rm(tempRoot, { recursive: true, force: true });
19
26
  }
20
27
  });
21
- it('persists the active team on the first CTO message', async () => {
28
+ it('CTO session ID is used directly as the team ID', async () => {
22
29
  const plugin = await ClaudeManagerPlugin({ worktree: tempRoot });
23
30
  const chatMessage = plugin['chat.message'];
24
- await chatMessage({ agent: AGENT_CTO, sessionID: 'session-cto-1' });
25
- const store = new TeamStateStore('.claude-manager');
26
- await expect(store.getActiveTeam(tempRoot)).resolves.toBe('session-cto-1');
27
- expect(getActiveTeamSession(tempRoot)).toBe('session-cto-1');
28
- });
29
- it('a new CTO session adopts the already-persisted active team instead of overwriting it', async () => {
30
- const store = new TeamStateStore('.claude-manager');
31
- // Simulate a pre-existing persisted active team (e.g., from a previous process run).
32
- await store.setActiveTeam(tempRoot, 'old-team-id');
31
+ await chatMessage({ agent: AGENT_CTO, sessionID: 'cto-session-1' });
32
+ expect(getSessionTeam('cto-session-1')).toBe('cto-session-1');
33
+ });
34
+ it('same CTO session ID recovers its team context after a process restart', async () => {
35
+ // Phase 1: CTO session 'cto-1' runs and a team record is created on disk.
36
+ const plugin1 = await ClaudeManagerPlugin({ worktree: tempRoot });
37
+ const chatMessage1 = plugin1['chat.message'];
38
+ await chatMessage1({ agent: AGENT_CTO, sessionID: 'cto-1' });
39
+ const services1 = getOrCreatePluginServices(tempRoot);
40
+ await services1.orchestrator.getOrCreateTeam(tempRoot, 'cto-1'); // persist team to disk
41
+ expect(getSessionTeam('cto-1')).toBe('cto-1');
42
+ // Phase 2: Simulate a process restart (all in-memory state is lost).
43
+ clearPluginServices();
44
+ // Phase 3: The same CTO session ID resumes — it should re-register itself.
45
+ const plugin2 = await ClaudeManagerPlugin({ worktree: tempRoot });
46
+ const chatMessage2 = plugin2['chat.message'];
47
+ await chatMessage2({ agent: AGENT_CTO, sessionID: 'cto-1' });
48
+ expect(getSessionTeam('cto-1')).toBe('cto-1');
49
+ // The persisted team record from Phase 1 should still be accessible.
50
+ const services2 = getOrCreatePluginServices(tempRoot);
51
+ const team = await services2.orchestrator.getOrCreateTeam(tempRoot, 'cto-1');
52
+ expect(team.id).toBe('cto-1');
53
+ });
54
+ it('a different CTO session ID creates an independent team, not adopting prior state', async () => {
55
+ // Phase 1: CTO session 'cto-1' runs.
56
+ const plugin1 = await ClaudeManagerPlugin({ worktree: tempRoot });
57
+ const chatMessage1 = plugin1['chat.message'];
58
+ await chatMessage1({ agent: AGENT_CTO, sessionID: 'cto-1' });
59
+ const services1 = getOrCreatePluginServices(tempRoot);
60
+ await services1.orchestrator.getOrCreateTeam(tempRoot, 'cto-1');
61
+ // Phase 2: Process restart.
62
+ clearPluginServices();
63
+ // Phase 3: A brand-new CTO session 'cto-2' starts.
64
+ const plugin2 = await ClaudeManagerPlugin({ worktree: tempRoot });
65
+ const chatMessage2 = plugin2['chat.message'];
66
+ await chatMessage2({ agent: AGENT_CTO, sessionID: 'cto-2' });
67
+ // Must use its OWN session ID as team — must NOT adopt 'cto-1'.
68
+ expect(getSessionTeam('cto-2')).toBe('cto-2');
69
+ expect(getSessionTeam('cto-1')).toBeUndefined(); // cleared by restart
70
+ // 'cto-1' team data remains on disk, untouched.
71
+ const services2 = getOrCreatePluginServices(tempRoot);
72
+ const team1 = await services2.teamStore.getTeam(tempRoot, 'cto-1');
73
+ expect(team1).not.toBeNull(); // still present
74
+ expect(team1.id).toBe('cto-1');
75
+ });
76
+ it('multiple chat.message calls from the same CTO session do not change the active team', async () => {
33
77
  const plugin = await ClaudeManagerPlugin({ worktree: tempRoot });
34
78
  const chatMessage = plugin['chat.message'];
35
- // New CTO session with a different session ID.
36
- await chatMessage({ agent: AGENT_CTO, sessionID: 'brand-new-cto-session' });
37
- // The persisted active team must NOT be overwritten.
38
- await expect(store.getActiveTeam(tempRoot)).resolves.toBe('old-team-id');
39
- // The in-memory registry must point to the persisted team, NOT the new session.
40
- expect(getActiveTeamSession(tempRoot)).toBe('old-team-id');
41
- });
42
- it('does not overwrite the persisted team across two CTO messages in the same session', async () => {
43
- const store = new TeamStateStore('.claude-manager');
79
+ await chatMessage({ agent: AGENT_CTO, sessionID: 'cto-session-1' });
80
+ await chatMessage({ agent: AGENT_CTO, sessionID: 'cto-session-1' });
81
+ await chatMessage({ agent: AGENT_CTO, sessionID: 'cto-session-1' });
82
+ expect(getSessionTeam('cto-session-1')).toBe('cto-session-1');
83
+ });
84
+ it("engineers spawned during a CTO session resolve to that CTO session's team", async () => {
85
+ const plugin = await ClaudeManagerPlugin({ worktree: tempRoot });
86
+ const chatMessage = plugin['chat.message'];
87
+ // CTO session fires first, establishing the active team.
88
+ await chatMessage({ agent: AGENT_CTO, sessionID: 'cto-A' });
89
+ // Simulate session.created event: OpenCode fires this before chat.message for the engineer.
90
+ registerParentSession('wrapper-tom-1', 'cto-A');
91
+ // Engineer wrapper session fires (spawned by the CTO).
92
+ await chatMessage({ agent: ENGINEER_AGENT_IDS.Tom, sessionID: 'wrapper-tom-1' });
93
+ // The wrapper session must be mapped to the CTO's team, not a new orphan team.
94
+ const mapping = getWrapperSessionMapping(tempRoot, 'wrapper-tom-1');
95
+ expect(mapping).toBeDefined();
96
+ expect(mapping.teamId).toBe('cto-A');
97
+ expect(mapping.workerName).toBe('Tom');
98
+ });
99
+ it('two concurrent CTO sessions each bind their own engineers independently', async () => {
100
+ const plugin = await ClaudeManagerPlugin({ worktree: tempRoot });
101
+ const chatMessage = plugin['chat.message'];
102
+ // Two CTO sessions start concurrently in the same worktree.
103
+ await chatMessage({ agent: AGENT_CTO, sessionID: 'cto-alpha' });
104
+ await chatMessage({ agent: AGENT_CTO, sessionID: 'cto-beta' });
105
+ // Simulate session.created events: each CTO spawns one engineer sub-session.
106
+ // The event hook normally calls registerParentSession; call it directly here.
107
+ registerParentSession('wrapper-tom-alpha', 'cto-alpha');
108
+ registerParentSession('wrapper-sara-beta', 'cto-beta');
109
+ // Engineer wrapper sessions check in.
110
+ await chatMessage({ agent: ENGINEER_AGENT_IDS.Tom, sessionID: 'wrapper-tom-alpha' });
111
+ await chatMessage({ agent: ENGINEER_AGENT_IDS.Sara, sessionID: 'wrapper-sara-beta' });
112
+ // Tom must bind to cto-alpha's team, not cto-beta's.
113
+ const tomMapping = getWrapperSessionMapping(tempRoot, 'wrapper-tom-alpha');
114
+ expect(tomMapping).toBeDefined();
115
+ expect(tomMapping.teamId).toBe('cto-alpha');
116
+ // Sara must bind to cto-beta's team, not cto-alpha's.
117
+ const saraMapping = getWrapperSessionMapping(tempRoot, 'wrapper-sara-beta');
118
+ expect(saraMapping).toBeDefined();
119
+ expect(saraMapping.teamId).toBe('cto-beta');
120
+ });
121
+ });
122
+ describe('CTO task permissions — self-delegation', () => {
123
+ let tempRoot;
124
+ beforeEach(async () => {
125
+ tempRoot = await mkdtemp(join(tmpdir(), 'cto-perms-'));
126
+ clearPluginServices();
127
+ });
128
+ afterEach(async () => {
129
+ clearPluginServices();
130
+ if (tempRoot)
131
+ await rm(tempRoot, { recursive: true, force: true });
132
+ });
133
+ it('CTO task permissions deny delegation to cto', async () => {
44
134
  const plugin = await ClaudeManagerPlugin({ worktree: tempRoot });
135
+ const config = {};
136
+ await plugin.config?.(config);
137
+ const agents = (config.agent ?? {});
138
+ const cto = agents[AGENT_CTO];
139
+ const taskPerms = cto.permission.task;
140
+ // Default deny must apply, and cto must not be explicitly allowed.
141
+ expect(taskPerms['*']).toBe('deny');
142
+ expect(taskPerms['cto']).toBeUndefined();
143
+ expect(taskPerms['CTO']).toBeUndefined();
144
+ });
145
+ });
146
+ describe('CTO tool isolation — concurrent sessions', () => {
147
+ let tempRoot;
148
+ beforeEach(async () => {
149
+ tempRoot = await mkdtemp(join(tmpdir(), 'cto-isolation-'));
150
+ clearPluginServices();
151
+ });
152
+ afterEach(async () => {
153
+ clearPluginServices();
154
+ if (tempRoot)
155
+ await rm(tempRoot, { recursive: true, force: true });
156
+ });
157
+ it('team_status called from CTO-A uses CTO-A team even after CTO-B has chatted', async () => {
158
+ const plugin = await ClaudeManagerPlugin({ worktree: tempRoot });
159
+ const chatMessage = plugin['chat.message'];
160
+ // CTO-A registers first, then CTO-B registers (simulating two concurrent sessions).
161
+ await chatMessage({ agent: AGENT_CTO, sessionID: 'cto-A' });
162
+ await chatMessage({ agent: AGENT_CTO, sessionID: 'cto-B' });
163
+ // Execute team_status as CTO-A (sessionID = 'cto-A').
164
+ const teamStatusTool = plugin.tool['team_status'];
165
+ const ctx = {
166
+ metadata: vi.fn(),
167
+ worktree: tempRoot,
168
+ sessionID: 'cto-A',
169
+ agent: AGENT_CTO,
170
+ abort: new AbortController().signal,
171
+ };
172
+ const result = JSON.parse(await teamStatusTool.execute({}, ctx));
173
+ // Must load cto-A's team, NOT cto-B's (last-write-wins global would give 'cto-B').
174
+ expect(result.id).toBe('cto-A');
175
+ });
176
+ it('resolves engineer team via live SDK lookup when session.created was not received', async () => {
177
+ // Simulate a client whose session.get returns parentID for the engineer session.
178
+ const mockClient = {
179
+ session: {
180
+ get: vi.fn().mockImplementation(async ({ path }) => {
181
+ if (path.id === 'wrapper-tom-live') {
182
+ return { data: { id: 'wrapper-tom-live', parentID: 'cto-live' } };
183
+ }
184
+ return { data: undefined };
185
+ }),
186
+ },
187
+ };
188
+ const plugin = await ClaudeManagerPlugin({ worktree: tempRoot, client: mockClient });
45
189
  const chatMessage = plugin['chat.message'];
46
- await chatMessage({ agent: AGENT_CTO, sessionID: 'session-cto-1' });
47
- await chatMessage({ agent: AGENT_CTO, sessionID: 'session-cto-1' });
48
- // Still the original session persisted.
49
- await expect(store.getActiveTeam(tempRoot)).resolves.toBe('session-cto-1');
50
- expect(getActiveTeamSession(tempRoot)).toBe('session-cto-1');
190
+ // CTO registers its team via chat.message.
191
+ await chatMessage({ agent: AGENT_CTO, sessionID: 'cto-live' });
192
+ // No registerParentSession call — simulates session.created arriving late or being missed.
193
+ // Engineer wrapper fires; resolveTeamId must fall through to live SDK lookup.
194
+ await chatMessage({ agent: ENGINEER_AGENT_IDS.Tom, sessionID: 'wrapper-tom-live' });
195
+ const mapping = getWrapperSessionMapping(tempRoot, 'wrapper-tom-live');
196
+ expect(mapping?.teamId).toBe('cto-live');
197
+ expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: 'wrapper-tom-live' } });
51
198
  });
52
199
  });
@@ -45,6 +45,26 @@ describe('managerPromptRegistry', () => {
45
45
  expect(managerPromptRegistry.teamPlannerPrompt).toContain('auto-select');
46
46
  expect(managerPromptRegistry.teamPlannerPrompt).toContain('engineer');
47
47
  });
48
+ it('ctoSystemPrompt includes Confirm step in operating loop before Delegate', () => {
49
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('Orient → Classify → Plan → Confirm → Delegate');
50
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('Confirm: Get user buy-in before implementing');
51
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('recommendedQuestion');
52
+ // Confirm section must appear before Delegate section in the text
53
+ const confirmIdx = managerPromptRegistry.ctoSystemPrompt.indexOf('## Confirm:');
54
+ const delegateIdx = managerPromptRegistry.ctoSystemPrompt.indexOf('## Delegate:');
55
+ expect(confirmIdx).toBeGreaterThan(-1);
56
+ expect(confirmIdx).toBeLessThan(delegateIdx);
57
+ });
58
+ it('engineerAgentPrompt instructs engineers to surface plan deviations', () => {
59
+ expect(managerPromptRegistry.engineerAgentPrompt).toContain('deviation');
60
+ expect(managerPromptRegistry.engineerAgentPrompt).toContain('surface');
61
+ });
62
+ it('browserQaAgentPrompt instructs browser-qa to report scope mismatches', () => {
63
+ expect(managerPromptRegistry.browserQaAgentPrompt).toContain('scope mismatch');
64
+ });
65
+ it('teamPlannerPrompt instructs planner to pass synthesis back unchanged', () => {
66
+ expect(managerPromptRegistry.teamPlannerPrompt).toContain('unchanged');
67
+ });
48
68
  it('ctoSystemPrompt delegates single work to named engineers via task() and dual work to team-planner', () => {
49
69
  expect(managerPromptRegistry.ctoSystemPrompt).toContain('task(subagent_type:');
50
70
  expect(managerPromptRegistry.ctoSystemPrompt).toContain('single-engineer');
@@ -66,4 +86,36 @@ describe('managerPromptRegistry', () => {
66
86
  expect(managerPromptRegistry.browserQaSessionPrompt).not.toContain('implement');
67
87
  expect(managerPromptRegistry.browserQaSessionPrompt).not.toContain('write code');
68
88
  });
89
+ it('ctoSystemPrompt encodes task size classification (trivial/simple/large)', () => {
90
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('trivial');
91
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('simple');
92
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('large');
93
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('Task size');
94
+ });
95
+ it('ctoSystemPrompt mentions confirm_plan and advance_slice for large task lifecycle', () => {
96
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('confirm_plan');
97
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('advance_slice');
98
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('preAuthorized');
99
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('vertical slice');
100
+ });
101
+ it('ctoSystemPrompt encodes warn-only context policy', () => {
102
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('Context warnings');
103
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('advisory');
104
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('contextExhausted');
105
+ });
106
+ it('contextWarnings reflect warn-only policy for critical level', () => {
107
+ expect(managerPromptRegistry.contextWarnings.critical).toContain('near capacity');
108
+ expect(managerPromptRegistry.contextWarnings.critical).toContain('Warn only');
109
+ });
110
+ it('ctoSystemPrompt uses genuinely vertical slice examples, not horizontal layers', () => {
111
+ // Horizontal layer examples (internal plumbing only) must not appear
112
+ expect(managerPromptRegistry.ctoSystemPrompt).not.toContain('"types + contracts"');
113
+ expect(managerPromptRegistry.ctoSystemPrompt).not.toContain('"core logic"');
114
+ expect(managerPromptRegistry.ctoSystemPrompt).not.toContain('"plugin tools"');
115
+ // Prompt must describe the end-to-end / user-testable property of a slice
116
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('end-to-end');
117
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('user-testable');
118
+ // Horizontal layers must be explicitly called out as wrong
119
+ expect(managerPromptRegistry.ctoSystemPrompt).toContain('Horizontal layer');
120
+ });
69
121
  });
@@ -8,7 +8,7 @@ import { mkdtemp, rm } from 'node:fs/promises';
8
8
  import { join } from 'node:path';
9
9
  import { tmpdir } from 'node:os';
10
10
  import { ClaudeManagerPlugin } from '../src/plugin/claude-manager.plugin.js';
11
- import { clearPluginServices, getActiveTeamSession, getOrCreatePluginServices, } from '../src/plugin/service-factory.js';
11
+ import { clearPluginServices, getOrCreatePluginServices, getSessionTeam, registerParentSession, } from '../src/plugin/service-factory.js';
12
12
  import { AGENT_CTO, ENGINEER_AGENT_IDS } from '../src/plugin/agent-hierarchy.js';
13
13
  import { TeamStateStore } from '../src/state/team-state-store.js';
14
14
  import { TeamOrchestrator } from '../src/manager/team-orchestrator.js';
@@ -197,23 +197,25 @@ describe('second invocation continuity', () => {
197
197
  if (tempRoot)
198
198
  await rm(tempRoot, { recursive: true, force: true });
199
199
  });
200
- it('wrapper memory is injected after clearPluginServices and a new plugin instance', async () => {
200
+ it('wrapper memory is injected after clearPluginServices and same CTO session resumes', async () => {
201
201
  // ── Phase 1: first task via orchestrator (no real SDK needed) ──────────
202
+ // Team ID = 'cto-1' (the CTO session ID that originally ran this work).
202
203
  const store = new TeamStateStore();
203
- await store.setActiveTeam(tempRoot, 'cto-1');
204
204
  const orchestrator = new TeamOrchestrator({ runTask: vi.fn() }, store, { appendEvents: vi.fn(async () => undefined) }, 'Base prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
205
205
  await orchestrator.recordWrapperSession(tempRoot, 'cto-1', 'Tom', 'wrapper-tom-1');
206
206
  await orchestrator.recordWrapperExchange(tempRoot, 'cto-1', 'Tom', 'wrapper-tom-1', 'explore', 'Investigate the auth flow', 'Found two race conditions in the token refresh path.');
207
207
  // ── Phase 2: process restart ───────────────────────────────────────────
208
208
  clearPluginServices();
209
- // ── Phase 3: new plugin instance, new CTO session ──────────────────────
209
+ // ── Phase 3: same CTO session resumes, engineers run a new wrapper ──────
210
210
  const plugin2 = await ClaudeManagerPlugin({ worktree: tempRoot });
211
211
  const chatMessage2 = plugin2['chat.message'];
212
212
  const systemTransform2 = plugin2['experimental.chat.system.transform'];
213
- // New CTO session must adopt the persisted team, not create a new one.
214
- await chatMessage2({ agent: AGENT_CTO, sessionID: 'cto-2' });
215
- expect(getActiveTeamSession(tempRoot)).toBe('cto-1');
216
- // Tom's new wrapper session must be registered under the persisted team.
213
+ // Same CTO session ID resumes and re-registers itself as the team.
214
+ await chatMessage2({ agent: AGENT_CTO, sessionID: 'cto-1' });
215
+ expect(getSessionTeam('cto-1')).toBe('cto-1');
216
+ // Simulate session.created: OpenCode fires this when cto-1 spawns the new wrapper.
217
+ registerParentSession('wrapper-tom-2', 'cto-1');
218
+ // Tom's new wrapper session is registered under the same team.
217
219
  await chatMessage2({ agent: ENGINEER_AGENT_IDS.Tom, sessionID: 'wrapper-tom-2' });
218
220
  // Transform fires (after chat.message has registered the session mapping).
219
221
  const output = { system: [] };
@@ -224,9 +226,8 @@ describe('second invocation continuity', () => {
224
226
  expect(output.system[0]).toContain('Found two race conditions');
225
227
  });
226
228
  it('existing engineer Claude session is resumed on second invocation', async () => {
227
- // ── Phase 1: pre-seed Tom with a claudeSessionId ───────────────────────
229
+ // ── Phase 1: pre-seed Tom with a claudeSessionId under team 'cto-1' ────
228
230
  const store = new TeamStateStore();
229
- await store.setActiveTeam(tempRoot, 'cto-1');
230
231
  const orchestrator = new TeamOrchestrator({ runTask: vi.fn() }, store, { appendEvents: vi.fn(async () => undefined) }, 'Base prompt', 'Synthesis prompt', { BrowserQA: BROWSER_QA_TEST_CAPS });
231
232
  await orchestrator.getOrCreateTeam(tempRoot, 'cto-1');
232
233
  await store.updateTeam(tempRoot, 'cto-1', (team) => ({
@@ -235,10 +236,13 @@ describe('second invocation continuity', () => {
235
236
  }));
236
237
  // ── Phase 2: process restart ───────────────────────────────────────────
237
238
  clearPluginServices();
238
- // ── Phase 3: new plugin, new CTO, engineer runs second task ───────────
239
+ // ── Phase 3: same CTO session resumes, engineer runs second task ───────
239
240
  const plugin2 = await ClaudeManagerPlugin({ worktree: tempRoot });
240
241
  const chatMessage2 = plugin2['chat.message'];
241
- await chatMessage2({ agent: AGENT_CTO, sessionID: 'cto-2' });
242
+ // Same CTO session ID — re-registers as the team so Tom can find his session.
243
+ await chatMessage2({ agent: AGENT_CTO, sessionID: 'cto-1' });
244
+ // Simulate session.created: OpenCode fires this when cto-1 spawns the new wrapper.
245
+ registerParentSession('wrapper-tom-2', 'cto-1');
242
246
  await chatMessage2({ agent: ENGINEER_AGENT_IDS.Tom, sessionID: 'wrapper-tom-2' });
243
247
  const services2 = getOrCreatePluginServices(tempRoot);
244
248
  // Mock at the session level so dispatchEngineer runs its real logic