@helmiq/crew 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (209)
  1. package/defaults/personas/architect.persona.yaml +72 -0
  2. package/defaults/personas/engineer.persona.yaml +137 -0
  3. package/defaults/personas/persona-spec.schema.yaml +149 -0
  4. package/defaults/personas/reviewer.persona.yaml +47 -0
  5. package/defaults/rubrics/adr.rubric.yaml +48 -0
  6. package/defaults/rubrics/code-review.rubric.yaml +39 -0
  7. package/defaults/rubrics/pull-request.rubric.yaml +40 -0
  8. package/dist/actions/actions.test.d.ts +2 -0
  9. package/dist/actions/actions.test.d.ts.map +1 -0
  10. package/dist/actions/actions.test.js +158 -0
  11. package/dist/actions/direct-dispatcher.d.ts +10 -0
  12. package/dist/actions/direct-dispatcher.d.ts.map +1 -0
  13. package/dist/actions/direct-dispatcher.js +27 -0
  14. package/dist/actions/dispatcher.d.ts +11 -0
  15. package/dist/actions/dispatcher.d.ts.map +1 -0
  16. package/dist/actions/dispatcher.js +1 -0
  17. package/dist/actions/index.d.ts +7 -0
  18. package/dist/actions/index.d.ts.map +1 -0
  19. package/dist/actions/index.js +3 -0
  20. package/dist/actions/registry.d.ts +13 -0
  21. package/dist/actions/registry.d.ts.map +1 -0
  22. package/dist/actions/registry.js +40 -0
  23. package/dist/actions/resolver.d.ts +47 -0
  24. package/dist/actions/resolver.d.ts.map +1 -0
  25. package/dist/actions/resolver.js +43 -0
  26. package/dist/cli/cli.test.d.ts +2 -0
  27. package/dist/cli/cli.test.d.ts.map +1 -0
  28. package/dist/cli/cli.test.js +392 -0
  29. package/dist/cli/run.d.ts +45 -0
  30. package/dist/cli/run.d.ts.map +1 -0
  31. package/dist/cli/run.js +236 -0
  32. package/dist/common/errors.d.ts +76 -0
  33. package/dist/common/errors.d.ts.map +1 -0
  34. package/dist/common/errors.js +74 -0
  35. package/dist/config/config.test.d.ts +2 -0
  36. package/dist/config/config.test.d.ts.map +1 -0
  37. package/dist/config/config.test.js +691 -0
  38. package/dist/config/index.d.ts +7 -0
  39. package/dist/config/index.d.ts.map +1 -0
  40. package/dist/config/index.js +4 -0
  41. package/dist/config/loader.d.ts +16 -0
  42. package/dist/config/loader.d.ts.map +1 -0
  43. package/dist/config/loader.js +56 -0
  44. package/dist/config/model-resolver.d.ts +24 -0
  45. package/dist/config/model-resolver.d.ts.map +1 -0
  46. package/dist/config/model-resolver.js +39 -0
  47. package/dist/config/resolver.d.ts +22 -0
  48. package/dist/config/resolver.d.ts.map +1 -0
  49. package/dist/config/resolver.js +115 -0
  50. package/dist/config/schemas.d.ts +266 -0
  51. package/dist/config/schemas.d.ts.map +1 -0
  52. package/dist/config/schemas.js +115 -0
  53. package/dist/context/artifact-reader.d.ts +12 -0
  54. package/dist/context/artifact-reader.d.ts.map +1 -0
  55. package/dist/context/artifact-reader.js +92 -0
  56. package/dist/context/assembler.d.ts +22 -0
  57. package/dist/context/assembler.d.ts.map +1 -0
  58. package/dist/context/assembler.js +126 -0
  59. package/dist/context/code-reader.d.ts +14 -0
  60. package/dist/context/code-reader.d.ts.map +1 -0
  61. package/dist/context/code-reader.js +56 -0
  62. package/dist/context/context.test.d.ts +2 -0
  63. package/dist/context/context.test.d.ts.map +1 -0
  64. package/dist/context/context.test.js +260 -0
  65. package/dist/context/index.d.ts +9 -0
  66. package/dist/context/index.d.ts.map +1 -0
  67. package/dist/context/index.js +5 -0
  68. package/dist/context/section-extractor.d.ts +9 -0
  69. package/dist/context/section-extractor.d.ts.map +1 -0
  70. package/dist/context/section-extractor.js +32 -0
  71. package/dist/context/token-budget.d.ts +11 -0
  72. package/dist/context/token-budget.d.ts.map +1 -0
  73. package/dist/context/token-budget.js +22 -0
  74. package/dist/control/control.test.d.ts +2 -0
  75. package/dist/control/control.test.d.ts.map +1 -0
  76. package/dist/control/control.test.js +137 -0
  77. package/dist/control/id-generator.d.ts +12 -0
  78. package/dist/control/id-generator.d.ts.map +1 -0
  79. package/dist/control/id-generator.js +20 -0
  80. package/dist/control/index.d.ts +5 -0
  81. package/dist/control/index.d.ts.map +1 -0
  82. package/dist/control/index.js +3 -0
  83. package/dist/control/lock-manager.d.ts +13 -0
  84. package/dist/control/lock-manager.d.ts.map +1 -0
  85. package/dist/control/lock-manager.js +72 -0
  86. package/dist/control/run-state.d.ts +16 -0
  87. package/dist/control/run-state.d.ts.map +1 -0
  88. package/dist/control/run-state.js +55 -0
  89. package/dist/engine/composite.d.ts +34 -0
  90. package/dist/engine/composite.d.ts.map +1 -0
  91. package/dist/engine/composite.js +192 -0
  92. package/dist/engine/composite.test.d.ts +2 -0
  93. package/dist/engine/composite.test.d.ts.map +1 -0
  94. package/dist/engine/composite.test.js +1947 -0
  95. package/dist/engine/engine.test.d.ts +2 -0
  96. package/dist/engine/engine.test.d.ts.map +1 -0
  97. package/dist/engine/engine.test.js +334 -0
  98. package/dist/engine/index.d.ts +10 -0
  99. package/dist/engine/index.d.ts.map +1 -0
  100. package/dist/engine/index.js +5 -0
  101. package/dist/engine/llm-client.d.ts +27 -0
  102. package/dist/engine/llm-client.d.ts.map +1 -0
  103. package/dist/engine/llm-client.js +46 -0
  104. package/dist/engine/simple.d.ts +21 -0
  105. package/dist/engine/simple.d.ts.map +1 -0
  106. package/dist/engine/simple.js +59 -0
  107. package/dist/engine/tool-dispatch.d.ts +37 -0
  108. package/dist/engine/tool-dispatch.d.ts.map +1 -0
  109. package/dist/engine/tool-dispatch.js +146 -0
  110. package/dist/engine/tool-dispatch.test.d.ts +2 -0
  111. package/dist/engine/tool-dispatch.test.d.ts.map +1 -0
  112. package/dist/engine/tool-dispatch.test.js +348 -0
  113. package/dist/engine/tool-filter.d.ts +13 -0
  114. package/dist/engine/tool-filter.d.ts.map +1 -0
  115. package/dist/engine/tool-filter.js +25 -0
  116. package/dist/evaluation/evaluation.test.d.ts +2 -0
  117. package/dist/evaluation/evaluation.test.d.ts.map +1 -0
  118. package/dist/evaluation/evaluation.test.js +490 -0
  119. package/dist/evaluation/evaluator.d.ts +19 -0
  120. package/dist/evaluation/evaluator.d.ts.map +1 -0
  121. package/dist/evaluation/evaluator.js +78 -0
  122. package/dist/evaluation/index.d.ts +4 -0
  123. package/dist/evaluation/index.d.ts.map +1 -0
  124. package/dist/evaluation/index.js +2 -0
  125. package/dist/evaluation/scorer.d.ts +38 -0
  126. package/dist/evaluation/scorer.d.ts.map +1 -0
  127. package/dist/evaluation/scorer.js +94 -0
  128. package/dist/index.d.ts +47 -0
  129. package/dist/index.d.ts.map +1 -0
  130. package/dist/index.js +28 -0
  131. package/dist/providers/index.d.ts +2 -0
  132. package/dist/providers/index.d.ts.map +1 -0
  133. package/dist/providers/index.js +1 -0
  134. package/dist/providers/provider-factory.d.ts +11 -0
  135. package/dist/providers/provider-factory.d.ts.map +1 -0
  136. package/dist/providers/provider-factory.js +30 -0
  137. package/dist/publication/frontmatter.d.ts +21 -0
  138. package/dist/publication/frontmatter.d.ts.map +1 -0
  139. package/dist/publication/frontmatter.js +15 -0
  140. package/dist/publication/git-ops.d.ts +18 -0
  141. package/dist/publication/git-ops.d.ts.map +1 -0
  142. package/dist/publication/git-ops.js +74 -0
  143. package/dist/publication/index.d.ts +9 -0
  144. package/dist/publication/index.d.ts.map +1 -0
  145. package/dist/publication/index.js +5 -0
  146. package/dist/publication/provenance-writer.d.ts +27 -0
  147. package/dist/publication/provenance-writer.d.ts.map +1 -0
  148. package/dist/publication/provenance-writer.js +21 -0
  149. package/dist/publication/publication.test.d.ts +2 -0
  150. package/dist/publication/publication.test.d.ts.map +1 -0
  151. package/dist/publication/publication.test.js +235 -0
  152. package/dist/publication/publisher.d.ts +32 -0
  153. package/dist/publication/publisher.d.ts.map +1 -0
  154. package/dist/publication/publisher.js +113 -0
  155. package/dist/publication/secret-scanner.d.ts +6 -0
  156. package/dist/publication/secret-scanner.d.ts.map +1 -0
  157. package/dist/publication/secret-scanner.js +19 -0
  158. package/dist/tools/index.d.ts +4 -0
  159. package/dist/tools/index.d.ts.map +1 -0
  160. package/dist/tools/index.js +2 -0
  161. package/dist/tools/registry.d.ts +15 -0
  162. package/dist/tools/registry.d.ts.map +1 -0
  163. package/dist/tools/registry.js +288 -0
  164. package/dist/tools/registry.test.d.ts +2 -0
  165. package/dist/tools/registry.test.d.ts.map +1 -0
  166. package/dist/tools/registry.test.js +131 -0
  167. package/dist/tools/tool-groups.d.ts +20 -0
  168. package/dist/tools/tool-groups.d.ts.map +1 -0
  169. package/dist/tools/tool-groups.js +48 -0
  170. package/dist/tools/tool-groups.test.d.ts +2 -0
  171. package/dist/tools/tool-groups.test.d.ts.map +1 -0
  172. package/dist/tools/tool-groups.test.js +127 -0
  173. package/dist/types/artifact-store.d.ts +33 -0
  174. package/dist/types/artifact-store.d.ts.map +1 -0
  175. package/dist/types/artifact-store.js +9 -0
  176. package/dist/types/evaluation-rubric.d.ts +18 -0
  177. package/dist/types/evaluation-rubric.d.ts.map +1 -0
  178. package/dist/types/evaluation-rubric.js +1 -0
  179. package/dist/types/index.d.ts +10 -0
  180. package/dist/types/index.d.ts.map +1 -0
  181. package/dist/types/index.js +1 -0
  182. package/dist/types/llm-provider.d.ts +47 -0
  183. package/dist/types/llm-provider.d.ts.map +1 -0
  184. package/dist/types/llm-provider.js +8 -0
  185. package/dist/types/persona-spec.d.ts +79 -0
  186. package/dist/types/persona-spec.d.ts.map +1 -0
  187. package/dist/types/persona-spec.js +1 -0
  188. package/dist/types/project-config.d.ts +28 -0
  189. package/dist/types/project-config.d.ts.map +1 -0
  190. package/dist/types/project-config.js +1 -0
  191. package/dist/types/provenance.d.ts +67 -0
  192. package/dist/types/provenance.d.ts.map +1 -0
  193. package/dist/types/provenance.js +1 -0
  194. package/dist/types/run-state.d.ts +11 -0
  195. package/dist/types/run-state.d.ts.map +1 -0
  196. package/dist/types/run-state.js +1 -0
  197. package/dist/types/tool-runtime.d.ts +43 -0
  198. package/dist/types/tool-runtime.d.ts.map +1 -0
  199. package/dist/types/tool-runtime.js +30 -0
  200. package/dist/workspace/detect.d.ts +11 -0
  201. package/dist/workspace/detect.d.ts.map +1 -0
  202. package/dist/workspace/detect.js +28 -0
  203. package/dist/workspace/detect.test.d.ts +2 -0
  204. package/dist/workspace/detect.test.d.ts.map +1 -0
  205. package/dist/workspace/detect.test.js +53 -0
  206. package/dist/workspace/index.d.ts +2 -0
  207. package/dist/workspace/index.d.ts.map +1 -0
  208. package/dist/workspace/index.js +1 -0
  209. package/package.json +51 -0
@@ -0,0 +1,1947 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { mkdtemp, readFile, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { executeCompositeTask } from './composite.js';
/**
 * Model descriptor passed as the `model` argument of `executeCompositeTask`
 * by the tests in this file. The tests visible here always pair it with a
 * stub provider.
 */
const mockModel = {
  alias: 'claude-sonnet',
  provider: 'anthropic',
  concreteModel: 'claude-sonnet-4-20250514',
  apiKeyEnv: 'ANTHROPIC_API_KEY',
};

/** Context fixture with no blocks, no gaps, zero tokens used and a 100k budget. */
const emptyContext = {
  blocks: [],
  totalTokens: 0,
  budgetLimit: 100_000,
  gaps: [],
};

/** Empty tool map for tests that do not exercise tool filtering. */
const emptyTools = {};
/**
 * Builds a fresh `engineer` persona spec for a test run.
 *
 * A new object graph is returned on every call, so a test may mutate the
 * result without affecting other tests.
 *
 * @returns {{ persona: object }} Persona spec wrapper with identity, skills,
 *   permitted/denied tool groups and an evaluation rubric reference.
 */
function makePersona() {
  const tools = {
    permitted: ['read-artifact', 'code', 'git', 'shell'],
    denied: ['write-strategy-artifacts'],
  };
  const persona = {
    name: 'engineer',
    identity: { role: 'Senior Software Engineer' },
    skills: ['feature-implementation', 'code-review'],
    perception: { per_task: {} },
    tasks: {},
    tools,
    cadence: {},
    evaluation: { rubric: 'test.rubric.yaml' },
  };
  return { persona };
}
/**
 * Builds a composite task definition with three sub-agents
 * (planner -> implementer -> reviewer) that publishes the `review` artifact.
 *
 * @param {object} [overrides] - Top-level task fields that replace the
 *   defaults. The merge is shallow: passing `sub_agents` replaces the whole
 *   list rather than patching individual entries.
 * @returns {object} A new task definition; nothing is shared between calls.
 */
function makeTask(overrides) {
  const planner = {
    name: 'planner',
    skill: 'feature-implementation',
    reads: [],
    produces: 'plan',
  };
  // The implementer is the only default step that declares its own tool groups.
  const implementer = {
    name: 'implementer',
    skill: 'feature-implementation',
    reads: ['plan'],
    produces: 'code',
    tools: ['code', 'git'],
  };
  const reviewer = {
    name: 'reviewer',
    skill: 'code-review',
    reads: ['code', 'plan'],
    produces: 'review',
  };
  const defaults = {
    mode: 'composite',
    trigger: ['manual'],
    sub_agents: [planner, implementer, reviewer],
    published_artifact: 'review',
  };
  return { ...defaults, ...overrides };
}
/**
 * Number of `generateText` calls seen by the most recently created stub
 * provider. It lives at module level, so two stub providers used at the same
 * time would share (and disturb) each other's position.
 */
let callCount = 0;

/**
 * Builds a stub LLM provider whose `generateText` resolves with scripted
 * replies in call order.
 *
 * @param {string[]} responses - Reply text for call 0, 1, 2, ...; a call past
 *   the end of the list (or onto a nullish entry) gets `response-<index>`.
 * @returns {{ generateText: Function }} Provider stub; `generateText` is a
 *   `vi.fn()` so tests can inspect `mock.calls`.
 */
function sequentialProvider(responses) {
  callCount = 0;
  const generateText = vi.fn().mockImplementation(() => {
    const index = callCount;
    callCount += 1;
    return Promise.resolve({
      text: responses[index] ?? `response-${index}`,
      toolCalls: [],
      tokensIn: 10,
      tokensOut: 20,
    });
  });
  return { generateText };
}
/**
 * Builds a stub LLM provider that rejects on exactly one call and otherwise
 * behaves like a scripted provider.
 *
 * @param {number} failAtCall - Zero-based index of the call that rejects with
 *   `Error('LLM call failed')`.
 * @param {string[]} responses - Reply text indexed by call number; missing
 *   entries fall back to `response-<index>`. The failing call still consumes
 *   its index, so later replies are not shifted.
 * @returns {{ generateText: Function }} Provider stub backed by `vi.fn()`.
 */
function failingProvider(failAtCall, responses) {
  callCount = 0;
  const generateText = vi.fn().mockImplementation(() => {
    const index = callCount;
    callCount += 1;
    if (index === failAtCall) {
      return Promise.reject(new Error('LLM call failed'));
    }
    return Promise.resolve({
      text: responses[index] ?? `response-${index}`,
      toolCalls: [],
      tokensIn: 10,
      tokensOut: 20,
    });
  });
  return { generateText };
}
/** Scratch directory handed to `executeCompositeTask` by the current test. */
let tmpDir;

// Every test starts with restored mocks, a zeroed stub-provider counter and
// its own empty scratch directory under the OS temp dir.
beforeEach(async () => {
  vi.restoreAllMocks();
  callCount = 0;
  tmpDir = await mkdtemp(join(tmpdir(), 'crew-composite-test-'));
});

// Remove the scratch directory again so repeated runs do not accumulate
// `crew-composite-test-*` folders in the OS temp dir.
afterEach(async () => {
  if (tmpDir === undefined) {
    // `mkdtemp` failed in `beforeEach`; there is nothing to clean up.
    return;
  }
  const finishedDir = tmpDir;
  tmpDir = undefined;
  await rm(finishedDir, { recursive: true, force: true });
});
describe('T-01-004a: sequential sub-agent execution', () => {
  // Runs the default planner -> implementer -> reviewer task against `provider`.
  const runDefaultTask = (provider, tools = emptyTools) =>
    executeCompositeTask(makePersona(), makeTask(), emptyContext, mockModel, provider, tools, tmpDir);

  // Content of the first message passed to the provider on the given zero-based call.
  const promptOfCall = (provider, index) =>
    provider.generateText.mock.calls[index][0].messages[0].content;

  it('executes sub-agents in order and returns final output', async () => {
    const provider = sequentialProvider(['The plan', 'The code', 'The review']);

    const result = await runDefaultTask(provider);

    expect(result.content).toBe('The review');
    expect(result.subAgentResults).toHaveLength(3);
    const [first, second, third] = result.subAgentResults;
    expect(first.name).toBe('planner');
    expect(second.name).toBe('implementer');
    expect(third.name).toBe('reviewer');
    expect(provider.generateText).toHaveBeenCalledTimes(3);
  });

  it('passes working state between sub-agents', async () => {
    const provider = sequentialProvider(['The plan', 'The code', 'The review']);

    await runDefaultTask(provider);

    // The implementer reads `plan`; the reviewer reads both `code` and `plan`.
    expect(promptOfCall(provider, 1)).toContain('The plan');
    const reviewerPrompt = promptOfCall(provider, 2);
    expect(reviewerPrompt).toContain('The code');
    expect(reviewerPrompt).toContain('The plan');
  });

  it('accumulates token counts across sub-agents', async () => {
    const provider = sequentialProvider(['a', 'b', 'c']);

    const result = await runDefaultTask(provider);

    // Each stubbed call reports 10 tokens in / 20 tokens out; three calls are made.
    expect(result.tokensIn).toBe(30);
    expect(result.tokensOut).toBe(60);
  });

  it('applies sub-agent tool restrictions', async () => {
    const noop = vi.fn().mockResolvedValue({});
    const tools = {
      'read-file': { name: 'read-file', description: 'read', parameters: {}, execute: noop },
      'write-file': { name: 'write-file', description: 'write', parameters: {}, execute: noop },
      'git-branch': { name: 'git-branch', description: 'git', parameters: {}, execute: noop },
      'git-commit': { name: 'git-commit', description: 'git', parameters: {}, execute: noop },
      'run-command': { name: 'run-command', description: 'shell', parameters: {}, execute: noop },
    };
    const provider = sequentialProvider(['a', 'b', 'c']);

    await runDefaultTask(provider, tools);

    const toolNamesOfCall = (index) =>
      Object.keys(provider.generateText.mock.calls[index][0].tools ?? {}).sort();

    // The planner declares no `tools`, so it is offered every tool passed in.
    expect(toolNamesOfCall(0)).toEqual(
      ['git-branch', 'git-commit', 'read-file', 'run-command', 'write-file'].sort(),
    );
    // The implementer declares `tools: ['code', 'git']`, which drops `run-command`.
    expect(toolNamesOfCall(1)).toEqual(
      ['git-branch', 'git-commit', 'read-file', 'write-file'].sort(),
    );
  });
});
describe('T-01-004b: retry via max_iterations', () => {
  // Task with a single sub-agent that declares `max_iterations: maxIterations`.
  const singleStepTask = (name, maxIterations) =>
    makeTask({
      sub_agents: [
        {
          name,
          skill: 'feature-implementation',
          reads: [],
          produces: 'output',
          max_iterations: maxIterations,
        },
      ],
      published_artifact: 'output',
    });

  const runTask = (task, provider) =>
    executeCompositeTask(makePersona(), task, emptyContext, mockModel, provider, emptyTools, tmpDir);

  it('retries a failing sub-agent up to max_iterations', async () => {
    // The first two calls reject, every later call succeeds.
    const provider = {
      generateText: vi
        .fn()
        .mockRejectedValueOnce(new Error('Fail attempt 1'))
        .mockRejectedValueOnce(new Error('Fail attempt 2'))
        .mockResolvedValue({
          text: 'Success on third try',
          toolCalls: [],
          tokensIn: 10,
          tokensOut: 20,
        }),
    };

    const result = await runTask(singleStepTask('flaky', 3), provider);

    expect(result.content).toBe('Success on third try');
    expect(provider.generateText).toHaveBeenCalledTimes(3);
  });

  it('returns empty content when max_iterations exhausted', async () => {
    const provider = {
      generateText: vi.fn().mockRejectedValue(new Error('Always fails')),
    };

    const result = await runTask(singleStepTask('always-fails', 2), provider);

    expect(result.content).toBe('');
    expect(provider.generateText).toHaveBeenCalledTimes(2);
  });
});
describe('T-01-004c: jump-back via on_fail/gate/max_loops', () => {
  // The stubs treat a call as a gate evaluation when its system prompt
  // mentions 'gate evaluator'; every other call is a regular sub-agent call.
  const isGateCall = (opts) => opts.system.includes('gate evaluator');

  // Provider reply carrying a JSON gate verdict.
  const gateVerdict = (pass, reason) =>
    Promise.resolve({
      text: JSON.stringify({ pass, reason }),
      toolCalls: [],
      tokensIn: 5,
      tokensOut: 5,
    });

  // Provider reply for a regular sub-agent call.
  const workOutput = (text) =>
    Promise.resolve({
      text,
      toolCalls: [],
      tokensIn: 10,
      tokensOut: 20,
    });

  // implementer -> reviewer, where the reviewer's gate sends control back to
  // the implementer on failure, at most `maxLoops` times.
  const reviewLoopTask = (maxLoops) =>
    makeTask({
      sub_agents: [
        { name: 'implementer', skill: 'feature-implementation', reads: [], produces: 'code' },
        {
          name: 'reviewer',
          skill: 'code-review',
          reads: ['code'],
          produces: 'review',
          gate: 'no blocking issues',
          on_fail: 'implementer',
          max_loops: maxLoops,
        },
      ],
      published_artifact: 'review',
    });

  const runTask = (task, provider) =>
    executeCompositeTask(makePersona(), task, emptyContext, mockModel, provider, emptyTools, tmpDir);

  it('jumps back when gate fails and retries from target', async () => {
    let callIdx = 0;
    const provider = {
      generateText: vi.fn().mockImplementation((opts) => {
        callIdx += 1;
        if (isGateCall(opts)) {
          // NOTE(review): presumably the call order is implementer, reviewer,
          // gate, so the first gate evaluation is call 3 (fails) and the second
          // is call 6 (passes) - confirm against composite.js.
          const pass = callIdx > 4;
          return gateVerdict(pass, pass ? 'ok' : 'issues found');
        }
        return workOutput(`output-${callIdx}`);
      }),
    };

    const result = await runTask(reviewLoopTask(2), provider);

    expect(result.content).not.toBe('');
    expect(result.subAgentResults.length).toBeGreaterThan(2);
  });

  it('fails when max_loops exhausted', async () => {
    const provider = {
      generateText: vi
        .fn()
        .mockImplementation((opts) =>
          isGateCall(opts) ? gateVerdict(false, 'always fails') : workOutput('some output'),
        ),
    };

    const result = await runTask(reviewLoopTask(1), provider);

    expect(result.content).toBe('');
  });
});
describe('T-01-004d: checkpoint work products', () => {
  // Runs the default three-step task and returns its result.
  const runDefaultTask = (provider) =>
    executeCompositeTask(makePersona(), makeTask(), emptyContext, mockModel, provider, emptyTools, tmpDir);

  const readCheckpoint = (checkpoint) => readFile(checkpoint.path, 'utf-8');

  it('writes each sub-agent output to runs/{run_id}/work/', async () => {
    const provider = sequentialProvider(['plan content', 'code content', 'review content']);

    const { checkpoints } = await runDefaultTask(provider);

    expect(checkpoints).toHaveLength(3);
    // Every checkpoint must exist on disk and be non-empty.
    for (const checkpoint of checkpoints) {
      expect(await readCheckpoint(checkpoint)).toBeTruthy();
    }
    expect(await readCheckpoint(checkpoints[0])).toBe('plan content');
    expect(await readCheckpoint(checkpoints[1])).toBe('code content');
  });

  it('names checkpoint files with padded step numbers', async () => {
    const provider = sequentialProvider(['a', 'b', 'c']);

    const { checkpoints } = await runDefaultTask(provider);

    expect(checkpoints[0].path).toContain('01-planner.md');
    expect(checkpoints[1].path).toContain('02-implementer.md');
    expect(checkpoints[2].path).toContain('03-reviewer.md');
  });
});
describe('T-01-004e: no partial publish on failure', () => {
  // Runs the default three-step task; the sub-agents declare no
  // `max_iterations`, and the tests below expect one rejected call to end the run.
  const runDefaultTask = (provider) =>
    executeCompositeTask(makePersona(), makeTask(), emptyContext, mockModel, provider, emptyTools, tmpDir);

  it('returns empty content when a sub-agent fails', async () => {
    // Call 0 (planner) succeeds, call 1 (implementer) rejects.
    const provider = failingProvider(1, ['plan OK']);

    const result = await runDefaultTask(provider);

    expect(result.content).toBe('');
    expect(result.subAgentResults).toHaveLength(1);
    expect(result.checkpoints).toHaveLength(1);
  });

  it('checkpoints only the steps that succeeded before failure', async () => {
    // Calls 0 and 1 succeed, call 2 (reviewer) rejects.
    const provider = failingProvider(2, ['plan', 'code']);

    const result = await runDefaultTask(provider);

    expect(result.checkpoints).toHaveLength(2);
    expect(result.content).toBe('');
  });

  it('still records tokens consumed before failure', async () => {
    const provider = failingProvider(1, ['plan']);

    const result = await runDefaultTask(provider);

    expect(result.tokensIn).toBeGreaterThan(0);
    expect(result.tokensOut).toBeGreaterThan(0);
  });
});
describe('T-FR-04: test-writer sub-agent', () => {
  // Placeholder skill prompts for tests that do not assert on prompt wording.
  const stubSkills = {
    'feature-implementation': 'impl prompt',
    'test-writing': 'test prompt',
  };

  /**
   * Two-step task: `implementer` produces `code-changes`, then `test-writer`
   * reads it (plus `requirements`) and produces the published `test-files`.
   */
  function makeImplementStoryTask() {
    return {
      mode: 'composite',
      trigger: ['manual'],
      sub_agents: [
        {
          name: 'implementer',
          skill: 'feature-implementation',
          reads: [],
          produces: 'code-changes',
          tools: ['code', 'git'],
        },
        {
          name: 'test-writer',
          skill: 'test-writing',
          reads: ['code-changes', 'requirements'],
          produces: 'test-files',
          tools: ['code', 'git', 'shell'],
        },
      ],
      published_artifact: 'test-files',
    };
  }

  // Runs the story task; any of task, context, tools and skills can be swapped per test.
  const runStory = (
    provider,
    { task = makeImplementStoryTask(), context = emptyContext, tools = emptyTools, skills = stubSkills } = {},
  ) => executeCompositeTask(makePersona(), task, context, mockModel, provider, tools, tmpDir, skills);

  // First message content of the second provider call, i.e. the test-writer step.
  const testWriterPromptOf = (provider) => provider.generateText.mock.calls[1][0].messages[0].content;

  it('receives the test-writing skill prompt in its user message', async () => {
    const provider = sequentialProvider(['code output', 'test output']);
    const skills = {
      'feature-implementation': 'Feature implementation prompt content',
      'test-writing': 'Test writing prompt for acceptance criteria coverage',
    };

    await runStory(provider, { skills });

    const testWriterPrompt = testWriterPromptOf(provider);
    expect(testWriterPrompt).toContain('Test writing prompt for acceptance criteria coverage');
    expect(testWriterPrompt).not.toContain('Feature implementation prompt content');
  });

  it('receives code-changes from working state', async () => {
    const provider = sequentialProvider(['The implemented code changes', 'The tests']);

    await runStory(provider);

    expect(testWriterPromptOf(provider)).toContain('The implemented code changes');
  });

  it('includes requirements from context when present in working state', async () => {
    // NOTE(review): this context is identical to `emptyContext` (it carries no
    // requirements block) and the assertion below only looks for the
    // 'code-changes' label, so the test does not yet verify what its title
    // says. TODO: add a real requirements block once its shape is confirmed.
    const contextWithRequirements = {
      blocks: [],
      totalTokens: 0,
      budgetLimit: 100_000,
      gaps: [],
    };
    const provider = sequentialProvider(['code output', 'test output']);
    const task = makeImplementStoryTask();
    task.sub_agents[0].reads = ['requirements'];

    await runStory(provider, { task, context: contextWithRequirements });

    expect(testWriterPromptOf(provider)).toContain('code-changes');
  });

  it('restricts tools to code, git, and shell groups', async () => {
    const noop = vi.fn().mockResolvedValue({});
    const toolDescriptions = [
      ['read-artifact', 'read'],
      ['read-file', 'read file'],
      ['write-file', 'write file'],
      ['git-branch', 'branch'],
      ['git-commit', 'commit'],
      ['git-diff', 'diff'],
      ['run-command', 'shell'],
    ];
    const tools = Object.fromEntries(
      toolDescriptions.map(([name, description]) => [
        name,
        { name, description, parameters: {}, execute: noop },
      ]),
    );
    const provider = sequentialProvider(['code', 'tests']);

    await runStory(provider, { tools });

    const testWriterCall = provider.generateText.mock.calls[1][0];
    const toolNames = Object.keys(testWriterCall.tools ?? {}).sort();
    // `read-artifact` is the only tool passed in that the test-writer must not receive.
    expect(toolNames).toEqual([
      'git-branch',
      'git-commit',
      'git-diff',
      'read-file',
      'run-command',
      'write-file',
    ]);
  });

  it('produces test-files output and checkpoints it', async () => {
    const provider = sequentialProvider(['code changes', 'test file content']);

    const result = await runStory(provider);

    expect(result.content).toBe('test file content');
    expect(result.checkpoints).toHaveLength(2);
    expect(result.checkpoints[1].name).toBe('test-writer');
    const checkpointContent = await readFile(result.checkpoints[1].path, 'utf-8');
    expect(checkpointContent).toBe('test file content');
  });

  it('includes Sub-task: test-files in the prompt', async () => {
    const provider = sequentialProvider(['code', 'tests']);

    await runStory(provider);

    expect(testWriterPromptOf(provider)).toContain('Sub-task: test-files');
  });
});
469
+ describe('T-FR-02: planner sub-agent', () => {
470
+ function makePlannerTask() {
471
+ return {
472
+ mode: 'composite',
473
+ trigger: ['manual'],
474
+ sub_agents: [
475
+ {
476
+ name: 'planner',
477
+ skill: 'implementation-planner',
478
+ reads: ['requirements', 'design', 'codebase-structure'],
479
+ produces: 'implementation-plan',
480
+ tools: ['read-artifact'],
481
+ },
482
+ {
483
+ name: 'implementer',
484
+ skill: 'feature-implementation',
485
+ reads: ['implementation-plan'],
486
+ produces: 'code-changes',
487
+ tools: ['code', 'git'],
488
+ },
489
+ ],
490
+ published_artifact: 'code-changes',
491
+ };
492
+ }
493
+ function makePlannerPersona() {
494
+ return {
495
+ persona: {
496
+ name: 'engineer',
497
+ identity: { role: 'Senior Software Engineer' },
498
+ skills: ['feature-implementation', 'implementation-planner', 'code-review'],
499
+ perception: { per_task: {} },
500
+ tasks: {},
501
+ tools: {
502
+ permitted: ['read-artifact', 'code', 'git', 'shell'],
503
+ denied: ['write-strategy-artifacts'],
504
+ },
505
+ cadence: {},
506
+ evaluation: { rubric: 'test.rubric.yaml' },
507
+ },
508
+ };
509
+ }
510
+ const plannerSkills = {
511
+ 'implementation-planner': 'Analyze requirements and produce a structured implementation plan.',
512
+ 'feature-implementation': 'Implement features following existing patterns.',
513
+ };
514
+ it('T-FR-02-1: planner produces a plan that is checkpointed', async () => {
515
+ const planOutput = '## Implementation Plan\n\n### Files to modify\n- src/engine/composite.ts';
516
+ const provider = sequentialProvider([planOutput, 'code changes']);
517
+ const task = makePlannerTask();
518
+ const result = await executeCompositeTask(makePlannerPersona(), task, emptyContext, mockModel, provider, emptyTools, tmpDir, plannerSkills);
519
+ expect(result.subAgentResults[0].name).toBe('planner');
520
+ expect(result.checkpoints[0].name).toBe('planner');
521
+ const checkpointContent = await readFile(result.checkpoints[0].path, 'utf-8');
522
+ expect(checkpointContent).toBe(planOutput);
523
+ });
524
+ it('T-FR-02-2: planner checkpoint is named 01-planner.md', async () => {
525
+ const provider = sequentialProvider(['the plan', 'the code']);
526
+ const task = makePlannerTask();
527
+ const result = await executeCompositeTask(makePlannerPersona(), task, emptyContext, mockModel, provider, emptyTools, tmpDir, plannerSkills);
528
+ expect(result.checkpoints[0].path).toContain('01-planner.md');
529
+ });
530
// The first message of the first provider call (expected to be the planner's)
// must embed text from the 'implementation-planner' skill.
it('T-FR-02-3: planner prompt includes implementation-planner skill content', async () => {
  const provider = sequentialProvider(['the plan', 'the code']);
  const task = makePlannerTask();
  await executeCompositeTask(
    makePlannerPersona(),
    task,
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    plannerSkills,
  );
  const firstRequest = provider.generateText.mock.calls[0][0];
  const { content } = firstRequest.messages[0];
  expect(content).toContain('structured implementation plan');
});
539
// The text returned for the first provider call must appear in the first
// message of the second provider call (expected to be the implementer's).
it('planner output flows to implementer via working state', async () => {
  const planText = 'Plan: modify src/engine.ts and add tests';
  const provider = sequentialProvider([planText, 'code written']);
  const task = makePlannerTask();
  await executeCompositeTask(
    makePlannerPersona(),
    task,
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    plannerSkills,
  );
  const secondRequest = provider.generateText.mock.calls[1][0];
  expect(secondRequest.messages[0].content).toContain(planText);
});
549
// With five tools registered, the first provider call must see only
// 'read-artifact', and the second must see exactly git-branch, read-file and
// write-file (so 'run-command' is expected to be withheld from both).
it('planner receives only read-artifact tools when restricted', async () => {
  const noop = vi.fn().mockResolvedValue({});
  const describeTool = ([name, description]) => [
    name,
    { name, description, parameters: {}, execute: noop },
  ];
  const tools = Object.fromEntries(
    [
      ['read-artifact', 'read'],
      ['read-file', 'read'],
      ['write-file', 'write'],
      ['git-branch', 'git'],
      ['run-command', 'shell'],
    ].map(describeTool),
  );
  const provider = sequentialProvider(['plan', 'code']);
  const task = makePlannerTask();
  await executeCompositeTask(
    makePlannerPersona(),
    task,
    emptyContext,
    mockModel,
    provider,
    tools,
    tmpDir,
    plannerSkills,
  );
  const toolsOfCall = (index) => provider.generateText.mock.calls[index][0].tools ?? {};
  expect(Object.keys(toolsOfCall(0))).toEqual(['read-artifact']);
  // Expected names are listed in sorted order to match the sorted actual keys.
  expect(Object.keys(toolsOfCall(1)).sort()).toEqual(['git-branch', 'read-file', 'write-file']);
});
572
// Each of the first two provider calls must carry its own skill text and
// must not carry the other's.
it('planner uses different skill prompt than implementer', async () => {
  const PLANNER_MARKER = 'structured implementation plan';
  const IMPLEMENTER_MARKER = 'Implement features';
  const provider = sequentialProvider(['the plan', 'the code']);
  const task = makePlannerTask();
  await executeCompositeTask(
    makePlannerPersona(),
    task,
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    plannerSkills,
  );
  const promptOf = (index) => provider.generateText.mock.calls[index][0].messages[0].content;

  const plannerPrompt = promptOf(0);
  expect(plannerPrompt).toContain(PLANNER_MARKER);
  expect(plannerPrompt).not.toContain(IMPLEMENTER_MARKER);

  const implementerPrompt = promptOf(1);
  expect(implementerPrompt).toContain(IMPLEMENTER_MARKER);
  expect(implementerPrompt).not.toContain(PLANNER_MARKER);
});
586
// The content of both assembled context blocks must appear in the first
// message of the first provider call.
it('planner receives assembled context blocks', async () => {
  const requiredBlock = (content, type, path) => ({
    content,
    source: { type, path },
    tokens: 250,
    priority: 'required',
  });
  const contextWithBlocks = {
    blocks: [
      requiredBlock('Epic requirements content', 'requirements', 'work/CREW-03/requirements.md'),
      requiredBlock('Design document content', 'design', 'work/CREW-03/design.md'),
    ],
    totalTokens: 500,
    budgetLimit: 100_000,
    gaps: [],
  };
  const provider = sequentialProvider(['the plan', 'the code']);
  const task = makePlannerTask();
  await executeCompositeTask(
    makePlannerPersona(),
    task,
    contextWithBlocks,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    plannerSkills,
  );
  const plannerPrompt = provider.generateText.mock.calls[0][0].messages[0].content;
  expect(plannerPrompt).toContain('Epic requirements content');
  expect(plannerPrompt).toContain('Design document content');
});
615
+ });
616
+ describe('T-FR-06: self-reviewer and PR author sub-agents', () => {
617
/**
 * Builds a fresh composite task fixture with three sub-agents in order:
 * implementer, self-reviewer (gated, falling back to 'implementer' with
 * max_loops 2) and pr-author. The published artifact is 'pull-request'.
 *
 * Note that the self-reviewer entry deliberately has no `tools` key.
 *
 * @returns {object} A new task definition object on every call.
 */
function makeSelfReviewTask() {
  const implementer = {
    name: 'implementer',
    skill: 'feature-implementation',
    reads: [],
    produces: 'code-changes',
    tools: ['code', 'git'],
  };
  const selfReviewer = {
    name: 'self-reviewer',
    skill: 'self-review',
    reads: ['code-changes', 'design', 'standards'],
    produces: 'self-review',
    gate: 'no blocking issues',
    on_fail: 'implementer',
    max_loops: 2,
  };
  const prAuthor = {
    name: 'pr-author',
    skill: 'pr-authoring',
    reads: ['code-changes', 'requirements', 'self-review'],
    produces: 'pull-request',
    tools: ['git'],
  };
  return {
    mode: 'composite',
    trigger: ['manual'],
    sub_agents: [implementer, selfReviewer, prAuthor],
    published_artifact: 'pull-request',
  };
}
649
// Skill-name -> skill-prompt text for the implementer / self-reviewer /
// pr-author chain. The prompt assertions in this suite match on substrings of
// these exact strings.
const selfReviewSkills = Object.fromEntries([
  ['feature-implementation', 'Implement features following existing patterns.'],
  ['self-review', 'Review code changes against design and standards. Produce a structured verdict.'],
  ['pr-authoring', 'Create a GitHub PR with structured description linking to requirements.'],
]);
654
// The second provider call (expected to be the self-reviewer's) must carry
// the self-review skill text and must not carry the pr-authoring text.
it('T-FR-06-1: self-reviewer receives the self-review skill prompt', async () => {
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  let invocations = 0;
  const provider = {
    // Gate-evaluator requests always pass; everything else gets a numbered output.
    generateText: vi.fn((opts) => {
      invocations += 1;
      return opts.system.includes('gate evaluator')
        ? reply(JSON.stringify({ pass: true, reason: 'no issues' }), 5, 5)
        : reply(`output-${invocations}`, 10, 20);
    }),
  };
  await executeCompositeTask(
    makePersona(),
    makeSelfReviewTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    selfReviewSkills,
  );
  const secondRequest = provider.generateText.mock.calls[1][0];
  const selfReviewerPrompt = secondRequest.messages[0].content;
  expect(selfReviewerPrompt).toContain('Review code changes against design and standards');
  expect(selfReviewerPrompt).not.toContain('Create a GitHub PR');
});
684
// A checkpoint named 'self-reviewer' must exist and its file must contain the
// verdict text that the mock returns on its second invocation.
it('T-FR-06-1: self-review output is checkpointed', async () => {
  const verdict = '## Self-Review Verdict\n\n**Result:** PASS\n\n### Blocking Issues\n\nNone';
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  let invocations = 0;
  const provider = {
    generateText: vi.fn((opts) => {
      invocations += 1;
      if (opts.system.includes('gate evaluator')) {
        return reply(JSON.stringify({ pass: true, reason: 'no blocking issues' }), 5, 5);
      }
      // Invocation #2 is the one expected to belong to the self-reviewer.
      const text = invocations === 2 ? verdict : `output-${invocations}`;
      return reply(text, 10, 20);
    }),
  };
  const outcome = await executeCompositeTask(
    makePersona(),
    makeSelfReviewTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    selfReviewSkills,
  );
  const selfReviewCheckpoint = outcome.checkpoints.find(({ name }) => name === 'self-reviewer');
  expect(selfReviewCheckpoint).toBeDefined();
  const saved = await readFile(selfReviewCheckpoint.path, 'utf-8');
  expect(saved).toContain('Self-Review Verdict');
  expect(saved).toContain('PASS');
});
723
// The gate mock fails until the overall invocation counter exceeds 5, so at
// least one gate evaluation fails first. Both the implementer and the
// self-reviewer must then appear more than once in the results.
it('T-FR-06-2: gate failure loops back to implementer', async () => {
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  let invocations = 0;
  const provider = {
    generateText: vi.fn((opts) => {
      invocations += 1;
      if (!opts.system.includes('gate evaluator')) {
        return reply(`output-${invocations}`, 10, 20);
      }
      const pass = invocations > 5;
      const reason = pass ? 'issues resolved' : 'blocking issues found';
      return reply(JSON.stringify({ pass, reason }), 5, 5);
    }),
  };
  const { subAgentResults } = await executeCompositeTask(
    makePersona(),
    makeSelfReviewTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    selfReviewSkills,
  );
  const runsOf = (agentName) => subAgentResults.filter((entry) => entry.name === agentName);
  expect(runsOf('implementer').length).toBeGreaterThan(1);
  expect(runsOf('self-reviewer').length).toBeGreaterThan(1);
});
756
// With a gate mock that never passes, the composite result's content must be
// the empty string.
it('T-FR-06-2: max_loops exhaustion returns empty content', async () => {
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  const provider = {
    generateText: vi.fn((opts) =>
      opts.system.includes('gate evaluator')
        ? reply(JSON.stringify({ pass: false, reason: 'blocking issues persist' }), 5, 5)
        : reply('some output', 10, 20),
    ),
  };
  const outcome = await executeCompositeTask(
    makePersona(),
    makeSelfReviewTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    selfReviewSkills,
  );
  expect(outcome.content).toBe('');
});
780
// The text returned on the first provider invocation must appear in the first
// message of the second provider call (expected to be the self-reviewer's).
it('self-reviewer reads code-changes from working state', async () => {
  const codeChangeSummary = 'Added src/engine/composite.ts with gate evaluation logic';
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  let invocations = 0;
  const provider = {
    generateText: vi.fn((opts) => {
      invocations += 1;
      if (opts.system.includes('gate evaluator')) {
        return reply(JSON.stringify({ pass: true, reason: 'ok' }), 5, 5);
      }
      const text = invocations === 1 ? codeChangeSummary : `output-${invocations}`;
      return reply(text, 10, 20);
    }),
  };
  await executeCompositeTask(
    makePersona(),
    makeSelfReviewTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    selfReviewSkills,
  );
  const secondRequest = provider.generateText.mock.calls[1][0];
  expect(secondRequest.messages[0].content).toContain(
    'Added src/engine/composite.ts with gate evaluation logic',
  );
});
817
// The self-reviewer entry in makeSelfReviewTask has no `tools` list, so the
// second provider call is expected to receive all six registered tools.
it('self-reviewer receives all permitted tools (no sub-agent restriction)', async () => {
  const noop = async () => ({});
  const describeTool = ([name, description]) => [
    name,
    { name, description, parameters: {}, execute: noop },
  ];
  const tools = Object.fromEntries(
    [
      ['read-artifact', 'read'],
      ['read-file', 'read file'],
      ['write-file', 'write file'],
      ['git-branch', 'branch'],
      ['git-commit', 'commit'],
      ['run-command', 'shell'],
    ].map(describeTool),
  );
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  let invocations = 0;
  const provider = {
    generateText: vi.fn((opts) => {
      invocations += 1;
      return opts.system.includes('gate evaluator')
        ? reply(JSON.stringify({ pass: true, reason: 'ok' }), 5, 5)
        : reply(`output-${invocations}`, 10, 20);
    }),
  };
  await executeCompositeTask(
    makePersona(),
    makeSelfReviewTask(),
    emptyContext,
    mockModel,
    provider,
    tools,
    tmpDir,
    selfReviewSkills,
  );
  const selfReviewerRequest = provider.generateText.mock.calls[1][0];
  const receivedNames = Object.keys(selfReviewerRequest.tools ?? {}).sort();
  // Expected names are listed in sorted order to match the sorted actual keys.
  expect(receivedNames).toEqual([
    'git-branch',
    'git-commit',
    'read-artifact',
    'read-file',
    'run-command',
    'write-file',
  ]);
});
872
// Finds the provider call whose first message contains
// 'Sub-task: pull-request' and checks that it carries the pr-authoring text.
it('T-FR-06-3: PR author receives pr-authoring skill prompt', async () => {
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  let invocations = 0;
  const provider = {
    generateText: vi.fn((opts) => {
      invocations += 1;
      return opts.system.includes('gate evaluator')
        ? reply(JSON.stringify({ pass: true, reason: 'ok' }), 5, 5)
        : reply(`output-${invocations}`, 10, 20);
    }),
  };
  await executeCompositeTask(
    makePersona(),
    makeSelfReviewTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    selfReviewSkills,
  );
  const recorded = provider.generateText.mock.calls;
  const prAuthorIdx = recorded.findIndex(([request]) =>
    request.messages[0]?.content.includes('Sub-task: pull-request'),
  );
  // Fail with a clear message before indexing if no PR-author call was found.
  expect(prAuthorIdx).toBeGreaterThan(-1);
  const prAuthorPrompt = recorded[prAuthorIdx][0].messages[0].content;
  expect(prAuthorPrompt).toContain('Create a GitHub PR with structured description');
});
906
// The verdict text returned on the second provider invocation (expected to be
// the self-reviewer's) must appear in the prompt of the call whose first
// message contains 'Sub-task: pull-request'.
it('T-FR-06-3: PR author reads self-review from working state', async () => {
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  let callIdx = 0;
  const provider = {
    generateText: vi.fn((opts) => {
      callIdx += 1;
      if (opts.system.includes('gate evaluator')) {
        return reply(JSON.stringify({ pass: true, reason: 'ok' }), 5, 5);
      }
      if (callIdx === 2) {
        return reply('## Self-Review Verdict\n\n**Result:** PASS\n\nNo blocking issues found.', 10, 20);
      }
      return reply(`output-${callIdx}`, 10, 20);
    }),
  };
  await executeCompositeTask(
    makePersona(),
    makeSelfReviewTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    selfReviewSkills,
  );
  const calls = provider.generateText.mock.calls;
  const prAuthorIdx = calls.findIndex((call) =>
    call[0].messages[0]?.content.includes('Sub-task: pull-request'),
  );
  // Guard added: without it a missing PR-author call (-1) would surface as an
  // opaque TypeError from calls[-1][0] rather than a readable assertion failure.
  expect(prAuthorIdx).toBeGreaterThan(-1);
  const prAuthorPrompt = calls[prAuthorIdx][0].messages[0].content;
  expect(prAuthorPrompt).toContain('Self-Review Verdict');
  expect(prAuthorPrompt).toContain('PASS');
});
948
// The third non-gate provider reply must become the composite result content,
// and the last sub-agent result must be named 'pr-author'.
it('T-FR-06-4: PR author output is the final composite result', async () => {
  const prOutput = 'Created PR #42: feat(CREW-03-010): implement self-reviewer and PR author';
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  // Counts only non-gate requests, so gate evaluations do not shift the index.
  let agentTurn = 0;
  const provider = {
    generateText: vi.fn((opts) => {
      if (opts.system.includes('gate evaluator')) {
        return reply(JSON.stringify({ pass: true, reason: 'ok' }), 5, 5);
      }
      agentTurn += 1;
      return reply(agentTurn === 3 ? prOutput : `output-${agentTurn}`, 10, 20);
    }),
  };
  const outcome = await executeCompositeTask(
    makePersona(),
    makeSelfReviewTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    selfReviewSkills,
  );
  expect(outcome.content).toBe(prOutput);
  const lastResult = outcome.subAgentResults[outcome.subAgentResults.length - 1];
  expect(lastResult.name).toBe('pr-author');
});
984
// With seven tools registered, the call whose first message contains
// 'Sub-task: pull-request' must receive only the three git-* tools.
it('PR author receives only git tools', async () => {
  const noop = async () => ({});
  const describeTool = ([name, description]) => [
    name,
    { name, description, parameters: {}, execute: noop },
  ];
  const tools = Object.fromEntries(
    [
      ['read-artifact', 'read'],
      ['read-file', 'read file'],
      ['write-file', 'write file'],
      ['git-branch', 'branch'],
      ['git-commit', 'commit'],
      ['git-push', 'push'],
      ['run-command', 'shell'],
    ].map(describeTool),
  );
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  let callIdx = 0;
  const provider = {
    generateText: vi.fn((opts) => {
      callIdx += 1;
      return opts.system.includes('gate evaluator')
        ? reply(JSON.stringify({ pass: true, reason: 'ok' }), 5, 5)
        : reply(`output-${callIdx}`, 10, 20);
    }),
  };
  await executeCompositeTask(
    makePersona(),
    makeSelfReviewTask(),
    emptyContext,
    mockModel,
    provider,
    tools,
    tmpDir,
    selfReviewSkills,
  );
  const calls = provider.generateText.mock.calls;
  const prAuthorIdx = calls.findIndex((call) =>
    call[0].messages[0]?.content.includes('Sub-task: pull-request'),
  );
  // Guard added: without it a missing PR-author call (-1) would surface as an
  // opaque TypeError from calls[-1][0] rather than a readable assertion failure.
  expect(prAuthorIdx).toBeGreaterThan(-1);
  const prAuthorCall = calls[prAuthorIdx][0];
  const toolNames = Object.keys(prAuthorCall.tools ?? {}).sort();
  expect(toolNames).toEqual(['git-branch', 'git-commit', 'git-push']);
});
1037
// Happy path: with a gate that always passes, exactly three sub-agent results
// and three checkpoints are expected, in implementer / self-reviewer /
// pr-author order, and the final content is the third scripted reply.
it('full chain: implementer -> self-reviewer (pass) -> PR author', async () => {
  const scripted = [
    'Implemented code changes for CREW-03-010',
    '## Self-Review Verdict\n\n**Result:** PASS\n\nNo blocking issues.',
    'Created PR #42 with structured description',
  ];
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  // Counts only non-gate requests; turn N maps to scripted[N - 1].
  let agentTurn = 0;
  const provider = {
    generateText: vi.fn((opts) => {
      if (opts.system.includes('gate evaluator')) {
        return reply(JSON.stringify({ pass: true, reason: 'no blocking issues' }), 5, 5);
      }
      agentTurn += 1;
      return reply(scripted[agentTurn - 1] ?? `output-${agentTurn}`, 10, 20);
    }),
  };
  const outcome = await executeCompositeTask(
    makePersona(),
    makeSelfReviewTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    selfReviewSkills,
  );
  expect(outcome.subAgentResults).toHaveLength(3);
  expect(outcome.subAgentResults[0].name).toBe('implementer');
  expect(outcome.subAgentResults[1].name).toBe('self-reviewer');
  expect(outcome.subAgentResults[2].name).toBe('pr-author');
  expect(outcome.content).toBe('Created PR #42 with structured description');
  expect(outcome.checkpoints).toHaveLength(3);
  expect(outcome.checkpoints[0].name).toBe('implementer');
  expect(outcome.checkpoints[1].name).toBe('self-reviewer');
  expect(outcome.checkpoints[2].name).toBe('pr-author');
});
1076
+ });
1077
+ describe('T-CREW-05: Architect adr-workflow composite task', () => {
1078
/**
 * Builds a fresh ADR-workflow composite task fixture with three sub-agents in
 * order: planner, writer and reviewer (gated, falling back to 'writer' with
 * max_loops 2).
 *
 * @param {object} [overrides] Top-level properties shallow-merged over the
 *   defaults; omit to get the defaults unchanged.
 * @returns {object} A new task definition object on every call.
 */
function makeAdrWorkflowTask(overrides) {
  const planner = {
    name: 'planner',
    skill: 'adr-plan',
    reads: ['requirements', 'architecture', 'adr-register', 'codebase-structure'],
    produces: 'decision-plan',
    tools: ['read-artifact', 'read-code'],
  };
  const writer = {
    name: 'writer',
    skill: 'adr-write',
    reads: ['decision-plan', 'requirements', 'architecture', 'adr-register'],
    produces: 'adr-drafts',
    tools: ['read-artifact', 'write-artifact', 'read-code'],
  };
  const reviewer = {
    name: 'reviewer',
    skill: 'adr-review',
    reads: ['adr-drafts', 'architecture', 'adr-register'],
    produces: 'adr-review',
    tools: ['read-artifact'],
    gate: 'all ADRs pass quality checks',
    on_fail: 'writer',
    max_loops: 2,
  };
  const defaults = {
    mode: 'composite',
    trigger: ['manual'],
    sub_agents: [planner, writer, reviewer],
    published_artifact: 'adr-summary',
  };
  // Object.assign ignores an undefined source, like spreading undefined.
  return Object.assign(defaults, overrides);
}
1112
/**
 * Builds a fresh "architect" persona fixture for the ADR-workflow tests.
 *
 * The persona permits the artifact and read-code tool groups and denies the
 * write-code, git and shell groups.
 *
 * @returns {{ persona: object }} A new persona wrapper on every call.
 */
function makeArchitectPersona() {
  const tools = {
    permitted: ['read-artifact', 'write-artifact', 'read-code'],
    denied: ['write-code', 'git-write', 'git-operations', 'shell'],
  };
  const persona = {
    name: 'architect',
    identity: { role: 'Senior Solution Architect' },
    skills: ['adr-plan', 'adr-write', 'adr-review'],
    perception: { per_task: {} },
    tasks: {},
    tools,
    cadence: {},
    evaluation: { rubric: 'rubrics/adr.rubric.yaml' },
  };
  return { persona };
}
1129
// Skill-name -> skill-prompt text for the planner / writer / reviewer
// sub-agents. The prompt assertions in this suite match on substrings of
// these exact strings.
const adrSkills = Object.fromEntries([
  ['adr-plan', 'Identify consequential architecture decisions for this epic.'],
  ['adr-write', 'Write a draft ADR following the project template with balanced options analysis.'],
  ['adr-review', 'Review ADRs for completeness, balance, and consistency with existing decisions.'],
]);
1134
/**
 * Creates a mock provider whose gate evaluations always pass.
 *
 * Requests whose `system` text contains 'gate evaluator' get a passing JSON
 * verdict. Every other request is attributed, in arrival order, to 'planner',
 * 'writer', then 'reviewer' (and 'unknown' after that), and is answered with
 * `responses[name]` or the fallback text `"<name> output"`.
 *
 * @param {Record<string, string>} responses Optional per-sub-agent reply text.
 * @returns {{ generateText: Function }} Provider exposing a vi.fn mock.
 */
function gatePassingAdrProvider(responses) {
  const arrivalOrder = ['planner', 'writer', 'reviewer'];
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  // Advances only on non-gate requests.
  let cursor = 0;
  const generateText = vi.fn((opts) => {
    if (opts.system.includes('gate evaluator')) {
      return reply(JSON.stringify({ pass: true, reason: 'all ADRs pass quality checks' }), 5, 5);
    }
    const name = arrivalOrder[cursor] ?? 'unknown';
    cursor += 1;
    return reply(responses[name] ?? `${name} output`, 80, 40);
  });
  return { generateText };
}
1160
// With a passing gate, exactly three sub-agent results are expected, named
// planner, writer and reviewer in that order.
it('executes all 3 sub-agents in sequence: planner -> writer -> reviewer', async () => {
  const provider = gatePassingAdrProvider({
    planner: '## Decision Plan\n1. ADR-0017: Persona-spec-driven protected paths',
    writer: '## ADR Drafts\n| ADR-0017 | Proposed |',
    reviewer: '## ADR Review\nAll ADRs pass quality checks.',
  });
  const { subAgentResults } = await executeCompositeTask(
    makeArchitectPersona(),
    makeAdrWorkflowTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    adrSkills,
  );
  expect(subAgentResults).toHaveLength(3);
  expect(subAgentResults[0].name).toBe('planner');
  expect(subAgentResults[1].name).toBe('writer');
  expect(subAgentResults[2].name).toBe('reviewer');
});
1172
// The planner's reply text must appear in the first message of the second
// provider call (expected to be the writer's).
it('planner output flows to writer via working state', async () => {
  const decisionPlan = '## Decision Plan\n1. ADR-0017: Protected path configuration model';
  const provider = gatePassingAdrProvider({
    planner: decisionPlan,
    writer: 'ADR drafts written',
    reviewer: 'Review complete',
  });
  await executeCompositeTask(
    makeArchitectPersona(),
    makeAdrWorkflowTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    adrSkills,
  );
  const secondRequest = provider.generateText.mock.calls[1][0];
  expect(secondRequest.messages[0].content).toContain(decisionPlan);
});
1184
// The writer's reply text must appear in the first message of the third
// provider call (expected to be the reviewer's).
it('writer output flows to reviewer via working state', async () => {
  const drafts = '## ADR Drafts\n| ADR-0017 | Protected paths | Proposed |';
  const provider = gatePassingAdrProvider({
    planner: 'Decision plan',
    writer: drafts,
    reviewer: 'Review pass',
  });
  await executeCompositeTask(
    makeArchitectPersona(),
    makeAdrWorkflowTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    adrSkills,
  );
  const thirdRequest = provider.generateText.mock.calls[2][0];
  expect(thirdRequest.messages[0].content).toContain(drafts);
});
1197
// The first three provider calls must each carry their own skill text; the
// first two are also checked for absence of the next sub-agent's text.
it('each sub-agent receives its own skill prompt', async () => {
  const PLAN_MARKER = 'consequential architecture decisions';
  const WRITE_MARKER = 'balanced options analysis';
  const REVIEW_MARKER = 'completeness, balance, and consistency';
  const provider = gatePassingAdrProvider({});
  await executeCompositeTask(
    makeArchitectPersona(),
    makeAdrWorkflowTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    adrSkills,
  );
  const promptOf = (index) => provider.generateText.mock.calls[index][0].messages[0].content;

  const plannerPrompt = promptOf(0);
  expect(plannerPrompt).toContain(PLAN_MARKER);
  expect(plannerPrompt).not.toContain(WRITE_MARKER);

  const writerPrompt = promptOf(1);
  expect(writerPrompt).toContain(WRITE_MARKER);
  expect(writerPrompt).not.toContain(REVIEW_MARKER);

  const reviewerPrompt = promptOf(2);
  expect(reviewerPrompt).toContain(REVIEW_MARKER);
});
1212
// Exactly three checkpoints are expected, each with an ordinal-prefixed file
// name and a name matching its sub-agent.
it('checkpoints are named correctly for all 3 sub-agents', async () => {
  const provider = gatePassingAdrProvider({});
  const { checkpoints } = await executeCompositeTask(
    makeArchitectPersona(),
    makeAdrWorkflowTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    adrSkills,
  );
  expect(checkpoints).toHaveLength(3);
  const expected = [
    ['01-planner.md', 'planner'],
    ['02-writer.md', 'writer'],
    ['03-reviewer.md', 'reviewer'],
  ];
  // Same assertion order as a hand-unrolled list: path, then name, per slot.
  for (const [slot, [fileName, agentName]] of expected.entries()) {
    expect(checkpoints[slot].path).toContain(fileName);
    expect(checkpoints[slot].name).toBe(agentName);
  }
});
1223
// The composite result content must equal the reviewer's reply text.
it('final output is from the reviewer sub-agent', async () => {
  const reviewSummary = '## ADR Review Summary\nAll 2 ADRs pass quality checks.';
  const provider = gatePassingAdrProvider({ reviewer: reviewSummary });
  const { content } = await executeCompositeTask(
    makeArchitectPersona(),
    makeAdrWorkflowTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    adrSkills,
  );
  expect(content).toBe(reviewSummary);
});
1229
// With eight tools registered, checks the tool names each sub-agent call
// receives. The expectations imply that the 'read-code' group covers
// read-file, list-directory and search-codebase; that mapping lives outside
// this test.
it('applies correct tool restrictions per sub-agent', async () => {
  const noop = async () => ({});
  const describeTool = ([name, description]) => [
    name,
    { name, description, parameters: {}, execute: noop },
  ];
  const tools = Object.fromEntries(
    [
      ['read-artifact', 'read artifact'],
      ['write-artifact', 'write artifact'],
      ['read-file', 'read'],
      ['list-directory', 'list'],
      ['search-codebase', 'search'],
      ['write-file', 'write'],
      ['git-branch', 'branch'],
      ['run-command', 'shell'],
    ].map(describeTool),
  );
  const provider = gatePassingAdrProvider({});
  await executeCompositeTask(
    makeArchitectPersona(),
    makeAdrWorkflowTask(),
    emptyContext,
    mockModel,
    provider,
    tools,
    tmpDir,
    adrSkills,
  );
  const calls = provider.generateText.mock.calls;
  const sortedToolNames = (index) => Object.keys(calls[index][0].tools ?? {}).sort();

  const plannerTools = sortedToolNames(0);
  expect(plannerTools).toEqual(
    ['read-artifact', 'read-file', 'list-directory', 'search-codebase'].sort(),
  );

  const writerTools = sortedToolNames(1);
  expect(writerTools).toEqual(
    ['read-artifact', 'write-artifact', 'read-file', 'list-directory', 'search-codebase'].sort(),
  );

  const reviewerIdx = calls.findIndex((call) =>
    call[0].messages[0]?.content.includes('Sub-task: adr-review'),
  );
  // Guard added: without it a missing reviewer call (-1) would surface as an
  // opaque TypeError from calls[-1][0] rather than a readable assertion failure.
  expect(reviewerIdx).toBeGreaterThan(-1);
  const reviewerTools = sortedToolNames(reviewerIdx);
  expect(reviewerTools).toEqual(['read-artifact']);
});
1275
// No provider call may be handed write-file, git-branch, git-commit or
// run-command, even though all four are registered.
it('denied tools are never provided to any sub-agent', async () => {
  const noop = async () => ({});
  const describeTool = ([name, description]) => [
    name,
    { name, description, parameters: {}, execute: noop },
  ];
  const tools = Object.fromEntries(
    [
      ['read-artifact', 'read'],
      ['write-artifact', 'write'],
      ['read-file', 'read'],
      ['write-file', 'write'],
      ['git-branch', 'git'],
      ['git-commit', 'git'],
      ['run-command', 'shell'],
    ].map(describeTool),
  );
  const forbidden = ['write-file', 'git-branch', 'git-commit', 'run-command'];
  const provider = gatePassingAdrProvider({});
  await executeCompositeTask(
    makeArchitectPersona(),
    makeAdrWorkflowTask(),
    emptyContext,
    mockModel,
    provider,
    tools,
    tmpDir,
    adrSkills,
  );
  const calls = provider.generateText.mock.calls;
  // Guard added: with zero recorded calls the loop below would assert nothing
  // and the test would pass vacuously.
  expect(calls.length).toBeGreaterThan(0);
  for (const call of calls) {
    const toolNames = Object.keys(call[0].tools ?? {});
    for (const name of forbidden) {
      expect(toolNames).not.toContain(name);
    }
  }
});
1308
// The gate mock fails until the overall invocation counter exceeds 5. Both
// the writer and the reviewer must then appear more than once, and the run
// must still end with non-empty content.
it('reviewer gate failure loops back to writer', async () => {
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  let invocations = 0;
  const provider = {
    generateText: vi.fn((opts) => {
      invocations += 1;
      if (!opts.system.includes('gate evaluator')) {
        return reply(`output-${invocations}`, 80, 40);
      }
      const pass = invocations > 5;
      const reason = pass ? 'all ADRs pass' : 'ADR-0017 options analysis is one-sided';
      return reply(JSON.stringify({ pass, reason }), 5, 5);
    }),
  };
  const outcome = await executeCompositeTask(
    makeArchitectPersona(),
    makeAdrWorkflowTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    adrSkills,
  );
  const runsOf = (agentName) => outcome.subAgentResults.filter((entry) => entry.name === agentName);
  expect(runsOf('writer').length).toBeGreaterThan(1);
  expect(runsOf('reviewer').length).toBeGreaterThan(1);
  expect(outcome.content).not.toBe('');
});
1342
// With a gate mock that never passes, the composite result's content must be
// the empty string.
it('max_loops exhaustion returns empty content', async () => {
  const reply = (text, tokensIn, tokensOut) =>
    Promise.resolve({ text, toolCalls: [], tokensIn, tokensOut });
  const provider = {
    generateText: vi.fn((opts) =>
      opts.system.includes('gate evaluator')
        ? reply(JSON.stringify({ pass: false, reason: 'ADR quality insufficient' }), 5, 5)
        : reply('some output', 80, 40),
    ),
  };
  const outcome = await executeCompositeTask(
    makeArchitectPersona(),
    makeAdrWorkflowTask(),
    emptyContext,
    mockModel,
    provider,
    emptyTools,
    tmpDir,
    adrSkills,
  );
  expect(outcome.content).toBe('');
});
1366
// The content of every assembled context block must appear in the prompt of
// the first provider call (the planner).
it('assembled context blocks are available to planner', async () => {
  const contextWithBlocks = {
    blocks: [
      {
        content: 'CREW-05 requirements: automate ADR workflow',
        source: { type: 'requirements', path: 'work/CREW-05/requirements.md' },
        tokens: 200,
        priority: 'required',
      },
      {
        content: 'Solution architecture: artifact-centric delivery system',
        source: { type: 'architecture', path: 'architecture/solution.md' },
        tokens: 300,
        priority: 'required',
      },
    ],
    totalTokens: 500,
    budgetLimit: 100_000,
    gaps: [],
  };
  const provider = gatePassingAdrProvider({});
  await executeCompositeTask(makeArchitectPersona(), makeAdrWorkflowTask(), contextWithBlocks, mockModel, provider, emptyTools, tmpDir, adrSkills);
  const calls = provider.generateText.mock.calls;
  // calls[0] is the first generateText invocation; its first message carries the prompt.
  const plannerPrompt = calls[0][0].messages[0].content;
  expect(plannerPrompt).toContain('CREW-05 requirements: automate ADR workflow');
  expect(plannerPrompt).toContain('artifact-centric delivery system');
});
1394
// Token totals on the result must match the expected sums for the ADR workflow.
// NOTE(review): 240/120 depend on gatePassingAdrProvider and makeAdrWorkflowTask,
// both defined outside this block — confirm if either changes.
it('accumulates tokens across all sub-agents', async () => {
  const provider = gatePassingAdrProvider({});
  const result = await executeCompositeTask(makeArchitectPersona(), makeAdrWorkflowTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, adrSkills);
  expect(result.tokensIn).toBe(240);
  expect(result.tokensOut).toBe(120);
});
1400
+ });
1401
+ describe('T-FR-03: implementer sub-agent', () => {
1402
/**
 * Builds a two-step composite task fixture: a planner followed by an
 * implementer. A fresh object is returned on every call so tests cannot
 * leak mutations into each other.
 *
 * @returns {object} Task spec with `mode`, `trigger`, `sub_agents` and
 *   `published_artifact`.
 */
function makeImplementerTask() {
  // The planner declares no `tools` key; only the implementer requests tool groups.
  const planner = {
    name: 'planner',
    skill: 'implementation-planner',
    reads: ['requirements', 'design'],
    produces: 'implementation-plan',
  };
  const implementer = {
    name: 'implementer',
    skill: 'implement-code',
    reads: ['implementation-plan', 'codebase'],
    produces: 'code-changes',
    tools: ['code', 'git', 'shell'],
  };
  return {
    mode: 'composite',
    trigger: ['manual'],
    sub_agents: [planner, implementer],
    published_artifact: 'code-changes',
  };
}
1424
/**
 * Builds the engineer persona fixture used by the implementer sub-agent
 * tests. A fresh object is returned on every call.
 *
 * @returns {{ persona: object }} Persona spec wrapped under a `persona` key.
 */
function makeImplementerPersona() {
  const skills = [
    'feature-implementation',
    'implement-code',
    'implementation-planner',
    'code-review',
  ];
  const tools = {
    permitted: ['read-artifact', 'code', 'git-operations', 'shell'],
    denied: ['write-strategy-artifacts'],
  };
  return {
    persona: {
      name: 'engineer',
      identity: { role: 'Senior Software Engineer' },
      skills,
      perception: { per_task: {} },
      tasks: {},
      tools,
      cadence: {},
      evaluation: { rubric: 'test.rubric.yaml' },
    },
  };
}
1446
// Skill-name -> prompt-text fixtures. The two prompts use deliberately
// distinct phrases so tests can tell which skill ended up in a prompt.
const implementerSkills = {
  'implementation-planner': 'Analyze requirements and produce an implementation plan.',
  'implement-code': 'You are the Implementer sub-agent. Read the plan, write code, create branch, commit.',
};
1450
// The second provider call (the implementer) must carry the implement-code
// skill text and must not carry the planner's skill text.
it('T-FR-03-1: implementer receives implement-code skill prompt', async () => {
  const provider = sequentialProvider(['the plan', 'code written']);
  await executeCompositeTask(makeImplementerPersona(), makeImplementerTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, implementerSkills);
  const calls = provider.generateText.mock.calls;
  const implementerPrompt = calls[1][0].messages[0].content;
  expect(implementerPrompt).toContain('Implementer sub-agent');
  expect(implementerPrompt).not.toContain('Analyze requirements');
});
1459
// The planner's output must be embedded verbatim in the implementer's prompt,
// alongside a 'Working State' marker.
it('T-FR-03-2: implementer receives implementation-plan from working state', async () => {
  const planText = '## Plan\n\n1. Create src/tools/registry.ts\n2. Create branch feat/CREW-03-007';
  const provider = sequentialProvider([planText, 'code changes summary']);
  await executeCompositeTask(makeImplementerPersona(), makeImplementerTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, implementerSkills);
  const calls = provider.generateText.mock.calls;
  const implementerPrompt = calls[1][0].messages[0].content;
  expect(implementerPrompt).toContain(planText);
  expect(implementerPrompt).toContain('Working State');
});
1469
// The second checkpoint must belong to the implementer, be named
// 02-implementer.md, and contain exactly the implementer's output on disk.
it('T-FR-03-3: implementer output is checkpointed as 02-implementer.md', async () => {
  const provider = sequentialProvider(['the plan', 'code changes output']);
  const result = await executeCompositeTask(makeImplementerPersona(), makeImplementerTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, implementerSkills);
  const implementerCheckpoint = result.checkpoints[1];
  expect(implementerCheckpoint.name).toBe('implementer');
  expect(implementerCheckpoint.path).toContain('02-implementer.md');
  const content = await readFile(implementerCheckpoint.path, 'utf-8');
  expect(content).toBe('code changes output');
});
1477
// Given a registry of twelve tools, the implementer call must receive every
// tool except 'read-artifact'.
it('T-FR-03-4: implementer gets write-code, git, and shell tools', async () => {
  const noop = vi.fn().mockResolvedValue({});
  // [tool name, description] pairs; the registry is built in this order.
  const toolSpecs = [
    ['read-artifact', 'read'],
    ['read-file', 'read file'],
    ['write-file', 'write file'],
    ['list-directory', 'list dir'],
    ['search-codebase', 'search'],
    ['git-branch', 'branch'],
    ['git-commit', 'commit'],
    ['git-push', 'push'],
    ['git-diff', 'diff'],
    ['git-log', 'log'],
    ['create-pr', 'pr'],
    ['run-command', 'shell'],
  ];
  const tools = Object.fromEntries(
    toolSpecs.map(([name, description]) => [
      name,
      { name, description, parameters: {}, execute: noop },
    ]),
  );
  const provider = sequentialProvider(['plan', 'code']);
  await executeCompositeTask(makeImplementerPersona(), makeImplementerTask(), emptyContext, mockModel, provider, tools, tmpDir, implementerSkills);
  const calls = provider.generateText.mock.calls;
  const implementerTools = calls[1][0].tools ?? {};
  // Sorted so the comparison does not depend on key order.
  const toolNames = Object.keys(implementerTools).sort();
  expect(toolNames).toEqual([
    'create-pr',
    'git-branch',
    'git-commit',
    'git-diff',
    'git-log',
    'git-push',
    'list-directory',
    'read-file',
    'run-command',
    'search-codebase',
    'write-file',
  ]);
});
1532
// The implementer prompt must name the artifact it produces as its sub-task.
it('implementer includes Sub-task: code-changes in the prompt', async () => {
  const provider = sequentialProvider(['plan', 'code']);
  await executeCompositeTask(makeImplementerPersona(), makeImplementerTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, implementerSkills);
  const calls = provider.generateText.mock.calls;
  const implementerPrompt = calls[1][0].messages[0].content;
  expect(implementerPrompt).toContain('Sub-task: code-changes');
});
1540
// Each sub-agent prompt must contain its own skill text and not the other's.
it('implementer uses different skill than planner', async () => {
  const provider = sequentialProvider(['plan', 'code']);
  await executeCompositeTask(makeImplementerPersona(), makeImplementerTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, implementerSkills);
  const calls = provider.generateText.mock.calls;

  const plannerPrompt = calls[0][0].messages[0].content;
  expect(plannerPrompt).toContain('Analyze requirements');
  expect(plannerPrompt).not.toContain('Implementer sub-agent');

  const implementerPrompt = calls[1][0].messages[0].content;
  expect(implementerPrompt).toContain('Implementer sub-agent');
  expect(implementerPrompt).not.toContain('Analyze requirements');
});
1553
// Adds a reviewer that reads 'code-changes' and checks that the implementer's
// output shows up verbatim in the reviewer's prompt (third provider call).
it('implementer output flows to downstream sub-agents', async () => {
  const codeOutput = 'Branch: feat/CREW-03-007\nFiles: registry.ts, tool-groups.ts\nCommits: 2';
  const task = {
    mode: 'composite',
    trigger: ['manual'],
    sub_agents: [
      {
        name: 'planner',
        skill: 'implementation-planner',
        reads: [],
        produces: 'implementation-plan',
      },
      {
        name: 'implementer',
        skill: 'implement-code',
        reads: ['implementation-plan'],
        produces: 'code-changes',
        tools: ['code', 'git'],
      },
      { name: 'reviewer', skill: 'code-review', reads: ['code-changes'], produces: 'review' },
    ],
    published_artifact: 'review',
  };
  const skills = {
    'implementation-planner': 'planner prompt',
    'implement-code': 'implementer prompt',
    'code-review': 'review prompt',
  };
  const provider = sequentialProvider(['the plan', codeOutput, 'review OK']);
  await executeCompositeTask(makeImplementerPersona(), task, emptyContext, mockModel, provider, emptyTools, tmpDir, skills);
  const calls = provider.generateText.mock.calls;
  const reviewerPrompt = calls[2][0].messages[0].content;
  expect(reviewerPrompt).toContain(codeOutput);
});
1588
+ });
1589
+ describe('T-FR-07: end-to-end implement-story composite task', () => {
1590
/**
 * Full 6-step sub-agent chain matching the engineer persona spec:
 * planner -> implementer -> test-writer -> quality-checker -> self-reviewer -> pr-author
 *
 * The self-reviewer is the only gated step: it declares a gate, names
 * 'implementer' as its `on_fail` target and allows at most two loops. It is
 * also the only step without a `tools` list. A fresh object is returned on
 * every call.
 *
 * @returns {object} Composite task spec with six `sub_agents`.
 */
function makeFullImplementStoryTask() {
  const subAgents = [
    {
      name: 'planner',
      skill: 'implementation-planner',
      reads: ['requirements', 'design', 'codebase-structure'],
      produces: 'implementation-plan',
      tools: ['read-artifact'],
    },
    {
      name: 'implementer',
      skill: 'implement-code',
      reads: ['implementation-plan', 'codebase'],
      produces: 'code-changes',
      tools: ['code', 'git', 'shell'],
    },
    {
      name: 'test-writer',
      skill: 'test-writing',
      reads: ['code-changes', 'requirements'],
      produces: 'test-files',
      tools: ['code', 'git', 'shell'],
    },
    {
      name: 'quality-checker',
      skill: 'quality-check',
      reads: ['code-changes', 'test-files'],
      produces: 'quality-report',
      tools: ['shell', 'code', 'git'],
      max_iterations: 3,
    },
    {
      name: 'self-reviewer',
      skill: 'self-review',
      reads: ['code-changes', 'design', 'standards'],
      produces: 'self-review',
      gate: 'no blocking issues',
      on_fail: 'implementer',
      max_loops: 2,
    },
    {
      name: 'pr-author',
      skill: 'pr-authoring',
      reads: ['code-changes', 'requirements', 'self-review'],
      produces: 'pull-request',
      tools: ['git'],
    },
  ];
  return {
    mode: 'composite',
    trigger: ['manual'],
    sub_agents: subAgents,
    published_artifact: 'pull-request',
    quality_gate: 'human-review',
  };
}
1649
/**
 * Builds the engineer persona fixture for the end-to-end implement-story
 * tests. It lists all eight skills and its rubric points at the pull-request
 * rubric. A fresh object is returned on every call.
 *
 * @returns {{ persona: object }} Persona spec wrapped under a `persona` key.
 */
function makeEngineerPersona() {
  const skills = [
    'feature-implementation',
    'implement-code',
    'implementation-planner',
    'code-review',
    'quality-check',
    'test-writing',
    'self-review',
    'pr-authoring',
  ];
  const tools = {
    permitted: ['read-artifact', 'write-artifact', 'code', 'git-operations', 'shell'],
    denied: ['write-strategy-artifacts', 'write-standards'],
  };
  return {
    persona: {
      name: 'engineer',
      identity: { role: 'Senior Software Engineer' },
      skills,
      perception: { per_task: {} },
      tasks: {},
      tools,
      cadence: {},
      evaluation: { rubric: 'rubrics/pull-request.rubric.yaml' },
    },
  };
}
1675
// Skill-name -> prompt-text fixtures covering every skill the engineer
// persona lists.
const allSkills = {
  'implementation-planner': 'Analyze requirements and produce an implementation plan.',
  'implement-code': 'Write code following the plan. Create branch, commit changes.',
  'test-writing': 'Write tests covering acceptance criteria.',
  'quality-check': 'Run lint, typecheck, tests. Fix failures iteratively.',
  'self-review': 'Review changes against design. Report blocking issues.',
  'pr-authoring': 'Create a GitHub PR with structured description.',
  'feature-implementation': 'Feature implementation skill.',
  'code-review': 'Code review skill.',
};
1685
/**
 * Creates a mock provider whose gate evaluations always pass.
 *
 * Calls whose `system` text contains 'gate evaluator' resolve to a passing
 * verdict and do not advance the sub-agent counter. Every other call is
 * attributed, in call order, to the next name in the fixed six-step chain;
 * calls beyond the sixth are attributed to 'unknown'.
 *
 * @param {Record<string, string>} [responses={}] Optional output text keyed by
 *   sub-agent name; a missing (nullish) entry falls back to `"<name> output"`.
 * @returns {{ generateText: Function }} Provider with a mocked `generateText`.
 */
function gatePassingProvider(responses = {}) {
  const subAgentNames = [
    'planner',
    'implementer',
    'test-writer',
    'quality-checker',
    'self-reviewer',
    'pr-author',
  ];
  // Number of non-gate calls seen so far.
  let subAgentIdx = 0;

  const respond = (opts) => {
    if (opts.system.includes('gate evaluator')) {
      return Promise.resolve({
        text: JSON.stringify({ pass: true, reason: 'no blocking issues' }),
        toolCalls: [],
        tokensIn: 5,
        tokensOut: 5,
      });
    }
    const name = subAgentNames[subAgentIdx] ?? 'unknown';
    subAgentIdx++;
    return Promise.resolve({
      text: responses[name] ?? `${name} output`,
      toolCalls: [],
      tokensIn: 100,
      tokensOut: 50,
    });
  };

  return { generateText: vi.fn().mockImplementation(respond) };
}
1718
// With a passing gate, exactly six sub-agent results are expected, in chain order.
it('executes all 6 sub-agents in sequence', async () => {
  const provider = gatePassingProvider({
    planner: '## Plan\nModify engine/composite.ts',
    implementer: 'Created branch, wrote code, committed',
    'test-writer': 'Added 5 test cases for composite engine',
    'quality-checker': 'Lint: pass, Typecheck: pass, Tests: 14/14 pass',
    'self-reviewer': '## Verdict: PASS\nNo blocking issues.',
    'pr-author': 'Created PR #47: feat(CREW-03-011): end-to-end composite task',
  });
  const result = await executeCompositeTask(makeEngineerPersona(), makeFullImplementStoryTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, allSkills);
  expect(result.subAgentResults).toHaveLength(6);
  // One ordered comparison reports the whole sequence on failure.
  expect(result.subAgentResults.map((r) => r.name)).toEqual([
    'planner',
    'implementer',
    'test-writer',
    'quality-checker',
    'self-reviewer',
    'pr-author',
  ]);
});
1736
// Each downstream prompt must contain the upstream outputs it reads:
// plan -> implementer, code -> test-writer, code + tests -> quality-checker.
it('shares working state between all sub-agents', async () => {
  const planOutput = '## Plan\n1. Modify composite.ts\n2. Add tests';
  const codeOutput = 'Branch: feat/CREW-03-011\nFiles: composite.ts\nCommits: 3';
  const testOutput = 'Added test-files covering all 5 AC items';
  const provider = gatePassingProvider({
    planner: planOutput,
    implementer: codeOutput,
    'test-writer': testOutput,
    'quality-checker': 'All checks pass',
    'self-reviewer': 'PASS: no issues',
    'pr-author': 'PR created',
  });
  await executeCompositeTask(makeEngineerPersona(), makeFullImplementStoryTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, allSkills);
  const calls = provider.generateText.mock.calls;
  // Prompt text of the n-th provider call (0-based).
  const promptOf = (n) => calls[n][0].messages[0].content;

  const implementerPrompt = promptOf(1);
  expect(implementerPrompt).toContain(planOutput);

  const testWriterPrompt = promptOf(2);
  expect(testWriterPrompt).toContain(codeOutput);

  const qualityPrompt = promptOf(3);
  expect(qualityPrompt).toContain(codeOutput);
  expect(qualityPrompt).toContain(testOutput);
});
1761
// Six checkpoints are expected, each named NN-<sub-agent>.md, and every
// checkpoint file on disk must have non-empty content.
it('checkpoints all 6 intermediate work products', async () => {
  const provider = gatePassingProvider({});
  const result = await executeCompositeTask(makeEngineerPersona(), makeFullImplementStoryTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, allSkills);
  expect(result.checkpoints).toHaveLength(6);
  const expectedFiles = [
    '01-planner.md',
    '02-implementer.md',
    '03-test-writer.md',
    '04-quality-checker.md',
    '05-self-reviewer.md',
    '06-pr-author.md',
  ];
  for (const [position, fileName] of expectedFiles.entries()) {
    expect(result.checkpoints[position].path).toContain(fileName);
  }
  for (const cp of result.checkpoints) {
    const content = await readFile(cp.path, 'utf-8');
    expect(content).toBeTruthy();
  }
});
1776
// The task's content must equal the pr-author's output.
it('final output is from the PR author sub-agent', async () => {
  const prOutput = 'Created PR #47: feat(CREW-03-011): end-to-end composite task wiring';
  const provider = gatePassingProvider({ 'pr-author': prOutput });
  const result = await executeCompositeTask(makeEngineerPersona(), makeFullImplementStoryTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, allSkills);
  expect(result.content).toBe(prOutput);
});
1784
// Six sub-agent calls at 100 in / 50 out each give 600 / 300; the expected
// totals therefore exclude the gate evaluator's 5/5 tokens.
it('accumulates tokens across all 6 sub-agents', async () => {
  const provider = gatePassingProvider({});
  const result = await executeCompositeTask(makeEngineerPersona(), makeFullImplementStoryTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, allSkills);
  expect(result.tokensIn).toBe(600);
  expect(result.tokensOut).toBe(300);
});
1790
// The first gate evaluation fails and later ones pass. After the first
// self-reviewer run the next result must be the implementer again, and the
// chain must still end with the pr-author.
it('self-reviewer gate failure loops back to implementer', async () => {
  let subAgentIdx = 0;
  let gateCallCount = 0;
  const provider = {
    generateText: vi.fn().mockImplementation((opts) => {
      if (opts.system.includes('gate evaluator')) {
        gateCallCount++;
        // Only the very first gate evaluation is rejected.
        const pass = gateCallCount > 1;
        return Promise.resolve({
          text: JSON.stringify({ pass, reason: pass ? 'resolved' : 'blocking issue found' }),
          toolCalls: [],
          tokensIn: 5,
          tokensOut: 5,
        });
      }
      subAgentIdx++;
      return Promise.resolve({
        text: `step-${subAgentIdx}`,
        toolCalls: [],
        tokensIn: 10,
        tokensOut: 20,
      });
    }),
  };
  const result = await executeCompositeTask(makeEngineerPersona(), makeFullImplementStoryTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, allSkills);
  const names = result.subAgentResults.map((r) => r.name);
  expect(names[0]).toBe('planner');
  expect(names[1]).toBe('implementer');
  expect(names[2]).toBe('test-writer');
  expect(names[3]).toBe('quality-checker');
  expect(names[4]).toBe('self-reviewer');
  expect(names[5]).toBe('implementer');
  expect(names[names.length - 1]).toBe('pr-author');
  expect(result.content).not.toBe('');
});
1827
// Text from both assembled context blocks must appear in the planner prompt
// (the first provider call).
it('provides assembled context to planner sub-agent', async () => {
  const contextWithBlocks = {
    blocks: [
      {
        content: 'Story acceptance criteria: AC1, AC2, AC3',
        source: { type: 'requirements', path: 'work/CREW-03/requirements.md' },
        tokens: 100,
        priority: 'required',
      },
      {
        content: 'Technical design: component A, module B',
        source: { type: 'design', path: 'work/CREW-03/design.md' },
        tokens: 100,
        priority: 'required',
      },
    ],
    totalTokens: 200,
    budgetLimit: 200_000,
    gaps: [],
  };
  const provider = gatePassingProvider({});
  await executeCompositeTask(makeEngineerPersona(), makeFullImplementStoryTask(), contextWithBlocks, mockModel, provider, emptyTools, tmpDir, allSkills);
  const calls = provider.generateText.mock.calls;
  const plannerPrompt = calls[0][0].messages[0].content;
  expect(plannerPrompt).toContain('Story acceptance criteria');
  expect(plannerPrompt).toContain('Technical design');
});
1855
// Checks the tool names handed to each sub-agent call against the tool
// groups it declares: planner gets only 'read-artifact'; implementer,
// test-writer and quality-checker get the same eleven tools; pr-author gets
// the six git tools.
it('applies correct tool restrictions per sub-agent', async () => {
  const noop = async () => ({});
  // [tool name, description] pairs; the registry is built in this order.
  const toolSpecs = [
    ['read-artifact', 'read'],
    ['read-file', 'read'],
    ['write-file', 'write'],
    ['list-directory', 'list'],
    ['search-codebase', 'search'],
    ['git-branch', 'branch'],
    ['git-commit', 'commit'],
    ['git-push', 'push'],
    ['git-diff', 'diff'],
    ['git-log', 'log'],
    ['create-pr', 'pr'],
    ['run-command', 'shell'],
  ];
  const tools = Object.fromEntries(
    toolSpecs.map(([name, description]) => [
      name,
      { name, description, parameters: {}, execute: noop },
    ]),
  );
  // Expected sorted names for the six git tools, and for the code + git + shell set.
  const gitTools = [
    'create-pr',
    'git-branch',
    'git-commit',
    'git-diff',
    'git-log',
    'git-push',
  ];
  const codeGitShellTools = [
    ...gitTools,
    'list-directory',
    'read-file',
    'run-command',
    'search-codebase',
    'write-file',
  ];

  const provider = gatePassingProvider({});
  await executeCompositeTask(makeEngineerPersona(), makeFullImplementStoryTask(), emptyContext, mockModel, provider, tools, tmpDir, allSkills);
  const calls = provider.generateText.mock.calls;
  // Sorted tool names passed to the n-th provider call (0-based).
  const toolNamesOf = (n) => Object.keys(calls[n][0].tools ?? {}).sort();

  const plannerTools = toolNamesOf(0);
  expect(plannerTools).toEqual(['read-artifact']);

  const implementerTools = toolNamesOf(1);
  expect(implementerTools).toEqual(codeGitShellTools);

  const testWriterTools = toolNamesOf(2);
  expect(testWriterTools).toEqual(codeGitShellTools);

  const qualityCheckerTools = toolNamesOf(3);
  expect(qualityCheckerTools).toEqual(codeGitShellTools);

  // Gate-evaluator calls shift later indices, so locate the pr-author call by
  // its prompt text instead of by position.
  const prAuthorIdx = calls.findIndex((call) => {
    const msgs = call[0].messages;
    return msgs[0]?.content.includes('Sub-task: pull-request');
  });
  // Fail with a clear assertion instead of a TypeError on calls[-1] when no
  // pr-author call was recorded.
  expect(prAuthorIdx).not.toBe(-1);
  const prAuthorTools = toolNamesOf(prAuthorIdx);
  expect(prAuthorTools).toEqual(gitTools);
});
1947
+ });