workspace-maxxing 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. package/.agents/skills/workspace-maxxing/.workspace-templates/CONTEXT.md +44 -0
  2. package/.agents/skills/workspace-maxxing/.workspace-templates/SYSTEM.md +44 -0
  3. package/.agents/skills/workspace-maxxing/.workspace-templates/references/anti-patterns.md +16 -0
  4. package/.agents/skills/workspace-maxxing/.workspace-templates/references/iron-laws.md +26 -0
  5. package/.agents/skills/workspace-maxxing/.workspace-templates/references/reporting-format.md +52 -0
  6. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/benchmark.ts +171 -0
  7. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/dispatch.ts +473 -0
  8. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/generate-tests.ts +158 -0
  9. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/install-tool.ts +82 -0
  10. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/iterate.ts +265 -0
  11. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/orchestrator.ts +539 -0
  12. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/scaffold.ts +282 -0
  13. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/validate.ts +452 -0
  14. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/architecture/SKILL.md +95 -0
  15. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/fixer/SKILL.md +109 -0
  16. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/iteration/SKILL.md +89 -0
  17. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
  18. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/research/SKILL.md +94 -0
  19. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/testing/SKILL.md +89 -0
  20. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/tooling/SKILL.md +87 -0
  21. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/validation/SKILL.md +103 -0
  22. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/worker/SKILL.md +79 -0
  23. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
  24. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
  25. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
  26. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
  27. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
  28. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/README.md +14 -0
  29. package/.agents/skills/workspace-maxxing/SKILL.md +312 -0
  30. package/.agents/skills/workspace-maxxing/scripts/benchmark.ts +171 -0
  31. package/.agents/skills/workspace-maxxing/scripts/dispatch.ts +473 -0
  32. package/.agents/skills/workspace-maxxing/scripts/generate-tests.ts +158 -0
  33. package/.agents/skills/workspace-maxxing/scripts/install-tool.ts +82 -0
  34. package/.agents/skills/workspace-maxxing/scripts/iterate.ts +265 -0
  35. package/.agents/skills/workspace-maxxing/scripts/orchestrator.ts +539 -0
  36. package/.agents/skills/workspace-maxxing/scripts/scaffold.ts +282 -0
  37. package/.agents/skills/workspace-maxxing/scripts/validate.ts +452 -0
  38. package/README.md +144 -0
  39. package/dist/agent-creator.d.ts +9 -0
  40. package/dist/agent-creator.d.ts.map +1 -0
  41. package/dist/agent-creator.js +199 -0
  42. package/dist/agent-creator.js.map +1 -0
  43. package/dist/agent-iterator.d.ts +38 -0
  44. package/dist/agent-iterator.d.ts.map +1 -0
  45. package/dist/agent-iterator.js +327 -0
  46. package/dist/agent-iterator.js.map +1 -0
  47. package/dist/index.d.ts +3 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +197 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/install.d.ts +18 -0
  52. package/dist/install.d.ts.map +1 -0
  53. package/dist/install.js +117 -0
  54. package/dist/install.js.map +1 -0
  55. package/dist/platforms/claude.d.ts +7 -0
  56. package/dist/platforms/claude.d.ts.map +1 -0
  57. package/dist/platforms/claude.js +70 -0
  58. package/dist/platforms/claude.js.map +1 -0
  59. package/dist/platforms/copilot.d.ts +7 -0
  60. package/dist/platforms/copilot.d.ts.map +1 -0
  61. package/dist/platforms/copilot.js +75 -0
  62. package/dist/platforms/copilot.js.map +1 -0
  63. package/dist/platforms/gemini.d.ts +7 -0
  64. package/dist/platforms/gemini.d.ts.map +1 -0
  65. package/dist/platforms/gemini.js +81 -0
  66. package/dist/platforms/gemini.js.map +1 -0
  67. package/dist/platforms/index.d.ts +8 -0
  68. package/dist/platforms/index.d.ts.map +1 -0
  69. package/dist/platforms/index.js +41 -0
  70. package/dist/platforms/index.js.map +1 -0
  71. package/dist/platforms/opencode.d.ts +7 -0
  72. package/dist/platforms/opencode.d.ts.map +1 -0
  73. package/dist/platforms/opencode.js +70 -0
  74. package/dist/platforms/opencode.js.map +1 -0
  75. package/dist/scripts/benchmark.d.ts +20 -0
  76. package/dist/scripts/benchmark.d.ts.map +1 -0
  77. package/dist/scripts/benchmark.js +170 -0
  78. package/dist/scripts/benchmark.js.map +1 -0
  79. package/dist/scripts/dispatch.d.ts +32 -0
  80. package/dist/scripts/dispatch.d.ts.map +1 -0
  81. package/dist/scripts/dispatch.js +386 -0
  82. package/dist/scripts/dispatch.js.map +1 -0
  83. package/dist/scripts/generate-tests.d.ts +11 -0
  84. package/dist/scripts/generate-tests.d.ts.map +1 -0
  85. package/dist/scripts/generate-tests.js +118 -0
  86. package/dist/scripts/generate-tests.js.map +1 -0
  87. package/dist/scripts/install-tool.d.ts +8 -0
  88. package/dist/scripts/install-tool.d.ts.map +1 -0
  89. package/dist/scripts/install-tool.js +98 -0
  90. package/dist/scripts/install-tool.js.map +1 -0
  91. package/dist/scripts/iterate.d.ts +44 -0
  92. package/dist/scripts/iterate.d.ts.map +1 -0
  93. package/dist/scripts/iterate.js +260 -0
  94. package/dist/scripts/iterate.js.map +1 -0
  95. package/dist/scripts/orchestrator.d.ts +40 -0
  96. package/dist/scripts/orchestrator.d.ts.map +1 -0
  97. package/dist/scripts/orchestrator.js +378 -0
  98. package/dist/scripts/orchestrator.js.map +1 -0
  99. package/dist/scripts/scaffold.d.ts +8 -0
  100. package/dist/scripts/scaffold.d.ts.map +1 -0
  101. package/dist/scripts/scaffold.js +279 -0
  102. package/dist/scripts/scaffold.js.map +1 -0
  103. package/dist/scripts/validate.d.ts +11 -0
  104. package/dist/scripts/validate.d.ts.map +1 -0
  105. package/dist/scripts/validate.js +472 -0
  106. package/dist/scripts/validate.js.map +1 -0
  107. package/docs/superpowers/plans/2026-04-07-autonomous-iteration-plan.md +1123 -0
  108. package/docs/superpowers/plans/2026-04-07-autonomous-iteration-sub-agent-batches.md +1923 -0
  109. package/docs/superpowers/plans/2026-04-07-autonomous-workflow-sub-skill-plan.md +1505 -0
  110. package/docs/superpowers/plans/2026-04-07-benchmarking-multi-agent-plan.md +854 -0
  111. package/docs/superpowers/plans/2026-04-07-workspace-builder-logic-plan.md +1426 -0
  112. package/docs/superpowers/plans/2026-04-07-workspace-maxxing-plan.md +1299 -0
  113. package/docs/superpowers/plans/2026-04-08-session-294c-subagent-invocation-plan.md +320 -0
  114. package/docs/superpowers/plans/2026-04-08-workflow-prompt-hardening-plan.md +1025 -0
  115. package/docs/superpowers/plans/2026-04-12-workspace-agent-creation-plan.md +992 -0
  116. package/docs/superpowers/specs/2026-04-07-autonomous-iteration-design.md +214 -0
  117. package/docs/superpowers/specs/2026-04-07-autonomous-iteration-sub-agent-batches-design.md +188 -0
  118. package/docs/superpowers/specs/2026-04-07-autonomous-workflow-sub-skill-design.md +137 -0
  119. package/docs/superpowers/specs/2026-04-07-benchmarking-multi-agent-design.md +105 -0
  120. package/docs/superpowers/specs/2026-04-07-workspace-builder-logic-design.md +179 -0
  121. package/docs/superpowers/specs/2026-04-07-workspace-maxxing-design.md +227 -0
  122. package/docs/superpowers/specs/2026-04-08-session-294c-subagent-invocation-design.md +265 -0
  123. package/docs/superpowers/specs/2026-04-08-workflow-prompt-hardening-design.md +146 -0
  124. package/docs/superpowers/specs/2026-04-12-workspace-agent-creation-design.md +239 -0
  125. package/jest.config.js +8 -0
  126. package/package.json +32 -0
  127. package/src/agent-creator.ts +180 -0
  128. package/src/agent-iterator.ts +397 -0
  129. package/src/index.ts +189 -0
  130. package/src/install.ts +105 -0
  131. package/src/platforms/claude.ts +40 -0
  132. package/src/platforms/copilot.ts +50 -0
  133. package/src/platforms/gemini.ts +55 -0
  134. package/src/platforms/index.ts +45 -0
  135. package/src/platforms/opencode.ts +41 -0
  136. package/src/scripts/benchmark.ts +171 -0
  137. package/src/scripts/dispatch.ts +473 -0
  138. package/src/scripts/generate-tests.ts +112 -0
  139. package/src/scripts/install-tool.ts +82 -0
  140. package/src/scripts/iterate.ts +271 -0
  141. package/src/scripts/orchestrator.ts +539 -0
  142. package/src/scripts/scaffold.ts +282 -0
  143. package/src/scripts/validate.ts +516 -0
  144. package/templates/.workspace-templates/CONTEXT.md +44 -0
  145. package/templates/.workspace-templates/SYSTEM.md +44 -0
  146. package/templates/.workspace-templates/references/anti-patterns.md +16 -0
  147. package/templates/.workspace-templates/references/iron-laws.md +26 -0
  148. package/templates/.workspace-templates/references/reporting-format.md +52 -0
  149. package/templates/.workspace-templates/scripts/benchmark.ts +171 -0
  150. package/templates/.workspace-templates/scripts/dispatch.ts +473 -0
  151. package/templates/.workspace-templates/scripts/generate-tests.ts +158 -0
  152. package/templates/.workspace-templates/scripts/install-tool.ts +82 -0
  153. package/templates/.workspace-templates/scripts/iterate.ts +265 -0
  154. package/templates/.workspace-templates/scripts/orchestrator.ts +539 -0
  155. package/templates/.workspace-templates/scripts/scaffold.ts +282 -0
  156. package/templates/.workspace-templates/scripts/validate.ts +452 -0
  157. package/templates/.workspace-templates/skills/architecture/SKILL.md +95 -0
  158. package/templates/.workspace-templates/skills/fixer/SKILL.md +109 -0
  159. package/templates/.workspace-templates/skills/iteration/SKILL.md +89 -0
  160. package/templates/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
  161. package/templates/.workspace-templates/skills/research/SKILL.md +94 -0
  162. package/templates/.workspace-templates/skills/testing/SKILL.md +89 -0
  163. package/templates/.workspace-templates/skills/tooling/SKILL.md +87 -0
  164. package/templates/.workspace-templates/skills/validation/SKILL.md +103 -0
  165. package/templates/.workspace-templates/skills/worker/SKILL.md +79 -0
  166. package/templates/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
  167. package/templates/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
  168. package/templates/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
  169. package/templates/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
  170. package/templates/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
  171. package/templates/.workspace-templates/workspace/README.md +14 -0
  172. package/templates/SKILL.md +347 -0
  173. package/tests/benchmark.test.ts +158 -0
  174. package/tests/cli.test.ts +109 -0
  175. package/tests/dispatch-parallel.test.ts +124 -0
  176. package/tests/dispatch.test.ts +218 -0
  177. package/tests/fixer-skill.test.ts +203 -0
  178. package/tests/generate-tests.test.ts +101 -0
  179. package/tests/install-tool.test.ts +141 -0
  180. package/tests/install.test.ts +144 -0
  181. package/tests/integration.test.ts +324 -0
  182. package/tests/iterate.test.ts +219 -0
  183. package/tests/orchestrator.test.ts +710 -0
  184. package/tests/scaffold.test.ts +238 -0
  185. package/tests/templates-enhanced.test.ts +208 -0
  186. package/tests/templates.test.ts +219 -0
  187. package/tests/validate.test.ts +421 -0
  188. package/tests/validation-enhanced.test.ts +303 -0
  189. package/tests/worker-skill.test.ts +88 -0
  190. package/tsconfig.json +19 -0
  191. package/workspace/00-meta/CONTEXT.md +3 -0
  192. package/workspace/00-meta/execution-log.md +17 -0
  193. package/workspace/00-meta/tools.md +11 -0
  194. package/workspace/01-input/CONTEXT.md +27 -0
  195. package/workspace/CONTEXT.md +35 -0
  196. package/workspace/README.md +14 -0
  197. package/workspace/SYSTEM.md +36 -0
  198. package/workspace-maxxing-0.1.0.tgz +0 -0
@@ -0,0 +1,710 @@
1
+ import * as fs from 'fs';
2
+ import * as path from 'path';
3
+ import * as os from 'os';
4
+ import { splitIntoBatches, createBatchDirectory, getBatchDirectory, createTestCaseDirectory, runBatchLifecycle } from '../src/scripts/orchestrator';
5
+ import * as dispatch from '../src/scripts/dispatch';
6
+ import * as generateTests from '../src/scripts/generate-tests';
7
+ import * as benchmark from '../src/scripts/benchmark';
8
+
9
+ describe('orchestrator', () => {
10
+ let tempDir: string; // Per-test scratch directory; assigned in beforeEach, removed in afterEach.
11
+
12
+ beforeEach(() => { // Give every test its own freshly created directory.
13
+ tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'orchestrator-test-')); // mkdtempSync appends a random suffix to the 'orchestrator-test-' prefix.
14
+ });
15
+
16
+ afterEach(() => { // Clean up the filesystem and the spies created by the test that just ran.
17
+ fs.rmSync(tempDir, { recursive: true, force: true }); // Delete the scratch directory and everything under it; force ignores a missing path.
18
+ jest.restoreAllMocks(); // Restore every jest.spyOn target so stubs do not leak into the next test.
19
+ });
20
+
21
+ describe('splitIntoBatches', () => { // Pure-function tests: chunking, default size, and argument validation.
22
+ it('splits items into batches of specified size', () => {
23
+ const items = ['tc-001', 'tc-002', 'tc-003', 'tc-004', 'tc-005'];
24
+ const result = splitIntoBatches(items, 3);
25
+
26
+ expect(result).toHaveLength(2); // 5 items at size 3 -> one full batch plus a remainder batch.
27
+ expect(result[0]).toEqual(['tc-001', 'tc-002', 'tc-003']);
28
+ expect(result[1]).toEqual(['tc-004', 'tc-005']); // Remainder keeps input order.
29
+ });
30
+
31
+ it('returns single batch when items fit', () => {
32
+ const items = ['tc-001', 'tc-002'];
33
+ const result = splitIntoBatches(items, 3);
34
+
35
+ expect(result).toHaveLength(1); // Fewer items than the batch size still yields exactly one batch.
36
+ expect(result[0]).toEqual(['tc-001', 'tc-002']);
37
+ });
38
+
39
+ it('returns empty array for empty input', () => {
40
+ const result = splitIntoBatches([], 3);
41
+ expect(result).toEqual([]); // No items -> no batches (not a single empty batch).
42
+ });
43
+
44
+ it('uses default batch size of 3 when not specified', () => {
45
+ const items = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
46
+ const result = splitIntoBatches(items); // Second argument omitted on purpose to exercise the default.
47
+
48
+ expect(result).toHaveLength(3); // 7 items split 3 / 3 / 1.
49
+ expect(result[0]).toHaveLength(3);
50
+ expect(result[1]).toHaveLength(3);
51
+ expect(result[2]).toHaveLength(1);
52
+ });
53
+
54
+ it('throws for invalid batch sizes to prevent non-terminating loops', () => {
55
+ expect(() => splitIntoBatches(['tc-001'], 0)).toThrow(/Invalid batchSize/); // zero
56
+ expect(() => splitIntoBatches(['tc-001'], -1)).toThrow(/Invalid batchSize/); // negative
57
+ expect(() => splitIntoBatches(['tc-001'], 1.5)).toThrow(/Invalid batchSize/); // non-integer
58
+ expect(() => splitIntoBatches(['tc-001'], Number.NaN)).toThrow(/Invalid batchSize/); // NaN
59
+ });
60
+ });
61
+
62
+ describe('batch directory management', () => { // Filesystem helpers, exercised against the per-test temp directory.
63
+ it('creates batch directory structure', () => {
64
+ const baseDir = path.join(tempDir, '.agents', 'iteration'); // Does not exist yet; the helper is expected to create it.
65
+ const result = createBatchDirectory(baseDir, 1);
66
+
67
+ expect(fs.existsSync(result)).toBe(true);
68
+ expect(result).toContain('batch-01'); // Batch number 1 shows up as 'batch-01' in the returned path.
69
+ });
70
+
71
+ it('returns existing batch directory path', () => {
72
+ const baseDir = path.join(tempDir, '.agents', 'iteration');
73
+ fs.mkdirSync(path.join(baseDir, 'batch-02'), { recursive: true }); // Pre-create the directory the getter should resolve to.
74
+
75
+ const result = getBatchDirectory(baseDir, 2);
76
+ expect(result).toContain('batch-02'); // Only the path is asserted here, not existence.
77
+ });
78
+
79
+ it('creates test case directory within batch', () => {
80
+ const baseDir = path.join(tempDir, '.agents', 'iteration');
81
+ const batchDir = createBatchDirectory(baseDir, 1);
82
+ const tcDir = createTestCaseDirectory(batchDir, 'tc-001');
83
+
84
+ expect(fs.existsSync(tcDir)).toBe(true);
85
+ expect(tcDir).toContain('tc-001'); // The test-case id appears in the returned path.
86
+ });
87
+ });
88
+
89
+ describe('batch lifecycle', () => {
90
+ it('rejects invalid numeric config values before running lifecycle', () => { // Each call sets one out-of-range field and expects an error naming that field.
91
+ expect(() => runBatchLifecycle(tempDir, { batchSize: 0 })).toThrow(/Invalid batchSize/);
92
+ expect(() => runBatchLifecycle(tempDir, { maxFixRetries: -1 })).toThrow(/Invalid maxFixRetries/);
93
+ expect(() => runBatchLifecycle(tempDir, { scoreThreshold: 101 })).toThrow(/Invalid scoreThreshold/);
94
+ expect(() => runBatchLifecycle(tempDir, { workerTimeout: 0 })).toThrow(/Invalid workerTimeout/);
95
+ });
96
+
97
+ it('prefers existing agent-generated test-cases.json over generate fallback', () => { // A pre-seeded test-cases.json must be used and generateTestCases must not run.
98
+ const iterationDir = path.join(tempDir, '.agents', 'iteration');
99
+ fs.mkdirSync(iterationDir, { recursive: true });
100
+ fs.writeFileSync( // Seed the file in its bare-array form with two fully specified cases.
101
+ path.join(iterationDir, 'test-cases.json'),
102
+ JSON.stringify([
103
+ { id: 'tc-a', input: { payload: 'a' }, expected: { criteria: ['a'] } },
104
+ { id: 'tc-b', input: { payload: 'b' }, expected: { criteria: ['b'] } },
105
+ ], null, 2),
106
+ );
107
+
108
+ const generateSpy = jest.spyOn(generateTests, 'generateTestCases').mockReturnValue({ // Stubbed fallback; asserted below to never be called.
109
+ testCases: [
110
+ { stage: 'fallback', type: 'sample', input: 'x', expected: 'x' },
111
+ ],
112
+ });
113
+
114
+ const dispatchSpy = jest.spyOn(dispatch, 'dispatchParallel').mockImplementation((invocations) => { // Stub: echo each invocation back as a 'passed' result.
115
+ return invocations.map((inv) => ({
116
+ skill: inv.skill,
117
+ status: 'passed',
118
+ batchId: inv.batchId,
119
+ testCaseId: inv.testCaseId,
120
+ timestamp: '2026-04-07T00:00:00.000Z',
121
+ findings: [],
122
+ recommendations: ['continue'],
123
+ metrics: { latencyMs: 5 },
124
+ nextSkill: 'validation',
125
+ }));
126
+ });
127
+
128
+ jest.spyOn(benchmark, 'calculateBenchmark').mockReturnValue({ // Fixed benchmark result for every call.
129
+ workspace: 'test',
130
+ agent: 'test-agent',
131
+ timestamp: '2026-04-07T00:00:00.000Z',
132
+ rawScore: 90,
133
+ weightedScore: 95, // Above the scoreThreshold of 85 set below; presumably the value compared to it - confirm in orchestrator.
134
+ stages: [],
135
+ fixSuggestions: [],
136
+ improvementPotential: false,
137
+ });
138
+
139
+ const result = runBatchLifecycle(tempDir, {
140
+ batchSize: 2,
141
+ scoreThreshold: 85,
142
+ maxFixRetries: 1,
143
+ workerTimeout: 300,
144
+ });
145
+
146
+ expect(result.totalBatches).toBe(1); // Two seeded cases at batchSize 2 -> a single batch.
147
+ expect(generateSpy).not.toHaveBeenCalled();
148
+ expect(dispatchSpy).toHaveBeenCalledTimes(1);
149
+ expect(dispatchSpy.mock.calls[0][0]).toEqual([ // First argument of the first dispatch call: ids come from the seeded file.
150
+ { skill: 'worker', batchId: 1, testCaseId: 'tc-a' },
151
+ { skill: 'worker', batchId: 1, testCaseId: 'tc-b' },
152
+ ]);
153
+ });
154
+
155
+ it('throws when existing test-cases.json has invalid top-level structure', () => { // A top-level object without a testCases array must be rejected.
156
+ const iterationDir = path.join(tempDir, '.agents', 'iteration');
157
+ fs.mkdirSync(iterationDir, { recursive: true });
158
+ fs.writeFileSync(path.join(iterationDir, 'test-cases.json'), JSON.stringify({ foo: 'bar' }, null, 2)); // Neither an array nor { testCases: [...] }.
159
+
160
+ expect(() => runBatchLifecycle(tempDir, {
161
+ batchSize: 1,
162
+ scoreThreshold: 85,
163
+ maxFixRetries: 1,
164
+ workerTimeout: 300,
165
+ })).toThrow(/test-cases\.json must be an array or an object with a testCases array/); // Dot escaped so only the literal file name matches.
166
+ });
167
+
168
+ it('throws when any test-case is missing id/input/expected fields', () => { // One incomplete entry is enough to reject the whole file.
169
+ const iterationDir = path.join(tempDir, '.agents', 'iteration');
170
+ fs.mkdirSync(iterationDir, { recursive: true });
171
+ fs.writeFileSync(
172
+ path.join(iterationDir, 'test-cases.json'),
173
+ JSON.stringify([
174
+ { id: 'tc-001', input: { payload: 'a' }, expected: { criteria: ['a'] } },
175
+ { id: 'tc-002', input: { payload: 'b' } }, // Deliberately missing `expected`.
176
+ ], null, 2),
177
+ );
178
+
179
+ jest.spyOn(dispatch, 'dispatchParallel').mockImplementation((invocations) => { // Stubbed so a validation miss cannot reach the real dispatcher.
180
+ return invocations.map((inv) => ({
181
+ skill: inv.skill,
182
+ status: 'passed',
183
+ batchId: inv.batchId,
184
+ testCaseId: inv.testCaseId,
185
+ timestamp: '2026-04-07T00:00:00.000Z',
186
+ findings: [],
187
+ recommendations: ['continue'],
188
+ metrics: { latencyMs: 5 },
189
+ nextSkill: 'validation',
190
+ }));
191
+ });
192
+
193
+ jest.spyOn(benchmark, 'calculateBenchmark').mockReturnValue({ // Fixed benchmark result; not asserted on in this test.
194
+ workspace: 'test',
195
+ agent: 'test-agent',
196
+ timestamp: '2026-04-07T00:00:00.000Z',
197
+ rawScore: 90,
198
+ weightedScore: 95,
199
+ stages: [],
200
+ fixSuggestions: [],
201
+ improvementPotential: false,
202
+ });
203
+
204
+ expect(() => runBatchLifecycle(tempDir, {
205
+ batchSize: 1,
206
+ scoreThreshold: 85,
207
+ maxFixRetries: 1,
208
+ workerTimeout: 300,
209
+ })).toThrow(/must include id, input, and expected/);
210
+ });
211
+
212
+ it('throws when test-case ids are duplicated', () => { // Two structurally valid entries sharing one id must be rejected.
213
+ const iterationDir = path.join(tempDir, '.agents', 'iteration');
214
+ fs.mkdirSync(iterationDir, { recursive: true });
215
+ fs.writeFileSync(
216
+ path.join(iterationDir, 'test-cases.json'),
217
+ JSON.stringify([
218
+ { id: 'tc-001', input: { payload: 'a' }, expected: { criteria: ['a'] } },
219
+ { id: 'tc-001', input: { payload: 'b' }, expected: { criteria: ['b'] } }, // Same id as the entry above.
220
+ ], null, 2),
221
+ );
222
+
223
+ jest.spyOn(dispatch, 'dispatchParallel').mockImplementation((invocations) => { // Stubbed so a validation miss cannot reach the real dispatcher.
224
+ return invocations.map((inv) => ({
225
+ skill: inv.skill,
226
+ status: 'passed',
227
+ batchId: inv.batchId,
228
+ testCaseId: inv.testCaseId,
229
+ timestamp: '2026-04-07T00:00:00.000Z',
230
+ findings: [],
231
+ recommendations: ['continue'],
232
+ metrics: { latencyMs: 5 },
233
+ nextSkill: 'validation',
234
+ }));
235
+ });
236
+
237
+ jest.spyOn(benchmark, 'calculateBenchmark').mockReturnValue({ // Fixed benchmark result; not asserted on in this test.
238
+ workspace: 'test',
239
+ agent: 'test-agent',
240
+ timestamp: '2026-04-07T00:00:00.000Z',
241
+ rawScore: 90,
242
+ weightedScore: 95,
243
+ stages: [],
244
+ fixSuggestions: [],
245
+ improvementPotential: false,
246
+ });
247
+
248
+ expect(() => runBatchLifecycle(tempDir, {
249
+ batchSize: 1,
250
+ scoreThreshold: 85,
251
+ maxFixRetries: 1,
252
+ workerTimeout: 300,
253
+ })).toThrow(/Duplicate testCaseId/);
254
+ });
255
+
256
+ it('runs full lifecycle and writes summary for passing batches', () => { // Happy path: 4 generated cases, 2 batches, both pass, summary persisted.
257
+ jest.spyOn(generateTests, 'generateTestCases').mockImplementation((workspacePath, outputPath) => { // Stub generator: returns four cases and writes them when an output path is given.
258
+ const payload = {
259
+ testCases: [
260
+ { stage: '01-input', type: 'sample' as const, input: 'a', expected: 'a' },
261
+ { stage: '02-output', type: 'sample' as const, input: 'b', expected: 'b' },
262
+ { stage: '03-review', type: 'sample' as const, input: 'c', expected: 'c' },
263
+ { stage: '04-wrap', type: 'sample' as const, input: 'd', expected: 'd' },
264
+ ],
265
+ };
266
+
267
+ if (outputPath) {
268
+ fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2)); // Object form ({ testCases: [...] }), checked again near the end of the test.
269
+ }
270
+
271
+ return payload;
272
+ });
273
+
274
+ const dispatchSpy = jest.spyOn(dispatch, 'dispatchParallel').mockImplementation((invocations) => { // Stub: every invocation comes back 'passed'.
275
+ return invocations.map((inv) => ({
276
+ skill: inv.skill,
277
+ status: 'passed',
278
+ batchId: inv.batchId,
279
+ testCaseId: inv.testCaseId,
280
+ timestamp: '2026-04-07T00:00:00.000Z',
281
+ findings: [],
282
+ recommendations: ['continue'],
283
+ metrics: { latencyMs: 10 },
284
+ nextSkill: 'validation',
285
+ }));
286
+ });
287
+
288
+ jest.spyOn(benchmark, 'calculateBenchmark') // Two queued results, consumed in call order.
289
+ .mockReturnValueOnce({
290
+ workspace: 'test',
291
+ agent: 'test-agent',
292
+ timestamp: '2026-04-07T00:00:00.000Z',
293
+ rawScore: 80,
294
+ weightedScore: 92, // First result: above the threshold of 85.
295
+ stages: [],
296
+ fixSuggestions: [],
297
+ improvementPotential: false,
298
+ })
299
+ .mockReturnValueOnce({
300
+ workspace: 'test',
301
+ agent: 'test-agent',
302
+ timestamp: '2026-04-07T00:00:00.000Z',
303
+ rawScore: 78,
304
+ weightedScore: 88, // Second result: also above the threshold of 85.
305
+ stages: [],
306
+ fixSuggestions: [],
307
+ improvementPotential: false,
308
+ });
309
+
310
+ const result = runBatchLifecycle(tempDir, {
311
+ batchSize: 2,
312
+ maxFixRetries: 2,
313
+ scoreThreshold: 85,
314
+ workerTimeout: 300,
315
+ });
316
+
317
+ expect(result.totalBatches).toBe(2); // 4 cases at batchSize 2.
318
+ expect(result.passedBatches).toBe(2);
319
+ expect(result.failedBatches).toBe(0);
320
+ expect(result.escalatedBatches).toBe(0);
321
+ expect(result.overallScore).toBe(90); // Equals the mean of the mocked weightedScore values (92 and 88).
322
+ expect(result.batchReports.map((r) => r.status)).toEqual(['passed', 'passed']);
323
+
324
+ expect(dispatchSpy).toHaveBeenCalledTimes(2); // One worker dispatch per batch, no fixer dispatches.
325
+ expect(dispatchSpy.mock.calls[0][0]).toEqual([ // The generated cases carry no id; tc-00N ids are presumably assigned by the orchestrator - confirm.
326
+ { skill: 'worker', batchId: 1, testCaseId: 'tc-001' },
327
+ { skill: 'worker', batchId: 1, testCaseId: 'tc-002' },
328
+ ]);
329
+ expect(dispatchSpy.mock.calls[1][0]).toEqual([
330
+ { skill: 'worker', batchId: 2, testCaseId: 'tc-003' },
331
+ { skill: 'worker', batchId: 2, testCaseId: 'tc-004' },
332
+ ]);
333
+
334
+ const summaryPath = path.join(tempDir, '.agents', 'iteration', 'summary.json');
335
+ expect(fs.existsSync(summaryPath)).toBe(true);
336
+
337
+ const testCasesPath = path.join(tempDir, '.agents', 'iteration', 'test-cases.json');
338
+ expect(fs.existsSync(testCasesPath)).toBe(true);
339
+ const savedTestCases = JSON.parse(fs.readFileSync(testCasesPath, 'utf-8'));
340
+ expect(Array.isArray(savedTestCases.testCases)).toBe(true); // Persisted in the object form with a testCases array.
341
+ expect(savedTestCases.testCases).toHaveLength(4);
342
+
343
+ const savedSummary = JSON.parse(fs.readFileSync(summaryPath, 'utf-8')); // The on-disk summary must agree with the returned result.
344
+ expect(savedSummary.totalBatches).toBe(2);
345
+ expect(savedSummary.overallScore).toBe(90);
346
+ });
347
+
348
+ it('forwards sub-agent runner options to dispatch calls', () => { // The runner command and timeout from the config must reach dispatchParallel.
349
+ jest.spyOn(generateTests, 'generateTestCases').mockReturnValue({ // Single generated case -> single batch.
350
+ testCases: [
351
+ { stage: '01-input', type: 'sample', input: 'a', expected: 'a' },
352
+ ],
353
+ });
354
+
355
+ const dispatchSpy = jest.spyOn(dispatch, 'dispatchParallel').mockImplementation((invocations) => { // Stub: every invocation comes back 'passed'.
356
+ return invocations.map((inv) => ({
357
+ skill: inv.skill,
358
+ status: 'passed',
359
+ batchId: inv.batchId,
360
+ testCaseId: inv.testCaseId,
361
+ timestamp: '2026-04-07T00:00:00.000Z',
362
+ findings: [],
363
+ recommendations: ['continue'],
364
+ metrics: { latencyMs: 10 },
365
+ nextSkill: 'validation',
366
+ }));
367
+ });
368
+
369
+ jest.spyOn(benchmark, 'calculateBenchmark').mockReturnValue({ // Fixed benchmark result; not asserted on in this test.
370
+ workspace: 'test',
371
+ agent: 'test-agent',
372
+ timestamp: '2026-04-07T00:00:00.000Z',
373
+ rawScore: 90,
374
+ weightedScore: 95,
375
+ stages: [],
376
+ fixSuggestions: [],
377
+ improvementPotential: false,
378
+ });
379
+
380
+ runBatchLifecycle(tempDir, {
381
+ batchSize: 1,
382
+ scoreThreshold: 85,
383
+ maxFixRetries: 1,
384
+ workerTimeout: 300,
385
+ subagentRunner: `${process.execPath} fake-runner.js {skill} {batchId} {testCaseId}`, // Never executed: dispatchParallel is stubbed above.
386
+ });
387
+
388
+ expect(dispatchSpy).toHaveBeenCalled();
389
+ expect(dispatchSpy.mock.calls[0][2]).toEqual(expect.objectContaining({ // Third argument of the first dispatch call; extra keys are tolerated.
390
+ workspacePath: tempDir,
391
+ runnerCommand: `${process.execPath} fake-runner.js {skill} {batchId} {testCaseId}`, // Same string as subagentRunner above.
392
+ runnerTimeoutSeconds: 300, // Same number as workerTimeout above.
393
+ }));
394
+ });
395
+
396
+ it('uses worker timeout to treat long worker dispatch as failed and trigger fixer retry', () => { // Workers report 'passed', yet a fixer dispatch is still expected.
397
+ jest.spyOn(generateTests, 'generateTestCases').mockReturnValue({ // Single generated case -> single batch.
398
+ testCases: [
399
+ { stage: '01-input', type: 'sample', input: 'slow', expected: 'slow' },
400
+ ],
401
+ });
402
+
403
+ const dispatchSpy = jest.spyOn(dispatch, 'dispatchParallel').mockImplementation((invocations) => { // Always 'passed', so any failure seen by the test is not reported by dispatch itself.
404
+ return invocations.map((inv) => ({
405
+ skill: inv.skill,
406
+ status: 'passed',
407
+ batchId: inv.batchId,
408
+ testCaseId: inv.testCaseId,
409
+ timestamp: '2026-04-07T00:00:00.000Z',
410
+ findings: [],
411
+ recommendations: ['continue'],
412
+ metrics: { latencyMs: 10 },
413
+ nextSkill: 'validation',
414
+ }));
415
+ });
416
+
417
+ const nowValues = [0, 2001, 3000, 3001]; // Scripted clock: first two reads are 2001 ms apart, next two are 1 ms apart.
418
+ jest.spyOn(Date, 'now').mockImplementation(() => nowValues.shift() ?? 3001); // Once the script is exhausted the clock stays at 3001.
419
+
420
+ jest.spyOn(benchmark, 'calculateBenchmark') // Three queued results, all scoring 98 against the threshold of 95.
421
+ .mockReturnValueOnce({
422
+ workspace: 'test',
423
+ agent: 'test-agent',
424
+ timestamp: '2026-04-07T00:00:00.000Z',
425
+ rawScore: 90,
426
+ weightedScore: 98,
427
+ stages: [],
428
+ fixSuggestions: [],
429
+ improvementPotential: false,
430
+ })
431
+ .mockReturnValueOnce({
432
+ workspace: 'test',
433
+ agent: 'test-agent',
434
+ timestamp: '2026-04-07T00:00:00.000Z',
435
+ rawScore: 91,
436
+ weightedScore: 98,
437
+ stages: [],
438
+ fixSuggestions: [],
439
+ improvementPotential: false,
440
+ })
441
+ .mockReturnValueOnce({
442
+ workspace: 'test',
443
+ agent: 'test-agent',
444
+ timestamp: '2026-04-07T00:00:00.000Z',
445
+ rawScore: 91,
446
+ weightedScore: 98,
447
+ stages: [],
448
+ fixSuggestions: [],
449
+ improvementPotential: false,
450
+ });
451
+
452
+ const result = runBatchLifecycle(tempDir, {
453
+ batchSize: 1,
454
+ maxFixRetries: 2,
455
+ scoreThreshold: 95,
456
+ workerTimeout: 1, // NOTE(review): presumably seconds, so the scripted 2001 ms gap exceeds it - confirm the unit in orchestrator.
457
+ });
458
+
459
+ expect(result.totalBatches).toBe(1);
460
+ expect(result.passedBatches).toBe(1); // The batch ends up passing after the fixer round.
461
+ expect(result.batchReports[0].status).toBe('passed');
462
+ expect(result.batchReports[0].findings.join(' ')).toMatch(/timeout|Timeout/); // The timeout must still be recorded in the findings.
463
+
464
+ expect(dispatchSpy).toHaveBeenCalledTimes(2); // Exactly one worker dispatch followed by one fixer dispatch.
465
+ expect(dispatchSpy.mock.calls[0][0][0].skill).toBe('worker');
466
+ expect(dispatchSpy.mock.calls[1][0][0].skill).toBe('fixer');
467
+ });
468
+
469
+ it('marks batch as failed when fixes clear worker failures but score remains below threshold', () => { // The fixer succeeds, but the score never reaches the threshold.
470
+ jest.spyOn(generateTests, 'generateTestCases').mockReturnValue({ // Single generated case -> single batch.
471
+ testCases: [
472
+ { stage: '01-input', type: 'sample', input: 'needs-work', expected: 'better' },
473
+ ],
474
+ });
475
+
476
+ const dispatchSpy = jest.spyOn(dispatch, 'dispatchParallel').mockImplementation((invocations) => { // Stub: worker dispatches fail, anything else passes.
477
+ if (invocations[0]?.skill === 'worker') { // Decided by the first invocation's skill only.
478
+ return [{
479
+ skill: 'worker',
480
+ status: 'failed',
481
+ batchId: 1,
482
+ testCaseId: 'tc-001',
483
+ timestamp: '2026-04-07T00:00:00.000Z',
484
+ findings: ['output missing'],
485
+ recommendations: ['run fixer'],
486
+ metrics: { latencyMs: 20 },
487
+ nextSkill: 'fixer',
488
+ }];
489
+ }
490
+
491
+ return invocations.map((inv) => ({ // Non-worker (fixer) path: echo back as 'passed'.
492
+ skill: inv.skill,
493
+ status: 'passed',
494
+ batchId: inv.batchId,
495
+ testCaseId: inv.testCaseId,
496
+ timestamp: '2026-04-07T00:00:00.000Z',
497
+ findings: ['fixed now'],
498
+ recommendations: ['re-run benchmark'],
499
+ metrics: { latencyMs: 15 },
500
+ nextSkill: 'validation',
501
+ }));
502
+ });
503
+
504
+ const benchmarkSpy = jest.spyOn(benchmark, 'calculateBenchmark') // Three queued results, all scoring 80 against the threshold of 95.
505
+ .mockReturnValueOnce({
506
+ workspace: 'test',
507
+ agent: 'test-agent',
508
+ timestamp: '2026-04-07T00:00:00.000Z',
509
+ rawScore: 60,
510
+ weightedScore: 80,
511
+ stages: [],
512
+ fixSuggestions: ['improve output'],
513
+ improvementPotential: true,
514
+ })
515
+ .mockReturnValueOnce({
516
+ workspace: 'test',
517
+ agent: 'test-agent',
518
+ timestamp: '2026-04-07T00:00:00.000Z',
519
+ rawScore: 62,
520
+ weightedScore: 80,
521
+ stages: [],
522
+ fixSuggestions: ['keep improving'],
523
+ improvementPotential: true,
524
+ })
525
+ .mockReturnValueOnce({
526
+ workspace: 'test',
527
+ agent: 'test-agent',
528
+ timestamp: '2026-04-07T00:00:00.000Z',
529
+ rawScore: 62,
530
+ weightedScore: 80,
531
+ stages: [],
532
+ fixSuggestions: ['keep improving'],
533
+ improvementPotential: true,
534
+ });
535
+
536
+ const result = runBatchLifecycle(tempDir, {
537
+ batchSize: 1,
538
+ maxFixRetries: 3,
539
+ scoreThreshold: 95,
540
+ workerTimeout: 300,
541
+ });
542
+
543
+ expect(result.totalBatches).toBe(1);
544
+ expect(result.passedBatches).toBe(0);
545
+ expect(result.failedBatches).toBe(1);
546
+ expect(result.escalatedBatches).toBe(0); // Counted as failed, not escalated.
547
+ expect(result.batchReports[0].status).toBe('failed');
548
+ expect(result.batchReports[0].findings.join(' ')).toMatch(/below threshold/);
549
+
550
+ expect(dispatchSpy).toHaveBeenCalledTimes(2); // NOTE(review): only one fixer dispatch despite maxFixRetries: 3 - the stop condition lives in orchestrator, confirm there.
551
+ expect(dispatchSpy.mock.calls[0][0][0].skill).toBe('worker');
552
+ expect(dispatchSpy.mock.calls[1][0][0].skill).toBe('fixer');
553
+ expect(benchmarkSpy).toHaveBeenCalledTimes(3); // All three queued benchmark results are consumed.
554
+ });
555
+
556
// Scenario: the worker dispatch fails and the first mocked benchmark reports a
// weightedScore of 80 (scoreThreshold is 95). The fixer dispatch then passes
// and the following benchmarks report 97. The batch is expected to finish as
// 'passed' after exactly one worker dispatch and one fixer dispatch.
it('recovers from below-threshold batch after a successful fix attempt', () => {
  const stamp = '2026-04-07T00:00:00.000Z';
  // Fields that are identical in every mocked benchmark result.
  const benchmarkIdentity = { workspace: 'test', agent: 'test-agent', timestamp: stamp };

  // A single generated test case; with batchSize 1 the test asserts one batch.
  jest.spyOn(generateTests, 'generateTestCases').mockReturnValue({
    testCases: [
      { stage: '01-input', type: 'sample', input: 'recover', expected: 'stable' },
    ],
  });

  // Worker dispatches always report a failure; any other skill echoes each
  // invocation back as a passed report.
  const dispatchMock = jest
    .spyOn(dispatch, 'dispatchParallel')
    .mockImplementation((requests) => (
      requests[0]?.skill === 'worker'
        ? [{
          skill: 'worker',
          status: 'failed',
          batchId: 1,
          testCaseId: 'tc-001',
          timestamp: stamp,
          findings: ['output missing'],
          recommendations: ['run fixer'],
          metrics: { latencyMs: 20 },
          nextSkill: 'fixer',
        }]
        : requests.map((request) => ({
          skill: request.skill,
          status: 'passed',
          batchId: request.batchId,
          testCaseId: request.testCaseId,
          timestamp: stamp,
          findings: ['fixed now'],
          recommendations: ['continue'],
          metrics: { latencyMs: 15 },
          nextSkill: 'validation',
        }))
    ));

  // Benchmark sequence: one low result, then two identical high results.
  // Each queued value is a distinct object, as separate literals.
  jest.spyOn(benchmark, 'calculateBenchmark')
    .mockReturnValueOnce({
      ...benchmarkIdentity,
      rawScore: 60,
      weightedScore: 80,
      stages: [],
      fixSuggestions: ['improve output'],
      improvementPotential: true,
    })
    .mockReturnValueOnce({
      ...benchmarkIdentity,
      rawScore: 78,
      weightedScore: 97,
      stages: [],
      fixSuggestions: [],
      improvementPotential: false,
    })
    .mockReturnValueOnce({
      ...benchmarkIdentity,
      rawScore: 78,
      weightedScore: 97,
      stages: [],
      fixSuggestions: [],
      improvementPotential: false,
    });

  const {
    totalBatches,
    passedBatches,
    failedBatches,
    escalatedBatches,
    batchReports,
  } = runBatchLifecycle(tempDir, {
    batchSize: 1,
    maxFixRetries: 3,
    scoreThreshold: 95,
    workerTimeout: 300,
  });

  // Summary counters: the only batch must be counted as passed.
  expect(totalBatches).toBe(1);
  expect(passedBatches).toBe(1);
  expect(failedBatches).toBe(0);
  expect(escalatedBatches).toBe(0);
  expect(batchReports[0].status).toBe('passed');
  expect(batchReports[0].findings).toContain('Fix attempt 1: 1 fixes applied');

  // Dispatch order: worker first, then a single fixer pass.
  expect(dispatchMock).toHaveBeenCalledTimes(2);
  const [[workerBatch], [fixerBatch]] = dispatchMock.mock.calls;
  expect(workerBatch[0].skill).toBe('worker');
  expect(fixerBatch[0].skill).toBe('fixer');
});
641
+
642
// Scenario: every dispatch (worker and fixer alike) reports 'failed' and the
// mocked benchmark always reports a weightedScore of 90 (scoreThreshold is 95).
// With maxFixRetries set to 2 the test expects one worker dispatch followed by
// two fixer dispatches, four benchmark calls, and an 'escalated' batch.
it('runs fix loop and escalates when retries are exhausted below score threshold', () => {
  const stamp = '2026-04-07T00:00:00.000Z';

  // A single generated test case; with batchSize 1 the test asserts one batch.
  jest.spyOn(generateTests, 'generateTestCases').mockReturnValue({
    testCases: [
      { stage: '01-input', type: 'sample', input: 'needs-fix', expected: 'fixed' },
    ],
  });

  // Worker dispatches report a fixed failure; any other skill echoes each
  // invocation back as a failed report, so no fix attempt ever succeeds.
  const dispatchMock = jest
    .spyOn(dispatch, 'dispatchParallel')
    .mockImplementation((requests) => (
      requests[0]?.skill === 'worker'
        ? [{
          skill: 'worker',
          status: 'failed',
          batchId: 1,
          testCaseId: 'tc-001',
          timestamp: stamp,
          findings: ['output missing'],
          recommendations: ['run fixer'],
          metrics: { latencyMs: 20 },
          nextSkill: 'fixer',
        }]
        : requests.map((request) => ({
          skill: request.skill,
          status: 'failed',
          batchId: request.batchId,
          testCaseId: request.testCaseId,
          timestamp: stamp,
          findings: ['still failing'],
          recommendations: ['retry'],
          metrics: { latencyMs: 15 },
          nextSkill: 'validation',
        }))
    ));

  // The same benchmark object is returned on every call.
  const benchmarkMock = jest
    .spyOn(benchmark, 'calculateBenchmark')
    .mockReturnValue({
      workspace: 'test',
      agent: 'test-agent',
      timestamp: stamp,
      rawScore: 60,
      weightedScore: 90,
      stages: [],
      fixSuggestions: ['improve output'],
      improvementPotential: true,
    });

  const {
    totalBatches,
    passedBatches,
    failedBatches,
    escalatedBatches,
    batchReports,
  } = runBatchLifecycle(tempDir, {
    batchSize: 1,
    maxFixRetries: 2,
    scoreThreshold: 95,
    workerTimeout: 300,
  });

  // Summary counters: the only batch must be counted as escalated, not failed.
  expect(totalBatches).toBe(1);
  expect(passedBatches).toBe(0);
  expect(failedBatches).toBe(0);
  expect(escalatedBatches).toBe(1);
  expect(batchReports[0].status).toBe('escalated');
  expect(batchReports[0].findings).toContain('Max retries exhausted');

  // Dispatch order: worker first, then one fixer pass per allowed retry.
  expect(dispatchMock).toHaveBeenCalledTimes(3);
  const [[workerBatch], [firstFixBatch], [secondFixBatch]] = dispatchMock.mock.calls;
  expect(workerBatch[0].skill).toBe('worker');
  expect(firstFixBatch[0].skill).toBe('fixer');
  expect(secondFixBatch[0].skill).toBe('fixer');

  expect(benchmarkMock).toHaveBeenCalledTimes(4);
});
709
+ });
710
+ });