workspace-maxxing 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/workspace-maxxing/.workspace-templates/CONTEXT.md +44 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/SYSTEM.md +44 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/anti-patterns.md +16 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/iron-laws.md +26 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/reporting-format.md +52 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/benchmark.ts +171 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/dispatch.ts +473 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/generate-tests.ts +158 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/install-tool.ts +82 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/iterate.ts +265 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/orchestrator.ts +539 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/scaffold.ts +282 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/validate.ts +452 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/architecture/SKILL.md +95 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/fixer/SKILL.md +109 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/iteration/SKILL.md +89 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/research/SKILL.md +94 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/testing/SKILL.md +89 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/tooling/SKILL.md +87 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/validation/SKILL.md +103 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/worker/SKILL.md +79 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/README.md +14 -0
- package/.agents/skills/workspace-maxxing/SKILL.md +312 -0
- package/.agents/skills/workspace-maxxing/scripts/benchmark.ts +171 -0
- package/.agents/skills/workspace-maxxing/scripts/dispatch.ts +473 -0
- package/.agents/skills/workspace-maxxing/scripts/generate-tests.ts +158 -0
- package/.agents/skills/workspace-maxxing/scripts/install-tool.ts +82 -0
- package/.agents/skills/workspace-maxxing/scripts/iterate.ts +265 -0
- package/.agents/skills/workspace-maxxing/scripts/orchestrator.ts +539 -0
- package/.agents/skills/workspace-maxxing/scripts/scaffold.ts +282 -0
- package/.agents/skills/workspace-maxxing/scripts/validate.ts +452 -0
- package/README.md +144 -0
- package/dist/agent-creator.d.ts +9 -0
- package/dist/agent-creator.d.ts.map +1 -0
- package/dist/agent-creator.js +199 -0
- package/dist/agent-creator.js.map +1 -0
- package/dist/agent-iterator.d.ts +38 -0
- package/dist/agent-iterator.d.ts.map +1 -0
- package/dist/agent-iterator.js +327 -0
- package/dist/agent-iterator.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +197 -0
- package/dist/index.js.map +1 -0
- package/dist/install.d.ts +18 -0
- package/dist/install.d.ts.map +1 -0
- package/dist/install.js +117 -0
- package/dist/install.js.map +1 -0
- package/dist/platforms/claude.d.ts +7 -0
- package/dist/platforms/claude.d.ts.map +1 -0
- package/dist/platforms/claude.js +70 -0
- package/dist/platforms/claude.js.map +1 -0
- package/dist/platforms/copilot.d.ts +7 -0
- package/dist/platforms/copilot.d.ts.map +1 -0
- package/dist/platforms/copilot.js +75 -0
- package/dist/platforms/copilot.js.map +1 -0
- package/dist/platforms/gemini.d.ts +7 -0
- package/dist/platforms/gemini.d.ts.map +1 -0
- package/dist/platforms/gemini.js +81 -0
- package/dist/platforms/gemini.js.map +1 -0
- package/dist/platforms/index.d.ts +8 -0
- package/dist/platforms/index.d.ts.map +1 -0
- package/dist/platforms/index.js +41 -0
- package/dist/platforms/index.js.map +1 -0
- package/dist/platforms/opencode.d.ts +7 -0
- package/dist/platforms/opencode.d.ts.map +1 -0
- package/dist/platforms/opencode.js +70 -0
- package/dist/platforms/opencode.js.map +1 -0
- package/dist/scripts/benchmark.d.ts +20 -0
- package/dist/scripts/benchmark.d.ts.map +1 -0
- package/dist/scripts/benchmark.js +170 -0
- package/dist/scripts/benchmark.js.map +1 -0
- package/dist/scripts/dispatch.d.ts +32 -0
- package/dist/scripts/dispatch.d.ts.map +1 -0
- package/dist/scripts/dispatch.js +386 -0
- package/dist/scripts/dispatch.js.map +1 -0
- package/dist/scripts/generate-tests.d.ts +11 -0
- package/dist/scripts/generate-tests.d.ts.map +1 -0
- package/dist/scripts/generate-tests.js +118 -0
- package/dist/scripts/generate-tests.js.map +1 -0
- package/dist/scripts/install-tool.d.ts +8 -0
- package/dist/scripts/install-tool.d.ts.map +1 -0
- package/dist/scripts/install-tool.js +98 -0
- package/dist/scripts/install-tool.js.map +1 -0
- package/dist/scripts/iterate.d.ts +44 -0
- package/dist/scripts/iterate.d.ts.map +1 -0
- package/dist/scripts/iterate.js +260 -0
- package/dist/scripts/iterate.js.map +1 -0
- package/dist/scripts/orchestrator.d.ts +40 -0
- package/dist/scripts/orchestrator.d.ts.map +1 -0
- package/dist/scripts/orchestrator.js +378 -0
- package/dist/scripts/orchestrator.js.map +1 -0
- package/dist/scripts/scaffold.d.ts +8 -0
- package/dist/scripts/scaffold.d.ts.map +1 -0
- package/dist/scripts/scaffold.js +279 -0
- package/dist/scripts/scaffold.js.map +1 -0
- package/dist/scripts/validate.d.ts +11 -0
- package/dist/scripts/validate.d.ts.map +1 -0
- package/dist/scripts/validate.js +472 -0
- package/dist/scripts/validate.js.map +1 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-iteration-plan.md +1123 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-iteration-sub-agent-batches.md +1923 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-workflow-sub-skill-plan.md +1505 -0
- package/docs/superpowers/plans/2026-04-07-benchmarking-multi-agent-plan.md +854 -0
- package/docs/superpowers/plans/2026-04-07-workspace-builder-logic-plan.md +1426 -0
- package/docs/superpowers/plans/2026-04-07-workspace-maxxing-plan.md +1299 -0
- package/docs/superpowers/plans/2026-04-08-session-294c-subagent-invocation-plan.md +320 -0
- package/docs/superpowers/plans/2026-04-08-workflow-prompt-hardening-plan.md +1025 -0
- package/docs/superpowers/plans/2026-04-12-workspace-agent-creation-plan.md +992 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-iteration-design.md +214 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-iteration-sub-agent-batches-design.md +188 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-workflow-sub-skill-design.md +137 -0
- package/docs/superpowers/specs/2026-04-07-benchmarking-multi-agent-design.md +105 -0
- package/docs/superpowers/specs/2026-04-07-workspace-builder-logic-design.md +179 -0
- package/docs/superpowers/specs/2026-04-07-workspace-maxxing-design.md +227 -0
- package/docs/superpowers/specs/2026-04-08-session-294c-subagent-invocation-design.md +265 -0
- package/docs/superpowers/specs/2026-04-08-workflow-prompt-hardening-design.md +146 -0
- package/docs/superpowers/specs/2026-04-12-workspace-agent-creation-design.md +239 -0
- package/jest.config.js +8 -0
- package/package.json +32 -0
- package/src/agent-creator.ts +180 -0
- package/src/agent-iterator.ts +397 -0
- package/src/index.ts +189 -0
- package/src/install.ts +105 -0
- package/src/platforms/claude.ts +40 -0
- package/src/platforms/copilot.ts +50 -0
- package/src/platforms/gemini.ts +55 -0
- package/src/platforms/index.ts +45 -0
- package/src/platforms/opencode.ts +41 -0
- package/src/scripts/benchmark.ts +171 -0
- package/src/scripts/dispatch.ts +473 -0
- package/src/scripts/generate-tests.ts +112 -0
- package/src/scripts/install-tool.ts +82 -0
- package/src/scripts/iterate.ts +271 -0
- package/src/scripts/orchestrator.ts +539 -0
- package/src/scripts/scaffold.ts +282 -0
- package/src/scripts/validate.ts +516 -0
- package/templates/.workspace-templates/CONTEXT.md +44 -0
- package/templates/.workspace-templates/SYSTEM.md +44 -0
- package/templates/.workspace-templates/references/anti-patterns.md +16 -0
- package/templates/.workspace-templates/references/iron-laws.md +26 -0
- package/templates/.workspace-templates/references/reporting-format.md +52 -0
- package/templates/.workspace-templates/scripts/benchmark.ts +171 -0
- package/templates/.workspace-templates/scripts/dispatch.ts +473 -0
- package/templates/.workspace-templates/scripts/generate-tests.ts +158 -0
- package/templates/.workspace-templates/scripts/install-tool.ts +82 -0
- package/templates/.workspace-templates/scripts/iterate.ts +265 -0
- package/templates/.workspace-templates/scripts/orchestrator.ts +539 -0
- package/templates/.workspace-templates/scripts/scaffold.ts +282 -0
- package/templates/.workspace-templates/scripts/validate.ts +452 -0
- package/templates/.workspace-templates/skills/architecture/SKILL.md +95 -0
- package/templates/.workspace-templates/skills/fixer/SKILL.md +109 -0
- package/templates/.workspace-templates/skills/iteration/SKILL.md +89 -0
- package/templates/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
- package/templates/.workspace-templates/skills/research/SKILL.md +94 -0
- package/templates/.workspace-templates/skills/testing/SKILL.md +89 -0
- package/templates/.workspace-templates/skills/tooling/SKILL.md +87 -0
- package/templates/.workspace-templates/skills/validation/SKILL.md +103 -0
- package/templates/.workspace-templates/skills/worker/SKILL.md +79 -0
- package/templates/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
- package/templates/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
- package/templates/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/README.md +14 -0
- package/templates/SKILL.md +347 -0
- package/tests/benchmark.test.ts +158 -0
- package/tests/cli.test.ts +109 -0
- package/tests/dispatch-parallel.test.ts +124 -0
- package/tests/dispatch.test.ts +218 -0
- package/tests/fixer-skill.test.ts +203 -0
- package/tests/generate-tests.test.ts +101 -0
- package/tests/install-tool.test.ts +141 -0
- package/tests/install.test.ts +144 -0
- package/tests/integration.test.ts +324 -0
- package/tests/iterate.test.ts +219 -0
- package/tests/orchestrator.test.ts +710 -0
- package/tests/scaffold.test.ts +238 -0
- package/tests/templates-enhanced.test.ts +208 -0
- package/tests/templates.test.ts +219 -0
- package/tests/validate.test.ts +421 -0
- package/tests/validation-enhanced.test.ts +303 -0
- package/tests/worker-skill.test.ts +88 -0
- package/tsconfig.json +19 -0
- package/workspace/00-meta/CONTEXT.md +3 -0
- package/workspace/00-meta/execution-log.md +17 -0
- package/workspace/00-meta/tools.md +11 -0
- package/workspace/01-input/CONTEXT.md +27 -0
- package/workspace/CONTEXT.md +35 -0
- package/workspace/README.md +14 -0
- package/workspace/SYSTEM.md +36 -0
- package/workspace-maxxing-0.1.0.tgz +0 -0
|
@@ -0,0 +1,539 @@
|
|
|
1
|
+
import * as fs from 'fs';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
|
|
4
|
+
/**
 * Caller-supplied orchestrator settings. Every field is optional; the numeric
 * ones fall back to `DEFAULT_CONFIG` and are range-checked when resolved.
 */
export interface OrchestratorConfig {
  /** How many test cases are grouped into one batch. */
  batchSize?: number;
  /** Maximum number of fix-loop passes attempted for a batch. */
  maxFixRetries?: number;
  /** Benchmark score a batch has to reach to count as passed. */
  scoreThreshold?: number;
  /** Time budget for one dispatch, in seconds. */
  workerTimeout?: number;
  /** Runner command handed to the dispatcher; it is trimmed, and blank means none. */
  subagentRunner?: string;
}
|
|
11
|
+
|
|
12
|
+
/** Outcome recorded for a single batch of test cases. */
export interface BatchReport {
  /** 1-based batch number. */
  batchId: number;
  /** Ids of the test cases that made up the batch. */
  testCases: string[];
  /** Weighted benchmark score recorded for the batch. */
  score: number;
  /** Final state of the batch (`partial` is declared but not assigned anywhere in this module). */
  status: 'passed' | 'failed' | 'partial' | 'escalated';
  /** Human-readable notes gathered while processing the batch. */
  findings: string[];
  /** ISO-8601 time at which the report was created. */
  timestamp: string;
}
|
|
20
|
+
|
|
21
|
+
/** Aggregate view over all batches of one orchestrator run. */
export interface OrchestratorSummary {
  /** Number of batches that were processed. */
  totalBatches: number;
  /** Batches whose final status was `passed`. */
  passedBatches: number;
  /** Batches whose final status was `failed`. */
  failedBatches: number;
  /** Batches whose final status was `escalated`. */
  escalatedBatches: number;
  /** Combined score across the batch reports. */
  overallScore: number;
  /** One report per processed batch. */
  batchReports: BatchReport[];
  /** ISO-8601 time at which the summary was created. */
  timestamp: string;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Values used for any `OrchestratorConfig` field the caller leaves out.
 * `workerTimeout` is expressed in seconds; the empty `subagentRunner`
 * means no runner command is configured.
 */
export const DEFAULT_CONFIG: Required<OrchestratorConfig> = {
  batchSize: 3,
  maxFixRetries: 3,
  scoreThreshold: 85,
  workerTimeout: 300,
  subagentRunner: '',
};
|
|
38
|
+
|
|
39
|
+
/** Inclusive lower/upper bounds enforced on each numeric configuration field. */
const CONFIG_LIMITS = {
  batchSize: { min: 1, max: 1000 },
  maxFixRetries: { min: 0, max: 20 },
  scoreThreshold: { min: 0, max: 100 },
  workerTimeout: { min: 1, max: 3600 },
} as const;

/** Names of the configuration fields that have numeric limits. */
type ConfigKey = keyof typeof CONFIG_LIMITS;
|
|
47
|
+
|
|
48
|
+
/** One unit of work handed to the dispatcher. */
interface WorkerInvocation {
  /** Name of the skill to run (this module uses `worker` and `fixer`). */
  skill: string;
  /** Batch the test case belongs to. */
  batchId: number;
  /** Id of the test case to process. */
  testCaseId: string;
}
|
|
53
|
+
|
|
54
|
+
/** Result the dispatcher returns for one invocation. */
interface WorkerResult {
  /** Skill that produced the result. */
  skill: string;
  /** Outcome of the invocation. */
  status: 'passed' | 'failed' | 'escalated';
  /** Batch the test case belongs to. */
  batchId: number;
  /** Id of the processed test case; results are merged by this key. */
  testCaseId: string;
  /** Time the result was produced (ISO-8601 when created in this module). */
  timestamp: string;
  /** Observations reported for the test case. */
  findings: string[];
  /** Suggested follow-up actions. */
  recommendations: string[];
  /** Named numeric measurements attached to the result. */
  metrics: Record<string, number>;
  /** Skill proposed as the next step. */
  nextSkill: string;
}
|
|
65
|
+
|
|
66
|
+
/** Subset of the benchmark result that this module reads. */
interface BenchmarkSummary {
  /** Score compared against the configured threshold. */
  weightedScore: number;
  /** Suggestions that are copied into the fix-loop findings. */
  fixSuggestions: string[];
}
|
|
70
|
+
|
|
71
|
+
/** Shape expected from the test-case generator; entries are validated later. */
interface GeneratedTestCasesResult {
  /** Raw, not yet validated test-case entries. */
  testCases: unknown[];
}
|
|
74
|
+
|
|
75
|
+
/** Optional settings forwarded to the dispatcher on every call. */
interface DispatchParallelOptions {
  /** Workspace root the dispatch operates on. */
  workspacePath?: string;
  /** Runner command; left undefined when none is configured. */
  runnerCommand?: string;
  /** Time budget passed to the runner, in seconds. */
  runnerTimeoutSeconds?: number;
}
|
|
80
|
+
|
|
81
|
+
/** Synchronous dispatcher: takes a list of invocations and returns their results. */
type DispatchParallelFn = (
  invocations: WorkerInvocation[],
  skillsDir: string,
  options?: DispatchParallelOptions,
) => WorkerResult[];

/** Computes the benchmark summary for the workspace at the given path. */
type CalculateBenchmarkFn = (workspacePath: string) => BenchmarkSummary;
|
|
87
|
+
|
|
88
|
+
/** Dispatch results together with a flag telling whether the time budget was exceeded. */
interface TimedDispatchOutcome {
  /** Results to use for the batch (rewritten as failures when timed out). */
  results: WorkerResult[];
  /** True when the dispatch took longer than the configured timeout. */
  timedOut: boolean;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Pulls the list of test-case entries out of parsed `test-cases.json` content.
 *
 * Accepts either a bare array or an object whose `testCases` property is an
 * array. The entries themselves are not inspected here.
 *
 * @param raw Parsed JSON value.
 * @returns The array found in `raw` (same reference, not a copy).
 * @throws Error when neither accepted shape matches.
 */
function extractTestCaseArray(raw: unknown): Array<Record<string, unknown>> {
  if (Array.isArray(raw)) {
    return raw as Array<Record<string, unknown>>;
  }

  const wrapped = raw && typeof raw === 'object'
    ? (raw as { testCases?: unknown }).testCases
    : undefined;

  if (!Array.isArray(wrapped)) {
    throw new Error('test-cases.json must be an array or an object with a testCases array');
  }

  return wrapped as Array<Record<string, unknown>>;
}
|
|
104
|
+
|
|
105
|
+
/**
 * Validates test-case entries and returns one unique id per entry, in order.
 *
 * Every entry must be an object with defined `input` and `expected` values.
 * A string `id` is trimmed and used as-is; when it is missing or blank the id
 * `tc-NNN` (1-based position, zero-padded to three digits) is generated,
 * unless `requireExplicitIds` is set, in which case the entry is rejected.
 *
 * @param testCases Entries taken from `test-cases.json`.
 * @param requireExplicitIds Reject entries without a non-blank string id.
 * @returns The resolved ids, index-aligned with `testCases`.
 * @throws Error for a malformed entry or a repeated id.
 */
function normalizeTestCaseIds(
  testCases: Array<Record<string, unknown>>,
  requireExplicitIds: boolean,
): string[] {
  const malformedEntry = (position: number): Error =>
    new Error(`test-cases.json item at index ${position} must include id, input, and expected`);

  const taken = new Set<string>();
  const resolved: string[] = [];

  for (const [position, entry] of testCases.entries()) {
    const isObject = Boolean(entry) && typeof entry === 'object';
    if (!isObject || entry.input === undefined || entry.expected === undefined) {
      throw malformedEntry(position);
    }

    const givenId = typeof entry.id === 'string' ? entry.id.trim() : '';
    if (requireExplicitIds && givenId === '') {
      throw malformedEntry(position);
    }

    // Fall back to a positional id only when no usable id was supplied.
    const finalId = givenId !== '' ? givenId : `tc-${String(position + 1).padStart(3, '0')}`;
    if (taken.has(finalId)) {
      throw new Error(`Duplicate testCaseId: ${finalId}`);
    }

    taken.add(finalId);
    resolved.push(finalId);
  }

  return resolved;
}
|
|
134
|
+
|
|
135
|
+
/**
 * Checks that a numeric setting is a finite integer inside its allowed range.
 *
 * @param name Setting whose limits are looked up in `CONFIG_LIMITS`.
 * @param value Candidate value.
 * @returns `value` unchanged when it is acceptable.
 * @throws RangeError when the value is not an integer or lies outside the inclusive limits.
 */
function validateIntegerConfig(name: ConfigKey, value: number): number {
  const { min, max } = CONFIG_LIMITS[name];
  const isAcceptable =
    Number.isFinite(value) && Number.isInteger(value) && value >= min && value <= max;

  if (isAcceptable) {
    return value;
  }

  throw new RangeError(
    `Invalid ${name}: expected a finite integer between ${min} and ${max}, got ${String(value)}`,
  );
}
|
|
144
|
+
|
|
145
|
+
/**
 * Turns a partial configuration into a complete, validated one.
 *
 * Numeric fields default to `DEFAULT_CONFIG` and are range-checked in the
 * order batchSize, maxFixRetries, scoreThreshold, workerTimeout; the runner
 * command defaults to an empty string and is trimmed.
 *
 * @param config Caller-supplied settings.
 * @returns Configuration with every field populated.
 * @throws RangeError when a numeric field fails validation.
 */
function resolveConfig(config: OrchestratorConfig): Required<OrchestratorConfig> {
  const numeric = (key: ConfigKey): number =>
    validateIntegerConfig(key, config[key] ?? DEFAULT_CONFIG[key]);

  const batchSize = numeric('batchSize');
  const maxFixRetries = numeric('maxFixRetries');
  const scoreThreshold = numeric('scoreThreshold');
  const workerTimeout = numeric('workerTimeout');
  const subagentRunner = (config.subagentRunner ?? '').trim();

  return { batchSize, maxFixRetries, scoreThreshold, workerTimeout, subagentRunner };
}
|
|
154
|
+
|
|
155
|
+
/**
 * Runs one synchronous dispatch and checks afterwards whether it overran its time budget.
 *
 * The dispatch itself is never interrupted: wall-clock time is measured around the
 * call and compared with `workerTimeoutSeconds` once it has returned. Within budget,
 * the dispatcher's results are returned untouched. Over budget, one `failed` result
 * is produced per invocation (in invocation order), carrying a timeout finding plus
 * `elapsedMs` / `timeoutSeconds` metrics and whatever the dispatcher reported for it.
 *
 * Dispatcher output is paired with invocations by `testCaseId`, so reordered or
 * missing results cannot leak findings onto the wrong test case. A result is used
 * by position only when its id does not belong to any invocation of this call.
 *
 * @param dispatchParallel Dispatcher to call.
 * @param invocations Work items to dispatch.
 * @param skillsDir Skills directory forwarded to the dispatcher.
 * @param workerTimeoutSeconds Time budget in seconds.
 * @param dispatchOptions Options forwarded to the dispatcher.
 * @returns The results to use and whether the budget was exceeded.
 */
function dispatchWithTimeout(
  dispatchParallel: DispatchParallelFn,
  invocations: WorkerInvocation[],
  skillsDir: string,
  workerTimeoutSeconds: number,
  dispatchOptions: DispatchParallelOptions,
): TimedDispatchOutcome {
  const startedAtMs = Date.now();
  const results = dispatchParallel(invocations, skillsDir, dispatchOptions);
  const elapsedMs = Date.now() - startedAtMs;
  const timeoutMs = workerTimeoutSeconds * 1000;

  if (elapsedMs <= timeoutMs) {
    return { results, timedOut: false };
  }

  // Index the dispatcher output by test case id; the first result for an id wins.
  const requestedIds = new Set(invocations.map((invocation) => invocation.testCaseId));
  const resultsById = new Map<string, WorkerResult>();
  for (const result of results) {
    if (result && typeof result.testCaseId === 'string' && !resultsById.has(result.testCaseId)) {
      resultsById.set(result.testCaseId, result);
    }
  }

  const timeoutFinding = `Worker timeout exceeded (${workerTimeoutSeconds}s) after ${elapsedMs}ms`;
  const timedOutResults: WorkerResult[] = invocations.map((invocation, index) => {
    const positional = results[index];
    // Positional fallback keeps results without a recognisable id usable, but never
    // borrows a result that belongs to a different invocation.
    const existing = resultsById.get(invocation.testCaseId)
      ?? (positional && !requestedIds.has(positional.testCaseId) ? positional : undefined);
    const findings = Array.isArray(existing?.findings) ? existing.findings : [];
    const recommendations = Array.isArray(existing?.recommendations) ? existing.recommendations : [];

    return {
      skill: existing?.skill ?? invocation.skill,
      status: 'failed',
      batchId: invocation.batchId,
      testCaseId: invocation.testCaseId,
      timestamp: existing?.timestamp ?? new Date().toISOString(),
      findings: [...findings, timeoutFinding],
      recommendations: recommendations.length > 0
        ? recommendations
        : ['Increase worker timeout or reduce batch complexity'],
      metrics: {
        ...(existing?.metrics ?? {}),
        elapsedMs,
        timeoutSeconds: workerTimeoutSeconds,
      },
      nextSkill: existing?.nextSkill ?? 'fixer',
    };
  });

  return { results: timedOutResults, timedOut: true };
}
|
|
198
|
+
|
|
199
|
+
/**
 * Combines two result lists, keeping one entry per `testCaseId`.
 *
 * An entry from `updates` replaces the entry with the same id from
 * `currentResults`; the position of an id is the position of its first
 * occurrence, and ids that appear only in `updates` are appended.
 *
 * @param currentResults Results known so far.
 * @param updates Newer results that take precedence.
 * @returns A new array with the merged results.
 */
function mergeWorkerResults(currentResults: WorkerResult[], updates: WorkerResult[]): WorkerResult[] {
  const latestById = currentResults
    .concat(updates)
    .reduce(
      (index, result) => index.set(result.testCaseId, result),
      new Map<string, WorkerResult>(),
    );

  return [...latestById.values()];
}
|
|
207
|
+
|
|
208
|
+
/**
 * Converts the raw value of a command-line flag to an integer.
 *
 * @param flag Flag name, used only in the error message.
 * @param rawValue Text supplied for the flag, or `undefined` when the flag was not given.
 * @returns The parsed integer, or `undefined` when `rawValue` is `undefined`.
 * @throws Error when the text is blank or does not convert to an integer.
 */
function parseIntegerArg(flag: string, rawValue: string | undefined): number | undefined {
  if (rawValue === undefined) {
    return undefined;
  }

  // Number('') and Number('   ') evaluate to 0, so blank input has to be rejected
  // explicitly instead of being accepted as a legitimate zero.
  const value = rawValue.trim() === '' ? Number.NaN : Number(rawValue);
  if (!Number.isInteger(value)) {
    throw new Error(`Invalid value for ${flag}: expected an integer, got "${rawValue}"`);
  }

  return value;
}
|
|
220
|
+
|
|
221
|
+
/**
 * Partitions a list of ids into consecutive groups of at most `batchSize` items.
 *
 * An empty input yields an empty result before the batch size is looked at.
 *
 * @param items Ids to distribute, kept in their original order.
 * @param batchSize Maximum group size; validated against the `batchSize` limits.
 * @returns The groups; only the last one may be shorter than `batchSize`.
 * @throws RangeError when `items` is non-empty and `batchSize` is invalid.
 */
export function splitIntoBatches(items: string[], batchSize: number = DEFAULT_CONFIG.batchSize): string[][] {
  if (items.length === 0) {
    return [];
  }

  const size = validateIntegerConfig('batchSize', batchSize);
  const groupCount = Math.ceil(items.length / size);

  return Array.from({ length: groupCount }, (_unused, group) =>
    items.slice(group * size, (group + 1) * size),
  );
}
|
|
231
|
+
|
|
232
|
+
/**
 * Ensures the directory for a batch exists and returns its path.
 *
 * The directory is named `batch-NN`, with the batch id zero-padded to at least
 * two digits. Missing parents are created; an existing directory is left alone.
 *
 * @param baseDir Directory that holds all batch directories.
 * @param batchId Batch number.
 * @returns Path of the batch directory.
 */
export function createBatchDirectory(baseDir: string, batchId: number): string {
  const folderName = `batch-${`${batchId}`.padStart(2, '0')}`;
  const target = path.join(baseDir, folderName);
  fs.mkdirSync(target, { recursive: true });
  return target;
}
|
|
237
|
+
|
|
238
|
+
/**
 * Computes the path of a batch directory without touching the file system.
 *
 * @param baseDir Directory that holds all batch directories.
 * @param batchId Batch number; zero-padded to at least two digits in the name.
 * @returns `<baseDir>/batch-NN`.
 */
export function getBatchDirectory(baseDir: string, batchId: number): string {
  const paddedId = `${batchId}`.padStart(2, '0');
  return path.join(baseDir, `batch-${paddedId}`);
}
|
|
241
|
+
|
|
242
|
+
/**
 * Ensures the directory for one test case exists inside a batch directory.
 *
 * Test case ids can originate from a JSON file, so the resolved target is checked
 * to stay inside `batchDir` before anything is created; an id such as `../x`
 * is rejected instead of creating directories elsewhere on disk.
 *
 * @param batchDir Directory of the batch.
 * @param testCaseId Test case id, used as the directory name.
 * @returns Path of the test case directory (`batchDir` joined with `testCaseId`).
 * @throws Error when the id resolves to a location outside `batchDir`.
 */
export function createTestCaseDirectory(batchDir: string, testCaseId: string): string {
  const tcDir = path.join(batchDir, testCaseId);

  // A relative path that climbs upwards (or is absolute, e.g. another drive on
  // Windows) means the target is not contained in the batch directory.
  const relative = path.relative(path.resolve(batchDir), path.resolve(tcDir));
  const escapesBatchDir =
    relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapesBatchDir) {
    throw new Error(`Invalid testCaseId "${testCaseId}": resolves outside the batch directory`);
  }

  fs.mkdirSync(tcDir, { recursive: true });
  return tcDir;
}
|
|
247
|
+
|
|
248
|
+
/**
 * Value returned by `runBatchLifecycle` and written to `summary.json`.
 * It declares the same fields as `OrchestratorSummary`.
 */
export interface BatchLifecycleResult {
  /** Number of batches that were processed. */
  totalBatches: number;
  /** Batches that ended as `passed`. */
  passedBatches: number;
  /** Batches that ended as `failed`. */
  failedBatches: number;
  /** Batches that ended as `escalated`. */
  escalatedBatches: number;
  /** Rounded mean of the batch scores; 0 when there are no batches. */
  overallScore: number;
  /** One report per batch, in processing order. */
  batchReports: BatchReport[];
  /** ISO-8601 time at which the summary was created. */
  timestamp: string;
}
|
|
257
|
+
|
|
258
|
+
/**
 * Runs the complete batch lifecycle for a workspace and returns its summary.
 *
 * Steps:
 * 1. Resolve and validate the configuration.
 * 2. Load `<workspace>/.agents/iteration/test-cases.json` when it exists (ids must then
 *    be explicit); otherwise generate test cases and write that file.
 * 3. Split the test case ids into batches and dispatch a `worker` invocation per id.
 * 4. Write each worker result to `<batch dir>/<test case id>/report.json`.
 * 5. Benchmark the workspace; when the score is below the threshold or any worker
 *    did not pass, run the fix loop and benchmark again for the reported score.
 * 6. Write `summary.json` into the iteration directory and return the same object.
 *
 * @param workspacePath Workspace root; resolved to an absolute path.
 * @param config Optional overrides for the default configuration.
 * @returns Counts per status, the rounded mean score and all batch reports.
 * @throws RangeError for invalid configuration values.
 * @throws Error when an existing `test-cases.json` cannot be read/parsed or its content is invalid.
 */
export function runBatchLifecycle(
  workspacePath: string,
  config: OrchestratorConfig = {},
): BatchLifecycleResult {
  const resolvedConfig = resolveConfig(config);

  const ws = path.resolve(workspacePath);
  const iterationDir = path.join(ws, '.agents', 'iteration');
  fs.mkdirSync(iterationDir, { recursive: true });

  // Sibling scripts are required at call time, as in the rest of this file.
  const { generateTestCases } = require('./generate-tests') as {
    generateTestCases: (workspacePath: string, outputPath?: string) => GeneratedTestCasesResult;
  };
  const { dispatchParallel } = require('./dispatch') as {
    dispatchParallel: DispatchParallelFn;
  };
  const { calculateBenchmark } = require('./benchmark') as {
    calculateBenchmark: CalculateBenchmarkFn;
  };

  const testCasesResultPath = path.join(iterationDir, 'test-cases.json');
  const existingTestCasesFile = fs.existsSync(testCasesResultPath);

  // Prefer agent-produced test cases if already present; otherwise generate a fallback.
  let testCasesResultRaw: unknown;
  if (existingTestCasesFile) {
    try {
      testCasesResultRaw = JSON.parse(fs.readFileSync(testCasesResultPath, 'utf-8'));
    } catch (e) {
      // Structural check instead of `any`: use a truthy `message` when the thrown
      // value is an object that has one, otherwise stringify the value itself.
      const thrown: unknown = e;
      const maybeMessage = typeof thrown === 'object' && thrown !== null
        ? (thrown as { message?: unknown }).message
        : undefined;
      const emsg = maybeMessage ? String(maybeMessage) : String(thrown);
      throw new Error(`Failed to parse existing test-cases.json: ${emsg}`);
    }
  } else {
    const generated = generateTestCases(ws);
    fs.writeFileSync(testCasesResultPath, JSON.stringify(generated, null, 2));
    testCasesResultRaw = generated;
  }

  const testCases = extractTestCaseArray(testCasesResultRaw);
  // Generated ids are only allowed for test cases this function produced itself.
  const testCaseIds = normalizeTestCaseIds(testCases, existingTestCasesFile);

  const dispatchOptions: DispatchParallelOptions = {
    workspacePath: ws,
    runnerCommand: resolvedConfig.subagentRunner || undefined,
    runnerTimeoutSeconds: resolvedConfig.workerTimeout,
  };

  const batches = splitIntoBatches(testCaseIds, resolvedConfig.batchSize);
  // Identical for every batch, so it is computed once.
  const skillsDir = path.join(ws, '.agents', 'skills', 'workspace-maxxing', 'skills');

  const batchReports: BatchReport[] = [];
  let passedBatches = 0;
  let failedBatches = 0;
  let escalatedBatches = 0;

  for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
    const batchId = batchIdx + 1;
    const batchDir = createBatchDirectory(iterationDir, batchId);
    const batchTestCases = batches[batchIdx];

    const invocations: WorkerInvocation[] = batchTestCases.map((tcId) => ({
      skill: 'worker',
      batchId,
      testCaseId: tcId,
    }));

    const workerDispatch = dispatchWithTimeout(
      dispatchParallel,
      invocations,
      skillsDir,
      resolvedConfig.workerTimeout,
      dispatchOptions,
    );
    const workerResults = workerDispatch.results;

    // Persist every worker result next to its test case.
    for (const result of workerResults) {
      const tcDir = createTestCaseDirectory(batchDir, result.testCaseId);
      fs.writeFileSync(path.join(tcDir, 'report.json'), JSON.stringify(result, null, 2));
    }

    const benchmarkResult = calculateBenchmark(ws);
    const batchScore = benchmarkResult.weightedScore;
    const hasWorkerFailures = workerResults.some((result) => result.status !== 'passed');
    const needsFixing = batchScore < resolvedConfig.scoreThreshold || hasWorkerFailures;

    let status: BatchReport['status'];
    let score: number;
    let findings: string[];

    if (needsFixing) {
      const fixResults = runFixLoop(
        workerResults,
        benchmarkResult.fixSuggestions,
        resolvedConfig.maxFixRetries,
        resolvedConfig.scoreThreshold,
        ws,
        resolvedConfig.workerTimeout,
        batchScore,
        dispatchParallel,
        calculateBenchmark,
        dispatchOptions,
        workerDispatch.timedOut
          ? [`Worker timeout exceeded (${resolvedConfig.workerTimeout}s) during worker dispatch`]
          : [],
      );

      status = fixResults.status;
      // Re-benchmark so the report carries the score after the fix attempts.
      score = calculateBenchmark(ws).weightedScore;
      findings = fixResults.findings;
    } else {
      status = 'passed';
      score = batchScore;
      findings = resolvedConfig.subagentRunner
        ? ['Batch passed threshold']
        : ['Batch passed threshold', 'Dispatch mode: simulated (configure --subagent-runner for external sub-agents)'];
    }

    if (status === 'escalated') {
      escalatedBatches++;
    } else if (status === 'failed') {
      failedBatches++;
    } else {
      passedBatches++;
    }

    batchReports.push({
      batchId,
      testCases: batchTestCases,
      score,
      status,
      findings,
      timestamp: new Date().toISOString(),
    });
  }

  const summary: BatchLifecycleResult = {
    totalBatches: batches.length,
    passedBatches,
    failedBatches,
    escalatedBatches,
    overallScore: batchReports.length > 0
      ? Math.round(batchReports.reduce((sum, r) => sum + r.score, 0) / batchReports.length)
      : 0,
    batchReports,
    timestamp: new Date().toISOString(),
  };

  fs.writeFileSync(
    path.join(iterationDir, 'summary.json'),
    JSON.stringify(summary, null, 2),
  );

  return summary;
}
|
|
415
|
+
|
|
416
|
+
/** Outcome of the fix loop for one batch. */
interface FixLoopResult {
  /** `passed`, `failed` (score still below threshold) or `escalated` (retries used up). */
  status: 'passed' | 'failed' | 'escalated';
  /** Notes accumulated during the fix attempts. */
  findings: string[];
}
|
|
420
|
+
|
|
421
|
+
/**
 * Repeatedly dispatches the `fixer` skill for non-passing results until the batch
 * passes or the retry budget is used up.
 *
 * Each pass: collect the results that are not `passed`, dispatch one fixer
 * invocation per such result, merge the fixer results over the current ones by
 * test case id, then benchmark the workspace again.
 *
 * Outcomes:
 * - `passed`: every result passed and the latest score reached the threshold.
 * - `failed`: every result passed but the score stayed below the threshold.
 * - `escalated`: non-passing results remain after `maxRetries` passes.
 *
 * @param workerResults Results of the initial worker dispatch.
 * @param fixSuggestions Benchmark suggestions, recorded as a single finding when non-empty.
 * @param maxRetries Maximum number of fix passes.
 * @param scoreThreshold Score required for `passed`.
 * @param workspacePath Workspace root used for the skills directory and benchmarking.
 * @param workerTimeout Time budget per dispatch, in seconds.
 * @param initialScore Score measured before the first pass.
 * @param dispatchParallel Dispatcher to call.
 * @param calculateBenchmark Benchmark function to call after each pass.
 * @param dispatchOptions Options forwarded to the dispatcher.
 * @param initialFindings Findings to start from; the array is copied, not mutated.
 * @returns Final status and the accumulated findings.
 */
function runFixLoop(
  workerResults: WorkerResult[],
  fixSuggestions: string[],
  maxRetries: number,
  scoreThreshold: number,
  workspacePath: string,
  workerTimeout: number,
  initialScore: number,
  dispatchParallel: DispatchParallelFn,
  calculateBenchmark: CalculateBenchmarkFn,
  dispatchOptions: DispatchParallelOptions,
  initialFindings: string[] = [],
): FixLoopResult {
  const findings: string[] = [...initialFindings];
  if (fixSuggestions.length > 0) {
    findings.push(`Fix suggestions: ${fixSuggestions.join('; ')}`);
  }

  const everyResultPassed = (results: WorkerResult[]): boolean =>
    results.every((entry) => entry.status === 'passed');

  // Result used whenever nothing is left to fix but the score is still too low.
  const scoreTooLow = (score: number): FixLoopResult => ({
    status: 'failed',
    findings: [
      ...findings,
      `No failing worker outputs remain, but score ${score} is below threshold ${scoreThreshold}`,
    ],
  });

  let currentResults = [...workerResults];
  let latestScore = initialScore;

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    const pending = currentResults.filter((entry) => entry.status !== 'passed');

    if (pending.length === 0) {
      return latestScore >= scoreThreshold
        ? { status: 'passed', findings }
        : scoreTooLow(latestScore);
    }

    const fixInvocations = pending.map((entry) => ({
      skill: 'fixer',
      batchId: entry.batchId,
      testCaseId: entry.testCaseId,
    }));
    const skillsDir = path.join(workspacePath, '.agents', 'skills', 'workspace-maxxing', 'skills');

    const { results: fixResults, timedOut } = dispatchWithTimeout(
      dispatchParallel,
      fixInvocations,
      skillsDir,
      workerTimeout,
      dispatchOptions,
    );
    currentResults = mergeWorkerResults(currentResults, fixResults);

    findings.push(`Fix attempt ${attempt}: ${fixResults.length} fixes applied`);
    if (timedOut) {
      findings.push(`Worker timeout exceeded (${workerTimeout}s) during fix attempt ${attempt}`);
    }

    latestScore = calculateBenchmark(workspacePath).weightedScore;
    if (latestScore >= scoreThreshold && everyResultPassed(currentResults)) {
      return { status: 'passed', findings };
    }
  }

  // Retry budget used up: distinguish "only the score is low" from "results still failing".
  if (everyResultPassed(currentResults)) {
    return scoreTooLow(latestScore);
  }

  return { status: 'escalated', findings: [...findings, 'Max retries exhausted'] };
}
|
|
500
|
+
|
|
501
|
+
// Command-line entry point: runs only when this file is executed directly.
// Prints the run summary as JSON on success; on any error prints the message
// to stderr and exits with status 1.
if (require.main === module) {
  const usage = 'Usage: node orchestrator.ts --workspace <path> [--batch-size <n>] [--score-threshold <n>] [--max-fix-retries <n>] [--worker-timeout <s>] [--subagent-runner <command>]';
  const args = process.argv.slice(2);

  // Returns the argument following `flag`, or undefined when the flag is absent.
  // A flag given as the very last argument has no value; that is reported instead
  // of being silently treated as "flag not provided".
  const parseArg = (flag: string): string | undefined => {
    const idx = args.indexOf(flag);
    if (idx === -1) {
      return undefined;
    }

    const value = args[idx + 1];
    if (value === undefined) {
      throw new Error(`Missing value for ${flag}\n${usage}`);
    }
    return value;
  };

  try {
    const workspace = parseArg('--workspace');
    const batchSizeStr = parseArg('--batch-size');
    const scoreThresholdStr = parseArg('--score-threshold');
    const maxFixRetriesStr = parseArg('--max-fix-retries');
    const workerTimeoutStr = parseArg('--worker-timeout');
    // The environment variable is only a fallback for the command-line flag.
    const subagentRunner = parseArg('--subagent-runner') ?? process.env.WORKSPACE_MAXXING_SUBAGENT_RUNNER;

    if (!workspace) {
      console.error(usage);
      process.exit(1);
    }

    const batchSize = parseIntegerArg('--batch-size', batchSizeStr);
    const scoreThreshold = parseIntegerArg('--score-threshold', scoreThresholdStr);
    const maxFixRetries = parseIntegerArg('--max-fix-retries', maxFixRetriesStr);
    const workerTimeout = parseIntegerArg('--worker-timeout', workerTimeoutStr);

    // Only explicitly supplied options are set, so defaults apply to the rest.
    const config: OrchestratorConfig = {};
    if (batchSize !== undefined) config.batchSize = batchSize;
    if (scoreThreshold !== undefined) config.scoreThreshold = scoreThreshold;
    if (maxFixRetries !== undefined) config.maxFixRetries = maxFixRetries;
    if (workerTimeout !== undefined) config.workerTimeout = workerTimeout;
    if (subagentRunner) config.subagentRunner = subagentRunner;

    const summary = runBatchLifecycle(workspace, config);
    console.log(JSON.stringify(summary, null, 2));
  } catch (error) {
    console.error(error instanceof Error ? error.message : String(error));
    process.exit(1);
  }
}
|