jumpstart-mode 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cursorrules +44 -42
- package/.github/agents/jumpstart-researcher.agent.md +1 -1
- package/.github/copilot-instructions.md +71 -69
- package/.jumpstart/agents/analyst.md +539 -537
- package/.jumpstart/agents/architect.md +740 -738
- package/.jumpstart/agents/developer.md +715 -713
- package/.jumpstart/compat/assistant-mapping.md +32 -0
- package/.jumpstart/guides/context7-usage.md +242 -0
- package/.jumpstart/templates/documentation-audit.md +8 -2
- package/CLAUDE.md +54 -52
- package/bin/headless-runner.js +658 -0
- package/bin/holodeck.js +512 -0
- package/bin/lib/headless-runner.js +658 -0
- package/bin/lib/holodeck.js +512 -0
- package/package.json +13 -2
|
@@ -0,0 +1,658 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* headless-runner.js — Jump Start Headless Agent Emulation
|
|
5
|
+
*
|
|
6
|
+
* Runs Jump Start agents headlessly with LLM-powered User Proxy.
|
|
7
|
+
* Replaces the VS Code Chat window for automated testing.
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* node bin/headless-runner.js --agent architect --persona compliant-user
|
|
11
|
+
* node bin/headless-runner.js --agent architect --mock
|
|
12
|
+
* node bin/headless-runner.js --agent challenger,analyst,pm,architect --scenario ecommerce
|
|
13
|
+
*
|
|
14
|
+
* Options:
|
|
15
|
+
* --agent <names> Comma-separated agent names to run
|
|
16
|
+
* --persona <name> User proxy persona (default: compliant-user)
|
|
17
|
+
* --model <id> LLM model for agent (default: openai/gpt-4o)
|
|
18
|
+
* --proxy-model <id> LLM model for user proxy (default: gemini/gemini-2.5-flash)
|
|
19
|
+
* --mock Use mock responses (no API calls)
|
|
20
|
+
* --scenario <name> Load scenario from tests/e2e/scenarios/
|
|
21
|
+
* --output <dir> Output directory (default: tests/e2e/.tmp/)
|
|
22
|
+
* --dry-run Don't write files, just simulate
|
|
23
|
+
* --verbose Enable verbose logging
|
|
24
|
+
* --max-turns <n> Maximum conversation turns (default: 50)
|
|
25
|
+
* --help Show help
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
require('dotenv').config();
|
|
29
|
+
|
|
30
|
+
const fs = require('fs');
|
|
31
|
+
const path = require('path');
|
|
32
|
+
const chalk = require('chalk');
|
|
33
|
+
|
|
34
|
+
const { createProvider, listModels } = require('./lib/llm-provider');
|
|
35
|
+
const { createToolBridge } = require('./lib/tool-bridge');
|
|
36
|
+
const { getToolsForPhase } = require('./lib/tool-schemas');
|
|
37
|
+
const { createMockRegistry, createPersonaRegistry } = require('./lib/mock-responses');
|
|
38
|
+
const { SimulationTracer } = require('./lib/simulation-tracer');
|
|
39
|
+
|
|
40
|
+
// ─── Configuration ───────────────────────────────────────────────────────────
|
|
41
|
+
|
|
42
|
+
// Package root, resolved as the parent of this script's directory.
const ROOT_DIR = path.join(__dirname, '..');

// Agent prompt files (<agent>.md) are read from here.
const AGENTS_DIR = path.join(ROOT_DIR, '.jumpstart', 'agents');

// Personas, scenarios, run workspaces and reports all sit under tests/e2e.
const E2E_DIR = path.join(ROOT_DIR, 'tests', 'e2e');
const PERSONAS_DIR = path.join(E2E_DIR, 'personas');
const SCENARIOS_DIR = path.join(E2E_DIR, 'scenarios');
const OUTPUT_DIR = path.join(E2E_DIR, '.tmp');
const REPORTS_DIR = path.join(E2E_DIR, 'reports');

// Agents the runner accepts. Order matters: scenario fixture directories are
// looked up as "0<position>-<name>" using each agent's 1-based position here.
const AGENT_PHASES = [
  'scout',
  'challenger',
  'analyst',
  'pm',
  'architect',
  'developer'
];

// Defaults applied when the matching command-line option is not supplied.
const DEFAULT_CONFIG = {
  agentModel: 'openai/gpt-4o',
  proxyModel: 'gemini/gemini-2.5-flash',
  persona: 'compliant-user',
  maxTurns: 50,
  reasoningEffort: 'medium'
};
|
|
58
|
+
|
|
59
|
+
// ─── Argument Parsing ────────────────────────────────────────────────────────
|
|
60
|
+
|
|
61
|
+
/**
 * Parses command-line arguments into a runner options object.
 *
 * Flags that take a value consume the argument that follows them.
 * Arguments that are not recognized are ignored.
 *
 * @param {string[]} [argv=process.argv.slice(2)] - Raw arguments, without the
 *   node executable and script path.
 * @returns {{agents: string[], persona: string, model: string,
 *   proxyModel: string, mock: boolean, scenario: (string|null), output: string,
 *   dryRun: boolean, verbose: boolean, maxTurns: number, help: boolean}}
 *   Parsed options; anything not supplied keeps its default from
 *   DEFAULT_CONFIG / OUTPUT_DIR.
 * @throws {Error} If a value-taking flag is the last argument, or if
 *   --max-turns is not a positive integer.
 */
function parseArgs(argv = process.argv.slice(2)) {
  const options = {
    agents: [],
    persona: DEFAULT_CONFIG.persona,
    model: DEFAULT_CONFIG.agentModel,
    proxyModel: DEFAULT_CONFIG.proxyModel,
    mock: false,
    scenario: null,
    output: OUTPUT_DIR,
    dryRun: false,
    verbose: false,
    maxTurns: DEFAULT_CONFIG.maxTurns,
    help: false
  };

  let index = 0;

  // Advances past the current flag and returns its value. Fails loudly when
  // the flag is the last argument instead of yielding `undefined`.
  const takeValue = () => {
    const flag = argv[index];
    index += 1;
    if (index >= argv.length) {
      throw new Error(`Option ${flag} requires a value`);
    }
    return argv[index];
  };

  while (index < argv.length) {
    switch (argv[index]) {
      case '--agent':
      case '-a':
        options.agents = takeValue()
          .split(',')
          .map((name) => name.trim().toLowerCase())
          // Drop empty entries produced by stray commas ("a,,b" or "a,").
          .filter((name) => name.length > 0);
        break;
      case '--persona':
      case '-p':
        options.persona = takeValue();
        break;
      case '--model':
      case '-m':
        options.model = takeValue();
        break;
      case '--proxy-model':
        options.proxyModel = takeValue();
        break;
      case '--mock':
        options.mock = true;
        break;
      case '--scenario':
      case '-s':
        options.scenario = takeValue();
        break;
      case '--output':
      case '-o':
        options.output = takeValue();
        break;
      case '--dry-run':
        options.dryRun = true;
        break;
      case '--verbose':
      case '-v':
        options.verbose = true;
        break;
      case '--max-turns': {
        const raw = takeValue();
        const turns = Number.parseInt(raw, 10);
        // A NaN limit would make every "turnCount < maxTurns" comparison
        // false, so the conversation loop would never execute.
        if (!Number.isInteger(turns) || turns < 1) {
          throw new Error(`Option --max-turns expects a positive integer, got "${raw}"`);
        }
        options.maxTurns = turns;
        break;
      }
      case '--help':
      case '-h':
        options.help = true;
        break;
      default:
        // Unrecognized arguments are ignored.
        break;
    }
    index += 1;
  }

  return options;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Prints usage information for the headless runner to stdout.
 *
 * The agent list and the option defaults are taken from AGENT_PHASES and
 * DEFAULT_CONFIG so the help text cannot drift from what the runner accepts.
 * The model list comes from listModels().
 */
function showHelp() {
  const agentList = AGENT_PHASES.join(',');
  const modelList = listModels()
    .map((id) => `  ${id}`)
    .join('\n');

  console.log(`
${chalk.bold('Jump Start Headless Agent Runner')}

Runs Jump Start agents headlessly with LLM-powered User Proxy.

${chalk.bold('Usage:')}
  node bin/headless-runner.js --agent <names> [options]

${chalk.bold('Options:')}
  --agent, -a <names>     Comma-separated agent names (${agentList})
  --persona, -p <name>    User proxy persona (default: ${DEFAULT_CONFIG.persona})
  --model, -m <id>        LLM model for agent (default: ${DEFAULT_CONFIG.agentModel})
  --proxy-model <id>      LLM model for user proxy (default: ${DEFAULT_CONFIG.proxyModel})
  --mock                  Use mock responses (no API calls)
  --scenario, -s <name>   Load scenario from tests/e2e/scenarios/
  --output, -o <dir>      Output directory (default: tests/e2e/.tmp/)
  --dry-run               Simulate without writing files
  --verbose, -v           Enable verbose logging
  --max-turns <n>         Maximum conversation turns (default: ${DEFAULT_CONFIG.maxTurns})
  --help, -h              Show this help

${chalk.bold('Examples:')}
  # Run architect with compliant user (mock mode, no API)
  node bin/headless-runner.js --agent architect --mock

  # Run architect with real LLM
  node bin/headless-runner.js --agent architect --persona compliant-user

  # Run full pipeline with specific model
  node bin/headless-runner.js --agent challenger,analyst,pm,architect --model openai/gpt-5-mini

  # Run with scenario fixtures
  node bin/headless-runner.js --agent architect --scenario ecommerce

${chalk.bold('Available Models:')}
${modelList}

${chalk.bold('Available Personas:')}
  compliant-user    Approves quickly, picks sensible defaults
  strict-user       Asks follow-ups, may reject first proposals
  enterprise-user   Enterprise preferences, security-focused
`);
}
|
|
169
|
+
|
|
170
|
+
// ─── Core Runner ─────────────────────────────────────────────────────────────
|
|
171
|
+
|
|
172
|
+
/**
 * Runs Jump Start agents without a chat UI.
 *
 * For each requested agent the runner loads the agent prompt, repeatedly asks
 * the agent provider for a completion, executes any tool calls through the
 * tool bridge, and records the phase outcome with the tracer. In live mode,
 * questions raised through the tool bridge are answered by a second
 * "user proxy" provider primed with a persona prompt.
 */
class HeadlessRunner {
  /**
   * @param {object} options - Parsed command-line options (agents, persona,
   *   model, proxyModel, mock, scenario, output, dryRun, verbose, maxTurns).
   */
  constructor(options) {
    this.options = options;
    this.verbose = options.verbose;

    // Scenario runs use a directory named after the scenario; ad-hoc runs
    // get a timestamped directory.
    this.workspaceDir = options.scenario
      ? path.join(options.output, options.scenario)
      : path.join(options.output, `run-${Date.now()}`);

    this.tracer = new SimulationTracer(this.workspaceDir, options.scenario || 'headless');

    // Only mock mode needs canned responses.
    this.mockRegistry = options.mock
      ? createPersonaRegistry(options.persona)
      : null;

    // Created per agent in initializeProviders().
    this.agentProvider = null;
    this.proxyProvider = null;
    this.toolBridge = null;

    // Conversation state, reset at the start of every runAgent().
    this.conversationHistory = [];
    this.userProxyHistory = [];
    this.turnCount = 0;
  }

  /**
   * Writes a prefixed message to stdout.
   *
   * @param {string} message - Text to print.
   * @param {string} [level='info'] - One of info, success, warn, error, debug.
   *   Debug messages are printed only when verbose mode is on.
   */
  log(message, level = 'info') {
    if (level === 'debug' && !this.verbose) return;

    const prefix = {
      info: chalk.blue('ℹ'),
      success: chalk.green('✓'),
      warn: chalk.yellow('⚠'),
      error: chalk.red('✗'),
      debug: chalk.gray('·')
    }[level] || '';

    console.log(`${prefix} ${message}`);
  }

  /**
   * Creates the workspace directory tree, then seeds it with scenario
   * fixtures (when a scenario is selected) and the base .jumpstart files.
   *
   * @returns {Promise<void>}
   */
  async setup() {
    const dirs = [
      this.workspaceDir,
      path.join(this.workspaceDir, 'specs'),
      path.join(this.workspaceDir, 'specs', 'decisions'),
      path.join(this.workspaceDir, 'specs', 'insights'),
      path.join(this.workspaceDir, '.jumpstart'),
      path.join(this.workspaceDir, '.jumpstart', 'state')
    ];

    for (const dir of dirs) {
      // Recursive mkdir does nothing when the directory already exists.
      fs.mkdirSync(dir, { recursive: true });
    }

    // Scenario fixtures are copied first so that a scenario-provided
    // config.yaml wins over the base one (the base copy never overwrites).
    if (this.options.scenario) {
      const scenarioDir = path.join(SCENARIOS_DIR, this.options.scenario);
      if (fs.existsSync(scenarioDir)) {
        this.copyScenarioFixtures(scenarioDir);
      } else {
        this.log(`Scenario fixtures not found: ${scenarioDir}. Continuing without them.`, 'warn');
      }
    }

    this.copyJumpstartConfig();

    this.log(`Workspace initialized: ${this.workspaceDir}`, 'success');
  }

  /**
   * Copies a scenario's config.yaml and per-phase markdown artifacts into the
   * workspace.
   *
   * Phase artifacts are read from "<scenarioDir>/0<n>-<phase>", where n is the
   * phase's 1-based position in AGENT_PHASES. Files whose name contains
   * "insights" go to specs/insights; all other .md files go to specs.
   *
   * @param {string} scenarioDir - Directory holding the scenario fixtures.
   */
  copyScenarioFixtures(scenarioDir) {
    const configFile = path.join(scenarioDir, 'config.yaml');
    if (fs.existsSync(configFile)) {
      fs.copyFileSync(configFile, path.join(this.workspaceDir, '.jumpstart', 'config.yaml'));
    }

    for (const [position, phase] of AGENT_PHASES.entries()) {
      const phaseDir = path.join(scenarioDir, `0${position + 1}-${phase}`);
      if (!fs.existsSync(phaseDir)) continue;

      for (const entry of fs.readdirSync(phaseDir, { withFileTypes: true })) {
        // copyFileSync cannot copy a directory, so skip those.
        if (entry.isDirectory() || !entry.name.endsWith('.md')) continue;

        const src = path.join(phaseDir, entry.name);
        const dest = entry.name.includes('insights')
          ? path.join(this.workspaceDir, 'specs', 'insights', entry.name)
          : path.join(this.workspaceDir, 'specs', entry.name);
        fs.copyFileSync(src, dest);
      }
    }
  }

  /**
   * Copies the package's base .jumpstart files (config.yaml, roadmap.md,
   * glossary.md and the agents directory) into the workspace. Files that
   * already exist in the workspace are left untouched.
   */
  copyJumpstartConfig() {
    const filesToCopy = ['config.yaml', 'roadmap.md', 'glossary.md'];
    const srcJumpstart = path.join(ROOT_DIR, '.jumpstart');
    const destJumpstart = path.join(this.workspaceDir, '.jumpstart');

    for (const file of filesToCopy) {
      const src = path.join(srcJumpstart, file);
      const dest = path.join(destJumpstart, file);
      if (fs.existsSync(src) && !fs.existsSync(dest)) {
        fs.copyFileSync(src, dest);
      }
    }

    const srcAgents = path.join(srcJumpstart, 'agents');
    const destAgents = path.join(destJumpstart, 'agents');
    if (!fs.existsSync(srcAgents)) return;

    fs.mkdirSync(destAgents, { recursive: true });

    for (const entry of fs.readdirSync(srcAgents, { withFileTypes: true })) {
      // copyFileSync cannot copy a directory, so skip those.
      if (entry.isDirectory()) continue;

      const dest = path.join(destAgents, entry.name);
      if (!fs.existsSync(dest)) {
        fs.copyFileSync(path.join(srcAgents, entry.name), dest);
      }
    }
  }

  /**
   * Creates the agent provider, the user proxy provider (live mode only) and
   * the tool bridge.
   *
   * @param {string} agentName - Agent about to run; currently not used here.
   * @returns {Promise<void>}
   */
  async initializeProviders(agentName) {
    this.agentProvider = createProvider({
      model: this.options.model,
      mode: this.options.mock ? 'mock' : 'live',
      mockResponses: this.mockRegistry,
      reasoningEffort: DEFAULT_CONFIG.reasoningEffort
    });

    // The proxy LLM is only consulted in live mode.
    if (!this.options.mock) {
      this.proxyProvider = createProvider({
        model: this.options.proxyModel,
        mode: 'live',
        reasoningEffort: 'low'
      });
    }

    this.toolBridge = createToolBridge({
      workspaceDir: this.workspaceDir,
      tracer: this.tracer,
      dryRun: this.options.dryRun,
      // null leaves user-question handling to the tool bridge itself.
      onUserProxyCall: this.options.mock
        ? null
        : (args) => this.callUserProxy(args)
    });
  }

  /**
   * Reads the prompt file for an agent from AGENTS_DIR.
   *
   * @param {string} agentName - Agent name; the file read is "<agentName>.md".
   * @returns {string} Prompt text.
   * @throws {Error} If the prompt file does not exist.
   */
  loadAgentPrompt(agentName) {
    const agentFile = path.join(AGENTS_DIR, `${agentName}.md`);

    if (!fs.existsSync(agentFile)) {
      throw new Error(`Agent file not found: ${agentFile}`);
    }

    return fs.readFileSync(agentFile, 'utf8');
  }

  /**
   * Reads the persona prompt selected in the options.
   *
   * Falls back to compliant-user.md when the persona file is missing, and to
   * a built-in cooperative persona when that file is missing as well.
   *
   * @returns {string} Persona prompt text.
   */
  loadPersonaPrompt() {
    const personaFile = path.join(PERSONAS_DIR, `${this.options.persona}.md`);
    if (fs.existsSync(personaFile)) {
      return fs.readFileSync(personaFile, 'utf8');
    }

    this.log(`Persona file not found: ${personaFile}. Using default persona.`, 'debug');

    const defaultFile = path.join(PERSONAS_DIR, 'compliant-user.md');
    if (fs.existsSync(defaultFile)) {
      return fs.readFileSync(defaultFile, 'utf8');
    }

    // Built-in fallback persona.
    return [
      'You are a cooperative user who approves requests and picks sensible defaults.',
      'When asked for approval, say "Approved".',
      'When asked to choose, pick the recommended option or the first option.',
      'Be brief and supportive.'
    ].join('\n');
  }

  /**
   * Forwards the agent's questions to the user proxy LLM and converts its
   * reply into structured answers. Passed to the tool bridge in live mode.
   *
   * @param {{questions: object[]}} askQuestionsArgs - Questions raised by the agent.
   * @returns {Promise<{answers: object}>} Answers keyed by question header.
   */
  async callUserProxy(askQuestionsArgs) {
    const questionText = this.formatQuestionsForProxy(askQuestionsArgs);

    this.userProxyHistory.push({
      role: 'user',
      content: questionText
    });

    this.log(`[Agent asks] ${askQuestionsArgs.questions.map(q => q.header).join(', ')}`, 'debug');

    const response = await this.proxyProvider.completion(this.userProxyHistory);
    const rawAnswer = response.choices[0].message.content;
    // Treat a missing reply as an empty answer so that option questions fall
    // back to their recommended/first option instead of crashing.
    const proxyAnswer = typeof rawAnswer === 'string' ? rawAnswer : '';

    this.userProxyHistory.push({
      role: 'assistant',
      content: proxyAnswer
    });

    this.log(`[User Proxy] ${proxyAnswer.substring(0, 100)}...`, 'debug');

    // Not every tracer implementation exposes this hook.
    if (this.tracer.logUserProxyExchange) {
      this.tracer.logUserProxyExchange(askQuestionsArgs, proxyAnswer);
    }

    return this.parseProxyResponse(askQuestionsArgs, proxyAnswer);
  }

  /**
   * Renders the agent's questions as a plain-text prompt for the user proxy.
   *
   * @param {{questions: object[]}} args - Each question has `header` and
   *   `question`, and optionally `options` (entries with `label`, and
   *   optionally `recommended` and `description`).
   * @returns {string} Prompt text.
   */
  formatQuestionsForProxy(args) {
    let text = 'The development agent is asking you the following:\n\n';

    for (const q of args.questions) {
      text += `**${q.header}**: ${q.question}\n`;

      if (q.options && q.options.length > 0) {
        text += 'Options:\n';
        for (const opt of q.options) {
          const rec = opt.recommended ? ' (recommended)' : '';
          const desc = opt.description ? ` - ${opt.description}` : '';
          text += `  - ${opt.label}${rec}${desc}\n`;
        }
      } else {
        text += '(Free text response expected)\n';
      }
      text += '\n';
    }

    text += 'Please respond with your selection or answer for each question.';
    return text;
  }

  /**
   * Finds which option labels are mentioned in an answer.
   *
   * Longer labels are matched first and claim the text they cover, so a label
   * that is part of a longer one (e.g. "React" inside "React Native") only
   * counts when it also appears somewhere else in the answer.
   *
   * @param {object[]} options - Option objects with a `label`.
   * @param {string} answerLower - Lower-cased answer text.
   * @returns {Set<string>} Labels found in the answer.
   */
  findMentionedLabels(options, answerLower) {
    const claimed = []; // [start, end) spans already attributed to a label
    const mentioned = new Set();

    const labelsLongestFirst = options
      .map((opt) => opt.label)
      .filter((label) => typeof label === 'string' && label.length > 0)
      .sort((a, b) => b.length - a.length);

    for (const label of labelsLongestFirst) {
      const needle = label.toLowerCase();
      let start = answerLower.indexOf(needle);

      while (start !== -1) {
        const end = start + needle.length;
        const overlaps = claimed.some(([from, to]) => start < to && end > from);
        if (!overlaps) {
          claimed.push([start, end]);
          mentioned.add(label);
        }
        start = answerLower.indexOf(needle, start + 1);
      }
    }

    return mentioned;
  }

  /**
   * Converts the proxy's free-form reply into one answer per question.
   *
   * Option questions select the labels mentioned in the reply (only the first
   * in option order unless `multiSelect` is set), falling back to the
   * recommended option, then the first option. Questions without options
   * receive the whole reply as free text.
   *
   * @param {{questions: object[]}} args - Questions that were asked.
   * @param {string} proxyAnswer - Raw reply from the user proxy.
   * @returns {{answers: Object<string, {selected: string[],
   *   freeText: (string|null), skipped: boolean}>}} Answers keyed by header.
   */
  parseProxyResponse(args, proxyAnswer) {
    const answerText = typeof proxyAnswer === 'string' ? proxyAnswer : '';
    const answerLower = answerText.toLowerCase();
    const answers = {};

    for (const q of args.questions) {
      if (!q.options || q.options.length === 0) {
        answers[q.header] = {
          selected: [],
          freeText: answerText,
          skipped: false
        };
        continue;
      }

      const mentioned = this.findMentionedLabels(q.options, answerLower);
      let selected = q.options
        .map((opt) => opt.label)
        .filter((label) => mentioned.has(label));

      if (!q.multiSelect) {
        selected = selected.slice(0, 1);
      }

      // Nothing recognizable in the reply: take the recommended option, or
      // the first one when none is marked recommended.
      if (selected.length === 0) {
        const fallback = q.options.find(o => o.recommended) || q.options[0];
        selected = [fallback.label];
      }

      answers[q.header] = {
        selected: selected,
        freeText: q.allowFreeformInput ? answerText : null,
        skipped: false
      };
    }

    return { answers };
  }

  /**
   * Runs a single agent phase until it signals completion, an error occurs,
   * or the turn limit is reached.
   *
   * @param {string} agentName - Agent to run.
   * @returns {Promise<string>} 'PASS' when a completion signal was seen,
   *   'FAIL' when a turn threw, 'INCOMPLETE' when the turn limit ran out
   *   while the session was still active.
   * @throws {Error} If the agent prompt cannot be loaded.
   */
  async runAgent(agentName) {
    this.log(`\n${'═'.repeat(60)}`, 'info');
    this.log(`Running agent: ${chalk.bold(agentName)}`, 'info');
    this.log(`${'═'.repeat(60)}`, 'info');

    this.tracer.startPhase(agentName);

    await this.initializeProviders(agentName);

    const agentPrompt = this.loadAgentPrompt(agentName);
    const personaPrompt = this.loadPersonaPrompt();

    // Each phase starts from fresh histories seeded with its system prompt.
    this.conversationHistory = [
      { role: 'system', content: agentPrompt }
    ];

    this.userProxyHistory = [
      { role: 'system', content: personaPrompt }
    ];

    const tools = getToolsForPhase(agentName);

    const startMessage = this.getAgentStartMessage(agentName);
    this.conversationHistory.push({ role: 'user', content: startMessage });

    this.turnCount = 0;
    let sessionActive = true;
    let finalStatus = 'PASS';

    while (sessionActive && this.turnCount < this.options.maxTurns) {
      this.turnCount++;
      this.log(`Turn ${this.turnCount}/${this.options.maxTurns}`, 'debug');

      try {
        const response = await this.agentProvider.completion(
          this.conversationHistory,
          tools
        );

        const message = response.choices[0].message;
        this.conversationHistory.push(message);

        if (message.tool_calls && message.tool_calls.length > 0) {
          // Tool results are appended in call order for the next completion.
          for (const toolCall of message.tool_calls) {
            this.log(`Tool: ${toolCall.function.name}`, 'debug');

            const result = await this.toolBridge.execute(toolCall);
            this.conversationHistory.push(result);
          }
        } else if (message.content) {
          this.log(`Agent: ${message.content.substring(0, 100)}...`, 'debug');

          // NOTE(review): a text reply without a completion signal leaves the
          // session active, and the next turn calls the agent again without
          // adding a new user message — confirm this is intended.
          if (this.isPhaseComplete(message.content)) {
            this.log(`Phase ${agentName} complete!`, 'success');
            sessionActive = false;
          }
        }
      } catch (error) {
        this.log(`Error: ${error.message}`, 'error');
        finalStatus = 'FAIL';
        sessionActive = false;
      }
    }

    // The limit only counts as "reached" if the session never ended on its
    // own. A phase that completes (or fails) on the very last permitted turn
    // keeps its PASS/FAIL status.
    if (sessionActive) {
      this.log(`Max turns reached (${this.options.maxTurns})`, 'warn');
      finalStatus = 'INCOMPLETE';
    }

    this.tracer.endPhase(agentName, finalStatus);

    const usage = this.agentProvider.getUsage();
    this.log(`Usage: ${usage.totalTokens} tokens, ${usage.calls} calls`, 'info');

    return finalStatus;
  }

  /**
   * Returns the opening user message that activates an agent.
   *
   * @param {string} agentName - Agent name.
   * @returns {string} Activation message; agents without a dedicated message
   *   get a generic "Run /jumpstart.<agentName>".
   */
  getAgentStartMessage(agentName) {
    const activations = {
      scout: 'Run /jumpstart.scout to analyze this codebase.',
      challenger: 'Run /jumpstart.challenge. The problem I want to solve is: Build a modern web application.',
      analyst: 'Run /jumpstart.analyze. Please create the product brief based on the challenger brief.',
      pm: 'Run /jumpstart.plan. Please create the PRD based on the product brief.',
      architect: 'Run /jumpstart.architect. Please create the architecture and implementation plan.',
      developer: 'Run /jumpstart.build. Please implement the first task from the implementation plan.'
    };

    return activations[agentName] || `Run /jumpstart.${agentName}`;
  }

  /**
   * Checks whether an agent's text reply contains one of the phrases that
   * mark the end of a phase (case-insensitive substring match).
   *
   * @param {string} content - Agent reply text.
   * @returns {boolean} True when a completion phrase is present.
   */
  isPhaseComplete(content) {
    const completionSignals = [
      'phase gate approval',
      'artifact is ready for review',
      'please review and approve',
      'awaiting your approval',
      'submitted for approval'
    ];

    const lowerContent = content.toLowerCase();
    return completionSignals.some(signal => lowerContent.includes(signal));
  }

  /**
   * Runs every requested agent in order, writes a JSON report to REPORTS_DIR
   * and prints a summary.
   *
   * Agent names that are not in AGENT_PHASES are skipped with a warning.
   *
   * @returns {Promise<number>} Process exit code: 0 when at least one agent
   *   ran and every agent that ran passed, otherwise 1.
   */
  async run() {
    const startTime = Date.now();
    this.log('Starting headless runner...', 'info');
    this.log(`Mode: ${this.options.mock ? 'MOCK' : 'LIVE'}`, 'info');
    this.log(`Agents: ${this.options.agents.join(', ')}`, 'info');
    this.log(`Persona: ${this.options.persona}`, 'info');

    await this.setup();

    const results = {};

    for (const agent of this.options.agents) {
      if (!AGENT_PHASES.includes(agent)) {
        this.log(`Unknown agent: ${agent}. Skipping.`, 'warn');
        continue;
      }

      results[agent] = await this.runAgent(agent);
    }

    const report = this.tracer.generateReport();
    report.runtime = {
      total_ms: Date.now() - startTime,
      mode: this.options.mock ? 'mock' : 'live',
      model: this.options.model,
      persona: this.options.persona
    };
    report.results = results;

    const reportPath = path.join(
      REPORTS_DIR,
      `headless-${this.options.scenario || 'run'}-${Date.now()}.json`
    );

    fs.mkdirSync(REPORTS_DIR, { recursive: true });
    fs.writeFileSync(reportPath, JSON.stringify(report, null, 2));
    this.log(`Report saved: ${reportPath}`, 'success');

    const passed = Object.values(results).filter(r => r === 'PASS').length;
    const total = Object.keys(results).length;

    console.log('\n' + '═'.repeat(60));
    console.log(chalk.bold('Summary'));
    console.log('═'.repeat(60));
    console.log(`Agents run: ${total}`);
    console.log(`Passed: ${chalk.green(passed)}`);
    console.log(`Failed: ${chalk.red(total - passed)}`);
    console.log(`Duration: ${((Date.now() - startTime) / 1000).toFixed(1)}s`);
    console.log(`Output: ${this.workspaceDir}`);

    // A run in which every requested agent was skipped must not look green.
    return total > 0 && passed === total ? 0 : 1;
  }
}
|
|
630
|
+
|
|
631
|
+
// ─── Main Entry Point ────────────────────────────────────────────────────────
|
|
632
|
+
|
|
633
|
+
/**
 * Command-line entry point: parses the arguments, handles --help and the
 * "no agents" error, then runs the requested agents and exits with the
 * runner's exit code.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const cliOptions = parseArgs();
  const { help: wantsHelp, agents: requestedAgents } = cliOptions;

  if (wantsHelp) {
    showHelp();
    process.exit(0);
  }

  const nothingToRun = requestedAgents.length === 0;
  if (nothingToRun) {
    console.error(chalk.red('Error: No agents specified. Use --agent <names>'));
    showHelp();
    process.exit(1);
  }

  const headless = new HeadlessRunner(cliOptions);
  process.exit(await headless.run());
}
|
|
651
|
+
|
|
652
|
+
// Start the CLI. Any error that escapes main() is reported on stderr (with
// the stack trace when the DEBUG environment variable is set) and the process
// exits with status 1.
main().catch((fatal) => {
  const banner = chalk.red(`Fatal error: ${fatal.message}`);
  console.error(banner);

  const showStack = Boolean(process.env.DEBUG);
  if (showStack) {
    console.error(fatal.stack);
  }

  process.exit(1);
});
|