opencode-design-lab 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.opencode/plugins/design-lab.js +1015 -0
- package/README.md +371 -0
- package/package.json +48 -0
|
@@ -0,0 +1,1015 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import * as fs from "fs";
|
|
3
|
+
import * as path from "path";
|
|
4
|
+
import * as os from "os";
|
|
5
|
+
import { tool } from "@opencode-ai/plugin";
|
|
6
|
+
import pino from "pino";
|
|
7
|
+
|
|
8
|
+
//#region src/config/schema.ts
|
|
9
|
+
/**
 * Zod schema for the OpenCode Design Lab plugin configuration.
 *
 * Fields:
 * - `design_models`: model identifiers used for design generation; at least two are required.
 * - `review_models`: optional list of model identifiers used for reviews.
 * - `base_output_dir`: output directory name, defaults to ".design-lab".
 * - `design_agent_temperature`: number in [0, 2], defaults to 0.7.
 * - `review_agent_temperature`: number in [0, 2], defaults to 0.1.
 * - `topic_generator_model`: optional model identifier for topic generation.
 */
const DesignLabConfigSchema = z.object({
  design_models: z
    .array(z.string())
    .min(2, "At least 2 design models required"),
  review_models: z.array(z.string()).optional(),
  base_output_dir: z.string().default(".design-lab"),
  design_agent_temperature: z.number().min(0).max(2).default(0.7),
  review_agent_temperature: z.number().min(0).max(2).default(0.1),
  topic_generator_model: z.string().optional(),
});
|
|
20
|
+
/**
 * Zod schema for the JSON document each design agent must produce.
 *
 * Every field is required: free-text sections (`title`, `summary`,
 * `architecture_overview`, `data_flow`), string lists (`assumptions`,
 * `open_questions`) and three lists of structured entries (`components`,
 * `tradeoffs`, `risks`).
 */
const DesignArtifactSchema = z.object({
  title: z.string(),
  summary: z.string(),
  assumptions: z.array(z.string()),
  architecture_overview: z.string(),
  // One entry per component of the proposed system.
  components: z.array(
    z.object({
      name: z.string(),
      description: z.string(),
      responsibilities: z.array(z.string()),
    }),
  ),
  data_flow: z.string(),
  // One entry per decision: the options considered, the pick, and why.
  tradeoffs: z.array(
    z.object({
      aspect: z.string(),
      options: z.array(z.string()),
      chosen: z.string(),
      rationale: z.string(),
    }),
  ),
  // `impact` is restricted to the three literal levels below.
  risks: z.array(
    z.object({
      risk: z.string(),
      impact: z.enum(["low", "medium", "high"]),
      mitigation: z.string(),
    }),
  ),
  open_questions: z.array(z.string()),
});
|
|
51
|
+
/**
 * Zod schema for a single reviewer's score of a single design.
 *
 * `scores` holds six numeric dimensions, each constrained to the range 0-10;
 * the remaining fields identify the design and reviewer and carry the
 * reviewer's written reasoning.
 */
const ScoreSchema = z.object({
  design_id: z.string(),
  reviewer_model: z.string(),
  scores: z.object({
    clarity: z.number().min(0).max(10),
    feasibility: z.number().min(0).max(10),
    scalability: z.number().min(0).max(10),
    maintainability: z.number().min(0).max(10),
    completeness: z.number().min(0).max(10),
    overall: z.number().min(0).max(10),
  }),
  justification: z.string(),
  strengths: z.array(z.string()),
  weaknesses: z.array(z.string()),
  missing_considerations: z.array(z.string()),
});
|
|
70
|
+
/**
 * Zod schema for one row of the aggregated ranking.
 *
 * `rank` must be a positive integer and `reviewer_count` an integer;
 * `score_breakdown` maps string keys to numbers.
 */
const RankingSchema = z.object({
  design_id: z.string(),
  rank: z.number().int().positive(),
  average_score: z.number(),
  score_breakdown: z.record(z.string(), z.number()),
  variance: z.number(),
  reviewer_count: z.number().int(),
});
|
|
81
|
+
|
|
82
|
+
//#endregion
|
|
83
|
+
//#region src/config/loader.ts
|
|
84
|
+
/**
 * Deep merge two plain objects, with `override` taking precedence.
 *
 * Nested plain objects are merged recursively; every other value (arrays,
 * primitives, null) from `override` replaces the value in `base`. Neither
 * input is mutated.
 *
 * Only own enumerable keys of `override` are considered, and the keys
 * `__proto__`, `constructor` and `prototype` are skipped: `JSON.parse`
 * creates `__proto__` as an ordinary own property, and assigning it onto the
 * result would silently replace the result's prototype.
 *
 * @param {object} base - Object providing the default values.
 * @param {object} override - Object whose values win on conflict.
 * @returns {object} A new merged object.
 */
function deepMerge(base, override) {
  const isPlainObject = (value) =>
    typeof value === "object" && value !== null && !Array.isArray(value);
  const result = { ...base };
  for (const key of Object.keys(override ?? {})) {
    if (key === "__proto__" || key === "constructor" || key === "prototype") {
      continue;
    }
    const overrideValue = override[key];
    const baseValue = base[key];
    if (isPlainObject(overrideValue) && isPlainObject(baseValue)) {
      result[key] = deepMerge(baseValue, overrideValue);
    } else {
      result[key] = overrideValue;
    }
  }
  return result;
}
|
|
97
|
+
/**
 * Parse JSONC (JSON with `//` line comments and block comments).
 *
 * The text is scanned character by character so that comment markers inside
 * string literals (for example the `//` in "http://host") are left intact;
 * only comments outside strings are removed before handing the result to
 * `JSON.parse`. An unterminated block comment is left in place so that
 * `JSON.parse` reports the malformed input.
 *
 * @param {string} content - Raw JSONC text.
 * @returns {*} The parsed value.
 * @throws {SyntaxError} If the text is not valid JSON once comments are removed.
 */
function parseJsonc(content) {
  const length = content.length;
  let stripped = "";
  let i = 0;
  while (i < length) {
    const ch = content[i];
    const next = content[i + 1];
    if (ch === '"') {
      // Copy the whole string literal verbatim, honouring backslash escapes.
      const start = i;
      i += 1;
      while (i < length && content[i] !== '"') {
        if (content[i] === "\\") i += 1;
        i += 1;
      }
      i += 1;
      stripped += content.slice(start, i);
    } else if (ch === "/" && next === "/") {
      // Line comment: drop everything up to (not including) the newline.
      while (i < length && content[i] !== "\n") i += 1;
    } else if (ch === "/" && next === "*") {
      const end = content.indexOf("*/", i + 2);
      if (end === -1) {
        stripped += content.slice(i);
        i = length;
      } else {
        i = end + 2;
      }
    } else {
      stripped += ch;
      i += 1;
    }
  }
  return JSON.parse(stripped);
}
|
|
104
|
+
/**
 * Load a partial plugin configuration from the first candidate file that exists.
 *
 * Candidates are tried in order: `configPath`, `configPath + ".json"`,
 * `configPath + ".jsonc"`. Only regular files are considered, so a directory
 * that happens to share the bare name no longer shadows the `.json` /
 * `.jsonc` files. The first existing file decides the outcome: its contents
 * are parsed as JSONC and validated against the partial config schema.
 *
 * @param {string} configPath - Path without extension (an exact file path also works).
 * @returns {object|null} The validated partial config, or null when no file
 *   exists, validation fails, or reading/parsing throws (the problem is
 *   reported via console.error).
 */
function loadConfigFromPath(configPath) {
  const candidates = [configPath, `${configPath}.json`, `${configPath}.jsonc`];
  try {
    for (const fullPath of candidates) {
      if (!fs.existsSync(fullPath) || !fs.statSync(fullPath).isFile()) {
        continue;
      }
      const rawConfig = parseJsonc(fs.readFileSync(fullPath, "utf-8"));
      const result = DesignLabConfigSchema.partial().safeParse(rawConfig);
      if (!result.success) {
        console.error(`Config validation error in ${fullPath}:`, result.error.issues);
        return null;
      }
      return result.data;
    }
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    console.error(`Error loading config from ${configPath}:`, errorMsg);
  }
  return null;
}
|
|
129
|
+
/**
 * Resolve the per-user configuration root for the current platform.
 *
 * On Windows (`process.platform === "win32"`) this is `%APPDATA%`, falling
 * back to `<home>/AppData/Roaming`. Everywhere else it is `$XDG_CONFIG_HOME`,
 * falling back to `<home>/.config`. An empty environment value counts as unset.
 *
 * @returns {string} The configuration root directory.
 */
function getUserConfigDir() {
  const onWindows = process.platform === "win32";
  if (onWindows) {
    const appData = process.env.APPDATA;
    return appData ? appData : path.join(os.homedir(), "AppData", "Roaming");
  }
  const xdgHome = process.env.XDG_CONFIG_HOME;
  return xdgHome ? xdgHome : path.join(os.homedir(), ".config");
}
|
|
137
|
+
/**
 * Build the effective plugin configuration from the user-level and
 * project-level config files.
 *
 * Sources, lowest to highest priority:
 *   1. User-level:    <user config dir>/opencode/design-lab(.json|.jsonc)
 *   2. Project-level: <directory>/.opencode/design-lab(.json|.jsonc)
 *
 * Both files are loaded first, then merged in that order (project values
 * win), and the merged object is validated against the full schema.
 *
 * @param {string} directory - Project directory.
 * @returns {object} The merged and validated configuration.
 * @throws {Error} When the merged configuration fails schema validation.
 */
function loadPluginConfig(directory) {
  const sources = [
    path.join(getUserConfigDir(), "opencode", "design-lab"),
    path.join(directory, ".opencode", "design-lab"),
  ];
  const partials = sources.map((source) => loadConfigFromPath(source));

  let merged = {};
  for (const partial of partials) {
    if (partial) merged = deepMerge(merged, partial);
  }

  const parsed = DesignLabConfigSchema.safeParse(merged);
  if (parsed.success) return parsed.data;

  const details = parsed.error.issues
    .map((issue) => `${issue.path.join(".")}: ${issue.message}`)
    .join(", ");
  throw new Error(`Invalid design-lab configuration: ${details}`);
}
|
|
159
|
+
|
|
160
|
+
//#endregion
|
|
161
|
+
//#region src/agents/index.ts
|
|
162
|
+
/**
 * System prompt given to design-generation agents.
 *
 * It instructs the model to answer with a single JSON object and spells out
 * the expected structure (title, summary, assumptions, architecture overview,
 * components, data flow, tradeoffs, risks, open questions).
 */
const DESIGN_AGENT_SYSTEM_PROMPT = `You are a senior software architect generating a design proposal for a technical system.

## Your Task

You will receive design requirements and must produce a comprehensive design document as structured JSON.

## Critical Rules

1. **Output ONLY valid JSON** - No markdown, no explanations, no code blocks, just pure JSON
2. **Follow the schema exactly** - All required fields must be present
3. **Be specific and actionable** - Avoid vague statements
4. **Consider real-world constraints** - Think about scalability, maintainability, and security
5. **Identify risks proactively** - Every design has risks, acknowledge them
6. **List open questions** - What would you need to clarify with stakeholders?

## Required Output Schema

Your response must be a JSON object with this exact structure:

{
"title": "Short, descriptive title for the design",
"summary": "2-3 paragraph executive summary of the design",
"assumptions": ["List of assumptions you're making"],
"architecture_overview": "High-level description of the architecture approach",
"components": [
{
"name": "Component name",
"description": "What this component does",
"responsibilities": ["List of responsibilities"]
}
],
"data_flow": "Description of how data flows through the system",
"tradeoffs": [
{
"aspect": "What aspect this tradeoff concerns",
"options": ["Option 1", "Option 2"],
"chosen": "Which option you chose",
"rationale": "Why you chose this option"
}
],
"risks": [
{
"risk": "Description of the risk",
"impact": "low|medium|high",
"mitigation": "How to mitigate this risk"
}
],
"open_questions": ["Questions that need stakeholder input"]
}

Remember: Your entire response must be valid JSON. No other text.`;
|
|
216
|
+
/**
 * Build the agent configuration object for a design-generation model.
 *
 * The agent runs in "subagent" mode with the design system prompt and with
 * the write, edit, bash, task and delegate_task tools switched off.
 *
 * @param {string} model - Model identifier stored on the configuration.
 * @param {number} temperature - Temperature stored on the configuration.
 * @returns {{model: string, temperature: number, mode: string, prompt: string, tools: Object<string, boolean>}}
 */
function createDesignAgent(model, temperature) {
  const disabledTools = {
    write: false,
    edit: false,
    bash: false,
    task: false,
    delegate_task: false,
  };
  const agent = {
    model,
    temperature,
    mode: "subagent",
    prompt: DESIGN_AGENT_SYSTEM_PROMPT,
    tools: disabledTools,
  };
  return agent;
}
|
|
234
|
+
/**
 * System prompt given to review agents.
 *
 * It asks the model to compare all design proposals, write a markdown review,
 * and finish with a markdown score table covering six 0-10 dimensions.
 */
const REVIEW_AGENT_SYSTEM_PROMPT = `You are a senior technical reviewer evaluating software design proposals.

## Your Task

You will receive multiple design proposals for the same requirements. You must:
1. Analyze each design thoroughly
2. Compare them objectively
3. Provide scores for each design
4. Generate a markdown review with your analysis

## Scoring Criteria (0-10 scale)

- **Clarity**: How well-explained and understandable is the design?
- **Feasibility**: Can this design be realistically implemented?
- **Scalability**: Will this design handle growth well?
- **Maintainability**: Will this design be easy to maintain and evolve?
- **Completeness**: Does this design address all requirements?
- **Overall**: Your overall assessment

## Review Format

First, provide a detailed markdown review comparing all designs:

1. Executive summary of each design
2. Comparative analysis across dimensions
3. Strengths and weaknesses of each
4. Your recommendation

Then, provide a score table in markdown like:

| Design | Clarity | Feasibility | Scalability | Maintainability | Completeness | Overall |
|--------|---------|-------------|-------------|-----------------|--------------|---------|
| model-name | 8 | 9 | 7 | 8 | 8 | 8 |

## Important

- Be objective and fair
- Support your scores with reasoning
- Consider the requirements when scoring
- Do not be biased by model names`;
|
|
277
|
+
/**
 * Build the agent configuration object for a review model.
 *
 * The agent runs in "subagent" mode with the review system prompt and with
 * the write, edit, bash, task and delegate_task tools switched off.
 *
 * @param {string} model - Model identifier stored on the configuration.
 * @param {number} temperature - Temperature stored on the configuration.
 * @returns {{model: string, temperature: number, mode: string, prompt: string, tools: Object<string, boolean>}}
 */
function createReviewAgent(model, temperature) {
  const disabledTools = {
    write: false,
    edit: false,
    bash: false,
    task: false,
    delegate_task: false,
  };
  const agent = {
    model,
    temperature,
    mode: "subagent",
    prompt: REVIEW_AGENT_SYSTEM_PROMPT,
    tools: disabledTools,
  };
  return agent;
}
|
|
295
|
+
|
|
296
|
+
//#endregion
|
|
297
|
+
//#region src/utils/logger.ts
|
|
298
|
+
// Minimum level handed to the logger; taken from LOG_LEVEL, "info" when unset or empty.
const logLevel = process.env.LOG_LEVEL ? process.env.LOG_LEVEL : "info";

// Labels used when rendering a log record's numeric `level` as text.
const levelNames = {
  10: "TRACE",
  20: "DEBUG",
  30: "INFO",
  40: "WARN",
  50: "ERROR",
  60: "FATAL",
};
|
|
307
|
+
/**
 * Render the current local time as `HH:MM:SS.mmm`.
 *
 * @returns {string} Zero-padded hours, minutes and seconds plus milliseconds.
 */
function formatTimestamp() {
  const now = new Date();
  const pad = (value, width) => String(value).padStart(width, "0");
  const clock = [now.getHours(), now.getMinutes(), now.getSeconds()]
    .map((unit) => pad(unit, 2))
    .join(":");
  return `${clock}.${pad(now.getMilliseconds(), 3)}`;
}
|
|
311
|
+
/**
 * Create the destination stream for the logger.
 *
 * Records are appended to `design-lab.log` in the current working directory.
 * Each incoming chunk is parsed as JSON and rewritten as
 * `[HH:MM:SS.mmm] LEVEL: message`; chunks that are not valid JSON are written
 * through unchanged. Only the `msg` field is rendered, so any other fields on
 * the record do not appear in the file.
 *
 * The file stream gets an `error` listener: without one, a failure to open or
 * write the log file (for example a read-only working directory) would
 * surface as an unhandled `error` event and terminate the process. The
 * failure is reported once on stderr and logging to the file is otherwise
 * best-effort.
 *
 * @returns {object} The multistream handed to the logger.
 */
function createLogStream() {
  const logPath = path.join(process.cwd(), "design-lab.log");
  const stream = fs.createWriteStream(logPath, { flags: "a" });
  let reportedFailure = false;
  stream.on("error", (err) => {
    if (reportedFailure) return;
    reportedFailure = true;
    console.error(`design-lab: unable to write log file ${logPath}: ${err.message}`);
  });
  return pino.multistream([
    {
      level: "trace",
      stream: {
        write: (chunk) => {
          try {
            const log = JSON.parse(chunk);
            const timestamp = formatTimestamp();
            const level = levelNames[log.level] || "UNKNOWN";
            const message = log.msg || "";
            stream.write(`[${timestamp}] ${level}: ${message}\n`);
          } catch (e) {
            // Not JSON: keep the raw chunk rather than losing the record.
            stream.write(chunk + "\n");
          }
        },
      },
    },
  ]);
}
|
|
329
|
+
// Shared logger: minimum level from `logLevel`, pino's own timestamp field
// disabled, output routed through the stream built by createLogStream().
const loggerOptions = { level: logLevel, timestamp: false };
const logger = pino(loggerOptions, createLogStream());
|
|
333
|
+
|
|
334
|
+
//#endregion
|
|
335
|
+
//#region src/utils/session-helpers.ts
|
|
336
|
+
/**
 * Delay between two consecutive session-completion checks, in milliseconds.
 */
const POLL_INTERVAL_MS = 500;

/**
 * Upper bound on how long a session is polled before giving up,
 * in milliseconds (10 minutes).
 */
const MAX_POLL_TIME_MS = 10 * 60 * 1000;
|
|
344
|
+
/**
 * Create a new agent session as a child of an existing session.
 *
 * @param {object} ctx - Plugin context; `ctx.client.session.create` is called.
 * @param {string} parentSessionID - ID sent as the new session's `parentID`.
 * @param {string} title - Title of the new session.
 * @param {string} directory - Directory sent as the `directory` query value.
 * @returns {Promise<string>} The ID of the created session.
 * @throws {Error} When the client reports an error or returns no session ID.
 */
async function createAgentSession(ctx, parentSessionID, title, directory) {
  // Error payloads may be plain objects; interpolating those directly would
  // yield "[object Object]", so render them explicitly.
  const describeError = (error) => {
    if (error instanceof Error) return error.message;
    if (typeof error === "string") return error;
    try {
      return JSON.stringify(error);
    } catch {
      return String(error);
    }
  };

  logger.info({ parentSessionID, title }, "Creating agent session");
  const createResult = await ctx.client.session.create({
    body: {
      parentID: parentSessionID,
      title,
    },
    query: { directory },
  });
  if (createResult.error) {
    logger.error({ error: createResult.error }, "Failed to create session");
    throw new Error(`Failed to create session: ${describeError(createResult.error)}`);
  }
  const sessionID = createResult.data?.id;
  if (!sessionID) {
    logger.error({ parentSessionID, title }, "Failed to create session");
    throw new Error("Failed to create session: response contained no session id");
  }
  logger.info({ sessionID }, "Session created successfully");
  return sessionID;
}
|
|
366
|
+
/**
 * Send a text prompt to a session, failing if the call takes longer than
 * 180 seconds.
 *
 * The `task` and `delegate_task` tools are always disabled, whatever `tools`
 * says. The timeout timer is cleared as soon as the call settles, so no
 * pending timer outlives the request.
 *
 * @param {object} ctx - Plugin context; `ctx.client.session.prompt` is called.
 * @param {string} sessionID - Target session.
 * @param {string} prompt - Prompt text, sent as a single text part.
 * @param {Object<string, boolean>} tools - Tool enable/disable flags.
 * @param {string} [model] - Optional "provider/model" identifier. When given
 *   in that form it is sent as `body.model = { providerID, modelID }`, split
 *   at the first slash. NOTE(review): this body shape follows the OpenCode SDK
 *   prompt documentation; confirm against the installed SDK version.
 * @returns {Promise<void>}
 * @throws {Error} On timeout or when the client reports an error.
 */
async function sendPrompt(ctx, sessionID, prompt, tools, model) {
  // Error payloads may be plain objects; render them instead of "[object Object]".
  const describeError = (error) => {
    if (error instanceof Error) return error.message;
    if (typeof error === "string") return error;
    try {
      return JSON.stringify(error);
    } catch {
      return String(error);
    }
  };

  logger.info({ sessionID, promptLength: prompt.length, tools }, "Sending prompt");

  const body = {
    tools: {
      ...tools,
      task: false,
      delegate_task: false,
    },
    parts: [{ type: "text", text: prompt }],
  };
  if (typeof model === "string") {
    const separator = model.indexOf("/");
    if (separator > 0 && separator < model.length - 1) {
      body.model = {
        providerID: model.slice(0, separator),
        modelID: model.slice(separator + 1),
      };
    }
  }

  let timer;
  const timeoutPromise = new Promise((_, reject) => {
    timer = setTimeout(
      () => reject(new Error("Prompt send timeout after 180 seconds")),
      18e4,
    );
  });

  let result;
  try {
    const sendPromise = ctx.client.session.prompt({ path: { id: sessionID }, body });
    result = await Promise.race([sendPromise, timeoutPromise]);
  } finally {
    clearTimeout(timer);
  }

  if (result.error) {
    logger.error({ sessionID, error: result.error }, "Failed to send prompt");
    throw new Error(`Failed to send prompt: ${describeError(result.error)}`);
  }
  logger.info({ sessionID }, "Prompt sent successfully");
}
|
|
400
|
+
/**
 * Wait until a session has finished producing output.
 *
 * Every POLL_INTERVAL_MS the session status is fetched. While the session
 * reports a status other than "idle" the stability counters are reset. Once
 * it is idle (or has no status entry), the message count is sampled; the
 * session counts as complete after the count has been non-zero and unchanged
 * for three consecutive samples.
 *
 * @param {object} ctx - Plugin context; uses `ctx.client.session.status` and
 *   `ctx.client.session.messages`.
 * @param {string} sessionID - Session to watch.
 * @param {AbortSignal} [abortSignal] - Checked at the start of every iteration.
 * @returns {Promise<void>} Resolves when completion is confirmed.
 * @throws {Error} "Task aborted" when the signal is aborted, or
 *   "Session timed out after 10 minutes" once MAX_POLL_TIME_MS has elapsed.
 */
async function pollForCompletion(ctx, sessionID, abortSignal) {
  const STABILITY_REQUIRED = 3;
  const startedAt = Date.now();
  let previousCount = 0;
  let stableStreak = 0;
  let attempts = 0;

  logger.info({ sessionID }, "Starting polling for completion");

  while (Date.now() - startedAt < MAX_POLL_TIME_MS) {
    attempts += 1;

    if (abortSignal?.aborted) {
      logger.warn({ sessionID }, "Polling aborted by signal");
      throw new Error("Task aborted");
    }

    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));

    const statusResponse = await ctx.client.session.status();
    const statusBySession = statusResponse.data ?? {};
    const sessionStatus = statusBySession[sessionID];

    if (attempts % 10 === 0) {
      logger.info(
        {
          sessionID,
          status: sessionStatus?.type,
          pollCount: attempts,
          elapsed: Date.now() - startedAt,
        },
        "Polling status check",
      );
    }

    // Still working: forget any stability observed so far.
    if (sessionStatus && sessionStatus.type !== "idle") {
      stableStreak = 0;
      previousCount = 0;
      continue;
    }

    const messagesCheck = await ctx.client.session.messages({ path: { id: sessionID } });
    const currentMsgCount = (messagesCheck.data ?? messagesCheck).length;
    const unchanged = currentMsgCount > 0 && currentMsgCount === previousCount;

    if (!unchanged) {
      stableStreak = 0;
      previousCount = currentMsgCount;
      continue;
    }

    stableStreak += 1;
    logger.debug(
      { sessionID, stablePolls: stableStreak, currentMsgCount },
      "Message count stable",
    );
    if (stableStreak >= STABILITY_REQUIRED) {
      logger.info(
        { sessionID, totalPolls: attempts, elapsed: Date.now() - startedAt },
        "Session completion confirmed",
      );
      return;
    }
  }

  logger.error(
    { sessionID, totalPolls: attempts, elapsed: MAX_POLL_TIME_MS },
    "Session timed out",
  );
  throw new Error("Session timed out after 10 minutes");
}
|
|
458
|
+
/**
 * Collect the textual output of a session's assistant messages.
 *
 * All non-empty `text` and `reasoning` parts of every assistant message are
 * gathered in order and joined with a blank line between them.
 *
 * @param {object} ctx - Plugin context; `ctx.client.session.messages` is called.
 * @param {string} sessionID - Session to read.
 * @returns {Promise<string>} The concatenated text.
 * @throws {Error} When the client reports an error or there is no assistant message.
 */
async function extractSessionOutput(ctx, sessionID) {
  const response = await ctx.client.session.messages({ path: { id: sessionID } });
  if (response.error) {
    throw new Error(`Failed to get messages: ${response.error}`);
  }

  const fromAssistant = response.data.filter((entry) => entry.info?.role === "assistant");
  if (fromAssistant.length === 0) {
    throw new Error("No assistant response found");
  }

  return fromAssistant
    .flatMap((entry) => entry.parts ?? [])
    .filter((part) => (part.type === "text" || part.type === "reasoning") && part.text)
    .map((part) => part.text)
    .join("\n\n");
}
|
|
470
|
+
/**
 * Extract and parse JSON from text that may wrap it in prose or markdown
 * code fences.
 *
 * Candidates are tried in this order and the first one that parses wins:
 *   1. the contents of each fenced code block, in order of appearance;
 *   2. the widest `{...}` span and the widest `[...]` span, leftmost first;
 *   3. the whole text, trimmed.
 *
 * A candidate that fails to parse no longer aborts the search, so a non-JSON
 * code fence or a bracketed aside before the real payload does not hide it.
 *
 * @param {string} text - Raw model output.
 * @returns {*} The parsed JSON value.
 * @throws {SyntaxError} The first parse error, when no candidate parses.
 */
function extractJSON(text) {
  const candidates = [];

  for (const fence of text.matchAll(/```(?:json)?\s*([\s\S]*?)```/g)) {
    candidates.push(fence[1].trim());
  }

  const bracketed = [text.match(/\{[\s\S]*\}/), text.match(/\[[\s\S]*\]/)]
    .filter((match) => match !== null)
    .sort((a, b) => a.index - b.index);
  for (const match of bracketed) {
    candidates.push(match[0]);
  }

  candidates.push(text.trim());

  let firstError;
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch (err) {
      if (firstError === undefined) firstError = err;
    }
  }
  throw firstError;
}
|
|
480
|
+
/**
 * Return the part of a model identifier after its last "/".
 *
 * Example: "zhipuai-coding-plan/glm-4.6" -> "glm-4.6". Identifiers without a
 * slash, or ending in one, are returned unchanged.
 *
 * @param {string} modelName - Full model identifier.
 * @returns {string} The short model name.
 */
function getModelShortName(modelName) {
  const tail = modelName.slice(modelName.lastIndexOf("/") + 1);
  return tail || modelName;
}
|
|
488
|
+
/**
 * Turn arbitrary text into a string that is safe to use as a file or
 * directory name.
 *
 * The text is lower-cased, slashes become hyphens, every character other
 * than a-z, 0-9, whitespace and hyphen is dropped, whitespace runs become a
 * single hyphen, hyphen runs are collapsed, and the result is cut to 50
 * characters. Leading and trailing hyphens are then removed so that padded
 * input or an unlucky cut never yields a name that starts or ends with "-".
 *
 * @param {string} text - Text to sanitize.
 * @returns {string} The sanitized name; may be empty if nothing usable remains.
 */
function sanitizeForFilename(text) {
  return text
    .toLowerCase()
    .replace(/\//g, "-")
    .replace(/[^a-z0-9\s-]/g, "")
    .replace(/\s+/g, "-")
    .replace(/-+/g, "-")
    .substring(0, 50)
    .replace(/^-+|-+$/g, "");
}
|
|
494
|
+
|
|
495
|
+
//#endregion
|
|
496
|
+
//#region src/tools/generate-designs.ts
|
|
497
|
+
/**
 * Create the `generate_designs` tool.
 *
 * When executed, the tool:
 *   1. derives a topic (the sanitized user-supplied one, or a generated one);
 *   2. creates `<project>/<base_output_dir>/<YYYY-MM-DD>-<topic>/` with
 *      `designs`, `reviews` and `scores` sub-directories, refusing to continue
 *      if that lab directory already exists;
 *   3. writes `task.json` describing the run;
 *   4. asks every configured design model, one after another, for a design,
 *      validates it against the design artifact schema and stores it as both
 *      JSON and Markdown;
 *   5. returns a human-readable summary of successes and failures.
 *
 * A failure for one model is recorded and does not stop the remaining models.
 *
 * @param {object} ctx - Plugin context (`ctx.directory` is the project root).
 * @param {object} config - Validated plugin configuration.
 * @returns {object} The tool definition.
 */
function createGenerateDesignsTool(ctx, config) {
  return tool({
    description: `Generate multiple independent design proposals for a technical system.

This tool creates design proposals using ${config.design_models.length} different AI models:
${config.design_models.map((m) => `- ${m}`).join("\n")}

Each model generates a design completely independently, without seeing other models' outputs.

Use this when you want to explore multiple approaches to a design problem and compare them.`,
    args: {
      requirements: tool.schema.string().describe("Detailed requirements for the design. Include problem statement, constraints, and non-functional requirements."),
      topic: tool.schema.string().describe("Optional short topic name (2-4 words) for the design session. If not provided, one will be generated.").optional(),
    },
    async execute(args, toolContext) {
      const { requirements, topic: userTopic } = args;
      const sessionID = toolContext.sessionID;
      const topic = userTopic
        ? sanitizeForFilename(userTopic)
        : await generateTopic(ctx, config, requirements, sessionID);
      const date = new Date().toISOString().split("T")[0];
      const labDir = path.join(ctx.directory, config.base_output_dir, `${date}-${topic}`);

      if (fs.existsSync(labDir)) {
        // The run is aborted here; the log message says so (it used to claim
        // the existing directory would be reused).
        logger.warn({ labDir }, "Lab directory already exists, refusing to overwrite");
        return `Error: Lab directory already exists at ${labDir}. This may be from a previous attempt. Please check the existing designs or delete the directory to retry.`;
      }

      const designsDir = path.join(labDir, "designs");
      const reviewsDir = path.join(labDir, "reviews");
      const scoresDir = path.join(labDir, "scores");
      fs.mkdirSync(designsDir, { recursive: true });
      fs.mkdirSync(reviewsDir, { recursive: true });
      fs.mkdirSync(scoresDir, { recursive: true });

      const taskData = {
        requirements,
        topic,
        created: new Date().toISOString(),
        design_models: config.design_models,
        review_models: config.review_models ?? config.design_models,
      };
      fs.writeFileSync(path.join(labDir, "task.json"), JSON.stringify(taskData, null, 2));

      const results = [];
      for (const model of config.design_models) {
        try {
          logger.info({ model }, `Starting design generation for model: ${model}`);
          const design = await generateDesign(ctx, config, model, requirements, sessionID);

          const validationResult = DesignArtifactSchema.safeParse(design);
          if (!validationResult.success) {
            logger.error(
              { model, errors: validationResult.error.issues },
              "Design schema validation failed",
            );
            results.push({
              model,
              success: false,
              error: `Schema validation failed: ${validationResult.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join(", ")}`,
            });
            continue;
          }

          // NOTE(review): files are keyed by the sanitized short model name, so
          // two models sharing a short name would overwrite each other.
          const fileStem = sanitizeForFilename(getModelShortName(model));

          const designFile = path.join(designsDir, `${fileStem}.json`);
          fs.writeFileSync(designFile, JSON.stringify(design, null, 2));
          logger.info({ model, designFile }, "Design saved as JSON");

          const markdownFile = path.join(designsDir, `${fileStem}.md`);
          fs.writeFileSync(markdownFile, formatDesignAsMarkdown(design, model));
          logger.info({ model, markdownFile }, "Design saved as Markdown");

          results.push({ model, success: true });
        } catch (err) {
          const errorMsg = err instanceof Error ? err.message : String(err);
          logger.error({ model, error: errorMsg }, "Design generation failed");
          results.push({ model, success: false, error: errorMsg });
        }
      }

      const successCount = results.filter((r) => r.success).length;
      const failureCount = results.length - successCount;
      return `Design generation complete.

**Lab Directory**: ${labDir}

**Results**: ${successCount} successful, ${failureCount} failed

${results.map((r) => r.success ? `✅ ${r.model}: Generated successfully` : `❌ ${r.model}: ${r.error}`).join("\n")}

${successCount >= 2 ? `\nNext step: Run the review_designs tool to evaluate and compare the designs.` : `\nWarning: At least 2 successful designs are needed for meaningful comparison.`}`;
    },
  });
}
|
|
598
|
+
/**
 * Render a design artifact as a Markdown document.
 *
 * Sections appear in this order: title, model, Summary, Assumptions,
 * Architecture Overview, Components, Data Flow, Tradeoffs, Risks,
 * Open Questions.
 *
 * @param {object} design - Design artifact with the fields read below.
 * @param {string} model - Model identifier shown under the title.
 * @returns {string} The Markdown text.
 */
function formatDesignAsMarkdown(design, model) {
  const bulletList = (items) => items.map((item) => `- ${item}\n`).join("");

  const componentSections = design.components
    .map(
      (component) =>
        `### ${component.name}\n\n${component.description}\n\n**Responsibilities**:\n` +
        `${bulletList(component.responsibilities)}\n`,
    )
    .join("");

  const tradeoffSections = design.tradeoffs
    .map(
      (tradeoff) =>
        `### ${tradeoff.aspect}\n\n` +
        `**Options**: ${tradeoff.options.join(", ")}\n\n` +
        `**Chosen**: ${tradeoff.chosen}\n\n` +
        `**Rationale**: ${tradeoff.rationale}\n\n`,
    )
    .join("");

  const riskSections = design.risks
    .map(
      (entry) =>
        `### ${entry.risk} (Impact: ${entry.impact})\n\n` +
        `**Mitigation**: ${entry.mitigation}\n\n`,
    )
    .join("");

  return [
    `# ${design.title}\n\n`,
    `**Model**: ${model}\n\n`,
    `## Summary\n\n${design.summary}\n\n`,
    `## Assumptions\n\n${bulletList(design.assumptions)}\n`,
    `## Architecture Overview\n\n${design.architecture_overview}\n\n`,
    `## Components\n\n${componentSections}`,
    `## Data Flow\n\n${design.data_flow}\n\n`,
    `## Tradeoffs\n\n${tradeoffSections}`,
    `## Risks\n\n${riskSections}`,
    `## Open Questions\n\n${bulletList(design.open_questions)}`,
  ].join("");
}
|
|
632
|
+
/**
 * Generate a short, filename-safe topic name from the requirements.
 *
 * A child session is created, asked for a 2-4 word topic based on the first
 * 500 characters of the requirements, and its output is sanitized. If
 * nothing usable remains after sanitizing, "untitled-design" is returned so
 * the lab directory never ends up named just "<date>-".
 *
 * @param {object} ctx - Plugin context.
 * @param {object} config - Plugin configuration; `topic_generator_model` is
 *   preferred, otherwise the first design model is used.
 * @param {string} requirements - Full requirements text.
 * @param {string} parentSessionID - Session the topic session is created under.
 * @returns {Promise<string>} The sanitized topic.
 */
async function generateTopic(ctx, config, requirements, parentSessionID) {
  // This choice used to be evaluated and then discarded.
  // NOTE(review): it is forwarded as a fifth argument; it only takes effect if
  // sendPrompt routes that argument into the prompt request — confirm.
  const topicModel = config.topic_generator_model || config.design_models[0];

  const sessionID = await createAgentSession(ctx, parentSessionID, "Topic Generation", ctx.directory);
  await sendPrompt(
    ctx,
    sessionID,
    `Generate a concise 2-4 word topic name for this design task. Output ONLY the topic name, nothing else.

Requirements:
${requirements.substring(0, 500)}`,
    {
      write: false,
      edit: false,
      bash: false,
    },
    topicModel,
  );
  await pollForCompletion(ctx, sessionID);

  const rawTopic = (await extractSessionOutput(ctx, sessionID)).trim();
  const topic = sanitizeForFilename(rawTopic);
  return topic || "untitled-design";
}
|
|
649
|
+
/**
 * Generate a single design using a specific model.
 *
 * A child session is created, the design system prompt plus the task prompt
 * is sent to it, the session is polled until it completes, and the JSON found
 * in its output is parsed and returned. The result is not schema-validated
 * here.
 *
 * @param {object} ctx - Plugin context.
 * @param {object} config - Plugin configuration (`design_agent_temperature` is read).
 * @param {string} model - Model identifier for this design.
 * @param {string} requirements - Requirements text embedded in the prompt.
 * @param {string} parentSessionID - Session the design session is created under.
 * @returns {Promise<*>} The parsed JSON produced by the agent.
 * @throws {Error} If session creation, prompting, polling or JSON extraction fails.
 */
async function generateDesign(ctx, config, model, requirements, parentSessionID) {
  const agentConfig = createDesignAgent(model, config.design_agent_temperature);
  const sessionID = await createAgentSession(ctx, parentSessionID, `Design Generation - ${model}`, ctx.directory);
  const prompt = `Generate a comprehensive design proposal for the following requirements.

## Requirements

${requirements}

## Instructions

1. Analyze the requirements thoroughly
2. Consider multiple approaches before deciding
3. Output your design as valid JSON following the required schema
4. Be specific and actionable in your design

Remember: Your entire response must be valid JSON with no other text.`;
  // The system prompt is prepended to the user prompt.
  const fullPrompt = `${agentConfig.prompt}\n\n${prompt}`;

  logger.info({ model, sessionID }, "Sending design prompt to agent");
  // Previously only the tool flags were passed on, so the per-design model
  // never reached the request.
  // NOTE(review): the model is forwarded as a fifth argument; it only takes
  // effect if sendPrompt routes that argument into the prompt request —
  // confirm. `agentConfig.temperature` is still not forwarded.
  await sendPrompt(ctx, sessionID, fullPrompt, agentConfig.tools, agentConfig.model);

  logger.info({ model, sessionID }, "Polling for completion");
  await pollForCompletion(ctx, sessionID);

  logger.info({ model, sessionID }, "Extracting session output");
  const output = await extractSessionOutput(ctx, sessionID);

  logger.info({ model, outputLength: output.length }, "Extracting JSON from output");
  return extractJSON(output);
}
|
|
691
|
+
|
|
692
|
+
//#endregion
|
|
693
|
+
//#region src/tools/review-designs.ts
|
|
694
|
+
/**
 * Create the review_designs tool.
 *
 * The returned tool loads every `*.json` design from the lab's `designs`
 * directory and the requirements from `task.json`, asks each reviewer model
 * for a markdown review plus a list of scores, and writes them into the
 * lab's `reviews` and `scores` directories. Reviewer models come from
 * `config.review_models`, falling back to `config.design_models`.
 *
 * @param ctx - Plugin context; `ctx.directory` is used as the project root.
 * @param config - Plugin configuration (`review_models`, `design_models`, `base_output_dir`).
 * @returns The tool definition produced by `tool(...)`. Its `execute` resolves
 *   to a human-readable summary, or to an `Error: ...` string when a
 *   precondition (lab, designs, task file) is not met.
 */
function createReviewDesignsTool(ctx, config) {
	const reviewModels = config.review_models ?? config.design_models;
	const describeError = (err) => err instanceof Error ? err.message : String(err);
	return tool({
		description: `Review and score design proposals using ${reviewModels.length} reviewer models.

Each reviewer analyzes all designs and provides:
1. A markdown review comparing the designs
2. Numeric scores (0-10) across dimensions: clarity, feasibility, scalability, maintainability, completeness, overall

Use this after generate_designs to evaluate and compare the generated designs.`,
		args: { lab_path: tool.schema.string().describe(`Path to the design lab directory (e.g., .design-lab/2024-01-15-api-gateway). If not provided, uses the most recent lab.`).optional() },
		async execute(args, toolContext) {
			const sessionID = toolContext.sessionID;
			const labDir = args.lab_path ? path.resolve(ctx.directory, args.lab_path) : findMostRecentLab$1(ctx.directory, config.base_output_dir);
			if (!labDir) return "Error: No design lab found. Run generate_designs first.";
			const designsDir = path.join(labDir, "designs");
			const reviewsDir = path.join(labDir, "reviews");
			const scoresDir = path.join(labDir, "scores");
			// readdirSync throws on a missing directory; report it like an empty one.
			if (!fs.existsSync(designsDir)) return "Error: No designs found in the lab directory.";
			const designFiles = fs.readdirSync(designsDir).filter((f) => f.endsWith(".json"));
			if (designFiles.length === 0) return "Error: No designs found in the lab directory.";
			const designs = {};
			for (const file of designFiles) {
				// Strip only the trailing extension; String.replace would remove the first ".json" anywhere in the name.
				const designId = path.basename(file, ".json");
				try {
					designs[designId] = JSON.parse(fs.readFileSync(path.join(designsDir, file), "utf-8"));
				} catch (err) {
					return `Error: Could not read design file ${file}: ${describeError(err)}`;
				}
			}
			const taskPath = path.join(labDir, "task.json");
			let requirements;
			try {
				requirements = JSON.parse(fs.readFileSync(taskPath, "utf-8")).requirements;
			} catch (err) {
				return `Error: Could not read ${taskPath}: ${describeError(err)}`;
			}
			// Make sure the output directories exist before any reviewer writes to them.
			fs.mkdirSync(reviewsDir, { recursive: true });
			fs.mkdirSync(scoresDir, { recursive: true });
			const results = [];
			for (const model of reviewModels) {
				try {
					const { review, scores } = await generateReview(ctx, config, model, requirements, designs, sessionID);
					const reviewFile = path.join(reviewsDir, `review-${sanitizeForFilename(getModelShortName(model))}.md`);
					fs.writeFileSync(reviewFile, review);
					logger.info({
						model,
						reviewFile
					}, "Review saved");
					if (!Array.isArray(scores)) throw new Error("Reviewer did not return a JSON array of scores");
					const reviewerShortName = getModelShortName(model);
					for (const score of scores) {
						// Without a design id there is no meaningful file name to write the score under.
						if (score === null || typeof score !== "object" || typeof score.design_id !== "string") {
							logger.warn({ model }, "Skipping score entry without a design_id");
							continue;
						}
						const validationResult = ScoreSchema.safeParse(score);
						// Schema problems are reported but the score is still saved, as before.
						if (!validationResult.success) logger.warn({
							model,
							designId: score.design_id,
							issues: validationResult.error.issues
						}, "Score validation warning");
						const designShortName = getModelShortName(score.design_id);
						const scoreFile = path.join(scoresDir, `${sanitizeForFilename(designShortName)}-reviewed-by-${sanitizeForFilename(reviewerShortName)}.json`);
						fs.writeFileSync(scoreFile, JSON.stringify(score, null, 2));
					}
					results.push({
						model,
						success: true
					});
				} catch (err) {
					// One failing reviewer must not abort the remaining ones.
					results.push({
						model,
						success: false,
						error: describeError(err)
					});
				}
			}
			const successCount = results.filter((r) => r.success).length;
			return `Review complete.

**Lab Directory**: ${labDir}

**Results**: ${successCount} successful, ${results.length - successCount} failed

${results.map((r) => r.success ? `✅ ${r.model}: Review generated` : `❌ ${r.model}: ${r.error}`).join("\n")}

**Reviews saved to**: ${reviewsDir}
**Scores saved to**: ${scoresDir}

${successCount > 0 ? `\nNext step: Run the aggregate_scores tool to generate final rankings.` : ""}`;
		}
	});
}
|
|
770
|
+
/**
 * Find the most recent design lab directory.
 *
 * "Most recent" means the sub-directory of `<projectDir>/<baseDir>` whose
 * name sorts last under the default string sort.
 *
 * @param projectDir - Project root directory.
 * @param baseDir - Lab base directory, relative to `projectDir`.
 * @returns Path of the selected lab directory, or `null` when the base
 *   directory is missing, is not a directory, or contains no directories.
 */
function findMostRecentLab$1(projectDir, baseDir) {
	const labBaseDir = path.join(projectDir, baseDir);
	// throwIfNoEntry: false turns a missing path or dangling symlink into `undefined` instead of an exception.
	const isDirectory = (target) => fs.statSync(target, { throwIfNoEntry: false })?.isDirectory() ?? false;
	if (!isDirectory(labBaseDir)) return null;
	const labs = fs.readdirSync(labBaseDir).filter((d) => isDirectory(path.join(labBaseDir, d))).sort();
	if (labs.length === 0) return null;
	return path.join(labBaseDir, labs[labs.length - 1]);
}
|
|
780
|
+
/**
 * Ask a single reviewer model to review all designs and then score them.
 *
 * Two prompts are sent in one agent session: the first requests a markdown
 * comparison of the designs, the second requests the scores as a JSON array.
 *
 * @param ctx - Plugin context; `ctx.directory` is passed to the new session.
 * @param config - Plugin configuration; `review_agent_temperature` is used.
 * @param model - Reviewer model identifier.
 * @param requirements - Original requirements text embedded in the prompt.
 * @param designs - Map of design id to design object.
 * @param parentSessionID - Session the review session is created under.
 * @returns `{ review, scores }`: `review` is the session output read after the
 *   first prompt; `scores` is what `extractJSON` returns for the output read
 *   after the second prompt.
 */
async function generateReview(ctx, config, model, requirements, designs, parentSessionID) {
	const reviewer = createReviewAgent(model, config.review_agent_temperature);
	const reviewSession = await createAgentSession(ctx, parentSessionID, `Design Review - ${model}`, ctx.directory);
	const designSections = Object.entries(designs).map(([id, design]) => {
		const designJson = JSON.stringify(design, null, 2);
		return `## Design: ${id}\n\n\`\`\`json\n${designJson}\n\`\`\``;
	}).join("\n\n---\n\n");
	const reviewPrompt = `Review and compare the following design proposals.

## Original Requirements

${requirements}

## Designs to Review

${designSections}

## Your Task

1. Analyze each design thoroughly
2. Compare them across dimensions: clarity, feasibility, scalability, maintainability, completeness
3. Provide a detailed markdown review with your analysis
4. At the end, include a score table in markdown format
5. Identify strengths and weaknesses of each design

Be objective and support your assessments with specific observations.`;
	// First round: the free-form markdown review.
	await sendPrompt(ctx, reviewSession, `${reviewer.prompt}\n\n${reviewPrompt}`, reviewer.tools);
	await pollForCompletion(ctx, reviewSession);
	const review = await extractSessionOutput(ctx, reviewSession);
	// Second round: machine-readable scores, restricted to the known design ids.
	const allowedIds = Object.keys(designs).map((id) => `- "${id}"`).join("\n");
	const scorePrompt = `Now output the scores for each design as a JSON array. Each element should have this structure:

{
"design_id": "EXACT_DESIGN_ID_FROM_LIST_BELOW",
"reviewer_model": "${model}",
"scores": {
"clarity": 0-10,
"feasibility": 0-10,
"scalability": 0-10,
"maintainability": 0-10,
"completeness": 0-10,
"overall": 0-10
},
"justification": "Brief overall justification",
"strengths": ["list", "of", "strengths"],
"weaknesses": ["list", "of", "weaknesses"],
"missing_considerations": ["list", "of", "things", "missing"]
}

**IMPORTANT**: The "design_id" field MUST be one of these exact values:
${allowedIds}

Output ONLY the JSON array with one score object per design. No other text.`;
	await sendPrompt(ctx, reviewSession, scorePrompt, reviewer.tools);
	await pollForCompletion(ctx, reviewSession);
	const scoreOutput = await extractSessionOutput(ctx, reviewSession);
	return {
		review,
		scores: extractJSON(scoreOutput)
	};
}
|
|
837
|
+
|
|
838
|
+
//#endregion
|
|
839
|
+
//#region src/tools/aggregate-scores.ts
|
|
840
|
+
/**
 * Create the aggregate_scores tool.
 *
 * The returned tool reads every `*.json` file in the lab's `scores`
 * directory, groups the scores by `design_id`, averages each dimension,
 * computes the population variance of the `overall` score, ranks designs by
 * average `overall` score (highest first) and writes `ranking.json` and
 * `results.md` into the lab's `results` directory.
 *
 * Score files that are not valid JSON, or that lack a string `design_id` or
 * a finite number for every dimension, are skipped and listed in the tool
 * output instead of aborting the aggregation or producing NaN averages.
 *
 * @param ctx - Plugin context; `ctx.directory` is used as the project root.
 * @param config - Plugin configuration; `base_output_dir` locates the labs.
 * @returns The tool definition produced by `tool(...)`.
 */
function createAggregateScoresTool(ctx, config) {
	const dimensions = [
		"clarity",
		"feasibility",
		"scalability",
		"maintainability",
		"completeness",
		"overall"
	];
	// A score is usable only if it names its design and has a finite number for every dimension.
	const isUsableScore = (score) => score !== null && typeof score === "object" && typeof score.design_id === "string" && score.scores !== null && typeof score.scores === "object" && dimensions.every((dim) => Number.isFinite(score.scores[dim]));
	return tool({
		description: `Aggregate scores from all reviewers and generate final rankings.

This tool:
1. Reads all score files from the reviews
2. Calculates average scores per design
3. Computes variance/disagreement metrics
4. Generates a final ranking with results.md

Use this after review_designs to get the final comparison.`,
		args: { lab_path: tool.schema.string().describe(`Path to the design lab directory. If not provided, uses the most recent lab.`).optional() },
		async execute(args) {
			const labDir = args.lab_path ? path.resolve(ctx.directory, args.lab_path) : findMostRecentLab(ctx.directory, config.base_output_dir);
			if (!labDir) return "Error: No design lab found. Run generate_designs first.";
			const scoresDir = path.join(labDir, "scores");
			const resultsDir = path.join(labDir, "results");
			if (!fs.existsSync(scoresDir)) return "Error: No scores directory found. Run review_designs first.";
			fs.mkdirSync(resultsDir, { recursive: true });
			const scoreFiles = fs.readdirSync(scoresDir).filter((f) => f.endsWith(".json"));
			if (scoreFiles.length === 0) return "Error: No score files found. Run review_designs first.";
			const allScores = [];
			const skippedFiles = [];
			for (const file of scoreFiles) {
				let parsed;
				try {
					parsed = JSON.parse(fs.readFileSync(path.join(scoresDir, file), "utf-8"));
				} catch {
					skippedFiles.push(file);
					continue;
				}
				if (isUsableScore(parsed)) allScores.push(parsed);
				else skippedFiles.push(file);
			}
			if (allScores.length === 0) return "Error: No valid score files found. Run review_designs first.";
			// A Map keeps design ids read from disk from colliding with Object.prototype keys.
			const scoresByDesign = new Map();
			for (const score of allScores) {
				const group = scoresByDesign.get(score.design_id);
				if (group) group.push(score);
				else scoresByDesign.set(score.design_id, [score]);
			}
			const rankings = [];
			for (const [designId, scores] of scoresByDesign) {
				const scoreBreakdown = {};
				for (const dim of dimensions) scoreBreakdown[dim] = scores.reduce((sum, s) => sum + s.scores[dim], 0) / scores.length;
				const avgOverall = scoreBreakdown.overall;
				// Population variance of the overall score across reviewers.
				const variance = scores.reduce((sum, s) => sum + (s.scores.overall - avgOverall) ** 2, 0) / scores.length;
				rankings.push({
					design_id: designId,
					rank: 0,
					average_score: avgOverall,
					score_breakdown: scoreBreakdown,
					variance,
					reviewer_count: scores.length
				});
			}
			rankings.sort((a, b) => b.average_score - a.average_score);
			rankings.forEach((r, i) => {
				r.rank = i + 1;
			});
			fs.writeFileSync(path.join(resultsDir, "ranking.json"), JSON.stringify(rankings, null, 2));
			const resultsMarkdown = generateResultsMarkdown(rankings, allScores);
			fs.writeFileSync(path.join(resultsDir, "results.md"), resultsMarkdown);
			const skippedNote = skippedFiles.length > 0 ? `\n\nSkipped ${skippedFiles.length} invalid score file(s): ${skippedFiles.join(", ")}` : "";
			return `Aggregation complete.

**Rankings saved to**: ${path.join(resultsDir, "ranking.json")}
**Results summary saved to**: ${path.join(resultsDir, "results.md")}

## Final Rankings

${rankings.map((r) => `${r.rank}. **${r.design_id}** - Score: ${r.average_score.toFixed(1)}/10 (variance: ${r.variance.toFixed(2)})`).join("\n")}

View the full results in ${path.join(resultsDir, "results.md")}${skippedNote}`;
		}
	});
}
|
|
921
|
+
/**
 * Find the most recent design lab directory.
 *
 * "Most recent" means the sub-directory of `<projectDir>/<baseDir>` whose
 * name sorts last under the default string sort.
 *
 * @param projectDir - Project root directory.
 * @param baseDir - Lab base directory, relative to `projectDir`.
 * @returns Path of the selected lab directory, or `null` when the base
 *   directory is missing, is not a directory, or contains no directories.
 */
function findMostRecentLab(projectDir, baseDir) {
	const labBaseDir = path.join(projectDir, baseDir);
	// throwIfNoEntry: false turns a missing path or dangling symlink into `undefined` instead of an exception.
	const isDirectory = (target) => fs.statSync(target, { throwIfNoEntry: false })?.isDirectory() ?? false;
	if (!isDirectory(labBaseDir)) return null;
	const labs = fs.readdirSync(labBaseDir).filter((d) => isDirectory(path.join(labBaseDir, d))).sort();
	if (labs.length === 0) return null;
	return path.join(labBaseDir, labs[labs.length - 1]);
}
|
|
931
|
+
/**
 * Generate the results markdown file.
 *
 * Produces a summary table, the per-dimension averages, a reviewer × design
 * matrix of overall scores, and a "Key Observations" section naming the
 * top-ranked design and listing up to three distinct strengths and
 * weaknesses for each of the first three ranked designs.
 *
 * @param rankings - Ranking entries, already sorted best-first, each with
 *   `design_id`, `rank`, `average_score`, `variance`, `reviewer_count` and
 *   `score_breakdown`.
 * @param allScores - Individual reviewer scores (`design_id`,
 *   `reviewer_model`, `scores`, optional `strengths` / `weaknesses`).
 * @returns The markdown document as a string.
 */
function generateResultsMarkdown(rankings, allScores) {
	const dimensions = [
		"clarity",
		"feasibility",
		"scalability",
		"maintainability",
		"completeness",
		"overall"
	];
	// Render a missing or non-numeric score as "N/A" instead of throwing on `.toFixed`.
	const formatScore = (value) => Number.isFinite(value) ? value.toFixed(1) : "N/A";
	const reviewers = [...new Set(allScores.map((s) => s.reviewer_model))];
	let md = `# Design Lab Results

Generated: ${(/* @__PURE__ */ new Date()).toISOString()}

## Summary

| Rank | Design | Avg Score | Variance | Reviewers |
|------|--------|-----------|----------|-----------|
${rankings.map((r) => `| ${r.rank} | ${r.design_id} | ${r.average_score.toFixed(1)} | ${r.variance.toFixed(2)} | ${r.reviewer_count} |`).join("\n")}

## Detailed Score Breakdown

### Average Scores by Dimension

| Design | ${dimensions.join(" | ")} |
|--------|${"---|".repeat(dimensions.length)}
${rankings.map((r) => `| ${r.design_id} | ${dimensions.map((d) => formatScore(r.score_breakdown[d])).join(" | ")} |`).join("\n")}

## Reviewer × Design Matrix

### Overall Scores

| Reviewer | ${rankings.map((r) => r.design_id).join(" | ")} |
|----------|${"---|".repeat(rankings.length)}
${reviewers.map((reviewer) => {
		return `| ${reviewer} | ${rankings.map((r) => {
			const score = allScores.find((s) => s.design_id === r.design_id && s.reviewer_model === reviewer);
			return formatScore(score?.scores?.overall);
		}).join(" | ")} |`;
	}).join("\n")}

## Key Observations

`;
	const topDesign = rankings[0];
	// With no rankings there is no winner to report; say so instead of dereferencing undefined.
	if (topDesign) md += `### Winner: ${topDesign.design_id}

- **Average Score**: ${topDesign.average_score.toFixed(1)}/10
- **Variance**: ${topDesign.variance.toFixed(2)} (${topDesign.variance < 1 ? "high consensus" : topDesign.variance < 2 ? "moderate consensus" : "low consensus"})

`;
	else md += `No designs were ranked.\n\n`;
	md += `### Strengths and Weaknesses\n\n`;
	for (const ranking of rankings.slice(0, 3)) {
		const designScores = allScores.filter((s) => s.design_id === ranking.design_id);
		// A reviewer may omit these lists; treat that as "none" rather than printing "undefined".
		const strengths = [...new Set(designScores.flatMap((s) => s.strengths ?? []))].slice(0, 3);
		const weaknesses = [...new Set(designScores.flatMap((s) => s.weaknesses ?? []))].slice(0, 3);
		md += `#### ${ranking.rank}. ${ranking.design_id}\n\n`;
		md += `**Strengths**:\n${strengths.map((s) => `- ${s}`).join("\n")}\n\n`;
		md += `**Weaknesses**:\n${weaknesses.map((w) => `- ${w}`).join("\n")}\n\n`;
	}
	return md;
}
|
|
996
|
+
|
|
997
|
+
//#endregion
|
|
998
|
+
//#region src/design-lab.ts
|
|
999
|
+
/**
 * OpenCode Design Lab Plugin
 *
 * Generates multiple independent design proposals using different AI models,
 * then systematically evaluates, compares, and ranks those designs.
 *
 * Loads the plugin configuration for `ctx.directory` and registers the
 * `generate_designs`, `review_designs` and `aggregate_scores` tools.
 */
const DesignLab = async (ctx) => {
	const pluginConfig = loadPluginConfig(ctx.directory);
	const registeredTools = {
		generate_designs: createGenerateDesignsTool(ctx, pluginConfig),
		review_designs: createReviewDesignsTool(ctx, pluginConfig),
		aggregate_scores: createAggregateScoresTool(ctx, pluginConfig)
	};
	return { tool: registeredTools };
};
|
|
1013
|
+
|
|
1014
|
+
//#endregion
|
|
1015
|
+
export { DesignLab };
|