opencode-design-lab 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1015 @@
1
+ import { z } from "zod";
2
+ import * as fs from "fs";
3
+ import * as path from "path";
4
+ import * as os from "os";
5
+ import { tool } from "@opencode-ai/plugin";
6
+ import pino from "pino";
7
+
8
+ //#region src/config/schema.ts
9
+ /**
10
+ * Configuration schema for OpenCode Design Lab plugin
11
+ */
12
+ const DesignLabConfigSchema = z.object({
13
+ design_models: z.array(z.string()).min(2, "At least 2 design models required"),
14
+ review_models: z.array(z.string()).optional(),
15
+ base_output_dir: z.string().default(".design-lab"),
16
+ design_agent_temperature: z.number().min(0).max(2).default(.7),
17
+ review_agent_temperature: z.number().min(0).max(2).default(.1),
18
+ topic_generator_model: z.string().optional()
19
+ });
20
+ /**
21
+ * Design artifact schema - what each design agent must produce
22
+ */
23
+ const DesignArtifactSchema = z.object({
24
+ title: z.string(),
25
+ summary: z.string(),
26
+ assumptions: z.array(z.string()),
27
+ architecture_overview: z.string(),
28
+ components: z.array(z.object({
29
+ name: z.string(),
30
+ description: z.string(),
31
+ responsibilities: z.array(z.string())
32
+ })),
33
+ data_flow: z.string(),
34
+ tradeoffs: z.array(z.object({
35
+ aspect: z.string(),
36
+ options: z.array(z.string()),
37
+ chosen: z.string(),
38
+ rationale: z.string()
39
+ })),
40
+ risks: z.array(z.object({
41
+ risk: z.string(),
42
+ impact: z.enum([
43
+ "low",
44
+ "medium",
45
+ "high"
46
+ ]),
47
+ mitigation: z.string()
48
+ })),
49
+ open_questions: z.array(z.string())
50
+ });
51
+ /**
52
+ * Score schema - what review agents must produce
53
+ */
54
+ const ScoreSchema = z.object({
55
+ design_id: z.string(),
56
+ reviewer_model: z.string(),
57
+ scores: z.object({
58
+ clarity: z.number().min(0).max(10),
59
+ feasibility: z.number().min(0).max(10),
60
+ scalability: z.number().min(0).max(10),
61
+ maintainability: z.number().min(0).max(10),
62
+ completeness: z.number().min(0).max(10),
63
+ overall: z.number().min(0).max(10)
64
+ }),
65
+ justification: z.string(),
66
+ strengths: z.array(z.string()),
67
+ weaknesses: z.array(z.string()),
68
+ missing_considerations: z.array(z.string())
69
+ });
70
+ /**
71
+ * Aggregated ranking schema
72
+ */
73
+ const RankingSchema = z.object({
74
+ design_id: z.string(),
75
+ rank: z.number().int().positive(),
76
+ average_score: z.number(),
77
+ score_breakdown: z.record(z.string(), z.number()),
78
+ variance: z.number(),
79
+ reviewer_count: z.number().int()
80
+ });
81
+
82
+ //#endregion
83
+ //#region src/config/loader.ts
84
+ /**
85
+ * Deep merge two objects, with override taking precedence
86
+ */
87
/**
 * Deep-merge two plain objects, with `override` taking precedence.
 * Arrays and non-object values are replaced, not merged.
 *
 * Fix: the original used `for...in` (which also walks inherited keys) and
 * copied every key verbatim, so an untrusted config file containing a
 * "__proto__", "constructor" or "prototype" key could pollute the merged
 * object's prototype chain. Those keys are now skipped and only own
 * enumerable keys are merged.
 *
 * @param {object} base - Lower-priority object.
 * @param {object} override - Higher-priority object.
 * @returns {object} A new merged object; inputs are not mutated.
 */
function deepMerge(base, override) {
  const result = { ...base };
  for (const key of Object.keys(override)) {
    // Guard against prototype pollution from untrusted config input.
    if (key === "__proto__" || key === "constructor" || key === "prototype") continue;
    const overrideValue = override[key];
    const baseValue = base[key];
    const bothPlainObjects =
      typeof overrideValue === "object" && overrideValue !== null && !Array.isArray(overrideValue) &&
      typeof baseValue === "object" && baseValue !== null && !Array.isArray(baseValue);
    result[key] = bothPlainObjects ? deepMerge(baseValue, overrideValue) : overrideValue;
  }
  return result;
}
97
+ /**
98
+ * Parse JSONC (JSON with comments)
99
+ */
100
/**
 * Parse JSONC (JSON with // line comments and slash-star block comments).
 *
 * Fix: the original stripped comments with two blanket regex replaces,
 * which corrupted string literals containing comment-like sequences
 * (e.g. {"url": "http://example.com"}). This version scans the text and
 * copies string literals verbatim (honoring backslash escapes), removing
 * comments only outside of strings.
 *
 * @param {string} content - Raw JSONC text.
 * @returns {unknown} The parsed JSON value.
 * @throws {SyntaxError} When the comment-stripped text is not valid JSON.
 */
function parseJsonc(content) {
  let stripped = "";
  let i = 0;
  while (i < content.length) {
    const ch = content[i];
    if (ch === '"') {
      // Copy the whole string literal unchanged, including escapes.
      stripped += ch;
      i++;
      while (i < content.length) {
        const c = content[i];
        stripped += c;
        if (c === "\\") {
          stripped += content[i + 1] ?? "";
          i += 2;
          continue;
        }
        i++;
        if (c === '"') break;
      }
      continue;
    }
    if (ch === "/" && content[i + 1] === "/") {
      // Line comment: skip to end of line (keep the newline itself).
      while (i < content.length && content[i] !== "\n") i++;
      continue;
    }
    if (ch === "/" && content[i + 1] === "*") {
      // Block comment: skip past the closing */.
      i += 2;
      while (i < content.length && !(content[i] === "*" && content[i + 1] === "/")) i++;
      i += 2;
      continue;
    }
    stripped += ch;
    i++;
  }
  return JSON.parse(stripped);
}
104
+ /**
105
+ * Load config from a specific path if it exists
106
+ */
107
/**
 * Load a partial design-lab config from `configPath`, trying the bare
 * path plus `.json` and `.jsonc` extensions in order.
 *
 * @param {string} configPath - Base path without extension.
 * @returns {object|null} Validated partial config, or null when no file
 *   exists, validation fails, or reading/parsing throws.
 */
function loadConfigFromPath(configPath) {
  const candidates = [configPath, `${configPath}.json`, `${configPath}.jsonc`];
  try {
    for (const fullPath of candidates) {
      if (!fs.existsSync(fullPath)) continue;
      const rawConfig = parseJsonc(fs.readFileSync(fullPath, "utf-8"));
      // Partial parse: each source may supply only a subset of keys.
      const result = DesignLabConfigSchema.partial().safeParse(rawConfig);
      if (result.success) return result.data;
      console.error(`Config validation error in ${fullPath}:`, result.error.issues);
      return null;
    }
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    console.error(`Error loading config from ${configPath}:`, errorMsg);
  }
  return null;
}
129
+ /**
130
+ * Get the user config directory (cross-platform)
131
+ * OpenCode uses ~/.config/opencode on all Unix-like platforms
132
+ */
133
/**
 * Resolve the per-user configuration directory (cross-platform).
 * Windows: %APPDATA% (falling back to ~/AppData/Roaming);
 * elsewhere: $XDG_CONFIG_HOME (falling back to ~/.config).
 *
 * @returns {string} Absolute path to the user config directory.
 */
function getUserConfigDir() {
  const isWindows = process.platform === "win32";
  if (isWindows) {
    return process.env.APPDATA || path.join(os.homedir(), "AppData", "Roaming");
  }
  return process.env.XDG_CONFIG_HOME || path.join(os.homedir(), ".config");
}
137
+ /**
138
+ * Load and merge plugin configuration from multiple sources
139
+ *
140
+ * Priority (highest to lowest):
141
+ * 1. Project-level config: .opencode/design-lab.json(c)
142
+ * 2. User-level config: ~/.config/opencode/design-lab.json(c)
143
+ *
144
+ * @param directory - Project directory
145
+ * @returns Merged and validated configuration
146
+ */
147
/**
 * Load and merge plugin configuration from multiple sources.
 *
 * Priority (highest to lowest):
 * 1. Project-level config: .opencode/design-lab.json(c)
 * 2. User-level config: ~/.config/opencode/design-lab.json(c)
 *
 * @param {string} directory - Project directory.
 * @returns {object} Merged and validated configuration.
 * @throws {Error} When the merged config fails schema validation.
 */
function loadPluginConfig(directory) {
  // Ordered lowest-priority first so later merges win.
  const sources = [
    loadConfigFromPath(path.join(getUserConfigDir(), "opencode", "design-lab")),
    loadConfigFromPath(path.join(directory, ".opencode", "design-lab"))
  ];
  let mergedConfig = {};
  for (const source of sources) {
    if (source) mergedConfig = deepMerge(mergedConfig, source);
  }
  const result = DesignLabConfigSchema.safeParse(mergedConfig);
  if (!result.success) {
    const detail = result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join(", ");
    throw new Error(`Invalid design-lab configuration: ${detail}`);
  }
  return result.data;
}
159
+
160
+ //#endregion
161
+ //#region src/agents/index.ts
162
+ /**
163
+ * System prompt for design generation agents
164
+ */
165
+ const DESIGN_AGENT_SYSTEM_PROMPT = `You are a senior software architect generating a design proposal for a technical system.
166
+
167
+ ## Your Task
168
+
169
+ You will receive design requirements and must produce a comprehensive design document as structured JSON.
170
+
171
+ ## Critical Rules
172
+
173
+ 1. **Output ONLY valid JSON** - No markdown, no explanations, no code blocks, just pure JSON
174
+ 2. **Follow the schema exactly** - All required fields must be present
175
+ 3. **Be specific and actionable** - Avoid vague statements
176
+ 4. **Consider real-world constraints** - Think about scalability, maintainability, and security
177
+ 5. **Identify risks proactively** - Every design has risks, acknowledge them
178
+ 6. **List open questions** - What would you need to clarify with stakeholders?
179
+
180
+ ## Required Output Schema
181
+
182
+ Your response must be a JSON object with this exact structure:
183
+
184
+ {
185
+ "title": "Short, descriptive title for the design",
186
+ "summary": "2-3 paragraph executive summary of the design",
187
+ "assumptions": ["List of assumptions you're making"],
188
+ "architecture_overview": "High-level description of the architecture approach",
189
+ "components": [
190
+ {
191
+ "name": "Component name",
192
+ "description": "What this component does",
193
+ "responsibilities": ["List of responsibilities"]
194
+ }
195
+ ],
196
+ "data_flow": "Description of how data flows through the system",
197
+ "tradeoffs": [
198
+ {
199
+ "aspect": "What aspect this tradeoff concerns",
200
+ "options": ["Option 1", "Option 2"],
201
+ "chosen": "Which option you chose",
202
+ "rationale": "Why you chose this option"
203
+ }
204
+ ],
205
+ "risks": [
206
+ {
207
+ "risk": "Description of the risk",
208
+ "impact": "low|medium|high",
209
+ "mitigation": "How to mitigate this risk"
210
+ }
211
+ ],
212
+ "open_questions": ["Questions that need stakeholder input"]
213
+ }
214
+
215
+ Remember: Your entire response must be valid JSON. No other text.`;
216
+ /**
217
+ * Create a design agent configuration for a specific model
218
+ */
219
/**
 * Build the agent configuration for a design-generation subagent.
 * All file/shell/delegation tools are disabled so the agent can only
 * emit its JSON design artifact.
 *
 * @param {string} model - Fully-qualified model identifier.
 * @param {number} temperature - Sampling temperature for the agent.
 * @returns {object} Subagent configuration object.
 */
function createDesignAgent(model, temperature) {
  const disabledTools = {
    write: false,
    edit: false,
    bash: false,
    task: false,
    delegate_task: false
  };
  return {
    model,
    temperature,
    mode: "subagent",
    prompt: DESIGN_AGENT_SYSTEM_PROMPT,
    tools: disabledTools
  };
}
234
+ /**
235
+ * System prompt for review agents
236
+ */
237
+ const REVIEW_AGENT_SYSTEM_PROMPT = `You are a senior technical reviewer evaluating software design proposals.
238
+
239
+ ## Your Task
240
+
241
+ You will receive multiple design proposals for the same requirements. You must:
242
+ 1. Analyze each design thoroughly
243
+ 2. Compare them objectively
244
+ 3. Provide scores for each design
245
+ 4. Generate a markdown review with your analysis
246
+
247
+ ## Scoring Criteria (0-10 scale)
248
+
249
+ - **Clarity**: How well-explained and understandable is the design?
250
+ - **Feasibility**: Can this design be realistically implemented?
251
+ - **Scalability**: Will this design handle growth well?
252
+ - **Maintainability**: Will this design be easy to maintain and evolve?
253
+ - **Completeness**: Does this design address all requirements?
254
+ - **Overall**: Your overall assessment
255
+
256
+ ## Review Format
257
+
258
+ First, provide a detailed markdown review comparing all designs:
259
+
260
+ 1. Executive summary of each design
261
+ 2. Comparative analysis across dimensions
262
+ 3. Strengths and weaknesses of each
263
+ 4. Your recommendation
264
+
265
+ Then, provide a score table in markdown like:
266
+
267
+ | Design | Clarity | Feasibility | Scalability | Maintainability | Completeness | Overall |
268
+ |--------|---------|-------------|-------------|-----------------|--------------|---------|
269
+ | model-name | 8 | 9 | 7 | 8 | 8 | 8 |
270
+
271
+ ## Important
272
+
273
+ - Be objective and fair
274
+ - Support your scores with reasoning
275
+ - Consider the requirements when scoring
276
+ - Do not be biased by model names`;
277
+ /**
278
+ * Create a review agent configuration for a specific model
279
+ */
280
/**
 * Build the agent configuration for a design-review subagent.
 * Mirrors createDesignAgent: all file/shell/delegation tools are
 * disabled so the reviewer only produces text output.
 *
 * @param {string} model - Fully-qualified model identifier.
 * @param {number} temperature - Sampling temperature for the agent.
 * @returns {object} Subagent configuration object.
 */
function createReviewAgent(model, temperature) {
  const disabledTools = {
    write: false,
    edit: false,
    bash: false,
    task: false,
    delegate_task: false
  };
  return {
    model,
    temperature,
    mode: "subagent",
    prompt: REVIEW_AGENT_SYSTEM_PROMPT,
    tools: disabledTools
  };
}
295
+
296
+ //#endregion
297
+ //#region src/utils/logger.ts
298
+ const logLevel = process.env.LOG_LEVEL || "info";
299
+ const levelNames = {
300
+ 10: "TRACE",
301
+ 20: "DEBUG",
302
+ 30: "INFO",
303
+ 40: "WARN",
304
+ 50: "ERROR",
305
+ 60: "FATAL"
306
+ };
307
/**
 * Format the current local time as HH:MM:SS.mmm for log lines.
 *
 * @returns {string} Zero-padded timestamp, e.g. "09:05:03.042".
 */
function formatTimestamp() {
  const pad = (value, width) => String(value).padStart(width, "0");
  const now = new Date();
  const hms = [now.getHours(), now.getMinutes(), now.getSeconds()]
    .map((unit) => pad(unit, 2))
    .join(":");
  return `${hms}.${pad(now.getMilliseconds(), 3)}`;
}
311
+ function createLogStream() {
312
+ const logPath = path.join(process.cwd(), "design-lab.log");
313
+ const stream = fs.createWriteStream(logPath, { flags: "a" });
314
+ return pino.multistream([{
315
+ level: "trace",
316
+ stream: { write: (chunk) => {
317
+ try {
318
+ const log = JSON.parse(chunk);
319
+ const timestamp = formatTimestamp();
320
+ const level = levelNames[log.level] || "UNKNOWN";
321
+ const message = log.msg || "";
322
+ stream.write(`[${timestamp}] ${level}: ${message}\n`);
323
+ } catch (e) {
324
+ stream.write(chunk + "\n");
325
+ }
326
+ } }
327
+ }]);
328
+ }
329
+ const logger = pino({
330
+ level: logLevel,
331
+ timestamp: false
332
+ }, createLogStream());
333
+
334
+ //#endregion
335
+ //#region src/utils/session-helpers.ts
336
+ /**
337
+ * Poll interval for checking session completion
338
+ */
339
+ const POLL_INTERVAL_MS = 500;
340
+ /**
341
+ * Maximum time to wait for a session to complete
342
+ */
343
+ const MAX_POLL_TIME_MS = 600 * 1e3;
344
+ /**
345
+ * Create a new agent session
346
+ */
347
/**
 * Create a child agent session under `parentSessionID`.
 *
 * @param {object} ctx - Plugin context with the opencode client.
 * @param {string} parentSessionID - Session to parent the new one under.
 * @param {string} title - Human-readable session title.
 * @param {string} directory - Working directory for the session.
 * @returns {Promise<string>} The new session's id.
 * @throws {Error} When the session API reports an error.
 */
async function createAgentSession(ctx, parentSessionID, title, directory) {
  logger.info({ parentSessionID, title }, "Creating agent session");
  const createResult = await ctx.client.session.create({
    body: { parentID: parentSessionID, title },
    query: { directory }
  });
  if (createResult.error) {
    logger.error({ error: createResult.error }, "Failed to create session");
    throw new Error(`Failed to create session: ${createResult.error}`);
  }
  logger.info({ sessionID: createResult.data.id }, "Session created successfully");
  return createResult.data.id;
}
366
+ /**
367
+ * Send a prompt to a session with timeout
368
+ */
369
/**
 * Send a prompt to a session with a 180-second timeout, forcibly
 * disabling task/delegate_task tools regardless of caller settings.
 *
 * Fix: the original never cleared its timeout timer, so after a prompt
 * resolved the 180s setTimeout kept the Node event loop alive (and its
 * eventual rejection fired pointlessly). The timer handle is now cleared
 * in a finally block.
 *
 * @param {object} ctx - Plugin context with the opencode client.
 * @param {string} sessionID - Target session id.
 * @param {string} prompt - Full prompt text to send.
 * @param {object} tools - Tool enable/disable map for the session.
 * @throws {Error} On timeout or when the session API reports an error.
 */
async function sendPrompt(ctx, sessionID, prompt, tools) {
  logger.info({
    sessionID,
    promptLength: prompt.length,
    tools
  }, "Sending prompt");
  let timeoutHandle;
  const timeoutPromise = new Promise((_, reject) => {
    timeoutHandle = setTimeout(() => reject(new Error("Prompt send timeout after 180 seconds")), 18e4);
  });
  const sendPromise = ctx.client.session.prompt({
    path: { id: sessionID },
    body: {
      tools: {
        ...tools,
        task: false,
        delegate_task: false
      },
      parts: [{
        type: "text",
        text: prompt
      }]
    }
  });
  let result;
  try {
    result = await Promise.race([sendPromise, timeoutPromise]);
  } finally {
    // Release the timer so it cannot keep the process alive after the race.
    clearTimeout(timeoutHandle);
  }
  if (result.error) {
    logger.error({
      sessionID,
      error: result.error
    }, "Failed to send prompt");
    throw new Error(`Failed to send prompt: ${result.error}`);
  }
  logger.info({ sessionID }, "Prompt sent successfully");
}
400
+ /**
401
+ * Poll for session completion
402
+ */
403
/**
 * Wait for a session to finish by polling its status and message count.
 * Completion is declared once the session reports idle AND the message
 * count has stayed unchanged for 3 consecutive polls (0.5s apart).
 *
 * @param {object} ctx - Plugin context with the opencode client.
 * @param {string} sessionID - Session to watch.
 * @param {AbortSignal} [abortSignal] - Optional cancellation signal.
 * @throws {Error} On abort, or after MAX_POLL_TIME_MS (10 minutes).
 */
async function pollForCompletion(ctx, sessionID, abortSignal) {
  const startedAt = Date.now();
  const STABILITY_REQUIRED = 3;
  let previousCount = 0;
  let stableStreak = 0;
  let iterations = 0;
  logger.info({ sessionID }, "Starting polling for completion");
  while (Date.now() - startedAt < MAX_POLL_TIME_MS) {
    iterations++;
    if (abortSignal?.aborted) {
      logger.warn({ sessionID }, "Polling aborted by signal");
      throw new Error("Task aborted");
    }
    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
    const sessionStatus = ((await ctx.client.session.status()).data ?? {})[sessionID];
    if (iterations % 10 === 0) {
      logger.info({
        sessionID,
        status: sessionStatus?.type,
        pollCount: iterations,
        elapsed: Date.now() - startedAt
      }, "Polling status check");
    }
    if (sessionStatus && sessionStatus.type !== "idle") {
      // Session still busy: restart stability tracking from scratch.
      stableStreak = 0;
      previousCount = 0;
      continue;
    }
    const messagesCheck = await ctx.client.session.messages({ path: { id: sessionID } });
    const currentCount = (messagesCheck.data ?? messagesCheck).length;
    if (currentCount > 0 && currentCount === previousCount) {
      stableStreak++;
      logger.debug({
        sessionID,
        stablePolls: stableStreak,
        currentMsgCount: currentCount
      }, "Message count stable");
      if (stableStreak >= STABILITY_REQUIRED) {
        logger.info({
          sessionID,
          totalPolls: iterations,
          elapsed: Date.now() - startedAt
        }, "Session completion confirmed");
        return;
      }
    } else {
      stableStreak = 0;
      previousCount = currentCount;
    }
  }
  logger.error({
    sessionID,
    totalPolls: iterations,
    elapsed: MAX_POLL_TIME_MS
  }, "Session timed out");
  throw new Error("Session timed out after 10 minutes");
}
458
+ /**
459
+ * Extract text content from session messages
460
+ */
461
/**
 * Collect the assistant-authored text from a session's message history.
 * Concatenates every "text" and "reasoning" part of every assistant
 * message, separated by blank lines.
 *
 * @param {object} ctx - Plugin context with the opencode client.
 * @param {string} sessionID - Session to read.
 * @returns {Promise<string>} Joined assistant output.
 * @throws {Error} When the messages call errors or no assistant message exists.
 */
async function extractSessionOutput(ctx, sessionID) {
  const messagesResult = await ctx.client.session.messages({ path: { id: sessionID } });
  if (messagesResult.error) {
    throw new Error(`Failed to get messages: ${messagesResult.error}`);
  }
  const fromAssistant = messagesResult.data.filter((m) => m.info?.role === "assistant");
  if (fromAssistant.length === 0) {
    throw new Error("No assistant response found");
  }
  const pieces = fromAssistant.flatMap((message) =>
    (message.parts ?? [])
      .filter((part) => (part.type === "text" || part.type === "reasoning") && part.text)
      .map((part) => part.text)
  );
  return pieces.join("\n\n");
}
470
+ /**
471
+ * Extract JSON from text that may contain markdown code blocks
472
+ */
473
/**
 * Parse JSON out of model output that may wrap it in markdown fences or
 * surrounding prose. Tries, in order: a fenced ```json block, the widest
 * brace/bracket span, then the raw trimmed text.
 *
 * @param {string} text - Raw model output.
 * @returns {unknown} The parsed JSON value.
 * @throws {SyntaxError} When no candidate parses as JSON.
 */
function extractJSON(text) {
  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fenced) {
    return JSON.parse(fenced[1].trim());
  }
  const bare = text.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
  if (bare) {
    return JSON.parse(bare[1]);
  }
  return JSON.parse(text.trim());
}
480
+ /**
481
+ * Extract short model name from full model string
482
+ * e.g., "zhipuai-coding-plan/glm-4.6" -> "glm-4.6"
483
+ */
484
/**
 * Extract the short model name from a provider-qualified model string.
 * e.g. "zhipuai-coding-plan/glm-4.6" -> "glm-4.6"
 * Falls back to the full string when the last segment is empty.
 *
 * @param {string} modelName - Possibly provider-prefixed model id.
 * @returns {string} The final path segment, or the input unchanged.
 */
function getModelShortName(modelName) {
  const segments = modelName.split("/");
  const last = segments.at(-1);
  return last || modelName;
}
488
+ /**
489
+ * Sanitize a string for use in file/directory names
490
+ */
491
/**
 * Sanitize arbitrary text into a safe kebab-case file/directory name:
 * lowercase a-z/0-9/hyphens only, runs collapsed, capped at 50 chars.
 *
 * @param {string} text - Arbitrary input text.
 * @returns {string} Filesystem-safe slug.
 */
function sanitizeForFilename(text) {
  const slug = text
    .toLowerCase()
    .replace(/\//g, "-")
    .replace(/[^a-z0-9\s-]/g, "")
    .replace(/\s+/g, "-")
    .replace(/-+/g, "-");
  return slug.substring(0, 50);
}
494
+
495
+ //#endregion
496
+ //#region src/tools/generate-designs.ts
497
+ /**
498
+ * Create the generate_designs tool
499
+ */
500
+ function createGenerateDesignsTool(ctx, config) {
501
+ return tool({
502
+ description: `Generate multiple independent design proposals for a technical system.
503
+
504
+ This tool creates design proposals using ${config.design_models.length} different AI models:
505
+ ${config.design_models.map((m) => `- ${m}`).join("\n")}
506
+
507
+ Each model generates a design completely independently, without seeing other models' outputs.
508
+
509
+ Use this when you want to explore multiple approaches to a design problem and compare them.`,
510
+ args: {
511
+ requirements: tool.schema.string().describe("Detailed requirements for the design. Include problem statement, constraints, and non-functional requirements."),
512
+ topic: tool.schema.string().describe("Optional short topic name (2-4 words) for the design session. If not provided, one will be generated.").optional()
513
+ },
514
+ async execute(args, toolContext) {
515
+ const { requirements, topic: userTopic } = args;
516
+ const sessionID = toolContext.sessionID;
517
+ const topic = userTopic ? sanitizeForFilename(userTopic) : await generateTopic(ctx, config, requirements, sessionID);
518
+ const date = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
519
+ const labDir = path.join(ctx.directory, config.base_output_dir, `${date}-${topic}`);
520
+ if (fs.existsSync(labDir)) {
521
+ logger.warn({ labDir }, "Lab directory already exists, using existing");
522
+ return `Error: Lab directory already exists at ${labDir}. This may be from a previous attempt. Please check the existing designs or delete the directory to retry.`;
523
+ }
524
+ const designsDir = path.join(labDir, "designs");
525
+ const reviewsDir = path.join(labDir, "reviews");
526
+ const scoresDir = path.join(labDir, "scores");
527
+ fs.mkdirSync(designsDir, { recursive: true });
528
+ fs.mkdirSync(reviewsDir, { recursive: true });
529
+ fs.mkdirSync(scoresDir, { recursive: true });
530
+ const taskData = {
531
+ requirements,
532
+ topic,
533
+ created: (/* @__PURE__ */ new Date()).toISOString(),
534
+ design_models: config.design_models,
535
+ review_models: config.review_models ?? config.design_models
536
+ };
537
+ fs.writeFileSync(path.join(labDir, "task.json"), JSON.stringify(taskData, null, 2));
538
+ const results = [];
539
+ for (const model of config.design_models) try {
540
+ logger.info({ model }, `Starting design generation for model: ${model}`);
541
+ const design = await generateDesign(ctx, config, model, requirements, sessionID);
542
+ const validationResult = DesignArtifactSchema.safeParse(design);
543
+ if (!validationResult.success) {
544
+ logger.error({
545
+ model,
546
+ errors: validationResult.error.issues
547
+ }, "Design schema validation failed");
548
+ results.push({
549
+ model,
550
+ success: false,
551
+ error: `Schema validation failed: ${validationResult.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join(", ")}`
552
+ });
553
+ continue;
554
+ }
555
+ const modelShortName = getModelShortName(model);
556
+ const designFile = path.join(designsDir, `${sanitizeForFilename(modelShortName)}.json`);
557
+ fs.writeFileSync(designFile, JSON.stringify(design, null, 2));
558
+ logger.info({
559
+ model,
560
+ designFile
561
+ }, "Design saved as JSON");
562
+ const markdownFile = path.join(designsDir, `${sanitizeForFilename(modelShortName)}.md`);
563
+ const markdown = formatDesignAsMarkdown(design, model);
564
+ fs.writeFileSync(markdownFile, markdown);
565
+ logger.info({
566
+ model,
567
+ markdownFile
568
+ }, "Design saved as Markdown");
569
+ results.push({
570
+ model,
571
+ success: true
572
+ });
573
+ } catch (err) {
574
+ const errorMsg = err instanceof Error ? err.message : String(err);
575
+ logger.error({
576
+ model,
577
+ error: errorMsg
578
+ }, "Design generation failed");
579
+ results.push({
580
+ model,
581
+ success: false,
582
+ error: errorMsg
583
+ });
584
+ }
585
+ const successCount = results.filter((r) => r.success).length;
586
+ return `Design generation complete.
587
+
588
+ **Lab Directory**: ${labDir}
589
+
590
+ **Results**: ${successCount} successful, ${results.filter((r) => !r.success).length} failed
591
+
592
+ ${results.map((r) => r.success ? `✅ ${r.model}: Generated successfully` : `❌ ${r.model}: ${r.error}`).join("\n")}
593
+
594
+ ${successCount >= 2 ? `\nNext step: Run the review_designs tool to evaluate and compare the designs.` : `\nWarning: At least 2 successful designs are needed for meaningful comparison.`}`;
595
+ }
596
+ });
597
+ }
598
+ /**
599
+ * Format a design artifact as markdown for human readability
600
+ */
601
/**
 * Format a design artifact as markdown for human readability.
 *
 * @param {object} design - A validated DesignArtifact.
 * @param {string} model - Model that produced the design.
 * @returns {string} Markdown document text.
 */
function formatDesignAsMarkdown(design, model) {
  const bullets = (items) => items.map((item) => `- ${item}\n`).join("");
  const parts = [];
  parts.push(`# ${design.title}\n\n`);
  parts.push(`**Model**: ${model}\n\n`);
  parts.push(`## Summary\n\n${design.summary}\n\n`);
  parts.push(`## Assumptions\n\n${bullets(design.assumptions)}\n`);
  parts.push(`## Architecture Overview\n\n${design.architecture_overview}\n\n`);
  parts.push(`## Components\n\n`);
  for (const component of design.components) {
    parts.push(`### ${component.name}\n\n${component.description}\n\n**Responsibilities**:\n`);
    parts.push(`${bullets(component.responsibilities)}\n`);
  }
  parts.push(`## Data Flow\n\n${design.data_flow}\n\n`);
  parts.push(`## Tradeoffs\n\n`);
  for (const tradeoff of design.tradeoffs) {
    parts.push(`### ${tradeoff.aspect}\n\n`);
    parts.push(`**Options**: ${tradeoff.options.join(", ")}\n\n`);
    parts.push(`**Chosen**: ${tradeoff.chosen}\n\n`);
    parts.push(`**Rationale**: ${tradeoff.rationale}\n\n`);
  }
  parts.push(`## Risks\n\n`);
  for (const risk of design.risks) {
    parts.push(`### ${risk.risk} (Impact: ${risk.impact})\n\n`);
    parts.push(`**Mitigation**: ${risk.mitigation}\n\n`);
  }
  parts.push(`## Open Questions\n\n`);
  parts.push(bullets(design.open_questions));
  return parts.join("");
}
632
+ /**
633
+ * Generate a topic name from requirements
634
+ */
635
/**
 * Generate a concise 2-4 word topic name for the lab directory from the
 * requirements, via a short-lived child session.
 *
 * NOTE(review): the original computed
 * `config.topic_generator_model || config.design_models[0]` and discarded
 * the result, so the configured topic_generator_model was never actually
 * applied. The session helpers used here accept no model override, so the
 * dead expression is removed; routing the topic request to that model
 * remains a TODO.
 *
 * @param {object} ctx - Plugin context.
 * @param {object} config - Validated design-lab config.
 * @param {string} requirements - Full requirements text.
 * @param {string} parentSessionID - Session to parent the helper under.
 * @returns {Promise<string>} Sanitized topic slug.
 */
async function generateTopic(ctx, config, requirements, parentSessionID) {
  // TODO: honor config.topic_generator_model once the session API accepts a model.
  const sessionID = await createAgentSession(ctx, parentSessionID, "Topic Generation", ctx.directory);
  await sendPrompt(ctx, sessionID, `Generate a concise 2-4 word topic name for this design task. Output ONLY the topic name, nothing else.

Requirements:
${requirements.substring(0, 500)}`, {
    write: false,
    edit: false,
    bash: false
  });
  await pollForCompletion(ctx, sessionID);
  return sanitizeForFilename((await extractSessionOutput(ctx, sessionID)).trim());
}
649
+ /**
650
+ * Generate a single design using a specific model
651
+ */
652
/**
 * Run one design-generation agent for `model` and return its parsed
 * JSON design artifact.
 *
 * @param {object} ctx - Plugin context.
 * @param {object} config - Validated design-lab config.
 * @param {string} model - Model to generate with.
 * @param {string} requirements - Full requirements text.
 * @param {string} parentSessionID - Session to parent the agent under.
 * @returns {Promise<unknown>} Parsed design JSON (validated by the caller).
 */
async function generateDesign(ctx, config, model, requirements, parentSessionID) {
  const agentConfig = createDesignAgent(model, config.design_agent_temperature);
  const sessionID = await createAgentSession(ctx, parentSessionID, `Design Generation - ${model}`, ctx.directory);
  const taskPrompt = `Generate a comprehensive design proposal for the following requirements.

## Requirements

${requirements}

## Instructions

1. Analyze the requirements thoroughly
2. Consider multiple approaches before deciding
3. Output your design as valid JSON following the required schema
4. Be specific and actionable in your design

Remember: Your entire response must be valid JSON with no other text.`;
  logger.info({ model, sessionID }, "Sending design prompt to agent");
  // The system prompt is prepended because subagent sessions take one text part.
  await sendPrompt(ctx, sessionID, `${agentConfig.prompt}\n\n${taskPrompt}`, agentConfig.tools);
  logger.info({ model, sessionID }, "Polling for completion");
  await pollForCompletion(ctx, sessionID);
  logger.info({ model, sessionID }, "Extracting session output");
  const output = await extractSessionOutput(ctx, sessionID);
  logger.info({ model, outputLength: output.length }, "Extracting JSON from output");
  return extractJSON(output);
}
691
+
692
+ //#endregion
693
+ //#region src/tools/review-designs.ts
694
+ /**
695
+ * Create the review_designs tool
696
+ */
697
+ function createReviewDesignsTool(ctx, config) {
698
+ const reviewModels = config.review_models ?? config.design_models;
699
+ return tool({
700
+ description: `Review and score design proposals using ${reviewModels.length} reviewer models.
701
+
702
+ Each reviewer analyzes all designs and provides:
703
+ 1. A markdown review comparing the designs
704
+ 2. Numeric scores (0-10) across dimensions: clarity, feasibility, scalability, maintainability, completeness, overall
705
+
706
+ Use this after generate_designs to evaluate and compare the generated designs.`,
707
+ args: { lab_path: tool.schema.string().describe(`Path to the design lab directory (e.g., .design-lab/2024-01-15-api-gateway). If not provided, uses the most recent lab.`).optional() },
708
+ async execute(args, toolContext) {
709
+ const sessionID = toolContext.sessionID;
710
+ const labDir = args.lab_path ? path.resolve(ctx.directory, args.lab_path) : findMostRecentLab$1(ctx.directory, config.base_output_dir);
711
+ if (!labDir) return "Error: No design lab found. Run generate_designs first.";
712
+ const designsDir = path.join(labDir, "designs");
713
+ const reviewsDir = path.join(labDir, "reviews");
714
+ const scoresDir = path.join(labDir, "scores");
715
+ const designFiles = fs.readdirSync(designsDir).filter((f) => f.endsWith(".json"));
716
+ if (designFiles.length === 0) return "Error: No designs found in the lab directory.";
717
+ const designs = {};
718
+ for (const file of designFiles) {
719
+ const designId = file.replace(".json", "");
720
+ const content = fs.readFileSync(path.join(designsDir, file), "utf-8");
721
+ designs[designId] = JSON.parse(content);
722
+ }
723
+ const taskPath = path.join(labDir, "task.json");
724
+ const requirements = JSON.parse(fs.readFileSync(taskPath, "utf-8")).requirements;
725
+ const results = [];
726
+ for (const model of reviewModels) try {
727
+ const { review, scores } = await generateReview(ctx, config, model, requirements, designs, sessionID);
728
+ const reviewFile = path.join(reviewsDir, `review-${sanitizeForFilename(getModelShortName(model))}.md`);
729
+ fs.writeFileSync(reviewFile, review);
730
+ logger.info({
731
+ model,
732
+ reviewFile
733
+ }, "Review saved");
734
+ for (const score of scores) {
735
+ const validationResult = ScoreSchema.safeParse(score);
736
+ if (!validationResult.success) console.warn(`Score validation warning for ${score.design_id}:`, validationResult.error);
737
+ const designShortName = getModelShortName(score.design_id);
738
+ const reviewerShortName = getModelShortName(model);
739
+ const scoreFile = path.join(scoresDir, `${sanitizeForFilename(designShortName)}-reviewed-by-${sanitizeForFilename(reviewerShortName)}.json`);
740
+ fs.writeFileSync(scoreFile, JSON.stringify(score, null, 2));
741
+ }
742
+ results.push({
743
+ model,
744
+ success: true
745
+ });
746
+ } catch (err) {
747
+ const errorMsg = err instanceof Error ? err.message : String(err);
748
+ results.push({
749
+ model,
750
+ success: false,
751
+ error: errorMsg
752
+ });
753
+ }
754
+ const successCount = results.filter((r) => r.success).length;
755
+ return `Review complete.
756
+
757
+ **Lab Directory**: ${labDir}
758
+
759
+ **Results**: ${successCount} successful, ${results.filter((r) => !r.success).length} failed
760
+
761
+ ${results.map((r) => r.success ? `✅ ${r.model}: Review generated` : `❌ ${r.model}: ${r.error}`).join("\n")}
762
+
763
+ **Reviews saved to**: ${reviewsDir}
764
+ **Scores saved to**: ${scoresDir}
765
+
766
+ ${successCount > 0 ? `\nNext step: Run the aggregate_scores tool to generate final rankings.` : ""}`;
767
+ }
768
+ });
769
+ }
770
+ /**
771
+ * Find the most recent design lab directory
772
+ */
773
/**
 * Locate the most recent design-lab directory under `projectDir/baseDir`.
 * Lab directory names begin with an ISO date, so a reverse lexical sort
 * puts the newest first.
 *
 * @param {string} projectDir - Project root.
 * @param {string} baseDir - Lab base directory name (e.g. ".design-lab").
 * @returns {string|null} Path to the newest lab, or null when none exist.
 */
function findMostRecentLab$1(projectDir, baseDir) {
  const root = path.join(projectDir, baseDir);
  if (!fs.existsSync(root)) {
    return null;
  }
  const labDirs = fs
    .readdirSync(root)
    .filter((entry) => fs.statSync(path.join(root, entry)).isDirectory())
    .sort()
    .reverse();
  return labDirs.length > 0 ? path.join(root, labDirs[0]) : null;
}
780
/**
 * Generate a review of all design proposals using a specific reviewer model.
 *
 * Runs a two-step conversation in a dedicated child session:
 *   1. Ask for a free-form markdown review comparing every design.
 *   2. Ask for structured JSON scores, constrained to the exact design IDs.
 *
 * @param {object} ctx - Plugin context; `ctx.directory` is used as the session working directory.
 * @param {object} config - Plugin configuration; only `review_agent_temperature` is read here.
 * @param {string} model - Model identifier used for the reviewer agent and echoed into `reviewer_model`.
 * @param {string} requirements - Original requirements text embedded into the review prompt.
 * @param {object} designs - Map of design ID -> design artifact; serialized as JSON into the prompt.
 * @param {string} parentSessionID - Parent session under which the review session is created.
 * @returns {Promise<{review: string, scores: any}>} The markdown review text and the
 *   value parsed by `extractJSON` from the second exchange (per the prompt, an array of
 *   score objects — NOTE(review): not validated here; verify against ScoreSchema upstream).
 */
async function generateReview(ctx, config, model, requirements, designs, parentSessionID) {
  const agentConfig = createReviewAgent(model, config.review_agent_temperature);
  const sessionID = await createAgentSession(ctx, parentSessionID, `Design Review - ${model}`, ctx.directory);
  // Step 1 prompt: free-form comparative review of every design artifact.
  const prompt = `Review and compare the following design proposals.

## Original Requirements

${requirements}

## Designs to Review

${Object.entries(designs).map(([id, design]) => `## Design: ${id}\n\n\`\`\`json\n${JSON.stringify(design, null, 2)}\n\`\`\``).join("\n\n---\n\n")}

## Your Task

1. Analyze each design thoroughly
2. Compare them across dimensions: clarity, feasibility, scalability, maintainability, completeness
3. Provide a detailed markdown review with your analysis
4. At the end, include a score table in markdown format
5. Identify strengths and weaknesses of each design

Be objective and support your assessments with specific observations.`;
  // The agent's system-style prompt is prepended to the task prompt in one message.
  await sendPrompt(ctx, sessionID, `${agentConfig.prompt}\n\n${prompt}`, agentConfig.tools);
  await pollForCompletion(ctx, sessionID);
  // Capture the review BEFORE the scoring exchange — extractSessionOutput is
  // presumably the latest assistant output, which step 2 would replace.
  // TODO confirm helper semantics.
  const review = await extractSessionOutput(ctx, sessionID);
  // Step 2 prompt: structured scores. The valid design IDs are listed explicitly
  // so the model cannot invent its own identifiers.
  await sendPrompt(ctx, sessionID, `Now output the scores for each design as a JSON array. Each element should have this structure:

{
"design_id": "EXACT_DESIGN_ID_FROM_LIST_BELOW",
"reviewer_model": "${model}",
"scores": {
"clarity": 0-10,
"feasibility": 0-10,
"scalability": 0-10,
"maintainability": 0-10,
"completeness": 0-10,
"overall": 0-10
},
"justification": "Brief overall justification",
"strengths": ["list", "of", "strengths"],
"weaknesses": ["list", "of", "weaknesses"],
"missing_considerations": ["list", "of", "things", "missing"]
}

**IMPORTANT**: The "design_id" field MUST be one of these exact values:
${Object.keys(designs).map((id) => `- "${id}"`).join("\n")}

Output ONLY the JSON array with one score object per design. No other text.`, agentConfig.tools);
  await pollForCompletion(ctx, sessionID);
  return {
    review,
    scores: extractJSON(await extractSessionOutput(ctx, sessionID))
  };
}
837
+
838
+ //#endregion
839
+ //#region src/tools/aggregate-scores.ts
840
/**
 * Create the aggregate_scores tool.
 *
 * Reads every reviewer score file from `<lab>/scores/*.json`, averages scores
 * per design across reviewers, computes the variance of the "overall" score as
 * a disagreement metric, ranks the designs, and writes both `ranking.json` and
 * a human-readable `results.md` into `<lab>/results/`.
 *
 * @param {object} ctx - Plugin context; `ctx.directory` is the project root.
 * @param {object} config - Plugin configuration; `base_output_dir` locates labs.
 * @returns The tool definition consumed by the OpenCode plugin host.
 */
function createAggregateScoresTool(ctx, config) {
  return tool({
    description: `Aggregate scores from all reviewers and generate final rankings.

This tool:
1. Reads all score files from the reviews
2. Calculates average scores per design
3. Computes variance/disagreement metrics
4. Generates a final ranking with results.md

Use this after review_designs to get the final comparison.`,
    args: { lab_path: tool.schema.string().describe(`Path to the design lab directory. If not provided, uses the most recent lab.`).optional() },
    async execute(args) {
      // Resolve the lab directory: an explicit path wins, otherwise the newest lab.
      const labDir = args.lab_path ? path.resolve(ctx.directory, args.lab_path) : findMostRecentLab(ctx.directory, config.base_output_dir);
      if (!labDir) return "Error: No design lab found. Run generate_designs first.";
      const scoresDir = path.join(labDir, "scores");
      const resultsDir = path.join(labDir, "results");
      if (!fs.existsSync(scoresDir)) return "Error: No scores directory found. Run review_designs first.";
      fs.mkdirSync(resultsDir, { recursive: true });
      const scoreFiles = fs.readdirSync(scoresDir).filter((f) => f.endsWith(".json"));
      if (scoreFiles.length === 0) return "Error: No score files found. Run review_designs first.";
      // Load every reviewer's score object. A corrupt or unreadable file is
      // reported as an error string — consistent with the other failure paths
      // above — instead of crashing the tool with an unhandled exception.
      const allScores = [];
      for (const file of scoreFiles) {
        try {
          const content = fs.readFileSync(path.join(scoresDir, file), "utf-8");
          allScores.push(JSON.parse(content));
        } catch (err) {
          return `Error: Could not read score file ${file}: ${err instanceof Error ? err.message : String(err)}`;
        }
      }
      // Group score objects by the design they evaluate.
      const scoresByDesign = {};
      for (const score of allScores) {
        if (!scoresByDesign[score.design_id]) scoresByDesign[score.design_id] = [];
        scoresByDesign[score.design_id].push(score);
      }
      const rankings = [];
      for (const [designId, scores] of Object.entries(scoresByDesign)) {
        const overallScores = scores.map((s) => s.scores.overall);
        const avgOverall = overallScores.reduce((a, b) => a + b, 0) / overallScores.length;
        // Population variance of the overall scores — used as a reviewer-disagreement metric.
        const variance = overallScores.reduce((sum, s) => sum + Math.pow(s - avgOverall, 2), 0) / overallScores.length;
        const dimensions = [
          "clarity",
          "feasibility",
          "scalability",
          "maintainability",
          "completeness",
          "overall"
        ];
        // Per-dimension average across reviewers.
        // NOTE(review): assumes every reviewer scored every dimension; a missing
        // dimension would make that average NaN — verify upstream validation.
        const scoreBreakdown = {};
        for (const dim of dimensions) {
          const dimScores = scores.map((s) => s.scores[dim]);
          scoreBreakdown[dim] = dimScores.reduce((a, b) => a + b, 0) / dimScores.length;
        }
        rankings.push({
          design_id: designId,
          rank: 0, // placeholder; assigned after sorting below
          average_score: avgOverall,
          score_breakdown: scoreBreakdown,
          variance,
          reviewer_count: scores.length
        });
      }
      // Highest average score first; ranks are 1-based.
      rankings.sort((a, b) => b.average_score - a.average_score);
      rankings.forEach((r, i) => {
        r.rank = i + 1;
      });
      fs.writeFileSync(path.join(resultsDir, "ranking.json"), JSON.stringify(rankings, null, 2));
      const resultsMarkdown = generateResultsMarkdown(rankings, allScores);
      fs.writeFileSync(path.join(resultsDir, "results.md"), resultsMarkdown);
      return `Aggregation complete.

**Rankings saved to**: ${path.join(resultsDir, "ranking.json")}
**Results summary saved to**: ${path.join(resultsDir, "results.md")}

## Final Rankings

${rankings.map((r) => `${r.rank}. **${r.design_id}** - Score: ${r.average_score.toFixed(1)}/10 (variance: ${r.variance.toFixed(2)})`).join("\n")}

View the full results in ${path.join(resultsDir, "results.md")}`;
    }
  });
}
921
/**
 * Find the most recent design lab directory under `<projectDir>/<baseDir>`.
 *
 * Lab directories are timestamp-named, so the lexicographically greatest
 * entry name is the newest lab. Non-directory entries are ignored.
 *
 * @param {string} projectDir - Project root directory.
 * @param {string} baseDir - Base output directory name (relative to the project root).
 * @returns {string | null} Absolute path of the newest lab, or null when none exists.
 */
function findMostRecentLab(projectDir, baseDir) {
  const root = path.join(projectDir, baseDir);
  if (!fs.existsSync(root)) return null;
  const entries = fs.readdirSync(root);
  const dirNames = entries.filter((entry) => fs.statSync(path.join(root, entry)).isDirectory());
  if (dirNames.length === 0) return null;
  // Descending lexicographic order puts the newest timestamp-named lab first.
  dirNames.sort((a, b) => (a < b ? 1 : a > b ? -1 : 0));
  return path.join(root, dirNames[0]);
}
931
/**
 * Generate the results markdown file.
 *
 * Renders the aggregated rankings as a markdown report: a summary table, a
 * per-dimension score breakdown, a reviewer-by-design matrix of overall
 * scores, and key observations (winner plus strengths/weaknesses of the top
 * three designs).
 *
 * NOTE(review): assumes `rankings` is non-empty — `rankings[0]` is read
 * unconditionally below; callers must guarantee at least one design.
 *
 * @param {Array<object>} rankings - Sorted ranking entries (rank, design_id,
 *   average_score, variance, reviewer_count, score_breakdown per dimension).
 * @param {Array<object>} allScores - Every reviewer score object (design_id,
 *   reviewer_model, scores, strengths, weaknesses).
 * @returns {string} Complete markdown document.
 */
function generateResultsMarkdown(rankings, allScores) {
  // Column order mirrors the score schema; "overall" is included as a column.
  const dimensions = [
    "clarity",
    "feasibility",
    "scalability",
    "maintainability",
    "completeness",
    "overall"
  ];
  // Distinct reviewer models, used as rows of the matrix section.
  const reviewers = [...new Set(allScores.map((s) => s.reviewer_model))];
  let md = `# Design Lab Results

Generated: ${(/* @__PURE__ */ new Date()).toISOString()}

## Summary

| Rank | Design | Avg Score | Variance | Reviewers |
|------|--------|-----------|----------|-----------|
${rankings.map((r) => `| ${r.rank} | ${r.design_id} | ${r.average_score.toFixed(1)} | ${r.variance.toFixed(2)} | ${r.reviewer_count} |`).join("\n")}

## Detailed Score Breakdown

### Average Scores by Dimension

| Design | ${dimensions.join(" | ")} |
|--------|${"---|".repeat(dimensions.length)}
${rankings.map((r) => `| ${r.design_id} | ${dimensions.map((d) => r.score_breakdown[d].toFixed(1)).join(" | ")} |`).join("\n")}

## Reviewer × Design Matrix

### Overall Scores

| Reviewer | ${rankings.map((r) => r.design_id).join(" | ")} |
|----------|${"---|".repeat(rankings.length)}
${reviewers.map((reviewer) => {
    return `| ${reviewer} | ${rankings.map((r) => {
      const score = allScores.find((s) => s.design_id === r.design_id && s.reviewer_model === reviewer);
      return score ? score.scores.overall.toFixed(1) : "N/A";
    }).join(" | ")} |`;
  }).join("\n")}

## Key Observations

`;
  // Winner section: top-ranked design with a coarse consensus label derived
  // from the variance of its overall scores.
  const topDesign = rankings[0];
  md += `### Winner: ${topDesign.design_id}

- **Average Score**: ${topDesign.average_score.toFixed(1)}/10
- **Variance**: ${topDesign.variance.toFixed(2)} (${topDesign.variance < 1 ? "high consensus" : topDesign.variance < 2 ? "moderate consensus" : "low consensus"})

`;
  md += `### Strengths and Weaknesses\n\n`;
  // Only the top three designs get a strengths/weaknesses section; duplicates
  // across reviewers are collapsed and at most three of each are shown.
  for (const ranking of rankings.slice(0, 3)) {
    const designScores = allScores.filter((s) => s.design_id === ranking.design_id);
    const strengths = [...new Set(designScores.flatMap((s) => s.strengths))].slice(0, 3);
    const weaknesses = [...new Set(designScores.flatMap((s) => s.weaknesses))].slice(0, 3);
    md += `#### ${ranking.rank}. ${ranking.design_id}\n\n`;
    md += `**Strengths**:\n${strengths.map((s) => `- ${s}`).join("\n")}\n\n`;
    md += `**Weaknesses**:\n${weaknesses.map((w) => `- ${w}`).join("\n")}\n\n`;
  }
  return md;
}
996
+
997
+ //#endregion
998
+ //#region src/design-lab.ts
999
/**
 * OpenCode Design Lab Plugin entry point.
 *
 * Generates multiple independent design proposals using different AI models,
 * then systematically evaluates, compares, and ranks those designs.
 *
 * Loads the plugin configuration from the project directory and exposes the
 * three lab tools (generate, review, aggregate) to the plugin host.
 *
 * @param {object} ctx - Plugin context; `ctx.directory` is the project root.
 * @returns {Promise<object>} Plugin definition with a `tool` registry.
 */
const DesignLab = async (ctx) => {
  const config = loadPluginConfig(ctx.directory);
  const tools = {
    generate_designs: createGenerateDesignsTool(ctx, config),
    review_designs: createReviewDesignsTool(ctx, config),
    aggregate_scores: createAggregateScoresTool(ctx, config)
  };
  return { tool: tools };
};
1013
+
1014
+ //#endregion
1015
+ export { DesignLab };