opencode-design-lab 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,236 @@
1
- import { z } from "zod";
2
- import * as fs from "fs";
1
+ import pino from "pino";
3
2
  import * as path from "path";
3
+ import * as fs from "fs";
4
+ import { z } from "zod";
4
5
  import * as os from "os";
5
- import { tool } from "@opencode-ai/plugin";
6
- import pino from "pino";
7
6
 
7
+ //#region src/utils/logger.ts
8
+ const logLevel = process.env.LOG_LEVEL || "info";
9
+ const levelNames = {
10
+ 10: "TRACE",
11
+ 20: "DEBUG",
12
+ 30: "INFO",
13
+ 40: "WARN",
14
+ 50: "ERROR",
15
+ 60: "FATAL"
16
+ };
17
/**
 * Format a time of day as `HH:MM:SS.mmm` in the local timezone.
 *
 * @param {Date} [now] - Moment to format; defaults to the current time, so
 *   existing zero-argument callers behave exactly as before.
 * @returns {string} Zero-padded local time, e.g. "03:04:05.006".
 */
function formatTimestamp(now = new Date()) {
	const pad = (value, width) => String(value).padStart(width, "0");
	const clock = [
		now.getHours(),
		now.getMinutes(),
		now.getSeconds()
	].map((part) => pad(part, 2)).join(":");
	return `${clock}.${pad(now.getMilliseconds(), 3)}`;
}
21
/**
 * Build the pino destination used by the plugin logger.
 *
 * Opens `design-lab.log` in the current working directory in append mode and
 * returns a pino multistream with a single "trace"-level target. Each JSON
 * record pino emits is rewritten as one plain-text line:
 * `[HH:MM:SS.mmm] LEVEL: message`. Only the `level` and `msg` fields of the
 * record are used; any other fields are dropped.
 *
 * @returns {object} The value returned by `pino.multistream`.
 */
function createLogStream() {
	const logPath = path.join(process.cwd(), "design-lab.log");
	const stream = fs.createWriteStream(logPath, { flags: "a" });
	const writeLine = (chunk) => {
		let line;
		try {
			const log = JSON.parse(chunk);
			const level = levelNames[log.level] || "UNKNOWN";
			line = `[${formatTimestamp()}] ${level}: ${log.msg || ""}\n`;
		} catch {
			// Not a parseable record: pass the chunk through as-is. Only add a
			// newline when one is missing, so already-terminated chunks do not
			// leave blank lines in the log file.
			const raw = String(chunk);
			line = raw.endsWith("\n") ? raw : `${raw}\n`;
		}
		stream.write(line);
	};
	return pino.multistream([{
		level: "trace",
		stream: { write: writeLine }
	}]);
}
39
+ const logger = pino({
40
+ level: logLevel,
41
+ timestamp: false
42
+ }, createLogStream());
43
+
44
+ //#endregion
45
+ //#region src/utils/session-helpers.ts
46
+ /**
47
+ * Maximum time to wait for a session to complete
48
+ */
49
+ const MAX_POLL_TIME_MS = 600 * 1e3;
50
/**
 * Return the part of a model identifier that follows its last "/".
 * Example: "zhipuai-coding-plan/glm-4.6" yields "glm-4.6".
 * If nothing follows the last "/" (or there is no "/"), the input is
 * returned unchanged.
 */
function getModelShortName(modelName) {
	const lastSlash = modelName.lastIndexOf("/");
	const tail = modelName.slice(lastSlash + 1);
	return tail === "" ? modelName : tail;
}
58
+
59
+ //#endregion
60
+ //#region src/agents/index.ts
61
+ const DESIGNER_SUBAGENT_PREFIX = "designer_model_";
62
/**
 * Derive the agent name for a model's designer subagent: the shared
 * designer prefix followed by the model's normalized agent suffix.
 */
function getDesignerSubagentName(model) {
	const suffix = normalizeAgentSuffix(model);
	return DESIGNER_SUBAGENT_PREFIX + suffix;
}
68
+ /**
69
+ * Build the file stem used for design and review markdown files.
70
+ */
71
+ function getDesignerModelFileStem(model) {
72
+ return normalizeModelSlug(model);
73
+ }
74
+ /**
75
+ * Create the primary designer agent configuration.
76
+ */
77
+ function createDesignerPrimaryAgent(options) {
78
+ return {
79
+ description: "Design Lab coordinator that orchestrates model subagents.",
80
+ mode: "primary",
81
+ model: options.designModels[0]?.model ?? options.reviewModels[0]?.model,
82
+ prompt: buildDesignerPrimaryPrompt(options),
83
+ tools: {
84
+ read: true,
85
+ bash: true,
86
+ delegate_task: true,
87
+ edit: false,
88
+ task: false,
89
+ write: false
90
+ },
91
+ permission: {
92
+ bash: "allow",
93
+ edit: "deny",
94
+ webfetch: "deny"
95
+ }
96
+ };
97
+ }
98
+ /**
99
+ * Create a designer subagent configuration for a specific model.
100
+ */
101
+ function createDesignerModelAgent(model) {
102
+ return {
103
+ description: "Design Lab subagent that writes designs or reviews to files.",
104
+ mode: "subagent",
105
+ model,
106
+ prompt: buildDesignerSubagentPrompt(model),
107
+ tools: {
108
+ read: true,
109
+ write: true,
110
+ edit: false,
111
+ bash: false,
112
+ task: false,
113
+ delegate_task: false
114
+ },
115
+ permission: {
116
+ bash: "deny",
117
+ edit: "allow",
118
+ webfetch: "deny"
119
+ }
120
+ };
121
+ }
122
+ function buildDesignerPrimaryPrompt(options) {
123
+ return `You are the Design Lab primary agent. Your job is to orchestrate model subagents to produce design and review markdown files.
124
+
125
+ ## Available subagents
126
+
127
+ Design subagents:
128
+ ${options.designModels.map((spec) => `- ${spec.agentName} (model: ${spec.model}, file: ${spec.fileStem}.md)`).join("\n")}
129
+
130
+ Review subagents:
131
+ ${options.reviewModels.map((spec) => `- ${spec.agentName} (model: ${spec.model}, file: review-${spec.fileStem}.md)`).join("\n")}
132
+
133
+ ## Workflow
134
+
135
+ 1. Create a new run directory under "${options.baseOutputDir}" using the format:
136
+ ${options.baseOutputDir}/YYYY-MM-DD-topic/
137
+ Use a short, lowercase, hyphenated topic derived from the request.
138
+ Use bash for date generation (e.g., "date +%F") and directory creation.
139
+ 2. Create subdirectories:
140
+ - designs/
141
+ - reviews/
142
+ 3. For each design subagent, delegate a design task sequentially:
143
+ - Provide the requirements and the exact output_file path:
144
+ ${options.baseOutputDir}/YYYY-MM-DD-topic/designs/{fileStem}.md
145
+ - The output_file path is mandatory. If you omit it, the subagent must fail.
146
+ - Instruct the subagent to write ONLY to the file and NOT to output the design in chat.
147
+ 4. After all designs are written, delegate review tasks sequentially:
148
+ - Provide the list of design file paths.
149
+ - Provide the exact output_file path:
150
+ ${options.baseOutputDir}/YYYY-MM-DD-topic/reviews/review-{fileStem}.md
151
+ - Each reviewer must produce ONE markdown report comparing ALL designs at once.
152
+ 5. After all reviews are written, read every review file and produce a short summary:
153
+ - Which design is recommended overall
154
+ - Approximate scores per design (from the score table)
155
+ - Notable disagreements between reviewers
156
+
157
+ ## Output rules
158
+
159
+ - Never paste design or review content into the main chat.
160
+ - Return only a concise summary with the run directory, file paths, and the review summary.
161
+ - If asked "what agents will you call", list the design subagents by name.
162
+ - If the user asks for parallel execution, explain that you run sequentially for stability.
163
+ - Use only the subagents listed above; do not invent agent names.`;
164
+ }
165
+ function buildDesignerSubagentPrompt(model) {
166
+ return `You are a Design Lab subagent for model: ${model}.
167
+
168
+ You only take tasks from the primary designer agent. You must write outputs to files and keep chat responses minimal.
169
+
170
+ ## Global rules
171
+
172
+ - Use only read and write tools when needed.
173
+ - NEVER output the design or review content in chat.
174
+ - ALWAYS write to the exact output_file path provided.
175
+ - If output_file is missing or unclear, reply with: "FAILED: missing output_file".
176
+ - After writing, reply with: "WROTE: <output_file>".
177
+ - If you cannot complete the task, reply with: "FAILED: <reason>".
178
+
179
+ ## Design tasks
180
+
181
+ When asked to design:
182
+ - Produce a concise but complete Markdown design document.
183
+ - Use these sections (in this order): Title, Summary, Goals, Non-Goals, Architecture, Components, Data Flow, Tradeoffs, Risks, Open Questions.
184
+ - Write the design to the provided output_file.
185
+
186
+ ## Review tasks
187
+
188
+ When asked to review:
189
+ - Read all provided design files.
190
+ - Produce ONE Markdown report that compares all designs at once.
191
+ - Use the fixed scoring standard below for ALL reviews.
192
+ - Include sections in this exact order:
193
+ 1. Executive Summary
194
+ 2. Comparison Table
195
+ 3. Strengths
196
+ 4. Weaknesses
197
+ 5. Recommendation
198
+ 6. Open Questions
199
+ 7. Scoring Standard
200
+ - At the very bottom, include a Scores Table that rates EACH design.
201
+ - Write the report to the provided output_file.
202
+
203
+ ## Fixed Scoring Standard
204
+
205
+ - Scale: 0-10 for each criterion (10 is best).
206
+ - Criteria and weights (total 100%):
207
+ - Clarity: 20%
208
+ - Feasibility: 25%
209
+ - Scalability: 20%
210
+ - Maintainability: 20%
211
+ - Completeness: 15%
212
+ - Weighted Total (0-10) = sum(score * weight) / 100.
213
+
214
+ ## Scores Table Format (must be last in the report)
215
+
216
+ | Design | Clarity (20%) | Feasibility (25%) | Scalability (20%) | Maintainability (20%) | Completeness (15%) | Weighted Total (0-10) |
217
+ |--------|---------------|-------------------|-------------------|-----------------------|--------------------|-----------------------|
218
+ | model-a | 8 | 9 | 7 | 8 | 8 | 8.1 |`;
219
+ }
220
/**
 * Turn a model identifier into a lowercase, hyphen-separated slug.
 *
 * Starts from the model's short name, then applies the rewrite steps in
 * order: slashes become hyphens; runs of dots, underscores and whitespace
 * become a hyphen; anything outside [a-z0-9-] is dropped; hyphen runs are
 * collapsed; leading and trailing hyphens are trimmed.
 */
function normalizeModelSlug(model) {
	const rewriteSteps = [
		[/\//g, "-"],
		[/[._\s]+/g, "-"],
		[/[^a-z0-9-]/g, ""],
		[/-+/g, "-"],
		[/^-+|-+$/g, ""]
	];
	let slug = getModelShortName(model).toLowerCase();
	for (const [pattern, replacement] of rewriteSteps) {
		slug = slug.replace(pattern, replacement);
	}
	return slug;
}
223
/**
 * Produce the agent-name suffix for a model: its slug with every hyphen
 * removed.
 */
function normalizeAgentSuffix(model) {
	const slug = normalizeModelSlug(model);
	return slug.split("-").join("");
}
226
+
227
+ //#endregion
8
228
  //#region src/config/schema.ts
9
229
  /**
10
230
  * Configuration schema for OpenCode Design Lab plugin
11
231
  */
12
232
  const DesignLabConfigSchema = z.object({
233
+ "$schema": z.string().optional(),
13
234
  design_models: z.array(z.string()).min(2, "At least 2 design models required"),
14
235
  review_models: z.array(z.string()).optional(),
15
236
  base_output_dir: z.string().default(".design-lab"),
@@ -98,7 +319,7 @@ function deepMerge(base, override) {
98
319
  * Parse JSONC (JSON with comments)
99
320
  */
100
321
  function parseJsonc(content) {
101
- const withoutComments = content.replace(/\/\*[\s\S]*?\*\//g, "").replace(/\/\/.*/g, "");
322
+ const withoutComments = content.replace(/\/\*[\s\S]*?\*\//g, "").replace(/^\s*\/\/.*/gm, "");
102
323
  return JSON.parse(withoutComments);
103
324
  }
104
325
  /**
@@ -157,843 +378,6 @@ function loadPluginConfig(directory) {
157
378
  return result.data;
158
379
  }
159
380
 
160
- //#endregion
161
- //#region src/agents/index.ts
162
- /**
163
- * System prompt for design generation agents
164
- */
165
- const DESIGN_AGENT_SYSTEM_PROMPT = `You are a senior software architect generating a design proposal for a technical system.
166
-
167
- ## Your Task
168
-
169
- You will receive design requirements and must produce a comprehensive design document as structured JSON.
170
-
171
- ## Critical Rules
172
-
173
- 1. **Output ONLY valid JSON** - No markdown, no explanations, no code blocks, just pure JSON
174
- 2. **Follow the schema exactly** - All required fields must be present
175
- 3. **Be specific and actionable** - Avoid vague statements
176
- 4. **Consider real-world constraints** - Think about scalability, maintainability, and security
177
- 5. **Identify risks proactively** - Every design has risks, acknowledge them
178
- 6. **List open questions** - What would you need to clarify with stakeholders?
179
-
180
- ## Required Output Schema
181
-
182
- Your response must be a JSON object with this exact structure:
183
-
184
- {
185
- "title": "Short, descriptive title for the design",
186
- "summary": "2-3 paragraph executive summary of the design",
187
- "assumptions": ["List of assumptions you're making"],
188
- "architecture_overview": "High-level description of the architecture approach",
189
- "components": [
190
- {
191
- "name": "Component name",
192
- "description": "What this component does",
193
- "responsibilities": ["List of responsibilities"]
194
- }
195
- ],
196
- "data_flow": "Description of how data flows through the system",
197
- "tradeoffs": [
198
- {
199
- "aspect": "What aspect this tradeoff concerns",
200
- "options": ["Option 1", "Option 2"],
201
- "chosen": "Which option you chose",
202
- "rationale": "Why you chose this option"
203
- }
204
- ],
205
- "risks": [
206
- {
207
- "risk": "Description of the risk",
208
- "impact": "low|medium|high",
209
- "mitigation": "How to mitigate this risk"
210
- }
211
- ],
212
- "open_questions": ["Questions that need stakeholder input"]
213
- }
214
-
215
- Remember: Your entire response must be valid JSON. No other text.`;
216
- /**
217
- * Create a design agent configuration for a specific model
218
- */
219
- function createDesignAgent(model, temperature) {
220
- return {
221
- model,
222
- temperature,
223
- mode: "subagent",
224
- prompt: DESIGN_AGENT_SYSTEM_PROMPT,
225
- tools: {
226
- write: false,
227
- edit: false,
228
- bash: false,
229
- task: false,
230
- delegate_task: false
231
- }
232
- };
233
- }
234
- /**
235
- * System prompt for review agents
236
- */
237
- const REVIEW_AGENT_SYSTEM_PROMPT = `You are a senior technical reviewer evaluating software design proposals.
238
-
239
- ## Your Task
240
-
241
- You will receive multiple design proposals for the same requirements. You must:
242
- 1. Analyze each design thoroughly
243
- 2. Compare them objectively
244
- 3. Provide scores for each design
245
- 4. Generate a markdown review with your analysis
246
-
247
- ## Scoring Criteria (0-10 scale)
248
-
249
- - **Clarity**: How well-explained and understandable is the design?
250
- - **Feasibility**: Can this design be realistically implemented?
251
- - **Scalability**: Will this design handle growth well?
252
- - **Maintainability**: Will this design be easy to maintain and evolve?
253
- - **Completeness**: Does this design address all requirements?
254
- - **Overall**: Your overall assessment
255
-
256
- ## Review Format
257
-
258
- First, provide a detailed markdown review comparing all designs:
259
-
260
- 1. Executive summary of each design
261
- 2. Comparative analysis across dimensions
262
- 3. Strengths and weaknesses of each
263
- 4. Your recommendation
264
-
265
- Then, provide a score table in markdown like:
266
-
267
- | Design | Clarity | Feasibility | Scalability | Maintainability | Completeness | Overall |
268
- |--------|---------|-------------|-------------|-----------------|--------------|---------|
269
- | model-name | 8 | 9 | 7 | 8 | 8 | 8 |
270
-
271
- ## Important
272
-
273
- - Be objective and fair
274
- - Support your scores with reasoning
275
- - Consider the requirements when scoring
276
- - Do not be biased by model names`;
277
- /**
278
- * Create a review agent configuration for a specific model
279
- */
280
- function createReviewAgent(model, temperature) {
281
- return {
282
- model,
283
- temperature,
284
- mode: "subagent",
285
- prompt: REVIEW_AGENT_SYSTEM_PROMPT,
286
- tools: {
287
- write: false,
288
- edit: false,
289
- bash: false,
290
- task: false,
291
- delegate_task: false
292
- }
293
- };
294
- }
295
-
296
- //#endregion
297
- //#region src/utils/logger.ts
298
- const logLevel = process.env.LOG_LEVEL || "info";
299
- const levelNames = {
300
- 10: "TRACE",
301
- 20: "DEBUG",
302
- 30: "INFO",
303
- 40: "WARN",
304
- 50: "ERROR",
305
- 60: "FATAL"
306
- };
307
- function formatTimestamp() {
308
- const now = /* @__PURE__ */ new Date();
309
- return `${String(now.getHours()).padStart(2, "0")}:${String(now.getMinutes()).padStart(2, "0")}:${String(now.getSeconds()).padStart(2, "0")}.${String(now.getMilliseconds()).padStart(3, "0")}`;
310
- }
311
- function createLogStream() {
312
- const logPath = path.join(process.cwd(), "design-lab.log");
313
- const stream = fs.createWriteStream(logPath, { flags: "a" });
314
- return pino.multistream([{
315
- level: "trace",
316
- stream: { write: (chunk) => {
317
- try {
318
- const log = JSON.parse(chunk);
319
- const timestamp = formatTimestamp();
320
- const level = levelNames[log.level] || "UNKNOWN";
321
- const message = log.msg || "";
322
- stream.write(`[${timestamp}] ${level}: ${message}\n`);
323
- } catch (e) {
324
- stream.write(chunk + "\n");
325
- }
326
- } }
327
- }]);
328
- }
329
- const logger = pino({
330
- level: logLevel,
331
- timestamp: false
332
- }, createLogStream());
333
-
334
- //#endregion
335
- //#region src/utils/session-helpers.ts
336
- /**
337
- * Poll interval for checking session completion
338
- */
339
- const POLL_INTERVAL_MS = 500;
340
- /**
341
- * Maximum time to wait for a session to complete
342
- */
343
- const MAX_POLL_TIME_MS = 600 * 1e3;
344
- /**
345
- * Create a new agent session
346
- */
347
- async function createAgentSession(ctx, parentSessionID, title, directory) {
348
- logger.info({
349
- parentSessionID,
350
- title
351
- }, "Creating agent session");
352
- const createResult = await ctx.client.session.create({
353
- body: {
354
- parentID: parentSessionID,
355
- title
356
- },
357
- query: { directory }
358
- });
359
- if (createResult.error) {
360
- logger.error({ error: createResult.error }, "Failed to create session");
361
- throw new Error(`Failed to create session: ${createResult.error}`);
362
- }
363
- logger.info({ sessionID: createResult.data.id }, "Session created successfully");
364
- return createResult.data.id;
365
- }
366
/**
 * Send a text prompt to a session, giving up after 180 seconds.
 *
 * The `task` and `delegate_task` tools are always forced off, regardless of
 * what `tools` contains.
 *
 * @param {object} ctx - Plugin context; `ctx.client.session.prompt` is called.
 * @param {string} sessionID - Target session id.
 * @param {string} prompt - Prompt text, sent as a single text part.
 * @param {object} tools - Tool enable/disable flags merged into the request.
 * @returns {Promise<void>}
 * @throws {Error} If the request times out or the result carries an `error`.
 */
async function sendPrompt(ctx, sessionID, prompt, tools) {
	logger.info({
		sessionID,
		promptLength: prompt.length,
		tools
	}, "Sending prompt");
	let timer;
	const timeoutPromise = new Promise((_, reject) => {
		timer = setTimeout(() => reject(new Error("Prompt send timeout after 180 seconds")), 18e4);
	});
	let result;
	try {
		const sendPromise = ctx.client.session.prompt({
			path: { id: sessionID },
			body: {
				tools: {
					...tools,
					task: false,
					delegate_task: false
				},
				parts: [{
					type: "text",
					text: prompt
				}]
			}
		});
		result = await Promise.race([sendPromise, timeoutPromise]);
	} finally {
		// Always release the timer; otherwise it stays pending for the full
		// 180 seconds after every send, including successful ones.
		clearTimeout(timer);
	}
	if (result.error) {
		logger.error({
			sessionID,
			error: result.error
		}, "Failed to send prompt");
		throw new Error(`Failed to send prompt: ${result.error}`);
	}
	logger.info({ sessionID }, "Prompt sent successfully");
}
400
- /**
401
- * Poll for session completion
402
- */
403
- async function pollForCompletion(ctx, sessionID, abortSignal) {
404
- const pollStart = Date.now();
405
- let lastMsgCount = 0;
406
- let stablePolls = 0;
407
- const STABILITY_REQUIRED = 3;
408
- let pollCount = 0;
409
- logger.info({ sessionID }, "Starting polling for completion");
410
- while (Date.now() - pollStart < MAX_POLL_TIME_MS) {
411
- pollCount++;
412
- if (abortSignal?.aborted) {
413
- logger.warn({ sessionID }, "Polling aborted by signal");
414
- throw new Error("Task aborted");
415
- }
416
- await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
417
- const sessionStatus = ((await ctx.client.session.status()).data ?? {})[sessionID];
418
- if (pollCount % 10 === 0) logger.info({
419
- sessionID,
420
- status: sessionStatus?.type,
421
- pollCount,
422
- elapsed: Date.now() - pollStart
423
- }, "Polling status check");
424
- if (sessionStatus && sessionStatus.type !== "idle") {
425
- stablePolls = 0;
426
- lastMsgCount = 0;
427
- continue;
428
- }
429
- const messagesCheck = await ctx.client.session.messages({ path: { id: sessionID } });
430
- const currentMsgCount = (messagesCheck.data ?? messagesCheck).length;
431
- if (currentMsgCount > 0 && currentMsgCount === lastMsgCount) {
432
- stablePolls++;
433
- logger.debug({
434
- sessionID,
435
- stablePolls,
436
- currentMsgCount
437
- }, "Message count stable");
438
- if (stablePolls >= STABILITY_REQUIRED) {
439
- logger.info({
440
- sessionID,
441
- totalPolls: pollCount,
442
- elapsed: Date.now() - pollStart
443
- }, "Session completion confirmed");
444
- return;
445
- }
446
- } else {
447
- stablePolls = 0;
448
- lastMsgCount = currentMsgCount;
449
- }
450
- }
451
- logger.error({
452
- sessionID,
453
- totalPolls: pollCount,
454
- elapsed: MAX_POLL_TIME_MS
455
- }, "Session timed out");
456
- throw new Error("Session timed out after 10 minutes");
457
- }
458
- /**
459
- * Extract text content from session messages
460
- */
461
- async function extractSessionOutput(ctx, sessionID) {
462
- const messagesResult = await ctx.client.session.messages({ path: { id: sessionID } });
463
- if (messagesResult.error) throw new Error(`Failed to get messages: ${messagesResult.error}`);
464
- const assistantMessages = messagesResult.data.filter((m) => m.info?.role === "assistant");
465
- if (assistantMessages.length === 0) throw new Error("No assistant response found");
466
- const extractedContent = [];
467
- for (const message of assistantMessages) for (const part of message.parts ?? []) if ((part.type === "text" || part.type === "reasoning") && part.text) extractedContent.push(part.text);
468
- return extractedContent.join("\n\n");
469
- }
470
/**
 * Extract and parse a JSON value embedded in free-form text.
 *
 * Candidates are tried in order and the first one that parses wins:
 *   1. the contents of each fenced code block (``` or ```json), in order;
 *   2. the widest `{...}` or `[...]` span in the text;
 *   3. the trimmed text itself.
 * A candidate that is not valid JSON (for example a ```ts fence appearing
 * before the real payload) is skipped rather than aborting the whole search.
 *
 * @param {string} text - Raw model output.
 * @returns {*} The parsed JSON value.
 * @throws {SyntaxError} The first parse error, if no candidate parses.
 */
function extractJSON(text) {
	const candidates = [];
	for (const fence of text.matchAll(/```(?:json)?\s*([\s\S]*?)```/g)) {
		candidates.push(fence[1].trim());
	}
	const span = text.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
	if (span) candidates.push(span[1]);
	candidates.push(text.trim());
	let firstError;
	for (const candidate of candidates) {
		try {
			return JSON.parse(candidate);
		} catch (err) {
			if (firstError === undefined) firstError = err;
		}
	}
	throw firstError;
}
480
- /**
481
- * Extract short model name from full model string
482
- * e.g., "zhipuai-coding-plan/glm-4.6" -> "glm-4.6"
483
- */
484
- function getModelShortName(modelName) {
485
- const parts = modelName.split("/");
486
- return parts[parts.length - 1] || modelName;
487
- }
488
/**
 * Sanitize a string for use in file/directory names.
 *
 * Lowercases the text, turns slashes and whitespace runs into hyphens, drops
 * every other character outside [a-z0-9-], collapses hyphen runs, truncates
 * to 50 characters, and finally trims leading/trailing hyphens. The trim
 * runs after truncation so a cut that lands right after a hyphen does not
 * leave a dangling "-" at the end of the name.
 *
 * @param {string} text - Arbitrary text (topic, model name, ...).
 * @returns {string} A slug of at most 50 characters; may be empty.
 */
function sanitizeForFilename(text) {
	return text
		.toLowerCase()
		.replace(/\//g, "-")
		.replace(/[^a-z0-9\s-]/g, "")
		.replace(/\s+/g, "-")
		.replace(/-+/g, "-")
		.substring(0, 50)
		.replace(/^-+|-+$/g, "");
}
494
-
495
- //#endregion
496
- //#region src/tools/generate-designs.ts
497
- /**
498
- * Create the generate_designs tool
499
- */
500
- function createGenerateDesignsTool(ctx, config) {
501
- return tool({
502
- description: `Generate multiple independent design proposals for a technical system.
503
-
504
- This tool creates design proposals using ${config.design_models.length} different AI models:
505
- ${config.design_models.map((m) => `- ${m}`).join("\n")}
506
-
507
- Each model generates a design completely independently, without seeing other models' outputs.
508
-
509
- Use this when you want to explore multiple approaches to a design problem and compare them.`,
510
- args: {
511
- requirements: tool.schema.string().describe("Detailed requirements for the design. Include problem statement, constraints, and non-functional requirements."),
512
- topic: tool.schema.string().describe("Optional short topic name (2-4 words) for the design session. If not provided, one will be generated.").optional()
513
- },
514
- async execute(args, toolContext) {
515
- const { requirements, topic: userTopic } = args;
516
- const sessionID = toolContext.sessionID;
517
- const topic = userTopic ? sanitizeForFilename(userTopic) : await generateTopic(ctx, config, requirements, sessionID);
518
- const date = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
519
- const labDir = path.join(ctx.directory, config.base_output_dir, `${date}-${topic}`);
520
- if (fs.existsSync(labDir)) {
521
- logger.warn({ labDir }, "Lab directory already exists, using existing");
522
- return `Error: Lab directory already exists at ${labDir}. This may be from a previous attempt. Please check the existing designs or delete the directory to retry.`;
523
- }
524
- const designsDir = path.join(labDir, "designs");
525
- const reviewsDir = path.join(labDir, "reviews");
526
- const scoresDir = path.join(labDir, "scores");
527
- fs.mkdirSync(designsDir, { recursive: true });
528
- fs.mkdirSync(reviewsDir, { recursive: true });
529
- fs.mkdirSync(scoresDir, { recursive: true });
530
- const taskData = {
531
- requirements,
532
- topic,
533
- created: (/* @__PURE__ */ new Date()).toISOString(),
534
- design_models: config.design_models,
535
- review_models: config.review_models ?? config.design_models
536
- };
537
- fs.writeFileSync(path.join(labDir, "task.json"), JSON.stringify(taskData, null, 2));
538
- const results = [];
539
- for (const model of config.design_models) try {
540
- logger.info({ model }, `Starting design generation for model: ${model}`);
541
- const design = await generateDesign(ctx, config, model, requirements, sessionID);
542
- const validationResult = DesignArtifactSchema.safeParse(design);
543
- if (!validationResult.success) {
544
- logger.error({
545
- model,
546
- errors: validationResult.error.issues
547
- }, "Design schema validation failed");
548
- results.push({
549
- model,
550
- success: false,
551
- error: `Schema validation failed: ${validationResult.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join(", ")}`
552
- });
553
- continue;
554
- }
555
- const modelShortName = getModelShortName(model);
556
- const designFile = path.join(designsDir, `${sanitizeForFilename(modelShortName)}.json`);
557
- fs.writeFileSync(designFile, JSON.stringify(design, null, 2));
558
- logger.info({
559
- model,
560
- designFile
561
- }, "Design saved as JSON");
562
- const markdownFile = path.join(designsDir, `${sanitizeForFilename(modelShortName)}.md`);
563
- const markdown = formatDesignAsMarkdown(design, model);
564
- fs.writeFileSync(markdownFile, markdown);
565
- logger.info({
566
- model,
567
- markdownFile
568
- }, "Design saved as Markdown");
569
- results.push({
570
- model,
571
- success: true
572
- });
573
- } catch (err) {
574
- const errorMsg = err instanceof Error ? err.message : String(err);
575
- logger.error({
576
- model,
577
- error: errorMsg
578
- }, "Design generation failed");
579
- results.push({
580
- model,
581
- success: false,
582
- error: errorMsg
583
- });
584
- }
585
- const successCount = results.filter((r) => r.success).length;
586
- return `Design generation complete.
587
-
588
- **Lab Directory**: ${labDir}
589
-
590
- **Results**: ${successCount} successful, ${results.filter((r) => !r.success).length} failed
591
-
592
- ${results.map((r) => r.success ? `✅ ${r.model}: Generated successfully` : `❌ ${r.model}: ${r.error}`).join("\n")}
593
-
594
- ${successCount >= 2 ? `\nNext step: Run the review_designs tool to evaluate and compare the designs.` : `\nWarning: At least 2 successful designs are needed for meaningful comparison.`}`;
595
- }
596
- });
597
- }
598
/**
 * Render a design artifact as a human-readable Markdown document.
 *
 * Sections are emitted in a fixed order: title, model, Summary, Assumptions,
 * Architecture Overview, Components, Data Flow, Tradeoffs, Risks and
 * Open Questions.
 *
 * @param {object} design - Design artifact with the fields read below.
 * @param {string} model - Model identifier shown under the title.
 * @returns {string} The Markdown text.
 */
function formatDesignAsMarkdown(design, model) {
	const bulletList = (items) => items.map((item) => `- ${item}\n`).join("");
	const componentBlocks = design.components.map((component) => {
		const heading = `### ${component.name}\n\n${component.description}\n\n**Responsibilities**:\n`;
		return `${heading}${bulletList(component.responsibilities)}\n`;
	});
	const tradeoffBlocks = design.tradeoffs.map((tradeoff) => [
		`### ${tradeoff.aspect}\n\n`,
		`**Options**: ${tradeoff.options.join(", ")}\n\n`,
		`**Chosen**: ${tradeoff.chosen}\n\n`,
		`**Rationale**: ${tradeoff.rationale}\n\n`
	].join(""));
	const riskBlocks = design.risks.map((entry) => {
		return `### ${entry.risk} (Impact: ${entry.impact})\n\n**Mitigation**: ${entry.mitigation}\n\n`;
	});
	return [
		`# ${design.title}\n\n`,
		`**Model**: ${model}\n\n`,
		`## Summary\n\n${design.summary}\n\n`,
		`## Assumptions\n\n`,
		bulletList(design.assumptions),
		`\n`,
		`## Architecture Overview\n\n${design.architecture_overview}\n\n`,
		`## Components\n\n`,
		componentBlocks.join(""),
		`## Data Flow\n\n${design.data_flow}\n\n`,
		`## Tradeoffs\n\n`,
		tradeoffBlocks.join(""),
		`## Risks\n\n`,
		riskBlocks.join(""),
		`## Open Questions\n\n`,
		bulletList(design.open_questions)
	].join("");
}
632
- /**
633
- * Generate a topic name from requirements
634
- */
635
- async function generateTopic(ctx, config, requirements, parentSessionID) {
636
- config.topic_generator_model || config.design_models[0];
637
- const sessionID = await createAgentSession(ctx, parentSessionID, "Topic Generation", ctx.directory);
638
- await sendPrompt(ctx, sessionID, `Generate a concise 2-4 word topic name for this design task. Output ONLY the topic name, nothing else.
639
-
640
- Requirements:
641
- ${requirements.substring(0, 500)}`, {
642
- write: false,
643
- edit: false,
644
- bash: false
645
- });
646
- await pollForCompletion(ctx, sessionID);
647
- return sanitizeForFilename((await extractSessionOutput(ctx, sessionID)).trim());
648
- }
649
- /**
650
- * Generate a single design using a specific model
651
- */
652
- async function generateDesign(ctx, config, model, requirements, parentSessionID) {
653
- const agentConfig = createDesignAgent(model, config.design_agent_temperature);
654
- const sessionID = await createAgentSession(ctx, parentSessionID, `Design Generation - ${model}`, ctx.directory);
655
- const prompt = `Generate a comprehensive design proposal for the following requirements.
656
-
657
- ## Requirements
658
-
659
- ${requirements}
660
-
661
- ## Instructions
662
-
663
- 1. Analyze the requirements thoroughly
664
- 2. Consider multiple approaches before deciding
665
- 3. Output your design as valid JSON following the required schema
666
- 4. Be specific and actionable in your design
667
-
668
- Remember: Your entire response must be valid JSON with no other text.`;
669
- const fullPrompt = `${agentConfig.prompt}\n\n${prompt}`;
670
- logger.info({
671
- model,
672
- sessionID
673
- }, "Sending design prompt to agent");
674
- await sendPrompt(ctx, sessionID, fullPrompt, agentConfig.tools);
675
- logger.info({
676
- model,
677
- sessionID
678
- }, "Polling for completion");
679
- await pollForCompletion(ctx, sessionID);
680
- logger.info({
681
- model,
682
- sessionID
683
- }, "Extracting session output");
684
- const output = await extractSessionOutput(ctx, sessionID);
685
- logger.info({
686
- model,
687
- outputLength: output.length
688
- }, "Extracting JSON from output");
689
- return extractJSON(output);
690
- }
691
-
692
- //#endregion
693
- //#region src/tools/review-designs.ts
694
- /**
695
- * Create the review_designs tool
696
- */
697
- function createReviewDesignsTool(ctx, config) {
698
- const reviewModels = config.review_models ?? config.design_models;
699
- return tool({
700
- description: `Review and score design proposals using ${reviewModels.length} reviewer models.
701
-
702
- Each reviewer analyzes all designs and provides:
703
- 1. A markdown review comparing the designs
704
- 2. Numeric scores (0-10) across dimensions: clarity, feasibility, scalability, maintainability, completeness, overall
705
-
706
- Use this after generate_designs to evaluate and compare the generated designs.`,
707
- args: { lab_path: tool.schema.string().describe(`Path to the design lab directory (e.g., .design-lab/2024-01-15-api-gateway). If not provided, uses the most recent lab.`).optional() },
708
- async execute(args, toolContext) {
709
- const sessionID = toolContext.sessionID;
710
- const labDir = args.lab_path ? path.resolve(ctx.directory, args.lab_path) : findMostRecentLab$1(ctx.directory, config.base_output_dir);
711
- if (!labDir) return "Error: No design lab found. Run generate_designs first.";
712
- const designsDir = path.join(labDir, "designs");
713
- const reviewsDir = path.join(labDir, "reviews");
714
- const scoresDir = path.join(labDir, "scores");
715
- const designFiles = fs.readdirSync(designsDir).filter((f) => f.endsWith(".json"));
716
- if (designFiles.length === 0) return "Error: No designs found in the lab directory.";
717
- const designs = {};
718
- for (const file of designFiles) {
719
- const designId = file.replace(".json", "");
720
- const content = fs.readFileSync(path.join(designsDir, file), "utf-8");
721
- designs[designId] = JSON.parse(content);
722
- }
723
- const taskPath = path.join(labDir, "task.json");
724
- const requirements = JSON.parse(fs.readFileSync(taskPath, "utf-8")).requirements;
725
- const results = [];
726
- for (const model of reviewModels) try {
727
- const { review, scores } = await generateReview(ctx, config, model, requirements, designs, sessionID);
728
- const reviewFile = path.join(reviewsDir, `review-${sanitizeForFilename(getModelShortName(model))}.md`);
729
- fs.writeFileSync(reviewFile, review);
730
- logger.info({
731
- model,
732
- reviewFile
733
- }, "Review saved");
734
- for (const score of scores) {
735
- const validationResult = ScoreSchema.safeParse(score);
736
- if (!validationResult.success) console.warn(`Score validation warning for ${score.design_id}:`, validationResult.error);
737
- const designShortName = getModelShortName(score.design_id);
738
- const reviewerShortName = getModelShortName(model);
739
- const scoreFile = path.join(scoresDir, `${sanitizeForFilename(designShortName)}-reviewed-by-${sanitizeForFilename(reviewerShortName)}.json`);
740
- fs.writeFileSync(scoreFile, JSON.stringify(score, null, 2));
741
- }
742
- results.push({
743
- model,
744
- success: true
745
- });
746
- } catch (err) {
747
- const errorMsg = err instanceof Error ? err.message : String(err);
748
- results.push({
749
- model,
750
- success: false,
751
- error: errorMsg
752
- });
753
- }
754
- const successCount = results.filter((r) => r.success).length;
755
- return `Review complete.
756
-
757
- **Lab Directory**: ${labDir}
758
-
759
- **Results**: ${successCount} successful, ${results.filter((r) => !r.success).length} failed
760
-
761
- ${results.map((r) => r.success ? `✅ ${r.model}: Review generated` : `❌ ${r.model}: ${r.error}`).join("\n")}
762
-
763
- **Reviews saved to**: ${reviewsDir}
764
- **Scores saved to**: ${scoresDir}
765
-
766
- ${successCount > 0 ? `\nNext step: Run the aggregate_scores tool to generate final rankings.` : ""}`;
767
- }
768
- });
769
- }
770
/**
 * Find the most recent design lab directory.
 *
 * Subdirectories of the lab base directory are ordered with the default
 * string sort, and the one whose name sorts last is returned.
 *
 * @param {string} projectDir - Directory the lab base directory is resolved against.
 * @param {string} baseDir - Lab base directory, relative to `projectDir`.
 * @returns {string | null} Path of the subdirectory whose name sorts last, or
 *   `null` when the base directory is missing or has no subdirectories.
 */
function findMostRecentLab$1(projectDir, baseDir) {
  const labBaseDir = path.join(projectDir, baseDir);
  if (!fs.existsSync(labBaseDir)) return null;

  // statSync throws for dangling symlinks and for entries that vanish between
  // readdir and stat. Such entries cannot be labs, so count them as
  // non-directories instead of aborting the whole lookup.
  const isDirectory = (name) => {
    try {
      return fs.statSync(path.join(labBaseDir, name)).isDirectory();
    } catch {
      return false;
    }
  };

  const labs = fs.readdirSync(labBaseDir).filter((name) => isDirectory(name)).sort();
  if (labs.length === 0) return null;
  return path.join(labBaseDir, labs[labs.length - 1]);
}
780
/**
 * Generate a review using a specific model.
 *
 * Sends two prompts through one agent session: the first asks for a markdown
 * review comparing every design, the second asks for the per-design scores as
 * a JSON array.
 *
 * @param ctx - Plugin context; `ctx.directory` is handed to `createAgentSession`.
 * @param config - Plugin configuration; `review_agent_temperature` is read.
 * @param model - Reviewer model identifier; echoed into the session title and
 *   into the `reviewer_model` field of the score template.
 * @param requirements - Original requirements, interpolated into the review prompt.
 * @param designs - Object mapping design id to design; each design is embedded
 *   as pretty-printed JSON.
 * @param parentSessionID - Session id passed to `createAgentSession`.
 * @returns {Promise<{review: *, scores: *}>} `review` is the session output after
 *   the first prompt; `scores` is the `extractJSON` result for the second
 *   prompt, with a lone score object wrapped into a one-element array.
 */
async function generateReview(ctx, config, model, requirements, designs, parentSessionID) {
  const agentConfig = createReviewAgent(model, config.review_agent_temperature);
  const sessionID = await createAgentSession(ctx, parentSessionID, `Design Review - ${model}`, ctx.directory);

  const designSections = Object.entries(designs)
    .map(([id, design]) => `## Design: ${id}\n\n\`\`\`json\n${JSON.stringify(design, null, 2)}\n\`\`\``)
    .join("\n\n---\n\n");
  const reviewPrompt = `Review and compare the following design proposals.

## Original Requirements

${requirements}

## Designs to Review

${designSections}

## Your Task

1. Analyze each design thoroughly
2. Compare them across dimensions: clarity, feasibility, scalability, maintainability, completeness
3. Provide a detailed markdown review with your analysis
4. At the end, include a score table in markdown format
5. Identify strengths and weaknesses of each design

Be objective and support your assessments with specific observations.`;

  // Turn 1: the free-form markdown review.
  await sendPrompt(ctx, sessionID, `${agentConfig.prompt}\n\n${reviewPrompt}`, agentConfig.tools);
  await pollForCompletion(ctx, sessionID);
  const review = await extractSessionOutput(ctx, sessionID);

  const designIdList = Object.keys(designs).map((id) => `- "${id}"`).join("\n");
  const scoresPrompt = `Now output the scores for each design as a JSON array. Each element should have this structure:

{
"design_id": "EXACT_DESIGN_ID_FROM_LIST_BELOW",
"reviewer_model": "${model}",
"scores": {
"clarity": 0-10,
"feasibility": 0-10,
"scalability": 0-10,
"maintainability": 0-10,
"completeness": 0-10,
"overall": 0-10
},
"justification": "Brief overall justification",
"strengths": ["list", "of", "strengths"],
"weaknesses": ["list", "of", "weaknesses"],
"missing_considerations": ["list", "of", "things", "missing"]
}

**IMPORTANT**: The "design_id" field MUST be one of these exact values:
${designIdList}

Output ONLY the JSON array with one score object per design. No other text.`;

  // Turn 2: machine-readable scores, requested in the same session.
  await sendPrompt(ctx, sessionID, scoresPrompt, agentConfig.tools);
  await pollForCompletion(ctx, sessionID);
  const parsed = extractJSON(await extractSessionOutput(ctx, sessionID));

  // The prompt asks for an array, but a model may answer with one bare score
  // object (e.g. when there is a single design). Wrap it so code that iterates
  // `scores` still works; any other value is passed through unchanged.
  const isLoneObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
  return {
    review,
    scores: isLoneObject ? [parsed] : parsed
  };
}
837
-
838
- //#endregion
839
- //#region src/tools/aggregate-scores.ts
840
/**
 * Create the aggregate_scores tool.
 *
 * The tool reads every `*.json` file in `<lab>/scores`, groups the scores by
 * `design_id`, averages each dimension, computes the population variance of
 * the `overall` score, ranks designs by average `overall` (highest first) and
 * writes `ranking.json` and `results.md` into `<lab>/results`.
 *
 * Score files that cannot be parsed, or that lack a string `design_id` or a
 * finite number for every dimension, are skipped and listed in the returned
 * message rather than aborting the run or producing `NaN` averages.
 *
 * @param ctx - Plugin context; `ctx.directory` is the base for `lab_path`.
 * @param config - Plugin configuration; `base_output_dir` is used to locate
 *   the most recent lab when `lab_path` is not given.
 * @returns The value returned by `tool(...)` for this definition.
 */
function createAggregateScoresTool(ctx, config) {
  const dimensions = [
    "clarity",
    "feasibility",
    "scalability",
    "maintainability",
    "completeness",
    "overall"
  ];
  const mean = (values) => values.reduce((a, b) => a + b, 0) / values.length;
  const isUsableScore = (score) =>
    score !== null &&
    typeof score === "object" &&
    typeof score.design_id === "string" &&
    score.scores !== null &&
    typeof score.scores === "object" &&
    dimensions.every((dim) => Number.isFinite(score.scores[dim]));

  return tool({
    description: `Aggregate scores from all reviewers and generate final rankings.

This tool:
1. Reads all score files from the reviews
2. Calculates average scores per design
3. Computes variance/disagreement metrics
4. Generates a final ranking with results.md

Use this after review_designs to get the final comparison.`,
    args: { lab_path: tool.schema.string().describe(`Path to the design lab directory. If not provided, uses the most recent lab.`).optional() },
    async execute(args) {
      const labDir = args.lab_path ? path.resolve(ctx.directory, args.lab_path) : findMostRecentLab(ctx.directory, config.base_output_dir);
      if (!labDir) return "Error: No design lab found. Run generate_designs first.";
      const scoresDir = path.join(labDir, "scores");
      const resultsDir = path.join(labDir, "results");
      if (!fs.existsSync(scoresDir)) return "Error: No scores directory found. Run review_designs first.";
      fs.mkdirSync(resultsDir, { recursive: true });
      const scoreFiles = fs.readdirSync(scoresDir).filter((f) => f.endsWith(".json"));
      if (scoreFiles.length === 0) return "Error: No score files found. Run review_designs first.";

      // Load what can be loaded; remember what could not so it can be reported.
      const allScores = [];
      const skippedFiles = [];
      for (const file of scoreFiles) {
        let score;
        try {
          score = JSON.parse(fs.readFileSync(path.join(scoresDir, file), "utf-8"));
        } catch {
          skippedFiles.push(file);
          continue;
        }
        if (isUsableScore(score)) allScores.push(score);
        else skippedFiles.push(file);
      }
      if (allScores.length === 0) return "Error: No valid score files found. Run review_designs first.";

      // Prototype-less object: a design id such as "constructor" must not
      // resolve to an inherited property.
      const scoresByDesign = Object.create(null);
      for (const score of allScores) {
        if (!scoresByDesign[score.design_id]) scoresByDesign[score.design_id] = [];
        scoresByDesign[score.design_id].push(score);
      }

      const rankings = Object.entries(scoresByDesign).map(([designId, scores]) => {
        const overallScores = scores.map((s) => s.scores.overall);
        const avgOverall = mean(overallScores);
        const variance = mean(overallScores.map((s) => Math.pow(s - avgOverall, 2)));
        const scoreBreakdown = {};
        for (const dim of dimensions) scoreBreakdown[dim] = mean(scores.map((s) => s.scores[dim]));
        return {
          design_id: designId,
          rank: 0,
          average_score: avgOverall,
          score_breakdown: scoreBreakdown,
          variance,
          reviewer_count: scores.length
        };
      });
      rankings.sort((a, b) => b.average_score - a.average_score);
      rankings.forEach((r, i) => {
        r.rank = i + 1;
      });

      const rankingPath = path.join(resultsDir, "ranking.json");
      const resultsPath = path.join(resultsDir, "results.md");
      fs.writeFileSync(rankingPath, JSON.stringify(rankings, null, 2));
      fs.writeFileSync(resultsPath, generateResultsMarkdown(rankings, allScores));

      const summary = `Aggregation complete.

**Rankings saved to**: ${rankingPath}
**Results summary saved to**: ${resultsPath}

## Final Rankings

${rankings.map((r) => `${r.rank}. **${r.design_id}** - Score: ${r.average_score.toFixed(1)}/10 (variance: ${r.variance.toFixed(2)})`).join("\n")}

View the full results in ${resultsPath}`;
      if (skippedFiles.length === 0) return summary;
      return `${summary}\n\n**Skipped**: ${skippedFiles.length} invalid score file(s): ${skippedFiles.join(", ")}`;
    }
  });
}
921
/**
 * Find the most recent design lab directory.
 *
 * Candidates are the subdirectories of `<projectDir>/<baseDir>`; they are
 * ordered with the default string sort and the last one is returned.
 *
 * @param {string} projectDir - Directory the lab base directory is resolved against.
 * @param {string} baseDir - Lab base directory, relative to `projectDir`.
 * @returns {string | null} Path of the subdirectory whose name sorts last, or
 *   `null` when the base directory is missing or has no subdirectories.
 */
function findMostRecentLab(projectDir, baseDir) {
  const labBaseDir = path.join(projectDir, baseDir);
  if (!fs.existsSync(labBaseDir)) return null;

  const labs = [];
  for (const entry of fs.readdirSync(labBaseDir)) {
    try {
      if (fs.statSync(path.join(labBaseDir, entry)).isDirectory()) labs.push(entry);
    } catch {
      // statSync throws for dangling symlinks and for entries removed after
      // readdir; neither can be a lab, so skip them rather than fail the lookup.
    }
  }
  if (labs.length === 0) return null;
  labs.sort();
  return path.join(labBaseDir, labs[labs.length - 1]);
}
931
/**
 * Generate the results markdown file.
 *
 * Sections, in order: a summary table, average scores per dimension, a
 * reviewer-by-design matrix of `overall` scores, the winner (first entry of
 * `rankings`), and up to three deduplicated strengths and weaknesses for each
 * of the first three ranked designs.
 *
 * @param {Array} rankings - Ranking entries in display order; each provides
 *   `design_id`, `rank`, `average_score`, `variance`, `reviewer_count` and
 *   `score_breakdown` keyed by dimension.
 * @param {Array} allScores - Individual score objects; `design_id`,
 *   `reviewer_model`, `scores.overall`, `strengths` and `weaknesses` are read.
 *   Missing `strengths`/`weaknesses` are treated as empty lists.
 * @returns {string} The markdown document. When `rankings` is empty the
 *   winner and strengths sections are replaced by a short notice.
 */
function generateResultsMarkdown(rankings, allScores) {
  const dimensions = [
    "clarity",
    "feasibility",
    "scalability",
    "maintainability",
    "completeness",
    "overall"
  ];
  const reviewers = [...new Set(allScores.map((s) => s.reviewer_model))];

  // Index the first score for each (reviewer, design) pair once, instead of
  // rescanning allScores for every cell of the matrix.
  const firstScoreByReviewer = new Map();
  for (const score of allScores) {
    let byDesign = firstScoreByReviewer.get(score.reviewer_model);
    if (!byDesign) {
      byDesign = new Map();
      firstScoreByReviewer.set(score.reviewer_model, byDesign);
    }
    if (!byDesign.has(score.design_id)) byDesign.set(score.design_id, score);
  }

  const summaryRows = rankings
    .map((r) => `| ${r.rank} | ${r.design_id} | ${r.average_score.toFixed(1)} | ${r.variance.toFixed(2)} | ${r.reviewer_count} |`)
    .join("\n");
  const breakdownRows = rankings
    .map((r) => `| ${r.design_id} | ${dimensions.map((d) => r.score_breakdown[d].toFixed(1)).join(" | ")} |`)
    .join("\n");
  const matrixRows = reviewers
    .map((reviewer) => {
      const byDesign = firstScoreByReviewer.get(reviewer);
      const cells = rankings.map((r) => {
        const score = byDesign?.get(r.design_id);
        return score ? score.scores.overall.toFixed(1) : "N/A";
      });
      return `| ${reviewer} | ${cells.join(" | ")} |`;
    })
    .join("\n");

  let md = `# Design Lab Results

Generated: ${(/* @__PURE__ */ new Date()).toISOString()}

## Summary

| Rank | Design | Avg Score | Variance | Reviewers |
|------|--------|-----------|----------|-----------|
${summaryRows}

## Detailed Score Breakdown

### Average Scores by Dimension

| Design | ${dimensions.join(" | ")} |
|--------|${"---|".repeat(dimensions.length)}
${breakdownRows}

## Reviewer × Design Matrix

### Overall Scores

| Reviewer | ${rankings.map((r) => r.design_id).join(" | ")} |
|----------|${"---|".repeat(rankings.length)}
${matrixRows}

## Key Observations

`;

  // Without any ranked design there is no winner to describe.
  if (rankings.length === 0) {
    md += `No scored designs were found.\n`;
    return md;
  }

  const topDesign = rankings[0];
  let consensus = "low consensus";
  if (topDesign.variance < 1) consensus = "high consensus";
  else if (topDesign.variance < 2) consensus = "moderate consensus";
  md += `### Winner: ${topDesign.design_id}

- **Average Score**: ${topDesign.average_score.toFixed(1)}/10
- **Variance**: ${topDesign.variance.toFixed(2)} (${consensus})

`;

  md += `### Strengths and Weaknesses\n\n`;
  for (const ranking of rankings.slice(0, 3)) {
    const designScores = allScores.filter((s) => s.design_id === ranking.design_id);
    // `?? []` keeps a score without these lists from rendering "- undefined".
    const strengths = [...new Set(designScores.flatMap((s) => s.strengths ?? []))].slice(0, 3);
    const weaknesses = [...new Set(designScores.flatMap((s) => s.weaknesses ?? []))].slice(0, 3);
    md += `#### ${ranking.rank}. ${ranking.design_id}\n\n`;
    md += `**Strengths**:\n${strengths.map((s) => `- ${s}`).join("\n")}\n\n`;
    md += `**Weaknesses**:\n${weaknesses.map((w) => `- ${w}`).join("\n")}\n\n`;
  }
  return md;
}
996
-
997
381
  //#endregion
998
382
  //#region src/design-lab.ts
999
383
  /**
@@ -1003,13 +387,48 @@ ${reviewers.map((reviewer) => {
1003
387
  * then systematically evaluates, compares, and ranks those designs.
1004
388
  */
1005
389
  const DesignLab = async (ctx) => {
1006
- const config = loadPluginConfig(ctx.directory);
1007
- return { tool: {
1008
- generate_designs: createGenerateDesignsTool(ctx, config),
1009
- review_designs: createReviewDesignsTool(ctx, config),
1010
- aggregate_scores: createAggregateScoresTool(ctx, config)
390
+ const pluginConfig = loadPluginConfig(ctx.directory);
391
+ logger.info("Design Lab Plugin Loaded");
392
+ return { config: async (config) => {
393
+ const designModels = uniqueModels(pluginConfig.design_models);
394
+ const reviewModels = uniqueModels(pluginConfig.review_models ?? pluginConfig.design_models);
395
+ const allModels = uniqueModels([...designModels, ...reviewModels]);
396
+ const modelSpecs = new Map(allModels.map((model) => [model, {
397
+ model,
398
+ agentName: getDesignerSubagentName(model),
399
+ fileStem: getDesignerModelFileStem(model)
400
+ }]));
401
+ const designSpecs = designModels.map((model) => modelSpecs.get(model)).filter(isModelSpec);
402
+ const reviewSpecs = reviewModels.map((model) => modelSpecs.get(model)).filter(isModelSpec);
403
+ const subagentEntries = Array.from(modelSpecs.values()).map((spec) => [spec.agentName, createDesignerModelAgent(spec.model)]);
404
+ config.agent = {
405
+ ...config.agent ?? {},
406
+ designer: createDesignerPrimaryAgent({
407
+ baseOutputDir: pluginConfig.base_output_dir,
408
+ designModels: designSpecs,
409
+ reviewModels: reviewSpecs
410
+ }),
411
+ ...Object.fromEntries(subagentEntries)
412
+ };
413
+ const agentKeys = Object.keys(config.agent ?? {});
414
+ logger.info({
415
+ designModels,
416
+ reviewModels,
417
+ agentsRegistered: agentKeys
418
+ }, "DesignLab agents registered");
1011
419
  } };
1012
420
  };
421
/**
 * Remove duplicate entries from a model list, keeping the first occurrence of
 * each entry in its original position.
 *
 * @param {Iterable<string>} models - Model identifiers, possibly with repeats.
 * @returns {string[]} A new array containing each distinct entry once.
 */
function uniqueModels(models) {
  // A Set iterates in insertion order, so spreading it preserves the order of
  // first occurrences.
  return [...new Set(models)];
}
429
/**
 * Filter predicate: reports whether a looked-up model spec is truthy.
 *
 * @param spec - Value to test, e.g. the result of a `Map#get` lookup.
 * @returns {boolean} `true` for any truthy value, `false` otherwise.
 */
function isModelSpec(spec) {
  return spec ? true : false;
}
1013
432
 
1014
433
  //#endregion
1015
434
  export { DesignLab };