opencode-design-lab 0.0.0 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.opencode/plugins/design-lab.js +266 -847
- package/README.md +43 -288
- package/package.json +1 -1
@@ -1,15 +1,236 @@
-import
-import * as fs from "fs";
+import pino from "pino";
 import * as path from "path";
+import * as fs from "fs";
+import { z } from "zod";
 import * as os from "os";
-import { tool } from "@opencode-ai/plugin";
-import pino from "pino";
 
+//#region src/utils/logger.ts
+const logLevel = process.env.LOG_LEVEL || "info";
+const levelNames = {
+10: "TRACE",
+20: "DEBUG",
+30: "INFO",
+40: "WARN",
+50: "ERROR",
+60: "FATAL"
+};
+function formatTimestamp() {
+const now = /* @__PURE__ */ new Date();
+return `${String(now.getHours()).padStart(2, "0")}:${String(now.getMinutes()).padStart(2, "0")}:${String(now.getSeconds()).padStart(2, "0")}.${String(now.getMilliseconds()).padStart(3, "0")}`;
+}
+function createLogStream() {
+const logPath = path.join(process.cwd(), "design-lab.log");
+const stream = fs.createWriteStream(logPath, { flags: "a" });
+return pino.multistream([{
+level: "trace",
+stream: { write: (chunk) => {
+try {
+const log = JSON.parse(chunk);
+const timestamp = formatTimestamp();
+const level = levelNames[log.level] || "UNKNOWN";
+const message = log.msg || "";
+stream.write(`[${timestamp}] ${level}: ${message}\n`);
+} catch (e) {
+stream.write(chunk + "\n");
+}
+} }
+}]);
+}
+const logger = pino({
+level: logLevel,
+timestamp: false
+}, createLogStream());
+
+//#endregion
+//#region src/utils/session-helpers.ts
+/**
+* Maximum time to wait for a session to complete
+*/
+const MAX_POLL_TIME_MS = 600 * 1e3;
+/**
+* Extract short model name from full model string
+* e.g., "zhipuai-coding-plan/glm-4.6" -> "glm-4.6"
+*/
+function getModelShortName(modelName) {
+const parts = modelName.split("/");
+return parts[parts.length - 1] || modelName;
+}
+
+//#endregion
+//#region src/agents/index.ts
+const DESIGNER_SUBAGENT_PREFIX = "designer_model_";
+/**
+* Build the agent name for a designer subagent model.
+*/
+function getDesignerSubagentName(model) {
+return `${DESIGNER_SUBAGENT_PREFIX}${normalizeAgentSuffix(model)}`;
+}
+/**
+* Build the file stem used for design and review markdown files.
+*/
+function getDesignerModelFileStem(model) {
+return normalizeModelSlug(model);
+}
+/**
+* Create the primary designer agent configuration.
+*/
+function createDesignerPrimaryAgent(options) {
+return {
+description: "Design Lab coordinator that orchestrates model subagents.",
+mode: "primary",
+model: options.designModels[0]?.model ?? options.reviewModels[0]?.model,
+prompt: buildDesignerPrimaryPrompt(options),
+tools: {
+read: true,
+bash: true,
+delegate_task: true,
+edit: false,
+task: false,
+write: false
+},
+permission: {
+bash: "allow",
+edit: "deny",
+webfetch: "deny"
+}
+};
+}
+/**
+* Create a designer subagent configuration for a specific model.
+*/
+function createDesignerModelAgent(model) {
+return {
+description: "Design Lab subagent that writes designs or reviews to files.",
+mode: "subagent",
+model,
+prompt: buildDesignerSubagentPrompt(model),
+tools: {
+read: true,
+write: true,
+edit: false,
+bash: false,
+task: false,
+delegate_task: false
+},
+permission: {
+bash: "deny",
+edit: "allow",
+webfetch: "deny"
+}
+};
+}
+function buildDesignerPrimaryPrompt(options) {
+return `You are the Design Lab primary agent. Your job is to orchestrate model subagents to produce design and review markdown files.
+
+## Available subagents
+
+Design subagents:
+${options.designModels.map((spec) => `- ${spec.agentName} (model: ${spec.model}, file: ${spec.fileStem}.md)`).join("\n")}
+
+Review subagents:
+${options.reviewModels.map((spec) => `- ${spec.agentName} (model: ${spec.model}, file: review-${spec.fileStem}.md)`).join("\n")}
+
+## Workflow
+
+1. Create a new run directory under "${options.baseOutputDir}" using the format:
+${options.baseOutputDir}/YYYY-MM-DD-topic/
+Use a short, lowercase, hyphenated topic derived from the request.
+Use bash for date generation (e.g., "date +%F") and directory creation.
+2. Create subdirectories:
+- designs/
+- reviews/
+3. For each design subagent, delegate a design task sequentially:
+- Provide the requirements and the exact output_file path:
+${options.baseOutputDir}/YYYY-MM-DD-topic/designs/{fileStem}.md
+- The output_file path is mandatory. If you omit it, the subagent must fail.
+- Instruct the subagent to write ONLY to the file and NOT to output the design in chat.
+4. After all designs are written, delegate review tasks sequentially:
+- Provide the list of design file paths.
+- Provide the exact output_file path:
+${options.baseOutputDir}/YYYY-MM-DD-topic/reviews/review-{fileStem}.md
+- Each reviewer must produce ONE markdown report comparing ALL designs at once.
+5. After all reviews are written, read every review file and produce a short summary:
+- Which design is recommended overall
+- Approximate scores per design (from the score table)
+- Notable disagreements between reviewers
+
+## Output rules
+
+- Never paste design or review content into the main chat.
+- Return only a concise summary with the run directory, file paths, and the review summary.
+- If asked "what agents will you call", list the design subagents by name.
+- If the user asks for parallel execution, explain that you run sequentially for stability.
+- Use only the subagents listed above; do not invent agent names.`;
+}
+function buildDesignerSubagentPrompt(model) {
+return `You are a Design Lab subagent for model: ${model}.
+
+You only take tasks from the primary designer agent. You must write outputs to files and keep chat responses minimal.
+
+## Global rules
+
+- Use only read and write tools when needed.
+- NEVER output the design or review content in chat.
+- ALWAYS write to the exact output_file path provided.
+- If output_file is missing or unclear, reply with: "FAILED: missing output_file".
+- After writing, reply with: "WROTE: <output_file>".
+- If you cannot complete the task, reply with: "FAILED: <reason>".
+
+## Design tasks
+
+When asked to design:
+- Produce a concise but complete Markdown design document.
+- Use these sections (in this order): Title, Summary, Goals, Non-Goals, Architecture, Components, Data Flow, Tradeoffs, Risks, Open Questions.
+- Write the design to the provided output_file.
+
+## Review tasks
+
+When asked to review:
+- Read all provided design files.
+- Produce ONE Markdown report that compares all designs at once.
+- Use the fixed scoring standard below for ALL reviews.
+- Include sections in this exact order:
+1. Executive Summary
+2. Comparison Table
+3. Strengths
+4. Weaknesses
+5. Recommendation
+6. Open Questions
+7. Scoring Standard
+- At the very bottom, include a Scores Table that rates EACH design.
+- Write the report to the provided output_file.
+
+## Fixed Scoring Standard
+
+- Scale: 0-10 for each criterion (10 is best).
+- Criteria and weights (total 100%):
+- Clarity: 20%
+- Feasibility: 25%
+- Scalability: 20%
+- Maintainability: 20%
+- Completeness: 15%
+- Weighted Total (0-10) = sum(score * weight) / 100.
+
+## Scores Table Format (must be last in the report)
+
+| Design | Clarity (20%) | Feasibility (25%) | Scalability (20%) | Maintainability (20%) | Completeness (15%) | Weighted Total (0-10) |
+|--------|---------------|-------------------|-------------------|-----------------------|--------------------|-----------------------|
+| model-a | 8 | 9 | 7 | 8 | 8 | 8.1 |`;
+}
+function normalizeModelSlug(model) {
+return getModelShortName(model).toLowerCase().replace(/\//g, "-").replace(/[._\s]+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/-+/g, "-").replace(/^-+|-+$/g, "");
+}
+function normalizeAgentSuffix(model) {
+return normalizeModelSlug(model).replace(/-/g, "");
+}
+
+//#endregion
 //#region src/config/schema.ts
 /**
 * Configuration schema for OpenCode Design Lab plugin
 */
 const DesignLabConfigSchema = z.object({
+"$schema": z.string().optional(),
 design_models: z.array(z.string()).min(2, "At least 2 design models required"),
 review_models: z.array(z.string()).optional(),
 base_output_dir: z.string().default(".design-lab"),
@@ -98,7 +319,7 @@ function deepMerge(base, override) {
 * Parse JSONC (JSON with comments)
 */
 function parseJsonc(content) {
-const withoutComments = content.replace(/\/\*[\s\S]*?\*\//g, "").replace(
+const withoutComments = content.replace(/\/\*[\s\S]*?\*\//g, "").replace(/^\s*\/\/.*/gm, "");
 return JSON.parse(withoutComments);
 }
 /**
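For orientation, below is a minimal sketch of a configuration object that the new `DesignLabConfigSchema` (parsed via the JSONC loader above) would accept. The field names, constraints, and defaults come from the schema in this diff; the config file location, the `$schema` value, and the second model identifier are illustrative assumptions.

```js
// Hypothetical config contents for the Design Lab plugin.
// "zhipuai-coding-plan/glm-4.6" is the example model string used in the source comments;
// "provider/model-b" and the $schema path are placeholders.
const exampleConfig = {
  "$schema": "./design-lab.schema.json",   // optional
  design_models: [                          // schema requires at least 2 entries
    "zhipuai-coding-plan/glm-4.6",
    "provider/model-b"
  ],
  review_models: ["provider/model-b"],      // optional; the plugin falls back to design_models
  base_output_dir: ".design-lab"            // optional; schema default
};
```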
@@ -157,843 +378,6 @@ function loadPluginConfig(directory) {
|
|
|
157
378
|
return result.data;
|
|
158
379
|
}
|
|
159
380
|
|
|
160
|
-
//#endregion
|
|
161
|
-
//#region src/agents/index.ts
|
|
162
|
-
/**
|
|
163
|
-
* System prompt for design generation agents
|
|
164
|
-
*/
|
|
165
|
-
const DESIGN_AGENT_SYSTEM_PROMPT = `You are a senior software architect generating a design proposal for a technical system.
|
|
166
|
-
|
|
167
|
-
## Your Task
|
|
168
|
-
|
|
169
|
-
You will receive design requirements and must produce a comprehensive design document as structured JSON.
|
|
170
|
-
|
|
171
|
-
## Critical Rules
|
|
172
|
-
|
|
173
|
-
1. **Output ONLY valid JSON** - No markdown, no explanations, no code blocks, just pure JSON
|
|
174
|
-
2. **Follow the schema exactly** - All required fields must be present
|
|
175
|
-
3. **Be specific and actionable** - Avoid vague statements
|
|
176
|
-
4. **Consider real-world constraints** - Think about scalability, maintainability, and security
|
|
177
|
-
5. **Identify risks proactively** - Every design has risks, acknowledge them
|
|
178
|
-
6. **List open questions** - What would you need to clarify with stakeholders?
|
|
179
|
-
|
|
180
|
-
## Required Output Schema
|
|
181
|
-
|
|
182
|
-
Your response must be a JSON object with this exact structure:
|
|
183
|
-
|
|
184
|
-
{
|
|
185
|
-
"title": "Short, descriptive title for the design",
|
|
186
|
-
"summary": "2-3 paragraph executive summary of the design",
|
|
187
|
-
"assumptions": ["List of assumptions you're making"],
|
|
188
|
-
"architecture_overview": "High-level description of the architecture approach",
|
|
189
|
-
"components": [
|
|
190
|
-
{
|
|
191
|
-
"name": "Component name",
|
|
192
|
-
"description": "What this component does",
|
|
193
|
-
"responsibilities": ["List of responsibilities"]
|
|
194
|
-
}
|
|
195
|
-
],
|
|
196
|
-
"data_flow": "Description of how data flows through the system",
|
|
197
|
-
"tradeoffs": [
|
|
198
|
-
{
|
|
199
|
-
"aspect": "What aspect this tradeoff concerns",
|
|
200
|
-
"options": ["Option 1", "Option 2"],
|
|
201
|
-
"chosen": "Which option you chose",
|
|
202
|
-
"rationale": "Why you chose this option"
|
|
203
|
-
}
|
|
204
|
-
],
|
|
205
|
-
"risks": [
|
|
206
|
-
{
|
|
207
|
-
"risk": "Description of the risk",
|
|
208
|
-
"impact": "low|medium|high",
|
|
209
|
-
"mitigation": "How to mitigate this risk"
|
|
210
|
-
}
|
|
211
|
-
],
|
|
212
|
-
"open_questions": ["Questions that need stakeholder input"]
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
Remember: Your entire response must be valid JSON. No other text.`;
|
|
216
|
-
/**
|
|
217
|
-
* Create a design agent configuration for a specific model
|
|
218
|
-
*/
|
|
219
|
-
function createDesignAgent(model, temperature) {
|
|
220
|
-
return {
|
|
221
|
-
model,
|
|
222
|
-
temperature,
|
|
223
|
-
mode: "subagent",
|
|
224
|
-
prompt: DESIGN_AGENT_SYSTEM_PROMPT,
|
|
225
|
-
tools: {
|
|
226
|
-
write: false,
|
|
227
|
-
edit: false,
|
|
228
|
-
bash: false,
|
|
229
|
-
task: false,
|
|
230
|
-
delegate_task: false
|
|
231
|
-
}
|
|
232
|
-
};
|
|
233
|
-
}
|
|
234
|
-
/**
|
|
235
|
-
* System prompt for review agents
|
|
236
|
-
*/
|
|
237
|
-
const REVIEW_AGENT_SYSTEM_PROMPT = `You are a senior technical reviewer evaluating software design proposals.
|
|
238
|
-
|
|
239
|
-
## Your Task
|
|
240
|
-
|
|
241
|
-
You will receive multiple design proposals for the same requirements. You must:
|
|
242
|
-
1. Analyze each design thoroughly
|
|
243
|
-
2. Compare them objectively
|
|
244
|
-
3. Provide scores for each design
|
|
245
|
-
4. Generate a markdown review with your analysis
|
|
246
|
-
|
|
247
|
-
## Scoring Criteria (0-10 scale)
|
|
248
|
-
|
|
249
|
-
- **Clarity**: How well-explained and understandable is the design?
|
|
250
|
-
- **Feasibility**: Can this design be realistically implemented?
|
|
251
|
-
- **Scalability**: Will this design handle growth well?
|
|
252
|
-
- **Maintainability**: Will this design be easy to maintain and evolve?
|
|
253
|
-
- **Completeness**: Does this design address all requirements?
|
|
254
|
-
- **Overall**: Your overall assessment
|
|
255
|
-
|
|
256
|
-
## Review Format
|
|
257
|
-
|
|
258
|
-
First, provide a detailed markdown review comparing all designs:
|
|
259
|
-
|
|
260
|
-
1. Executive summary of each design
|
|
261
|
-
2. Comparative analysis across dimensions
|
|
262
|
-
3. Strengths and weaknesses of each
|
|
263
|
-
4. Your recommendation
|
|
264
|
-
|
|
265
|
-
Then, provide a score table in markdown like:
|
|
266
|
-
|
|
267
|
-
| Design | Clarity | Feasibility | Scalability | Maintainability | Completeness | Overall |
|
|
268
|
-
|--------|---------|-------------|-------------|-----------------|--------------|---------|
|
|
269
|
-
| model-name | 8 | 9 | 7 | 8 | 8 | 8 |
|
|
270
|
-
|
|
271
|
-
## Important
|
|
272
|
-
|
|
273
|
-
- Be objective and fair
|
|
274
|
-
- Support your scores with reasoning
|
|
275
|
-
- Consider the requirements when scoring
|
|
276
|
-
- Do not be biased by model names`;
|
|
277
|
-
/**
|
|
278
|
-
* Create a review agent configuration for a specific model
|
|
279
|
-
*/
|
|
280
|
-
function createReviewAgent(model, temperature) {
|
|
281
|
-
return {
|
|
282
|
-
model,
|
|
283
|
-
temperature,
|
|
284
|
-
mode: "subagent",
|
|
285
|
-
prompt: REVIEW_AGENT_SYSTEM_PROMPT,
|
|
286
|
-
tools: {
|
|
287
|
-
write: false,
|
|
288
|
-
edit: false,
|
|
289
|
-
bash: false,
|
|
290
|
-
task: false,
|
|
291
|
-
delegate_task: false
|
|
292
|
-
}
|
|
293
|
-
};
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
//#endregion
|
|
297
|
-
//#region src/utils/logger.ts
|
|
298
|
-
const logLevel = process.env.LOG_LEVEL || "info";
|
|
299
|
-
const levelNames = {
|
|
300
|
-
10: "TRACE",
|
|
301
|
-
20: "DEBUG",
|
|
302
|
-
30: "INFO",
|
|
303
|
-
40: "WARN",
|
|
304
|
-
50: "ERROR",
|
|
305
|
-
60: "FATAL"
|
|
306
|
-
};
|
|
307
|
-
function formatTimestamp() {
|
|
308
|
-
const now = /* @__PURE__ */ new Date();
|
|
309
|
-
return `${String(now.getHours()).padStart(2, "0")}:${String(now.getMinutes()).padStart(2, "0")}:${String(now.getSeconds()).padStart(2, "0")}.${String(now.getMilliseconds()).padStart(3, "0")}`;
|
|
310
|
-
}
|
|
311
|
-
function createLogStream() {
|
|
312
|
-
const logPath = path.join(process.cwd(), "design-lab.log");
|
|
313
|
-
const stream = fs.createWriteStream(logPath, { flags: "a" });
|
|
314
|
-
return pino.multistream([{
|
|
315
|
-
level: "trace",
|
|
316
|
-
stream: { write: (chunk) => {
|
|
317
|
-
try {
|
|
318
|
-
const log = JSON.parse(chunk);
|
|
319
|
-
const timestamp = formatTimestamp();
|
|
320
|
-
const level = levelNames[log.level] || "UNKNOWN";
|
|
321
|
-
const message = log.msg || "";
|
|
322
|
-
stream.write(`[${timestamp}] ${level}: ${message}\n`);
|
|
323
|
-
} catch (e) {
|
|
324
|
-
stream.write(chunk + "\n");
|
|
325
|
-
}
|
|
326
|
-
} }
|
|
327
|
-
}]);
|
|
328
|
-
}
|
|
329
|
-
const logger = pino({
|
|
330
|
-
level: logLevel,
|
|
331
|
-
timestamp: false
|
|
332
|
-
}, createLogStream());
|
|
333
|
-
|
|
334
|
-
//#endregion
|
|
335
|
-
//#region src/utils/session-helpers.ts
|
|
336
|
-
/**
|
|
337
|
-
* Poll interval for checking session completion
|
|
338
|
-
*/
|
|
339
|
-
const POLL_INTERVAL_MS = 500;
|
|
340
|
-
/**
|
|
341
|
-
* Maximum time to wait for a session to complete
|
|
342
|
-
*/
|
|
343
|
-
const MAX_POLL_TIME_MS = 600 * 1e3;
|
|
344
|
-
/**
|
|
345
|
-
* Create a new agent session
|
|
346
|
-
*/
|
|
347
|
-
async function createAgentSession(ctx, parentSessionID, title, directory) {
|
|
348
|
-
logger.info({
|
|
349
|
-
parentSessionID,
|
|
350
|
-
title
|
|
351
|
-
}, "Creating agent session");
|
|
352
|
-
const createResult = await ctx.client.session.create({
|
|
353
|
-
body: {
|
|
354
|
-
parentID: parentSessionID,
|
|
355
|
-
title
|
|
356
|
-
},
|
|
357
|
-
query: { directory }
|
|
358
|
-
});
|
|
359
|
-
if (createResult.error) {
|
|
360
|
-
logger.error({ error: createResult.error }, "Failed to create session");
|
|
361
|
-
throw new Error(`Failed to create session: ${createResult.error}`);
|
|
362
|
-
}
|
|
363
|
-
logger.info({ sessionID: createResult.data.id }, "Session created successfully");
|
|
364
|
-
return createResult.data.id;
|
|
365
|
-
}
|
|
366
|
-
/**
|
|
367
|
-
* Send a prompt to a session with timeout
|
|
368
|
-
*/
|
|
369
|
-
async function sendPrompt(ctx, sessionID, prompt, tools) {
|
|
370
|
-
logger.info({
|
|
371
|
-
sessionID,
|
|
372
|
-
promptLength: prompt.length,
|
|
373
|
-
tools
|
|
374
|
-
}, "Sending prompt");
|
|
375
|
-
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(/* @__PURE__ */ new Error("Prompt send timeout after 180 seconds")), 18e4));
|
|
376
|
-
const sendPromise = ctx.client.session.prompt({
|
|
377
|
-
path: { id: sessionID },
|
|
378
|
-
body: {
|
|
379
|
-
tools: {
|
|
380
|
-
...tools,
|
|
381
|
-
task: false,
|
|
382
|
-
delegate_task: false
|
|
383
|
-
},
|
|
384
|
-
parts: [{
|
|
385
|
-
type: "text",
|
|
386
|
-
text: prompt
|
|
387
|
-
}]
|
|
388
|
-
}
|
|
389
|
-
});
|
|
390
|
-
const result = await Promise.race([sendPromise, timeoutPromise]);
|
|
391
|
-
if (result.error) {
|
|
392
|
-
logger.error({
|
|
393
|
-
sessionID,
|
|
394
|
-
error: result.error
|
|
395
|
-
}, "Failed to send prompt");
|
|
396
|
-
throw new Error(`Failed to send prompt: ${result.error}`);
|
|
397
|
-
}
|
|
398
|
-
logger.info({ sessionID }, "Prompt sent successfully");
|
|
399
|
-
}
|
|
400
|
-
/**
|
|
401
|
-
* Poll for session completion
|
|
402
|
-
*/
|
|
403
|
-
async function pollForCompletion(ctx, sessionID, abortSignal) {
|
|
404
|
-
const pollStart = Date.now();
|
|
405
|
-
let lastMsgCount = 0;
|
|
406
|
-
let stablePolls = 0;
|
|
407
|
-
const STABILITY_REQUIRED = 3;
|
|
408
|
-
let pollCount = 0;
|
|
409
|
-
logger.info({ sessionID }, "Starting polling for completion");
|
|
410
|
-
while (Date.now() - pollStart < MAX_POLL_TIME_MS) {
|
|
411
|
-
pollCount++;
|
|
412
|
-
if (abortSignal?.aborted) {
|
|
413
|
-
logger.warn({ sessionID }, "Polling aborted by signal");
|
|
414
|
-
throw new Error("Task aborted");
|
|
415
|
-
}
|
|
416
|
-
await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
|
|
417
|
-
const sessionStatus = ((await ctx.client.session.status()).data ?? {})[sessionID];
|
|
418
|
-
if (pollCount % 10 === 0) logger.info({
|
|
419
|
-
sessionID,
|
|
420
|
-
status: sessionStatus?.type,
|
|
421
|
-
pollCount,
|
|
422
|
-
elapsed: Date.now() - pollStart
|
|
423
|
-
}, "Polling status check");
|
|
424
|
-
if (sessionStatus && sessionStatus.type !== "idle") {
|
|
425
|
-
stablePolls = 0;
|
|
426
|
-
lastMsgCount = 0;
|
|
427
|
-
continue;
|
|
428
|
-
}
|
|
429
|
-
const messagesCheck = await ctx.client.session.messages({ path: { id: sessionID } });
|
|
430
|
-
const currentMsgCount = (messagesCheck.data ?? messagesCheck).length;
|
|
431
|
-
if (currentMsgCount > 0 && currentMsgCount === lastMsgCount) {
|
|
432
|
-
stablePolls++;
|
|
433
|
-
logger.debug({
|
|
434
|
-
sessionID,
|
|
435
|
-
stablePolls,
|
|
436
|
-
currentMsgCount
|
|
437
|
-
}, "Message count stable");
|
|
438
|
-
if (stablePolls >= STABILITY_REQUIRED) {
|
|
439
|
-
logger.info({
|
|
440
|
-
sessionID,
|
|
441
|
-
totalPolls: pollCount,
|
|
442
|
-
elapsed: Date.now() - pollStart
|
|
443
|
-
}, "Session completion confirmed");
|
|
444
|
-
return;
|
|
445
|
-
}
|
|
446
|
-
} else {
|
|
447
|
-
stablePolls = 0;
|
|
448
|
-
lastMsgCount = currentMsgCount;
|
|
449
|
-
}
|
|
450
|
-
}
|
|
451
|
-
logger.error({
|
|
452
|
-
sessionID,
|
|
453
|
-
totalPolls: pollCount,
|
|
454
|
-
elapsed: MAX_POLL_TIME_MS
|
|
455
|
-
}, "Session timed out");
|
|
456
|
-
throw new Error("Session timed out after 10 minutes");
|
|
457
|
-
}
|
|
458
|
-
/**
|
|
459
|
-
* Extract text content from session messages
|
|
460
|
-
*/
|
|
461
|
-
async function extractSessionOutput(ctx, sessionID) {
|
|
462
|
-
const messagesResult = await ctx.client.session.messages({ path: { id: sessionID } });
|
|
463
|
-
if (messagesResult.error) throw new Error(`Failed to get messages: ${messagesResult.error}`);
|
|
464
|
-
const assistantMessages = messagesResult.data.filter((m) => m.info?.role === "assistant");
|
|
465
|
-
if (assistantMessages.length === 0) throw new Error("No assistant response found");
|
|
466
|
-
const extractedContent = [];
|
|
467
|
-
for (const message of assistantMessages) for (const part of message.parts ?? []) if ((part.type === "text" || part.type === "reasoning") && part.text) extractedContent.push(part.text);
|
|
468
|
-
return extractedContent.join("\n\n");
|
|
469
|
-
}
|
|
470
|
-
/**
|
|
471
|
-
* Extract JSON from text that may contain markdown code blocks
|
|
472
|
-
*/
|
|
473
|
-
function extractJSON(text) {
|
|
474
|
-
const jsonBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
|
|
475
|
-
if (jsonBlockMatch) return JSON.parse(jsonBlockMatch[1].trim());
|
|
476
|
-
const jsonMatch = text.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
|
|
477
|
-
if (jsonMatch) return JSON.parse(jsonMatch[1]);
|
|
478
|
-
return JSON.parse(text.trim());
|
|
479
|
-
}
|
|
480
|
-
/**
|
|
481
|
-
* Extract short model name from full model string
|
|
482
|
-
* e.g., "zhipuai-coding-plan/glm-4.6" -> "glm-4.6"
|
|
483
|
-
*/
|
|
484
|
-
function getModelShortName(modelName) {
|
|
485
|
-
const parts = modelName.split("/");
|
|
486
|
-
return parts[parts.length - 1] || modelName;
|
|
487
|
-
}
|
|
488
|
-
/**
|
|
489
|
-
* Sanitize a string for use in file/directory names
|
|
490
|
-
*/
|
|
491
|
-
function sanitizeForFilename(text) {
|
|
492
|
-
return text.toLowerCase().replace(/\//g, "-").replace(/[^a-z0-9\s-]/g, "").replace(/\s+/g, "-").replace(/-+/g, "-").substring(0, 50);
|
|
493
|
-
}
|
|
494
|
-
|
|
495
|
-
//#endregion
|
|
496
|
-
//#region src/tools/generate-designs.ts
|
|
497
|
-
/**
|
|
498
|
-
* Create the generate_designs tool
|
|
499
|
-
*/
|
|
500
|
-
function createGenerateDesignsTool(ctx, config) {
|
|
501
|
-
return tool({
|
|
502
|
-
description: `Generate multiple independent design proposals for a technical system.
|
|
503
|
-
|
|
504
|
-
This tool creates design proposals using ${config.design_models.length} different AI models:
|
|
505
|
-
${config.design_models.map((m) => `- ${m}`).join("\n")}
|
|
506
|
-
|
|
507
|
-
Each model generates a design completely independently, without seeing other models' outputs.
|
|
508
|
-
|
|
509
|
-
Use this when you want to explore multiple approaches to a design problem and compare them.`,
|
|
510
|
-
args: {
|
|
511
|
-
requirements: tool.schema.string().describe("Detailed requirements for the design. Include problem statement, constraints, and non-functional requirements."),
|
|
512
|
-
topic: tool.schema.string().describe("Optional short topic name (2-4 words) for the design session. If not provided, one will be generated.").optional()
|
|
513
|
-
},
|
|
514
|
-
async execute(args, toolContext) {
|
|
515
|
-
const { requirements, topic: userTopic } = args;
|
|
516
|
-
const sessionID = toolContext.sessionID;
|
|
517
|
-
const topic = userTopic ? sanitizeForFilename(userTopic) : await generateTopic(ctx, config, requirements, sessionID);
|
|
518
|
-
const date = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
|
|
519
|
-
const labDir = path.join(ctx.directory, config.base_output_dir, `${date}-${topic}`);
|
|
520
|
-
if (fs.existsSync(labDir)) {
|
|
521
|
-
logger.warn({ labDir }, "Lab directory already exists, using existing");
|
|
522
|
-
return `Error: Lab directory already exists at ${labDir}. This may be from a previous attempt. Please check the existing designs or delete the directory to retry.`;
|
|
523
|
-
}
|
|
524
|
-
const designsDir = path.join(labDir, "designs");
|
|
525
|
-
const reviewsDir = path.join(labDir, "reviews");
|
|
526
|
-
const scoresDir = path.join(labDir, "scores");
|
|
527
|
-
fs.mkdirSync(designsDir, { recursive: true });
|
|
528
|
-
fs.mkdirSync(reviewsDir, { recursive: true });
|
|
529
|
-
fs.mkdirSync(scoresDir, { recursive: true });
|
|
530
|
-
const taskData = {
|
|
531
|
-
requirements,
|
|
532
|
-
topic,
|
|
533
|
-
created: (/* @__PURE__ */ new Date()).toISOString(),
|
|
534
|
-
design_models: config.design_models,
|
|
535
|
-
review_models: config.review_models ?? config.design_models
|
|
536
|
-
};
|
|
537
|
-
fs.writeFileSync(path.join(labDir, "task.json"), JSON.stringify(taskData, null, 2));
|
|
538
|
-
const results = [];
|
|
539
|
-
for (const model of config.design_models) try {
|
|
540
|
-
logger.info({ model }, `Starting design generation for model: ${model}`);
|
|
541
|
-
const design = await generateDesign(ctx, config, model, requirements, sessionID);
|
|
542
|
-
const validationResult = DesignArtifactSchema.safeParse(design);
|
|
543
|
-
if (!validationResult.success) {
|
|
544
|
-
logger.error({
|
|
545
|
-
model,
|
|
546
|
-
errors: validationResult.error.issues
|
|
547
|
-
}, "Design schema validation failed");
|
|
548
|
-
results.push({
|
|
549
|
-
model,
|
|
550
|
-
success: false,
|
|
551
|
-
error: `Schema validation failed: ${validationResult.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join(", ")}`
|
|
552
|
-
});
|
|
553
|
-
continue;
|
|
554
|
-
}
|
|
555
|
-
const modelShortName = getModelShortName(model);
|
|
556
|
-
const designFile = path.join(designsDir, `${sanitizeForFilename(modelShortName)}.json`);
|
|
557
|
-
fs.writeFileSync(designFile, JSON.stringify(design, null, 2));
|
|
558
|
-
logger.info({
|
|
559
|
-
model,
|
|
560
|
-
designFile
|
|
561
|
-
}, "Design saved as JSON");
|
|
562
|
-
const markdownFile = path.join(designsDir, `${sanitizeForFilename(modelShortName)}.md`);
|
|
563
|
-
const markdown = formatDesignAsMarkdown(design, model);
|
|
564
|
-
fs.writeFileSync(markdownFile, markdown);
|
|
565
|
-
logger.info({
|
|
566
|
-
model,
|
|
567
|
-
markdownFile
|
|
568
|
-
}, "Design saved as Markdown");
|
|
569
|
-
results.push({
|
|
570
|
-
model,
|
|
571
|
-
success: true
|
|
572
|
-
});
|
|
573
|
-
} catch (err) {
|
|
574
|
-
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
575
|
-
logger.error({
|
|
576
|
-
model,
|
|
577
|
-
error: errorMsg
|
|
578
|
-
}, "Design generation failed");
|
|
579
|
-
results.push({
|
|
580
|
-
model,
|
|
581
|
-
success: false,
|
|
582
|
-
error: errorMsg
|
|
583
|
-
});
|
|
584
|
-
}
|
|
585
|
-
const successCount = results.filter((r) => r.success).length;
|
|
586
|
-
return `Design generation complete.
|
|
587
|
-
|
|
588
|
-
**Lab Directory**: ${labDir}
|
|
589
|
-
|
|
590
|
-
**Results**: ${successCount} successful, ${results.filter((r) => !r.success).length} failed
|
|
591
|
-
|
|
592
|
-
${results.map((r) => r.success ? `✅ ${r.model}: Generated successfully` : `❌ ${r.model}: ${r.error}`).join("\n")}
|
|
593
|
-
|
|
594
|
-
${successCount >= 2 ? `\nNext step: Run the review_designs tool to evaluate and compare the designs.` : `\nWarning: At least 2 successful designs are needed for meaningful comparison.`}`;
|
|
595
|
-
}
|
|
596
|
-
});
|
|
597
|
-
}
|
|
598
|
-
/**
|
|
599
|
-
* Format a design artifact as markdown for human readability
|
|
600
|
-
*/
|
|
601
|
-
function formatDesignAsMarkdown(design, model) {
|
|
602
|
-
let md = `# ${design.title}\n\n`;
|
|
603
|
-
md += `**Model**: ${model}\n\n`;
|
|
604
|
-
md += `## Summary\n\n${design.summary}\n\n`;
|
|
605
|
-
md += `## Assumptions\n\n`;
|
|
606
|
-
design.assumptions.forEach((a) => md += `- ${a}\n`);
|
|
607
|
-
md += `\n`;
|
|
608
|
-
md += `## Architecture Overview\n\n${design.architecture_overview}\n\n`;
|
|
609
|
-
md += `## Components\n\n`;
|
|
610
|
-
design.components.forEach((c) => {
|
|
611
|
-
md += `### ${c.name}\n\n${c.description}\n\n**Responsibilities**:\n`;
|
|
612
|
-
c.responsibilities.forEach((r) => md += `- ${r}\n`);
|
|
613
|
-
md += `\n`;
|
|
614
|
-
});
|
|
615
|
-
md += `## Data Flow\n\n${design.data_flow}\n\n`;
|
|
616
|
-
md += `## Tradeoffs\n\n`;
|
|
617
|
-
design.tradeoffs.forEach((t) => {
|
|
618
|
-
md += `### ${t.aspect}\n\n`;
|
|
619
|
-
md += `**Options**: ${t.options.join(", ")}\n\n`;
|
|
620
|
-
md += `**Chosen**: ${t.chosen}\n\n`;
|
|
621
|
-
md += `**Rationale**: ${t.rationale}\n\n`;
|
|
622
|
-
});
|
|
623
|
-
md += `## Risks\n\n`;
|
|
624
|
-
design.risks.forEach((r) => {
|
|
625
|
-
md += `### ${r.risk} (Impact: ${r.impact})\n\n`;
|
|
626
|
-
md += `**Mitigation**: ${r.mitigation}\n\n`;
|
|
627
|
-
});
|
|
628
|
-
md += `## Open Questions\n\n`;
|
|
629
|
-
design.open_questions.forEach((q) => md += `- ${q}\n`);
|
|
630
|
-
return md;
|
|
631
|
-
}
|
|
632
|
-
/**
|
|
633
|
-
* Generate a topic name from requirements
|
|
634
|
-
*/
|
|
635
|
-
async function generateTopic(ctx, config, requirements, parentSessionID) {
|
|
636
|
-
config.topic_generator_model || config.design_models[0];
|
|
637
|
-
const sessionID = await createAgentSession(ctx, parentSessionID, "Topic Generation", ctx.directory);
|
|
638
|
-
await sendPrompt(ctx, sessionID, `Generate a concise 2-4 word topic name for this design task. Output ONLY the topic name, nothing else.
|
|
639
|
-
|
|
640
|
-
Requirements:
|
|
641
|
-
${requirements.substring(0, 500)}`, {
|
|
642
|
-
write: false,
|
|
643
|
-
edit: false,
|
|
644
|
-
bash: false
|
|
645
|
-
});
|
|
646
|
-
await pollForCompletion(ctx, sessionID);
|
|
647
|
-
return sanitizeForFilename((await extractSessionOutput(ctx, sessionID)).trim());
|
|
648
|
-
}
|
|
649
|
-
/**
|
|
650
|
-
* Generate a single design using a specific model
|
|
651
|
-
*/
|
|
652
|
-
async function generateDesign(ctx, config, model, requirements, parentSessionID) {
|
|
653
|
-
const agentConfig = createDesignAgent(model, config.design_agent_temperature);
|
|
654
|
-
const sessionID = await createAgentSession(ctx, parentSessionID, `Design Generation - ${model}`, ctx.directory);
|
|
655
|
-
const prompt = `Generate a comprehensive design proposal for the following requirements.
|
|
656
|
-
|
|
657
|
-
## Requirements
|
|
658
|
-
|
|
659
|
-
${requirements}
|
|
660
|
-
|
|
661
|
-
## Instructions
|
|
662
|
-
|
|
663
|
-
1. Analyze the requirements thoroughly
|
|
664
|
-
2. Consider multiple approaches before deciding
|
|
665
|
-
3. Output your design as valid JSON following the required schema
|
|
666
|
-
4. Be specific and actionable in your design
|
|
667
|
-
|
|
668
|
-
Remember: Your entire response must be valid JSON with no other text.`;
|
|
669
|
-
const fullPrompt = `${agentConfig.prompt}\n\n${prompt}`;
|
|
670
|
-
logger.info({
|
|
671
|
-
model,
|
|
672
|
-
sessionID
|
|
673
|
-
}, "Sending design prompt to agent");
|
|
674
|
-
await sendPrompt(ctx, sessionID, fullPrompt, agentConfig.tools);
|
|
675
|
-
logger.info({
|
|
676
|
-
model,
|
|
677
|
-
sessionID
|
|
678
|
-
}, "Polling for completion");
|
|
679
|
-
await pollForCompletion(ctx, sessionID);
|
|
680
|
-
logger.info({
|
|
681
|
-
model,
|
|
682
|
-
sessionID
|
|
683
|
-
}, "Extracting session output");
|
|
684
|
-
const output = await extractSessionOutput(ctx, sessionID);
|
|
685
|
-
logger.info({
|
|
686
|
-
model,
|
|
687
|
-
outputLength: output.length
|
|
688
|
-
}, "Extracting JSON from output");
|
|
689
|
-
return extractJSON(output);
|
|
690
|
-
}
|
|
691
|
-
|
|
692
|
-
//#endregion
|
|
693
|
-
//#region src/tools/review-designs.ts
|
|
694
|
-
/**
|
|
695
|
-
* Create the review_designs tool
|
|
696
|
-
*/
|
|
697
|
-
function createReviewDesignsTool(ctx, config) {
|
|
698
|
-
const reviewModels = config.review_models ?? config.design_models;
|
|
699
|
-
return tool({
|
|
700
|
-
description: `Review and score design proposals using ${reviewModels.length} reviewer models.
|
|
701
|
-
|
|
702
|
-
Each reviewer analyzes all designs and provides:
|
|
703
|
-
1. A markdown review comparing the designs
|
|
704
|
-
2. Numeric scores (0-10) across dimensions: clarity, feasibility, scalability, maintainability, completeness, overall
|
|
705
|
-
|
|
706
|
-
Use this after generate_designs to evaluate and compare the generated designs.`,
|
|
707
|
-
args: { lab_path: tool.schema.string().describe(`Path to the design lab directory (e.g., .design-lab/2024-01-15-api-gateway). If not provided, uses the most recent lab.`).optional() },
|
|
708
|
-
async execute(args, toolContext) {
|
|
709
|
-
const sessionID = toolContext.sessionID;
|
|
710
|
-
const labDir = args.lab_path ? path.resolve(ctx.directory, args.lab_path) : findMostRecentLab$1(ctx.directory, config.base_output_dir);
|
|
711
|
-
if (!labDir) return "Error: No design lab found. Run generate_designs first.";
|
|
712
|
-
const designsDir = path.join(labDir, "designs");
|
|
713
|
-
const reviewsDir = path.join(labDir, "reviews");
|
|
714
|
-
const scoresDir = path.join(labDir, "scores");
|
|
715
|
-
const designFiles = fs.readdirSync(designsDir).filter((f) => f.endsWith(".json"));
|
|
716
|
-
if (designFiles.length === 0) return "Error: No designs found in the lab directory.";
|
|
717
|
-
const designs = {};
|
|
718
|
-
for (const file of designFiles) {
|
|
719
|
-
const designId = file.replace(".json", "");
|
|
720
|
-
const content = fs.readFileSync(path.join(designsDir, file), "utf-8");
|
|
721
|
-
designs[designId] = JSON.parse(content);
|
|
722
|
-
}
|
|
723
|
-
const taskPath = path.join(labDir, "task.json");
|
|
724
|
-
const requirements = JSON.parse(fs.readFileSync(taskPath, "utf-8")).requirements;
|
|
725
|
-
const results = [];
|
|
726
|
-
for (const model of reviewModels) try {
|
|
727
|
-
const { review, scores } = await generateReview(ctx, config, model, requirements, designs, sessionID);
|
|
728
|
-
const reviewFile = path.join(reviewsDir, `review-${sanitizeForFilename(getModelShortName(model))}.md`);
|
|
729
|
-
fs.writeFileSync(reviewFile, review);
|
|
730
|
-
logger.info({
|
|
731
|
-
model,
|
|
732
|
-
reviewFile
|
|
733
|
-
}, "Review saved");
|
|
734
|
-
for (const score of scores) {
|
|
735
|
-
const validationResult = ScoreSchema.safeParse(score);
|
|
736
|
-
if (!validationResult.success) console.warn(`Score validation warning for ${score.design_id}:`, validationResult.error);
|
|
737
|
-
const designShortName = getModelShortName(score.design_id);
|
|
738
|
-
const reviewerShortName = getModelShortName(model);
|
|
739
|
-
const scoreFile = path.join(scoresDir, `${sanitizeForFilename(designShortName)}-reviewed-by-${sanitizeForFilename(reviewerShortName)}.json`);
|
|
740
|
-
fs.writeFileSync(scoreFile, JSON.stringify(score, null, 2));
|
|
741
|
-
}
|
|
742
|
-
results.push({
|
|
743
|
-
model,
|
|
744
|
-
success: true
|
|
745
|
-
});
|
|
746
|
-
} catch (err) {
|
|
747
|
-
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
748
|
-
results.push({
|
|
749
|
-
model,
|
|
750
|
-
success: false,
|
|
751
|
-
error: errorMsg
|
|
752
|
-
});
|
|
753
|
-
}
|
|
754
|
-
const successCount = results.filter((r) => r.success).length;
|
|
755
|
-
return `Review complete.
|
|
756
|
-
|
|
757
|
-
**Lab Directory**: ${labDir}
|
|
758
|
-
|
|
759
|
-
**Results**: ${successCount} successful, ${results.filter((r) => !r.success).length} failed
|
|
760
|
-
|
|
761
|
-
${results.map((r) => r.success ? `✅ ${r.model}: Review generated` : `❌ ${r.model}: ${r.error}`).join("\n")}
|
|
762
|
-
|
|
763
|
-
**Reviews saved to**: ${reviewsDir}
|
|
764
|
-
**Scores saved to**: ${scoresDir}
|
|
765
|
-
|
|
766
|
-
${successCount > 0 ? `\nNext step: Run the aggregate_scores tool to generate final rankings.` : ""}`;
|
|
767
|
-
}
|
|
768
|
-
});
|
|
769
|
-
}
|
|
770
|
-
/**
|
|
771
|
-
* Find the most recent design lab directory
|
|
772
|
-
*/
|
|
773
|
-
function findMostRecentLab$1(projectDir, baseDir) {
|
|
774
|
-
const labBaseDir = path.join(projectDir, baseDir);
|
|
775
|
-
if (!fs.existsSync(labBaseDir)) return null;
|
|
776
|
-
const labs = fs.readdirSync(labBaseDir).filter((d) => fs.statSync(path.join(labBaseDir, d)).isDirectory()).sort().reverse();
|
|
777
|
-
if (labs.length === 0) return null;
|
|
778
|
-
return path.join(labBaseDir, labs[0]);
|
|
779
|
-
}
|
|
780
|
-
/**
|
|
781
|
-
* Generate a review using a specific model
|
|
782
|
-
*/
|
|
783
|
-
async function generateReview(ctx, config, model, requirements, designs, parentSessionID) {
|
|
784
|
-
const agentConfig = createReviewAgent(model, config.review_agent_temperature);
|
|
785
|
-
const sessionID = await createAgentSession(ctx, parentSessionID, `Design Review - ${model}`, ctx.directory);
|
|
786
|
-
const prompt = `Review and compare the following design proposals.
|
|
787
|
-
|
|
788
|
-
## Original Requirements
|
|
789
|
-
|
|
790
|
-
${requirements}
|
|
791
|
-
|
|
792
|
-
## Designs to Review
|
|
793
|
-
|
|
794
|
-
${Object.entries(designs).map(([id, design]) => `## Design: ${id}\n\n\`\`\`json\n${JSON.stringify(design, null, 2)}\n\`\`\``).join("\n\n---\n\n")}
|
|
795
|
-
|
|
796
|
-
## Your Task
|
|
797
|
-
|
|
798
|
-
1. Analyze each design thoroughly
|
|
799
|
-
2. Compare them across dimensions: clarity, feasibility, scalability, maintainability, completeness
|
|
800
|
-
3. Provide a detailed markdown review with your analysis
|
|
801
|
-
4. At the end, include a score table in markdown format
|
|
802
|
-
5. Identify strengths and weaknesses of each design
|
|
803
|
-
|
|
804
|
-
Be objective and support your assessments with specific observations.`;
|
|
805
|
-
await sendPrompt(ctx, sessionID, `${agentConfig.prompt}\n\n${prompt}`, agentConfig.tools);
|
|
806
|
-
await pollForCompletion(ctx, sessionID);
|
|
807
|
-
const review = await extractSessionOutput(ctx, sessionID);
|
|
808
|
-
await sendPrompt(ctx, sessionID, `Now output the scores for each design as a JSON array. Each element should have this structure:
|
|
809
|
-
|
|
810
|
-
{
|
|
811
|
-
"design_id": "EXACT_DESIGN_ID_FROM_LIST_BELOW",
|
|
812
|
-
"reviewer_model": "${model}",
|
|
813
|
-
"scores": {
|
|
814
|
-
"clarity": 0-10,
|
|
815
|
-
"feasibility": 0-10,
|
|
816
|
-
"scalability": 0-10,
|
|
817
|
-
"maintainability": 0-10,
|
|
818
|
-
"completeness": 0-10,
|
|
819
|
-
"overall": 0-10
|
|
820
|
-
},
|
|
821
|
-
"justification": "Brief overall justification",
|
|
822
|
-
"strengths": ["list", "of", "strengths"],
|
|
823
|
-
"weaknesses": ["list", "of", "weaknesses"],
|
|
824
|
-
"missing_considerations": ["list", "of", "things", "missing"]
|
|
825
|
-
}
|
|
826
|
-
|
|
827
|
-
**IMPORTANT**: The "design_id" field MUST be one of these exact values:
|
|
828
|
-
${Object.keys(designs).map((id) => `- "${id}"`).join("\n")}
|
|
829
|
-
|
|
830
|
-
Output ONLY the JSON array with one score object per design. No other text.`, agentConfig.tools);
|
|
831
|
-
await pollForCompletion(ctx, sessionID);
|
|
832
|
-
return {
|
|
833
|
-
review,
|
|
834
|
-
scores: extractJSON(await extractSessionOutput(ctx, sessionID))
|
|
835
|
-
};
|
|
836
|
-
}
|
|
837
|
-
|
|
838
|
-
//#endregion
|
|
839
|
-
//#region src/tools/aggregate-scores.ts
|
|
840
|
-
/**
|
|
841
|
-
* Create the aggregate_scores tool
|
|
842
|
-
*/
|
|
843
|
-
function createAggregateScoresTool(ctx, config) {
|
|
844
|
-
return tool({
|
|
845
|
-
description: `Aggregate scores from all reviewers and generate final rankings.
|
|
846
|
-
|
|
847
|
-
This tool:
|
|
848
|
-
1. Reads all score files from the reviews
|
|
849
|
-
2. Calculates average scores per design
|
|
850
|
-
3. Computes variance/disagreement metrics
|
|
851
|
-
4. Generates a final ranking with results.md
|
|
852
|
-
|
|
853
|
-
Use this after review_designs to get the final comparison.`,
|
|
854
|
-
args: { lab_path: tool.schema.string().describe(`Path to the design lab directory. If not provided, uses the most recent lab.`).optional() },
|
|
855
|
-
async execute(args) {
|
|
856
|
-
const labDir = args.lab_path ? path.resolve(ctx.directory, args.lab_path) : findMostRecentLab(ctx.directory, config.base_output_dir);
|
|
857
|
-
if (!labDir) return "Error: No design lab found. Run generate_designs first.";
|
|
858
|
-
const scoresDir = path.join(labDir, "scores");
|
|
859
|
-
const resultsDir = path.join(labDir, "results");
|
|
860
|
-
if (!fs.existsSync(scoresDir)) return "Error: No scores directory found. Run review_designs first.";
|
|
861
|
-
fs.mkdirSync(resultsDir, { recursive: true });
|
|
862
|
-
const scoreFiles = fs.readdirSync(scoresDir).filter((f) => f.endsWith(".json"));
|
|
863
|
-
if (scoreFiles.length === 0) return "Error: No score files found. Run review_designs first.";
|
|
864
|
-
const allScores = [];
|
|
865
|
-
for (const file of scoreFiles) {
|
|
866
|
-
const content = fs.readFileSync(path.join(scoresDir, file), "utf-8");
|
|
867
|
-
allScores.push(JSON.parse(content));
|
|
868
|
-
}
|
|
869
|
-
const scoresByDesign = {};
|
|
870
|
-
for (const score of allScores) {
|
|
871
|
-
if (!scoresByDesign[score.design_id]) scoresByDesign[score.design_id] = [];
|
|
872
|
-
scoresByDesign[score.design_id].push(score);
|
|
873
|
-
}
|
|
874
|
-
const rankings = [];
|
|
875
|
-
for (const [designId, scores] of Object.entries(scoresByDesign)) {
|
|
876
|
-
const overallScores = scores.map((s) => s.scores.overall);
|
|
877
|
-
const avgOverall = overallScores.reduce((a, b) => a + b, 0) / overallScores.length;
|
|
878
|
-
const variance = overallScores.reduce((sum, s) => sum + Math.pow(s - avgOverall, 2), 0) / overallScores.length;
|
|
879
|
-
const dimensions = [
|
|
880
|
-
"clarity",
|
|
881
|
-
"feasibility",
|
|
882
|
-
"scalability",
|
|
883
|
-
"maintainability",
|
|
884
|
-
"completeness",
|
|
885
|
-
"overall"
|
|
886
|
-
];
|
|
887
|
-
const scoreBreakdown = {};
|
|
888
|
-
for (const dim of dimensions) {
|
|
889
|
-
const dimScores = scores.map((s) => s.scores[dim]);
|
|
890
|
-
scoreBreakdown[dim] = dimScores.reduce((a, b) => a + b, 0) / dimScores.length;
|
|
891
|
-
}
|
|
892
|
-
rankings.push({
|
|
893
|
-
design_id: designId,
|
|
894
|
-
rank: 0,
|
|
895
|
-
average_score: avgOverall,
|
|
896
|
-
score_breakdown: scoreBreakdown,
|
|
897
|
-
variance,
|
|
898
|
-
reviewer_count: scores.length
|
|
899
|
-
});
|
|
900
|
-
}
|
|
901
|
-
rankings.sort((a, b) => b.average_score - a.average_score);
|
|
902
|
-
rankings.forEach((r, i) => {
|
|
903
|
-
r.rank = i + 1;
|
|
904
|
-
});
|
|
905
|
-
fs.writeFileSync(path.join(resultsDir, "ranking.json"), JSON.stringify(rankings, null, 2));
|
|
906
|
-
const resultsMarkdown = generateResultsMarkdown(rankings, allScores);
|
|
907
|
-
fs.writeFileSync(path.join(resultsDir, "results.md"), resultsMarkdown);
|
|
908
|
-
return `Aggregation complete.
|
|
909
|
-
|
|
910
|
-
**Rankings saved to**: ${path.join(resultsDir, "ranking.json")}
|
|
911
|
-
**Results summary saved to**: ${path.join(resultsDir, "results.md")}
|
|
912
|
-
|
|
913
|
-
## Final Rankings
|
|
914
|
-
|
|
915
|
-
${rankings.map((r) => `${r.rank}. **${r.design_id}** - Score: ${r.average_score.toFixed(1)}/10 (variance: ${r.variance.toFixed(2)})`).join("\n")}
|
|
916
|
-
|
|
917
|
-
View the full results in ${path.join(resultsDir, "results.md")}`;
|
|
918
|
-
}
|
|
919
|
-
});
|
|
920
|
-
}
|
|
921
|
-
/**
|
|
922
|
-
* Find the most recent design lab directory
|
|
923
|
-
*/
|
|
924
|
-
function findMostRecentLab(projectDir, baseDir) {
|
|
925
|
-
const labBaseDir = path.join(projectDir, baseDir);
|
|
926
|
-
if (!fs.existsSync(labBaseDir)) return null;
|
|
927
|
-
const labs = fs.readdirSync(labBaseDir).filter((d) => fs.statSync(path.join(labBaseDir, d)).isDirectory()).sort().reverse();
|
|
928
|
-
if (labs.length === 0) return null;
|
|
929
|
-
return path.join(labBaseDir, labs[0]);
|
|
930
|
-
}
|
|
931
|
-
/**
|
|
932
|
-
* Generate the results markdown file
|
|
933
|
-
*/
|
|
934
|
-
function generateResultsMarkdown(rankings, allScores) {
|
|
935
|
-
const dimensions = [
|
|
936
|
-
"clarity",
|
|
937
|
-
"feasibility",
|
|
938
|
-
"scalability",
|
|
939
|
-
"maintainability",
|
|
940
|
-
"completeness",
|
|
941
|
-
"overall"
|
|
942
|
-
];
|
|
943
|
-
const reviewers = [...new Set(allScores.map((s) => s.reviewer_model))];
|
|
944
|
-
let md = `# Design Lab Results
|
|
945
|
-
|
|
946
|
-
Generated: ${(/* @__PURE__ */ new Date()).toISOString()}
|
|
947
|
-
|
|
948
|
-
## Summary
|
|
949
|
-
|
|
950
|
-
| Rank | Design | Avg Score | Variance | Reviewers |
|
|
951
|
-
|------|--------|-----------|----------|-----------|
|
|
952
|
-
${rankings.map((r) => `| ${r.rank} | ${r.design_id} | ${r.average_score.toFixed(1)} | ${r.variance.toFixed(2)} | ${r.reviewer_count} |`).join("\n")}
|
|
953
|
-
|
|
954
|
-
## Detailed Score Breakdown
|
|
955
|
-
|
|
956
|
-
### Average Scores by Dimension
|
|
957
|
-
|
|
958
|
-
| Design | ${dimensions.join(" | ")} |
|
|
959
|
-
|--------|${"---|".repeat(dimensions.length)}
|
|
960
|
-
${rankings.map((r) => `| ${r.design_id} | ${dimensions.map((d) => r.score_breakdown[d].toFixed(1)).join(" | ")} |`).join("\n")}
|
|
961
|
-
|
|
962
|
-
## Reviewer × Design Matrix
|
|
963
|
-
|
|
964
|
-
### Overall Scores
|
|
965
|
-
|
|
966
|
-
| Reviewer | ${rankings.map((r) => r.design_id).join(" | ")} |
|
|
967
|
-
|----------|${"---|".repeat(rankings.length)}
|
|
968
|
-
${reviewers.map((reviewer) => {
|
|
969
|
-
return `| ${reviewer} | ${rankings.map((r) => {
|
|
970
|
-
const score = allScores.find((s) => s.design_id === r.design_id && s.reviewer_model === reviewer);
|
|
971
|
-
return score ? score.scores.overall.toFixed(1) : "N/A";
|
|
972
|
-
}).join(" | ")} |`;
|
|
973
|
-
}).join("\n")}
|
|
974
|
-
|
|
975
|
-
## Key Observations
|
|
976
|
-
|
|
977
|
-
`;
|
|
978
|
-
const topDesign = rankings[0];
|
|
979
|
-
md += `### Winner: ${topDesign.design_id}
|
|
980
|
-
|
|
981
|
-
- **Average Score**: ${topDesign.average_score.toFixed(1)}/10
|
|
982
|
-
- **Variance**: ${topDesign.variance.toFixed(2)} (${topDesign.variance < 1 ? "high consensus" : topDesign.variance < 2 ? "moderate consensus" : "low consensus"})
|
|
983
|
-
|
|
984
|
-
`;
|
|
985
|
-
md += `### Strengths and Weaknesses\n\n`;
|
|
986
|
-
for (const ranking of rankings.slice(0, 3)) {
|
|
987
|
-
const designScores = allScores.filter((s) => s.design_id === ranking.design_id);
|
|
988
|
-
const strengths = [...new Set(designScores.flatMap((s) => s.strengths))].slice(0, 3);
|
|
989
|
-
const weaknesses = [...new Set(designScores.flatMap((s) => s.weaknesses))].slice(0, 3);
|
|
990
|
-
md += `#### ${ranking.rank}. ${ranking.design_id}\n\n`;
|
|
991
|
-
md += `**Strengths**:\n${strengths.map((s) => `- ${s}`).join("\n")}\n\n`;
|
|
992
|
-
md += `**Weaknesses**:\n${weaknesses.map((w) => `- ${w}`).join("\n")}\n\n`;
|
|
993
|
-
}
|
|
994
|
-
return md;
|
|
995
|
-
}
|
|
996
|
-
|
|
997
381
|
//#endregion
|
|
998
382
|
//#region src/design-lab.ts
|
|
999
383
|
/**
|
|
@@ -1003,13 +387,48 @@ ${reviewers.map((reviewer) => {
|
|
|
1003
387
|
* then systematically evaluates, compares, and ranks those designs.
|
|
1004
388
|
*/
|
|
1005
389
|
const DesignLab = async (ctx) => {
|
|
1006
|
-
const
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
390
|
+
const pluginConfig = loadPluginConfig(ctx.directory);
|
|
391
|
+
logger.info("Design Lab Plugin Loaded");
|
|
392
|
+
return { config: async (config) => {
|
|
393
|
+
const designModels = uniqueModels(pluginConfig.design_models);
|
|
394
|
+
const reviewModels = uniqueModels(pluginConfig.review_models ?? pluginConfig.design_models);
|
|
395
|
+
const allModels = uniqueModels([...designModels, ...reviewModels]);
|
|
396
|
+
const modelSpecs = new Map(allModels.map((model) => [model, {
|
|
397
|
+
model,
|
|
398
|
+
agentName: getDesignerSubagentName(model),
|
|
399
|
+
fileStem: getDesignerModelFileStem(model)
|
|
400
|
+
}]));
|
|
401
|
+
const designSpecs = designModels.map((model) => modelSpecs.get(model)).filter(isModelSpec);
|
|
402
|
+
const reviewSpecs = reviewModels.map((model) => modelSpecs.get(model)).filter(isModelSpec);
|
|
403
|
+
const subagentEntries = Array.from(modelSpecs.values()).map((spec) => [spec.agentName, createDesignerModelAgent(spec.model)]);
|
|
404
|
+
config.agent = {
|
|
405
|
+
...config.agent ?? {},
|
|
406
|
+
designer: createDesignerPrimaryAgent({
|
|
407
|
+
baseOutputDir: pluginConfig.base_output_dir,
|
|
408
|
+
designModels: designSpecs,
|
|
409
|
+
reviewModels: reviewSpecs
|
|
410
|
+
}),
|
|
411
|
+
...Object.fromEntries(subagentEntries)
|
|
412
|
+
};
|
|
413
|
+
const agentKeys = Object.keys(config.agent ?? {});
|
|
414
|
+
logger.info({
|
|
415
|
+
designModels,
|
|
416
|
+
reviewModels,
|
|
417
|
+
agentsRegistered: agentKeys
|
|
418
|
+
}, "DesignLab agents registered");
|
|
1011
419
|
} };
|
|
1012
420
|
};
|
|
421
|
+
function uniqueModels(models) {
|
|
422
|
+
const seen = /* @__PURE__ */ new Set();
|
|
423
|
+
return models.filter((model) => {
|
|
424
|
+
if (seen.has(model)) return false;
|
|
425
|
+
seen.add(model);
|
|
426
|
+
return true;
|
|
427
|
+
});
|
|
428
|
+
}
|
|
429
|
+
function isModelSpec(spec) {
|
|
430
|
+
return Boolean(spec);
|
|
431
|
+
}
|
|
1013
432
|
|
|
1014
433
|
//#endregion
|
|
1015
434
|
export { DesignLab };
|