@mindstudio-ai/remy 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js
CHANGED
|
@@ -2258,6 +2258,14 @@ var BROWSER_TOOLS = [
|
|
|
2258
2258
|
type: "object",
|
|
2259
2259
|
properties: {}
|
|
2260
2260
|
}
|
|
2261
|
+
},
|
|
2262
|
+
{
|
|
2263
|
+
name: "resetBrowser",
|
|
2264
|
+
description: "Reset the browser to a clean state. Call this once after all tests are complete to restore the preview for the user. Fire and forget \u2014 does not wait for the reload to finish.",
|
|
2265
|
+
inputSchema: {
|
|
2266
|
+
type: "object",
|
|
2267
|
+
properties: {}
|
|
2268
|
+
}
|
|
2261
2269
|
}
|
|
2262
2270
|
];
|
|
2263
2271
|
var BROWSER_EXTERNAL_TOOLS = /* @__PURE__ */ new Set(["browserCommand", "screenshot"]);
|
|
@@ -2307,7 +2315,17 @@ var browserAutomationTool = {
|
|
|
2307
2315
|
task: input.task,
|
|
2308
2316
|
tools: BROWSER_TOOLS,
|
|
2309
2317
|
externalTools: BROWSER_EXTERNAL_TOOLS,
|
|
2310
|
-
executeTool: async () =>
|
|
2318
|
+
executeTool: async (name) => {
|
|
2319
|
+
if (name === "resetBrowser") {
|
|
2320
|
+
try {
|
|
2321
|
+
await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
|
|
2322
|
+
return "Browser reset triggered.";
|
|
2323
|
+
} catch {
|
|
2324
|
+
return "Error: could not reset browser.";
|
|
2325
|
+
}
|
|
2326
|
+
}
|
|
2327
|
+
return `Error: unknown local tool "${name}"`;
|
|
2328
|
+
},
|
|
2311
2329
|
apiConfig: context.apiConfig,
|
|
2312
2330
|
model: context.model,
|
|
2313
2331
|
signal: context.signal,
|
|
@@ -2462,6 +2480,32 @@ var DESIGN_RESEARCH_TOOLS = [
|
|
|
2462
2480
|
},
|
|
2463
2481
|
required: ["prompts"]
|
|
2464
2482
|
}
|
|
2483
|
+
},
|
|
2484
|
+
{
|
|
2485
|
+
name: "editImage",
|
|
2486
|
+
description: "Edit an existing image using a text instruction. Takes a source image URL and a prompt describing the edits (color grading, style transfer, modifications, adding/removing elements). Returns a new CDN URL.",
|
|
2487
|
+
inputSchema: {
|
|
2488
|
+
type: "object",
|
|
2489
|
+
properties: {
|
|
2490
|
+
imageUrl: {
|
|
2491
|
+
type: "string",
|
|
2492
|
+
description: "URL of the source image to edit."
|
|
2493
|
+
},
|
|
2494
|
+
prompt: {
|
|
2495
|
+
type: "string",
|
|
2496
|
+
description: 'What to change. Describe the edit as an instruction: "apply warm golden hour color grading", "make the background darker", "add a subtle film grain texture".'
|
|
2497
|
+
},
|
|
2498
|
+
width: {
|
|
2499
|
+
type: "number",
|
|
2500
|
+
description: "Output width in pixels. Default 2048. Range: 2048-4096."
|
|
2501
|
+
},
|
|
2502
|
+
height: {
|
|
2503
|
+
type: "number",
|
|
2504
|
+
description: "Output height in pixels. Default 2048. Range: 2048-4096."
|
|
2505
|
+
}
|
|
2506
|
+
},
|
|
2507
|
+
required: ["imageUrl", "prompt"]
|
|
2508
|
+
}
|
|
2465
2509
|
}
|
|
2466
2510
|
];
|
|
2467
2511
|
function runCli(cmd) {
|
|
@@ -2507,37 +2551,17 @@ async function executeDesignTool(name, input) {
|
|
|
2507
2551
|
`mindstudio analyze-image --prompt ${JSON.stringify(DESIGN_REFERENCE_PROMPT)} --image-url ${JSON.stringify(input.imageUrl)} --no-meta`
|
|
2508
2552
|
);
|
|
2509
2553
|
case "screenshotAndAnalyze": {
|
|
2510
|
-
const
|
|
2511
|
-
`mindstudio
|
|
2554
|
+
const ssUrl = await runCli(
|
|
2555
|
+
`mindstudio screenshot-url --url ${JSON.stringify(input.url)} --mode viewport --width 1440 --delay 2000 --output-key screenshotUrl --no-meta`
|
|
2512
2556
|
);
|
|
2513
|
-
|
|
2514
|
-
|
|
2515
|
-
);
|
|
2516
|
-
if (!screenshotMatch) {
|
|
2517
|
-
try {
|
|
2518
|
-
const parsed = JSON.parse(screenshotResult);
|
|
2519
|
-
const ssUrl = parsed.screenshot || parsed.screenshotUrl || parsed.content?.screenshotUrl;
|
|
2520
|
-
if (ssUrl) {
|
|
2521
|
-
const analysisPrompt2 = input.prompt || DESIGN_REFERENCE_PROMPT;
|
|
2522
|
-
const analysis2 = await runCli(
|
|
2523
|
-
`mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt2)} --image-url ${JSON.stringify(ssUrl)} --no-meta`
|
|
2524
|
-
);
|
|
2525
|
-
return `Screenshot: ${ssUrl}
|
|
2526
|
-
|
|
2527
|
-
${analysis2}`;
|
|
2528
|
-
}
|
|
2529
|
-
} catch {
|
|
2530
|
-
}
|
|
2531
|
-
return `Fetched ${input.url} but could not extract screenshot URL.
|
|
2532
|
-
|
|
2533
|
-
Page content:
|
|
2534
|
-
${screenshotResult}`;
|
|
2557
|
+
if (ssUrl.startsWith("Error")) {
|
|
2558
|
+
return `Could not screenshot ${input.url}: ${ssUrl}`;
|
|
2535
2559
|
}
|
|
2536
2560
|
const analysisPrompt = input.prompt || DESIGN_REFERENCE_PROMPT;
|
|
2537
2561
|
const analysis = await runCli(
|
|
2538
|
-
`mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(
|
|
2562
|
+
`mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(ssUrl)} --no-meta`
|
|
2539
2563
|
);
|
|
2540
|
-
return `Screenshot: ${
|
|
2564
|
+
return `Screenshot: ${ssUrl}
|
|
2541
2565
|
|
|
2542
2566
|
${analysis}`;
|
|
2543
2567
|
}
|
|
@@ -2581,6 +2605,24 @@ ${analysis}`;
|
|
|
2581
2605
|
}));
|
|
2582
2606
|
return runCli(`mindstudio batch '${JSON.stringify(steps)}' --no-meta`);
|
|
2583
2607
|
}
|
|
2608
|
+
case "editImage": {
|
|
2609
|
+
const width = input.width || 2048;
|
|
2610
|
+
const height = input.height || 2048;
|
|
2611
|
+
const step = JSON.stringify({
|
|
2612
|
+
prompt: input.prompt,
|
|
2613
|
+
imageModelOverride: {
|
|
2614
|
+
model: "seedream-4.5",
|
|
2615
|
+
config: {
|
|
2616
|
+
images: [input.imageUrl],
|
|
2617
|
+
width,
|
|
2618
|
+
height
|
|
2619
|
+
}
|
|
2620
|
+
}
|
|
2621
|
+
});
|
|
2622
|
+
return runCli(
|
|
2623
|
+
`mindstudio generate-image '${step}' --output-key imageUrl --no-meta`
|
|
2624
|
+
);
|
|
2625
|
+
}
|
|
2584
2626
|
default:
|
|
2585
2627
|
return `Error: unknown tool "${name}"`;
|
|
2586
2628
|
}
|
|
@@ -2666,7 +2708,7 @@ ${pairingList}
|
|
|
2666
2708
|
const inspirationSection = images.length ? `<inspiration_images>
|
|
2667
2709
|
## Design inspiration
|
|
2668
2710
|
|
|
2669
|
-
|
|
2711
|
+
This is what the bar looks like. These are real sites that made it onto curated design galleries because they did something bold, intentional, and memorable. Study the moves they make \u2014 the confident color choices, the unexpected layouts, the typography that carries the whole page. Your recommendations should feel like they belong in this company.
|
|
2670
2712
|
|
|
2671
2713
|
${imageList}
|
|
2672
2714
|
</inspiration_images>` : "";
|
|
@@ -2687,8 +2729,8 @@ The visual design expert can be used for all things visual design, from quick qu
|
|
|
2687
2729
|
- Layout and composition ideas that go beyond generic AI defaults
|
|
2688
2730
|
- Analyzing a reference site or screenshot for design insights (it can take screenshots and do research on its own)
|
|
2689
2731
|
- Beautiful layout images or photos
|
|
2690
|
-
- Icon recommendations
|
|
2691
|
-
- Proposing full visual directions during intake
|
|
2732
|
+
- Icon recommendations or AI image editing
|
|
2733
|
+
- Proposing full visual design and layout directions during intake
|
|
2692
2734
|
|
|
2693
2735
|
**How to write the task:**
|
|
2694
2736
|
Include context about the app \u2014 what it does, who uses it, what mood or feeling the interface should convey. If the user has any specific requirements, be sure to include them. The agent can not see your conversation with the user, so you need to include all details. More context produces better results. For quick questions ("three font pairings for a <x> app"), brief is fine. You can ask for multiple topics, multiple options, etc.
|
|
@@ -3195,6 +3237,8 @@ async function runTurn(params) {
|
|
|
3195
3237
|
});
|
|
3196
3238
|
}
|
|
3197
3239
|
state.messages.push(userMsg);
|
|
3240
|
+
let lastCompletedTools = "";
|
|
3241
|
+
let lastCompletedResult = "";
|
|
3198
3242
|
while (true) {
|
|
3199
3243
|
let getOrCreateAccumulator2 = function(id, name) {
|
|
3200
3244
|
let acc = toolInputAccumulators.get(id);
|
|
@@ -3281,7 +3325,8 @@ async function runTurn(params) {
|
|
|
3281
3325
|
apiConfig,
|
|
3282
3326
|
getContext: () => ({
|
|
3283
3327
|
assistantText: assistantText.slice(-500),
|
|
3284
|
-
lastToolName: toolCalls.at(-1)?.name
|
|
3328
|
+
lastToolName: toolCalls.at(-1)?.name || lastCompletedTools || void 0,
|
|
3329
|
+
lastToolResult: lastCompletedResult || void 0
|
|
3285
3330
|
}),
|
|
3286
3331
|
onStatus: (label) => onEvent({ type: "status", message: label }),
|
|
3287
3332
|
signal
|
|
@@ -3410,15 +3455,6 @@ async function runTurn(params) {
|
|
|
3410
3455
|
count: toolCalls.length,
|
|
3411
3456
|
tools: toolCalls.map((tc) => tc.name)
|
|
3412
3457
|
});
|
|
3413
|
-
const toolStatusWatcher = startStatusWatcher({
|
|
3414
|
-
apiConfig,
|
|
3415
|
-
getContext: () => ({
|
|
3416
|
-
assistantText: assistantText.slice(-500),
|
|
3417
|
-
lastToolName: toolCalls.map((tc) => tc.name).join(", ")
|
|
3418
|
-
}),
|
|
3419
|
-
onStatus: (label) => onEvent({ type: "status", message: label }),
|
|
3420
|
-
signal
|
|
3421
|
-
});
|
|
3422
3458
|
const results = await Promise.all(
|
|
3423
3459
|
toolCalls.map(async (tc) => {
|
|
3424
3460
|
if (signal?.aborted) {
|
|
@@ -3476,7 +3512,8 @@ async function runTurn(params) {
|
|
|
3476
3512
|
}
|
|
3477
3513
|
})
|
|
3478
3514
|
);
|
|
3479
|
-
|
|
3515
|
+
lastCompletedTools = toolCalls.map((tc) => tc.name).join(", ");
|
|
3516
|
+
lastCompletedResult = results.at(-1)?.result ?? "";
|
|
3480
3517
|
for (const r of results) {
|
|
3481
3518
|
state.messages.push({
|
|
3482
3519
|
role: "user",
|
package/dist/index.js
CHANGED
|
@@ -2208,6 +2208,14 @@ var init_tools = __esm({
|
|
|
2208
2208
|
type: "object",
|
|
2209
2209
|
properties: {}
|
|
2210
2210
|
}
|
|
2211
|
+
},
|
|
2212
|
+
{
|
|
2213
|
+
name: "resetBrowser",
|
|
2214
|
+
description: "Reset the browser to a clean state. Call this once after all tests are complete to restore the preview for the user. Fire and forget \u2014 does not wait for the reload to finish.",
|
|
2215
|
+
inputSchema: {
|
|
2216
|
+
type: "object",
|
|
2217
|
+
properties: {}
|
|
2218
|
+
}
|
|
2211
2219
|
}
|
|
2212
2220
|
];
|
|
2213
2221
|
BROWSER_EXTERNAL_TOOLS = /* @__PURE__ */ new Set(["browserCommand", "screenshot"]);
|
|
@@ -2273,7 +2281,17 @@ var init_browserAutomation = __esm({
|
|
|
2273
2281
|
task: input.task,
|
|
2274
2282
|
tools: BROWSER_TOOLS,
|
|
2275
2283
|
externalTools: BROWSER_EXTERNAL_TOOLS,
|
|
2276
|
-
executeTool: async () =>
|
|
2284
|
+
executeTool: async (name) => {
|
|
2285
|
+
if (name === "resetBrowser") {
|
|
2286
|
+
try {
|
|
2287
|
+
await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
|
|
2288
|
+
return "Browser reset triggered.";
|
|
2289
|
+
} catch {
|
|
2290
|
+
return "Error: could not reset browser.";
|
|
2291
|
+
}
|
|
2292
|
+
}
|
|
2293
|
+
return `Error: unknown local tool "${name}"`;
|
|
2294
|
+
},
|
|
2277
2295
|
apiConfig: context.apiConfig,
|
|
2278
2296
|
model: context.model,
|
|
2279
2297
|
signal: context.signal,
|
|
@@ -2331,37 +2349,17 @@ async function executeDesignTool(name, input) {
|
|
|
2331
2349
|
`mindstudio analyze-image --prompt ${JSON.stringify(DESIGN_REFERENCE_PROMPT)} --image-url ${JSON.stringify(input.imageUrl)} --no-meta`
|
|
2332
2350
|
);
|
|
2333
2351
|
case "screenshotAndAnalyze": {
|
|
2334
|
-
const
|
|
2335
|
-
`mindstudio
|
|
2336
|
-
);
|
|
2337
|
-
const screenshotMatch = screenshotResult.match(
|
|
2338
|
-
/https:\/\/[^\s"']+(?:\.png|\.jpg|\.jpeg|\.webp|screenshot[^\s"']*)/i
|
|
2352
|
+
const ssUrl = await runCli(
|
|
2353
|
+
`mindstudio screenshot-url --url ${JSON.stringify(input.url)} --mode viewport --width 1440 --delay 2000 --output-key screenshotUrl --no-meta`
|
|
2339
2354
|
);
|
|
2340
|
-
if (
|
|
2341
|
-
|
|
2342
|
-
const parsed = JSON.parse(screenshotResult);
|
|
2343
|
-
const ssUrl = parsed.screenshot || parsed.screenshotUrl || parsed.content?.screenshotUrl;
|
|
2344
|
-
if (ssUrl) {
|
|
2345
|
-
const analysisPrompt2 = input.prompt || DESIGN_REFERENCE_PROMPT;
|
|
2346
|
-
const analysis2 = await runCli(
|
|
2347
|
-
`mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt2)} --image-url ${JSON.stringify(ssUrl)} --no-meta`
|
|
2348
|
-
);
|
|
2349
|
-
return `Screenshot: ${ssUrl}
|
|
2350
|
-
|
|
2351
|
-
${analysis2}`;
|
|
2352
|
-
}
|
|
2353
|
-
} catch {
|
|
2354
|
-
}
|
|
2355
|
-
return `Fetched ${input.url} but could not extract screenshot URL.
|
|
2356
|
-
|
|
2357
|
-
Page content:
|
|
2358
|
-
${screenshotResult}`;
|
|
2355
|
+
if (ssUrl.startsWith("Error")) {
|
|
2356
|
+
return `Could not screenshot ${input.url}: ${ssUrl}`;
|
|
2359
2357
|
}
|
|
2360
2358
|
const analysisPrompt = input.prompt || DESIGN_REFERENCE_PROMPT;
|
|
2361
2359
|
const analysis = await runCli(
|
|
2362
|
-
`mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(
|
|
2360
|
+
`mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(ssUrl)} --no-meta`
|
|
2363
2361
|
);
|
|
2364
|
-
return `Screenshot: ${
|
|
2362
|
+
return `Screenshot: ${ssUrl}
|
|
2365
2363
|
|
|
2366
2364
|
${analysis}`;
|
|
2367
2365
|
}
|
|
@@ -2405,6 +2403,24 @@ ${analysis}`;
|
|
|
2405
2403
|
}));
|
|
2406
2404
|
return runCli(`mindstudio batch '${JSON.stringify(steps)}' --no-meta`);
|
|
2407
2405
|
}
|
|
2406
|
+
case "editImage": {
|
|
2407
|
+
const width = input.width || 2048;
|
|
2408
|
+
const height = input.height || 2048;
|
|
2409
|
+
const step = JSON.stringify({
|
|
2410
|
+
prompt: input.prompt,
|
|
2411
|
+
imageModelOverride: {
|
|
2412
|
+
model: "seedream-4.5",
|
|
2413
|
+
config: {
|
|
2414
|
+
images: [input.imageUrl],
|
|
2415
|
+
width,
|
|
2416
|
+
height
|
|
2417
|
+
}
|
|
2418
|
+
}
|
|
2419
|
+
});
|
|
2420
|
+
return runCli(
|
|
2421
|
+
`mindstudio generate-image '${step}' --output-key imageUrl --no-meta`
|
|
2422
|
+
);
|
|
2423
|
+
}
|
|
2408
2424
|
default:
|
|
2409
2425
|
return `Error: unknown tool "${name}"`;
|
|
2410
2426
|
}
|
|
@@ -2555,6 +2571,32 @@ Be specific and concise.`;
|
|
|
2555
2571
|
},
|
|
2556
2572
|
required: ["prompts"]
|
|
2557
2573
|
}
|
|
2574
|
+
},
|
|
2575
|
+
{
|
|
2576
|
+
name: "editImage",
|
|
2577
|
+
description: "Edit an existing image using a text instruction. Takes a source image URL and a prompt describing the edits (color grading, style transfer, modifications, adding/removing elements). Returns a new CDN URL.",
|
|
2578
|
+
inputSchema: {
|
|
2579
|
+
type: "object",
|
|
2580
|
+
properties: {
|
|
2581
|
+
imageUrl: {
|
|
2582
|
+
type: "string",
|
|
2583
|
+
description: "URL of the source image to edit."
|
|
2584
|
+
},
|
|
2585
|
+
prompt: {
|
|
2586
|
+
type: "string",
|
|
2587
|
+
description: 'What to change. Describe the edit as an instruction: "apply warm golden hour color grading", "make the background darker", "add a subtle film grain texture".'
|
|
2588
|
+
},
|
|
2589
|
+
width: {
|
|
2590
|
+
type: "number",
|
|
2591
|
+
description: "Output width in pixels. Default 2048. Range: 2048-4096."
|
|
2592
|
+
},
|
|
2593
|
+
height: {
|
|
2594
|
+
type: "number",
|
|
2595
|
+
description: "Output height in pixels. Default 2048. Range: 2048-4096."
|
|
2596
|
+
}
|
|
2597
|
+
},
|
|
2598
|
+
required: ["imageUrl", "prompt"]
|
|
2599
|
+
}
|
|
2558
2600
|
}
|
|
2559
2601
|
];
|
|
2560
2602
|
}
|
|
@@ -2623,7 +2665,7 @@ ${pairingList}
|
|
|
2623
2665
|
const inspirationSection = images.length ? `<inspiration_images>
|
|
2624
2666
|
## Design inspiration
|
|
2625
2667
|
|
|
2626
|
-
|
|
2668
|
+
This is what the bar looks like. These are real sites that made it onto curated design galleries because they did something bold, intentional, and memorable. Study the moves they make \u2014 the confident color choices, the unexpected layouts, the typography that carries the whole page. Your recommendations should feel like they belong in this company.
|
|
2627
2669
|
|
|
2628
2670
|
${imageList}
|
|
2629
2671
|
</inspiration_images>` : "";
|
|
@@ -2674,8 +2716,8 @@ The visual design expert can be used for all things visual design, from quick qu
|
|
|
2674
2716
|
- Layout and composition ideas that go beyond generic AI defaults
|
|
2675
2717
|
- Analyzing a reference site or screenshot for design insights (it can take screenshots and do research on its own)
|
|
2676
2718
|
- Beautiful layout images or photos
|
|
2677
|
-
- Icon recommendations
|
|
2678
|
-
- Proposing full visual directions during intake
|
|
2719
|
+
- Icon recommendations or AI image editing
|
|
2720
|
+
- Proposing full visual design and layout directions during intake
|
|
2679
2721
|
|
|
2680
2722
|
**How to write the task:**
|
|
2681
2723
|
Include context about the app \u2014 what it does, who uses it, what mood or feeling the interface should convey. If the user has any specific requirements, be sure to include them. The agent can not see your conversation with the user, so you need to include all details. More context produces better results. For quick questions ("three font pairings for a <x> app"), brief is fine. You can ask for multiple topics, multiple options, etc.
|
|
@@ -3230,6 +3272,8 @@ async function runTurn(params) {
|
|
|
3230
3272
|
});
|
|
3231
3273
|
}
|
|
3232
3274
|
state.messages.push(userMsg);
|
|
3275
|
+
let lastCompletedTools = "";
|
|
3276
|
+
let lastCompletedResult = "";
|
|
3233
3277
|
while (true) {
|
|
3234
3278
|
let getOrCreateAccumulator2 = function(id, name) {
|
|
3235
3279
|
let acc = toolInputAccumulators.get(id);
|
|
@@ -3316,7 +3360,8 @@ async function runTurn(params) {
|
|
|
3316
3360
|
apiConfig,
|
|
3317
3361
|
getContext: () => ({
|
|
3318
3362
|
assistantText: assistantText.slice(-500),
|
|
3319
|
-
lastToolName: toolCalls.at(-1)?.name
|
|
3363
|
+
lastToolName: toolCalls.at(-1)?.name || lastCompletedTools || void 0,
|
|
3364
|
+
lastToolResult: lastCompletedResult || void 0
|
|
3320
3365
|
}),
|
|
3321
3366
|
onStatus: (label) => onEvent({ type: "status", message: label }),
|
|
3322
3367
|
signal
|
|
@@ -3445,15 +3490,6 @@ async function runTurn(params) {
|
|
|
3445
3490
|
count: toolCalls.length,
|
|
3446
3491
|
tools: toolCalls.map((tc) => tc.name)
|
|
3447
3492
|
});
|
|
3448
|
-
const toolStatusWatcher = startStatusWatcher({
|
|
3449
|
-
apiConfig,
|
|
3450
|
-
getContext: () => ({
|
|
3451
|
-
assistantText: assistantText.slice(-500),
|
|
3452
|
-
lastToolName: toolCalls.map((tc) => tc.name).join(", ")
|
|
3453
|
-
}),
|
|
3454
|
-
onStatus: (label) => onEvent({ type: "status", message: label }),
|
|
3455
|
-
signal
|
|
3456
|
-
});
|
|
3457
3493
|
const results = await Promise.all(
|
|
3458
3494
|
toolCalls.map(async (tc) => {
|
|
3459
3495
|
if (signal?.aborted) {
|
|
@@ -3511,7 +3547,8 @@ async function runTurn(params) {
|
|
|
3511
3547
|
}
|
|
3512
3548
|
})
|
|
3513
3549
|
);
|
|
3514
|
-
|
|
3550
|
+
lastCompletedTools = toolCalls.map((tc) => tc.name).join(", ");
|
|
3551
|
+
lastCompletedResult = results.at(-1)?.result ?? "";
|
|
3515
3552
|
for (const r of results) {
|
|
3516
3553
|
state.messages.push({
|
|
3517
3554
|
role: "user",
|
|
@@ -22,9 +22,19 @@ Start from these four and extend as needed. Add interface specs for other interf
|
|
|
22
22
|
|
|
23
23
|
Users often care about look and feel as much as (or more than) underlying data structures. Don't treat the brand and interface specs as an afterthought — for many users, the visual identity and voice are the first things they want to get right.
|
|
24
24
|
|
|
25
|
-
Write specs in natural, human language. Describe what the app does the way you'd explain it to a colleague. The spec rendered with annotations hidden is a human-forward document that anyone can read. The spec with annotations visible is the agent-forward document that drives code generation. Keep the prose clean and readable — technical details like column types, status values, and implementation hints belong in annotations, not in the prose.
|
|
25
|
+
Write specs in natural, human language. Describe what the app does the way you'd explain it to a colleague. The spec rendered with annotations hidden is a human-forward document that anyone can read. The spec with annotations visible is the agent-forward document that drives code generation. Keep the prose clean and readable — technical details like column types, status values, CSS properties, code snippets, and implementation hints belong in annotations, not in the prose.
|
|
26
26
|
|
|
27
|
-
When you have image URLs (from the design expert, stock photos, or AI generation), embed them directly in the spec using markdown image syntax
|
|
27
|
+
When you have image URLs (from the design expert, stock photos, or AI generation), embed them directly in the spec using markdown image syntax. Write descriptive alt text that captures what the image actually depicts (this helps accessibility and helps the coding agent understand the image without loading it). Use the surrounding prose to explain the design intent — what the image is for, how it should be used in the layout, and why it was chosen.
|
|
28
|
+
|
|
29
|
+
```markdown
|
|
30
|
+
### Hero Section
|
|
31
|
+
|
|
32
|
+
The hero uses a full-bleed editorial photograph. The image should be used as
|
|
33
|
+
a background with the headline overlaid where there's negative space.
|
|
34
|
+
|
|
35
|
+

|
|
37
|
+
```
|
|
28
38
|
|
|
29
39
|
**Refining with the user:**
|
|
30
40
|
After writing the first draft, guide the user through it. Don't just ask "does this look good?" — the user is seeing a multi-section spec for the first time.
|
package/dist/static/authoring.md
CHANGED
|
@@ -22,9 +22,19 @@ Start from these four and extend as needed. Add interface specs for other interf
|
|
|
22
22
|
|
|
23
23
|
Users often care about look and feel as much as (or more than) underlying data structures. Don't treat the brand and interface specs as an afterthought — for many users, the visual identity and voice are the first things they want to get right.
|
|
24
24
|
|
|
25
|
-
Write specs in natural, human language. Describe what the app does the way you'd explain it to a colleague. The spec rendered with annotations hidden is a human-forward document that anyone can read. The spec with annotations visible is the agent-forward document that drives code generation. Keep the prose clean and readable — technical details like column types, status values, and implementation hints belong in annotations, not in the prose.
|
|
25
|
+
Write specs in natural, human language. Describe what the app does the way you'd explain it to a colleague. The spec rendered with annotations hidden is a human-forward document that anyone can read. The spec with annotations visible is the agent-forward document that drives code generation. Keep the prose clean and readable — technical details like column types, status values, CSS properties, code snippets, and implementation hints belong in annotations, not in the prose.
|
|
26
26
|
|
|
27
|
-
When you have image URLs (from the design expert, stock photos, or AI generation), embed them directly in the spec using markdown image syntax
|
|
27
|
+
When you have image URLs (from the design expert, stock photos, or AI generation), embed them directly in the spec using markdown image syntax. Write descriptive alt text that captures what the image actually depicts (this helps accessibility and helps the coding agent understand the image without loading it). Use the surrounding prose to explain the design intent — what the image is for, how it should be used in the layout, and why it was chosen.
|
|
28
|
+
|
|
29
|
+
```markdown
|
|
30
|
+
### Hero Section
|
|
31
|
+
|
|
32
|
+
The hero uses a full-bleed editorial photograph. The image should be used as
|
|
33
|
+
a background with the headline overlaid where there's negative space.
|
|
34
|
+
|
|
35
|
+

|
|
37
|
+
```
|
|
28
38
|
|
|
29
39
|
**Refining with the user:**
|
|
30
40
|
After writing the first draft, guide the user through it. Don't just ask "does this look good?" — the user is seeing a multi-section spec for the first time.
|
|
@@ -95,6 +95,7 @@ Check a count with evaluate:
|
|
|
95
95
|
- evaluate auto-returns simple expressions. `"script": "document.title"` works directly. For multi-statement scripts, use explicit return.
|
|
96
96
|
- The snapshot in the response is always the most current page state. Even if a wait times out, check the snapshot field; the content you were waiting for may have appeared by then.
|
|
97
97
|
- Execution stops on first error. Steps are numbered from 0: if step 2 of a 5-step command fails, steps 3-4 don't run. The response will contain results for steps 0-2 (with step 2 having an error field) plus the current snapshot. Adjust and retry from the failed step.
|
|
98
|
+
- Always call `resetBrowser` as your final action after all tests are complete. This restores the preview to a clean state for the user.
|
|
98
99
|
</rules>
|
|
99
100
|
|
|
100
101
|
<voice>
|
|
@@ -6,37 +6,66 @@ Not every interface needs images. A productivity dashboard, a finance tool, or a
|
|
|
6
6
|
|
|
7
7
|
Do not provide images as "references" - images must be ready-to-use assets that can be included directly in the design.
|
|
8
8
|
|
|
9
|
-
###
|
|
9
|
+
### Three tools
|
|
10
10
|
|
|
11
11
|
**AI-generated photos and images** (`generateImages`) — Seedream produces high-quality results for both photorealistic images and abstract/creative visuals. You have full control over the output: style, composition, colors, mood. When generating multiple images, batch them in a single `generateImages` call — they run in parallel. Generated images are production assets, not mockups or concepts — they are hosted on MindStudio CDN at full resolution and will be used directly in the final interface.
|
|
12
12
|
|
|
13
|
-
**
|
|
13
|
+
**Image editing** (`editImage`) — takes an existing image URL and a text instruction describing what to change. Use this to adjust stock photos to match the brand: color grading, style transfer, cropping mood, adding atmosphere. Find a great stock photo, then edit it to align with the design direction.
|
|
14
|
+
|
|
15
|
+
**Stock photography** (`searchStockPhotos`) — Pexels has modern, editorial-style photos. Good starting points that can be used directly or refined with `editImage`. Write specific queries: "person writing in notebook at minimalist desk, natural light" not "office."
|
|
14
16
|
|
|
15
17
|
### Writing good generation prompts
|
|
16
18
|
|
|
17
|
-
|
|
19
|
+
Write prompts as natural sentences describing a scene, not as comma-separated keyword lists. Describe what a camera would see, not art direction instructions.
|
|
20
|
+
|
|
21
|
+
**Structure:** Subject and action first, then setting, then style and technical details. Include the intended use when relevant.
|
|
22
|
+
|
|
23
|
+
- "A woman laughing while reading on a sun-drenched balcony overlooking a Mediterranean harbor. Editorial photography, shot on Kodak Portra 400, 85mm lens at f/2, soft golden hour light, shallow depth of field. For a lifestyle app hero section."
|
|
24
|
+
- "An overhead view of a cluttered designer's desk with fabric swatches, sketches, and a coffee cup. Natural window light from the left, slightly desaturated tones, Canon 5D with 35mm lens. For an about page."
|
|
25
|
+
- "Smooth organic shapes in deep navy and warm amber, flowing liquid forms with subtle grain texture. Abstract digital art, high contrast, editorial feel."
|
|
26
|
+
|
|
27
|
+
**Photography vocabulary produces the best results.** The model responds strongly to specific references:
|
|
28
|
+
- Film stocks: Kodak Portra, Fuji Superia, Cinestill 800T, expired film
|
|
29
|
+
- Lenses: 85mm f/1.4, 35mm wide angle, 50mm Summilux, macro
|
|
30
|
+
- Lighting: golden hour, chiaroscuro, tungsten warmth, soft diffused studio light, direct flash
|
|
31
|
+
- Shot types: close-up, overhead flat lay, low angle, eye-level candid, aerial
|
|
32
|
+
- Techniques: shallow depth of field, halation around highlights, film grain, motion blur
|
|
18
33
|
|
|
19
|
-
**
|
|
20
|
-
- "Digital photography, soft natural window light, shallow depth of field. A ceramic coffee cup on a marble countertop, morning light casting long shadows, warm tones."
|
|
21
|
-
- "Flat vector illustration, clean lines, limited color palette. An isometric view of a workspace with a laptop, plant, and notebook."
|
|
22
|
-
- "Abstract digital art, fluid gradients, high contrast. Deep navy flowing into warm amber, organic liquid shapes, editorial feel."
|
|
34
|
+
**Declare the medium early.** Saying "editorial photograph" vs "watercolor painting" vs "3D render" doesn't just change style — it changes the model's entire approach to composition, color, and detail. Set this expectation in the first sentence.
|
|
23
35
|
|
|
24
|
-
**For
|
|
36
|
+
**For text in images**, wrap the exact text in double quotes and specify the style: `A neon sign reading "OPEN" in cursive pink lettering against a dark brick wall.`
|
|
37
|
+
|
|
38
|
+
**Compose for the layout.** If you know the image will have text overlaid, request space for it: "negative space in the upper left for headline text" or "clean sky area above the subject." If it's a background, consider "centered subject with clean margins." The first few words of the prompt carry the most weight — lead with the medium and subject.
|
|
25
39
|
|
|
26
40
|
**Avoid:**
|
|
27
41
|
- Hex codes in prompts — the model renders them as visible text. Describe colors by name instead.
|
|
28
|
-
-
|
|
42
|
+
- Keyword lists separated by commas — write sentences.
|
|
43
|
+
- Describing positions of arms, legs, or specific limb arrangements.
|
|
44
|
+
- Conflicting style instructions ("photorealistic cartoon").
|
|
45
|
+
- Describing what you don't want — say "empty street" not "street with no cars."
|
|
46
|
+
- Mentioning "text" or "text placement" in prompts — the model will try to render text. Request the composition you want ("negative space in the left third") without saying why.
|
|
47
|
+
- Brand names (camera brands, font names, company names) can get rendered as visible text. Use technical specs ("medium format, 120mm lens") instead of brand names ("Hasselblad") when possible.
|
|
48
|
+
- UI component language — "glass morphism effect", "card design", "button with hover state". Write prompts as if briefing a photographer or artist, not describing CSS.
|
|
49
|
+
- Generating text that should be HTML. Headlines, body copy, CTAs, and any text the user needs to read or interact with belongs in the markup, not baked into an image. Text *within a scene* is fine — a neon sign, a logo on a t-shirt, text on a billboard in a cityscape, an app screen in a device mockup. That's part of the visual content.
|
|
50
|
+
|
|
51
|
+
### How generated images work in the UI
|
|
52
|
+
|
|
53
|
+
Every generated image is a full rectangular frame — a photograph, a poster, a painting, a texture. The image generator does not produce isolated elements, transparent PNGs, or UI components. The coding agent controls how images are used: cropping, blending, overlaying, masking with CSS.
|
|
54
|
+
|
|
55
|
+
This means you can generate a dramatic texture and the coding agent uses it as a card background with a blend mode. You can generate an editorial photo and the coding agent overlays text on it for a hero section. Think of yourself as providing visual ingredients, not finished UI.
|
|
29
56
|
|
|
30
57
|
### What makes good photos and images
|
|
31
58
|
|
|
32
|
-
|
|
59
|
+
It's 2026. Everything is lifestyle and editorial. Even a landing page for a productivity tool or a SaaS product should feel like a magazine spread, not a tech blog. The era of sterile stock-photo-of-a-laptop-on-a-desk is over. People respond to beautiful, dramatic, emotionally resonant imagery.
|
|
60
|
+
|
|
61
|
+
Default to photography with real subjects — people, scenes, moments, environments. Use editorial and fashion photography vocabulary in your prompts. When abstract art is the right call (textures, editorial collages, gradient art), make it bold and intentional, not generic gradient blobs.
|
|
62
|
+
|
|
63
|
+
The coding agent should never need to source its own imagery. Always provide URLs.
|
|
33
64
|
|
|
34
65
|
### When to use images
|
|
35
66
|
|
|
36
67
|
Include image recommendations in your designs when the product calls for it. A landing page without photography feels like a wireframe. A feature section with a real image feels finished. When proposing layouts, specify where images go and what they should depict — don't leave it to the coding agent to figure out.
|
|
37
68
|
|
|
38
|
-
The coding agent should never need to source its own imagery. Always provide URLs.
|
|
39
|
-
|
|
40
69
|
### CDN image transforms
|
|
41
70
|
|
|
42
71
|
Generated images and uploaded images are hosted on `i.mscdn.ai`. Use query string parameters to request appropriately sized images rather than CSS-scaling full-resolution originals:
|