@mindstudio-ai/remy 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js CHANGED
@@ -2258,6 +2258,14 @@ var BROWSER_TOOLS = [
2258
2258
  type: "object",
2259
2259
  properties: {}
2260
2260
  }
2261
+ },
2262
+ {
2263
+ name: "resetBrowser",
2264
+ description: "Reset the browser to a clean state. Call this once after all tests are complete to restore the preview for the user. Fire and forget \u2014 does not wait for the reload to finish.",
2265
+ inputSchema: {
2266
+ type: "object",
2267
+ properties: {}
2268
+ }
2261
2269
  }
2262
2270
  ];
2263
2271
  var BROWSER_EXTERNAL_TOOLS = /* @__PURE__ */ new Set(["browserCommand", "screenshot"]);
@@ -2307,7 +2315,17 @@ var browserAutomationTool = {
2307
2315
  task: input.task,
2308
2316
  tools: BROWSER_TOOLS,
2309
2317
  externalTools: BROWSER_EXTERNAL_TOOLS,
2310
- executeTool: async () => "Error: no local tools in browser automation",
2318
+ executeTool: async (name) => {
2319
+ if (name === "resetBrowser") {
2320
+ try {
2321
+ await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
2322
+ return "Browser reset triggered.";
2323
+ } catch {
2324
+ return "Error: could not reset browser.";
2325
+ }
2326
+ }
2327
+ return `Error: unknown local tool "${name}"`;
2328
+ },
2311
2329
  apiConfig: context.apiConfig,
2312
2330
  model: context.model,
2313
2331
  signal: context.signal,
@@ -2462,6 +2480,32 @@ var DESIGN_RESEARCH_TOOLS = [
2462
2480
  },
2463
2481
  required: ["prompts"]
2464
2482
  }
2483
+ },
2484
+ {
2485
+ name: "editImage",
2486
+ description: "Edit an existing image using a text instruction. Takes a source image URL and a prompt describing the edits (color grading, style transfer, modifications, adding/removing elements). Returns a new CDN URL.",
2487
+ inputSchema: {
2488
+ type: "object",
2489
+ properties: {
2490
+ imageUrl: {
2491
+ type: "string",
2492
+ description: "URL of the source image to edit."
2493
+ },
2494
+ prompt: {
2495
+ type: "string",
2496
+ description: 'What to change. Describe the edit as an instruction: "apply warm golden hour color grading", "make the background darker", "add a subtle film grain texture".'
2497
+ },
2498
+ width: {
2499
+ type: "number",
2500
+ description: "Output width in pixels. Default 2048. Range: 2048-4096."
2501
+ },
2502
+ height: {
2503
+ type: "number",
2504
+ description: "Output height in pixels. Default 2048. Range: 2048-4096."
2505
+ }
2506
+ },
2507
+ required: ["imageUrl", "prompt"]
2508
+ }
2465
2509
  }
2466
2510
  ];
2467
2511
  function runCli(cmd) {
@@ -2507,37 +2551,17 @@ async function executeDesignTool(name, input) {
2507
2551
  `mindstudio analyze-image --prompt ${JSON.stringify(DESIGN_REFERENCE_PROMPT)} --image-url ${JSON.stringify(input.imageUrl)} --no-meta`
2508
2552
  );
2509
2553
  case "screenshotAndAnalyze": {
2510
- const screenshotResult = await runCli(
2511
- `mindstudio scrape-url --url ${JSON.stringify(input.url)} --page-options ${JSON.stringify(JSON.stringify({ onlyMainContent: true, screenshot: true }))} --no-meta`
2554
+ const ssUrl = await runCli(
2555
+ `mindstudio screenshot-url --url ${JSON.stringify(input.url)} --mode viewport --width 1440 --delay 2000 --output-key screenshotUrl --no-meta`
2512
2556
  );
2513
- const screenshotMatch = screenshotResult.match(
2514
- /https:\/\/[^\s"']+(?:\.png|\.jpg|\.jpeg|\.webp|screenshot[^\s"']*)/i
2515
- );
2516
- if (!screenshotMatch) {
2517
- try {
2518
- const parsed = JSON.parse(screenshotResult);
2519
- const ssUrl = parsed.screenshot || parsed.screenshotUrl || parsed.content?.screenshotUrl;
2520
- if (ssUrl) {
2521
- const analysisPrompt2 = input.prompt || DESIGN_REFERENCE_PROMPT;
2522
- const analysis2 = await runCli(
2523
- `mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt2)} --image-url ${JSON.stringify(ssUrl)} --no-meta`
2524
- );
2525
- return `Screenshot: ${ssUrl}
2526
-
2527
- ${analysis2}`;
2528
- }
2529
- } catch {
2530
- }
2531
- return `Fetched ${input.url} but could not extract screenshot URL.
2532
-
2533
- Page content:
2534
- ${screenshotResult}`;
2557
+ if (ssUrl.startsWith("Error")) {
2558
+ return `Could not screenshot ${input.url}: ${ssUrl}`;
2535
2559
  }
2536
2560
  const analysisPrompt = input.prompt || DESIGN_REFERENCE_PROMPT;
2537
2561
  const analysis = await runCli(
2538
- `mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(screenshotMatch[0])} --no-meta`
2562
+ `mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(ssUrl)} --no-meta`
2539
2563
  );
2540
- return `Screenshot: ${screenshotMatch[0]}
2564
+ return `Screenshot: ${ssUrl}
2541
2565
 
2542
2566
  ${analysis}`;
2543
2567
  }
@@ -2581,6 +2605,24 @@ ${analysis}`;
2581
2605
  }));
2582
2606
  return runCli(`mindstudio batch '${JSON.stringify(steps)}' --no-meta`);
2583
2607
  }
2608
+ case "editImage": {
2609
+ const width = input.width || 2048;
2610
+ const height = input.height || 2048;
2611
+ const step = JSON.stringify({
2612
+ prompt: input.prompt,
2613
+ imageModelOverride: {
2614
+ model: "seedream-4.5",
2615
+ config: {
2616
+ images: [input.imageUrl],
2617
+ width,
2618
+ height
2619
+ }
2620
+ }
2621
+ });
2622
+ return runCli(
2623
+ `mindstudio generate-image '${step}' --output-key imageUrl --no-meta`
2624
+ );
2625
+ }
2584
2626
  default:
2585
2627
  return `Error: unknown tool "${name}"`;
2586
2628
  }
@@ -2666,7 +2708,7 @@ ${pairingList}
2666
2708
  const inspirationSection = images.length ? `<inspiration_images>
2667
2709
  ## Design inspiration
2668
2710
 
2669
- A random sample of pre-analyzed design references. Use these observations to inform your recommendations and build something creative, unique, and compelling.
2711
+ This is what the bar looks like. These are real sites that made it onto curated design galleries because they did something bold, intentional, and memorable. Study the moves they make \u2014 the confident color choices, the unexpected layouts, the typography that carries the whole page. Your recommendations should feel like they belong in this company.
2670
2712
 
2671
2713
  ${imageList}
2672
2714
  </inspiration_images>` : "";
@@ -2687,8 +2729,8 @@ The visual design expert can be used for all things visual design, from quick qu
2687
2729
  - Layout and composition ideas that go beyond generic AI defaults
2688
2730
  - Analyzing a reference site or screenshot for design insights (it can take screenshots and do research on its own)
2689
2731
  - Beautiful layout images or photos
2690
- - Icon recommendations
2691
- - Proposing full visual directions during intake
2732
+ - Icon recommendations or AI image editing
2733
+ - Proposing full visual design and layout directions during intake
2692
2734
 
2693
2735
  **How to write the task:**
2694
2736
  Include context about the app \u2014 what it does, who uses it, what mood or feeling the interface should convey. If the user has any specific requirements, be sure to include them. The agent can not see your conversation with the user, so you need to include all details. More context produces better results. For quick questions ("three font pairings for a <x> app"), brief is fine. You can ask for multiple topics, multiple options, etc.
@@ -3195,6 +3237,8 @@ async function runTurn(params) {
3195
3237
  });
3196
3238
  }
3197
3239
  state.messages.push(userMsg);
3240
+ let lastCompletedTools = "";
3241
+ let lastCompletedResult = "";
3198
3242
  while (true) {
3199
3243
  let getOrCreateAccumulator2 = function(id, name) {
3200
3244
  let acc = toolInputAccumulators.get(id);
@@ -3281,7 +3325,8 @@ async function runTurn(params) {
3281
3325
  apiConfig,
3282
3326
  getContext: () => ({
3283
3327
  assistantText: assistantText.slice(-500),
3284
- lastToolName: toolCalls.at(-1)?.name
3328
+ lastToolName: toolCalls.at(-1)?.name || lastCompletedTools || void 0,
3329
+ lastToolResult: lastCompletedResult || void 0
3285
3330
  }),
3286
3331
  onStatus: (label) => onEvent({ type: "status", message: label }),
3287
3332
  signal
@@ -3410,15 +3455,6 @@ async function runTurn(params) {
3410
3455
  count: toolCalls.length,
3411
3456
  tools: toolCalls.map((tc) => tc.name)
3412
3457
  });
3413
- const toolStatusWatcher = startStatusWatcher({
3414
- apiConfig,
3415
- getContext: () => ({
3416
- assistantText: assistantText.slice(-500),
3417
- lastToolName: toolCalls.map((tc) => tc.name).join(", ")
3418
- }),
3419
- onStatus: (label) => onEvent({ type: "status", message: label }),
3420
- signal
3421
- });
3422
3458
  const results = await Promise.all(
3423
3459
  toolCalls.map(async (tc) => {
3424
3460
  if (signal?.aborted) {
@@ -3476,7 +3512,8 @@ async function runTurn(params) {
3476
3512
  }
3477
3513
  })
3478
3514
  );
3479
- toolStatusWatcher.stop();
3515
+ lastCompletedTools = toolCalls.map((tc) => tc.name).join(", ");
3516
+ lastCompletedResult = results.at(-1)?.result ?? "";
3480
3517
  for (const r of results) {
3481
3518
  state.messages.push({
3482
3519
  role: "user",
package/dist/index.js CHANGED
@@ -2208,6 +2208,14 @@ var init_tools = __esm({
2208
2208
  type: "object",
2209
2209
  properties: {}
2210
2210
  }
2211
+ },
2212
+ {
2213
+ name: "resetBrowser",
2214
+ description: "Reset the browser to a clean state. Call this once after all tests are complete to restore the preview for the user. Fire and forget \u2014 does not wait for the reload to finish.",
2215
+ inputSchema: {
2216
+ type: "object",
2217
+ properties: {}
2218
+ }
2211
2219
  }
2212
2220
  ];
2213
2221
  BROWSER_EXTERNAL_TOOLS = /* @__PURE__ */ new Set(["browserCommand", "screenshot"]);
@@ -2273,7 +2281,17 @@ var init_browserAutomation = __esm({
2273
2281
  task: input.task,
2274
2282
  tools: BROWSER_TOOLS,
2275
2283
  externalTools: BROWSER_EXTERNAL_TOOLS,
2276
- executeTool: async () => "Error: no local tools in browser automation",
2284
+ executeTool: async (name) => {
2285
+ if (name === "resetBrowser") {
2286
+ try {
2287
+ await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
2288
+ return "Browser reset triggered.";
2289
+ } catch {
2290
+ return "Error: could not reset browser.";
2291
+ }
2292
+ }
2293
+ return `Error: unknown local tool "${name}"`;
2294
+ },
2277
2295
  apiConfig: context.apiConfig,
2278
2296
  model: context.model,
2279
2297
  signal: context.signal,
@@ -2331,37 +2349,17 @@ async function executeDesignTool(name, input) {
2331
2349
  `mindstudio analyze-image --prompt ${JSON.stringify(DESIGN_REFERENCE_PROMPT)} --image-url ${JSON.stringify(input.imageUrl)} --no-meta`
2332
2350
  );
2333
2351
  case "screenshotAndAnalyze": {
2334
- const screenshotResult = await runCli(
2335
- `mindstudio scrape-url --url ${JSON.stringify(input.url)} --page-options ${JSON.stringify(JSON.stringify({ onlyMainContent: true, screenshot: true }))} --no-meta`
2336
- );
2337
- const screenshotMatch = screenshotResult.match(
2338
- /https:\/\/[^\s"']+(?:\.png|\.jpg|\.jpeg|\.webp|screenshot[^\s"']*)/i
2352
+ const ssUrl = await runCli(
2353
+ `mindstudio screenshot-url --url ${JSON.stringify(input.url)} --mode viewport --width 1440 --delay 2000 --output-key screenshotUrl --no-meta`
2339
2354
  );
2340
- if (!screenshotMatch) {
2341
- try {
2342
- const parsed = JSON.parse(screenshotResult);
2343
- const ssUrl = parsed.screenshot || parsed.screenshotUrl || parsed.content?.screenshotUrl;
2344
- if (ssUrl) {
2345
- const analysisPrompt2 = input.prompt || DESIGN_REFERENCE_PROMPT;
2346
- const analysis2 = await runCli(
2347
- `mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt2)} --image-url ${JSON.stringify(ssUrl)} --no-meta`
2348
- );
2349
- return `Screenshot: ${ssUrl}
2350
-
2351
- ${analysis2}`;
2352
- }
2353
- } catch {
2354
- }
2355
- return `Fetched ${input.url} but could not extract screenshot URL.
2356
-
2357
- Page content:
2358
- ${screenshotResult}`;
2355
+ if (ssUrl.startsWith("Error")) {
2356
+ return `Could not screenshot ${input.url}: ${ssUrl}`;
2359
2357
  }
2360
2358
  const analysisPrompt = input.prompt || DESIGN_REFERENCE_PROMPT;
2361
2359
  const analysis = await runCli(
2362
- `mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(screenshotMatch[0])} --no-meta`
2360
+ `mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(ssUrl)} --no-meta`
2363
2361
  );
2364
- return `Screenshot: ${screenshotMatch[0]}
2362
+ return `Screenshot: ${ssUrl}
2365
2363
 
2366
2364
  ${analysis}`;
2367
2365
  }
@@ -2405,6 +2403,24 @@ ${analysis}`;
2405
2403
  }));
2406
2404
  return runCli(`mindstudio batch '${JSON.stringify(steps)}' --no-meta`);
2407
2405
  }
2406
+ case "editImage": {
2407
+ const width = input.width || 2048;
2408
+ const height = input.height || 2048;
2409
+ const step = JSON.stringify({
2410
+ prompt: input.prompt,
2411
+ imageModelOverride: {
2412
+ model: "seedream-4.5",
2413
+ config: {
2414
+ images: [input.imageUrl],
2415
+ width,
2416
+ height
2417
+ }
2418
+ }
2419
+ });
2420
+ return runCli(
2421
+ `mindstudio generate-image '${step}' --output-key imageUrl --no-meta`
2422
+ );
2423
+ }
2408
2424
  default:
2409
2425
  return `Error: unknown tool "${name}"`;
2410
2426
  }
@@ -2555,6 +2571,32 @@ Be specific and concise.`;
2555
2571
  },
2556
2572
  required: ["prompts"]
2557
2573
  }
2574
+ },
2575
+ {
2576
+ name: "editImage",
2577
+ description: "Edit an existing image using a text instruction. Takes a source image URL and a prompt describing the edits (color grading, style transfer, modifications, adding/removing elements). Returns a new CDN URL.",
2578
+ inputSchema: {
2579
+ type: "object",
2580
+ properties: {
2581
+ imageUrl: {
2582
+ type: "string",
2583
+ description: "URL of the source image to edit."
2584
+ },
2585
+ prompt: {
2586
+ type: "string",
2587
+ description: 'What to change. Describe the edit as an instruction: "apply warm golden hour color grading", "make the background darker", "add a subtle film grain texture".'
2588
+ },
2589
+ width: {
2590
+ type: "number",
2591
+ description: "Output width in pixels. Default 2048. Range: 2048-4096."
2592
+ },
2593
+ height: {
2594
+ type: "number",
2595
+ description: "Output height in pixels. Default 2048. Range: 2048-4096."
2596
+ }
2597
+ },
2598
+ required: ["imageUrl", "prompt"]
2599
+ }
2558
2600
  }
2559
2601
  ];
2560
2602
  }
@@ -2623,7 +2665,7 @@ ${pairingList}
2623
2665
  const inspirationSection = images.length ? `<inspiration_images>
2624
2666
  ## Design inspiration
2625
2667
 
2626
- A random sample of pre-analyzed design references. Use these observations to inform your recommendations and build something creative, unique, and compelling.
2668
+ This is what the bar looks like. These are real sites that made it onto curated design galleries because they did something bold, intentional, and memorable. Study the moves they make \u2014 the confident color choices, the unexpected layouts, the typography that carries the whole page. Your recommendations should feel like they belong in this company.
2627
2669
 
2628
2670
  ${imageList}
2629
2671
  </inspiration_images>` : "";
@@ -2674,8 +2716,8 @@ The visual design expert can be used for all things visual design, from quick qu
2674
2716
  - Layout and composition ideas that go beyond generic AI defaults
2675
2717
  - Analyzing a reference site or screenshot for design insights (it can take screenshots and do research on its own)
2676
2718
  - Beautiful layout images or photos
2677
- - Icon recommendations
2678
- - Proposing full visual directions during intake
2719
+ - Icon recommendations or AI image editing
2720
+ - Proposing full visual design and layout directions during intake
2679
2721
 
2680
2722
  **How to write the task:**
2681
2723
  Include context about the app \u2014 what it does, who uses it, what mood or feeling the interface should convey. If the user has any specific requirements, be sure to include them. The agent can not see your conversation with the user, so you need to include all details. More context produces better results. For quick questions ("three font pairings for a <x> app"), brief is fine. You can ask for multiple topics, multiple options, etc.
@@ -3230,6 +3272,8 @@ async function runTurn(params) {
3230
3272
  });
3231
3273
  }
3232
3274
  state.messages.push(userMsg);
3275
+ let lastCompletedTools = "";
3276
+ let lastCompletedResult = "";
3233
3277
  while (true) {
3234
3278
  let getOrCreateAccumulator2 = function(id, name) {
3235
3279
  let acc = toolInputAccumulators.get(id);
@@ -3316,7 +3360,8 @@ async function runTurn(params) {
3316
3360
  apiConfig,
3317
3361
  getContext: () => ({
3318
3362
  assistantText: assistantText.slice(-500),
3319
- lastToolName: toolCalls.at(-1)?.name
3363
+ lastToolName: toolCalls.at(-1)?.name || lastCompletedTools || void 0,
3364
+ lastToolResult: lastCompletedResult || void 0
3320
3365
  }),
3321
3366
  onStatus: (label) => onEvent({ type: "status", message: label }),
3322
3367
  signal
@@ -3445,15 +3490,6 @@ async function runTurn(params) {
3445
3490
  count: toolCalls.length,
3446
3491
  tools: toolCalls.map((tc) => tc.name)
3447
3492
  });
3448
- const toolStatusWatcher = startStatusWatcher({
3449
- apiConfig,
3450
- getContext: () => ({
3451
- assistantText: assistantText.slice(-500),
3452
- lastToolName: toolCalls.map((tc) => tc.name).join(", ")
3453
- }),
3454
- onStatus: (label) => onEvent({ type: "status", message: label }),
3455
- signal
3456
- });
3457
3493
  const results = await Promise.all(
3458
3494
  toolCalls.map(async (tc) => {
3459
3495
  if (signal?.aborted) {
@@ -3511,7 +3547,8 @@ async function runTurn(params) {
3511
3547
  }
3512
3548
  })
3513
3549
  );
3514
- toolStatusWatcher.stop();
3550
+ lastCompletedTools = toolCalls.map((tc) => tc.name).join(", ");
3551
+ lastCompletedResult = results.at(-1)?.result ?? "";
3515
3552
  for (const r of results) {
3516
3553
  state.messages.push({
3517
3554
  role: "user",
@@ -22,9 +22,19 @@ Start from these four and extend as needed. Add interface specs for other interf
22
22
 
23
23
  Users often care about look and feel as much as (or more than) underlying data structures. Don't treat the brand and interface specs as an afterthought — for many users, the visual identity and voice are the first things they want to get right.
24
24
 
25
- Write specs in natural, human language. Describe what the app does the way you'd explain it to a colleague. The spec rendered with annotations hidden is a human-forward document that anyone can read. The spec with annotations visible is the agent-forward document that drives code generation. Keep the prose clean and readable — technical details like column types, status values, and implementation hints belong in annotations, not in the prose.
25
+ Write specs in natural, human language. Describe what the app does the way you'd explain it to a colleague. The spec rendered with annotations hidden is a human-forward document that anyone can read. The spec with annotations visible is the agent-forward document that drives code generation. Keep the prose clean and readable — technical details like column types, status values, CSS properties, code snippets, and implementation hints belong in annotations, not in the prose.
26
26
 
27
- When you have image URLs (from the design expert, stock photos, or AI generation), embed them directly in the spec using markdown image syntax (`![description](url)`). The spec should be a visual document — if there's a hero image, a background photo, or a generated graphic, include it inline so the user can see it and the coding agent can reference it during build.
27
+ When you have image URLs (from the design expert, stock photos, or AI generation), embed them directly in the spec using markdown image syntax. Write descriptive alt text that captures what the image actually depicts (this helps accessibility and helps the coding agent understand the image without loading it). Use the surrounding prose to explain the design intent — what the image is for, how it should be used in the layout, and why it was chosen.
28
+
29
+ ```markdown
30
+ ### Hero Section
31
+
32
+ The hero uses a full-bleed editorial photograph. The image should be used as
33
+ a background with the headline overlaid where there's negative space.
34
+
35
+ ![Editorial portrait, warm golden hour lighting, person looking out over a
36
+ city skyline, shallow depth of field, shot on 85mm](https://i.mscdn.ai/...)
37
+ ```
28
38
 
29
39
  **Refining with the user:**
30
40
  After writing the first draft, guide the user through it. Don't just ask "does this look good?" — the user is seeing a multi-section spec for the first time.
@@ -22,9 +22,19 @@ Start from these four and extend as needed. Add interface specs for other interf
22
22
 
23
23
  Users often care about look and feel as much as (or more than) underlying data structures. Don't treat the brand and interface specs as an afterthought — for many users, the visual identity and voice are the first things they want to get right.
24
24
 
25
- Write specs in natural, human language. Describe what the app does the way you'd explain it to a colleague. The spec rendered with annotations hidden is a human-forward document that anyone can read. The spec with annotations visible is the agent-forward document that drives code generation. Keep the prose clean and readable — technical details like column types, status values, and implementation hints belong in annotations, not in the prose.
25
+ Write specs in natural, human language. Describe what the app does the way you'd explain it to a colleague. The spec rendered with annotations hidden is a human-forward document that anyone can read. The spec with annotations visible is the agent-forward document that drives code generation. Keep the prose clean and readable — technical details like column types, status values, CSS properties, code snippets, and implementation hints belong in annotations, not in the prose.
26
26
 
27
- When you have image URLs (from the design expert, stock photos, or AI generation), embed them directly in the spec using markdown image syntax (`![description](url)`). The spec should be a visual document — if there's a hero image, a background photo, or a generated graphic, include it inline so the user can see it and the coding agent can reference it during build.
27
+ When you have image URLs (from the design expert, stock photos, or AI generation), embed them directly in the spec using markdown image syntax. Write descriptive alt text that captures what the image actually depicts (this helps accessibility and helps the coding agent understand the image without loading it). Use the surrounding prose to explain the design intent — what the image is for, how it should be used in the layout, and why it was chosen.
28
+
29
+ ```markdown
30
+ ### Hero Section
31
+
32
+ The hero uses a full-bleed editorial photograph. The image should be used as
33
+ a background with the headline overlaid where there's negative space.
34
+
35
+ ![Editorial portrait, warm golden hour lighting, person looking out over a
36
+ city skyline, shallow depth of field, shot on 85mm](https://i.mscdn.ai/...)
37
+ ```
28
38
 
29
39
  **Refining with the user:**
30
40
  After writing the first draft, guide the user through it. Don't just ask "does this look good?" — the user is seeing a multi-section spec for the first time.
@@ -95,6 +95,7 @@ Check a count with evaluate:
95
95
  - evaluate auto-returns simple expressions. `"script": "document.title"` works directly. For multi-statement scripts, use explicit return.
96
96
  - The snapshot in the response is always the most current page state. Even if a wait times out, check the snapshot field; the content you were waiting for may have appeared by then.
97
97
  - Execution stops on first error. If step 2 of 5 fails, steps 3-5 don't run. The response will contain results for steps 0-2 (with step 2 having an error field) plus the current snapshot. Adjust and retry from the failed step.
98
+ - Always call `resetBrowser` as your final action after all tests are complete. This restores the preview to a clean state for the user.
98
99
  </rules>
99
100
 
100
101
  <voice>
@@ -6,37 +6,66 @@ Not every interface needs images. A productivity dashboard, a finance tool, or a
6
6
 
7
7
  Do not provide images as "references" - images must be ready-to-use assets that can be included directly in the design.
8
8
 
9
- ### Two sources
9
+ ### Three tools
10
10
 
11
11
  **AI-generated photos and images** (`generateImages`) — Seedream produces high-quality results for both photorealistic images and abstract/creative visuals. You have full control over the output: style, composition, colors, mood. When generating multiple images, batch them in a single `generateImages` call — they run in parallel. Generated images are production assets, not mockups or concepts — they are hosted on MindStudio CDN at full resolution and will be used directly in the final interface.
12
12
 
13
- **Stock photography** (`searchStockPhotos`) — Pexels has modern, editorial-style photos. Useful for quick placeholders, mockups, or when you need a specific real-world subject (a specific city, a recognizable object, etc.). Write specific queries: "person writing in notebook at minimalist desk, natural light" not "office."
13
+ **Image editing** (`editImage`) — takes an existing image URL and a text instruction describing what to change. Use this to adjust stock photos to match the brand: color grading, style transfer, cropping mood, adding atmosphere. Find a great stock photo, then edit it to align with the design direction.
14
+
15
+ **Stock photography** (`searchStockPhotos`) — Pexels has modern, editorial-style photos. Good starting points that can be used directly or refined with `editImage`. Write specific queries: "person writing in notebook at minimalist desk, natural light" not "office."
14
16
 
15
17
  ### Writing good generation prompts
16
18
 
17
- Lead with the visual style, then describe the content. This order helps the model establish the look before filling in details.
19
+ Write prompts as natural sentences describing a scene, not as comma-separated keyword lists. Describe what a camera would see, not art direction instructions.
20
+
21
+ **Structure:** Subject and action first, then setting, then style and technical details. Include the intended use when relevant.
22
+
23
+ - "A woman laughing while reading on a sun-drenched balcony overlooking a Mediterranean harbor. Editorial photography, shot on Kodak Portra 400, 85mm lens at f/2, soft golden hour light, shallow depth of field. For a lifestyle app hero section."
24
+ - "An overhead view of a cluttered designer's desk with fabric swatches, sketches, and a coffee cup. Natural window light from the left, slightly desaturated tones, Canon 5D with 35mm lens. For an about page."
25
+ - "Smooth organic shapes in deep navy and warm amber, flowing liquid forms with subtle grain texture. Abstract digital art, high contrast, editorial feel."
26
+
27
+ **Photography vocabulary produces the best results.** The model responds strongly to specific references:
28
+ - Film stocks: Kodak Portra, Fuji Superia, Cinestill 800T, expired film
29
+ - Lenses: 85mm f/1.4, 35mm wide angle, 50mm Summilux, macro
30
+ - Lighting: golden hour, chiaroscuro, tungsten warmth, soft diffused studio light, direct flash
31
+ - Shot types: close-up, overhead flat lay, low angle, eye-level candid, aerial
32
+ - Techniques: shallow depth of field, halation around highlights, film grain, motion blur
18
33
 
19
- **Structure:** Style/medium first, then subject, then details.
20
- - "Digital photography, soft natural window light, shallow depth of field. A ceramic coffee cup on a marble countertop, morning light casting long shadows, warm tones."
21
- - "Flat vector illustration, clean lines, limited color palette. An isometric view of a workspace with a laptop, plant, and notebook."
22
- - "Abstract digital art, fluid gradients, high contrast. Deep navy flowing into warm amber, organic liquid shapes, editorial feel."
34
+ **Declare the medium early.** Saying "editorial photograph" vs "watercolor painting" vs "3D render" doesn't just change style — it changes the model's entire approach to composition, color, and detail. Set this expectation in the first sentence.
23
35
 
24
- **For photorealistic images:** Specify the photography style (editorial, portrait, product, aerial), lighting (natural, studio, golden hour, direct flash), and camera characteristics (close-up, wide angle, shallow depth of field, slightly grainy texture).
36
+ **For text in images**, wrap the exact text in double quotes and specify the style: `A neon sign reading "OPEN" in cursive pink lettering against a dark brick wall.`
37
+
38
+ **Compose for the layout.** If you know the image will have text overlaid, request space for it: "negative space in the upper left for headline text" or "clean sky area above the subject." If it's a background, consider "centered subject with clean margins." The first few words of the prompt carry the most weight — lead with the medium and subject.
25
39
 
26
40
  **Avoid:**
27
41
  - Hex codes in prompts — the model renders them as visible text. Describe colors by name instead.
28
- Describing positions of arms, legs, or specific limb arrangements — this confuses image models.
42
+ - Keyword lists separated by commas — write sentences.
43
+ - Describing positions of arms, legs, or specific limb arrangements.
44
+ - Conflicting style instructions ("photorealistic cartoon").
45
+ - Describing what you don't want — say "empty street" not "street with no cars."
46
+ - Mentioning "text" or "text placement" in prompts — the model will try to render text. Request the composition you want ("negative space in the left third") without saying why.
47
+ - Brand names (camera brands, font names, company names) can get rendered as visible text. Use technical specs ("medium format, 120mm lens") instead of brand names ("Hasselblad") when possible.
48
+ - UI component language — "glass morphism effect", "card design", "button with hover state". Write prompts as if briefing a photographer or artist, not describing CSS.
49
+ - Generating text that should be HTML. Headlines, body copy, CTAs, and any text the user needs to read or interact with belongs in the markup, not baked into an image. Text *within a scene* is fine — a neon sign, a logo on a t-shirt, text on a billboard in a cityscape, an app screen in a device mockup. That's part of the visual content.
50
+
51
+ ### How generated images work in the UI
52
+
53
+ Every generated image is a full rectangular frame — a photograph, a poster, a painting, a texture. The image generator does not produce isolated elements, transparent PNGs, or UI components. The coding agent controls how images are used: cropping, blending, overlaying, masking with CSS.
54
+
55
+ This means you can generate a dramatic texture and the coding agent uses it as a card background with a blend mode. You can generate an editorial photo and the coding agent overlays text on it for a hero section. Think of yourself as providing visual ingredients, not finished UI.
29
56
 
30
57
  ### What makes good photos and images
31
58
 
32
- Think about what would actually appear on this page if a real design team made it. Photos and images should have real subjects that connect to the product's story — people, places, objects, scenes. You can make things that are truly beautiful. Generic abstract visuals are the AI image equivalent of purple gradients: safe, meaningless, forgettable. Push for images with specificity, strong subjects, and emotional resonance.
59
+ It's 2026. Everything is lifestyle and editorial. Even a landing page for a productivity tool or a SaaS product should feel like a magazine spread, not a tech blog. The era of sterile stock-photo-of-a-laptop-on-a-desk is over. People respond to beautiful, dramatic, emotionally resonant imagery.
60
+
61
+ Default to photography with real subjects — people, scenes, moments, environments. Use editorial and fashion photography vocabulary in your prompts. When abstract art is the right call (textures, editorial collages, gradient art), make it bold and intentional, not generic gradient blobs.
62
+
63
+ The coding agent should never need to source its own imagery. Always provide URLs.
33
64
 
34
65
  ### When to use images
35
66
 
36
67
  Include image recommendations in your designs when the product calls for it. A landing page without photography feels like a wireframe. A feature section with a real image feels finished. When proposing layouts, specify where images go and what they should depict — don't leave it to the coding agent to figure out.
37
68
 
38
- The coding agent should never need to source its own imagery. Always provide URLs.
39
-
40
69
  ### CDN image transforms
41
70
 
42
71
  Generated images and uploaded images are hosted on `i.mscdn.ai`. Use query string parameters to request appropriately sized images rather than CSS-scaling full-resolution originals:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mindstudio-ai/remy",
3
- "version": "0.1.13",
3
+ "version": "0.1.15",
4
4
  "description": "MindStudio coding agent",
5
5
  "repository": {
6
6
  "type": "git",