npm - @mindstudio-ai/remy - Versions diffs - 0.1.197 → 0.1.199 - Mend

@mindstudio-ai/remy 0.1.197 → 0.1.199

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/headless.js +36 -7
package/dist/index.js +36 -7
package/dist/subagents/designExpert/tools/images/enhance-image-prompt.md +11 -3
package/package.json +1 -1

package/dist/headless.js CHANGED

@@ -4232,7 +4232,15 @@ var SYSTEM_PROMPT = readAsset(
   "subagents/designExpert/tools/images/enhance-image-prompt.md"
 );
 async function enhanceImagePrompt(params) {
-  const { brief, width, height, transparentBackground, onLog, model } = params;
+  const {
+    brief,
+    width,
+    height,
+    transparentBackground,
+    hasReferenceImage,
+    onLog,
+    model
+  } = params;
   const contextParts = [
     `Dimensions: ${width}x${height}${width > height ? " (landscape)" : width < height ? " (portrait)" : " (square)"}`
   ];
@@ -4241,6 +4249,11 @@ async function enhanceImagePrompt(params) {
       "Transparent background: yes \u2014 the background will be removed. Focus on the subject as an isolated element."
     );
   }
+  if (hasReferenceImage) {
+    contextParts.push(
+      "Reference image: yes \u2014 a reference image is provided to the generation model alongside this prompt to guide style, subject, or composition. Complement it; don't re-describe what it already carries."
+    );
+  }
   const context = `<context>
 ${contextParts.join("\n")}
 </context>`;
@@ -4271,6 +4284,7 @@ async function generateImageAssets(opts) {
     prompts,
     sourceImages,
     transparentBackground,
+    enhancePrompts,
     onLog,
     imageGenerationModel: genModel,
     imageAnalysisModel,
@@ -4280,21 +4294,29 @@ async function generateImageAssets(opts) {
   const height = opts.height || 2048;
   const config = { width, height };
   if (sourceImages?.length) {
+    const [firstImage] = sourceImages;
     config.images = sourceImages;
-  }
-  const isEdit = !!sourceImages?.length;
-  const enhancedPrompts = isEdit ? prompts : await Promise.all(
+    config.source_images = sourceImages;
+    config.image_ref = sourceImages;
+    config.image = firstImage;
+    config.image_url = firstImage;
+    config.source_image = firstImage;
+    config.source = firstImage;
+  }
+  const hasReference = !!sourceImages?.length;
+  const enhancedPrompts = enhancePrompts ? await Promise.all(
     prompts.map(
       (brief) => enhanceImagePrompt({
         brief,
         width,
         height,
         transparentBackground,
+        hasReferenceImage: hasReference,
         onLog,
         model: imagePromptEnhancerModel
       })
     )
-  );
+  ) : prompts;
   let imageUrls;
   if (enhancedPrompts.length === 1) {
     const step = JSON.stringify({
@@ -4364,7 +4386,7 @@ async function generateImageAssets(opts) {
       if (url.startsWith("Error")) {
         return {
           prompt: prompts[i],
-          ...!isEdit && { enhancedPrompt: enhancedPrompts[i] },
+          ...enhancePrompts && { enhancedPrompt: enhancedPrompts[i] },
           error: url
         };
       }
@@ -4377,7 +4399,7 @@ async function generateImageAssets(opts) {
       return {
         url,
         prompt: prompts[i],
-        ...!isEdit && { enhancedPrompt: enhancedPrompts[i] },
+        ...enhancePrompts && { enhancedPrompt: enhancedPrompts[i] },
         analysis,
         width,
         height
@@ -4402,6 +4424,10 @@ var definition6 = {
         },
         description: "One or more image briefs describing what you want. Focus on subject, mood, style, and intended use \u2014 the tool optimizes your brief into a model-ready prompt automatically. Multiple briefs run in parallel."
       },
+      referenceImage: {
+        type: "string",
+        description: "Optional URL of a single reference image to guide the generation \u2014 for style, subject, character consistency, or composition. Your prompt still describes the desired result; the reference conditions it. Applies to every prompt in the batch."
+      },
       width: {
         type: "number",
         description: "Image width in pixels. Default 2048. Range: 2048-4096."
@@ -4424,6 +4450,8 @@ async function execute6(input, onLog, context) {
     width: input.width,
     height: input.height,
     transparentBackground: input.transparentBackground,
+    sourceImages: input.referenceImage ? [input.referenceImage] : void 0,
+    enhancePrompts: true,
     onLog,
     imageGenerationModel: resolveModel(
       "imageGeneration",
@@ -4493,6 +4521,7 @@ async function execute7(input, onLog, context) {
     width: input.width,
     height: input.height,
     transparentBackground: input.transparentBackground,
+    enhancePrompts: false,
     onLog,
     imageGenerationModel: resolveModel(
       "imageGeneration",

package/dist/index.js CHANGED

@@ -4957,7 +4957,15 @@ var init_screenshot3 = __esm({
 // src/subagents/designExpert/tools/images/enhancePrompt.ts
 async function enhanceImagePrompt(params) {
-  const { brief, width, height, transparentBackground, onLog, model } = params;
+  const {
+    brief,
+    width,
+    height,
+    transparentBackground,
+    hasReferenceImage,
+    onLog,
+    model
+  } = params;
   const contextParts = [
     `Dimensions: ${width}x${height}${width > height ? " (landscape)" : width < height ? " (portrait)" : " (square)"}`
   ];
@@ -4966,6 +4974,11 @@ async function enhanceImagePrompt(params) {
       "Transparent background: yes \u2014 the background will be removed. Focus on the subject as an isolated element."
     );
   }
+  if (hasReferenceImage) {
+    contextParts.push(
+      "Reference image: yes \u2014 a reference image is provided to the generation model alongside this prompt to guide style, subject, or composition. Complement it; don't re-describe what it already carries."
+    );
+  }
   const context = `<context>
 ${contextParts.join("\n")}
 </context>`;
@@ -5006,6 +5019,7 @@ async function generateImageAssets(opts) {
     prompts,
     sourceImages,
     transparentBackground,
+    enhancePrompts,
     onLog,
     imageGenerationModel: genModel,
     imageAnalysisModel,
@@ -5015,21 +5029,29 @@ async function generateImageAssets(opts) {
   const height = opts.height || 2048;
   const config = { width, height };
   if (sourceImages?.length) {
+    const [firstImage] = sourceImages;
     config.images = sourceImages;
-  }
-  const isEdit = !!sourceImages?.length;
-  const enhancedPrompts = isEdit ? prompts : await Promise.all(
+    config.source_images = sourceImages;
+    config.image_ref = sourceImages;
+    config.image = firstImage;
+    config.image_url = firstImage;
+    config.source_image = firstImage;
+    config.source = firstImage;
+  }
+  const hasReference = !!sourceImages?.length;
+  const enhancedPrompts = enhancePrompts ? await Promise.all(
     prompts.map(
       (brief) => enhanceImagePrompt({
         brief,
         width,
         height,
         transparentBackground,
+        hasReferenceImage: hasReference,
         onLog,
         model: imagePromptEnhancerModel
       })
     )
-  );
+  ) : prompts;
   let imageUrls;
   if (enhancedPrompts.length === 1) {
     const step = JSON.stringify({
@@ -5099,7 +5121,7 @@ async function generateImageAssets(opts) {
       if (url.startsWith("Error")) {
         return {
           prompt: prompts[i],
-          ...!isEdit && { enhancedPrompt: enhancedPrompts[i] },
+          ...enhancePrompts && { enhancedPrompt: enhancedPrompts[i] },
           error: url
         };
       }
@@ -5112,7 +5134,7 @@ async function generateImageAssets(opts) {
       return {
         url,
         prompt: prompts[i],
-        ...!isEdit && { enhancedPrompt: enhancedPrompts[i] },
+        ...enhancePrompts && { enhancedPrompt: enhancedPrompts[i] },
         analysis,
         width,
         height
@@ -5144,6 +5166,8 @@ async function execute6(input, onLog, context) {
     width: input.width,
     height: input.height,
     transparentBackground: input.transparentBackground,
+    sourceImages: input.referenceImage ? [input.referenceImage] : void 0,
+    enhancePrompts: true,
     onLog,
     imageGenerationModel: resolveModel(
       "imageGeneration",
@@ -5182,6 +5206,10 @@ var init_generateImages = __esm({
             },
             description: "One or more image briefs describing what you want. Focus on subject, mood, style, and intended use \u2014 the tool optimizes your brief into a model-ready prompt automatically. Multiple briefs run in parallel."
           },
+          referenceImage: {
+            type: "string",
+            description: "Optional URL of a single reference image to guide the generation \u2014 for style, subject, character consistency, or composition. Your prompt still describes the desired result; the reference conditions it. Applies to every prompt in the batch."
+          },
           width: {
             type: "number",
             description: "Image width in pixels. Default 2048. Range: 2048-4096."
@@ -5214,6 +5242,7 @@ async function execute7(input, onLog, context) {
     width: input.width,
     height: input.height,
     transparentBackground: input.transparentBackground,
+    enhancePrompts: false,
     onLog,
     imageGenerationModel: resolveModel(
       "imageGeneration",

package/dist/subagents/designExpert/tools/images/enhance-image-prompt.md CHANGED

@@ -15,12 +15,19 @@ Examples of good density:
 These are non-negotiable. Violating them produces bad output.
 - **No hex codes.** The model renders hex codes as visible text in the image. Describe colors by name and relationship: "deep emerald green with a smooth satin finish" or "warm sand beige fading into pale desaturated blue" — never "#7C3AED".
-- **No quoted strings.** Any single or double quoted string gets rendered as literal text in the image.
 - **No physical object framing.** Words like "artwork", "painting", "canvas", "print", "app icon", "square digital artwork" produce photorealistic mockups of a painting in a frame or an icon inset on a background. Describe the visual content directly.
-- **No text triggers.** Words like "poster", "magazine cover", "editorial spread", "sign", or brand names risk rendering literal text, mastheads, or mockup layouts. If you want an editorial photography *style*, describe the photographic qualities — not the format.
 - **Describe what you want, not what you don't want.** Negation doesn't work — "street with no cars" activates "cars." Say "empty street" instead.
 - **No body part positioning.** Don't describe specific arrangements of arms, legs, or limbs.
-- **No brand names.** Things like "Apple style" or "Nintendo style" will generate literal logos in the output.
+- **No other brands as a style shortcut.** Don't borrow another company's identity as shorthand — "Apple style", "Nintendo style" — it renders that company's literal logo. (A brand's *own* name or wordmark on its *own* asset is intended text, not this — see Text & wordmarks below.)
+## Text & wordmarks
+The model renders text well — but only the text you tell it to, so quotation marks mean "render this literally." Use them deliberately.
+- **Reproduce intended text exactly.** When the brief names a wordmark, brand name, label, sign, headline, or UI copy, carry the exact string through in quotes — e.g. a wordmark reading "Solid Credit". Never drop it, paraphrase it, or genericize it to "a wordmark": the literal text is usually the whole point of the asset, and if you omit it the model fills the space with an invented placeholder.
+- **Direct the typography and placement.** Specify weight, case, color, and position so the text lands where the designer wants it — "a near-black grotesque sans-serif wordmark reading 'Solid Credit', centered directly below the mark."
+- **Keep it short.** Wordmarks, labels, and short taglines render reliably; full sentences and paragraphs degrade into garbled glyphs. Trim long copy to the few words that matter, or leave it out.
+- **Don't summon text you don't want.** When the image should have no text, don't quote stray descriptive phrases, and avoid format words that imply copy — "poster", "magazine cover", "sign", "billboard" — which can produce spurious text or mastheads. For an editorial *style*, describe the photographic qualities, not the format.
 ## Composition
@@ -34,6 +41,7 @@ You'll receive context about the generation parameters. Use them:
 - **Dimensions**: If the image is wide (landscape), compose horizontally. If tall (portrait), compose vertically. If square, center the subject.
 - **Transparent background**: The background will be removed after generation and the image will be trimmed to the subject bounds (no extra padding). Don't describe elaborate backgrounds — focus on the subject. Describe it as an isolated element.
+- **Reference image**: When a reference image is provided, the generation model receives it alongside your prompt to guide style, subject, or composition. Write the prompt to *complement* the reference, not duplicate it: describe the scene, action, and anything new or changed, and lean on the reference for what it already establishes (a specific face, product, logo, or art style). Don't exhaustively re-describe those — over-specifying competes with the reference image and can distort it.
 ## Photography prompts

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@mindstudio-ai/remy",
-  "version": "0.1.197",
+  "version": "0.1.199",
   "description": "MindStudio coding agent",
   "repository": {
     "type": "git",