npm - @mindstudio-ai/remy - Versions diffs - 0.1.41 → 0.1.43 - Mend

@mindstudio-ai/remy 0.1.41 → 0.1.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/dist/headless.js +63 -38
package/dist/index.js +76 -42
package/dist/prompt/compiled/sdk-actions.md +14 -1
package/dist/subagents/codeSanityCheck/prompt.md +2 -0
package/dist/subagents/designExpert/tools/images/enhance-image-prompt.md +1 -1
package/package.json +1 -1

package/dist/headless.js CHANGED

@@ -2141,6 +2141,17 @@ var runMethodTool = {
   }
 };
+// src/subagents/common/analyzeImage.ts
+var VISION_MODEL = "gemini-3-flash";
+var VISION_MODEL_OVERRIDE = JSON.stringify({ model: VISION_MODEL });
+async function analyzeImage(params) {
+  const { prompt, imageUrl, timeout = 2e5, onLog } = params;
+  return runCli(
+    `mindstudio analyze-image --prompt ${JSON.stringify(prompt)} --image-url ${JSON.stringify(imageUrl)} --vision-model-override ${JSON.stringify(VISION_MODEL_OVERRIDE)} --output-key analysis --no-meta`,
+    { timeout, onLog }
+  );
+}
 // src/tools/_helpers/screenshot.ts
 var SCREENSHOT_ANALYSIS_PROMPT = "Describe everything visible on screen from top to bottom \u2014 every element, its position, its size relative to the viewport, its colors, its content. Be comprehensive, thorough, and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components). Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.";
 async function captureAndAnalyzeScreenshot(promptOrOptions) {
@@ -2165,10 +2176,11 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
     return url;
   }
   const analysisPrompt = prompt || SCREENSHOT_ANALYSIS_PROMPT;
-  const analysis = await runCli(
-    `mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(url)} --output-key analysis --no-meta`,
-    { timeout: 2e5, onLog }
-  );
+  const analysis = await analyzeImage({
+    prompt: analysisPrompt,
+    imageUrl: url,
+    onLog
+  });
   return JSON.stringify({ url, analysis });
 }
@@ -2946,10 +2958,11 @@ async function execute3(input, onLog) {
     }
     imageUrl = ssUrl;
   }
-  const analysis = await runCli(
-    `mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(imageUrl)} --output-key analysis --no-meta`,
-    { timeout: 2e5, onLog }
-  );
+  const analysis = await analyzeImage({
+    prompt: analysisPrompt,
+    imageUrl,
+    onLog
+  });
   return JSON.stringify({ url: imageUrl, analysis });
 }
@@ -2981,10 +2994,11 @@ var definition4 = {
 async function execute4(input, onLog) {
   const imageUrl = input.imageUrl;
   const prompt = input.prompt || DEFAULT_PROMPT;
-  const analysis = await runCli(
-    `mindstudio analyze-image --prompt ${JSON.stringify(prompt)} --image-url ${JSON.stringify(imageUrl)} --output-key analysis --no-meta`,
-    { timeout: 2e5, onLog }
-  );
+  const analysis = await analyzeImage({
+    prompt,
+    imageUrl,
+    onLog
+  });
   return JSON.stringify({ url: imageUrl, analysis });
 }
@@ -3030,10 +3044,9 @@ var SYSTEM_PROMPT = readAsset(
   "subagents/designExpert/tools/images/enhance-image-prompt.md"
 );
 async function enhanceImagePrompt(params) {
-  const { brief, aspectRatio, transparentBackground, onLog } = params;
-  const orientation = aspectRatio === "1:1" ? "square" : ["16:9", "4:3", "3:2"].includes(aspectRatio) ? "landscape" : "portrait";
+  const { brief, width, height, transparentBackground, onLog } = params;
   const contextParts = [
-    `Aspect ratio: ${aspectRatio} (${orientation})`
+    `Dimensions: ${width}x${height}${width > height ? " (landscape)" : width < height ? " (portrait)" : " (square)"}`
   ];
   if (transparentBackground) {
     contextParts.push(
@@ -3058,17 +3071,19 @@ ${brief}
 var ANALYZE_PROMPT = "You are reviewing this image for a visual designer sourcing assets for a project. Describe: what the image depicts, the mood and color palette, how the lighting and composition work, any text present in the image, whether there are any issues (artifacts, distortions), and how it could be used in a layout for an app or website. Be concise and practical. Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.";
 async function generateImageAssets(opts) {
   const { prompts, sourceImages, transparentBackground, onLog } = opts;
-  const aspectRatio = opts.aspectRatio || "1:1";
-  const config = {
-    aspect_ratio: aspectRatio,
-    ...sourceImages?.length && { source_images: sourceImages }
-  };
+  const width = opts.width || 2048;
+  const height = opts.height || 2048;
+  const config = { width, height };
+  if (sourceImages?.length) {
+    config.images = sourceImages;
+  }
   const isEdit = !!sourceImages?.length;
   const enhancedPrompts = isEdit ? prompts : await Promise.all(
     prompts.map(
       (brief) => enhanceImagePrompt({
         brief,
-        aspectRatio,
+        width,
+        height,
         transparentBackground,
         onLog
       })
@@ -3079,7 +3094,7 @@ async function generateImageAssets(opts) {
     const step = JSON.stringify({
       prompt: enhancedPrompts[0],
       imageModelOverride: {
-        model: "gemini-3.1-flash-image",
+        model: "seedream-4.5",
         config
       }
     });
@@ -3094,7 +3109,7 @@ async function generateImageAssets(opts) {
       step: {
         prompt,
         imageModelOverride: {
-          model: "gemini-3.1-flash-image",
+          model: "seedream-4.5",
           config
         }
       }
@@ -3137,16 +3152,18 @@ async function generateImageAssets(opts) {
           error: url
         };
       }
-      const analysis = await runCli(
-        `mindstudio analyze-image --prompt ${JSON.stringify(ANALYZE_PROMPT)} --image-url ${JSON.stringify(url)} --output-key analysis --no-meta`,
-        { timeout: 2e5, onLog }
-      );
+      const analysis = await analyzeImage({
+        prompt: ANALYZE_PROMPT,
+        imageUrl: url,
+        onLog
+      });
       return {
         url,
         prompt: prompts[i],
         ...!isEdit && { enhancedPrompt: enhancedPrompts[i] },
         analysis,
-        aspectRatio
+        width,
+        height
       };
     })
   );
@@ -3167,10 +3184,13 @@ var definition6 = {
         },
         description: "One or more image briefs describing what you want. Focus on subject, mood, style, and intended use \u2014 the tool optimizes your brief into a model-ready prompt automatically. Multiple briefs run in parallel."
       },
-      aspectRatio: {
-        type: "string",
-        enum: ["1:1", "16:9", "9:16", "3:4", "4:3", "2:3", "3:2"],
-        description: "Aspect ratio. Default 1:1."
+      width: {
+        type: "number",
+        description: "Image width in pixels. Default 2048. Range: 2048-4096."
+      },
+      height: {
+        type: "number",
+        description: "Image height in pixels. Default 2048. Range: 2048-4096."
       },
       transparentBackground: {
         type: "boolean",
@@ -3183,7 +3203,8 @@ var definition6 = {
 async function execute6(input, onLog) {
   return generateImageAssets({
     prompts: input.prompts,
-    aspectRatio: input.aspectRatio,
+    width: input.width,
+    height: input.height,
     transparentBackground: input.transparentBackground,
     onLog
   });
@@ -3215,10 +3236,13 @@ var definition7 = {
         },
         description: "One or more source/reference image URLs. These are used as the basis for the edit \u2014 the AI will use them as reference for style, subject, or composition."
       },
-      aspectRatio: {
-        type: "string",
-        enum: ["1:1", "16:9", "9:16", "3:4", "4:3", "2:3", "3:2"],
-        description: "Output aspect ratio. Default 1:1."
+      width: {
+        type: "number",
+        description: "Output width in pixels. Default 2048. Range: 2048-4096."
+      },
+      height: {
+        type: "number",
+        description: "Output height in pixels. Default 2048. Range: 2048-4096."
       },
       transparentBackground: {
         type: "boolean",
@@ -3232,7 +3256,8 @@ async function execute7(input, onLog) {
   return generateImageAssets({
     prompts: input.prompts,
     sourceImages: input.sourceImages,
-    aspectRatio: input.aspectRatio,
+    width: input.width,
+    height: input.height,
     transparentBackground: input.transparentBackground,
     onLog
   });

package/dist/index.js CHANGED

@@ -2057,6 +2057,24 @@ var init_runMethod = __esm({
   }
 });
+// src/subagents/common/analyzeImage.ts
+async function analyzeImage(params) {
+  const { prompt, imageUrl, timeout = 2e5, onLog } = params;
+  return runCli(
+    `mindstudio analyze-image --prompt ${JSON.stringify(prompt)} --image-url ${JSON.stringify(imageUrl)} --vision-model-override ${JSON.stringify(VISION_MODEL_OVERRIDE)} --output-key analysis --no-meta`,
+    { timeout, onLog }
+  );
+}
+var VISION_MODEL, VISION_MODEL_OVERRIDE;
+var init_analyzeImage = __esm({
+  "src/subagents/common/analyzeImage.ts"() {
+    "use strict";
+    init_runCli();
+    VISION_MODEL = "gemini-3-flash";
+    VISION_MODEL_OVERRIDE = JSON.stringify({ model: VISION_MODEL });
+  }
+});
 // src/tools/_helpers/screenshot.ts
 async function captureAndAnalyzeScreenshot(promptOrOptions) {
   let prompt;
@@ -2080,10 +2098,11 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
     return url;
   }
   const analysisPrompt = prompt || SCREENSHOT_ANALYSIS_PROMPT;
-  const analysis = await runCli(
-    `mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(url)} --output-key analysis --no-meta`,
-    { timeout: 2e5, onLog }
-  );
+  const analysis = await analyzeImage({
+    prompt: analysisPrompt,
+    imageUrl: url,
+    onLog
+  });
   return JSON.stringify({ url, analysis });
 }
 var SCREENSHOT_ANALYSIS_PROMPT;
@@ -2091,7 +2110,7 @@ var init_screenshot = __esm({
   "src/tools/_helpers/screenshot.ts"() {
     "use strict";
     init_sidecar();
-    init_runCli();
+    init_analyzeImage();
     SCREENSHOT_ANALYSIS_PROMPT = "Describe everything visible on screen from top to bottom \u2014 every element, its position, its size relative to the viewport, its colors, its content. Be comprehensive, thorough, and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components). Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.";
   }
 });
@@ -2939,10 +2958,11 @@ async function execute3(input, onLog) {
     }
     imageUrl = ssUrl;
   }
-  const analysis = await runCli(
-    `mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(imageUrl)} --output-key analysis --no-meta`,
-    { timeout: 2e5, onLog }
-  );
+  const analysis = await analyzeImage({
+    prompt: analysisPrompt,
+    imageUrl,
+    onLog
+  });
   return JSON.stringify({ url: imageUrl, analysis });
 }
 var DESIGN_REFERENCE_PROMPT, definition3;
@@ -2950,6 +2970,7 @@ var init_analyzeDesign = __esm({
   "src/subagents/designExpert/tools/analyzeDesign.ts"() {
     "use strict";
     init_runCli();
+    init_analyzeImage();
     DESIGN_REFERENCE_PROMPT = `
 You are analyzing a screenshot of a real website or app for a designer's personal technique/inspiration reference notes.
@@ -3003,17 +3024,18 @@ __export(analyzeImage_exports, {
 async function execute4(input, onLog) {
   const imageUrl = input.imageUrl;
   const prompt = input.prompt || DEFAULT_PROMPT;
-  const analysis = await runCli(
-    `mindstudio analyze-image --prompt ${JSON.stringify(prompt)} --image-url ${JSON.stringify(imageUrl)} --output-key analysis --no-meta`,
-    { timeout: 2e5, onLog }
-  );
+  const analysis = await analyzeImage({
+    prompt,
+    imageUrl,
+    onLog
+  });
   return JSON.stringify({ url: imageUrl, analysis });
 }
 var DEFAULT_PROMPT, definition4;
-var init_analyzeImage = __esm({
+var init_analyzeImage2 = __esm({
   "src/subagents/designExpert/tools/analyzeImage.ts"() {
     "use strict";
-    init_runCli();
+    init_analyzeImage();
     DEFAULT_PROMPT = "Describe everything visible in this image \u2014 every element, its position, its size relative to the frame, its colors, its content. Be comprhensive, thorough and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components). Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.";
     definition4 = {
       name: "analyzeImage",
@@ -3075,10 +3097,9 @@ var init_screenshot3 = __esm({
 // src/subagents/designExpert/tools/images/enhancePrompt.ts
 async function enhanceImagePrompt(params) {
-  const { brief, aspectRatio, transparentBackground, onLog } = params;
-  const orientation = aspectRatio === "1:1" ? "square" : ["16:9", "4:3", "3:2"].includes(aspectRatio) ? "landscape" : "portrait";
+  const { brief, width, height, transparentBackground, onLog } = params;
   const contextParts = [
-    `Aspect ratio: ${aspectRatio} (${orientation})`
+    `Dimensions: ${width}x${height}${width > height ? " (landscape)" : width < height ? " (portrait)" : " (square)"}`
   ];
   if (transparentBackground) {
     contextParts.push(
@@ -3113,17 +3134,19 @@ var init_enhancePrompt = __esm({
 // src/subagents/designExpert/tools/images/imageGenerator.ts
 async function generateImageAssets(opts) {
   const { prompts, sourceImages, transparentBackground, onLog } = opts;
-  const aspectRatio = opts.aspectRatio || "1:1";
-  const config = {
-    aspect_ratio: aspectRatio,
-    ...sourceImages?.length && { source_images: sourceImages }
-  };
+  const width = opts.width || 2048;
+  const height = opts.height || 2048;
+  const config = { width, height };
+  if (sourceImages?.length) {
+    config.images = sourceImages;
+  }
   const isEdit = !!sourceImages?.length;
   const enhancedPrompts = isEdit ? prompts : await Promise.all(
     prompts.map(
       (brief) => enhanceImagePrompt({
         brief,
-        aspectRatio,
+        width,
+        height,
         transparentBackground,
         onLog
       })
@@ -3134,7 +3157,7 @@ async function generateImageAssets(opts) {
     const step = JSON.stringify({
       prompt: enhancedPrompts[0],
       imageModelOverride: {
-        model: "gemini-3.1-flash-image",
+        model: "seedream-4.5",
         config
       }
     });
@@ -3149,7 +3172,7 @@ async function generateImageAssets(opts) {
       step: {
         prompt,
         imageModelOverride: {
-          model: "gemini-3.1-flash-image",
+          model: "seedream-4.5",
           config
         }
       }
@@ -3192,16 +3215,18 @@ async function generateImageAssets(opts) {
           error: url
         };
       }
-      const analysis = await runCli(
-        `mindstudio analyze-image --prompt ${JSON.stringify(ANALYZE_PROMPT)} --image-url ${JSON.stringify(url)} --output-key analysis --no-meta`,
-        { timeout: 2e5, onLog }
-      );
+      const analysis = await analyzeImage({
+        prompt: ANALYZE_PROMPT,
+        imageUrl: url,
+        onLog
+      });
       return {
         url,
         prompt: prompts[i],
         ...!isEdit && { enhancedPrompt: enhancedPrompts[i] },
         analysis,
-        aspectRatio
+        width,
+        height
       };
     })
   );
@@ -3212,6 +3237,7 @@ var init_imageGenerator = __esm({
   "src/subagents/designExpert/tools/images/imageGenerator.ts"() {
     "use strict";
     init_runCli();
+    init_analyzeImage();
     init_enhancePrompt();
     ANALYZE_PROMPT = "You are reviewing this image for a visual designer sourcing assets for a project. Describe: what the image depicts, the mood and color palette, how the lighting and composition work, any text present in the image, whether there are any issues (artifacts, distortions), and how it could be used in a layout for an app or website. Be concise and practical. Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.";
   }
@@ -3226,7 +3252,8 @@ __export(generateImages_exports, {
 async function execute6(input, onLog) {
   return generateImageAssets({
     prompts: input.prompts,
-    aspectRatio: input.aspectRatio,
+    width: input.width,
+    height: input.height,
     transparentBackground: input.transparentBackground,
     onLog
   });
@@ -3249,10 +3276,13 @@ var init_generateImages = __esm({
             },
             description: "One or more image briefs describing what you want. Focus on subject, mood, style, and intended use \u2014 the tool optimizes your brief into a model-ready prompt automatically. Multiple briefs run in parallel."
           },
-          aspectRatio: {
-            type: "string",
-            enum: ["1:1", "16:9", "9:16", "3:4", "4:3", "2:3", "3:2"],
-            description: "Aspect ratio. Default 1:1."
+          width: {
+            type: "number",
+            description: "Image width in pixels. Default 2048. Range: 2048-4096."
+          },
+          height: {
+            type: "number",
+            description: "Image height in pixels. Default 2048. Range: 2048-4096."
           },
           transparentBackground: {
             type: "boolean",
@@ -3275,7 +3305,8 @@ async function execute7(input, onLog) {
   return generateImageAssets({
     prompts: input.prompts,
     sourceImages: input.sourceImages,
-    aspectRatio: input.aspectRatio,
+    width: input.width,
+    height: input.height,
     transparentBackground: input.transparentBackground,
     onLog
   });
@@ -3305,10 +3336,13 @@ var init_editImages = __esm({
             },
             description: "One or more source/reference image URLs. These are used as the basis for the edit \u2014 the AI will use them as reference for style, subject, or composition."
           },
-          aspectRatio: {
-            type: "string",
-            enum: ["1:1", "16:9", "9:16", "3:4", "4:3", "2:3", "3:2"],
-            description: "Output aspect ratio. Default 1:1."
+          width: {
+            type: "number",
+            description: "Output width in pixels. Default 2048. Range: 2048-4096."
+          },
+          height: {
+            type: "number",
+            description: "Output height in pixels. Default 2048. Range: 2048-4096."
           },
           transparentBackground: {
             type: "boolean",
@@ -3336,7 +3370,7 @@ var init_tools2 = __esm({
     init_searchGoogle2();
     init_fetchUrl2();
     init_analyzeDesign();
-    init_analyzeImage();
+    init_analyzeImage2();
     init_screenshot3();
     init_generateImages();
     init_editImages();

package/dist/prompt/compiled/sdk-actions.md CHANGED

@@ -41,7 +41,6 @@ result.$billingCost;         // cost in credits (if applicable)
 | `generateMusic` | Music from text description | `prompt` | `audioUrl` |
 | `generateLipsync` | Animate face to match audio | `imageUrl`, `audioUrl` | `videoUrl` |
 | `generateAsset` | HTML/PDF/PNG/video output | `prompt` | `assetUrl` |
-| `generateChart` | Chart from data | `data`, `chartType` | `imageUrl` |
 ### AI Analysis
@@ -133,6 +132,8 @@ const { content } = await agent.generateText({
 });
 ```
+Make sure to prioritize new, popular models. MindStudio has a ton of models available - most of them are historical/legacy. Always choose latest generation models from leading providers - Anthropic Claude 4 family, Google Gemini 3, OpenAI GPT 5 to start - the user can adjust later.
 ### Batch Execution
 Run up to 50 actions in parallel:
@@ -144,3 +145,15 @@ const result = await agent.executeStepBatch([
 ]);
 // result.results[0].output, result.results[1].output
 ```
+### AI Generation
+MindStudio SDK allows us to build all kinds of amazing AI experiences in apps, including:
+  - Text generation
+  - Image generation (including images with text - AI has gotten good at that now)
+  - Image "remixing" (e.g., user uploads an image, use it as the source image to an image generation model to remix it, or add multiple to generate a collage, etc)
+  - Video generation (including generating video from reference images, start frame images, with audio including voice, etc)
+  - TTS/audio generation
+  - Detailed image and video analysis
+Consider the ways in which AI can be incorporated into backend methods to solve problems and be creative.

package/dist/subagents/codeSanityCheck/prompt.md CHANGED

@@ -22,6 +22,8 @@ These are things we already know about and have decided to accept:
   - swr
   - framer-motion
   - styled-components
+- Preferences:
+  - use [wouter](https://github.com/molefrog/wouter) for React routing instead of reaching for react-router
 ### Common pitfalls (always flag these)

package/dist/subagents/designExpert/tools/images/enhance-image-prompt.md CHANGED

@@ -32,7 +32,7 @@ These are non-negotiable. Violating them produces bad output.
 You'll receive context about the generation parameters. Use them:
-- **Aspect ratio**: If the image is landscape (16:9, 4:3, 3:2), compose horizontally. If portrait (9:16, 3:4, 2:3), compose vertically. If square (1:1), center the subject.
+- **Dimensions**: If the image is wide (landscape), compose horizontally. If tall (portrait), compose vertically. If square, center the subject.
 - **Transparent background**: The background will be removed after generation. Don't describe elaborate backgrounds — focus on the subject. Describe it as an isolated element.
 ## Photography prompts

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@mindstudio-ai/remy",
-  "version": "0.1.41",
+  "version": "0.1.43",
   "description": "MindStudio coding agent",
   "repository": {
     "type": "git",