@mindstudio-ai/remy 0.1.42 → 0.1.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js CHANGED
@@ -2225,7 +2225,7 @@ function startStatusWatcher(config) {
2225
2225
  inflight = true;
2226
2226
  try {
2227
2227
  const ctx = getContext();
2228
- if (!ctx.assistantText && !ctx.lastToolName) {
2228
+ if (!ctx.assistantText && !ctx.lastToolName && !ctx.userMessage) {
2229
2229
  return;
2230
2230
  }
2231
2231
  const res = await fetch(url, {
@@ -2368,7 +2368,8 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
2368
2368
  getContext: () => ({
2369
2369
  assistantText: getPartialText(contentBlocks),
2370
2370
  lastToolName: currentToolNames || void 0,
2371
- lastToolResult: lastToolResult || void 0
2371
+ lastToolResult: lastToolResult || void 0,
2372
+ userMessage: task
2372
2373
  }),
2373
2374
  onStatus: (label) => emit2({ type: "status", message: label }),
2374
2375
  signal
@@ -2597,14 +2598,30 @@ Current date/time: ${(/* @__PURE__ */ new Date()).toISOString().replace("T", " "
2597
2598
  return wrapRun();
2598
2599
  }
2599
2600
  log4.info("Sub-agent backgrounded", { requestId, parentToolId, agentName });
2601
+ toolRegistry?.register({
2602
+ id: parentToolId,
2603
+ name: agentName,
2604
+ input: { task },
2605
+ abortController: bgAbort,
2606
+ startedAt: Date.now(),
2607
+ settle: () => {
2608
+ },
2609
+ rerun: () => {
2610
+ },
2611
+ getPartialResult: () => ""
2612
+ });
2600
2613
  const ack = await generateBackgroundAck({
2601
2614
  apiConfig,
2602
2615
  agentName: subAgentId || "agent",
2603
2616
  task
2604
2617
  });
2605
- wrapRun().then((finalResult) => onBackgroundComplete?.(finalResult)).catch(
2606
- (err) => onBackgroundComplete?.({ text: `Error: ${err.message}`, messages: [] })
2607
- );
2618
+ wrapRun().then((finalResult) => {
2619
+ toolRegistry?.unregister(parentToolId);
2620
+ onBackgroundComplete?.(finalResult);
2621
+ }).catch((err) => {
2622
+ toolRegistry?.unregister(parentToolId);
2623
+ onBackgroundComplete?.({ text: `Error: ${err.message}`, messages: [] });
2624
+ });
2608
2625
  return { text: ack, messages: [], backgrounded: true };
2609
2626
  }
2610
2627
 
@@ -3044,25 +3061,27 @@ var SYSTEM_PROMPT = readAsset(
3044
3061
  "subagents/designExpert/tools/images/enhance-image-prompt.md"
3045
3062
  );
3046
3063
  async function enhanceImagePrompt(params) {
3047
- const { brief, aspectRatio, transparentBackground, onLog } = params;
3048
- const orientation = aspectRatio === "1:1" ? "square" : ["16:9", "4:3", "3:2"].includes(aspectRatio) ? "landscape" : "portrait";
3064
+ const { brief, width, height, transparentBackground, onLog } = params;
3049
3065
  const contextParts = [
3050
- `Aspect ratio: ${aspectRatio} (${orientation})`
3066
+ `Dimensions: ${width}x${height}${width > height ? " (landscape)" : width < height ? " (portrait)" : " (square)"}`
3051
3067
  ];
3052
3068
  if (transparentBackground) {
3053
3069
  contextParts.push(
3054
3070
  "Transparent background: yes \u2014 the background will be removed. Focus on the subject as an isolated element."
3055
3071
  );
3056
3072
  }
3057
- const message = `<context>
3073
+ const context = `<context>
3058
3074
  ${contextParts.join("\n")}
3059
- </context>
3075
+ </context>`;
3076
+ const message = `${SYSTEM_PROMPT}
3077
+
3078
+ ${context}
3060
3079
 
3061
3080
  <brief>
3062
3081
  ${brief}
3063
3082
  </brief>`;
3064
3083
  const enhanced = await runCli(
3065
- `mindstudio generate-text --prompt ${JSON.stringify(SYSTEM_PROMPT)} --message ${JSON.stringify(message)} --output-key enhanced --no-meta`,
3084
+ `mindstudio generate-text --message ${JSON.stringify(message)} --output-key content --no-meta`,
3066
3085
  { timeout: 6e4, onLog }
3067
3086
  );
3068
3087
  return enhanced.trim();
@@ -3072,17 +3091,19 @@ ${brief}
3072
3091
  var ANALYZE_PROMPT = "You are reviewing this image for a visual designer sourcing assets for a project. Describe: what the image depicts, the mood and color palette, how the lighting and composition work, any text present in the image, whether there are any issues (artifacts, distortions), and how it could be used in a layout for an app or website. Be concise and practical. Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.";
3073
3092
  async function generateImageAssets(opts) {
3074
3093
  const { prompts, sourceImages, transparentBackground, onLog } = opts;
3075
- const aspectRatio = opts.aspectRatio || "1:1";
3076
- const config = {
3077
- aspect_ratio: aspectRatio,
3078
- ...sourceImages?.length && { source_images: sourceImages }
3079
- };
3094
+ const width = opts.width || 2048;
3095
+ const height = opts.height || 2048;
3096
+ const config = { width, height };
3097
+ if (sourceImages?.length) {
3098
+ config.images = sourceImages;
3099
+ }
3080
3100
  const isEdit = !!sourceImages?.length;
3081
3101
  const enhancedPrompts = isEdit ? prompts : await Promise.all(
3082
3102
  prompts.map(
3083
3103
  (brief) => enhanceImagePrompt({
3084
3104
  brief,
3085
- aspectRatio,
3105
+ width,
3106
+ height,
3086
3107
  transparentBackground,
3087
3108
  onLog
3088
3109
  })
@@ -3093,7 +3114,7 @@ async function generateImageAssets(opts) {
3093
3114
  const step = JSON.stringify({
3094
3115
  prompt: enhancedPrompts[0],
3095
3116
  imageModelOverride: {
3096
- model: "gemini-3.1-flash-image",
3117
+ model: "seedream-4.5",
3097
3118
  config
3098
3119
  }
3099
3120
  });
@@ -3108,7 +3129,7 @@ async function generateImageAssets(opts) {
3108
3129
  step: {
3109
3130
  prompt,
3110
3131
  imageModelOverride: {
3111
- model: "gemini-3.1-flash-image",
3132
+ model: "seedream-4.5",
3112
3133
  config
3113
3134
  }
3114
3135
  }
@@ -3161,7 +3182,8 @@ async function generateImageAssets(opts) {
3161
3182
  prompt: prompts[i],
3162
3183
  ...!isEdit && { enhancedPrompt: enhancedPrompts[i] },
3163
3184
  analysis,
3164
- aspectRatio
3185
+ width,
3186
+ height
3165
3187
  };
3166
3188
  })
3167
3189
  );
@@ -3182,10 +3204,13 @@ var definition6 = {
3182
3204
  },
3183
3205
  description: "One or more image briefs describing what you want. Focus on subject, mood, style, and intended use \u2014 the tool optimizes your brief into a model-ready prompt automatically. Multiple briefs run in parallel."
3184
3206
  },
3185
- aspectRatio: {
3186
- type: "string",
3187
- enum: ["1:1", "16:9", "9:16", "3:4", "4:3", "2:3", "3:2"],
3188
- description: "Aspect ratio. Default 1:1."
3207
+ width: {
3208
+ type: "number",
3209
+ description: "Image width in pixels. Default 2048. Range: 2048-4096."
3210
+ },
3211
+ height: {
3212
+ type: "number",
3213
+ description: "Image height in pixels. Default 2048. Range: 2048-4096."
3189
3214
  },
3190
3215
  transparentBackground: {
3191
3216
  type: "boolean",
@@ -3198,7 +3223,8 @@ var definition6 = {
3198
3223
  async function execute6(input, onLog) {
3199
3224
  return generateImageAssets({
3200
3225
  prompts: input.prompts,
3201
- aspectRatio: input.aspectRatio,
3226
+ width: input.width,
3227
+ height: input.height,
3202
3228
  transparentBackground: input.transparentBackground,
3203
3229
  onLog
3204
3230
  });
@@ -3230,10 +3256,13 @@ var definition7 = {
3230
3256
  },
3231
3257
  description: "One or more source/reference image URLs. These are used as the basis for the edit \u2014 the AI will use them as reference for style, subject, or composition."
3232
3258
  },
3233
- aspectRatio: {
3234
- type: "string",
3235
- enum: ["1:1", "16:9", "9:16", "3:4", "4:3", "2:3", "3:2"],
3236
- description: "Output aspect ratio. Default 1:1."
3259
+ width: {
3260
+ type: "number",
3261
+ description: "Output width in pixels. Default 2048. Range: 2048-4096."
3262
+ },
3263
+ height: {
3264
+ type: "number",
3265
+ description: "Output height in pixels. Default 2048. Range: 2048-4096."
3237
3266
  },
3238
3267
  transparentBackground: {
3239
3268
  type: "boolean",
@@ -3247,7 +3276,8 @@ async function execute7(input, onLog) {
3247
3276
  return generateImageAssets({
3248
3277
  prompts: input.prompts,
3249
3278
  sourceImages: input.sourceImages,
3250
- aspectRatio: input.aspectRatio,
3279
+ width: input.width,
3280
+ height: input.height,
3251
3281
  transparentBackground: input.transparentBackground,
3252
3282
  onLog
3253
3283
  });
@@ -4728,7 +4758,9 @@ async function runTurn(params) {
4728
4758
  toolRegistry?.register(entry);
4729
4759
  run(tc.input);
4730
4760
  const r = await resultPromise;
4731
- toolRegistry?.unregister(tc.id);
4761
+ if (!tc.input.background) {
4762
+ toolRegistry?.unregister(tc.id);
4763
+ }
4732
4764
  log7.info("Tool completed", {
4733
4765
  requestId,
4734
4766
  toolCallId: tc.id,
package/dist/index.js CHANGED
@@ -2163,7 +2163,7 @@ function startStatusWatcher(config) {
2163
2163
  inflight = true;
2164
2164
  try {
2165
2165
  const ctx = getContext();
2166
- if (!ctx.assistantText && !ctx.lastToolName) {
2166
+ if (!ctx.assistantText && !ctx.lastToolName && !ctx.userMessage) {
2167
2167
  return;
2168
2168
  }
2169
2169
  const res = await fetch(url, {
@@ -2315,7 +2315,8 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
2315
2315
  getContext: () => ({
2316
2316
  assistantText: getPartialText(contentBlocks),
2317
2317
  lastToolName: currentToolNames || void 0,
2318
- lastToolResult: lastToolResult || void 0
2318
+ lastToolResult: lastToolResult || void 0,
2319
+ userMessage: task
2319
2320
  }),
2320
2321
  onStatus: (label) => emit2({ type: "status", message: label }),
2321
2322
  signal
@@ -2544,14 +2545,30 @@ Current date/time: ${(/* @__PURE__ */ new Date()).toISOString().replace("T", " "
2544
2545
  return wrapRun();
2545
2546
  }
2546
2547
  log3.info("Sub-agent backgrounded", { requestId, parentToolId, agentName });
2548
+ toolRegistry?.register({
2549
+ id: parentToolId,
2550
+ name: agentName,
2551
+ input: { task },
2552
+ abortController: bgAbort,
2553
+ startedAt: Date.now(),
2554
+ settle: () => {
2555
+ },
2556
+ rerun: () => {
2557
+ },
2558
+ getPartialResult: () => ""
2559
+ });
2547
2560
  const ack = await generateBackgroundAck({
2548
2561
  apiConfig,
2549
2562
  agentName: subAgentId || "agent",
2550
2563
  task
2551
2564
  });
2552
- wrapRun().then((finalResult) => onBackgroundComplete?.(finalResult)).catch(
2553
- (err) => onBackgroundComplete?.({ text: `Error: ${err.message}`, messages: [] })
2554
- );
2565
+ wrapRun().then((finalResult) => {
2566
+ toolRegistry?.unregister(parentToolId);
2567
+ onBackgroundComplete?.(finalResult);
2568
+ }).catch((err) => {
2569
+ toolRegistry?.unregister(parentToolId);
2570
+ onBackgroundComplete?.({ text: `Error: ${err.message}`, messages: [] });
2571
+ });
2555
2572
  return { text: ack, messages: [], backgrounded: true };
2556
2573
  }
2557
2574
  var log3;
@@ -3097,25 +3114,27 @@ var init_screenshot3 = __esm({
3097
3114
 
3098
3115
  // src/subagents/designExpert/tools/images/enhancePrompt.ts
3099
3116
  async function enhanceImagePrompt(params) {
3100
- const { brief, aspectRatio, transparentBackground, onLog } = params;
3101
- const orientation = aspectRatio === "1:1" ? "square" : ["16:9", "4:3", "3:2"].includes(aspectRatio) ? "landscape" : "portrait";
3117
+ const { brief, width, height, transparentBackground, onLog } = params;
3102
3118
  const contextParts = [
3103
- `Aspect ratio: ${aspectRatio} (${orientation})`
3119
+ `Dimensions: ${width}x${height}${width > height ? " (landscape)" : width < height ? " (portrait)" : " (square)"}`
3104
3120
  ];
3105
3121
  if (transparentBackground) {
3106
3122
  contextParts.push(
3107
3123
  "Transparent background: yes \u2014 the background will be removed. Focus on the subject as an isolated element."
3108
3124
  );
3109
3125
  }
3110
- const message = `<context>
3126
+ const context = `<context>
3111
3127
  ${contextParts.join("\n")}
3112
- </context>
3128
+ </context>`;
3129
+ const message = `${SYSTEM_PROMPT}
3130
+
3131
+ ${context}
3113
3132
 
3114
3133
  <brief>
3115
3134
  ${brief}
3116
3135
  </brief>`;
3117
3136
  const enhanced = await runCli(
3118
- `mindstudio generate-text --prompt ${JSON.stringify(SYSTEM_PROMPT)} --message ${JSON.stringify(message)} --output-key enhanced --no-meta`,
3137
+ `mindstudio generate-text --message ${JSON.stringify(message)} --output-key content --no-meta`,
3119
3138
  { timeout: 6e4, onLog }
3120
3139
  );
3121
3140
  return enhanced.trim();
@@ -3135,17 +3154,19 @@ var init_enhancePrompt = __esm({
3135
3154
  // src/subagents/designExpert/tools/images/imageGenerator.ts
3136
3155
  async function generateImageAssets(opts) {
3137
3156
  const { prompts, sourceImages, transparentBackground, onLog } = opts;
3138
- const aspectRatio = opts.aspectRatio || "1:1";
3139
- const config = {
3140
- aspect_ratio: aspectRatio,
3141
- ...sourceImages?.length && { source_images: sourceImages }
3142
- };
3157
+ const width = opts.width || 2048;
3158
+ const height = opts.height || 2048;
3159
+ const config = { width, height };
3160
+ if (sourceImages?.length) {
3161
+ config.images = sourceImages;
3162
+ }
3143
3163
  const isEdit = !!sourceImages?.length;
3144
3164
  const enhancedPrompts = isEdit ? prompts : await Promise.all(
3145
3165
  prompts.map(
3146
3166
  (brief) => enhanceImagePrompt({
3147
3167
  brief,
3148
- aspectRatio,
3168
+ width,
3169
+ height,
3149
3170
  transparentBackground,
3150
3171
  onLog
3151
3172
  })
@@ -3156,7 +3177,7 @@ async function generateImageAssets(opts) {
3156
3177
  const step = JSON.stringify({
3157
3178
  prompt: enhancedPrompts[0],
3158
3179
  imageModelOverride: {
3159
- model: "gemini-3.1-flash-image",
3180
+ model: "seedream-4.5",
3160
3181
  config
3161
3182
  }
3162
3183
  });
@@ -3171,7 +3192,7 @@ async function generateImageAssets(opts) {
3171
3192
  step: {
3172
3193
  prompt,
3173
3194
  imageModelOverride: {
3174
- model: "gemini-3.1-flash-image",
3195
+ model: "seedream-4.5",
3175
3196
  config
3176
3197
  }
3177
3198
  }
@@ -3224,7 +3245,8 @@ async function generateImageAssets(opts) {
3224
3245
  prompt: prompts[i],
3225
3246
  ...!isEdit && { enhancedPrompt: enhancedPrompts[i] },
3226
3247
  analysis,
3227
- aspectRatio
3248
+ width,
3249
+ height
3228
3250
  };
3229
3251
  })
3230
3252
  );
@@ -3250,7 +3272,8 @@ __export(generateImages_exports, {
3250
3272
  async function execute6(input, onLog) {
3251
3273
  return generateImageAssets({
3252
3274
  prompts: input.prompts,
3253
- aspectRatio: input.aspectRatio,
3275
+ width: input.width,
3276
+ height: input.height,
3254
3277
  transparentBackground: input.transparentBackground,
3255
3278
  onLog
3256
3279
  });
@@ -3273,10 +3296,13 @@ var init_generateImages = __esm({
3273
3296
  },
3274
3297
  description: "One or more image briefs describing what you want. Focus on subject, mood, style, and intended use \u2014 the tool optimizes your brief into a model-ready prompt automatically. Multiple briefs run in parallel."
3275
3298
  },
3276
- aspectRatio: {
3277
- type: "string",
3278
- enum: ["1:1", "16:9", "9:16", "3:4", "4:3", "2:3", "3:2"],
3279
- description: "Aspect ratio. Default 1:1."
3299
+ width: {
3300
+ type: "number",
3301
+ description: "Image width in pixels. Default 2048. Range: 2048-4096."
3302
+ },
3303
+ height: {
3304
+ type: "number",
3305
+ description: "Image height in pixels. Default 2048. Range: 2048-4096."
3280
3306
  },
3281
3307
  transparentBackground: {
3282
3308
  type: "boolean",
@@ -3299,7 +3325,8 @@ async function execute7(input, onLog) {
3299
3325
  return generateImageAssets({
3300
3326
  prompts: input.prompts,
3301
3327
  sourceImages: input.sourceImages,
3302
- aspectRatio: input.aspectRatio,
3328
+ width: input.width,
3329
+ height: input.height,
3303
3330
  transparentBackground: input.transparentBackground,
3304
3331
  onLog
3305
3332
  });
@@ -3329,10 +3356,13 @@ var init_editImages = __esm({
3329
3356
  },
3330
3357
  description: "One or more source/reference image URLs. These are used as the basis for the edit \u2014 the AI will use them as reference for style, subject, or composition."
3331
3358
  },
3332
- aspectRatio: {
3333
- type: "string",
3334
- enum: ["1:1", "16:9", "9:16", "3:4", "4:3", "2:3", "3:2"],
3335
- description: "Output aspect ratio. Default 1:1."
3359
+ width: {
3360
+ type: "number",
3361
+ description: "Output width in pixels. Default 2048. Range: 2048-4096."
3362
+ },
3363
+ height: {
3364
+ type: "number",
3365
+ description: "Output height in pixels. Default 2048. Range: 2048-4096."
3336
3366
  },
3337
3367
  transparentBackground: {
3338
3368
  type: "boolean",
@@ -4970,7 +5000,9 @@ async function runTurn(params) {
4970
5000
  toolRegistry?.register(entry);
4971
5001
  run(tc.input);
4972
5002
  const r = await resultPromise;
4973
- toolRegistry?.unregister(tc.id);
5003
+ if (!tc.input.background) {
5004
+ toolRegistry?.unregister(tc.id);
5005
+ }
4974
5006
  log6.info("Tool completed", {
4975
5007
  requestId,
4976
5008
  toolCallId: tc.id,
@@ -41,7 +41,6 @@ result.$billingCost; // cost in credits (if applicable)
41
41
  | `generateMusic` | Music from text description | `prompt` | `audioUrl` |
42
42
  | `generateLipsync` | Animate face to match audio | `imageUrl`, `audioUrl` | `videoUrl` |
43
43
  | `generateAsset` | HTML/PDF/PNG/video output | `prompt` | `assetUrl` |
44
- | `generateChart` | Chart from data | `data`, `chartType` | `imageUrl` |
45
44
 
46
45
  ### AI Analysis
47
46
 
@@ -133,6 +132,8 @@ const { content } = await agent.generateText({
133
132
  });
134
133
  ```
135
134
 
135
+ Make sure to prioritize new, popular models. MindStudio has a ton of models available - most of them are historical/legacy. Always choose latest generation models from leading providers - Anthropic Claude 4 family, Google Gemini 3, OpenAI GPT 5 to start - the user can adjust later.
136
+
136
137
  ### Batch Execution
137
138
 
138
139
  Run up to 50 actions in parallel:
@@ -144,3 +145,15 @@ const result = await agent.executeStepBatch([
144
145
  ]);
145
146
  // result.results[0].output, result.results[1].output
146
147
  ```
148
+
149
+ ### AI Generation
150
+
151
+ MindStudio SDK allows us to build all kinds of amazing AI experiences in apps, including:
152
+ - Text generation
153
+ - Image generation (including images with text - AI has gotten good at that now)
154
+ - Image "remixing" (e.g., user uploads an image, use it as the source image to an image generation model to remix it, or add multiple to generate a collage, etc)
155
+ - Video generation (including generating video from reference images, start frame images, with audio including voice, etc)
156
+ - TTS/audio generation
157
+ - Detailed image and video analysis
158
+
159
+ Consider the ways in which AI can be incorporated into backend methods to solve problems and be creative.
@@ -59,6 +59,8 @@ When you receive background results:
59
59
 
60
60
  #### When to Background
61
61
 
62
- Only background the following tasks, unless the user specifically asks you to do work in the background:
62
+ You can only background the following two tasks, unless the user specifically asks you to do work in the background:
63
63
  - `productVision` seeding the initial roadmap after writing the spec for the first time. This task takes a while and we can allow the user to continue building while it happens in the background
64
64
  - After writing the spec, once you have finalized the shape of the app, ask `visualDesignExpert` to create an "iphone app store" style icon for the app, then set it with `setProjectMetadata({ iconUrl: ... })`
65
+
66
+ Do not background any other tasks.
@@ -32,7 +32,7 @@ These are non-negotiable. Violating them produces bad output.
32
32
 
33
33
  You'll receive context about the generation parameters. Use them:
34
34
 
35
- - **Aspect ratio**: If the image is landscape (16:9, 4:3, 3:2), compose horizontally. If portrait (9:16, 3:4, 2:3), compose vertically. If square (1:1), center the subject.
35
+ - **Dimensions**: If the image is wide (landscape), compose horizontally. If tall (portrait), compose vertically. If square, center the subject.
36
36
  - **Transparent background**: The background will be removed after generation. Don't describe elaborate backgrounds — focus on the subject. Describe it as an isolated element.
37
37
 
38
38
  ## Photography prompts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mindstudio-ai/remy",
3
- "version": "0.1.42",
3
+ "version": "0.1.44",
4
4
  "description": "MindStudio coding agent",
5
5
  "repository": {
6
6
  "type": "git",