@mindstudio-ai/remy 0.1.197 → 0.1.199

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js CHANGED
@@ -4232,7 +4232,15 @@ var SYSTEM_PROMPT = readAsset(
4232
4232
  "subagents/designExpert/tools/images/enhance-image-prompt.md"
4233
4233
  );
4234
4234
  async function enhanceImagePrompt(params) {
4235
- const { brief, width, height, transparentBackground, onLog, model } = params;
4235
+ const {
4236
+ brief,
4237
+ width,
4238
+ height,
4239
+ transparentBackground,
4240
+ hasReferenceImage,
4241
+ onLog,
4242
+ model
4243
+ } = params;
4236
4244
  const contextParts = [
4237
4245
  `Dimensions: ${width}x${height}${width > height ? " (landscape)" : width < height ? " (portrait)" : " (square)"}`
4238
4246
  ];
@@ -4241,6 +4249,11 @@ async function enhanceImagePrompt(params) {
4241
4249
  "Transparent background: yes \u2014 the background will be removed. Focus on the subject as an isolated element."
4242
4250
  );
4243
4251
  }
4252
+ if (hasReferenceImage) {
4253
+ contextParts.push(
4254
+ "Reference image: yes \u2014 a reference image is provided to the generation model alongside this prompt to guide style, subject, or composition. Complement it; don't re-describe what it already carries."
4255
+ );
4256
+ }
4244
4257
  const context = `<context>
4245
4258
  ${contextParts.join("\n")}
4246
4259
  </context>`;
@@ -4271,6 +4284,7 @@ async function generateImageAssets(opts) {
4271
4284
  prompts,
4272
4285
  sourceImages,
4273
4286
  transparentBackground,
4287
+ enhancePrompts,
4274
4288
  onLog,
4275
4289
  imageGenerationModel: genModel,
4276
4290
  imageAnalysisModel,
@@ -4280,21 +4294,29 @@ async function generateImageAssets(opts) {
4280
4294
  const height = opts.height || 2048;
4281
4295
  const config = { width, height };
4282
4296
  if (sourceImages?.length) {
4297
+ const [firstImage] = sourceImages;
4283
4298
  config.images = sourceImages;
4284
- }
4285
- const isEdit = !!sourceImages?.length;
4286
- const enhancedPrompts = isEdit ? prompts : await Promise.all(
4299
+ config.source_images = sourceImages;
4300
+ config.image_ref = sourceImages;
4301
+ config.image = firstImage;
4302
+ config.image_url = firstImage;
4303
+ config.source_image = firstImage;
4304
+ config.source = firstImage;
4305
+ }
4306
+ const hasReference = !!sourceImages?.length;
4307
+ const enhancedPrompts = enhancePrompts ? await Promise.all(
4287
4308
  prompts.map(
4288
4309
  (brief) => enhanceImagePrompt({
4289
4310
  brief,
4290
4311
  width,
4291
4312
  height,
4292
4313
  transparentBackground,
4314
+ hasReferenceImage: hasReference,
4293
4315
  onLog,
4294
4316
  model: imagePromptEnhancerModel
4295
4317
  })
4296
4318
  )
4297
- );
4319
+ ) : prompts;
4298
4320
  let imageUrls;
4299
4321
  if (enhancedPrompts.length === 1) {
4300
4322
  const step = JSON.stringify({
@@ -4364,7 +4386,7 @@ async function generateImageAssets(opts) {
4364
4386
  if (url.startsWith("Error")) {
4365
4387
  return {
4366
4388
  prompt: prompts[i],
4367
- ...!isEdit && { enhancedPrompt: enhancedPrompts[i] },
4389
+ ...enhancePrompts && { enhancedPrompt: enhancedPrompts[i] },
4368
4390
  error: url
4369
4391
  };
4370
4392
  }
@@ -4377,7 +4399,7 @@ async function generateImageAssets(opts) {
4377
4399
  return {
4378
4400
  url,
4379
4401
  prompt: prompts[i],
4380
- ...!isEdit && { enhancedPrompt: enhancedPrompts[i] },
4402
+ ...enhancePrompts && { enhancedPrompt: enhancedPrompts[i] },
4381
4403
  analysis,
4382
4404
  width,
4383
4405
  height
@@ -4402,6 +4424,10 @@ var definition6 = {
4402
4424
  },
4403
4425
  description: "One or more image briefs describing what you want. Focus on subject, mood, style, and intended use \u2014 the tool optimizes your brief into a model-ready prompt automatically. Multiple briefs run in parallel."
4404
4426
  },
4427
+ referenceImage: {
4428
+ type: "string",
4429
+ description: "Optional URL of a single reference image to guide the generation \u2014 for style, subject, character consistency, or composition. Your prompt still describes the desired result; the reference conditions it. Applies to every prompt in the batch."
4430
+ },
4405
4431
  width: {
4406
4432
  type: "number",
4407
4433
  description: "Image width in pixels. Default 2048. Range: 2048-4096."
@@ -4424,6 +4450,8 @@ async function execute6(input, onLog, context) {
4424
4450
  width: input.width,
4425
4451
  height: input.height,
4426
4452
  transparentBackground: input.transparentBackground,
4453
+ sourceImages: input.referenceImage ? [input.referenceImage] : void 0,
4454
+ enhancePrompts: true,
4427
4455
  onLog,
4428
4456
  imageGenerationModel: resolveModel(
4429
4457
  "imageGeneration",
@@ -4493,6 +4521,7 @@ async function execute7(input, onLog, context) {
4493
4521
  width: input.width,
4494
4522
  height: input.height,
4495
4523
  transparentBackground: input.transparentBackground,
4524
+ enhancePrompts: false,
4496
4525
  onLog,
4497
4526
  imageGenerationModel: resolveModel(
4498
4527
  "imageGeneration",
package/dist/index.js CHANGED
@@ -4957,7 +4957,15 @@ var init_screenshot3 = __esm({
4957
4957
 
4958
4958
  // src/subagents/designExpert/tools/images/enhancePrompt.ts
4959
4959
  async function enhanceImagePrompt(params) {
4960
- const { brief, width, height, transparentBackground, onLog, model } = params;
4960
+ const {
4961
+ brief,
4962
+ width,
4963
+ height,
4964
+ transparentBackground,
4965
+ hasReferenceImage,
4966
+ onLog,
4967
+ model
4968
+ } = params;
4961
4969
  const contextParts = [
4962
4970
  `Dimensions: ${width}x${height}${width > height ? " (landscape)" : width < height ? " (portrait)" : " (square)"}`
4963
4971
  ];
@@ -4966,6 +4974,11 @@ async function enhanceImagePrompt(params) {
4966
4974
  "Transparent background: yes \u2014 the background will be removed. Focus on the subject as an isolated element."
4967
4975
  );
4968
4976
  }
4977
+ if (hasReferenceImage) {
4978
+ contextParts.push(
4979
+ "Reference image: yes \u2014 a reference image is provided to the generation model alongside this prompt to guide style, subject, or composition. Complement it; don't re-describe what it already carries."
4980
+ );
4981
+ }
4969
4982
  const context = `<context>
4970
4983
  ${contextParts.join("\n")}
4971
4984
  </context>`;
@@ -5006,6 +5019,7 @@ async function generateImageAssets(opts) {
5006
5019
  prompts,
5007
5020
  sourceImages,
5008
5021
  transparentBackground,
5022
+ enhancePrompts,
5009
5023
  onLog,
5010
5024
  imageGenerationModel: genModel,
5011
5025
  imageAnalysisModel,
@@ -5015,21 +5029,29 @@ async function generateImageAssets(opts) {
5015
5029
  const height = opts.height || 2048;
5016
5030
  const config = { width, height };
5017
5031
  if (sourceImages?.length) {
5032
+ const [firstImage] = sourceImages;
5018
5033
  config.images = sourceImages;
5019
- }
5020
- const isEdit = !!sourceImages?.length;
5021
- const enhancedPrompts = isEdit ? prompts : await Promise.all(
5034
+ config.source_images = sourceImages;
5035
+ config.image_ref = sourceImages;
5036
+ config.image = firstImage;
5037
+ config.image_url = firstImage;
5038
+ config.source_image = firstImage;
5039
+ config.source = firstImage;
5040
+ }
5041
+ const hasReference = !!sourceImages?.length;
5042
+ const enhancedPrompts = enhancePrompts ? await Promise.all(
5022
5043
  prompts.map(
5023
5044
  (brief) => enhanceImagePrompt({
5024
5045
  brief,
5025
5046
  width,
5026
5047
  height,
5027
5048
  transparentBackground,
5049
+ hasReferenceImage: hasReference,
5028
5050
  onLog,
5029
5051
  model: imagePromptEnhancerModel
5030
5052
  })
5031
5053
  )
5032
- );
5054
+ ) : prompts;
5033
5055
  let imageUrls;
5034
5056
  if (enhancedPrompts.length === 1) {
5035
5057
  const step = JSON.stringify({
@@ -5099,7 +5121,7 @@ async function generateImageAssets(opts) {
5099
5121
  if (url.startsWith("Error")) {
5100
5122
  return {
5101
5123
  prompt: prompts[i],
5102
- ...!isEdit && { enhancedPrompt: enhancedPrompts[i] },
5124
+ ...enhancePrompts && { enhancedPrompt: enhancedPrompts[i] },
5103
5125
  error: url
5104
5126
  };
5105
5127
  }
@@ -5112,7 +5134,7 @@ async function generateImageAssets(opts) {
5112
5134
  return {
5113
5135
  url,
5114
5136
  prompt: prompts[i],
5115
- ...!isEdit && { enhancedPrompt: enhancedPrompts[i] },
5137
+ ...enhancePrompts && { enhancedPrompt: enhancedPrompts[i] },
5116
5138
  analysis,
5117
5139
  width,
5118
5140
  height
@@ -5144,6 +5166,8 @@ async function execute6(input, onLog, context) {
5144
5166
  width: input.width,
5145
5167
  height: input.height,
5146
5168
  transparentBackground: input.transparentBackground,
5169
+ sourceImages: input.referenceImage ? [input.referenceImage] : void 0,
5170
+ enhancePrompts: true,
5147
5171
  onLog,
5148
5172
  imageGenerationModel: resolveModel(
5149
5173
  "imageGeneration",
@@ -5182,6 +5206,10 @@ var init_generateImages = __esm({
5182
5206
  },
5183
5207
  description: "One or more image briefs describing what you want. Focus on subject, mood, style, and intended use \u2014 the tool optimizes your brief into a model-ready prompt automatically. Multiple briefs run in parallel."
5184
5208
  },
5209
+ referenceImage: {
5210
+ type: "string",
5211
+ description: "Optional URL of a single reference image to guide the generation \u2014 for style, subject, character consistency, or composition. Your prompt still describes the desired result; the reference conditions it. Applies to every prompt in the batch."
5212
+ },
5185
5213
  width: {
5186
5214
  type: "number",
5187
5215
  description: "Image width in pixels. Default 2048. Range: 2048-4096."
@@ -5214,6 +5242,7 @@ async function execute7(input, onLog, context) {
5214
5242
  width: input.width,
5215
5243
  height: input.height,
5216
5244
  transparentBackground: input.transparentBackground,
5245
+ enhancePrompts: false,
5217
5246
  onLog,
5218
5247
  imageGenerationModel: resolveModel(
5219
5248
  "imageGeneration",
@@ -15,12 +15,19 @@ Examples of good density:
15
15
  These are non-negotiable. Violating them produces bad output.
16
16
 
17
17
  - **No hex codes.** The model renders hex codes as visible text in the image. Describe colors by name and relationship: "deep emerald green with a smooth satin finish" or "warm sand beige fading into pale desaturated blue" — never "#7C3AED".
18
- - **No quoted strings.** Any single or double quoted string gets rendered as literal text in the image.
19
18
  - **No physical object framing.** Words like "artwork", "painting", "canvas", "print", "app icon", "square digital artwork" produce photorealistic mockups of a painting in a frame or an icon inset on a background. Describe the visual content directly.
20
- - **No text triggers.** Words like "poster", "magazine cover", "editorial spread", "sign", or brand names risk rendering literal text, mastheads, or mockup layouts. If you want an editorial photography *style*, describe the photographic qualities — not the format.
21
19
  - **Describe what you want, not what you don't want.** Negation doesn't work — "street with no cars" activates "cars." Say "empty street" instead.
22
20
  - **No body part positioning.** Don't describe specific arrangements of arms, legs, or limbs.
23
- - **No brand names.** Things like "Apple style" or "Nintendo style" will generate literal logos in the output.
21
+ - **No other brands as a style shortcut.** Don't borrow another company's identity as shorthand — a phrase like "Apple style" or "Nintendo style" renders that company's literal logo. (A brand's *own* name or wordmark on its *own* asset is intended text, not this — see Text & wordmarks below.)
22
+
23
+ ## Text & wordmarks
24
+
25
+ The model renders text well — but only the text you tell it to, so quotation marks mean "render this literally." Use them deliberately.
26
+
27
+ - **Reproduce intended text exactly.** When the brief names a wordmark, brand name, label, sign, headline, or UI copy, carry the exact string through in quotes — e.g. a wordmark reading "Solid Credit". Never drop it, paraphrase it, or genericize it to "a wordmark": the literal text is usually the whole point of the asset, and if you omit it the model fills the space with an invented placeholder.
28
+ - **Direct the typography and placement.** Specify weight, case, color, and position so the text lands where the designer wants it — "a near-black grotesque sans-serif wordmark reading 'Solid Credit', centered directly below the mark."
29
+ - **Keep it short.** Wordmarks, labels, and short taglines render reliably; full sentences and paragraphs degrade into garbled glyphs. Trim long copy to the few words that matter, or leave it out.
30
+ - **Don't summon text you don't want.** When the image should have no text, don't quote stray descriptive phrases, and avoid format words that imply copy — "poster", "magazine cover", "sign", "billboard" — which can produce spurious text or mastheads. For an editorial *style*, describe the photographic qualities, not the format.
24
31
 
25
32
  ## Composition
26
33
 
@@ -34,6 +41,7 @@ You'll receive context about the generation parameters. Use them:
34
41
 
35
42
  - **Dimensions**: If the image is wide (landscape), compose horizontally. If tall (portrait), compose vertically. If square, center the subject.
36
43
  - **Transparent background**: The background will be removed after generation and the image will be trimmed to the subject bounds (no extra padding). Don't describe elaborate backgrounds — focus on the subject. Describe it as an isolated element.
44
+ - **Reference image**: When a reference image is provided, the generation model receives it alongside your prompt to guide style, subject, or composition. Write the prompt to *complement* the reference, not duplicate it: describe the scene, action, and anything new or changed, and lean on the reference for what it already establishes (a specific face, product, logo, or art style). Don't exhaustively re-describe those — over-specifying competes with the reference image and can distort it.
37
45
 
38
46
  ## Photography prompts
39
47
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mindstudio-ai/remy",
3
- "version": "0.1.197",
3
+ "version": "0.1.199",
4
4
  "description": "MindStudio coding agent",
5
5
  "repository": {
6
6
  "type": "git",