@mindstudio-ai/remy 0.1.197 → 0.1.199
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js
CHANGED
|
@@ -4232,7 +4232,15 @@ var SYSTEM_PROMPT = readAsset(
|
|
|
4232
4232
|
"subagents/designExpert/tools/images/enhance-image-prompt.md"
|
|
4233
4233
|
);
|
|
4234
4234
|
async function enhanceImagePrompt(params) {
|
|
4235
|
-
const {
|
|
4235
|
+
const {
|
|
4236
|
+
brief,
|
|
4237
|
+
width,
|
|
4238
|
+
height,
|
|
4239
|
+
transparentBackground,
|
|
4240
|
+
hasReferenceImage,
|
|
4241
|
+
onLog,
|
|
4242
|
+
model
|
|
4243
|
+
} = params;
|
|
4236
4244
|
const contextParts = [
|
|
4237
4245
|
`Dimensions: ${width}x${height}${width > height ? " (landscape)" : width < height ? " (portrait)" : " (square)"}`
|
|
4238
4246
|
];
|
|
@@ -4241,6 +4249,11 @@ async function enhanceImagePrompt(params) {
|
|
|
4241
4249
|
"Transparent background: yes \u2014 the background will be removed. Focus on the subject as an isolated element."
|
|
4242
4250
|
);
|
|
4243
4251
|
}
|
|
4252
|
+
if (hasReferenceImage) {
|
|
4253
|
+
contextParts.push(
|
|
4254
|
+
"Reference image: yes \u2014 a reference image is provided to the generation model alongside this prompt to guide style, subject, or composition. Complement it; don't re-describe what it already carries."
|
|
4255
|
+
);
|
|
4256
|
+
}
|
|
4244
4257
|
const context = `<context>
|
|
4245
4258
|
${contextParts.join("\n")}
|
|
4246
4259
|
</context>`;
|
|
@@ -4271,6 +4284,7 @@ async function generateImageAssets(opts) {
|
|
|
4271
4284
|
prompts,
|
|
4272
4285
|
sourceImages,
|
|
4273
4286
|
transparentBackground,
|
|
4287
|
+
enhancePrompts,
|
|
4274
4288
|
onLog,
|
|
4275
4289
|
imageGenerationModel: genModel,
|
|
4276
4290
|
imageAnalysisModel,
|
|
@@ -4280,21 +4294,29 @@ async function generateImageAssets(opts) {
|
|
|
4280
4294
|
const height = opts.height || 2048;
|
|
4281
4295
|
const config = { width, height };
|
|
4282
4296
|
if (sourceImages?.length) {
|
|
4297
|
+
const [firstImage] = sourceImages;
|
|
4283
4298
|
config.images = sourceImages;
|
|
4284
|
-
|
|
4285
|
-
|
|
4286
|
-
|
|
4299
|
+
config.source_images = sourceImages;
|
|
4300
|
+
config.image_ref = sourceImages;
|
|
4301
|
+
config.image = firstImage;
|
|
4302
|
+
config.image_url = firstImage;
|
|
4303
|
+
config.source_image = firstImage;
|
|
4304
|
+
config.source = firstImage;
|
|
4305
|
+
}
|
|
4306
|
+
const hasReference = !!sourceImages?.length;
|
|
4307
|
+
const enhancedPrompts = enhancePrompts ? await Promise.all(
|
|
4287
4308
|
prompts.map(
|
|
4288
4309
|
(brief) => enhanceImagePrompt({
|
|
4289
4310
|
brief,
|
|
4290
4311
|
width,
|
|
4291
4312
|
height,
|
|
4292
4313
|
transparentBackground,
|
|
4314
|
+
hasReferenceImage: hasReference,
|
|
4293
4315
|
onLog,
|
|
4294
4316
|
model: imagePromptEnhancerModel
|
|
4295
4317
|
})
|
|
4296
4318
|
)
|
|
4297
|
-
);
|
|
4319
|
+
) : prompts;
|
|
4298
4320
|
let imageUrls;
|
|
4299
4321
|
if (enhancedPrompts.length === 1) {
|
|
4300
4322
|
const step = JSON.stringify({
|
|
@@ -4364,7 +4386,7 @@ async function generateImageAssets(opts) {
|
|
|
4364
4386
|
if (url.startsWith("Error")) {
|
|
4365
4387
|
return {
|
|
4366
4388
|
prompt: prompts[i],
|
|
4367
|
-
|
|
4389
|
+
...enhancePrompts && { enhancedPrompt: enhancedPrompts[i] },
|
|
4368
4390
|
error: url
|
|
4369
4391
|
};
|
|
4370
4392
|
}
|
|
@@ -4377,7 +4399,7 @@ async function generateImageAssets(opts) {
|
|
|
4377
4399
|
return {
|
|
4378
4400
|
url,
|
|
4379
4401
|
prompt: prompts[i],
|
|
4380
|
-
|
|
4402
|
+
...enhancePrompts && { enhancedPrompt: enhancedPrompts[i] },
|
|
4381
4403
|
analysis,
|
|
4382
4404
|
width,
|
|
4383
4405
|
height
|
|
@@ -4402,6 +4424,10 @@ var definition6 = {
|
|
|
4402
4424
|
},
|
|
4403
4425
|
description: "One or more image briefs describing what you want. Focus on subject, mood, style, and intended use \u2014 the tool optimizes your brief into a model-ready prompt automatically. Multiple briefs run in parallel."
|
|
4404
4426
|
},
|
|
4427
|
+
referenceImage: {
|
|
4428
|
+
type: "string",
|
|
4429
|
+
description: "Optional URL of a single reference image to guide the generation \u2014 for style, subject, character consistency, or composition. Your prompt still describes the desired result; the reference conditions it. Applies to every prompt in the batch."
|
|
4430
|
+
},
|
|
4405
4431
|
width: {
|
|
4406
4432
|
type: "number",
|
|
4407
4433
|
description: "Image width in pixels. Default 2048. Range: 2048-4096."
|
|
@@ -4424,6 +4450,8 @@ async function execute6(input, onLog, context) {
|
|
|
4424
4450
|
width: input.width,
|
|
4425
4451
|
height: input.height,
|
|
4426
4452
|
transparentBackground: input.transparentBackground,
|
|
4453
|
+
sourceImages: input.referenceImage ? [input.referenceImage] : void 0,
|
|
4454
|
+
enhancePrompts: true,
|
|
4427
4455
|
onLog,
|
|
4428
4456
|
imageGenerationModel: resolveModel(
|
|
4429
4457
|
"imageGeneration",
|
|
@@ -4493,6 +4521,7 @@ async function execute7(input, onLog, context) {
|
|
|
4493
4521
|
width: input.width,
|
|
4494
4522
|
height: input.height,
|
|
4495
4523
|
transparentBackground: input.transparentBackground,
|
|
4524
|
+
enhancePrompts: false,
|
|
4496
4525
|
onLog,
|
|
4497
4526
|
imageGenerationModel: resolveModel(
|
|
4498
4527
|
"imageGeneration",
|
package/dist/index.js
CHANGED
|
@@ -4957,7 +4957,15 @@ var init_screenshot3 = __esm({
|
|
|
4957
4957
|
|
|
4958
4958
|
// src/subagents/designExpert/tools/images/enhancePrompt.ts
|
|
4959
4959
|
async function enhanceImagePrompt(params) {
|
|
4960
|
-
const {
|
|
4960
|
+
const {
|
|
4961
|
+
brief,
|
|
4962
|
+
width,
|
|
4963
|
+
height,
|
|
4964
|
+
transparentBackground,
|
|
4965
|
+
hasReferenceImage,
|
|
4966
|
+
onLog,
|
|
4967
|
+
model
|
|
4968
|
+
} = params;
|
|
4961
4969
|
const contextParts = [
|
|
4962
4970
|
`Dimensions: ${width}x${height}${width > height ? " (landscape)" : width < height ? " (portrait)" : " (square)"}`
|
|
4963
4971
|
];
|
|
@@ -4966,6 +4974,11 @@ async function enhanceImagePrompt(params) {
|
|
|
4966
4974
|
"Transparent background: yes \u2014 the background will be removed. Focus on the subject as an isolated element."
|
|
4967
4975
|
);
|
|
4968
4976
|
}
|
|
4977
|
+
if (hasReferenceImage) {
|
|
4978
|
+
contextParts.push(
|
|
4979
|
+
"Reference image: yes \u2014 a reference image is provided to the generation model alongside this prompt to guide style, subject, or composition. Complement it; don't re-describe what it already carries."
|
|
4980
|
+
);
|
|
4981
|
+
}
|
|
4969
4982
|
const context = `<context>
|
|
4970
4983
|
${contextParts.join("\n")}
|
|
4971
4984
|
</context>`;
|
|
@@ -5006,6 +5019,7 @@ async function generateImageAssets(opts) {
|
|
|
5006
5019
|
prompts,
|
|
5007
5020
|
sourceImages,
|
|
5008
5021
|
transparentBackground,
|
|
5022
|
+
enhancePrompts,
|
|
5009
5023
|
onLog,
|
|
5010
5024
|
imageGenerationModel: genModel,
|
|
5011
5025
|
imageAnalysisModel,
|
|
@@ -5015,21 +5029,29 @@ async function generateImageAssets(opts) {
|
|
|
5015
5029
|
const height = opts.height || 2048;
|
|
5016
5030
|
const config = { width, height };
|
|
5017
5031
|
if (sourceImages?.length) {
|
|
5032
|
+
const [firstImage] = sourceImages;
|
|
5018
5033
|
config.images = sourceImages;
|
|
5019
|
-
|
|
5020
|
-
|
|
5021
|
-
|
|
5034
|
+
config.source_images = sourceImages;
|
|
5035
|
+
config.image_ref = sourceImages;
|
|
5036
|
+
config.image = firstImage;
|
|
5037
|
+
config.image_url = firstImage;
|
|
5038
|
+
config.source_image = firstImage;
|
|
5039
|
+
config.source = firstImage;
|
|
5040
|
+
}
|
|
5041
|
+
const hasReference = !!sourceImages?.length;
|
|
5042
|
+
const enhancedPrompts = enhancePrompts ? await Promise.all(
|
|
5022
5043
|
prompts.map(
|
|
5023
5044
|
(brief) => enhanceImagePrompt({
|
|
5024
5045
|
brief,
|
|
5025
5046
|
width,
|
|
5026
5047
|
height,
|
|
5027
5048
|
transparentBackground,
|
|
5049
|
+
hasReferenceImage: hasReference,
|
|
5028
5050
|
onLog,
|
|
5029
5051
|
model: imagePromptEnhancerModel
|
|
5030
5052
|
})
|
|
5031
5053
|
)
|
|
5032
|
-
);
|
|
5054
|
+
) : prompts;
|
|
5033
5055
|
let imageUrls;
|
|
5034
5056
|
if (enhancedPrompts.length === 1) {
|
|
5035
5057
|
const step = JSON.stringify({
|
|
@@ -5099,7 +5121,7 @@ async function generateImageAssets(opts) {
|
|
|
5099
5121
|
if (url.startsWith("Error")) {
|
|
5100
5122
|
return {
|
|
5101
5123
|
prompt: prompts[i],
|
|
5102
|
-
|
|
5124
|
+
...enhancePrompts && { enhancedPrompt: enhancedPrompts[i] },
|
|
5103
5125
|
error: url
|
|
5104
5126
|
};
|
|
5105
5127
|
}
|
|
@@ -5112,7 +5134,7 @@ async function generateImageAssets(opts) {
|
|
|
5112
5134
|
return {
|
|
5113
5135
|
url,
|
|
5114
5136
|
prompt: prompts[i],
|
|
5115
|
-
|
|
5137
|
+
...enhancePrompts && { enhancedPrompt: enhancedPrompts[i] },
|
|
5116
5138
|
analysis,
|
|
5117
5139
|
width,
|
|
5118
5140
|
height
|
|
@@ -5144,6 +5166,8 @@ async function execute6(input, onLog, context) {
|
|
|
5144
5166
|
width: input.width,
|
|
5145
5167
|
height: input.height,
|
|
5146
5168
|
transparentBackground: input.transparentBackground,
|
|
5169
|
+
sourceImages: input.referenceImage ? [input.referenceImage] : void 0,
|
|
5170
|
+
enhancePrompts: true,
|
|
5147
5171
|
onLog,
|
|
5148
5172
|
imageGenerationModel: resolveModel(
|
|
5149
5173
|
"imageGeneration",
|
|
@@ -5182,6 +5206,10 @@ var init_generateImages = __esm({
|
|
|
5182
5206
|
},
|
|
5183
5207
|
description: "One or more image briefs describing what you want. Focus on subject, mood, style, and intended use \u2014 the tool optimizes your brief into a model-ready prompt automatically. Multiple briefs run in parallel."
|
|
5184
5208
|
},
|
|
5209
|
+
referenceImage: {
|
|
5210
|
+
type: "string",
|
|
5211
|
+
description: "Optional URL of a single reference image to guide the generation \u2014 for style, subject, character consistency, or composition. Your prompt still describes the desired result; the reference conditions it. Applies to every prompt in the batch."
|
|
5212
|
+
},
|
|
5185
5213
|
width: {
|
|
5186
5214
|
type: "number",
|
|
5187
5215
|
description: "Image width in pixels. Default 2048. Range: 2048-4096."
|
|
@@ -5214,6 +5242,7 @@ async function execute7(input, onLog, context) {
|
|
|
5214
5242
|
width: input.width,
|
|
5215
5243
|
height: input.height,
|
|
5216
5244
|
transparentBackground: input.transparentBackground,
|
|
5245
|
+
enhancePrompts: false,
|
|
5217
5246
|
onLog,
|
|
5218
5247
|
imageGenerationModel: resolveModel(
|
|
5219
5248
|
"imageGeneration",
|
|
@@ -15,12 +15,19 @@ Examples of good density:
|
|
|
15
15
|
These are non-negotiable. Violating them produces bad output.
|
|
16
16
|
|
|
17
17
|
- **No hex codes.** The model renders hex codes as visible text in the image. Describe colors by name and relationship: "deep emerald green with a smooth satin finish" or "warm sand beige fading into pale desaturated blue" — never "#7C3AED".
|
|
18
|
-
- **No quoted strings.** Any single or double quoted string gets rendered as literal text in the image.
|
|
19
18
|
- **No physical object framing.** Words like "artwork", "painting", "canvas", "print", "app icon", "square digital artwork" produce photorealistic mockups of a painting in a frame or an icon inset on a background. Describe the visual content directly.
|
|
20
|
-
- **No text triggers.** Words like "poster", "magazine cover", "editorial spread", "sign", or brand names risk rendering literal text, mastheads, or mockup layouts. If you want an editorial photography *style*, describe the photographic qualities — not the format.
|
|
21
19
|
- **Describe what you want, not what you don't want.** Negation doesn't work — "street with no cars" activates "cars." Say "empty street" instead.
|
|
22
20
|
- **No body part positioning.** Don't describe specific arrangements of arms, legs, or limbs.
|
|
23
|
-
- **No
|
|
21
|
+
- **No other brands as a style shortcut.** Don't borrow another company's identity as shorthand — "Apple style", "Nintendo style" — it renders that company's literal logo. (A brand's *own* name or wordmark on its *own* asset is intended text, not this — see Text & wordmarks below.)
|
|
22
|
+
|
|
23
|
+
## Text & wordmarks
|
|
24
|
+
|
|
25
|
+
The model renders text well — but only the text you tell it to, so quotation marks mean "render this literally." Use them deliberately.
|
|
26
|
+
|
|
27
|
+
- **Reproduce intended text exactly.** When the brief names a wordmark, brand name, label, sign, headline, or UI copy, carry the exact string through in quotes — e.g. a wordmark reading "Solid Credit". Never drop it, paraphrase it, or genericize it to "a wordmark": the literal text is usually the whole point of the asset, and if you omit it the model fills the space with an invented placeholder.
|
|
28
|
+
- **Direct the typography and placement.** Specify weight, case, color, and position so the text lands where the designer wants it — "a near-black grotesque sans-serif wordmark reading 'Solid Credit', centered directly below the mark."
|
|
29
|
+
- **Keep it short.** Wordmarks, labels, and short taglines render reliably; full sentences and paragraphs degrade into garbled glyphs. Trim long copy to the few words that matter, or leave it out.
|
|
30
|
+
- **Don't summon text you don't want.** When the image should have no text, don't quote stray descriptive phrases, and avoid format words that imply copy — "poster", "magazine cover", "sign", "billboard" — which can produce spurious text or mastheads. For an editorial *style*, describe the photographic qualities, not the format.
|
|
24
31
|
|
|
25
32
|
## Composition
|
|
26
33
|
|
|
@@ -34,6 +41,7 @@ You'll receive context about the generation parameters. Use them:
|
|
|
34
41
|
|
|
35
42
|
- **Dimensions**: If the image is wide (landscape), compose horizontally. If tall (portrait), compose vertically. If square, center the subject.
|
|
36
43
|
- **Transparent background**: The background will be removed after generation and the image will be trimmed to the subject bounds (no extra padding). Don't describe elaborate backgrounds — focus on the subject. Describe it as an isolated element.
|
|
44
|
+
- **Reference image**: When a reference image is provided, the generation model receives it alongside your prompt to guide style, subject, or composition. Write the prompt to *complement* the reference, not duplicate it: describe the scene, action, and anything new or changed, and lean on the reference for what it already establishes (a specific face, product, logo, or art style). Don't exhaustively re-describe those — over-specifying competes with the reference image and can distort it.
|
|
37
45
|
|
|
38
46
|
## Photography prompts
|
|
39
47
|
|