@rubytech/taskmaster 1.0.97 → 1.0.99
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/agents/system-prompt.js +2 -1
- package/dist/agents/taskmaster-tools.js +6 -0
- package/dist/agents/tool-policy.js +2 -0
- package/dist/agents/tools/image-generate-api.js +154 -0
- package/dist/agents/tools/image-generate-tool.js +145 -0
- package/dist/build-info.json +3 -3
- package/dist/control-ui/assets/{index-CfNT98JU.js → index-BiXCzgVk.js} +252 -248
- package/dist/control-ui/assets/index-BiXCzgVk.js.map +1 -0
- package/dist/control-ui/assets/{index-6WdtDXJj.css → index-Bj8TaDNH.css} +1 -1
- package/dist/control-ui/index.html +2 -2
- package/dist/gateway/chat-sanitize.js +59 -4
- package/dist/gateway/control-ui.js +8 -7
- package/package.json +1 -1
- package/scripts/install.sh +2 -2
- package/skills/image-gen/SKILL.md +68 -0
- package/skills/image-gen/references/models.md +83 -0
- package/skills/image-gen/references/prompting.md +184 -0
- package/skills/image-gen/references/styles.md +113 -0
- package/skills/image-gen/references/troubleshooting.md +93 -0
- package/taskmaster-docs/USER-GUIDE.md +67 -6
- package/dist/control-ui/assets/index-CfNT98JU.js.map +0 -1
|
@@ -173,6 +173,29 @@ function extractMediaRefs(text) {
|
|
|
173
173
|
}
|
|
174
174
|
return refs;
|
|
175
175
|
}
|
|
176
|
+
// Pattern: MEDIA:/absolute/path (used by tool results like image_generate)
|
|
177
|
+
const MEDIA_PREFIX_PATTERN = /\bMEDIA:(\S+)/g;
|
|
178
|
+
/**
|
|
179
|
+
* Parse MEDIA:/path references from text to extract file paths.
|
|
180
|
+
* Tool results (e.g. image_generate) use this format instead of
|
|
181
|
+
* [media attached: ...] annotations.
|
|
182
|
+
*/
|
|
183
|
+
function extractMediaPrefixRefs(text) {
|
|
184
|
+
if (!text.includes("MEDIA:"))
|
|
185
|
+
return [];
|
|
186
|
+
const refs = [];
|
|
187
|
+
let match;
|
|
188
|
+
MEDIA_PREFIX_PATTERN.lastIndex = 0;
|
|
189
|
+
while ((match = MEDIA_PREFIX_PATTERN.exec(text)) !== null) {
|
|
190
|
+
const absPath = match[1]?.trim();
|
|
191
|
+
if (absPath) {
|
|
192
|
+
const ext = absPath.split(".").pop()?.toLowerCase() ?? "";
|
|
193
|
+
const mimeType = ext === "jpg" || ext === "jpeg" ? "image/jpeg" : "image/png";
|
|
194
|
+
refs.push({ absPath, mimeType });
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
return refs;
|
|
198
|
+
}
|
|
176
199
|
function mediaRefToUrl(ref, workspaceRoot) {
|
|
177
200
|
const relPath = nodePath.relative(workspaceRoot, ref.absPath);
|
|
178
201
|
// Must stay within workspace (no ../ escapes)
|
|
@@ -254,8 +277,13 @@ function sanitizeMessageMedia(message, workspaceRoot) {
|
|
|
254
277
|
if (!message || typeof message !== "object")
|
|
255
278
|
return message;
|
|
256
279
|
const entry = message;
|
|
257
|
-
// Collect media refs from text content (works for both string and array content)
|
|
258
|
-
|
|
280
|
+
// Collect media refs from text content (works for both string and array content).
|
|
281
|
+
// MEDIA: prefix refs are only extracted from tool result messages — assistant text
|
|
282
|
+
// may echo "MEDIA:" but that should not produce a duplicate image block.
|
|
283
|
+
const role = typeof entry.role === "string" ? entry.role.toLowerCase() : "";
|
|
284
|
+
const isToolResult = role === "toolresult" || role === "tool_result" ||
|
|
285
|
+
typeof entry.toolCallId === "string" || typeof entry.tool_call_id === "string";
|
|
286
|
+
const mediaRefs = extractMediaRefsFromMessage(entry, isToolResult);
|
|
259
287
|
// Build URL-based image blocks from annotations
|
|
260
288
|
const imageBlocks = [];
|
|
261
289
|
for (const ref of mediaRefs) {
|
|
@@ -283,6 +311,28 @@ function sanitizeMessageMedia(message, workspaceRoot) {
|
|
|
283
311
|
}
|
|
284
312
|
return true;
|
|
285
313
|
});
|
|
314
|
+
// Strip MEDIA:/path text from ALL messages (tool results AND assistant echoes).
|
|
315
|
+
// This prevents raw file paths from ever showing in the chat UI.
|
|
316
|
+
for (let i = 0; i < filtered.length; i++) {
|
|
317
|
+
const block = filtered[i];
|
|
318
|
+
if (block.type === "text" && typeof block.text === "string" && block.text.includes("MEDIA:")) {
|
|
319
|
+
const cleaned = block.text
|
|
320
|
+
.split(/\r?\n/)
|
|
321
|
+
.filter((line) => !/\bMEDIA:\S+/.test(line))
|
|
322
|
+
.join("\n")
|
|
323
|
+
.trim();
|
|
324
|
+
if (!cleaned) {
|
|
325
|
+
filtered.splice(i, 1);
|
|
326
|
+
i--;
|
|
327
|
+
didChange = true;
|
|
328
|
+
}
|
|
329
|
+
else if (cleaned !== block.text) {
|
|
330
|
+
filtered[i] = { ...block, text: cleaned };
|
|
331
|
+
didChange = true;
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
// Add URL-based image blocks from tool result annotations
|
|
286
336
|
if (imageBlocks.length > 0) {
|
|
287
337
|
didChange = true;
|
|
288
338
|
filtered.push(...imageBlocks);
|
|
@@ -291,9 +341,12 @@ function sanitizeMessageMedia(message, workspaceRoot) {
|
|
|
291
341
|
return message;
|
|
292
342
|
return { ...entry, content: filtered };
|
|
293
343
|
}
|
|
294
|
-
function extractMediaRefsFromMessage(entry) {
|
|
344
|
+
function extractMediaRefsFromMessage(entry, includeMediaPrefix) {
|
|
295
345
|
if (typeof entry.content === "string") {
|
|
296
|
-
|
|
346
|
+
const refs = extractMediaRefs(entry.content);
|
|
347
|
+
if (includeMediaPrefix)
|
|
348
|
+
refs.push(...extractMediaPrefixRefs(entry.content));
|
|
349
|
+
return refs;
|
|
297
350
|
}
|
|
298
351
|
if (Array.isArray(entry.content)) {
|
|
299
352
|
const refs = [];
|
|
@@ -303,6 +356,8 @@ function extractMediaRefsFromMessage(entry) {
|
|
|
303
356
|
const b = block;
|
|
304
357
|
if (b.type === "text" && typeof b.text === "string") {
|
|
305
358
|
refs.push(...extractMediaRefs(b.text));
|
|
359
|
+
if (includeMediaPrefix)
|
|
360
|
+
refs.push(...extractMediaPrefixRefs(b.text));
|
|
306
361
|
}
|
|
307
362
|
}
|
|
308
363
|
return refs;
|
|
@@ -474,26 +474,27 @@ export function handlePublicChatHttpRequest(req, res, opts) {
|
|
|
474
474
|
/** Widget script content — self-contained JS for embedding. */
|
|
475
475
|
const WIDGET_SCRIPT = `(function(){
|
|
476
476
|
"use strict";
|
|
477
|
-
var cfg={server:"",accountId:""};
|
|
477
|
+
var cfg={server:"",accountId:"",color:"#1a1a2e"};
|
|
478
478
|
var isOpen=false;
|
|
479
479
|
var btn,overlay,iframe;
|
|
480
480
|
|
|
481
481
|
function init(opts){
|
|
482
482
|
if(opts&&opts.server) cfg.server=opts.server.replace(/\\/$/,"");
|
|
483
483
|
if(opts&&opts.accountId) cfg.accountId=opts.accountId;
|
|
484
|
+
if(opts&&opts.color) cfg.color=opts.color;
|
|
484
485
|
build();
|
|
485
486
|
}
|
|
486
487
|
|
|
487
488
|
function build(){
|
|
488
489
|
var css=document.createElement("style");
|
|
489
490
|
css.textContent=[
|
|
490
|
-
".tm-widget-btn{position:fixed;bottom:20px;right:20px;width:
|
|
491
|
-
"border-radius:50%;background
|
|
492
|
-
"box-shadow:0
|
|
491
|
+
".tm-widget-btn{position:fixed;bottom:20px;right:20px;width:48px;height:48px;",
|
|
492
|
+
"border-radius:50%;background:"+cfg.color+";color:#fff;border:none;cursor:pointer;",
|
|
493
|
+
"box-shadow:0 2px 8px rgba(0,0,0,.3);z-index:999999;font-size:22px;",
|
|
493
494
|
"display:flex;align-items:center;justify-content:center;transition:transform .2s}",
|
|
494
|
-
".tm-widget-btn:hover{transform:scale(1.
|
|
495
|
-
".tm-widget-overlay{position:fixed;bottom:
|
|
496
|
-
"max-width:calc(100vw - 40px);max-height:calc(100vh -
|
|
495
|
+
".tm-widget-btn:hover{transform:scale(1.08)}",
|
|
496
|
+
".tm-widget-overlay{position:fixed;bottom:78px;right:20px;width:400px;height:600px;",
|
|
497
|
+
"max-width:calc(100vw - 40px);max-height:calc(100vh - 98px);",
|
|
497
498
|
"border-radius:12px;overflow:hidden;box-shadow:0 8px 30px rgba(0,0,0,.3);",
|
|
498
499
|
"z-index:999998;display:none;background:#1a1a2e}",
|
|
499
500
|
".tm-widget-overlay.open{display:block}",
|
package/package.json
CHANGED
package/scripts/install.sh
CHANGED
|
@@ -4,10 +4,10 @@ set -euo pipefail
|
|
|
4
4
|
# Taskmaster — one-command install for fresh devices (Pi or Mac).
|
|
5
5
|
#
|
|
6
6
|
# Usage:
|
|
7
|
-
# curl -fsSL https://taskmaster.bot/install.sh | bash
|
|
7
|
+
# curl -fsSL https://taskmaster.bot/install.sh | sudo bash
|
|
8
8
|
#
|
|
9
9
|
# With custom port:
|
|
10
|
-
# curl -fsSL https://taskmaster.bot/install.sh | bash -s -- --port 19000
|
|
10
|
+
# curl -fsSL https://taskmaster.bot/install.sh | sudo bash -s -- --port 19000
|
|
11
11
|
|
|
12
12
|
PORT=""
|
|
13
13
|
for arg in "$@"; do
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: image-gen
|
|
3
|
+
description: Generate images using Google AI models (Gemini, Imagen 4). Guides users through model selection, style choices, and expert prompt crafting.
|
|
4
|
+
metadata: {"taskmaster":{"emoji":"🎨"}}
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Image Generation
|
|
8
|
+
|
|
9
|
+
Generate images from text descriptions using Google AI models. Two model families are available: Gemini (conversational, multi-turn editing) and Imagen 4 (dedicated generation, higher fidelity).
|
|
10
|
+
|
|
11
|
+
## When to activate
|
|
12
|
+
|
|
13
|
+
- User asks to create, generate, design, draw, or make an image, illustration, logo, photo, graphic, or visual
|
|
14
|
+
- User sends an image and asks to edit, modify, or create a variation
|
|
15
|
+
- User asks about image generation capabilities or model differences
|
|
16
|
+
|
|
17
|
+
## Prerequisites
|
|
18
|
+
|
|
19
|
+
Requires a Google AI API key. If missing, activate the `google-ai` skill to guide the user through setup first.
|
|
20
|
+
|
|
21
|
+
## Critical rule
|
|
22
|
+
|
|
23
|
+
Only the `image_generate` tool produces images. Never write file paths, MEDIA: references, or image URLs in text. If you did not call `image_generate`, no image was generated. There is no other mechanism.
|
|
24
|
+
|
|
25
|
+
## Quick Model Reference
|
|
26
|
+
|
|
27
|
+
| Model | Speed | Best for |
|
|
28
|
+
|-------|-------|----------|
|
|
29
|
+
| gemini-2.5-flash-image | Fast | Quick drafts, iteration, editing existing images |
|
|
30
|
+
| gemini-3-pro-image-preview | Moderate | Text in images, highest Gemini quality, 4K |
|
|
31
|
+
| imagen-4.0-fast-generate-001 | Fast | Rapid photo-realistic output |
|
|
32
|
+
| imagen-4.0-generate-001 | Moderate | Balanced quality and speed |
|
|
33
|
+
| imagen-4.0-ultra-generate-001 | Slow | Maximum fidelity, hero images |
|
|
34
|
+
|
|
35
|
+
## References
|
|
36
|
+
|
|
37
|
+
| Reference | When to load |
|
|
38
|
+
|-----------|-------------|
|
|
39
|
+
| `references/models.md` | Choosing between models, understanding API differences, resolution or feature questions |
|
|
40
|
+
| `references/styles.md` | Discussing style, aspect ratio, mood, lighting, or colour choices with the user |
|
|
41
|
+
| `references/prompting.md` | Crafting the generation prompt, iterating on results, business use cases |
|
|
42
|
+
| `references/troubleshooting.md` | Any generation error — quota, auth, content policy, model availability |
|
|
43
|
+
|
|
44
|
+
Load the relevant reference before proceeding. For a typical generation request: load `prompting.md` to craft the prompt, consult `models.md` if the user has specific quality/speed needs, and check `styles.md` if style discussion is needed. **On any error**, load `troubleshooting.md` immediately — it has browser-assisted resolution steps for every common failure.
|
|
45
|
+
|
|
46
|
+
## Workflow
|
|
47
|
+
|
|
48
|
+
### Phase 1: Gather (conversation with user)
|
|
49
|
+
|
|
50
|
+
1. **Understand intent** — What does the user want? Product shot, social graphic, logo concept, illustration?
|
|
51
|
+
2. **Recommend model** — Match speed/quality needs to a model. Default to Gemini Flash for quick work, Imagen Standard for quality.
|
|
52
|
+
3. **Discuss style** — Ask about style, mood, and aspect ratio if the user hasn't specified. Suggest options based on use case.
|
|
53
|
+
|
|
54
|
+
### Phase 2: Generate (MUST call tool)
|
|
55
|
+
|
|
56
|
+
4. **Craft prompt and generate in a single turn** — Build an expert prompt using `references/prompting.md`. Show the prompt to the user AND call `image_generate` in the same response. Do not show the prompt without calling the tool. Do not wait for approval of the prompt before generating — generate immediately and iterate after.
|
|
57
|
+
|
|
58
|
+
This step is a hard gate. You MUST call `image_generate` here. The conversation cannot continue past this point without a tool call. If you respond with text only, you have failed this step.
|
|
59
|
+
|
|
60
|
+
### Phase 3: Deliver (requires tool result)
|
|
61
|
+
|
|
62
|
+
5. **Present the result** — The tool result contains the generated image. The user sees it inline. Offer to refine: adjust style, change composition, try a different model, or edit specific elements.
|
|
63
|
+
|
|
64
|
+
You cannot reach this step without the tool result from step 4. If `image_generate` was not called, go back to step 4.
|
|
65
|
+
|
|
66
|
+
## Error Handling
|
|
67
|
+
|
|
68
|
+
If generation fails, **load `references/troubleshooting.md` and follow its guidance**. Common issues (quota exceeded, auth errors, content policy) are all resolvable in-session. Use the browser tool to navigate to Google AI Studio with the user and resolve billing, quota, or key issues collaboratively.
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# Image Generation Models
|
|
2
|
+
|
|
3
|
+
Two API backends, five models. Both use the same Google AI API key.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## API Backends
|
|
8
|
+
|
|
9
|
+
### Gemini Native (generateContent)
|
|
10
|
+
|
|
11
|
+
Conversational image generation built into the Gemini chat API. Supports multi-turn editing — you can generate an image, then ask to modify it in follow-up turns. Also supports image-to-image: send an existing image and ask for edits, style transfers, or variations.
|
|
12
|
+
|
|
13
|
+
The Gemini backend accepts the same `generateContent` call used for text, with `responseModalities: ["TEXT", "IMAGE"]`. This means image generation is part of a natural conversation flow.
|
|
14
|
+
|
|
15
|
+
### Imagen 4 (predict)
|
|
16
|
+
|
|
17
|
+
Dedicated image generation API optimized for fidelity. Single-shot generation only — no multi-turn editing. Can produce 1-4 images per request, enabling the user to pick the best result. Higher baseline quality for photo-realistic output.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Model Details
|
|
22
|
+
|
|
23
|
+
| Model | API | Speed | Max Resolution | Strengths | Limitations |
|
|
24
|
+
|-------|-----|-------|---------------|-----------|-------------|
|
|
25
|
+
| gemini-2.5-flash-image | Gemini | Fast | 2K | Quick iterations, low cost, multi-turn editing, image-to-image | Lower detail than Pro |
|
|
26
|
+
| gemini-3-pro-image-preview | Gemini | Moderate | 4K | Text rendering in images, thinking mode, accepts up to 14 reference images | Preview model, slower |
|
|
27
|
+
| imagen-4.0-fast-generate-001 | Imagen | Fast | 1K | Rapid photo-realistic generation | Lower resolution than Standard |
|
|
28
|
+
| imagen-4.0-generate-001 | Imagen | Moderate | 2K | Balanced quality and speed, good photo-realism | English only, 480-token prompt limit |
|
|
29
|
+
| imagen-4.0-ultra-generate-001 | Imagen | Slow | 2K | Maximum fidelity, finest detail | Slowest, English only |
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Decision Matrix
|
|
34
|
+
|
|
35
|
+
Choose based on the user's actual need, not the "best" model. Speed and iteration matter more than peak fidelity for most business use cases.
|
|
36
|
+
|
|
37
|
+
| Use case | Recommended model | Why |
|
|
38
|
+
|----------|-------------------|-----|
|
|
39
|
+
| Quick draft or iteration | gemini-2.5-flash-image | Fastest turnaround, supports editing in follow-up messages |
|
|
40
|
+
| Text in image (menu, sign, infographic) | gemini-3-pro-image-preview | Best text rendering of any model |
|
|
41
|
+
| Photo-realistic product shot | imagen-4.0-generate-001 or ultra | Imagen excels at photo-realism |
|
|
42
|
+
| Social media graphic | gemini-2.5-flash-image or imagen-4.0-fast | Speed matters for social content |
|
|
43
|
+
| Hero image or print material | imagen-4.0-ultra-generate-001 or gemini-3-pro at 4K | Maximum quality for final output |
|
|
44
|
+
| Edit or refine an existing image | gemini-2.5-flash-image | Only Gemini supports multi-turn editing |
|
|
45
|
+
| Multiple options to choose from | Any Imagen model | Imagen can generate 1-4 images per request |
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Key Differences Between Backends
|
|
50
|
+
|
|
51
|
+
**Aspect ratios:**
|
|
52
|
+
- Gemini supports wider ratios including 21:9 (ultrawide banners, website headers)
|
|
53
|
+
- Imagen is limited to 5 aspect ratios: 1:1, 3:4, 4:3, 9:16, 16:9
|
|
54
|
+
|
|
55
|
+
**Editing:**
|
|
56
|
+
- Gemini supports multi-turn editing — generate, then refine in conversation
|
|
57
|
+
- Imagen is single-shot only — each request is independent
|
|
58
|
+
|
|
59
|
+
**Batch output:**
|
|
60
|
+
- Imagen supports 1-4 images per request (`numberOfImages` parameter)
|
|
61
|
+
- Gemini generates 1 image per request
|
|
62
|
+
|
|
63
|
+
**Person generation:**
|
|
64
|
+
- Imagen supports `personGeneration` control ("dont_allow", "allow_adult", "allow_all")
|
|
65
|
+
- Gemini does not have this parameter
|
|
66
|
+
|
|
67
|
+
**Language:**
|
|
68
|
+
- Gemini accepts prompts in any language
|
|
69
|
+
- Imagen accepts English only, with a 480-token prompt limit
|
|
70
|
+
|
|
71
|
+
**Watermarking:**
|
|
72
|
+
- All models apply SynthID digital watermark to generated images
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## Default Recommendations
|
|
77
|
+
|
|
78
|
+
When the user doesn't specify a preference:
|
|
79
|
+
|
|
80
|
+
- **Start with Gemini Flash** for exploration and drafting — it's fast, cheap, and the user can iterate conversationally
|
|
81
|
+
- **Switch to Imagen Standard or Ultra** when the user is happy with the concept and wants maximum quality for the final output
|
|
82
|
+
- **Use Gemini Pro** when the image needs readable text (menus, signs, business cards, infographics)
|
|
83
|
+
- **Offer Imagen batch mode** when the user wants options — "I can generate 4 variations for you to pick from"
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# Expert Prompt Construction
|
|
2
|
+
|
|
3
|
+
A strong prompt is the difference between a usable image and a generic one. This reference covers prompt structure, quality modifiers, and business-specific examples.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Prompt Structure
|
|
8
|
+
|
|
9
|
+
Build prompts in this order. Each element adds specificity. Not every element is needed for every image — use judgement based on the request.
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
Subject → Style → Composition → Lighting → Mood → Quality modifiers
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
**Subject:** What is in the image. Be concrete — "a golden retriever puppy sitting on grass" beats "a dog." Include materials, textures, and context when relevant.
|
|
16
|
+
|
|
17
|
+
**Style:** How the image looks. Photorealistic, illustration, watercolour, etc. See `styles.md` for vocabulary.
|
|
18
|
+
|
|
19
|
+
**Composition:** How the frame is arranged. Camera angle, framing, depth of field, negative space.
|
|
20
|
+
|
|
21
|
+
**Lighting:** How the scene is lit. Studio, golden hour, dramatic, etc. This has outsized impact on mood.
|
|
22
|
+
|
|
23
|
+
**Mood:** The emotional feel. Professional, cozy, dramatic, energetic. Guides the overall atmosphere.
|
|
24
|
+
|
|
25
|
+
**Quality modifiers:** Technical terms that push the model toward higher-quality output.
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Quality Modifiers
|
|
30
|
+
|
|
31
|
+
Append these to improve output quality. Use sparingly — 2-3 is effective, more can cause conflicting signals.
|
|
32
|
+
|
|
33
|
+
- "highly detailed" — encourages fine detail and texture
|
|
34
|
+
- "professional quality" — steers toward polished, commercial output
|
|
35
|
+
- "sharp focus" — reduces soft or blurry areas
|
|
36
|
+
- "4K" / "8K resolution" — a prompt cue for crisp, finely detailed rendering; it does not raise the actual output size, which is capped by the chosen model (see `models.md`)
|
|
37
|
+
- "award-winning" — biases toward striking, well-composed results
|
|
38
|
+
- "editorial quality" — clean, publication-ready aesthetic
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Composition Terms
|
|
43
|
+
|
|
44
|
+
| Term | Effect |
|
|
45
|
+
|------|--------|
|
|
46
|
+
| "close-up" | Tight framing on subject |
|
|
47
|
+
| "wide angle" | Expansive view, more context |
|
|
48
|
+
| "bird's eye view" | Top-down perspective |
|
|
49
|
+
| "eye level" | Natural, relatable perspective |
|
|
50
|
+
| "rule of thirds" | Subject offset, balanced composition |
|
|
51
|
+
| "centered" | Subject in the middle, symmetrical |
|
|
52
|
+
| "symmetrical" | Mirror balance, architectural feel |
|
|
53
|
+
| "shallow depth of field" | Subject sharp, background blurred |
|
|
54
|
+
| "bokeh background" | Soft, circular blur behind subject |
|
|
55
|
+
| "negative space" | Large empty areas, minimalist feel |
|
|
56
|
+
| "minimalist composition" | Few elements, clean, focused |
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Business Use Case Examples
|
|
61
|
+
|
|
62
|
+
These are starting points. Adapt subject, colours, and details to the user's specific needs.
|
|
63
|
+
|
|
64
|
+
### Product Photography
|
|
65
|
+
|
|
66
|
+
```
|
|
67
|
+
Professional product photograph of [item] on a clean white surface, studio lighting,
|
|
68
|
+
shallow depth of field, commercial quality, highly detailed, 4K
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
Lifestyle product shot of [item] in a modern kitchen, natural window light,
|
|
73
|
+
warm tones, editorial quality, shallow depth of field
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
Flat lay arrangement of [items] on a marble surface, top-down view,
|
|
78
|
+
soft diffused lighting, minimalist composition, professional quality
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Social Media Graphics
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
Vibrant flat design illustration of [concept], bold colours, clean lines,
|
|
85
|
+
modern aesthetic, Instagram-ready, 1:1 aspect ratio
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
Eye-catching social media graphic with [text/concept], gradient background,
|
|
90
|
+
bold typography space, energetic mood, vibrant colours
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
Minimalist quote background, soft pastel gradient, clean negative space
|
|
95
|
+
for text overlay, calming mood, 1:1
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Logo Concepts
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
Minimalist logo design for [business type], clean vector style, simple geometric shapes,
|
|
102
|
+
professional, white background, scalable design
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
```
|
|
106
|
+
Modern logo mark for [business type], flat design, single accent colour on white,
|
|
107
|
+
memorable silhouette, minimalist
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Note: Image generation models produce raster images, not vectors. Generated logos work well as concepts and mood boards. For final logo files, the user will need a graphic designer to recreate the chosen concept as a vector.
|
|
111
|
+
|
|
112
|
+
### Business Headshots
|
|
113
|
+
|
|
114
|
+
```
|
|
115
|
+
Professional headshot portrait, studio lighting, neutral grey background,
|
|
116
|
+
sharp focus, natural expression, business attire, editorial quality
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
```
|
|
120
|
+
Approachable business portrait, natural window light, warm tones,
|
|
121
|
+
shallow depth of field, genuine smile, professional quality
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Illustrations
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
Warm watercolour illustration of [scene], soft edges, pastel palette,
|
|
128
|
+
editorial style, hand-painted feel, gentle mood
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
```
|
|
132
|
+
Detailed line art illustration of [subject], black ink on white,
|
|
133
|
+
clean lines, technical precision, architectural style
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
```
|
|
137
|
+
Playful children's book illustration of [scene], bright colours,
|
|
138
|
+
rounded shapes, whimsical mood, hand-drawn style
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Marketing and Print
|
|
142
|
+
|
|
143
|
+
```
|
|
144
|
+
Hero image for [business type] website, wide angle shot of [scene],
|
|
145
|
+
golden hour lighting, professional quality, 16:9, inviting mood
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
```
|
|
149
|
+
Business card background, abstract geometric pattern, [brand colours],
|
|
150
|
+
subtle texture, professional, minimalist, 3:2
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Iteration Tips
|
|
156
|
+
|
|
157
|
+
When the first result isn't right, refine systematically rather than starting from scratch.
|
|
158
|
+
|
|
159
|
+
**Change one thing at a time.** If the lighting is wrong but the composition is good, adjust only the lighting terms. Changing everything at once makes it impossible to learn what works.
|
|
160
|
+
|
|
161
|
+
**Add specificity to fix vagueness.** Generic subjects produce generic results. "A dog" becomes "a golden retriever puppy sitting on freshly cut grass, looking at camera." Specificity is the most reliable way to improve output.
|
|
162
|
+
|
|
163
|
+
**Use Gemini models for iterative refinement.** Gemini supports multi-turn editing — generate an image, then ask to change specific elements ("make the background warmer," "remove the text," "zoom in on the product"). This is faster than re-prompting from scratch.
|
|
164
|
+
|
|
165
|
+
**Switch to Imagen for final output.** Once the concept is right (via Gemini iteration), re-generate with Imagen Standard or Ultra for maximum fidelity. Think of Gemini as the sketch pad and Imagen as the final print.
|
|
166
|
+
|
|
167
|
+
**Ask the user what to change, not whether they like it.** "What would you change?" gets more useful feedback than "Do you like it?" People find it easier to articulate what's wrong than to rate overall quality.
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## Imagen-Specific Tips
|
|
172
|
+
|
|
173
|
+
Imagen models have constraints that affect prompt strategy:
|
|
174
|
+
|
|
175
|
+
- **English only** — prompts must be in English, even if the user's conversation is in another language. Translate the intent, then prompt in English.
|
|
176
|
+
- **480-token prompt limit** — be concise. Front-load the most important elements (subject, style) and put modifiers at the end so they get trimmed first if the prompt is too long.
|
|
177
|
+
- **Concrete over abstract** — Imagen produces better results with specific, visual descriptions than with abstract concepts. "A sunrise over a calm ocean" works better than "hope and renewal."
|
|
178
|
+
- **Person generation control** — use `personGeneration: "dont_allow"` when people aren't needed in the image. This avoids potential content policy issues and often produces cleaner results for product/object shots.
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## Showing the Prompt
|
|
183
|
+
|
|
184
|
+
Show the user the crafted prompt AND call `image_generate` in the same turn. Do not wait for prompt approval before generating — generate immediately, then iterate. This avoids unnecessary round-trips and gives the user a concrete result to react to. Frame it as: "Here's what I'm generating:" followed by the tool call. Iterate after seeing the result.
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# Style, Format & Visual Vocabulary
|
|
2
|
+
|
|
3
|
+
Use this reference when discussing visual style with the user. Suggest options based on their use case — most users know what they want to feel, not the technical vocabulary to express it.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Style Categories
|
|
8
|
+
|
|
9
|
+
Each style has distinct strengths. Match the style to the purpose, not personal preference.
|
|
10
|
+
|
|
11
|
+
| Style | Character | When to suggest |
|
|
12
|
+
|-------|-----------|-----------------|
|
|
13
|
+
| **Photorealistic** | Lifelike, indistinguishable from a photograph | Product photography, headshots, real estate, food |
|
|
14
|
+
| **Illustration** | Hand-drawn feel, artistic interpretation | Editorial content, children's materials, storytelling |
|
|
15
|
+
| **Digital art** | Polished, vibrant, contemporary | Tech marketing, game assets, social media |
|
|
16
|
+
| **Watercolour** | Soft edges, translucent washes, organic | Invitations, fine art prints, greeting cards |
|
|
17
|
+
| **Oil painting** | Rich textures, visible brushwork, classical depth | Portraits, wall art, premium branding |
|
|
18
|
+
| **Minimalist** | Clean, sparse, essential shapes only | Logos, icons, modern branding |
|
|
19
|
+
| **3D render** | Dimensional, material-accurate, product-focused | Product mockups, architectural visualisation, tech |
|
|
20
|
+
| **Pixel art** | Blocky, nostalgic, 8/16-bit aesthetic | Gaming, retro branding, novelty |
|
|
21
|
+
| **Vector / flat** | Clean lines, solid fills, scalable | Infographics, UI elements, icons, print |
|
|
22
|
+
| **Sketch / line art** | Raw, conceptual, structural | Wireframes, concept art, technical illustration |
|
|
23
|
+
| **Pop art** | Bold outlines, high contrast, graphic | Marketing, social media, event posters |
|
|
24
|
+
| **Anime / manga** | Expressive, character-driven, stylised | Character design, storytelling, youth audience |
|
|
25
|
+
| **Vintage / retro** | Aged, film grain, muted tones, nostalgic | Heritage branding, event themes, editorial |
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Aspect Ratios
|
|
30
|
+
|
|
31
|
+
Aspect ratio should match the output medium. Ask the user where the image will be used if they haven't said.
|
|
32
|
+
|
|
33
|
+
| Ratio | Common uses | Notes |
|
|
34
|
+
|-------|-------------|-------|
|
|
35
|
+
| **1:1** | Social media posts, profile pictures, product shots | Universal default |
|
|
36
|
+
| **16:9** | Presentations, hero images, desktop wallpapers, YouTube thumbnails | Standard widescreen |
|
|
37
|
+
| **9:16** | Instagram/TikTok stories, mobile wallpapers, vertical ads | Vertical video format |
|
|
38
|
+
| **3:4 / 4:3** | Portrait/landscape photos, print | Classic photography |
|
|
39
|
+
| **3:2 / 2:3** | Traditional photo format, DSLR native | Gemini only |
|
|
40
|
+
| **21:9** | Ultrawide banners, website headers, cinematic | Gemini only |
|
|
41
|
+
|
|
42
|
+
When a requested ratio isn't available for the chosen model (e.g. 21:9 on Imagen), explain the limitation and suggest the closest alternative.
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Colour Vocabulary
|
|
47
|
+
|
|
48
|
+
Use these terms in prompts to steer colour treatment. Combine with style for precision.
|
|
49
|
+
|
|
50
|
+
| Term | Effect |
|
|
51
|
+
|------|--------|
|
|
52
|
+
| **Warm** | Reds, oranges, yellows — cozy, inviting |
|
|
53
|
+
| **Cool** | Blues, greens, purples — calm, professional |
|
|
54
|
+
| **Muted** | Desaturated, subdued — sophisticated, understated |
|
|
55
|
+
| **Vibrant** | Saturated, bold — energetic, attention-grabbing |
|
|
56
|
+
| **Monochrome** | Single hue or black/white — dramatic, editorial |
|
|
57
|
+
| **Pastel** | Soft, light tints — gentle, approachable |
|
|
58
|
+
| **Neon** | Electric, glowing — nightlife, tech, youth |
|
|
59
|
+
| **Earth tones** | Browns, tans, olive — natural, organic |
|
|
60
|
+
| **Jewel tones** | Deep emerald, sapphire, ruby — rich, luxurious |
|
|
61
|
+
| **Neutral** | Greys, whites, beiges — clean, unobtrusive |
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Mood Vocabulary
|
|
66
|
+
|
|
67
|
+
Mood shapes the overall emotional impression. It influences lighting, colour, composition, and subject expression.
|
|
68
|
+
|
|
69
|
+
| Mood | Visual impression |
|
|
70
|
+
|------|-------------------|
|
|
71
|
+
| **Dramatic** | High contrast, deep shadows, strong focal point |
|
|
72
|
+
| **Serene** | Soft light, open space, gentle colours |
|
|
73
|
+
| **Energetic** | Bright, dynamic angles, saturated colour |
|
|
74
|
+
| **Mysterious** | Low light, fog, obscured elements, cool tones |
|
|
75
|
+
| **Whimsical** | Playful, unexpected, fantasy elements |
|
|
76
|
+
| **Professional** | Clean, neutral, well-lit, no visual noise |
|
|
77
|
+
| **Playful** | Bright, rounded shapes, warm palette |
|
|
78
|
+
| **Dark** | Low-key lighting, muted palette, heavy shadows |
|
|
79
|
+
| **Ethereal** | Soft focus, glowing light, translucent elements |
|
|
80
|
+
| **Cozy** | Warm tones, soft textures, intimate framing |
|
|
81
|
+
| **Bold** | Strong shapes, high saturation, graphic composition |
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## Lighting Vocabulary
|
|
86
|
+
|
|
87
|
+
Lighting is the single most impactful prompt element after subject and style. Specific lighting terms produce dramatically different results.
|
|
88
|
+
|
|
89
|
+
| Lighting | Character | Best for |
|
|
90
|
+
|----------|-----------|----------|
|
|
91
|
+
| **Golden hour** | Warm, directional, long shadows | Portraits, landscapes, lifestyle |
|
|
92
|
+
| **Studio** | Even, controlled, professional | Product shots, headshots |
|
|
93
|
+
| **Natural** | Ambient, unprocessed, authentic | Documentary, editorial |
|
|
94
|
+
| **Dramatic / chiaroscuro** | Strong contrast between light and shadow | Portraits, moody scenes |
|
|
95
|
+
| **Flat** | Even, shadowless, clean | Product photography, UI mockups |
|
|
96
|
+
| **Rim-lit** | Edge highlighting, subject separation | Silhouettes, athletic, cinematic |
|
|
97
|
+
| **Backlit** | Light behind subject, halo effect | Ethereal, spiritual, romantic |
|
|
98
|
+
| **Soft / diffused** | Gentle, wrapped light, minimal shadows | Beauty, fashion, food |
|
|
99
|
+
| **Neon** | Coloured artificial light, urban | Night scenes, tech, cyberpunk |
|
|
100
|
+
| **Candlelight** | Warm, flickering, intimate | Cozy scenes, romantic, vintage |
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Combining Vocabulary
|
|
105
|
+
|
|
106
|
+
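A well-crafted style description draws on several of the categories above — style, colour, mood, lighting, and (where the medium is known) aspect ratio — rather than relying on any single one. Examples: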
|
|
107
|
+
|
|
108
|
+
- Product shot: "photorealistic, studio lighting, neutral background, professional mood"
|
|
109
|
+
- Social post: "digital art, vibrant colours, energetic mood, flat lighting, 1:1"
|
|
110
|
+
- Business card: "minimalist, monochrome, clean, professional, 3:2"
|
|
111
|
+
- Event poster: "pop art, neon colours, bold mood, dramatic lighting, 9:16"
|
|
112
|
+
|
|
113
|
+
The order matters less than specificity. Vague descriptions produce generic results.
|