@rubytech/taskmaster 1.0.97 → 1.0.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -173,6 +173,29 @@ function extractMediaRefs(text) {
173
173
  }
174
174
  return refs;
175
175
  }
176
// Pattern: MEDIA:/absolute/path (used by tool results like image_generate).
// The regex carries the global flag, so exec() is stateful through lastIndex;
// reset lastIndex before every scan.
const MEDIA_PREFIX_PATTERN = /\bMEDIA:(\S+)/g;
// Image MIME types keyed by lower-case file extension. A Map is used instead of
// an object literal so an "extension" such as "constructor" can never resolve
// to an inherited Object.prototype property.
const MEDIA_PREFIX_MIME_TYPES = new Map([
    ["jpg", "image/jpeg"],
    ["jpeg", "image/jpeg"],
    ["png", "image/png"],
    ["gif", "image/gif"],
    ["webp", "image/webp"],
]);
/**
 * Parse MEDIA:/path references from text to extract file paths.
 * Tool results (e.g. image_generate) use this format instead of
 * [media attached: ...] annotations.
 *
 * @param {string} text - Text to scan. Any non-string value yields an empty array.
 * @returns {{ absPath: string, mimeType: string }[]} One entry per `MEDIA:` token,
 *   in order of appearance. `absPath` is the run of non-whitespace characters
 *   that follows `MEDIA:`; `mimeType` is derived from the file extension and
 *   falls back to "image/png" when the extension is missing or unrecognised.
 */
function extractMediaPrefixRefs(text) {
    // Cheap substring test first so the common no-reference case skips the regex.
    if (typeof text !== "string" || !text.includes("MEDIA:"))
        return [];
    const refs = [];
    // The pattern object is shared, so start from the beginning of this text
    // regardless of where a previous scan stopped.
    MEDIA_PREFIX_PATTERN.lastIndex = 0;
    let match;
    while ((match = MEDIA_PREFIX_PATTERN.exec(text)) !== null) {
        // \S+ guarantees a non-empty capture with no surrounding whitespace.
        const absPath = match[1];
        const ext = absPath.split(".").pop().toLowerCase();
        const mimeType = MEDIA_PREFIX_MIME_TYPES.get(ext) ?? "image/png";
        refs.push({ absPath, mimeType });
    }
    return refs;
}
176
199
  function mediaRefToUrl(ref, workspaceRoot) {
177
200
  const relPath = nodePath.relative(workspaceRoot, ref.absPath);
178
201
  // Must stay within workspace (no ../ escapes)
@@ -254,8 +277,13 @@ function sanitizeMessageMedia(message, workspaceRoot) {
254
277
  if (!message || typeof message !== "object")
255
278
  return message;
256
279
  const entry = message;
257
- // Collect media refs from text content (works for both string and array content)
258
- const mediaRefs = extractMediaRefsFromMessage(entry);
280
+ // Collect media refs from text content (works for both string and array content).
281
+ // MEDIA: prefix refs are only extracted from tool result messages — assistant text
282
+ // may echo "MEDIA:" but that should not produce a duplicate image block.
283
+ const role = typeof entry.role === "string" ? entry.role.toLowerCase() : "";
284
+ const isToolResult = role === "toolresult" || role === "tool_result" ||
285
+ typeof entry.toolCallId === "string" || typeof entry.tool_call_id === "string";
286
+ const mediaRefs = extractMediaRefsFromMessage(entry, isToolResult);
259
287
  // Build URL-based image blocks from annotations
260
288
  const imageBlocks = [];
261
289
  for (const ref of mediaRefs) {
@@ -283,6 +311,28 @@ function sanitizeMessageMedia(message, workspaceRoot) {
283
311
  }
284
312
  return true;
285
313
  });
314
+ // Strip MEDIA:/path text from ALL messages (tool results AND assistant echoes).
315
+ // This prevents raw file paths from ever showing in the chat UI.
316
+ for (let i = 0; i < filtered.length; i++) {
317
+ const block = filtered[i];
318
+ if (block.type === "text" && typeof block.text === "string" && block.text.includes("MEDIA:")) {
319
+ const cleaned = block.text
320
+ .split(/\r?\n/)
321
+ .filter((line) => !/\bMEDIA:\S+/.test(line))
322
+ .join("\n")
323
+ .trim();
324
+ if (!cleaned) {
325
+ filtered.splice(i, 1);
326
+ i--;
327
+ didChange = true;
328
+ }
329
+ else if (cleaned !== block.text) {
330
+ filtered[i] = { ...block, text: cleaned };
331
+ didChange = true;
332
+ }
333
+ }
334
+ }
335
+ // Add URL-based image blocks from tool result annotations
286
336
  if (imageBlocks.length > 0) {
287
337
  didChange = true;
288
338
  filtered.push(...imageBlocks);
@@ -291,9 +341,12 @@ function sanitizeMessageMedia(message, workspaceRoot) {
291
341
  return message;
292
342
  return { ...entry, content: filtered };
293
343
  }
294
- function extractMediaRefsFromMessage(entry) {
344
+ function extractMediaRefsFromMessage(entry, includeMediaPrefix) {
295
345
  if (typeof entry.content === "string") {
296
- return extractMediaRefs(entry.content);
346
+ const refs = extractMediaRefs(entry.content);
347
+ if (includeMediaPrefix)
348
+ refs.push(...extractMediaPrefixRefs(entry.content));
349
+ return refs;
297
350
  }
298
351
  if (Array.isArray(entry.content)) {
299
352
  const refs = [];
@@ -303,6 +356,8 @@ function extractMediaRefsFromMessage(entry) {
303
356
  const b = block;
304
357
  if (b.type === "text" && typeof b.text === "string") {
305
358
  refs.push(...extractMediaRefs(b.text));
359
+ if (includeMediaPrefix)
360
+ refs.push(...extractMediaPrefixRefs(b.text));
306
361
  }
307
362
  }
308
363
  return refs;
@@ -474,26 +474,27 @@ export function handlePublicChatHttpRequest(req, res, opts) {
474
474
  /** Widget script content — self-contained JS for embedding. */
475
475
  const WIDGET_SCRIPT = `(function(){
476
476
  "use strict";
477
- var cfg={server:"",accountId:""};
477
+ var cfg={server:"",accountId:"",color:"#1a1a2e"};
478
478
  var isOpen=false;
479
479
  var btn,overlay,iframe;
480
480
 
481
481
  function init(opts){
482
482
  if(opts&&opts.server) cfg.server=opts.server.replace(/\\/$/,"");
483
483
  if(opts&&opts.accountId) cfg.accountId=opts.accountId;
484
+ if(opts&&opts.color) cfg.color=opts.color;
484
485
  build();
485
486
  }
486
487
 
487
488
  function build(){
488
489
  var css=document.createElement("style");
489
490
  css.textContent=[
490
- ".tm-widget-btn{position:fixed;bottom:20px;right:20px;width:60px;height:60px;",
491
- "border-radius:50%;background:#0078ff;color:#fff;border:none;cursor:pointer;",
492
- "box-shadow:0 4px 12px rgba(0,0,0,.25);z-index:999999;font-size:28px;",
491
+ ".tm-widget-btn{position:fixed;bottom:20px;right:20px;width:48px;height:48px;",
492
+ "border-radius:50%;background:"+cfg.color+";color:#fff;border:none;cursor:pointer;",
493
+ "box-shadow:0 2px 8px rgba(0,0,0,.3);z-index:999999;font-size:22px;",
493
494
  "display:flex;align-items:center;justify-content:center;transition:transform .2s}",
494
- ".tm-widget-btn:hover{transform:scale(1.1)}",
495
- ".tm-widget-overlay{position:fixed;bottom:90px;right:20px;width:400px;height:600px;",
496
- "max-width:calc(100vw - 40px);max-height:calc(100vh - 110px);",
495
+ ".tm-widget-btn:hover{transform:scale(1.08)}",
496
+ ".tm-widget-overlay{position:fixed;bottom:78px;right:20px;width:400px;height:600px;",
497
+ "max-width:calc(100vw - 40px);max-height:calc(100vh - 98px);",
497
498
  "border-radius:12px;overflow:hidden;box-shadow:0 8px 30px rgba(0,0,0,.3);",
498
499
  "z-index:999998;display:none;background:#1a1a2e}",
499
500
  ".tm-widget-overlay.open{display:block}",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rubytech/taskmaster",
3
- "version": "1.0.97",
3
+ "version": "1.0.99",
4
4
  "description": "AI-powered business assistant for small businesses",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -4,10 +4,10 @@ set -euo pipefail
4
4
  # Taskmaster — one-command install for fresh devices (Pi or Mac).
5
5
  #
6
6
  # Usage:
7
- # curl -fsSL https://taskmaster.bot/install.sh | bash
7
+ # curl -fsSL https://taskmaster.bot/install.sh | sudo bash
8
8
  #
9
9
  # With custom port:
10
- # curl -fsSL https://taskmaster.bot/install.sh | bash -s -- --port 19000
10
+ # curl -fsSL https://taskmaster.bot/install.sh | sudo bash -s -- --port 19000
11
11
 
12
12
  PORT=""
13
13
  for arg in "$@"; do
@@ -0,0 +1,68 @@
1
+ ---
2
+ name: image-gen
3
+ description: Generate images using Google AI models (Gemini, Imagen 4). Guides users through model selection, style choices, and expert prompt crafting.
4
+ metadata: {"taskmaster":{"emoji":"🎨"}}
5
+ ---
6
+
7
+ # Image Generation
8
+
9
+ Generate images from text descriptions using Google AI models. Two model families are available: Gemini (conversational, multi-turn editing) and Imagen 4 (dedicated generation, higher fidelity).
10
+
11
+ ## When to activate
12
+
13
+ - User asks to create, generate, design, draw, or make an image, illustration, logo, photo, graphic, or visual
14
+ - User sends an image and asks to edit, modify, or create a variation
15
+ - User asks about image generation capabilities or model differences
16
+
17
+ ## Prerequisites
18
+
19
+ Requires a Google AI API key. If missing, activate the `google-ai` skill to guide the user through setup first.
20
+
21
+ ## Critical rule
22
+
23
+ Only the `image_generate` tool produces images. Never write file paths, MEDIA: references, or image URLs in text. If you did not call `image_generate`, no image was generated. There is no other mechanism.
24
+
25
+ ## Quick Model Reference
26
+
27
+ | Model | Speed | Best for |
28
+ |-------|-------|----------|
29
+ | gemini-2.5-flash-image | Fast | Quick drafts, iteration, editing existing images |
30
+ | gemini-3-pro-image-preview | Moderate | Text in images, highest Gemini quality, 4K |
31
+ | imagen-4.0-fast-generate-001 | Fast | Rapid photo-realistic output |
32
+ | imagen-4.0-generate-001 | Moderate | Balanced quality and speed |
33
+ | imagen-4.0-ultra-generate-001 | Slow | Maximum fidelity, hero images |
34
+
35
+ ## References
36
+
37
+ | Reference | When to load |
38
+ |-----------|-------------|
39
+ | `references/models.md` | Choosing between models, understanding API differences, resolution or feature questions |
40
+ | `references/styles.md` | Discussing style, aspect ratio, mood, lighting, or colour choices with the user |
41
+ | `references/prompting.md` | Crafting the generation prompt, iterating on results, business use cases |
42
+ | `references/troubleshooting.md` | Any generation error — quota, auth, content policy, model availability |
43
+
44
+ Load the relevant reference before proceeding. For a typical generation request: load `prompting.md` to craft the prompt, consult `models.md` if the user has specific quality/speed needs, and check `styles.md` if style discussion is needed. **On any error**, load `troubleshooting.md` immediately — it has browser-assisted resolution steps for every common failure.
45
+
46
+ ## Workflow
47
+
48
+ ### Phase 1: Gather (conversation with user)
49
+
50
+ 1. **Understand intent** — What does the user want? Product shot, social graphic, logo concept, illustration?
51
+ 2. **Recommend model** — Match speed/quality needs to a model. Default to Gemini Flash for quick work, Imagen Standard for quality.
52
+ 3. **Discuss style** — Ask about style, mood, and aspect ratio if the user hasn't specified. Suggest options based on use case.
53
+
54
+ ### Phase 2: Generate (MUST call tool)
55
+
56
+ 4. **Craft prompt and generate in a single turn** — Build an expert prompt using `references/prompting.md`. Show the prompt to the user AND call `image_generate` in the same response. Do not show the prompt without calling the tool. Do not wait for approval of the prompt before generating — generate immediately and iterate after.
57
+
58
+ This step is a hard gate. You MUST call `image_generate` here. The conversation cannot continue past this point without a tool call. If you respond with text only, you have failed this step.
59
+
60
+ ### Phase 3: Deliver (requires tool result)
61
+
62
+ 5. **Present the result** — The tool result contains the generated image. The user sees it inline. Offer to refine: adjust style, change composition, try a different model, or edit specific elements.
63
+
64
+ You cannot reach this step without the tool result from step 4. If `image_generate` was not called, go back to step 4.
65
+
66
+ ## Error Handling
67
+
68
+ If generation fails, **load `references/troubleshooting.md` and follow its guidance**. Common issues (quota exceeded, auth errors, content policy) are all resolvable in-session. Use the browser tool to navigate to Google AI Studio with the user and resolve billing, quota, or key issues collaboratively.
@@ -0,0 +1,83 @@
1
+ # Image Generation Models
2
+
3
+ Two API backends, five models. Both use the same Google AI API key.
4
+
5
+ ---
6
+
7
+ ## API Backends
8
+
9
+ ### Gemini Native (generateContent)
10
+
11
+ Conversational image generation built into the Gemini chat API. Supports multi-turn editing — you can generate an image, then ask to modify it in follow-up turns. Also supports image-to-image: send an existing image and ask for edits, style transfers, or variations.
12
+
13
+ The Gemini backend accepts the same `generateContent` call used for text, with `responseModalities: ["TEXT", "IMAGE"]`. This means image generation is part of a natural conversation flow.
14
+
15
+ ### Imagen 4 (predict)
16
+
17
+ Dedicated image generation API optimized for fidelity. Single-shot generation only — no multi-turn editing. Can produce 1-4 images per request, enabling the user to pick the best result. Higher baseline quality for photo-realistic output.
18
+
19
+ ---
20
+
21
+ ## Model Details
22
+
23
+ | Model | API | Speed | Max Resolution | Strengths | Limitations |
24
+ |-------|-----|-------|---------------|-----------|-------------|
25
+ | gemini-2.5-flash-image | Gemini | Fast | 2K | Quick iterations, low cost, multi-turn editing, image-to-image | Lower detail than Pro |
26
+ | gemini-3-pro-image-preview | Gemini | Moderate | 4K | Text rendering in images, thinking mode, accepts up to 14 reference images | Preview model, slower |
27
+ | imagen-4.0-fast-generate-001 | Imagen | Fast | 1K | Rapid photo-realistic generation | Lower resolution than Standard |
28
+ | imagen-4.0-generate-001 | Imagen | Moderate | 2K | Balanced quality and speed, good photo-realism | English only, 480 token prompt limit |
29
+ | imagen-4.0-ultra-generate-001 | Imagen | Slow | 2K | Maximum fidelity, finest detail | Slowest, English only |
30
+
31
+ ---
32
+
33
+ ## Decision Matrix
34
+
35
+ Choose based on the user's actual need, not the "best" model. Speed and iteration matter more than peak fidelity for most business use cases.
36
+
37
+ | Use case | Recommended model | Why |
38
+ |----------|-------------------|-----|
39
+ | Quick draft or iteration | gemini-2.5-flash-image | Fastest turnaround, supports editing in follow-up messages |
40
+ | Text in image (menu, sign, infographic) | gemini-3-pro-image-preview | Best text rendering of any model |
41
+ | Photo-realistic product shot | imagen-4.0-generate-001 or ultra | Imagen excels at photo-realism |
42
+ | Social media graphic | gemini-2.5-flash-image or imagen-4.0-fast | Speed matters for social content |
43
+ | Hero image or print material | imagen-4.0-ultra-generate-001 or gemini-3-pro at 4K | Maximum quality for final output |
44
+ | Edit or refine an existing image | gemini-2.5-flash-image | Only Gemini supports multi-turn editing |
45
+ | Multiple options to choose from | Any Imagen model | Imagen can generate 1-4 images per request |
46
+
47
+ ---
48
+
49
+ ## Key Differences Between Backends
50
+
51
+ **Aspect ratios:**
52
+ - Gemini supports wider ratios including 21:9 (ultrawide banners, website headers)
53
+ - Imagen is limited to 5 aspect ratios: 1:1, 3:4, 4:3, 9:16, 16:9
54
+
55
+ **Editing:**
56
+ - Gemini supports multi-turn editing — generate, then refine in conversation
57
+ - Imagen is single-shot only — each request is independent
58
+
59
+ **Batch output:**
60
+ - Imagen supports 1-4 images per request (`numberOfImages` parameter)
61
+ - Gemini generates 1 image per request
62
+
63
+ **Person generation:**
64
+ - Imagen supports `personGeneration` control ("dont_allow", "allow_adult", "allow_all")
65
+ - Gemini does not have this parameter
66
+
67
+ **Language:**
68
+ - Gemini accepts prompts in any language
69
+ - Imagen accepts English only, with a 480-token prompt limit
70
+
71
+ **Watermarking:**
72
+ - All models apply SynthID digital watermark to generated images
73
+
74
+ ---
75
+
76
+ ## Default Recommendations
77
+
78
+ When the user doesn't specify a preference:
79
+
80
+ - **Start with Gemini Flash** for exploration and drafting — it's fast, cheap, and the user can iterate conversationally
81
+ - **Switch to Imagen Standard or Ultra** when the user is happy with the concept and wants maximum quality for the final output
82
+ - **Use Gemini Pro** when the image needs readable text (menus, signs, business cards, infographics)
83
+ - **Offer Imagen batch mode** when the user wants options — "I can generate 4 variations for you to pick from"
@@ -0,0 +1,184 @@
1
+ # Expert Prompt Construction
2
+
3
+ A strong prompt is the difference between a usable image and a generic one. This reference covers prompt structure, quality modifiers, and business-specific examples.
4
+
5
+ ---
6
+
7
+ ## Prompt Structure
8
+
9
+ Build prompts in this order. Each element adds specificity. Not every element is needed for every image — use judgement based on the request.
10
+
11
+ ```
12
+ Subject → Style → Composition → Lighting → Mood → Quality modifiers
13
+ ```
14
+
15
+ **Subject:** What is in the image. Be concrete — "a golden retriever puppy sitting on grass" beats "a dog." Include materials, textures, and context when relevant.
16
+
17
+ **Style:** How the image looks. Photorealistic, illustration, watercolour, etc. See `styles.md` for vocabulary.
18
+
19
+ **Composition:** How the frame is arranged. Camera angle, framing, depth of field, negative space.
20
+
21
+ **Lighting:** How the scene is lit. Studio, golden hour, dramatic, etc. This has outsized impact on mood.
22
+
23
+ **Mood:** The emotional feel. Professional, cozy, dramatic, energetic. Guides the overall atmosphere.
24
+
25
+ **Quality modifiers:** Technical terms that push the model toward higher-quality output.
26
+
27
+ ---
28
+
29
+ ## Quality Modifiers
30
+
31
+ Append these to improve output quality. Use sparingly — 2-3 is effective, more can cause conflicting signals.
32
+
33
+ - "highly detailed" — encourages fine detail and texture
34
+ - "professional quality" — steers toward polished, commercial output
35
+ - "sharp focus" — reduces soft or blurry areas
36
+ - "4K" / "8K resolution" — emphasises resolution and clarity in the prompt
37
+ - "award-winning" — biases toward striking, well-composed results
38
+ - "editorial quality" — clean, publication-ready aesthetic
39
+
40
+ ---
41
+
42
+ ## Composition Terms
43
+
44
+ | Term | Effect |
45
+ |------|--------|
46
+ | "close-up" | Tight framing on subject |
47
+ | "wide angle" | Expansive view, more context |
48
+ | "bird's eye view" | Top-down perspective |
49
+ | "eye level" | Natural, relatable perspective |
50
+ | "rule of thirds" | Subject offset, balanced composition |
51
+ | "centered" | Subject in the middle, symmetrical |
52
+ | "symmetrical" | Mirror balance, architectural feel |
53
+ | "shallow depth of field" | Subject sharp, background blurred |
54
+ | "bokeh background" | Soft, circular blur behind subject |
55
+ | "negative space" | Large empty areas, minimalist feel |
56
+ | "minimalist composition" | Few elements, clean, focused |
57
+
58
+ ---
59
+
60
+ ## Business Use Case Examples
61
+
62
+ These are starting points. Adapt subject, colours, and details to the user's specific needs.
63
+
64
+ ### Product Photography
65
+
66
+ ```
67
+ Professional product photograph of [item] on a clean white surface, studio lighting,
68
+ shallow depth of field, commercial quality, highly detailed, 4K
69
+ ```
70
+
71
+ ```
72
+ Lifestyle product shot of [item] in a modern kitchen, natural window light,
73
+ warm tones, editorial quality, shallow depth of field
74
+ ```
75
+
76
+ ```
77
+ Flat lay arrangement of [items] on a marble surface, top-down view,
78
+ soft diffused lighting, minimalist composition, professional quality
79
+ ```
80
+
81
+ ### Social Media Graphics
82
+
83
+ ```
84
+ Vibrant flat design illustration of [concept], bold colours, clean lines,
85
+ modern aesthetic, Instagram-ready, 1:1 aspect ratio
86
+ ```
87
+
88
+ ```
89
+ Eye-catching social media graphic with [text/concept], gradient background,
90
+ bold typography space, energetic mood, vibrant colours
91
+ ```
92
+
93
+ ```
94
+ Minimalist quote background, soft pastel gradient, clean negative space
95
+ for text overlay, calming mood, 1:1
96
+ ```
97
+
98
+ ### Logo Concepts
99
+
100
+ ```
101
+ Minimalist logo design for [business type], clean vector style, simple geometric shapes,
102
+ professional, white background, scalable design
103
+ ```
104
+
105
+ ```
106
+ Modern logo mark for [business type], flat design, single accent colour on white,
107
+ memorable silhouette, minimalist
108
+ ```
109
+
110
+ Note: Image generation models produce raster images, not vectors. Generated logos work well as concepts and mood boards. For final logo files, the user will need a graphic designer to recreate the chosen concept as a vector.
111
+
112
+ ### Business Headshots
113
+
114
+ ```
115
+ Professional headshot portrait, studio lighting, neutral grey background,
116
+ sharp focus, natural expression, business attire, editorial quality
117
+ ```
118
+
119
+ ```
120
+ Approachable business portrait, natural window light, warm tones,
121
+ shallow depth of field, genuine smile, professional quality
122
+ ```
123
+
124
+ ### Illustrations
125
+
126
+ ```
127
+ Warm watercolour illustration of [scene], soft edges, pastel palette,
128
+ editorial style, hand-painted feel, gentle mood
129
+ ```
130
+
131
+ ```
132
+ Detailed line art illustration of [subject], black ink on white,
133
+ clean lines, technical precision, architectural style
134
+ ```
135
+
136
+ ```
137
+ Playful children's book illustration of [scene], bright colours,
138
+ rounded shapes, whimsical mood, hand-drawn style
139
+ ```
140
+
141
+ ### Marketing and Print
142
+
143
+ ```
144
+ Hero image for [business type] website, wide angle shot of [scene],
145
+ golden hour lighting, professional quality, 16:9, inviting mood
146
+ ```
147
+
148
+ ```
149
+ Business card background, abstract geometric pattern, [brand colours],
150
+ subtle texture, professional, minimalist, 3:2
151
+ ```
152
+
153
+ ---
154
+
155
+ ## Iteration Tips
156
+
157
+ When the first result isn't right, refine systematically rather than starting from scratch.
158
+
159
+ **Change one thing at a time.** If the lighting is wrong but the composition is good, adjust only the lighting terms. Changing everything at once makes it impossible to learn what works.
160
+
161
+ **Add specificity to fix vagueness.** Generic subjects produce generic results. "A dog" becomes "a golden retriever puppy sitting on freshly cut grass, looking at camera." Specificity is the most reliable way to improve output.
162
+
163
+ **Use Gemini models for iterative refinement.** Gemini supports multi-turn editing — generate an image, then ask to change specific elements ("make the background warmer," "remove the text," "zoom in on the product"). This is faster than re-prompting from scratch.
164
+
165
+ **Switch to Imagen for final output.** Once the concept is right (via Gemini iteration), re-generate with Imagen Standard or Ultra for maximum fidelity. Think of Gemini as the sketch pad and Imagen as the final print.
166
+
167
+ **Ask the user what to change, not whether they like it.** "What would you change?" gets more useful feedback than "Do you like it?" People find it easier to articulate what's wrong than to rate overall quality.
168
+
169
+ ---
170
+
171
+ ## Imagen-Specific Tips
172
+
173
+ Imagen models have constraints that affect prompt strategy:
174
+
175
+ - **English only** — prompts must be in English, even if the user's conversation is in another language. Translate the intent, then prompt in English.
176
+ - **480-token prompt limit** — be concise. Front-load the most important elements (subject, style) and put modifiers at the end so they get trimmed first if the prompt is too long.
177
+ - **Concrete over abstract** — Imagen produces better results with specific, visual descriptions than with abstract concepts. "A sunrise over a calm ocean" works better than "hope and renewal."
178
+ - **Person generation control** — use `personGeneration: "dont_allow"` when people aren't needed in the image. This avoids potential content policy issues and often produces cleaner results for product/object shots.
179
+
180
+ ---
181
+
182
+ ## Showing the Prompt
183
+
184
+ Show the user the crafted prompt AND call `image_generate` in the same turn. Do not wait for prompt approval before generating — generate immediately, then iterate. This avoids unnecessary round-trips and gives the user a concrete result to react to. Frame it as: "Here's what I'm generating:" followed by the tool call. Iterate after seeing the result.
@@ -0,0 +1,113 @@
1
+ # Style, Format & Visual Vocabulary
2
+
3
+ Use this reference when discussing visual style with the user. Suggest options based on their use case — most users know what they want to feel, not the technical vocabulary to express it.
4
+
5
+ ---
6
+
7
+ ## Style Categories
8
+
9
+ Each style has distinct strengths. Match the style to the purpose, not personal preference.
10
+
11
+ | Style | Character | When to suggest |
12
+ |-------|-----------|-----------------|
13
+ | **Photorealistic** | Lifelike, indistinguishable from a photograph | Product photography, headshots, real estate, food |
14
+ | **Illustration** | Hand-drawn feel, artistic interpretation | Editorial content, children's materials, storytelling |
15
+ | **Digital art** | Polished, vibrant, contemporary | Tech marketing, game assets, social media |
16
+ | **Watercolour** | Soft edges, translucent washes, organic | Invitations, fine art prints, greeting cards |
17
+ | **Oil painting** | Rich textures, visible brushwork, classical depth | Portraits, wall art, premium branding |
18
+ | **Minimalist** | Clean, sparse, essential shapes only | Logos, icons, modern branding |
19
+ | **3D render** | Dimensional, material-accurate, product-focused | Product mockups, architectural visualisation, tech |
20
+ | **Pixel art** | Blocky, nostalgic, 8/16-bit aesthetic | Gaming, retro branding, novelty |
21
+ | **Vector / flat** | Clean lines, solid fills, scalable | Infographics, UI elements, icons, print |
22
+ | **Sketch / line art** | Raw, conceptual, structural | Wireframes, concept art, technical illustration |
23
+ | **Pop art** | Bold outlines, high contrast, graphic | Marketing, social media, event posters |
24
+ | **Anime / manga** | Expressive, character-driven, stylised | Character design, storytelling, youth audience |
25
+ | **Vintage / retro** | Aged, film grain, muted tones, nostalgic | Heritage branding, event themes, editorial |
26
+
27
+ ---
28
+
29
+ ## Aspect Ratios
30
+
31
+ Aspect ratio should match the output medium. Ask the user where the image will be used if they haven't said.
32
+
33
+ | Ratio | Common uses | Notes |
34
+ |-------|-------------|-------|
35
+ | **1:1** | Social media posts, profile pictures, product shots | Universal default |
36
+ | **16:9** | Presentations, hero images, desktop wallpapers, YouTube thumbnails | Standard widescreen |
37
+ | **9:16** | Instagram/TikTok stories, mobile wallpapers, vertical ads | Vertical video format |
38
+ | **3:4 / 4:3** | Portrait/landscape photos, print | Classic photography |
39
+ | **3:2 / 2:3** | Traditional photo format, DSLR native | Gemini only |
40
+ | **21:9** | Ultrawide banners, website headers, cinematic | Gemini only |
41
+
42
+ When a requested ratio isn't available for the chosen model (e.g. 21:9 on Imagen), explain the limitation and suggest the closest alternative.
43
+
44
+ ---
45
+
46
+ ## Colour Vocabulary
47
+
48
+ Use these terms in prompts to steer colour treatment. Combine with style for precision.
49
+
50
+ | Term | Effect |
51
+ |------|--------|
52
+ | **Warm** | Reds, oranges, yellows — cozy, inviting |
53
+ | **Cool** | Blues, greens, purples — calm, professional |
54
+ | **Muted** | Desaturated, subdued — sophisticated, understated |
55
+ | **Vibrant** | Saturated, bold — energetic, attention-grabbing |
56
+ | **Monochrome** | Single hue or black/white — dramatic, editorial |
57
+ | **Pastel** | Soft, light tints — gentle, approachable |
58
+ | **Neon** | Electric, glowing — nightlife, tech, youth |
59
+ | **Earth tones** | Browns, tans, olive — natural, organic |
60
+ | **Jewel tones** | Deep emerald, sapphire, ruby — rich, luxurious |
61
+ | **Neutral** | Greys, whites, beiges — clean, unobtrusive |
62
+
63
+ ---
64
+
65
+ ## Mood Vocabulary
66
+
67
+ Mood shapes the overall emotional impression. It influences lighting, colour, composition, and subject expression.
68
+
69
+ | Mood | Visual impression |
70
+ |------|-------------------|
71
+ | **Dramatic** | High contrast, deep shadows, strong focal point |
72
+ | **Serene** | Soft light, open space, gentle colours |
73
+ | **Energetic** | Bright, dynamic angles, saturated colour |
74
+ | **Mysterious** | Low light, fog, obscured elements, cool tones |
75
+ | **Whimsical** | Playful, unexpected, fantasy elements |
76
+ | **Professional** | Clean, neutral, well-lit, no visual noise |
77
+ | **Playful** | Bright, rounded shapes, warm palette |
78
+ | **Dark** | Low-key lighting, muted palette, heavy shadows |
79
+ | **Ethereal** | Soft focus, glowing light, translucent elements |
80
+ | **Cozy** | Warm tones, soft textures, intimate framing |
81
+ | **Bold** | Strong shapes, high saturation, graphic composition |
82
+
83
+ ---
84
+
85
+ ## Lighting Vocabulary
86
+
87
+ Lighting is the single most impactful prompt element after subject and style. Specific lighting terms produce dramatically different results.
88
+
89
+ | Lighting | Character | Best for |
90
+ |----------|-----------|----------|
91
+ | **Golden hour** | Warm, directional, long shadows | Portraits, landscapes, lifestyle |
92
+ | **Studio** | Even, controlled, professional | Product shots, headshots |
93
+ | **Natural** | Ambient, unprocessed, authentic | Documentary, editorial |
94
+ | **Dramatic / chiaroscuro** | Strong contrast between light and shadow | Portraits, moody scenes |
95
+ | **Flat** | Even, shadowless, clean | Product photography, UI mockups |
96
+ | **Rim-lit** | Edge highlighting, subject separation | Silhouettes, athletic, cinematic |
97
+ | **Backlit** | Light behind subject, halo effect | Ethereal, spiritual, romantic |
98
+ | **Soft / diffused** | Gentle, wrapped light, minimal shadows | Beauty, fashion, food |
99
+ | **Neon** | Coloured artificial light, urban | Night scenes, tech, cyberpunk |
100
+ | **Candlelight** | Warm, flickering, intimate | Cozy scenes, romantic, vintage |
101
+
102
+ ---
103
+
104
+ ## Combining Vocabulary
105
+
106
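+ A well-crafted style description draws on several of these categories — typically a style, a colour treatment, a mood, and lighting, plus an aspect ratio when the output medium calls for one. Examples: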
107
+
108
+ - Product shot: "photorealistic, studio lighting, neutral background, professional mood"
109
+ - Social post: "digital art, vibrant colours, energetic mood, flat lighting, 1:1"
110
+ - Business card: "minimalist, monochrome, clean, professional, 3:2"
111
+ - Event poster: "pop art, neon colours, bold mood, dramatic lighting, 9:16"
112
+
113
+ The order matters less than specificity. Vague descriptions produce generic results.