npm - @rubytech/taskmaster - Versions diffs - 1.0.98 → 1.0.100 - Mend

@rubytech/taskmaster 1.0.98 → 1.0.100

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/README.md +1 -1
package/dist/agents/system-prompt.js +2 -1
package/dist/agents/taskmaster-tools.js +6 -0
package/dist/agents/tool-policy.js +2 -0
package/dist/agents/tools/image-generate-api.js +154 -0
package/dist/agents/tools/image-generate-tool.js +145 -0
package/dist/build-info.json +3 -3
package/dist/control-ui/assets/{index-TI7yF6r_.js → index-BiXCzgVk.js} +244 -244
package/dist/control-ui/assets/index-BiXCzgVk.js.map +1 -0
package/dist/control-ui/assets/{index-6WdtDXJj.css → index-Bj8TaDNH.css} +1 -1
package/dist/control-ui/index.html +2 -2
package/dist/gateway/chat-sanitize.js +59 -4
package/dist/gateway/control-ui.js +8 -7
package/dist/gateway/server-methods/files.js +3 -3
package/dist/hooks/bundled/license-request/HOOK.md +47 -0
package/dist/hooks/bundled/license-request/handler.js +192 -0
package/package.json +1 -1
package/scripts/install.sh +2 -2
package/skills/image-gen/SKILL.md +68 -0
package/skills/image-gen/references/models.md +83 -0
package/skills/image-gen/references/prompting.md +184 -0
package/skills/image-gen/references/styles.md +113 -0
package/skills/image-gen/references/troubleshooting.md +93 -0
package/skills/taskmaster/SKILL.md +6 -6
package/taskmaster-docs/USER-GUIDE.md +67 -6
package/templates/beagle/agents/admin/AGENTS.md +2 -2
package/templates/beagle/agents/public/AGENTS.md +2 -2
package/templates/beagle/skills/beagle/SKILL.md +3 -3
package/templates/beagle/skills/beagle/references/booking-schema.md +1 -1
package/templates/beagle/skills/beagle/references/data-compliance.md +2 -2
package/templates/beagle/skills/beagle/references/fee-collection.md +1 -1
package/templates/beagle/skills/beagle/references/workflow.md +2 -2
package/templates/maxy/TOOLS.md +15 -0
package/templates/maxy/agents/admin/AGENTS.md +70 -0
package/templates/maxy/agents/admin/BOOTSTRAP.md +30 -0
package/templates/maxy/agents/admin/HEARTBEAT.md +6 -0
package/templates/maxy/agents/admin/IDENTITY.md +13 -0
package/templates/maxy/agents/admin/SOUL.md +21 -0
package/templates/maxy/agents/admin/TOOLS.md +20 -0
package/templates/maxy/agents/admin/USER.md +17 -0
package/templates/maxy/agents/public/AGENTS.md +72 -0
package/templates/maxy/agents/public/HEARTBEAT.md +2 -0
package/templates/maxy/agents/public/IDENTITY.md +13 -0
package/templates/maxy/agents/public/SOUL.md +60 -0
package/templates/maxy/agents/public/TOOLS.md +20 -0
package/templates/maxy/agents/public/USER.md +17 -0
package/templates/maxy/memory/public/FAQ.md +241 -0
package/templates/maxy/skills/maxy/SKILL.md +55 -0
package/templates/maxy/skills/personal-assistant/SKILL.md +50 -0
package/templates/taskmaster/agents/admin/AGENTS.md +20 -0
package/templates/taskmaster/agents/public/AGENTS.md +9 -0
package/dist/control-ui/assets/index-TI7yF6r_.js.map +0 -1

package/dist/gateway/chat-sanitize.js CHANGED Viewed

@@ -173,6 +173,29 @@ function extractMediaRefs(text) {
     }
     return refs;
 }
+// Pattern: MEDIA:/absolute/path (used by tool results like image_generate)
+const MEDIA_PREFIX_PATTERN = /\bMEDIA:(\S+)/g;
+/**
+ * Parse MEDIA:/path references from text to extract file paths.
+ * Tool results (e.g. image_generate) use this format instead of
+ * [media attached: ...] annotations.
+ */
+function extractMediaPrefixRefs(text) {
+    if (!text.includes("MEDIA:"))
+        return [];
+    const refs = [];
+    let match;
+    MEDIA_PREFIX_PATTERN.lastIndex = 0;
+    while ((match = MEDIA_PREFIX_PATTERN.exec(text)) !== null) {
+        const absPath = match[1]?.trim();
+        if (absPath) {
+            const ext = absPath.split(".").pop()?.toLowerCase() ?? "";
+            const mimeType = ext === "jpg" || ext === "jpeg" ? "image/jpeg" : "image/png";
+            refs.push({ absPath, mimeType });
+        }
+    }
+    return refs;
+}
 function mediaRefToUrl(ref, workspaceRoot) {
     const relPath = nodePath.relative(workspaceRoot, ref.absPath);
     // Must stay within workspace (no ../ escapes)
@@ -254,8 +277,13 @@ function sanitizeMessageMedia(message, workspaceRoot) {
     if (!message || typeof message !== "object")
         return message;
     const entry = message;
-    // Collect media refs from text content (works for both string and array content)
-    const mediaRefs = extractMediaRefsFromMessage(entry);
+    // Collect media refs from text content (works for both string and array content).
+    // MEDIA: prefix refs are only extracted from tool result messages — assistant text
+    // may echo "MEDIA:" but that should not produce a duplicate image block.
+    const role = typeof entry.role === "string" ? entry.role.toLowerCase() : "";
+    const isToolResult = role === "toolresult" || role === "tool_result" ||
+        typeof entry.toolCallId === "string" || typeof entry.tool_call_id === "string";
+    const mediaRefs = extractMediaRefsFromMessage(entry, isToolResult);
     // Build URL-based image blocks from annotations
     const imageBlocks = [];
     for (const ref of mediaRefs) {
@@ -283,6 +311,28 @@ function sanitizeMessageMedia(message, workspaceRoot) {
         }
         return true;
     });
+    // Strip MEDIA:/path text from ALL messages (tool results AND assistant echoes).
+    // This prevents raw file paths from ever showing in the chat UI.
+    for (let i = 0; i < filtered.length; i++) {
+        const block = filtered[i];
+        if (block.type === "text" && typeof block.text === "string" && block.text.includes("MEDIA:")) {
+            const cleaned = block.text
+                .split(/\r?\n/)
+                .filter((line) => !/\bMEDIA:\S+/.test(line))
+                .join("\n")
+                .trim();
+            if (!cleaned) {
+                filtered.splice(i, 1);
+                i--;
+                didChange = true;
+            }
+            else if (cleaned !== block.text) {
+                filtered[i] = { ...block, text: cleaned };
+                didChange = true;
+            }
+        }
+    }
+    // Add URL-based image blocks from tool result annotations
     if (imageBlocks.length > 0) {
         didChange = true;
         filtered.push(...imageBlocks);
@@ -291,9 +341,12 @@ function sanitizeMessageMedia(message, workspaceRoot) {
         return message;
     return { ...entry, content: filtered };
 }
-function extractMediaRefsFromMessage(entry) {
+function extractMediaRefsFromMessage(entry, includeMediaPrefix) {
     if (typeof entry.content === "string") {
-        return extractMediaRefs(entry.content);
+        const refs = extractMediaRefs(entry.content);
+        if (includeMediaPrefix)
+            refs.push(...extractMediaPrefixRefs(entry.content));
+        return refs;
     }
     if (Array.isArray(entry.content)) {
         const refs = [];
@@ -303,6 +356,8 @@ function extractMediaRefsFromMessage(entry) {
             const b = block;
             if (b.type === "text" && typeof b.text === "string") {
                 refs.push(...extractMediaRefs(b.text));
+                if (includeMediaPrefix)
+                    refs.push(...extractMediaPrefixRefs(b.text));
             }
         }
         return refs;

package/dist/gateway/control-ui.js CHANGED Viewed

@@ -474,26 +474,27 @@ export function handlePublicChatHttpRequest(req, res, opts) {
 /** Widget script content — self-contained JS for embedding. */
 const WIDGET_SCRIPT = `(function(){
   "use strict";
-  var cfg={server:"",accountId:""};
+  var cfg={server:"",accountId:"",color:"#1a1a2e"};
   var isOpen=false;
   var btn,overlay,iframe;
   function init(opts){
     if(opts&&opts.server) cfg.server=opts.server.replace(/\\/$/,"");
     if(opts&&opts.accountId) cfg.accountId=opts.accountId;
+    if(opts&&opts.color) cfg.color=opts.color;
     build();
   }
   function build(){
     var css=document.createElement("style");
     css.textContent=[
-      ".tm-widget-btn{position:fixed;bottom:20px;right:20px;width:60px;height:60px;",
-      "border-radius:50%;background:#0078ff;color:#fff;border:none;cursor:pointer;",
-      "box-shadow:0 4px 12px rgba(0,0,0,.25);z-index:999999;font-size:28px;",
+      ".tm-widget-btn{position:fixed;bottom:20px;right:20px;width:48px;height:48px;",
+      "border-radius:50%;background:"+cfg.color+";color:#fff;border:none;cursor:pointer;",
+      "box-shadow:0 2px 8px rgba(0,0,0,.3);z-index:999999;font-size:22px;",
       "display:flex;align-items:center;justify-content:center;transition:transform .2s}",
-      ".tm-widget-btn:hover{transform:scale(1.1)}",
-      ".tm-widget-overlay{position:fixed;bottom:90px;right:20px;width:400px;height:600px;",
-      "max-width:calc(100vw - 40px);max-height:calc(100vh - 110px);",
+      ".tm-widget-btn:hover{transform:scale(1.08)}",
+      ".tm-widget-overlay{position:fixed;bottom:78px;right:20px;width:400px;height:600px;",
+      "max-width:calc(100vw - 40px);max-height:calc(100vh - 98px);",
       "border-radius:12px;overflow:hidden;box-shadow:0 8px 30px rgba(0,0,0,.3);",
       "z-index:999998;display:none;background:#1a1a2e}",
       ".tm-widget-overlay.open{display:block}",

package/dist/gateway/server-methods/files.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import fsp from "node:fs/promises";
 import path from "node:path";
-import { resolveAgentWorkspaceDir, resolveDefaultAgentId } from "../../agents/agent-scope.js";
+import { resolveAgentWorkspaceRoot, resolveDefaultAgentId } from "../../agents/agent-scope.js";
 import { loadConfig } from "../../config/config.js";
 import { ErrorCodes, errorShape } from "../protocol/index.js";
 const MAX_PREVIEW_BYTES = 256 * 1024; // 256 KB for preview
@@ -8,7 +8,7 @@ const MAX_DOWNLOAD_BYTES = 5 * 1024 * 1024; // 5 MB for download
 const MAX_UPLOAD_BYTES = 5 * 1024 * 1024; // 5 MB for upload
 function resolveWorkspaceRoot() {
     const cfg = loadConfig();
-    return resolveAgentWorkspaceDir(cfg, resolveDefaultAgentId(cfg));
+    return resolveAgentWorkspaceRoot(cfg, resolveDefaultAgentId(cfg));
 }
 /**
  * Resolve workspace root from request params.
@@ -20,7 +20,7 @@ function resolveWorkspaceForRequest(params) {
     if (!agentId)
         return resolveWorkspaceRoot();
     const cfg = loadConfig();
-    return resolveAgentWorkspaceDir(cfg, agentId);
+    return resolveAgentWorkspaceRoot(cfg, agentId);
 }
 /**
  * Validate and resolve a relative path within the workspace.

package/dist/hooks/bundled/license-request/HOOK.md ADDED Viewed

@@ -0,0 +1,47 @@
+---
+name: license-request
+description: "Detect device IDs in public agent conversations and dispatch license generation to admin agent"
+homepage: https://docs.taskmaster.bot/hooks#license-request
+metadata:
+  {
+    "taskmaster":
+      {
+        "emoji": "🔑",
+        "events": ["message:inbound"],
+        "requires": { "config": ["workspace.dir"] },
+        "install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with Taskmaster" }],
+      },
+  }
+---
+# License Request Hook
+Detects when a customer sends a device ID (`tm_dev_...`) to the public agent and automatically dispatches a license generation request to the admin agent.
+## What It Does
+When an inbound message to the public agent contains a device ID:
+1. **Extracts device ID and customer phone** from the message and session key
+2. **Dispatches to admin agent** with a structured license processing instruction
+3. **Admin agent autonomously processes** — checks contact records, generates license if paid, sends to customer
+## Why This Exists
+The public agent cannot generate license keys (security boundary — untrusted input, prompt injection risk). The admin agent has the `license_generate` tool but previously had no way to know when a customer requested a license. This hook bridges that gap.
+## Behaviour
+- Only fires for **public agent DM sessions** (not admin, not groups)
+- Matches `tm_dev_` followed by 10+ hex characters
+- **Deduplicates** — same device ID from same phone within 5 minutes is ignored
+- **Non-blocking** — dispatch is fire-and-forget so the public agent's reply is not delayed
+- Admin agent uses `contact_lookup` → `license_generate` → `message` → `contact_update`
+## Configuration
+No additional configuration required. Disable with:
+```bash
+taskmaster hooks disable license-request
+```

package/dist/hooks/bundled/license-request/handler.js ADDED Viewed

@@ -0,0 +1,192 @@
+/**
+ * License Request Hook Handler
+ *
+ * Detects device IDs (tm_dev_*) in public agent inbound messages and
+ * dispatches a license generation request to the admin agent.
+ *
+ * The admin agent then:
+ * 1. Looks up the customer via contact_lookup
+ * 2. Checks payment status
+ * 3. Generates a license via license_generate (if paid)
+ * 4. Sends the key to the customer via the message tool
+ * 5. Records the issuance via contact_update
+ */
+import { randomUUID } from "node:crypto";
+import { dispatchInboundMessageWithDispatcher } from "../../../auto-reply/dispatch.js";
+import { formatInboundEnvelope, resolveEnvelopeFormatOptions } from "../../../auto-reply/envelope.js";
+import { createReplyPrefixContext } from "../../../channels/reply-prefix.js";
+import { resolveDefaultAgentId } from "../../../agents/agent-scope.js";
+import { resolveAgentIdFromSessionKey } from "../../../routing/session-key.js";
+import { resolveAgentBoundAccountId } from "../../../routing/bindings.js";
+import { buildAgentSessionKey } from "../../../routing/resolve-route.js";
+/** Device ID pattern: tm_dev_ followed by 10+ hex characters. */
+const DEVICE_ID_RE = /\btm_dev_[a-f0-9]{10,}\b/i;
+/**
+ * Dedup cache: Map<"phone:deviceId", timestamp>.
+ * Prevents re-dispatching the same request within the cooldown window.
+ */
+const recentRequests = new Map();
+const DEDUP_COOLDOWN_MS = 5 * 60 * 1000; // 5 minutes
+function isDuplicate(phone, deviceId) {
+    const key = `${phone}:${deviceId}`;
+    const now = Date.now();
+    // Prune stale entries
+    for (const [k, ts] of recentRequests) {
+        if (now - ts > DEDUP_COOLDOWN_MS)
+            recentRequests.delete(k);
+    }
+    const lastSeen = recentRequests.get(key);
+    if (lastSeen && now - lastSeen < DEDUP_COOLDOWN_MS)
+        return true;
+    recentRequests.set(key, now);
+    return false;
+}
+/**
+ * Extract peer phone from a DM session key.
+ *
+ * Formats:
+ * - 4-part: agent:{agentId}:dm:{peer}
+ * - 5-part: agent:{agentId}:{channel}:dm:{peer}
+ */
+function extractPeerFromSessionKey(sessionKey) {
+    const parts = sessionKey.toLowerCase().split(":").filter(Boolean);
+    if (parts[0] !== "agent" || parts.length < 4)
+        return null;
+    if (parts.length >= 4 && parts[2] === "dm")
+        return parts.slice(3).join(":");
+    if (parts.length >= 5 && parts[3] === "dm")
+        return parts.slice(4).join(":");
+    return null;
+}
+/**
+ * Find the admin agent ID from config.
+ * The admin agent is the one marked `default: true`.
+ */
+function findAdminAgentId(cfg) {
+    const agents = cfg.agents?.list ?? [];
+    const admin = agents.find((a) => a.default === true);
+    if (admin?.id)
+        return admin.id;
+    // Fallback: the config's resolved default agent (which is usually admin)
+    const defaultId = resolveDefaultAgentId(cfg);
+    return defaultId || null;
+}
+/**
+ * Dispatch the license request to the admin agent.
+ *
+ * Builds a plain-text instruction describing the request, wraps it in an
+ * inbound envelope, and hands it to dispatchInboundMessageWithDispatcher
+ * under a session key built for the admin agent. The admin agent's reply is
+ * discarded (the `deliver` callback is a no-op).
+ *
+ * The dispatch is awaited here, so a rejection propagates to the caller;
+ * handleLicenseRequest calls this without awaiting and logs failures in
+ * its own .catch().
+ *
+ * @param {object} params
+ * @param {object} params.cfg - Loaded config, passed through to the envelope,
+ *   reply-prefix and dispatch helpers.
+ * @param {string} params.adminAgentId - Agent that should process the request.
+ * @param {string} params.customerPhone - Customer phone taken from the session key.
+ * @param {string} params.deviceId - Device ID detected in the customer's message.
+ * @param {string} params.accountId - Account id the admin is told to send from.
+ * @returns {Promise<void>}
+ */
+async function dispatchToAdmin(params) {
+    const { cfg, adminAgentId, customerPhone, deviceId, accountId } = params;
+    // Build a session key for the admin agent on the "system" channel, using a synthetic DM peer "license-<phone>".
+    // NOTE(review): this looks like one session per customer rather than the admin's main session — confirm against buildAgentSessionKey.
+    const sessionKey = buildAgentSessionKey({
+        agentId: adminAgentId,
+        channel: "system",
+        peer: { kind: "dm", id: `license-${customerPhone}` },
+    }).toLowerCase();
+    const instruction = `[System: License Request]\n\n` +
+        `A customer has sent their device ID to activate Taskmaster.\n\n` +
+        `Customer phone: ${customerPhone}\n` +
+        `Device ID: ${deviceId}\n` +
+        `WhatsApp account: ${accountId}\n\n` +
+        `Process this request:\n` +
+        `1. Call contact_lookup with phone "${customerPhone}" to check their record\n` +
+        `2. If the customer has a paid plan (check the "plan_status" field for "paid" or "active"):\n` +
+        `   - Determine expiry: if plan is "lifetime", use 99 years. If plan_expires is set, use that date. Otherwise default 1 year.\n` +
+        `   - Call license_generate with deviceId "${deviceId}" and customerId "${customerPhone}"\n` +
+        `   - Send the license key to ${customerPhone} using the message tool (action: "send", target: "${customerPhone}", accountId: "${accountId}")\n` +
+        `   - Call contact_update to set field "license_key" to the token, "licensed_at" to today, and "device_id" to "${deviceId}"\n` +
+        `3. If no record exists or plan_status is not paid/active:\n` +
+        `   - Do NOT generate a license\n` +
+        `   - Notify the business owner that a license was requested but no paid plan was found for ${customerPhone}\n`;
+    const envelopeOptions = resolveEnvelopeFormatOptions(cfg);
+    const envelope = formatInboundEnvelope({
+        channel: "System",
+        from: "license-hook",
+        timestamp: Date.now(),
+        body: instruction,
+        chatType: "direct",
+        senderLabel: "License Request Hook",
+        envelope: envelopeOptions,
+    });
+    const ctx = {
+        Body: envelope, // formatted envelope; the raw instruction is kept in RawBody/CommandBody
+        RawBody: instruction,
+        CommandBody: instruction,
+        From: `license-${customerPhone}`, // same synthetic peer id used in the session key
+        SessionKey: sessionKey,
+        AccountId: accountId,
+        MessageSid: randomUUID(), // fresh id for every dispatch
+        ChatType: "direct",
+        CommandAuthorized: false,
+        Provider: "system",
+        Surface: "system",
+        OriginatingChannel: "system",
+        OriginatingTo: customerPhone,
+    };
+    const prefixCtx = createReplyPrefixContext({ cfg, agentId: adminAgentId });
+    await dispatchInboundMessageWithDispatcher({
+        ctx,
+        cfg,
+        dispatcherOptions: {
+            responsePrefix: prefixCtx.responsePrefix,
+            responsePrefixContextProvider: prefixCtx.responsePrefixContextProvider,
+            deliver: async () => {
+                // No-op: the admin agent sends the license via its message tool directly.
+                // We don't relay the admin's conversational reply anywhere.
+            },
+            onError: () => {
+                // Logged internally by the dispatcher
+            },
+        },
+        replyOptions: {
+            onModelSelected: prefixCtx.onModelSelected,
+        },
+    });
+}
+/**
+ * Main hook handler — detects device IDs in public agent inbound messages.
+ *
+ * Returns without doing anything unless all of the following hold:
+ * - the event is `message` / `inbound` and carries both a config and non-blank text;
+ * - the session's agent is not the one flagged `default: true` (the admin);
+ * - the session key has a DM layout from which a peer (customer phone) can be extracted;
+ * - the text contains a device ID matching DEVICE_ID_RE (only the first match is used);
+ * - the same phone/device pair was not already accepted within the dedup cooldown;
+ * - an admin agent id can be resolved from the config.
+ *
+ * The order of these checks matters: isDuplicate() records the request as a
+ * side effect, so it runs only after the cheaper, side-effect-free guards.
+ *
+ * @param {object} event - Hook event; reads `type`, `action`, `sessionKey` and
+ *   `context` (`cfg`, `text`, optional `accountId`).
+ * @returns {Promise<void>} Resolves once the dispatch has been started; the
+ *   dispatch itself is not awaited.
+ */
+const handleLicenseRequest = async (event) => {
+    if (event.type !== "message" || event.action !== "inbound")
+        return;
+    const context = event.context || {};
+    const cfg = context.cfg;
+    const text = context.text;
+    if (!cfg || !text?.trim())
+        return;
+    // Only act on public agent sessions (not admin, not groups)
+    const agentId = resolveAgentIdFromSessionKey(event.sessionKey);
+    const agentConfig = cfg.agents?.list?.find((a) => a.id === agentId);
+    const isAdminAgent = agentConfig?.default === true; // an agent missing from the list counts as non-admin
+    if (isAdminAgent)
+        return;
+    // Only DM sessions
+    const customerPhone = extractPeerFromSessionKey(event.sessionKey);
+    if (!customerPhone)
+        return;
+    // Check for device ID
+    const match = text.match(DEVICE_ID_RE);
+    if (!match)
+        return;
+    const deviceId = match[0]; // kept exactly as typed by the customer (the pattern is case-insensitive)
+    // Dedup check
+    if (isDuplicate(customerPhone, deviceId))
+        return;
+    // Find admin agent
+    const adminAgentId = findAdminAgentId(cfg);
+    if (!adminAgentId) {
+        console.warn("[license-request] No admin agent found in config");
+        return;
+    }
+    // Resolve WhatsApp account for delivery
+    const accountId = context.accountId ??
+        resolveAgentBoundAccountId(cfg, agentId, "whatsapp") ??
+        "default";
+    console.log(`[license-request] Device ID detected: ${deviceId} from ${customerPhone}, dispatching to admin agent "${adminAgentId}"`);
+    // Fire and forget — don't block the public agent's reply
+    dispatchToAdmin({ cfg, adminAgentId, customerPhone, deviceId, accountId }).catch((err) => {
+        console.error("[license-request] Failed to dispatch to admin:", err instanceof Error ? err.message : String(err));
+    });
+};
+export default handleLicenseRequest;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@rubytech/taskmaster",
-  "version": "1.0.98",
+  "version": "1.0.100",
   "description": "AI-powered business assistant for small businesses",
   "publishConfig": {
     "access": "public"

package/scripts/install.sh CHANGED Viewed

@@ -4,10 +4,10 @@ set -euo pipefail
 # Taskmaster — one-command install for fresh devices (Pi or Mac).
 #
 # Usage:
-#   curl -fsSL https://taskmaster.bot/install.sh | bash
+#   curl -fsSL https://taskmaster.bot/install.sh | sudo bash
 #
 # With custom port:
-#   curl -fsSL https://taskmaster.bot/install.sh | bash -s -- --port 19000
+#   curl -fsSL https://taskmaster.bot/install.sh | sudo bash -s -- --port 19000
 PORT=""
 for arg in "$@"; do

package/skills/image-gen/SKILL.md ADDED Viewed

@@ -0,0 +1,68 @@
+---
+name: image-gen
+description: Generate images using Google AI models (Gemini, Imagen 4). Guides users through model selection, style choices, and expert prompt crafting.
+metadata: {"taskmaster":{"emoji":"🎨"}}
+---
+# Image Generation
+Generate images from text descriptions using Google AI models. Two model families are available: Gemini (conversational, multi-turn editing) and Imagen 4 (dedicated generation, higher fidelity).
+## When to activate
+- User asks to create, generate, design, draw, or make an image, illustration, logo, photo, graphic, or visual
+- User sends an image and asks to edit, modify, or create a variation
+- User asks about image generation capabilities or model differences
+## Prerequisites
+Requires a Google AI API key. If missing, activate the `google-ai` skill to guide the user through setup first.
+## Critical rule
+Only the `image_generate` tool produces images. Never write file paths, MEDIA: references, or image URLs in text. If you did not call `image_generate`, no image was generated. There is no other mechanism.
+## Quick Model Reference
+| Model | Speed | Best for |
+|-------|-------|----------|
+| gemini-2.5-flash-image | Fast | Quick drafts, iteration, editing existing images |
+| gemini-3-pro-image-preview | Moderate | Text in images, highest Gemini quality, 4K |
+| imagen-4.0-fast-generate-001 | Fast | Rapid photo-realistic output |
+| imagen-4.0-generate-001 | Moderate | Balanced quality and speed |
+| imagen-4.0-ultra-generate-001 | Slow | Maximum fidelity, hero images |
+## References
+| Reference | When to load |
+|-----------|-------------|
+| `references/models.md` | Choosing between models, understanding API differences, resolution or feature questions |
+| `references/styles.md` | Discussing style, aspect ratio, mood, lighting, or colour choices with the user |
+| `references/prompting.md` | Crafting the generation prompt, iterating on results, business use cases |
+| `references/troubleshooting.md` | Any generation error — quota, auth, content policy, model availability |
+Load the relevant reference before proceeding. For a typical generation request: load `prompting.md` to craft the prompt, consult `models.md` if the user has specific quality/speed needs, and check `styles.md` if style discussion is needed. **On any error**, load `troubleshooting.md` immediately — it has browser-assisted resolution steps for every common failure.
+## Workflow
+### Phase 1: Gather (conversation with user)
+1. **Understand intent** — What does the user want? Product shot, social graphic, logo concept, illustration?
+2. **Recommend model** — Match speed/quality needs to a model. Default to Gemini Flash (`gemini-2.5-flash-image`) for quick work, Imagen Standard (`imagen-4.0-generate-001`) for quality.
+3. **Discuss style** — Ask about style, mood, and aspect ratio if the user hasn't specified. Suggest options based on use case.
+### Phase 2: Generate (MUST call tool)
+4. **Craft prompt and generate in a single turn** — Build an expert prompt using `references/prompting.md`. Show the prompt to the user AND call `image_generate` in the same response. Do not show the prompt without calling the tool. Do not wait for approval of the prompt before generating — generate immediately and iterate after.
+This step is a hard gate. You MUST call `image_generate` here. The conversation cannot continue past this point without a tool call. If you respond with text only, you have failed this step.
+### Phase 3: Deliver (requires tool result)
+5. **Present the result** — The tool result contains the generated image. The user sees it inline. Offer to refine: adjust style, change composition, try a different model, or edit specific elements.
+You cannot reach this step without the tool result from step 4. If `image_generate` was not called, go back to step 4.
+## Error Handling
+If generation fails, **load `references/troubleshooting.md` and follow its guidance**. Common issues (quota exceeded, auth errors, content policy) are all resolvable in-session. Use the browser tool to navigate to Google AI Studio with the user and resolve billing, quota, or key issues collaboratively.

package/skills/image-gen/references/models.md ADDED Viewed

@@ -0,0 +1,83 @@
+# Image Generation Models
+Two API backends, five models. Both use the same Google AI API key.
+---
+## API Backends
+### Gemini Native (generateContent)
+Conversational image generation built into the Gemini chat API. Supports multi-turn editing — you can generate an image, then ask to modify it in follow-up turns. Also supports image-to-image: send an existing image and ask for edits, style transfers, or variations.
+The Gemini backend accepts the same `generateContent` call used for text, with `responseModalities: ["TEXT", "IMAGE"]`. This means image generation is part of a natural conversation flow.
+### Imagen 4 (predict)
+Dedicated image generation API optimized for fidelity. Single-shot generation only — no multi-turn editing. Can produce 1-4 images per request, enabling the user to pick the best result. Higher baseline quality for photo-realistic output.
+---
+## Model Details
+| Model | API | Speed | Max Resolution | Strengths | Limitations |
+|-------|-----|-------|---------------|-----------|-------------|
+| gemini-2.5-flash-image | Gemini | Fast | 2K | Quick iterations, low cost, multi-turn editing, image-to-image | Lower detail than Pro |
+| gemini-3-pro-image-preview | Gemini | Moderate | 4K | Text rendering in images, thinking mode, accepts up to 14 reference images | Preview model, slower |
+| imagen-4.0-fast-generate-001 | Imagen | Fast | 1K | Rapid photo-realistic generation | Lower resolution than Standard |
+| imagen-4.0-generate-001 | Imagen | Moderate | 2K | Balanced quality and speed, good photo-realism | English only, 480 token prompt limit |
+| imagen-4.0-ultra-generate-001 | Imagen | Slow | 2K | Maximum fidelity, finest detail | Slowest, English only |
+---
+## Decision Matrix
+Choose based on the user's actual need, not the "best" model. Speed and iteration matter more than peak fidelity for most business use cases.
+| Use case | Recommended model | Why |
+|----------|-------------------|-----|
+| Quick draft or iteration | gemini-2.5-flash-image | Fastest turnaround, supports editing in follow-up messages |
+| Text in image (menu, sign, infographic) | gemini-3-pro-image-preview | Best text rendering of any model |
+| Photo-realistic product shot | imagen-4.0-generate-001 or imagen-4.0-ultra-generate-001 | Imagen excels at photo-realism |
+| Social media graphic | gemini-2.5-flash-image or imagen-4.0-fast-generate-001 | Speed matters for social content |
+| Hero image or print material | imagen-4.0-ultra-generate-001 or gemini-3-pro-image-preview at 4K | Maximum quality for final output |
+| Edit or refine an existing image | gemini-2.5-flash-image | Only Gemini supports multi-turn editing |
+| Multiple options to choose from | Any Imagen model | Imagen can generate 1-4 images per request |
+---
+## Key Differences Between Backends
+**Aspect ratios:**
+- Gemini supports wider ratios including 21:9 (ultrawide banners, website headers)
+- Imagen is limited to 5 aspect ratios: 1:1, 3:4, 4:3, 9:16, 16:9
+**Editing:**
+- Gemini supports multi-turn editing — generate, then refine in conversation
+- Imagen is single-shot only — each request is independent
+**Batch output:**
+- Imagen supports 1-4 images per request (`numberOfImages` parameter)
+- Gemini generates 1 image per request
+**Person generation:**
+- Imagen supports `personGeneration` control ("dont_allow", "allow_adult", "allow_all")
+- Gemini does not have this parameter
+**Language:**
+- Gemini accepts prompts in any language
+- Imagen accepts English only, with a 480-token prompt limit
+**Watermarking:**
+- All models apply SynthID digital watermark to generated images
+---
+## Default Recommendations
+When the user doesn't specify a preference:
+- **Start with Gemini Flash** for exploration and drafting — it's fast, cheap, and the user can iterate conversationally
+- **Switch to Imagen Standard or Ultra** when the user is happy with the concept and wants maximum quality for the final output
+- **Use Gemini Pro** when the image needs readable text (menus, signs, business cards, infographics)
+- **Offer Imagen batch mode** when the user wants options — "I can generate 4 variations for you to pick from"