@rubytech/taskmaster 1.0.98 → 1.0.100

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +1 -1
  2. package/dist/agents/system-prompt.js +2 -1
  3. package/dist/agents/taskmaster-tools.js +6 -0
  4. package/dist/agents/tool-policy.js +2 -0
  5. package/dist/agents/tools/image-generate-api.js +154 -0
  6. package/dist/agents/tools/image-generate-tool.js +145 -0
  7. package/dist/build-info.json +3 -3
  8. package/dist/control-ui/assets/{index-TI7yF6r_.js → index-BiXCzgVk.js} +244 -244
  9. package/dist/control-ui/assets/index-BiXCzgVk.js.map +1 -0
  10. package/dist/control-ui/assets/{index-6WdtDXJj.css → index-Bj8TaDNH.css} +1 -1
  11. package/dist/control-ui/index.html +2 -2
  12. package/dist/gateway/chat-sanitize.js +59 -4
  13. package/dist/gateway/control-ui.js +8 -7
  14. package/dist/gateway/server-methods/files.js +3 -3
  15. package/dist/hooks/bundled/license-request/HOOK.md +47 -0
  16. package/dist/hooks/bundled/license-request/handler.js +192 -0
  17. package/package.json +1 -1
  18. package/scripts/install.sh +2 -2
  19. package/skills/image-gen/SKILL.md +68 -0
  20. package/skills/image-gen/references/models.md +83 -0
  21. package/skills/image-gen/references/prompting.md +184 -0
  22. package/skills/image-gen/references/styles.md +113 -0
  23. package/skills/image-gen/references/troubleshooting.md +93 -0
  24. package/skills/taskmaster/SKILL.md +6 -6
  25. package/taskmaster-docs/USER-GUIDE.md +67 -6
  26. package/templates/beagle/agents/admin/AGENTS.md +2 -2
  27. package/templates/beagle/agents/public/AGENTS.md +2 -2
  28. package/templates/beagle/skills/beagle/SKILL.md +3 -3
  29. package/templates/beagle/skills/beagle/references/booking-schema.md +1 -1
  30. package/templates/beagle/skills/beagle/references/data-compliance.md +2 -2
  31. package/templates/beagle/skills/beagle/references/fee-collection.md +1 -1
  32. package/templates/beagle/skills/beagle/references/workflow.md +2 -2
  33. package/templates/maxy/TOOLS.md +15 -0
  34. package/templates/maxy/agents/admin/AGENTS.md +70 -0
  35. package/templates/maxy/agents/admin/BOOTSTRAP.md +30 -0
  36. package/templates/maxy/agents/admin/HEARTBEAT.md +6 -0
  37. package/templates/maxy/agents/admin/IDENTITY.md +13 -0
  38. package/templates/maxy/agents/admin/SOUL.md +21 -0
  39. package/templates/maxy/agents/admin/TOOLS.md +20 -0
  40. package/templates/maxy/agents/admin/USER.md +17 -0
  41. package/templates/maxy/agents/public/AGENTS.md +72 -0
  42. package/templates/maxy/agents/public/HEARTBEAT.md +2 -0
  43. package/templates/maxy/agents/public/IDENTITY.md +13 -0
  44. package/templates/maxy/agents/public/SOUL.md +60 -0
  45. package/templates/maxy/agents/public/TOOLS.md +20 -0
  46. package/templates/maxy/agents/public/USER.md +17 -0
  47. package/templates/maxy/memory/public/FAQ.md +241 -0
  48. package/templates/maxy/skills/maxy/SKILL.md +55 -0
  49. package/templates/maxy/skills/personal-assistant/SKILL.md +50 -0
  50. package/templates/taskmaster/agents/admin/AGENTS.md +20 -0
  51. package/templates/taskmaster/agents/public/AGENTS.md +9 -0
  52. package/dist/control-ui/assets/index-TI7yF6r_.js.map +0 -1
@@ -173,6 +173,29 @@ function extractMediaRefs(text) {
173
173
  }
174
174
  return refs;
175
175
  }
176
+ // Pattern: MEDIA:/absolute/path (used by tool results like image_generate)
177
+ const MEDIA_PREFIX_PATTERN = /\bMEDIA:(\S+)/g;
178
+ /**
179
+ * Parse MEDIA:/path references from text to extract file paths.
180
+ * Tool results (e.g. image_generate) use this format instead of
181
+ * [media attached: ...] annotations.
182
+ */
183
+ function extractMediaPrefixRefs(text) {
184
+ if (!text.includes("MEDIA:"))
185
+ return [];
186
+ const refs = [];
187
+ let match;
188
+ MEDIA_PREFIX_PATTERN.lastIndex = 0;
189
+ while ((match = MEDIA_PREFIX_PATTERN.exec(text)) !== null) {
190
+ const absPath = match[1]?.trim();
191
+ if (absPath) {
192
+ const ext = absPath.split(".").pop()?.toLowerCase() ?? "";
193
+ const mimeType = ext === "jpg" || ext === "jpeg" ? "image/jpeg" : "image/png";
194
+ refs.push({ absPath, mimeType });
195
+ }
196
+ }
197
+ return refs;
198
+ }
176
199
  function mediaRefToUrl(ref, workspaceRoot) {
177
200
  const relPath = nodePath.relative(workspaceRoot, ref.absPath);
178
201
  // Must stay within workspace (no ../ escapes)
@@ -254,8 +277,13 @@ function sanitizeMessageMedia(message, workspaceRoot) {
254
277
  if (!message || typeof message !== "object")
255
278
  return message;
256
279
  const entry = message;
257
- // Collect media refs from text content (works for both string and array content)
258
- const mediaRefs = extractMediaRefsFromMessage(entry);
280
+ // Collect media refs from text content (works for both string and array content).
281
+ // MEDIA: prefix refs are only extracted from tool result messages — assistant text
282
+ // may echo "MEDIA:" but that should not produce a duplicate image block.
283
+ const role = typeof entry.role === "string" ? entry.role.toLowerCase() : "";
284
+ const isToolResult = role === "toolresult" || role === "tool_result" ||
285
+ typeof entry.toolCallId === "string" || typeof entry.tool_call_id === "string";
286
+ const mediaRefs = extractMediaRefsFromMessage(entry, isToolResult);
259
287
  // Build URL-based image blocks from annotations
260
288
  const imageBlocks = [];
261
289
  for (const ref of mediaRefs) {
@@ -283,6 +311,28 @@ function sanitizeMessageMedia(message, workspaceRoot) {
283
311
  }
284
312
  return true;
285
313
  });
314
+ // Strip MEDIA:/path text from ALL messages (tool results AND assistant echoes).
315
+ // This prevents raw file paths from ever showing in the chat UI.
316
+ for (let i = 0; i < filtered.length; i++) {
317
+ const block = filtered[i];
318
+ if (block.type === "text" && typeof block.text === "string" && block.text.includes("MEDIA:")) {
319
+ const cleaned = block.text
320
+ .split(/\r?\n/)
321
+ .filter((line) => !/\bMEDIA:\S+/.test(line))
322
+ .join("\n")
323
+ .trim();
324
+ if (!cleaned) {
325
+ filtered.splice(i, 1);
326
+ i--;
327
+ didChange = true;
328
+ }
329
+ else if (cleaned !== block.text) {
330
+ filtered[i] = { ...block, text: cleaned };
331
+ didChange = true;
332
+ }
333
+ }
334
+ }
335
+ // Add URL-based image blocks from tool result annotations
286
336
  if (imageBlocks.length > 0) {
287
337
  didChange = true;
288
338
  filtered.push(...imageBlocks);
@@ -291,9 +341,12 @@ function sanitizeMessageMedia(message, workspaceRoot) {
291
341
  return message;
292
342
  return { ...entry, content: filtered };
293
343
  }
294
- function extractMediaRefsFromMessage(entry) {
344
+ function extractMediaRefsFromMessage(entry, includeMediaPrefix) {
295
345
  if (typeof entry.content === "string") {
296
- return extractMediaRefs(entry.content);
346
+ const refs = extractMediaRefs(entry.content);
347
+ if (includeMediaPrefix)
348
+ refs.push(...extractMediaPrefixRefs(entry.content));
349
+ return refs;
297
350
  }
298
351
  if (Array.isArray(entry.content)) {
299
352
  const refs = [];
@@ -303,6 +356,8 @@ function extractMediaRefsFromMessage(entry) {
303
356
  const b = block;
304
357
  if (b.type === "text" && typeof b.text === "string") {
305
358
  refs.push(...extractMediaRefs(b.text));
359
+ if (includeMediaPrefix)
360
+ refs.push(...extractMediaPrefixRefs(b.text));
306
361
  }
307
362
  }
308
363
  return refs;
@@ -474,26 +474,27 @@ export function handlePublicChatHttpRequest(req, res, opts) {
474
474
  /** Widget script content — self-contained JS for embedding. */
475
475
  const WIDGET_SCRIPT = `(function(){
476
476
  "use strict";
477
- var cfg={server:"",accountId:""};
477
+ var cfg={server:"",accountId:"",color:"#1a1a2e"};
478
478
  var isOpen=false;
479
479
  var btn,overlay,iframe;
480
480
 
481
481
  function init(opts){
482
482
  if(opts&&opts.server) cfg.server=opts.server.replace(/\\/$/,"");
483
483
  if(opts&&opts.accountId) cfg.accountId=opts.accountId;
484
+ if(opts&&opts.color) cfg.color=opts.color;
484
485
  build();
485
486
  }
486
487
 
487
488
  function build(){
488
489
  var css=document.createElement("style");
489
490
  css.textContent=[
490
- ".tm-widget-btn{position:fixed;bottom:20px;right:20px;width:60px;height:60px;",
491
- "border-radius:50%;background:#0078ff;color:#fff;border:none;cursor:pointer;",
492
- "box-shadow:0 4px 12px rgba(0,0,0,.25);z-index:999999;font-size:28px;",
491
+ ".tm-widget-btn{position:fixed;bottom:20px;right:20px;width:48px;height:48px;",
492
+ "border-radius:50%;background:"+cfg.color+";color:#fff;border:none;cursor:pointer;",
493
+ "box-shadow:0 2px 8px rgba(0,0,0,.3);z-index:999999;font-size:22px;",
493
494
  "display:flex;align-items:center;justify-content:center;transition:transform .2s}",
494
- ".tm-widget-btn:hover{transform:scale(1.1)}",
495
- ".tm-widget-overlay{position:fixed;bottom:90px;right:20px;width:400px;height:600px;",
496
- "max-width:calc(100vw - 40px);max-height:calc(100vh - 110px);",
495
+ ".tm-widget-btn:hover{transform:scale(1.08)}",
496
+ ".tm-widget-overlay{position:fixed;bottom:78px;right:20px;width:400px;height:600px;",
497
+ "max-width:calc(100vw - 40px);max-height:calc(100vh - 98px);",
497
498
  "border-radius:12px;overflow:hidden;box-shadow:0 8px 30px rgba(0,0,0,.3);",
498
499
  "z-index:999998;display:none;background:#1a1a2e}",
499
500
  ".tm-widget-overlay.open{display:block}",
@@ -1,6 +1,6 @@
1
1
  import fsp from "node:fs/promises";
2
2
  import path from "node:path";
3
- import { resolveAgentWorkspaceDir, resolveDefaultAgentId } from "../../agents/agent-scope.js";
3
+ import { resolveAgentWorkspaceRoot, resolveDefaultAgentId } from "../../agents/agent-scope.js";
4
4
  import { loadConfig } from "../../config/config.js";
5
5
  import { ErrorCodes, errorShape } from "../protocol/index.js";
6
6
  const MAX_PREVIEW_BYTES = 256 * 1024; // 256 KB for preview
@@ -8,7 +8,7 @@ const MAX_DOWNLOAD_BYTES = 5 * 1024 * 1024; // 5 MB for download
8
8
  const MAX_UPLOAD_BYTES = 5 * 1024 * 1024; // 5 MB for upload
9
9
  function resolveWorkspaceRoot() {
10
10
  const cfg = loadConfig();
11
- return resolveAgentWorkspaceDir(cfg, resolveDefaultAgentId(cfg));
11
+ return resolveAgentWorkspaceRoot(cfg, resolveDefaultAgentId(cfg));
12
12
  }
13
13
  /**
14
14
  * Resolve workspace root from request params.
@@ -20,7 +20,7 @@ function resolveWorkspaceForRequest(params) {
20
20
  if (!agentId)
21
21
  return resolveWorkspaceRoot();
22
22
  const cfg = loadConfig();
23
- return resolveAgentWorkspaceDir(cfg, agentId);
23
+ return resolveAgentWorkspaceRoot(cfg, agentId);
24
24
  }
25
25
  /**
26
26
  * Validate and resolve a relative path within the workspace.
@@ -0,0 +1,47 @@
1
+ ---
2
+ name: license-request
3
+ description: "Detect device IDs in public agent conversations and dispatch license generation to admin agent"
4
+ homepage: https://docs.taskmaster.bot/hooks#license-request
5
+ metadata:
6
+ {
7
+ "taskmaster":
8
+ {
9
+ "emoji": "🔑",
10
+ "events": ["message:inbound"],
11
+ "requires": { "config": ["workspace.dir"] },
12
+ "install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with Taskmaster" }],
13
+ },
14
+ }
15
+ ---
16
+
17
+ # License Request Hook
18
+
19
+ Detects when a customer sends a device ID (`tm_dev_...`) to the public agent and automatically dispatches a license generation request to the admin agent.
20
+
21
+ ## What It Does
22
+
23
+ When an inbound message to the public agent contains a device ID:
24
+
25
+ 1. **Extracts device ID and customer phone** from the message and session key
26
+ 2. **Dispatches to admin agent** with a structured license processing instruction
27
+ 3. **Admin agent autonomously processes** — checks contact records, generates license if paid, sends to customer
28
+
29
+ ## Why This Exists
30
+
31
+ The public agent cannot generate license keys (security boundary — untrusted input, prompt injection risk). The admin agent has the `license_generate` tool but previously had no way to know when a customer requested a license. This hook bridges that gap.
32
+
33
+ ## Behaviour
34
+
35
+ - Only fires for **public agent DM sessions** (not admin, not groups)
36
+ - Matches `tm_dev_` followed by 10+ hex characters
37
+ - **Deduplicates** — the same device ID from the same phone within 5 minutes is ignored
38
+ - **Non-blocking** — dispatch is fire-and-forget so the public agent's reply is not delayed
39
+ - Admin agent uses `contact_lookup` → `license_generate` → `message` → `contact_update`
40
+
41
+ ## Configuration
42
+
43
+ No additional configuration required. Disable with:
44
+
45
+ ```bash
46
+ taskmaster hooks disable license-request
47
+ ```
@@ -0,0 +1,192 @@
1
+ /**
2
+ * License Request Hook Handler
3
+ *
4
+ * Detects device IDs (tm_dev_*) in public agent inbound messages and
5
+ * dispatches a license generation request to the admin agent.
6
+ *
7
+ * The admin agent then:
8
+ * 1. Looks up the customer via contact_lookup
9
+ * 2. Checks payment status
10
+ * 3. Generates a license via license_generate (if paid)
11
+ * 4. Sends the key to the customer via the message tool
12
+ * 5. Records the issuance via contact_update
13
+ */
14
+ import { randomUUID } from "node:crypto";
15
+ import { dispatchInboundMessageWithDispatcher } from "../../../auto-reply/dispatch.js";
16
+ import { formatInboundEnvelope, resolveEnvelopeFormatOptions } from "../../../auto-reply/envelope.js";
17
+ import { createReplyPrefixContext } from "../../../channels/reply-prefix.js";
18
+ import { resolveDefaultAgentId } from "../../../agents/agent-scope.js";
19
+ import { resolveAgentIdFromSessionKey } from "../../../routing/session-key.js";
20
+ import { resolveAgentBoundAccountId } from "../../../routing/bindings.js";
21
+ import { buildAgentSessionKey } from "../../../routing/resolve-route.js";
22
+ /** Device ID pattern: tm_dev_ followed by 10+ hex characters. */
23
+ const DEVICE_ID_RE = /\btm_dev_[a-f0-9]{10,}\b/i;
24
+ /**
25
+ * Dedup cache: Map<"phone:deviceId", timestamp>.
26
+ * Prevents re-dispatching the same request within the cooldown window.
27
+ */
28
+ const recentRequests = new Map();
29
+ const DEDUP_COOLDOWN_MS = 5 * 60 * 1000; // 5 minutes
30
+ function isDuplicate(phone, deviceId) {
31
+ const key = `${phone}:${deviceId}`;
32
+ const now = Date.now();
33
+ // Prune stale entries
34
+ for (const [k, ts] of recentRequests) {
35
+ if (now - ts > DEDUP_COOLDOWN_MS)
36
+ recentRequests.delete(k);
37
+ }
38
+ const lastSeen = recentRequests.get(key);
39
+ if (lastSeen && now - lastSeen < DEDUP_COOLDOWN_MS)
40
+ return true;
41
+ recentRequests.set(key, now);
42
+ return false;
43
+ }
44
+ /**
45
+ * Extract peer phone from a DM session key.
46
+ *
47
+ * Formats:
48
+ * - 4-part: agent:{agentId}:dm:{peer}
49
+ * - 5-part: agent:{agentId}:{channel}:dm:{peer}
50
+ */
51
+ function extractPeerFromSessionKey(sessionKey) {
52
+ const parts = sessionKey.toLowerCase().split(":").filter(Boolean);
53
+ if (parts[0] !== "agent" || parts.length < 4)
54
+ return null;
55
+ if (parts.length >= 4 && parts[2] === "dm")
56
+ return parts.slice(3).join(":");
57
+ if (parts.length >= 5 && parts[3] === "dm")
58
+ return parts.slice(4).join(":");
59
+ return null;
60
+ }
61
+ /**
62
+ * Find the admin agent ID from config.
63
+ * The admin agent is the one marked `default: true`.
64
+ */
65
+ function findAdminAgentId(cfg) {
66
+ const agents = cfg.agents?.list ?? [];
67
+ const admin = agents.find((a) => a.default === true);
68
+ if (admin?.id)
69
+ return admin.id;
70
+ // Fallback: the config's resolved default agent (which is usually admin)
71
+ const defaultId = resolveDefaultAgentId(cfg);
72
+ return defaultId || null;
73
+ }
74
+ /**
75
+ * Dispatch the license request to the admin agent.
76
+ * Fire-and-forget — errors are logged, not thrown.
77
+ */
78
+ async function dispatchToAdmin(params) {
79
+ const { cfg, adminAgentId, customerPhone, deviceId, accountId } = params;
80
+ // Build a session key for the admin agent scoped to this license request.
81
+ // Uses the admin's main session so it has full tool access.
82
+ const sessionKey = buildAgentSessionKey({
83
+ agentId: adminAgentId,
84
+ channel: "system",
85
+ peer: { kind: "dm", id: `license-${customerPhone}` },
86
+ }).toLowerCase();
87
+ const instruction = `[System: License Request]\n\n` +
88
+ `A customer has sent their device ID to activate Taskmaster.\n\n` +
89
+ `Customer phone: ${customerPhone}\n` +
90
+ `Device ID: ${deviceId}\n` +
91
+ `WhatsApp account: ${accountId}\n\n` +
92
+ `Process this request:\n` +
93
+ `1. Call contact_lookup with phone "${customerPhone}" to check their record\n` +
94
+ `2. If the customer has a paid plan (check the "plan_status" field for "paid" or "active"):\n` +
95
+ ` - Determine expiry: if plan is "lifetime", use 99 years. If plan_expires is set, use that date. Otherwise default 1 year.\n` +
96
+ ` - Call license_generate with deviceId "${deviceId}" and customerId "${customerPhone}"\n` +
97
+ ` - Send the license key to ${customerPhone} using the message tool (action: "send", target: "${customerPhone}", accountId: "${accountId}")\n` +
98
+ ` - Call contact_update to set field "license_key" to the token, "licensed_at" to today, and "device_id" to "${deviceId}"\n` +
99
+ `3. If no record exists or plan_status is not paid/active:\n` +
100
+ ` - Do NOT generate a license\n` +
101
+ ` - Notify the business owner that a license was requested but no paid plan was found for ${customerPhone}\n`;
102
+ const envelopeOptions = resolveEnvelopeFormatOptions(cfg);
103
+ const envelope = formatInboundEnvelope({
104
+ channel: "System",
105
+ from: "license-hook",
106
+ timestamp: Date.now(),
107
+ body: instruction,
108
+ chatType: "direct",
109
+ senderLabel: "License Request Hook",
110
+ envelope: envelopeOptions,
111
+ });
112
+ const ctx = {
113
+ Body: envelope,
114
+ RawBody: instruction,
115
+ CommandBody: instruction,
116
+ From: `license-${customerPhone}`,
117
+ SessionKey: sessionKey,
118
+ AccountId: accountId,
119
+ MessageSid: randomUUID(),
120
+ ChatType: "direct",
121
+ CommandAuthorized: false,
122
+ Provider: "system",
123
+ Surface: "system",
124
+ OriginatingChannel: "system",
125
+ OriginatingTo: customerPhone,
126
+ };
127
+ const prefixCtx = createReplyPrefixContext({ cfg, agentId: adminAgentId });
128
+ await dispatchInboundMessageWithDispatcher({
129
+ ctx,
130
+ cfg,
131
+ dispatcherOptions: {
132
+ responsePrefix: prefixCtx.responsePrefix,
133
+ responsePrefixContextProvider: prefixCtx.responsePrefixContextProvider,
134
+ deliver: async () => {
135
+ // No-op: the admin agent sends the license via its message tool directly.
136
+ // We don't relay the admin's conversational reply anywhere.
137
+ },
138
+ onError: () => {
139
+ // Logged internally by the dispatcher
140
+ },
141
+ },
142
+ replyOptions: {
143
+ onModelSelected: prefixCtx.onModelSelected,
144
+ },
145
+ });
146
+ }
147
+ /**
148
+ * Main hook handler — detects device IDs in public agent inbound messages.
149
+ */
150
+ const handleLicenseRequest = async (event) => {
151
+ if (event.type !== "message" || event.action !== "inbound")
152
+ return;
153
+ const context = event.context || {};
154
+ const cfg = context.cfg;
155
+ const text = context.text;
156
+ if (!cfg || !text?.trim())
157
+ return;
158
+ // Only act on public agent sessions (not admin, not groups)
159
+ const agentId = resolveAgentIdFromSessionKey(event.sessionKey);
160
+ const agentConfig = cfg.agents?.list?.find((a) => a.id === agentId);
161
+ const isAdminAgent = agentConfig?.default === true;
162
+ if (isAdminAgent)
163
+ return;
164
+ // Only DM sessions
165
+ const customerPhone = extractPeerFromSessionKey(event.sessionKey);
166
+ if (!customerPhone)
167
+ return;
168
+ // Check for device ID
169
+ const match = text.match(DEVICE_ID_RE);
170
+ if (!match)
171
+ return;
172
+ const deviceId = match[0];
173
+ // Dedup check
174
+ if (isDuplicate(customerPhone, deviceId))
175
+ return;
176
+ // Find admin agent
177
+ const adminAgentId = findAdminAgentId(cfg);
178
+ if (!adminAgentId) {
179
+ console.warn("[license-request] No admin agent found in config");
180
+ return;
181
+ }
182
+ // Resolve WhatsApp account for delivery
183
+ const accountId = context.accountId ??
184
+ resolveAgentBoundAccountId(cfg, agentId, "whatsapp") ??
185
+ "default";
186
+ console.log(`[license-request] Device ID detected: ${deviceId} from ${customerPhone}, dispatching to admin agent "${adminAgentId}"`);
187
+ // Fire and forget — don't block the public agent's reply
188
+ dispatchToAdmin({ cfg, adminAgentId, customerPhone, deviceId, accountId }).catch((err) => {
189
+ console.error("[license-request] Failed to dispatch to admin:", err instanceof Error ? err.message : String(err));
190
+ });
191
+ };
192
+ export default handleLicenseRequest;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rubytech/taskmaster",
3
- "version": "1.0.98",
3
+ "version": "1.0.100",
4
4
  "description": "AI-powered business assistant for small businesses",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -4,10 +4,10 @@ set -euo pipefail
4
4
  # Taskmaster — one-command install for fresh devices (Pi or Mac).
5
5
  #
6
6
  # Usage:
7
- # curl -fsSL https://taskmaster.bot/install.sh | bash
7
+ # curl -fsSL https://taskmaster.bot/install.sh | sudo bash
8
8
  #
9
9
  # With custom port:
10
- # curl -fsSL https://taskmaster.bot/install.sh | bash -s -- --port 19000
10
+ # curl -fsSL https://taskmaster.bot/install.sh | sudo bash -s -- --port 19000
11
11
 
12
12
  PORT=""
13
13
  for arg in "$@"; do
@@ -0,0 +1,68 @@
1
+ ---
2
+ name: image-gen
3
+ description: Generate images using Google AI models (Gemini, Imagen 4). Guides users through model selection, style choices, and expert prompt crafting.
4
+ metadata: {"taskmaster":{"emoji":"🎨"}}
5
+ ---
6
+
7
+ # Image Generation
8
+
9
+ Generate images from text descriptions using Google AI models. Two model families are available: Gemini (conversational, multi-turn editing) and Imagen 4 (dedicated generation, higher fidelity).
10
+
11
+ ## When to activate
12
+
13
+ - User asks to create, generate, design, draw, or make an image, illustration, logo, photo, graphic, or visual
14
+ - User sends an image and asks to edit, modify, or create a variation
15
+ - User asks about image generation capabilities or model differences
16
+
17
+ ## Prerequisites
18
+
19
+ Requires a Google AI API key. If missing, activate the `google-ai` skill to guide the user through setup first.
20
+
21
+ ## Critical rule
22
+
23
+ Only the `image_generate` tool produces images. Never write file paths, MEDIA: references, or image URLs in text. If you did not call `image_generate`, no image was generated. There is no other mechanism.
24
+
25
+ ## Quick Model Reference
26
+
27
+ | Model | Speed | Best for |
28
+ |-------|-------|----------|
29
+ | gemini-2.5-flash-image | Fast | Quick drafts, iteration, editing existing images |
30
+ | gemini-3-pro-image-preview | Moderate | Text in images, highest Gemini quality, 4K |
31
+ | imagen-4.0-fast-generate-001 | Fast | Rapid photo-realistic output |
32
+ | imagen-4.0-generate-001 | Moderate | Balanced quality and speed |
33
+ | imagen-4.0-ultra-generate-001 | Slow | Maximum fidelity, hero images |
34
+
35
+ ## References
36
+
37
+ | Reference | When to load |
38
+ |-----------|-------------|
39
+ | `references/models.md` | Choosing between models, understanding API differences, resolution or feature questions |
40
+ | `references/styles.md` | Discussing style, aspect ratio, mood, lighting, or colour choices with the user |
41
+ | `references/prompting.md` | Crafting the generation prompt, iterating on results, business use cases |
42
+ | `references/troubleshooting.md` | Any generation error — quota, auth, content policy, model availability |
43
+
44
+ Load the relevant reference before proceeding. For a typical generation request: load `prompting.md` to craft the prompt, consult `models.md` if the user has specific quality/speed needs, and check `styles.md` if style discussion is needed. **On any error**, load `troubleshooting.md` immediately — it has browser-assisted resolution steps for every common failure.
45
+
46
+ ## Workflow
47
+
48
+ ### Phase 1: Gather (conversation with user)
49
+
50
+ 1. **Understand intent** — What does the user want? Product shot, social graphic, logo concept, illustration?
51
+ 2. **Recommend model** — Match speed/quality needs to a model. Default to Gemini Flash for quick work, Imagen Standard for quality.
52
+ 3. **Discuss style** — Ask about style, mood, and aspect ratio if the user hasn't specified. Suggest options based on use case.
53
+
54
+ ### Phase 2: Generate (MUST call tool)
55
+
56
+ 4. **Craft prompt and generate in a single turn** — Build an expert prompt using `references/prompting.md`. Show the prompt to the user AND call `image_generate` in the same response. Do not show the prompt without calling the tool. Do not wait for approval of the prompt before generating — generate immediately and iterate after.
57
+
58
+ This step is a hard gate. You MUST call `image_generate` here. The conversation cannot continue past this point without a tool call. If you respond with text only, you have failed this step.
59
+
60
+ ### Phase 3: Deliver (requires tool result)
61
+
62
+ 5. **Present the result** — The tool result contains the generated image. The user sees it inline. Offer to refine: adjust style, change composition, try a different model, or edit specific elements.
63
+
64
+ You cannot reach this step without the tool result from step 4. If `image_generate` was not called, go back to step 4.
65
+
66
+ ## Error Handling
67
+
68
+ If generation fails, **load `references/troubleshooting.md` and follow its guidance**. Common issues (quota exceeded, auth errors, content policy) are all resolvable in-session. Use the browser tool to navigate to Google AI Studio with the user and resolve billing, quota, or key issues collaboratively.
@@ -0,0 +1,83 @@
1
+ # Image Generation Models
2
+
3
+ Two API backends, five models. Both use the same Google AI API key.
4
+
5
+ ---
6
+
7
+ ## API Backends
8
+
9
+ ### Gemini Native (generateContent)
10
+
11
+ Conversational image generation built into the Gemini chat API. Supports multi-turn editing — you can generate an image, then ask to modify it in follow-up turns. Also supports image-to-image: send an existing image and ask for edits, style transfers, or variations.
12
+
13
+ The Gemini backend accepts the same `generateContent` call used for text, with `responseModalities: ["TEXT", "IMAGE"]`. This means image generation is part of a natural conversation flow.
14
+
15
+ ### Imagen 4 (predict)
16
+
17
+ Dedicated image generation API optimized for fidelity. Single-shot generation only — no multi-turn editing. Can produce 1-4 images per request, enabling the user to pick the best result. Higher baseline quality for photo-realistic output.
18
+
19
+ ---
20
+
21
+ ## Model Details
22
+
23
+ | Model | API | Speed | Max Resolution | Strengths | Limitations |
24
+ |-------|-----|-------|---------------|-----------|-------------|
25
+ | gemini-2.5-flash-image | Gemini | Fast | 2K | Quick iterations, low cost, multi-turn editing, image-to-image | Lower detail than Pro |
26
+ | gemini-3-pro-image-preview | Gemini | Moderate | 4K | Text rendering in images, thinking mode, accepts up to 14 reference images | Preview model, slower |
27
+ | imagen-4.0-fast-generate-001 | Imagen | Fast | 1K | Rapid photo-realistic generation | Lower resolution than Standard |
28
+ | imagen-4.0-generate-001 | Imagen | Moderate | 2K | Balanced quality and speed, good photo-realism | English only, 480-token prompt limit |
29
+ | imagen-4.0-ultra-generate-001 | Imagen | Slow | 2K | Maximum fidelity, finest detail | Slowest, English only |
30
+
31
+ ---
32
+
33
+ ## Decision Matrix
34
+
35
+ Choose based on the user's actual need, not the "best" model. Speed and iteration matter more than peak fidelity for most business use cases.
36
+
37
+ | Use case | Recommended model | Why |
38
+ |----------|-------------------|-----|
39
+ | Quick draft or iteration | gemini-2.5-flash-image | Fastest turnaround, supports editing in follow-up messages |
40
+ | Text in image (menu, sign, infographic) | gemini-3-pro-image-preview | Best text rendering of any model |
41
+ | Photo-realistic product shot | imagen-4.0-generate-001 or ultra | Imagen excels at photo-realism |
42
+ | Social media graphic | gemini-2.5-flash-image or imagen-4.0-fast | Speed matters for social content |
43
+ | Hero image or print material | imagen-4.0-ultra-generate-001 or gemini-3-pro at 4K | Maximum quality for final output |
44
+ | Edit or refine an existing image | gemini-2.5-flash-image | Only Gemini supports multi-turn editing |
45
+ | Multiple options to choose from | Any Imagen model | Imagen can generate 1-4 images per request |
46
+
47
+ ---
48
+
49
+ ## Key Differences Between Backends
50
+
51
+ **Aspect ratios:**
52
+ - Gemini supports wider ratios including 21:9 (ultrawide banners, website headers)
53
+ - Imagen is limited to 5 aspect ratios: 1:1, 3:4, 4:3, 9:16, 16:9
54
+
55
+ **Editing:**
56
+ - Gemini supports multi-turn editing — generate, then refine in conversation
57
+ - Imagen is single-shot only — each request is independent
58
+
59
+ **Batch output:**
60
+ - Imagen supports 1-4 images per request (`numberOfImages` parameter)
61
+ - Gemini generates 1 image per request
62
+
63
+ **Person generation:**
64
+ - Imagen supports `personGeneration` control ("dont_allow", "allow_adult", "allow_all")
65
+ - Gemini does not have this parameter
66
+
67
+ **Language:**
68
+ - Gemini accepts prompts in any language
69
+ - Imagen accepts English only, with a 480-token prompt limit
70
+
71
+ **Watermarking:**
72
+ - All models apply a SynthID digital watermark to generated images
73
+
74
+ ---
75
+
76
+ ## Default Recommendations
77
+
78
+ When the user doesn't specify a preference:
79
+
80
+ - **Start with Gemini Flash** for exploration and drafting — it's fast, cheap, and the user can iterate conversationally
81
+ - **Switch to Imagen Standard or Ultra** when the user is happy with the concept and wants maximum quality for the final output
82
+ - **Use Gemini Pro** when the image needs readable text (menus, signs, business cards, infographics)
83
+ - **Offer Imagen batch mode** when the user wants options — "I can generate 4 variations for you to pick from"