npm - discoclaw - Versions diffs - 0.8.0 → 0.8.2 - Mend

discoclaw 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/dist/cron/executor.js +2 -1
package/dist/discord/actions-config.js +28 -2
package/dist/discord/actions-config.test.js +4 -4
package/dist/discord/actions-imagegen.js +155 -30
package/dist/discord/actions-imagegen.test.js +608 -24
package/dist/discord/actions-messaging.js +40 -4
package/dist/discord/actions-messaging.test.js +169 -0
package/dist/discord/actions.js +6 -3
package/dist/discord/deferred-runner.js +2 -0
package/dist/discord/image-download.js +54 -66
package/dist/discord/image-download.test.js +156 -144
package/dist/discord/message-coordinator.js +54 -13
package/dist/discord/message-coordinator.test.js +206 -1
package/dist/discord/message-history.js +21 -9
package/dist/discord/message-history.test.js +98 -15
package/dist/discord/models-command.js +3 -3
package/dist/discord/models-command.test.js +3 -4
package/dist/discord/reaction-handler.js +4 -1
package/dist/discord/reaction-handler.test.js +4 -1
package/dist/discord/reply-reference.test.js +2 -0
package/dist/discord/streaming-progress.js +55 -0
package/dist/discord/thread-context.js +61 -27
package/dist/discord/thread-context.test.js +180 -1
package/dist/image/url-safety.js +159 -0
package/dist/image/url-safety.test.js +193 -0
package/dist/index.js +10 -0
package/dist/runtime/model-tiers.js +1 -1
package/dist/runtime/model-tiers.test.js +4 -4
package/dist/runtime/tools/image-download.js +65 -26
package/dist/runtime/tools/image-download.test.js +9 -3
package/package.json +1 -1

package/dist/cron/executor.js CHANGED

@@ -1,3 +1,4 @@
+import { resolveDefaultModel as resolveImagegenDefaultModel } from '../discord/actions-imagegen.js';
 import { acquireCronLock, releaseCronLock } from './job-lock.js';
 import { resolveChannel } from '../discord/action-utils.js';
 import { DiscordTransportClient } from '../discord/transport-client.js';
@@ -211,7 +212,7 @@ export async function executeCronJob(job, ctx) {
             });
         // Inject tiered action schema documentation when discord actions are enabled.
         if (ctx.discordActionsEnabled) {
-            const actionSelection = buildTieredDiscordActionsPromptSection(cronActionFlags, ctx.botDisplayName, { userText: job.def.prompt });
+            const actionSelection = buildTieredDiscordActionsPromptSection(cronActionFlags, ctx.botDisplayName, { userText: job.def.prompt, imagegenDefaultModel: ctx.imagegenCtx ? resolveImagegenDefaultModel(ctx.imagegenCtx) : undefined });
             if (actionSelection.prompt) {
                 prompt += '\n\n---\n' + actionSelection.prompt;
             }

package/dist/discord/actions-config.js CHANGED

@@ -21,6 +21,7 @@ const ROLE_DESCRIPTIONS = {
     cron: 'Cron auto-tagging and model classification',
     'cron-exec': 'Default model for cron job execution (overridden by per-job settings)',
     voice: 'Voice channel AI responses',
+    imagegen: 'Default model for image generation',
 };
 // ---------------------------------------------------------------------------
 // Executor
@@ -229,6 +230,15 @@ export function executeConfigAction(action, configCtx) {
                         return { ok: false, error: 'Voice subsystem not configured' };
                     }
                     break;
+                case 'imagegen':
+                    if (bp.imagegenCtx) {
+                        bp.imagegenCtx.defaultModel = model;
+                        changes.push(`imagegen → ${model}`);
+                    }
+                    else {
+                        return { ok: false, error: 'Imagegen subsystem not configured' };
+                    }
+                    break;
                 default:
                     return { ok: false, error: `Unknown role: ${String(action.role)}` };
             }
@@ -338,6 +348,21 @@ export function executeConfigAction(action, configCtx) {
                             }
                         }
                         break;
+                    case 'imagegen':
+                        if (bp.imagegenCtx) {
+                            // Restore the fallback-resolved default (env setting or provider-based fallback).
+                            const igEnvDefault = defaults['imagegen'];
+                            if (igEnvDefault) {
+                                bp.imagegenCtx.defaultModel = igEnvDefault;
+                            }
+                            else {
+                                // Clear override so resolveDefaultModel falls back to provider detection.
+                                bp.imagegenCtx.defaultModel = undefined;
+                            }
+                            const resolvedIg = resolveDefaultModel(bp.imagegenCtx);
+                            resetChanges.push(`imagegen → ${resolvedIg}`);
+                        }
+                        break;
                 }
                 // Clear the override marker regardless of whether we had a default.
                 if (configCtx.overrideSources) {
@@ -382,7 +407,7 @@ export function executeConfigAction(action, configCtx) {
             if (bp.imagegenCtx) {
                 const igModel = resolveDefaultModel(bp.imagegenCtx);
                 const igProvider = resolveProvider(igModel);
-                rows.push(['imagegen', igModel, `Image generation (${igProvider})`, '']);
+                rows.push(['imagegen', igModel, `Image generation (${igProvider})`, ovr('imagegen')]);
             }
             else {
                 rows.push(['imagegen', 'setup-required', 'Image generation (setup required)', '']);
@@ -474,7 +499,7 @@ export function configActionsPromptSection() {
 <discord-action>{"type":"modelSet","role":"chat","model":"sonnet"}</discord-action>
 <discord-action>{"type":"modelSet","role":"fast","model":"haiku"}</discord-action>
 \`\`\`
-- \`role\` (required): One of \`chat\`, \`plan-run\`, \`fast\`, \`forge-drafter\`, \`forge-auditor\`, \`summary\`, \`cron\`, \`cron-exec\`, \`voice\`.
+- \`role\` (required): One of \`chat\`, \`plan-run\`, \`fast\`, \`forge-drafter\`, \`forge-auditor\`, \`summary\`, \`cron\`, \`cron-exec\`, \`voice\`, \`imagegen\`.
 - \`model\` (required): Model tier (\`fast\`, \`capable\`, \`deep\`), concrete model name (\`haiku\`, \`sonnet\`, \`opus\`), runtime name (\`openrouter\`, \`gemini\` — for \`chat\` and \`voice\` roles, swaps the active runtime adapter independently), or \`default\` (for cron-exec only, to revert to the startup default for that role). For the \`voice\` role, setting a model name that belongs to a different provider's tier map (e.g. \`sonnet\` while voice is on Gemini) will auto-switch the voice runtime to match.
 **Roles:**
@@ -489,6 +514,7 @@ export function configActionsPromptSection() {
 | \`cron\` | Cron auto-tagging and model classification (overrides fast) |
 | \`cron-exec\` | Default model for cron job execution; per-job overrides (via \`cronUpdate\`) take priority |
 | \`voice\` | Voice channel AI responses |
+| \`imagegen\` | Default model for image generation |
 Changes are **persisted** to \`models.json\` and survive restart. Use \`!models reset\` to clear overrides and revert to defaults.

package/dist/discord/actions-config.test.js CHANGED

@@ -243,7 +243,7 @@ describe('modelShow imagegen row', () => {
         if (!result.ok)
             return;
         expect(result.summary).toContain('imagegen');
-        expect(result.summary).toContain('imagen-4.0-generate-001');
+        expect(result.summary).toContain('gemini-3.1-flash-image-preview');
         expect(result.summary).toContain('gemini');
     });
     it('respects explicit defaultModel', () => {
@@ -267,7 +267,7 @@ describe('modelShow imagegen row', () => {
         expect(result.summary).toContain('setup-required');
         expect(result.summary).toContain('Image generation (setup required)');
     });
-    it('defaults to dall-e-3/openai when both apiKey and geminiApiKey are set', () => {
+    it('defaults to native Gemini when both apiKey and geminiApiKey are set', () => {
         const imagegenCtx = { apiKey: 'sk-test', geminiApiKey: 'gk-test' };
         const ctx = makeCtx({ imagegenCtx });
         const result = executeConfigAction({ type: 'modelShow' }, ctx);
@@ -275,8 +275,8 @@ describe('modelShow imagegen row', () => {
         if (!result.ok)
             return;
         expect(result.summary).toContain('imagegen');
-        expect(result.summary).toContain('dall-e-3');
-        expect(result.summary).toContain('openai');
+        expect(result.summary).toContain('gemini-3.1-flash-image-preview');
+        expect(result.summary).toContain('gemini');
     });
 });
 // ---------------------------------------------------------------------------

package/dist/discord/actions-imagegen.js CHANGED

@@ -1,6 +1,7 @@
 import { AttachmentBuilder } from 'discord.js';
 import { resolveChannel, findChannelRaw, describeChannelType } from './action-utils.js';
 import { NO_MENTIONS } from './allowed-mentions.js';
+import { downloadMessageImages, downloadImageUrl } from './image-download.js';
 const IMAGEGEN_TYPE_MAP = {
     generateImage: true,
 };
@@ -15,14 +16,19 @@ const GPT_IMAGE_VALID_SIZES = new Set(['1024x1024', '1024x1792', '1792x1024', 'a
 const GEMINI_VALID_SIZES = new Set(['1:1', '3:4', '4:3', '9:16', '16:9']);
 const VALID_QUALITY = new Set(['standard', 'hd']);
 const DISCORD_MAX_CONTENT = 2000;
+// Progress UX
+export const TYPING_INTERVAL_MS = 8_000;
+export const DOT_CYCLE_INTERVAL_MS = 3_000;
+export const REQUEST_TIMEOUT_MS = 120_000;
+const DOT_STATES = ['On it.', 'On it..', 'On it...'];
 // ---------------------------------------------------------------------------
 // Provider resolution
 // ---------------------------------------------------------------------------
 export function resolveDefaultModel(imagegenCtx) {
     if (imagegenCtx.defaultModel)
         return imagegenCtx.defaultModel;
-    if (imagegenCtx.geminiApiKey && !imagegenCtx.apiKey)
-        return 'imagen-4.0-generate-001';
+    if (imagegenCtx.geminiApiKey)
+        return 'gemini-3.1-flash-image-preview';
     return 'dall-e-3';
 }
 export function resolveProvider(model, explicit) {
@@ -37,7 +43,7 @@ export function resolveProvider(model, explicit) {
 // ---------------------------------------------------------------------------
 // API callers
 // ---------------------------------------------------------------------------
-async function callOpenAI(prompt, model, size, quality, apiKey, baseUrl) {
+async function callOpenAI(prompt, model, size, quality, apiKey, baseUrl, signal) {
     const body = {
         model,
         prompt,
@@ -57,6 +63,7 @@ async function callOpenAI(prompt, model, size, quality, apiKey, baseUrl) {
                 'Content-Type': 'application/json',
             },
             body: JSON.stringify(body),
+            signal,
         });
     }
     catch (err) {
@@ -87,7 +94,7 @@ async function callOpenAI(prompt, model, size, quality, apiKey, baseUrl) {
     }
     return { ok: true, b64: imageItem.b64_json };
 }
-async function callGemini(prompt, model, size, geminiApiKey) {
+async function callGemini(prompt, model, size, geminiApiKey, signal) {
     const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:predict`;
     const body = {
         instances: [{ prompt }],
@@ -105,6 +112,7 @@ async function callGemini(prompt, model, size, geminiApiKey) {
                 'Content-Type': 'application/json',
             },
             body: JSON.stringify(body),
+            signal,
         });
     }
     catch (err) {
@@ -135,10 +143,15 @@ async function callGemini(prompt, model, size, geminiApiKey) {
     }
     return { ok: true, b64 };
 }
-async function callGeminiNative(prompt, model, geminiApiKey) {
+async function callGeminiNative(prompt, model, geminiApiKey, sourceImage, signal) {
     const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;
+    const parts = [];
+    if (sourceImage) {
+        parts.push({ inlineData: { mimeType: sourceImage.mediaType, data: sourceImage.base64 } });
+    }
+    parts.push({ text: prompt });
     const body = {
-        contents: [{ parts: [{ text: prompt }] }],
+        contents: [{ parts }],
         generationConfig: { responseModalities: ['TEXT', 'IMAGE'] },
     };
     let response;
@@ -150,6 +163,7 @@ async function callGeminiNative(prompt, model, geminiApiKey) {
                 'Content-Type': 'application/json',
             },
             body: JSON.stringify(body),
+            signal,
         });
     }
     catch (err) {
@@ -174,14 +188,57 @@ async function callGeminiNative(prompt, model, geminiApiKey) {
     catch {
         return { ok: false, error: 'generateImage: failed to parse API response' };
     }
-    const parts = data.candidates?.[0]?.content?.parts ?? [];
-    const imagePart = parts.find(p => p.inlineData?.mimeType?.startsWith('image/'));
+    const responseParts = data.candidates?.[0]?.content?.parts ?? [];
+    const imagePart = responseParts.find(p => p.inlineData?.mimeType?.startsWith('image/'));
     if (!imagePart?.inlineData?.data) {
         return { ok: false, error: 'generateImage: API returned no image data' };
     }
     return { ok: true, b64: imagePart.inlineData.data };
 }
 // ---------------------------------------------------------------------------
+// Source image resolution
+// ---------------------------------------------------------------------------
+async function resolveSourceImage(sourceImage, ctx) {
+    if (sourceImage.type === 'url') {
+        const dlResult = await downloadImageUrl(sourceImage.url);
+        if (!dlResult.ok) {
+            return { ok: false, error: `generateImage: ${dlResult.error}` };
+        }
+        return { ok: true, base64: dlResult.image.base64, mediaType: dlResult.image.mediaType };
+    }
+    const channelId = sourceImage.channelId ?? ctx.channelId;
+    const messageId = sourceImage.messageId ?? ctx.messageId;
+    const attachmentIndex = sourceImage.attachmentIndex ?? 0;
+    let channel;
+    try {
+        channel = await ctx.client.channels.fetch(channelId);
+    }
+    catch {
+        return { ok: false, error: `generateImage: could not fetch channel "${channelId}"` };
+    }
+    if (!channel || !('messages' in channel)) {
+        return { ok: false, error: `generateImage: channel "${channelId}" is not a text channel` };
+    }
+    let message;
+    try {
+        message = await channel.messages.fetch(messageId);
+    }
+    catch {
+        return { ok: false, error: `generateImage: could not fetch message "${messageId}"` };
+    }
+    const attachments = [...message.attachments.values()];
+    if (attachmentIndex < 0 || attachmentIndex >= attachments.length) {
+        return { ok: false, error: `generateImage: no attachment at index ${attachmentIndex} (message has ${attachments.length} attachment${attachments.length === 1 ? '' : 's'})` };
+    }
+    const target = attachments[attachmentIndex];
+    const result = await downloadMessageImages([target], 1);
+    if (result.images.length === 0) {
+        const reason = result.errors.length > 0 ? `: ${result.errors[0]}` : '';
+        return { ok: false, error: `generateImage: source image attachment rejected${reason}` };
+    }
+    return { ok: true, base64: result.images[0].base64, mediaType: result.images[0].mediaType };
+}
+// ---------------------------------------------------------------------------
 // Executor
 // ---------------------------------------------------------------------------
 export async function executeImagegenAction(action, ctx, imagegenCtx) {
@@ -238,38 +295,85 @@ export async function executeImagegenAction(action, ctx, imagegenCtx) {
                     return { ok: false, error: 'generateImage: apiKey is required for OpenAI provider' };
                 }
             }
-            // Call provider
-            let result;
-            if (provider === 'gemini') {
-                if (model.startsWith('gemini-')) {
-                    result = await callGeminiNative(action.prompt.trim(), model, imagegenCtx.geminiApiKey);
+            // Source image model gate (sync check, before placeholder)
+            if (action.sourceImage && !model.startsWith('gemini-')) {
+                return { ok: false, error: `generateImage: sourceImage is only supported with native Gemini models (gemini-*), not "${model}"` };
+            }
+            // --- Progress UX lifecycle ---
+            const placeholder = await channel.send({ content: DOT_STATES[0], allowedMentions: NO_MENTIONS });
+            channel.sendTyping().catch(() => { });
+            let dotIndex = 0;
+            const typingInterval = setInterval(() => { channel.sendTyping().catch(() => { }); }, TYPING_INTERVAL_MS);
+            const dotInterval = setInterval(() => {
+                dotIndex = (dotIndex + 1) % DOT_STATES.length;
+                placeholder.edit(DOT_STATES[dotIndex]).catch(() => { });
+            }, DOT_CYCLE_INTERVAL_MS);
+            const controller = new AbortController();
+            const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
+            try {
+                // Resolve source image if provided
+                let resolvedSourceImage;
+                if (action.sourceImage) {
+                    const srcResult = await resolveSourceImage(action.sourceImage, ctx);
+                    if (!srcResult.ok) {
+                        return { ok: false, error: srcResult.error };
+                    }
+                    resolvedSourceImage = { base64: srcResult.base64, mediaType: srcResult.mediaType };
+                }
+                // Call provider
+                let result;
+                if (provider === 'gemini') {
+                    if (model.startsWith('gemini-')) {
+                        result = await callGeminiNative(action.prompt.trim(), model, imagegenCtx.geminiApiKey, resolvedSourceImage, controller.signal);
+                    }
+                    else {
+                        result = await callGemini(action.prompt.trim(), model, size, imagegenCtx.geminiApiKey, controller.signal);
+                    }
                 }
                 else {
-                    result = await callGemini(action.prompt.trim(), model, size, imagegenCtx.geminiApiKey);
+                    const baseUrl = imagegenCtx.baseUrl ?? 'https://api.openai.com/v1';
+                    result = await callOpenAI(action.prompt.trim(), model, size, quality, imagegenCtx.apiKey, baseUrl, controller.signal);
                 }
+                if (controller.signal.aborted) {
+                    return { ok: false, error: 'generateImage: request timed out' };
+                }
+                if (!result.ok) {
+                    return { ok: false, error: result.error };
+                }
+                // Stop progress before final Discord mutations
+                clearInterval(typingInterval);
+                clearInterval(dotInterval);
+                clearTimeout(timeoutId);
+                await placeholder.delete().catch(() => { });
+                const buf = Buffer.from(result.b64, 'base64');
+                const attachment = new AttachmentBuilder(buf, { name: 'image-1.png' });
+                const sendOpts = { files: [attachment], allowedMentions: NO_MENTIONS };
+                if (action.caption) {
+                    sendOpts.content = action.caption;
+                }
+                await channel.send(sendOpts);
+                return { ok: true, summary: `Generated image posted to #${channel.name}` };
             }
-            else {
-                const baseUrl = imagegenCtx.baseUrl ?? 'https://api.openai.com/v1';
-                result = await callOpenAI(action.prompt.trim(), model, size, quality, imagegenCtx.apiKey, baseUrl);
-            }
-            if (!result.ok) {
-                return { ok: false, error: result.error };
+            catch (err) {
+                const msg = err instanceof Error ? err.message : String(err);
+                return { ok: false, error: `generateImage: ${msg}` };
             }
-            const buf = Buffer.from(result.b64, 'base64');
-            const attachment = new AttachmentBuilder(buf, { name: 'image-1.png' });
-            const sendOpts = { files: [attachment], allowedMentions: NO_MENTIONS };
-            if (action.caption) {
-                sendOpts.content = action.caption;
+            finally {
+                clearInterval(typingInterval);
+                clearInterval(dotInterval);
+                clearTimeout(timeoutId);
+                await placeholder.delete().catch(() => { });
             }
-            await channel.send(sendOpts);
-            return { ok: true, summary: `Generated image posted to #${channel.name}` };
         }
     }
 }
 // ---------------------------------------------------------------------------
 // Prompt section
 // ---------------------------------------------------------------------------
-export function imagegenActionsPromptSection() {
+export function imagegenActionsPromptSection(resolvedDefaultModel) {
+    const modelFieldDoc = resolvedDefaultModel
+        ? `- \`model\` (optional): Default is \`${resolvedDefaultModel}\`. Omit this field to use the default; only set it when a different model is explicitly needed. Supported families/examples:`
+        : `- \`model\` (optional): Default depends on configuration. Supported families/examples:`;
     return `### Image Generation
 **generateImage** — Generate an image and post it to a channel:
@@ -278,7 +382,7 @@ export function imagegenActionsPromptSection() {
 \`\`\`
 - \`prompt\` (required): Text description of the image to generate.
 - \`channel\` (optional): Channel name (with or without #) or channel ID to post the image to. Defaults to the current channel/thread if omitted.
-- \`model\` (optional): Model to use. Default depends on configuration (auto-detected from available API keys). Common supported families/examples:
+${modelFieldDoc}
   - OpenAI: \`dall-e-3\`, \`gpt-image-1\`
   - Gemini (Imagen): \`imagen-4.0-generate-001\`, \`imagen-4.0-fast-generate-001\`, \`imagen-4.0-ultra-generate-001\`
   - Gemini (native): \`gemini-3.1-flash-image-preview\`, \`gemini-3-pro-image-preview\`
@@ -289,5 +393,26 @@ export function imagegenActionsPromptSection() {
   - Gemini (Imagen): aspect ratios — \`1:1\` (default), \`3:4\`, \`4:3\`, \`9:16\`, \`16:9\`
   - Gemini (native): size/aspect-ratio params do not apply — omit \`size\` for these models
 - \`quality\` (optional): \`standard\` (default) or \`hd\` — applies to OpenAI dall-e-3 only.
-- \`caption\` (optional): Text message to accompany the image in the channel.`;
+- \`caption\` (optional): Text message to accompany the image in the channel.
+- \`sourceImage\` (optional): Provide a source image for image-to-image editing. **Only supported with native Gemini models** (\`gemini-*\`). Two forms:
+  - **Attachment form** — reference a Discord message attachment:
+    - \`type\` (required): \`"attachment"\`
+    - \`channelId\` (optional): Channel ID of the message containing the image. Defaults to the current channel.
+    - \`messageId\` (optional): Message ID containing the image attachment. Defaults to the current message.
+    - \`attachmentIndex\` (optional): Zero-based index of the attachment to use. Defaults to \`0\` (first attachment).
+    - Example — edit the image from the current message:
+      \`\`\`
+      <discord-action>{"type":"generateImage","prompt":"Make this image look like a watercolor painting","model":"gemini-3.1-flash-image-preview","sourceImage":{"type":"attachment"}}</discord-action>
+      \`\`\`
+    - Example — edit an image from a specific message:
+      \`\`\`
+      <discord-action>{"type":"generateImage","prompt":"Add a sunset sky","model":"gemini-3.1-flash-image-preview","sourceImage":{"type":"attachment","channelId":"123","messageId":"456","attachmentIndex":1}}</discord-action>
+      \`\`\`
+  - **URL form** — provide a public http(s) image URL directly:
+    - \`type\` (required): \`"url"\`
+    - \`url\` (required): A public \`http(s)\` image URL (PNG, JPEG, GIF, or WebP).
+    - Example:
+      \`\`\`
+      <discord-action>{"type":"generateImage","prompt":"Make this photo a pencil sketch","model":"gemini-3.1-flash-image-preview","sourceImage":{"type":"url","url":"https://example.com/photo.jpg"}}</discord-action>
+      \`\`\``;
 }