discoclaw 0.8.1 → 0.8.2

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -243,7 +243,7 @@ describe('modelShow imagegen row', () => {
243
243
  if (!result.ok)
244
244
  return;
245
245
  expect(result.summary).toContain('imagegen');
246
- expect(result.summary).toContain('imagen-4.0-generate-001');
246
+ expect(result.summary).toContain('gemini-3.1-flash-image-preview');
247
247
  expect(result.summary).toContain('gemini');
248
248
  });
249
249
  it('respects explicit defaultModel', () => {
@@ -267,7 +267,7 @@ describe('modelShow imagegen row', () => {
267
267
  expect(result.summary).toContain('setup-required');
268
268
  expect(result.summary).toContain('Image generation (setup required)');
269
269
  });
270
- it('defaults to dall-e-3/openai when both apiKey and geminiApiKey are set', () => {
270
+ it('defaults to native Gemini when both apiKey and geminiApiKey are set', () => {
271
271
  const imagegenCtx = { apiKey: 'sk-test', geminiApiKey: 'gk-test' };
272
272
  const ctx = makeCtx({ imagegenCtx });
273
273
  const result = executeConfigAction({ type: 'modelShow' }, ctx);
@@ -275,8 +275,8 @@ describe('modelShow imagegen row', () => {
275
275
  if (!result.ok)
276
276
  return;
277
277
  expect(result.summary).toContain('imagegen');
278
- expect(result.summary).toContain('dall-e-3');
279
- expect(result.summary).toContain('openai');
278
+ expect(result.summary).toContain('gemini-3.1-flash-image-preview');
279
+ expect(result.summary).toContain('gemini');
280
280
  });
281
281
  });
282
282
  // ---------------------------------------------------------------------------
@@ -1,6 +1,7 @@
1
1
  import { AttachmentBuilder } from 'discord.js';
2
2
  import { resolveChannel, findChannelRaw, describeChannelType } from './action-utils.js';
3
3
  import { NO_MENTIONS } from './allowed-mentions.js';
4
+ import { downloadMessageImages, downloadImageUrl } from './image-download.js';
4
5
  const IMAGEGEN_TYPE_MAP = {
5
6
  generateImage: true,
6
7
  };
@@ -15,14 +16,19 @@ const GPT_IMAGE_VALID_SIZES = new Set(['1024x1024', '1024x1792', '1792x1024', 'a
15
16
  const GEMINI_VALID_SIZES = new Set(['1:1', '3:4', '4:3', '9:16', '16:9']);
16
17
  const VALID_QUALITY = new Set(['standard', 'hd']);
17
18
  const DISCORD_MAX_CONTENT = 2000;
19
+ // Progress UX
20
+ export const TYPING_INTERVAL_MS = 8_000;
21
+ export const DOT_CYCLE_INTERVAL_MS = 3_000;
22
+ export const REQUEST_TIMEOUT_MS = 120_000;
23
+ const DOT_STATES = ['On it.', 'On it..', 'On it...'];
18
24
  // ---------------------------------------------------------------------------
19
25
  // Provider resolution
20
26
  // ---------------------------------------------------------------------------
21
27
  export function resolveDefaultModel(imagegenCtx) {
22
28
  if (imagegenCtx.defaultModel)
23
29
  return imagegenCtx.defaultModel;
24
- if (imagegenCtx.geminiApiKey && !imagegenCtx.apiKey)
25
- return 'imagen-4.0-generate-001';
30
+ if (imagegenCtx.geminiApiKey)
31
+ return 'gemini-3.1-flash-image-preview';
26
32
  return 'dall-e-3';
27
33
  }
28
34
  export function resolveProvider(model, explicit) {
@@ -37,7 +43,7 @@ export function resolveProvider(model, explicit) {
37
43
  // ---------------------------------------------------------------------------
38
44
  // API callers
39
45
  // ---------------------------------------------------------------------------
40
- async function callOpenAI(prompt, model, size, quality, apiKey, baseUrl) {
46
+ async function callOpenAI(prompt, model, size, quality, apiKey, baseUrl, signal) {
41
47
  const body = {
42
48
  model,
43
49
  prompt,
@@ -57,6 +63,7 @@ async function callOpenAI(prompt, model, size, quality, apiKey, baseUrl) {
57
63
  'Content-Type': 'application/json',
58
64
  },
59
65
  body: JSON.stringify(body),
66
+ signal,
60
67
  });
61
68
  }
62
69
  catch (err) {
@@ -87,7 +94,7 @@ async function callOpenAI(prompt, model, size, quality, apiKey, baseUrl) {
87
94
  }
88
95
  return { ok: true, b64: imageItem.b64_json };
89
96
  }
90
- async function callGemini(prompt, model, size, geminiApiKey) {
97
+ async function callGemini(prompt, model, size, geminiApiKey, signal) {
91
98
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:predict`;
92
99
  const body = {
93
100
  instances: [{ prompt }],
@@ -105,6 +112,7 @@ async function callGemini(prompt, model, size, geminiApiKey) {
105
112
  'Content-Type': 'application/json',
106
113
  },
107
114
  body: JSON.stringify(body),
115
+ signal,
108
116
  });
109
117
  }
110
118
  catch (err) {
@@ -135,10 +143,15 @@ async function callGemini(prompt, model, size, geminiApiKey) {
135
143
  }
136
144
  return { ok: true, b64 };
137
145
  }
138
- async function callGeminiNative(prompt, model, geminiApiKey) {
146
+ async function callGeminiNative(prompt, model, geminiApiKey, sourceImage, signal) {
139
147
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;
148
+ const parts = [];
149
+ if (sourceImage) {
150
+ parts.push({ inlineData: { mimeType: sourceImage.mediaType, data: sourceImage.base64 } });
151
+ }
152
+ parts.push({ text: prompt });
140
153
  const body = {
141
- contents: [{ parts: [{ text: prompt }] }],
154
+ contents: [{ parts }],
142
155
  generationConfig: { responseModalities: ['TEXT', 'IMAGE'] },
143
156
  };
144
157
  let response;
@@ -150,6 +163,7 @@ async function callGeminiNative(prompt, model, geminiApiKey) {
150
163
  'Content-Type': 'application/json',
151
164
  },
152
165
  body: JSON.stringify(body),
166
+ signal,
153
167
  });
154
168
  }
155
169
  catch (err) {
@@ -174,14 +188,57 @@ async function callGeminiNative(prompt, model, geminiApiKey) {
174
188
  catch {
175
189
  return { ok: false, error: 'generateImage: failed to parse API response' };
176
190
  }
177
- const parts = data.candidates?.[0]?.content?.parts ?? [];
178
- const imagePart = parts.find(p => p.inlineData?.mimeType?.startsWith('image/'));
191
+ const responseParts = data.candidates?.[0]?.content?.parts ?? [];
192
+ const imagePart = responseParts.find(p => p.inlineData?.mimeType?.startsWith('image/'));
179
193
  if (!imagePart?.inlineData?.data) {
180
194
  return { ok: false, error: 'generateImage: API returned no image data' };
181
195
  }
182
196
  return { ok: true, b64: imagePart.inlineData.data };
183
197
  }
184
198
  // ---------------------------------------------------------------------------
199
+ // Source image resolution
200
+ // ---------------------------------------------------------------------------
201
+ async function resolveSourceImage(sourceImage, ctx) {
202
+ if (sourceImage.type === 'url') {
203
+ const dlResult = await downloadImageUrl(sourceImage.url);
204
+ if (!dlResult.ok) {
205
+ return { ok: false, error: `generateImage: ${dlResult.error}` };
206
+ }
207
+ return { ok: true, base64: dlResult.image.base64, mediaType: dlResult.image.mediaType };
208
+ }
209
+ const channelId = sourceImage.channelId ?? ctx.channelId;
210
+ const messageId = sourceImage.messageId ?? ctx.messageId;
211
+ const attachmentIndex = sourceImage.attachmentIndex ?? 0;
212
+ let channel;
213
+ try {
214
+ channel = await ctx.client.channels.fetch(channelId);
215
+ }
216
+ catch {
217
+ return { ok: false, error: `generateImage: could not fetch channel "${channelId}"` };
218
+ }
219
+ if (!channel || !('messages' in channel)) {
220
+ return { ok: false, error: `generateImage: channel "${channelId}" is not a text channel` };
221
+ }
222
+ let message;
223
+ try {
224
+ message = await channel.messages.fetch(messageId);
225
+ }
226
+ catch {
227
+ return { ok: false, error: `generateImage: could not fetch message "${messageId}"` };
228
+ }
229
+ const attachments = [...message.attachments.values()];
230
+ if (attachmentIndex < 0 || attachmentIndex >= attachments.length) {
231
+ return { ok: false, error: `generateImage: no attachment at index ${attachmentIndex} (message has ${attachments.length} attachment${attachments.length === 1 ? '' : 's'})` };
232
+ }
233
+ const target = attachments[attachmentIndex];
234
+ const result = await downloadMessageImages([target], 1);
235
+ if (result.images.length === 0) {
236
+ const reason = result.errors.length > 0 ? `: ${result.errors[0]}` : '';
237
+ return { ok: false, error: `generateImage: source image attachment rejected${reason}` };
238
+ }
239
+ return { ok: true, base64: result.images[0].base64, mediaType: result.images[0].mediaType };
240
+ }
241
+ // ---------------------------------------------------------------------------
185
242
  // Executor
186
243
  // ---------------------------------------------------------------------------
187
244
  export async function executeImagegenAction(action, ctx, imagegenCtx) {
@@ -238,31 +295,75 @@ export async function executeImagegenAction(action, ctx, imagegenCtx) {
238
295
  return { ok: false, error: 'generateImage: apiKey is required for OpenAI provider' };
239
296
  }
240
297
  }
241
- // Call provider
242
- let result;
243
- if (provider === 'gemini') {
244
- if (model.startsWith('gemini-')) {
245
- result = await callGeminiNative(action.prompt.trim(), model, imagegenCtx.geminiApiKey);
298
+ // Source image model gate (sync check, before placeholder)
299
+ if (action.sourceImage && !model.startsWith('gemini-')) {
300
+ return { ok: false, error: `generateImage: sourceImage is only supported with native Gemini models (gemini-*), not "${model}"` };
301
+ }
302
+ // --- Progress UX lifecycle ---
303
+ const placeholder = await channel.send({ content: DOT_STATES[0], allowedMentions: NO_MENTIONS });
304
+ channel.sendTyping().catch(() => { });
305
+ let dotIndex = 0;
306
+ const typingInterval = setInterval(() => { channel.sendTyping().catch(() => { }); }, TYPING_INTERVAL_MS);
307
+ const dotInterval = setInterval(() => {
308
+ dotIndex = (dotIndex + 1) % DOT_STATES.length;
309
+ placeholder.edit(DOT_STATES[dotIndex]).catch(() => { });
310
+ }, DOT_CYCLE_INTERVAL_MS);
311
+ const controller = new AbortController();
312
+ const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
313
+ try {
314
+ // Resolve source image if provided
315
+ let resolvedSourceImage;
316
+ if (action.sourceImage) {
317
+ const srcResult = await resolveSourceImage(action.sourceImage, ctx);
318
+ if (!srcResult.ok) {
319
+ return { ok: false, error: srcResult.error };
320
+ }
321
+ resolvedSourceImage = { base64: srcResult.base64, mediaType: srcResult.mediaType };
322
+ }
323
+ // Call provider
324
+ let result;
325
+ if (provider === 'gemini') {
326
+ if (model.startsWith('gemini-')) {
327
+ result = await callGeminiNative(action.prompt.trim(), model, imagegenCtx.geminiApiKey, resolvedSourceImage, controller.signal);
328
+ }
329
+ else {
330
+ result = await callGemini(action.prompt.trim(), model, size, imagegenCtx.geminiApiKey, controller.signal);
331
+ }
246
332
  }
247
333
  else {
248
- result = await callGemini(action.prompt.trim(), model, size, imagegenCtx.geminiApiKey);
334
+ const baseUrl = imagegenCtx.baseUrl ?? 'https://api.openai.com/v1';
335
+ result = await callOpenAI(action.prompt.trim(), model, size, quality, imagegenCtx.apiKey, baseUrl, controller.signal);
249
336
  }
337
+ if (controller.signal.aborted) {
338
+ return { ok: false, error: 'generateImage: request timed out' };
339
+ }
340
+ if (!result.ok) {
341
+ return { ok: false, error: result.error };
342
+ }
343
+ // Stop progress before final Discord mutations
344
+ clearInterval(typingInterval);
345
+ clearInterval(dotInterval);
346
+ clearTimeout(timeoutId);
347
+ await placeholder.delete().catch(() => { });
348
+ const buf = Buffer.from(result.b64, 'base64');
349
+ const attachment = new AttachmentBuilder(buf, { name: 'image-1.png' });
350
+ const sendOpts = { files: [attachment], allowedMentions: NO_MENTIONS };
351
+ if (action.caption) {
352
+ sendOpts.content = action.caption;
353
+ }
354
+ await channel.send(sendOpts);
355
+ return { ok: true, summary: `Generated image posted to #${channel.name}` };
250
356
  }
251
- else {
252
- const baseUrl = imagegenCtx.baseUrl ?? 'https://api.openai.com/v1';
253
- result = await callOpenAI(action.prompt.trim(), model, size, quality, imagegenCtx.apiKey, baseUrl);
254
- }
255
- if (!result.ok) {
256
- return { ok: false, error: result.error };
357
+ catch (err) {
358
+ const msg = err instanceof Error ? err.message : String(err);
359
+ return { ok: false, error: `generateImage: ${msg}` };
257
360
  }
258
- const buf = Buffer.from(result.b64, 'base64');
259
- const attachment = new AttachmentBuilder(buf, { name: 'image-1.png' });
260
- const sendOpts = { files: [attachment], allowedMentions: NO_MENTIONS };
261
- if (action.caption) {
262
- sendOpts.content = action.caption;
361
+ finally {
362
+ clearInterval(typingInterval);
363
+ clearInterval(dotInterval);
364
+ clearTimeout(timeoutId);
365
+ await placeholder.delete().catch(() => { });
263
366
  }
264
- await channel.send(sendOpts);
265
- return { ok: true, summary: `Generated image posted to #${channel.name}` };
266
367
  }
267
368
  }
268
369
  }
@@ -292,5 +393,26 @@ ${modelFieldDoc}
292
393
  - Gemini (Imagen): aspect ratios — \`1:1\` (default), \`3:4\`, \`4:3\`, \`9:16\`, \`16:9\`
293
394
  - Gemini (native): size/aspect-ratio params do not apply — omit \`size\` for these models
294
395
  - \`quality\` (optional): \`standard\` (default) or \`hd\` — applies to OpenAI dall-e-3 only.
295
- - \`caption\` (optional): Text message to accompany the image in the channel.`;
396
+ - \`caption\` (optional): Text message to accompany the image in the channel.
397
+ - \`sourceImage\` (optional): Provide a source image for image-to-image editing. **Only supported with native Gemini models** (\`gemini-*\`). Two forms:
398
+ - **Attachment form** — reference a Discord message attachment:
399
+ - \`type\` (required): \`"attachment"\`
400
+ - \`channelId\` (optional): Channel ID of the message containing the image. Defaults to the current channel.
401
+ - \`messageId\` (optional): Message ID containing the image attachment. Defaults to the current message.
402
+ - \`attachmentIndex\` (optional): Zero-based index of the attachment to use. Defaults to \`0\` (first attachment).
403
+ - Example — edit the image from the current message:
404
+ \`\`\`
405
+ <discord-action>{"type":"generateImage","prompt":"Make this image look like a watercolor painting","model":"gemini-3.1-flash-image-preview","sourceImage":{"type":"attachment"}}</discord-action>
406
+ \`\`\`
407
+ - Example — edit an image from a specific message:
408
+ \`\`\`
409
+ <discord-action>{"type":"generateImage","prompt":"Add a sunset sky","model":"gemini-3.1-flash-image-preview","sourceImage":{"type":"attachment","channelId":"123","messageId":"456","attachmentIndex":1}}</discord-action>
410
+ \`\`\`
411
+ - **URL form** — provide a public http(s) image URL directly:
412
+ - \`type\` (required): \`"url"\`
413
+ - \`url\` (required): A public \`http(s)\` image URL (PNG, JPEG, GIF, or WebP).
414
+ - Example:
415
+ \`\`\`
416
+ <discord-action>{"type":"generateImage","prompt":"Make this photo a pencil sketch","model":"gemini-3.1-flash-image-preview","sourceImage":{"type":"url","url":"https://example.com/photo.jpg"}}</discord-action>
417
+ \`\`\``;
296
418
  }