squidclaw 0.8.2 → 0.8.3

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * 🦑 Image Generation Tool
3
- * Supports: OpenAI DALL-E, Google Gemini/Imagen
3
+ * Supports: OpenAI DALL-E 3, Google Gemini Flash Image
4
4
  */
5
5
 
6
6
  import { logger } from '../core/logger.js';
@@ -13,13 +13,14 @@ export class ImageGenTool {
13
13
  async generate(prompt, provider) {
14
14
  const providers = this.config.ai?.providers || {};
15
15
 
16
- // Auto-select provider
17
16
  if (!provider) {
18
17
  if (providers.openai?.key) provider = 'openai';
19
18
  else if (providers.google?.key) provider = 'google';
20
19
  else throw new Error('No image generation API key configured');
21
20
  }
22
21
 
22
+ logger.info('image-gen', `Generating with ${provider}: ${prompt.slice(0, 80)}...`);
23
+
23
24
  if (provider === 'openai') return this.generateOpenAI(prompt, providers.openai.key);
24
25
  if (provider === 'google') return this.generateGemini(prompt, providers.google.key);
25
26
  throw new Error('Unsupported provider: ' + provider);
@@ -37,52 +38,30 @@ export class ImageGenTool {
37
38
  }
38
39
 
39
40
  async generateGemini(prompt, apiKey) {
40
- // Gemini Imagen 3 via generateImages endpoint
41
- const res = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/imagen-3.0-generate-002:predict?key=${apiKey}`, {
41
+ // Try gemini-2.5-flash-image (native image generation)
42
+ const res = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-image:generateContent?key=${apiKey}`, {
42
43
  method: 'POST',
43
44
  headers: { 'Content-Type': 'application/json' },
44
45
  body: JSON.stringify({
45
- instances: [{ prompt }],
46
- parameters: { sampleCount: 1, aspectRatio: '1:1' },
46
+ contents: [{ parts: [{ text: `Generate an image: ${prompt}` }] }],
47
+ generationConfig: { responseModalities: ['TEXT', 'IMAGE'] },
47
48
  }),
48
49
  });
49
50
  const data = await res.json();
50
-
51
- if (data.error) {
52
- // Fallback: use Gemini 2.0 Flash native image generation
53
- return this.generateGeminiFlash(prompt, apiKey);
54
- }
55
51
 
56
- if (data.predictions?.[0]?.bytesBase64Encoded) {
57
- return { base64: data.predictions[0].bytesBase64Encoded, mimeType: 'image/png' };
52
+ if (data.error) {
53
+ logger.error('image-gen', `Gemini error: ${data.error.message}`);
54
+ throw new Error(data.error.message);
58
55
  }
59
56
 
60
- // Fallback to Gemini Flash
61
- return this.generateGeminiFlash(prompt, apiKey);
62
- }
63
-
64
- async generateGeminiFlash(prompt, apiKey) {
65
- const res = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?key=${apiKey}`, {
66
- method: 'POST',
67
- headers: { 'Content-Type': 'application/json' },
68
- body: JSON.stringify({
69
- contents: [{ parts: [{ text: `Generate an image: ${prompt}` }] }],
70
- generationConfig: { responseModalities: ['TEXT', 'IMAGE'] },
71
- }),
72
- });
73
- const data = await res.json();
74
-
75
- if (data.error) throw new Error(data.error.message);
76
-
77
- // Find image part in response
78
57
  const parts = data.candidates?.[0]?.content?.parts || [];
79
58
  for (const part of parts) {
80
59
  if (part.inlineData) {
60
+ logger.info('image-gen', `Image generated! ${part.inlineData.mimeType}, ${part.inlineData.data?.length} bytes`);
81
61
  return { base64: part.inlineData.data, mimeType: part.inlineData.mimeType || 'image/png' };
82
62
  }
83
63
  }
84
64
 
85
- // Text-only response
86
65
  const text = parts.find(p => p.text)?.text;
87
66
  throw new Error(text || 'Gemini could not generate an image');
88
67
  }
@@ -86,22 +86,23 @@ export class ToolRouter {
86
86
  break;
87
87
 
88
88
  case 'imagine':
89
- case 'draw':
90
- case 'image': {
91
- try {
92
- const { ImageGenTool } = await import('./image-gen.js');
93
- const gen = new ImageGenTool(this.config);
94
- const result = await gen.generate(args);
95
- if (result.url) {
96
- return { toolUsed: true, toolName: 'image', toolResult: result.url, imageUrl: result.url };
97
- } else if (result.base64) {
98
- return { toolUsed: true, toolName: 'image', toolResult: '[Image generated]', imageBase64: result.base64, mimeType: result.mimeType };
89
+ case 'draw':
90
+ case 'image': {
91
+ try {
92
+ const { ImageGenTool } = await import('./image-gen.js');
93
+ const gen = new ImageGenTool(this.config);
94
+ const imgResult = await gen.generate(toolArg);
95
+ if (imgResult.url) {
96
+ return { toolUsed: true, toolName: 'image', toolResult: imgResult.url, imageUrl: imgResult.url, cleanResponse };
97
+ } else if (imgResult.base64) {
98
+ return { toolUsed: true, toolName: 'image', toolResult: '[Image generated]', imageBase64: imgResult.base64, mimeType: imgResult.mimeType, cleanResponse };
99
+ }
100
+ } catch (err) {
101
+ toolResult = 'Image generation failed: ' + err.message;
99
102
  }
100
- } catch (err) {
101
- return { toolUsed: true, toolName: 'image', toolResult: 'Image generation failed: ' + err.message };
103
+ break;
102
104
  }
103
- }
104
- case 'read':
105
+ case 'read':
105
106
  const page = await this.browser.readPage(toolArg, 3000);
106
107
  toolResult = `Title: ${page.title}\n\n${page.content}`;
107
108
  break;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squidclaw",
3
- "version": "0.8.2",
3
+ "version": "0.8.3",
4
4
  "description": "\ud83e\udd91 AI agent platform \u2014 human-like agents for WhatsApp, Telegram & more",
5
5
  "main": "lib/engine.js",
6
6
  "bin": {