squidclaw 0.7.2 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -166,6 +166,26 @@ export class TelegramManager {
166
166
  } catch {} // Reactions might not be supported in all chats
167
167
  }
168
168
 
169
+ async sendPhoto(agentId, contactId, photoData, caption, metadata = {}) {
170
+ const chatId = metadata.chatId || contactId;
171
+ const token = metadata.token;
172
+ const botInfo = this.bots.get(token);
173
+ if (!botInfo) return;
174
+
175
+ try {
176
+ if (photoData.url) {
177
+ await botInfo.bot.api.sendPhoto(chatId, photoData.url, { caption });
178
+ } else if (photoData.base64) {
179
+ const buffer = Buffer.from(photoData.base64, 'base64');
180
+ const { InputFile } = await import('grammy');
181
+ await botInfo.bot.api.sendPhoto(chatId, new InputFile(buffer, 'image.png'), { caption });
182
+ }
183
+ } catch (err) {
184
+ logger.error('telegram', 'Failed to send photo:', err.message);
185
+ if (caption) await this.sendMessage(agentId, contactId, caption, metadata);
186
+ }
187
+ }
188
+
169
189
  /**
170
190
  * Send voice note
171
191
  */
@@ -53,6 +53,13 @@ export function addToolSupport(agent, toolRouter, knowledgeBase) {
53
53
  const fullResponse = result.messages.join('\n');
54
54
  const toolResult = await toolRouter.processResponse(fullResponse, agent.id);
55
55
 
56
+ if (toolResult.toolUsed && (toolResult.imageBase64 || toolResult.imageUrl)) {
57
+ // Image generated — pass through directly
58
+ result.image = { base64: toolResult.imageBase64, url: toolResult.imageUrl, mimeType: toolResult.mimeType };
59
+ result.messages = ['Here you go! 🎨'];
60
+ return result;
61
+ }
62
+
56
63
  if (toolResult.toolUsed && toolResult.toolResult) {
57
64
  // Agent used a tool — now call AI again with the tool result
58
65
  logger.info('agent', `Tool ${toolResult.toolName} returned, calling AI again...`);
package/lib/engine.js CHANGED
@@ -99,6 +99,9 @@ export class SquidclawEngine {
99
99
  const keyDetected = detectApiKey(message);
100
100
  if (keyDetected && keyDetected.provider !== 'unknown') {
101
101
  saveApiKey(keyDetected.provider, keyDetected.key);
102
+ // Reload config so skills see the new key
103
+ const { loadConfig } = await import('./core/config.js');
104
+ this.config = loadConfig();
102
105
  const confirmation = getKeyConfirmation(keyDetected.provider);
103
106
  await this.telegramManager.sendMessage(agentId, contactId, confirmation, metadata);
104
107
  return;
@@ -217,7 +220,19 @@ export class SquidclawEngine {
217
220
  }
218
221
 
219
222
  if (result.messages && result.messages.length > 0) {
220
- await this.telegramManager.sendMessages(agentId, contactId, result.messages, metadata);
223
+ // Send image if generated
224
+ if (result.image) {
225
+ const photoData = result.image.url ? { url: result.image.url } : { base64: result.image.base64 };
226
+ const caption = result.messages?.[0] || '';
227
+ await this.telegramManager.sendPhoto(agentId, contactId, photoData, caption, metadata);
228
+ } else {
229
+ // Send image if generated
230
+ if (result.image) {
231
+ await this.telegramManager.sendPhoto(agentId, contactId, result.image, result.messages?.[0] || '', metadata);
232
+ } else {
233
+ await this.telegramManager.sendMessages(agentId, contactId, result.messages, metadata);
234
+ }
235
+ }
221
236
  }
222
237
  };
223
238
 
@@ -77,6 +77,7 @@ export function checkSkillAvailable(skill, config) {
77
77
  const req = SKILL_REQUIREMENTS[skill];
78
78
  if (!req) return { available: true };
79
79
 
80
+ // Check if ANY provider has a key that could handle this skill
80
81
  for (const prov of req.providers) {
81
82
  if (!prov.keyPrefix) return { available: true, provider: prov }; // Free skill
82
83
  const key = config.ai?.providers?.[prov.id]?.key;
@@ -84,6 +85,13 @@ export function checkSkillAvailable(skill, config) {
84
85
  return { available: true, provider: prov };
85
86
  }
86
87
  }
88
+
89
+ // Special: image_generation works with google OR openai key
90
+ if (skill === 'image_generation') {
91
+ const googleKey = config.ai?.providers?.google?.key;
92
+ const openaiKey = config.ai?.providers?.openai?.key;
93
+ if (googleKey || openaiKey) return { available: true };
94
+ }
87
95
 
88
96
  return { available: false, skill: req, requirements: req.providers.filter(p => p.keyPrefix) };
89
97
  }
@@ -0,0 +1,89 @@
1
+ /**
2
+ * 🦑 Image Generation Tool
3
+ * Supports: OpenAI DALL-E, Google Gemini/Imagen
4
+ */
5
+
6
+ import { logger } from '../core/logger.js';
7
+
8
export class ImageGenTool {
  /**
   * @param {object} config - Application config; API keys are read from
   *   `config.ai.providers.<id>.key`.
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Generate an image for `prompt`.
   *
   * @param {string} prompt - Text description of the image.
   * @param {string} [provider] - `'openai'` or `'google'`. When omitted, the
   *   first provider with a configured key is used (OpenAI first, then Google).
   * @returns {Promise<{url: string, revised_prompt?: string} | {base64: string, mimeType: string}>}
   *   OpenAI results carry `url`; Google results carry `base64`.
   * @throws {Error} When no key is configured, the provider is unsupported,
   *   or the remote API reports an error / returns no image.
   */
  async generate(prompt, provider) {
    const providers = this.config?.ai?.providers || {};

    let chosen = provider;
    if (!chosen) {
      if (providers.openai?.key) chosen = 'openai';
      else if (providers.google?.key) chosen = 'google';
      else throw new Error('No image generation API key configured');
    }

    if (chosen !== 'openai' && chosen !== 'google') {
      throw new Error(`Unsupported provider: ${chosen}`);
    }

    // An explicitly requested provider may have no key configured; fail with
    // a clear message instead of a TypeError on `undefined.key`.
    const apiKey = providers[chosen]?.key;
    if (!apiKey) throw new Error(`No API key configured for provider: ${chosen}`);

    return chosen === 'openai'
      ? this.generateOpenAI(prompt, apiKey)
      : this.generateGemini(prompt, apiKey);
  }

  /**
   * Request a single 1024x1024 image from the OpenAI images endpoint using
   * the `dall-e-3` model.
   *
   * @param {string} prompt
   * @param {string} apiKey
   * @returns {Promise<{url: string, revised_prompt?: string}>}
   * @throws {Error} On an API-reported error or a response without an image URL.
   */
  async generateOpenAI(prompt, apiKey) {
    const res = await fetch('https://api.openai.com/v1/images/generations', {
      method: 'POST',
      headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
      body: JSON.stringify({ model: 'dall-e-3', prompt, n: 1, size: '1024x1024' }),
    });
    const data = await res.json();
    if (data?.error) throw new Error(data.error.message);

    // Guard against a response without a usable image entry rather than
    // crashing on `data.data[0]`.
    const image = data?.data?.[0];
    if (!image?.url) throw new Error('OpenAI returned no image');
    return { url: image.url, revised_prompt: image.revised_prompt };
  }

  /**
   * Request an image from the Imagen `predict` endpoint; if that reports an
   * error or yields no image bytes, fall back to `generateGeminiFlash`.
   *
   * @param {string} prompt
   * @param {string} apiKey
   * @returns {Promise<{base64: string, mimeType: string}>}
   * @throws {Error} Whatever the Flash fallback throws.
   */
  async generateGemini(prompt, apiKey) {
    const endpoint =
      'https://generativelanguage.googleapis.com/v1beta/models/imagen-3.0-generate-002:predict' +
      `?key=${encodeURIComponent(apiKey)}`;
    const res = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        instances: [{ prompt }],
        parameters: { sampleCount: 1, aspectRatio: '1:1' },
      }),
    });
    const data = await res.json();

    if (data?.error) {
      // Record why the primary endpoint was skipped before falling back.
      logger.info('image-gen', 'Imagen request failed, falling back to Gemini Flash:', data.error.message);
      return this.generateGeminiFlash(prompt, apiKey);
    }

    const prediction = data?.predictions?.[0];
    if (prediction?.bytesBase64Encoded) {
      return { base64: prediction.bytesBase64Encoded, mimeType: prediction.mimeType || 'image/png' };
    }

    // No error but no image bytes either: try the Flash model.
    return this.generateGeminiFlash(prompt, apiKey);
  }

  /**
   * Ask the `gemini-2.0-flash-exp` model for an image via `generateContent`
   * and return the first inline-data part of the first candidate.
   *
   * @param {string} prompt
   * @param {string} apiKey
   * @returns {Promise<{base64: string, mimeType: string}>}
   * @throws {Error} On an API-reported error, or when the response contains
   *   no image part (the message is the model's text reply when there is one).
   */
  async generateGeminiFlash(prompt, apiKey) {
    const endpoint =
      'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent' +
      `?key=${encodeURIComponent(apiKey)}`;
    const res = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        contents: [{ parts: [{ text: `Generate an image: ${prompt}` }] }],
        generationConfig: { responseModalities: ['TEXT', 'IMAGE'] },
      }),
    });
    const data = await res.json();

    if (data?.error) throw new Error(data.error.message);

    const parts = data?.candidates?.[0]?.content?.parts || [];
    const imagePart = parts.find((part) => part.inlineData);
    if (imagePart) {
      const { data: base64, mimeType } = imagePart.inlineData;
      return { base64, mimeType: mimeType || 'image/png' };
    }

    // Text-only response: surface the model's explanation as the error.
    const text = parts.find((part) => part.text)?.text;
    throw new Error(text || 'Gemini could not generate an image');
  }
}
@@ -38,6 +38,13 @@ export class ToolRouter {
38
38
  'Search the agent\'s uploaded knowledge base for relevant information.',
39
39
  ];
40
40
 
41
+ // Image generation (if key available)
42
+ if (this.config.ai?.providers?.openai?.key || this.config.ai?.providers?.google?.key) {
43
+ tools.push('', '### Image Generation',
44
+ '---TOOL:imagine:detailed description of the image to generate---',
45
+ 'Generate an image from a text description. Be detailed in your description for best results.');
46
+ }
47
+
41
48
  if (this.calendar) {
42
49
  tools.push('', '### Calendar', '---TOOL:calendar:today--- or ---TOOL:calendar:week---',
43
50
  'Check upcoming calendar events.');
@@ -78,7 +85,23 @@ export class ToolRouter {
78
85
  toolResult = results.map(r => `• ${r.title}\n ${r.snippet}\n ${r.url}`).join('\n\n');
79
86
  break;
80
87
 
81
- case 'read':
88
+ case 'imagine':
89
+ case 'draw':
90
+ case 'image': {
91
+ try {
92
+ const { ImageGenTool } = await import('./image-gen.js');
93
+ const gen = new ImageGenTool(this.config);
94
+ const result = await gen.generate(args);
95
+ if (result.url) {
96
+ return { toolUsed: true, toolName: 'image', toolResult: result.url, imageUrl: result.url };
97
+ } else if (result.base64) {
98
+ return { toolUsed: true, toolName: 'image', toolResult: '[Image generated]', imageBase64: result.base64, mimeType: result.mimeType };
99
+ }
100
+ } catch (err) {
101
+ return { toolUsed: true, toolName: 'image', toolResult: 'Image generation failed: ' + err.message };
102
+ }
103
+ }
104
+ case 'read':
82
105
  const page = await this.browser.readPage(toolArg, 3000);
83
106
  toolResult = `Title: ${page.title}\n\n${page.content}`;
84
107
  break;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squidclaw",
3
- "version": "0.7.2",
3
+ "version": "0.8.1",
4
4
  "description": "\ud83e\udd91 AI agent platform \u2014 human-like agents for WhatsApp, Telegram & more",
5
5
  "main": "lib/engine.js",
6
6
  "bin": {