squidclaw 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/tools/browser-control.js +218 -0
- package/lib/tools/router.js +43 -0
- package/package.json +2 -1
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 🦑 Browser Control
|
|
3
|
+
* Headless browser for browsing, screenshots, form filling, scraping
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { logger } from '../core/logger.js';
|
|
7
|
+
|
|
8
|
+
// Shared browser handle reused across calls; reset to null by BrowserControl.close().
let browser = null;

// Launch currently in progress, if any. Concurrent callers await this same
// promise so that only one browser process is started at a time.
let launching = null;

// Candidate Chrome/Chromium locations, probed in order.
const BROWSER_PATHS = [
  '/usr/bin/google-chrome',
  '/usr/bin/google-chrome-stable',
  '/usr/bin/chromium-browser',
  '/usr/bin/chromium',
  '/snap/bin/chromium',
  '/usr/bin/brave-browser',
];

/**
 * Return the first candidate path that exists on disk.
 *
 * @param {string[]} candidates - Paths to probe, in priority order.
 * @param {(path: string) => boolean} existsSync - Existence check (fs.existsSync).
 * @returns {string|null} The first existing path, or null when none exist.
 */
function findExecutable(candidates, existsSync) {
  return candidates.find((candidate) => existsSync(candidate)) ?? null;
}

/**
 * Locate a Chrome/Chromium binary and launch it through puppeteer-core.
 *
 * @returns {Promise<object>} The launched puppeteer browser.
 * @throws {Error} When no browser executable can be found.
 */
async function launchBrowser() {
  const puppeteer = await import('puppeteer-core');
  const { existsSync } = await import('fs');

  let execPath = findExecutable(BROWSER_PATHS, existsSync);

  if (!execPath) {
    // Best effort: look the binary up on PATH, otherwise try to install it,
    // then probe the known locations again. A failure here is not fatal by
    // itself; the explicit error below reports the missing browser.
    try {
      const { execSync } = await import('child_process');
      execSync('which chromium || which chromium-browser || apt-get install -y chromium-browser 2>/dev/null', { stdio: 'ignore' });
      execPath = findExecutable(BROWSER_PATHS, existsSync);
    } catch (err) {
      logger.info('browser', 'Chromium lookup/install failed: ' + err.message);
    }
  }

  if (!execPath) throw new Error('No Chrome/Chromium found. Install with: apt install chromium-browser');

  // NOTE(review): the sandbox is switched off by the flags below — confirm
  // this is required for the deployment environment.
  const launched = await puppeteer.default.launch({
    executablePath: execPath,
    headless: 'new',
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-gpu',
      '--single-process',
    ],
  });

  logger.info('browser', 'Browser launched: ' + execPath);
  return launched;
}

/**
 * Get the shared browser, launching it on first use or after it disconnected.
 *
 * Calls made while a launch is already in progress share that launch instead
 * of starting a second browser process.
 *
 * @returns {Promise<object>} The connected puppeteer browser.
 * @throws {Error} When no browser executable can be found or the launch fails.
 */
async function getBrowser() {
  if (browser?.isConnected()) return browser;

  if (!launching) {
    launching = launchBrowser()
      .then((launched) => {
        browser = launched;
        return launched;
      })
      .finally(() => {
        // Clear the marker on success and failure so a later call can retry.
        launching = null;
      });
  }

  return launching;
}
|
|
59
|
+
|
|
60
|
+
// Navigation settings shared by every BrowserControl operation.
const NAVIGATION_OPTIONS = { waitUntil: 'networkidle2', timeout: 15000 };

/**
 * Reject anything that is not an absolute http(s) URL.
 *
 * The URLs handled here can originate from tool arguments, so schemes such as
 * file: or javascript: must never reach the browser.
 *
 * @param {string} url - Navigation target to check.
 * @throws {Error} When the URL cannot be parsed or uses another scheme.
 */
function assertHttpUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (err) {
    throw new Error('Invalid URL: ' + url, { cause: err });
  }
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    throw new Error('Only http(s) URLs are allowed, got "' + parsed.protocol + '"');
  }
}

/**
 * Open a fresh page, navigate to `url`, run `action`, and always close the page.
 *
 * @param {string} url - Absolute http(s) URL to load.
 * @param {(page: object) => Promise<*>} action - Work to do on the loaded page.
 * @param {(page: object) => Promise<*>} [prepare] - Optional setup run before navigation.
 * @returns {Promise<*>} Whatever `action` resolves to.
 * @throws {Error} On an invalid URL, navigation failure, or a failure in `action`.
 */
async function withPage(url, action, prepare) {
  // Validate first so a rejected URL never starts a browser.
  assertHttpUrl(url);

  const b = await getBrowser();
  const page = await b.newPage();

  try {
    if (prepare) await prepare(page);
    await page.goto(url, NAVIGATION_OPTIONS);
    return await action(page);
  } finally {
    await page.close();
  }
}

/**
 * Capture the page as a JPEG (quality 80) and return it as a Node Buffer.
 *
 * The capture is wrapped in a Buffer so callers can rely on Buffer methods
 * such as toString('base64') even when the capture is a plain Uint8Array.
 *
 * @param {object} page - Puppeteer page to capture.
 * @param {object} [extra] - Additional screenshot options (e.g. fullPage).
 * @returns {Promise<Buffer>} JPEG image bytes.
 */
async function captureJpeg(page, extra = {}) {
  const bytes = await page.screenshot({ type: 'jpeg', quality: 80, ...extra });
  return Buffer.from(bytes);
}

/**
 * Read the visible text of the page body, trimmed and capped at `limit` characters.
 *
 * @param {object} page - Puppeteer page to read.
 * @param {number} limit - Maximum number of characters to return.
 * @returns {Promise<string>} The truncated body text.
 */
function readBodyText(page, limit) {
  return page.evaluate((max) => document.body.innerText.trim().slice(0, max), limit);
}

/**
 * Headless-browser operations: screenshots, content extraction, form filling,
 * clicking, link listing and script evaluation. Each operation works on its
 * own page of the shared browser and closes that page when done.
 */
export class BrowserControl {

  /**
   * Take a screenshot of a URL.
   *
   * @param {string} url - Absolute http(s) URL to capture.
   * @param {object} [options]
   * @param {number} [options.width] - Viewport width (falls back to 1280).
   * @param {number} [options.height] - Viewport height (falls back to 800).
   * @param {string} [options.waitFor] - Selector to wait for (up to 5s) before capturing.
   * @param {boolean} [options.fullPage] - Capture the full page instead of the viewport.
   * @returns {Promise<{buffer: Buffer, title: string, url: string}>}
   * @throws {Error} On an invalid URL or navigation failure.
   */
  async screenshot(url, options = {}) {
    return withPage(
      url,
      async (page) => {
        if (options.waitFor) {
          // Best effort: a selector that never appears must not fail the capture.
          await page.waitForSelector(options.waitFor, { timeout: 5000 }).catch(() => {});
        }

        const buffer = await captureJpeg(page, { fullPage: options.fullPage || false });
        const title = await page.title();
        return { buffer, title, url };
      },
      (page) => page.setViewport({ width: options.width || 1280, height: options.height || 800 }),
    );
  }

  /**
   * Extract page content (more thorough than simple fetch).
   *
   * Removes scripts, styles, navigation and similar elements from the loaded
   * page, then returns up to 5000 characters of text from the main content
   * element (or the body when none matches).
   *
   * @param {string} url - Absolute http(s) URL to read.
   * @param {object} [options] - Currently unused.
   * @returns {Promise<{title: string, content: string, url: string}>}
   * @throws {Error} On an invalid URL or navigation failure.
   */
  async readPage(url, options = {}) {
    return withPage(url, async (page) => {
      const content = await page.evaluate(() => {
        // Remove scripts, styles, nav, footer
        const remove = document.querySelectorAll('script, style, nav, footer, header, aside, iframe, .ad, [class*="cookie"]');
        remove.forEach(el => el.remove());

        const main = document.querySelector('main, article, [role="main"], .content, #content');
        const el = main || document.body;
        return el.innerText.trim().slice(0, 5000);
      });

      const title = await page.title();
      return { title, content, url };
    });
  }

  /**
   * Fill a form on a page.
   *
   * Each entry may type `value` into `selector` and/or click the `click`
   * selector; entries are processed in order.
   *
   * @param {string} url - Absolute http(s) URL of the page with the form.
   * @param {Array<{selector?: string, value?: string, click?: string}>} [fields]
   * @returns {Promise<{screenshot: Buffer, content: string, url: string}>}
   * @throws {Error} On an invalid URL, navigation failure, or a selector that cannot be used.
   */
  async fillForm(url, fields = []) {
    return withPage(url, async (page) => {
      for (const field of fields) {
        if (field.selector && field.value) {
          await page.type(field.selector, field.value, { delay: 50 });
        }
        if (field.click) {
          await page.click(field.click);
        }
      }

      // Wait for result
      await new Promise(r => setTimeout(r, 2000));

      const screenshot = await captureJpeg(page);
      const content = await readBodyText(page, 3000);

      return { screenshot, content, url };
    });
  }

  /**
   * Click an element on a page.
   *
   * @param {string} url - Absolute http(s) URL to load.
   * @param {string} selector - Selector of the element to click.
   * @returns {Promise<{screenshot: Buffer, content: string, url: string}>}
   *   `url` is the page URL after the click.
   * @throws {Error} On an invalid URL, navigation failure, or a failed click.
   */
  async click(url, selector) {
    return withPage(url, async (page) => {
      await page.click(selector);
      // Give the page a moment to react to the click before capturing it.
      await new Promise(r => setTimeout(r, 2000));

      const screenshot = await captureJpeg(page);
      const newUrl = page.url();
      const content = await readBodyText(page, 3000);

      return { screenshot, content, url: newUrl };
    });
  }

  /**
   * Get all links on a page.
   *
   * @param {string} url - Absolute http(s) URL to load.
   * @returns {Promise<Array<{text: string, href: string}>>} Up to 30 links
   *   that have visible text and an href starting with "http".
   * @throws {Error} On an invalid URL or navigation failure.
   */
  async getLinks(url) {
    return withPage(url, (page) => page.evaluate(() => {
      return Array.from(document.querySelectorAll('a[href]'))
        .map(a => ({ text: a.innerText.trim(), href: a.href }))
        .filter(l => l.text && l.href.startsWith('http'))
        .slice(0, 30);
    }));
  }

  /**
   * Execute JavaScript on a page and return result.
   *
   * NOTE(review): `script` runs inside the loaded page; confirm callers only
   * pass trusted scripts.
   *
   * @param {string} url - Absolute http(s) URL to load.
   * @param {string|Function} script - Passed unchanged to page.evaluate.
   * @returns {Promise<{result: *, url: string}>}
   * @throws {Error} On an invalid URL, navigation failure, or a script error.
   */
  async evaluate(url, script) {
    return withPage(url, async (page) => {
      const result = await page.evaluate(script);
      return { result, url };
    });
  }

  /**
   * Close browser.
   *
   * The shared handle is detached before closing, so a failed close cannot
   * leave a stale handle behind.
   *
   * @returns {Promise<void>}
   */
  async close() {
    if (!browser) return;

    const current = browser;
    browser = null;
    await current.close();
  }
}
|
package/lib/tools/router.js
CHANGED
|
@@ -59,6 +59,16 @@ export class ToolRouter {
|
|
|
59
59
|
'Send an email.');
|
|
60
60
|
}
|
|
61
61
|
|
|
62
|
+
tools.push('', '### Screenshot',
|
|
63
|
+
'---TOOL:screenshot:https://example.com---',
|
|
64
|
+
'Take a screenshot of a website. Returns the screenshot as an image.',
|
|
65
|
+
'', '### Browse Page (Full)',
|
|
66
|
+
'---TOOL:browse:https://example.com---',
|
|
67
|
+
'Open a page in a real browser (handles JavaScript). Better than read for dynamic sites.',
|
|
68
|
+
'', '### Get Links',
|
|
69
|
+
'---TOOL:links:https://example.com---',
|
|
70
|
+
'Get all links on a webpage.');
|
|
71
|
+
|
|
62
72
|
tools.push('', '### Weather',
|
|
63
73
|
'---TOOL:weather:city name---',
|
|
64
74
|
'Get current weather and 3-day forecast for any city.',
|
|
@@ -139,6 +149,39 @@ export class ToolRouter {
|
|
|
139
149
|
}
|
|
140
150
|
break;
|
|
141
151
|
}
|
|
152
|
+
case 'screenshot': {
|
|
153
|
+
try {
|
|
154
|
+
const { BrowserControl } = await import('./browser-control.js');
|
|
155
|
+
const bc = new BrowserControl();
|
|
156
|
+
const result = await bc.screenshot(toolArg);
|
|
157
|
+
return { toolUsed: true, toolName: 'screenshot', toolResult: '[Screenshot taken]', imageBase64: result.buffer.toString('base64'), mimeType: 'image/jpeg', cleanResponse };
|
|
158
|
+
} catch (err) {
|
|
159
|
+
toolResult = 'Screenshot failed: ' + err.message;
|
|
160
|
+
}
|
|
161
|
+
break;
|
|
162
|
+
}
|
|
163
|
+
case 'browse': {
|
|
164
|
+
try {
|
|
165
|
+
const { BrowserControl } = await import('./browser-control.js');
|
|
166
|
+
const bc = new BrowserControl();
|
|
167
|
+
const result = await bc.readPage(toolArg);
|
|
168
|
+
toolResult = 'Title: ' + result.title + '\n\n' + result.content;
|
|
169
|
+
} catch (err) {
|
|
170
|
+
toolResult = 'Browse failed: ' + err.message;
|
|
171
|
+
}
|
|
172
|
+
break;
|
|
173
|
+
}
|
|
174
|
+
case 'links': {
|
|
175
|
+
try {
|
|
176
|
+
const { BrowserControl } = await import('./browser-control.js');
|
|
177
|
+
const bc = new BrowserControl();
|
|
178
|
+
const links = await bc.getLinks(toolArg);
|
|
179
|
+
toolResult = links.map(l => '• ' + l.text + ' → ' + l.href).join('\n');
|
|
180
|
+
} catch (err) {
|
|
181
|
+
toolResult = 'Failed: ' + err.message;
|
|
182
|
+
}
|
|
183
|
+
break;
|
|
184
|
+
}
|
|
142
185
|
case 'weather': {
|
|
143
186
|
const { getWeather } = await import('./weather.js');
|
|
144
187
|
toolResult = await getWeather(toolArg);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "squidclaw",
|
|
3
|
-
"version": "1.2.0",
|
|
3
|
+
"version": "1.3.0",
|
|
4
4
|
"description": "🦑 AI agent platform — human-like agents for WhatsApp, Telegram & more",
|
|
5
5
|
"main": "lib/engine.js",
|
|
6
6
|
"bin": {
|
|
@@ -49,6 +49,7 @@
|
|
|
49
49
|
"node-edge-tts": "^1.2.10",
|
|
50
50
|
"pdfjs-dist": "^5.4.624",
|
|
51
51
|
"pino": "^10.3.1",
|
|
52
|
+
"puppeteer-core": "^24.37.5",
|
|
52
53
|
"qrcode-terminal": "^0.12.0",
|
|
53
54
|
"sharp": "^0.34.5",
|
|
54
55
|
"undici": "^7.22.0",
|