kernelbot 1.0.20 → 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ # Hello World! 🌍
2
+
3
+ This is a **test file** created to verify that everything is working properly.
4
+
5
+ ## About This File
6
+
7
+ This file demonstrates:
8
+ - *Basic markdown formatting*
9
+ - **Bold text**
10
+ - Simple lists
11
+
12
+ ## Features Tested
13
+
14
+ - ✅ File creation
15
+ - ✅ Markdown formatting
16
+ - ✅ Emoji support 🚀
17
+ - ✅ Basic structure
18
+
19
+ ---
20
+
21
+ *Created as a test for the KernelBot project!* 🤖
@@ -0,0 +1,11 @@
1
+ # Hello World 👋
2
+
3
+ Welcome to **newnew-1**! This is a simple hello world file.
4
+
5
+ ## Quick Example
6
+
7
+ ```python
8
+ print("Hello, World!")
9
+ ```
10
+
11
+ > Keep it simple. Keep it fun.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kernelbot",
3
- "version": "1.0.20",
3
+ "version": "1.0.22",
4
4
  "description": "KernelBot — AI engineering agent with full OS control",
5
5
  "type": "module",
6
6
  "author": "Abdullah Al-Taheri <abdullah@altaheri.me>",
@@ -34,9 +34,11 @@
34
34
  "chalk": "^5.4.1",
35
35
  "commander": "^13.1.0",
36
36
  "dotenv": "^16.4.7",
37
+ "gradient-string": "^3.0.0",
37
38
  "js-yaml": "^4.1.0",
38
39
  "node-telegram-bot-api": "^0.66.0",
39
40
  "ora": "^8.1.1",
41
+ "puppeteer": "^24.37.3",
40
42
  "simple-git": "^3.31.1",
41
43
  "uuid": "^11.1.0",
42
44
  "winston": "^3.17.0"
package/src/agent.js CHANGED
@@ -13,10 +13,11 @@ export class Agent {
13
13
  this._pending = new Map(); // chatId -> pending state
14
14
  }
15
15
 
16
- async processMessage(chatId, userMessage, user, onUpdate) {
16
+ async processMessage(chatId, userMessage, user, onUpdate, sendPhoto) {
17
17
  const logger = getLogger();
18
18
 
19
19
  this._onUpdate = onUpdate || null;
20
+ this._sendPhoto = sendPhoto || null;
20
21
 
21
22
  // Handle pending responses (confirmation or credential)
22
23
  const pending = this._pending.get(chatId);
@@ -66,6 +67,11 @@ export class Agent {
66
67
  docker_compose: 'action',
67
68
  curl_url: 'url',
68
69
  check_port: 'port',
70
+ screenshot_website: 'url',
71
+ send_image: 'file_path',
72
+ browse_website: 'url',
73
+ extract_content: 'url',
74
+ interact_with_page: 'url',
69
75
  }[name];
70
76
  const val = key && input[key] ? String(input[key]).slice(0, 120) : JSON.stringify(input).slice(0, 120);
71
77
  return `${name}: ${val}`;
@@ -100,6 +106,7 @@ export class Agent {
100
106
  config: this.config,
101
107
  user,
102
108
  onUpdate: this._onUpdate,
109
+ sendPhoto: this._sendPhoto,
103
110
  });
104
111
 
105
112
  pending.toolResults.push({
@@ -117,7 +124,7 @@ export class Agent {
117
124
 
118
125
  if (lower === 'yes' || lower === 'y' || lower === 'confirm') {
119
126
  logger.info(`User confirmed dangerous tool: ${pending.block.name}`);
120
- const result = await executeTool(pending.block.name, pending.block.input, { ...pending.context, onUpdate: this._onUpdate });
127
+ const result = await executeTool(pending.block.name, pending.block.input, { ...pending.context, onUpdate: this._onUpdate, sendPhoto: this._sendPhoto });
121
128
 
122
129
  pending.toolResults.push({
123
130
  type: 'tool_result',
@@ -144,7 +151,7 @@ export class Agent {
144
151
  const pauseMsg = await this._checkPause(chatId, block, user, pending.toolResults, pending.remainingBlocks.filter((b) => b !== block), pending.messages);
145
152
  if (pauseMsg) return pauseMsg;
146
153
 
147
- const r = await executeTool(block.name, block.input, { config: this.config, user, onUpdate: this._onUpdate });
154
+ const r = await executeTool(block.name, block.input, { config: this.config, user, onUpdate: this._onUpdate, sendPhoto: this._sendPhoto });
148
155
  pending.toolResults.push({
149
156
  type: 'tool_result',
150
157
  tool_use_id: block.id,
@@ -256,6 +263,7 @@ export class Agent {
256
263
  config: this.config,
257
264
  user,
258
265
  onUpdate: this._onUpdate,
266
+ sendPhoto: this._sendPhoto,
259
267
  });
260
268
 
261
269
  toolResults.push({
package/src/bot.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import TelegramBot from 'node-telegram-bot-api';
2
+ import { createReadStream } from 'fs';
2
3
  import { isAllowedUser, getUnauthorizedMessage } from './security/auth.js';
3
4
  import { getLogger } from './utils/logger.js';
4
5
 
@@ -47,7 +48,7 @@ export function startBot(config, agent, conversationManager) {
47
48
  return;
48
49
  }
49
50
 
50
- const text = msg.text.trim();
51
+ let text = msg.text.trim();
51
52
 
52
53
  // Handle commands
53
54
  if (text === '/clean' || text === '/clear' || text === '/reset') {
@@ -69,6 +70,9 @@ export function startBot(config, agent, conversationManager) {
69
70
  '',
70
71
  '/clean — Clear conversation and start fresh',
71
72
  '/history — Show message count in memory',
73
+ '/browse <url> — Browse a website and get a summary',
74
+ '/screenshot <url> — Take a screenshot of a website',
75
+ '/extract <url> <selector> — Extract content using CSS selector',
72
76
  '/help — Show this help message',
73
77
  '',
74
78
  'Or just send any message to chat with the agent.',
@@ -76,6 +80,32 @@ export function startBot(config, agent, conversationManager) {
76
80
  return;
77
81
  }
78
82
 
83
+ // Web browsing shortcut commands — rewrite as natural language for the agent
84
+ if (text.startsWith('/browse ')) {
85
+ const browseUrl = text.slice('/browse '.length).trim();
86
+ if (!browseUrl) {
87
+ await bot.sendMessage(chatId, 'Usage: /browse <url>');
88
+ return;
89
+ }
90
+ text = `Browse this website and give me a summary: ${browseUrl}`;
91
+ } else if (text.startsWith('/screenshot ')) {
92
+ const screenshotUrl = text.slice('/screenshot '.length).trim();
93
+ if (!screenshotUrl) {
94
+ await bot.sendMessage(chatId, 'Usage: /screenshot <url>');
95
+ return;
96
+ }
97
+ text = `Take a screenshot of this website: ${screenshotUrl}`;
98
+ } else if (text.startsWith('/extract ')) {
99
+ const extractParts = text.slice('/extract '.length).trim().split(/\s+/);
100
+ if (extractParts.length < 2) {
101
+ await bot.sendMessage(chatId, 'Usage: /extract <url> <css-selector>');
102
+ return;
103
+ }
104
+ const extractUrl = extractParts[0];
105
+ const extractSelector = extractParts.slice(1).join(' ');
106
+ text = `Extract content from ${extractUrl} using the CSS selector: ${extractSelector}`;
107
+ }
108
+
79
109
  logger.info(`Message from ${username} (${userId}): ${text.slice(0, 100)}`);
80
110
 
81
111
  // Show typing and keep refreshing it
@@ -85,13 +115,57 @@ export function startBot(config, agent, conversationManager) {
85
115
  bot.sendChatAction(chatId, 'typing').catch(() => {});
86
116
 
87
117
  try {
88
- const onUpdate = async (update) => {
118
+ const onUpdate = async (update, opts = {}) => {
119
+ // Edit an existing message instead of sending a new one
120
+ if (opts.editMessageId) {
121
+ try {
122
+ const edited = await bot.editMessageText(update, {
123
+ chat_id: chatId,
124
+ message_id: opts.editMessageId,
125
+ parse_mode: 'Markdown',
126
+ });
127
+ return edited.message_id;
128
+ } catch {
129
+ try {
130
+ const edited = await bot.editMessageText(update, {
131
+ chat_id: chatId,
132
+ message_id: opts.editMessageId,
133
+ });
134
+ return edited.message_id;
135
+ } catch {
136
+ return opts.editMessageId;
137
+ }
138
+ }
139
+ }
140
+
141
+ // Send new message(s)
89
142
  const parts = splitMessage(update);
143
+ let lastMsgId = null;
90
144
  for (const part of parts) {
91
145
  try {
92
- await bot.sendMessage(chatId, part, { parse_mode: 'Markdown' });
146
+ const sent = await bot.sendMessage(chatId, part, { parse_mode: 'Markdown' });
147
+ lastMsgId = sent.message_id;
93
148
  } catch {
94
- await bot.sendMessage(chatId, part);
149
+ const sent = await bot.sendMessage(chatId, part);
150
+ lastMsgId = sent.message_id;
151
+ }
152
+ }
153
+ return lastMsgId;
154
+ };
155
+
156
+ const sendPhoto = async (filePath, caption) => {
157
+ try {
158
+ await bot.sendPhoto(chatId, createReadStream(filePath), {
159
+ caption: caption || '',
160
+ parse_mode: 'Markdown',
161
+ });
162
+ } catch {
163
+ try {
164
+ await bot.sendPhoto(chatId, createReadStream(filePath), {
165
+ caption: caption || '',
166
+ });
167
+ } catch (err) {
168
+ logger.error(`Failed to send photo: ${err.message}`);
95
169
  }
96
170
  }
97
171
  };
@@ -99,7 +173,7 @@ export function startBot(config, agent, conversationManager) {
99
173
  const reply = await agent.processMessage(chatId, text, {
100
174
  id: userId,
101
175
  username,
102
- }, onUpdate);
176
+ }, onUpdate, sendPhoto);
103
177
 
104
178
  clearInterval(typingInterval);
105
179
 
package/src/coder.js CHANGED
@@ -84,7 +84,7 @@ function processEvent(line, onOutput, logger) {
84
84
  // Not JSON — send raw text if it looks meaningful
85
85
  if (line.trim() && line.length > 3 && onOutput) {
86
86
  logger.info(`Claude Code (raw): ${line.slice(0, 200)}`);
87
- onOutput(`📟 ${line.trim()}`).catch(() => {});
87
+ onOutput(`▹ ${line.trim()}`).catch(() => {});
88
88
  }
89
89
  return null;
90
90
  }
@@ -103,7 +103,7 @@ function processEvent(line, onOutput, logger) {
103
103
  const tool = extractToolUse(event);
104
104
  if (tool) {
105
105
  logger.info(`Claude Code tool: ${tool.name}: ${tool.summary}`);
106
- if (onOutput) onOutput(`🔨 \`${tool.name}: ${tool.summary}\``).catch(() => {});
106
+ if (onOutput) onOutput(`▸ ${tool.name}: ${tool.summary}`).catch(() => {});
107
107
  }
108
108
  return event;
109
109
  }
@@ -113,7 +113,7 @@ function processEvent(line, onOutput, logger) {
113
113
  const tool = extractToolUse(event);
114
114
  if (tool) {
115
115
  logger.info(`Claude Code tool: ${tool.name}: ${tool.summary}`);
116
- if (onOutput) onOutput(`🔨 \`${tool.name}: ${tool.summary}\``).catch(() => {});
116
+ if (onOutput) onOutput(`▸ ${tool.name}: ${tool.summary}`).catch(() => {});
117
117
  }
118
118
  return event;
119
119
  }
@@ -124,7 +124,7 @@ function processEvent(line, onOutput, logger) {
124
124
  const duration = event.duration_ms ? ` in ${(event.duration_ms / 1000).toFixed(1)}s` : '';
125
125
  const cost = event.cost_usd ? ` ($${event.cost_usd.toFixed(3)})` : '';
126
126
  logger.info(`Claude Code finished: ${status}${duration}${cost}`);
127
- if (onOutput) onOutput(`✅ Claude Code finished (${status}${duration}${cost})`).catch(() => {});
127
+ if (onOutput) onOutput(`▪ done (${status}${duration}${cost})`).catch(() => {});
128
128
  return event;
129
129
  }
130
130
 
@@ -160,7 +160,61 @@ export class ClaudeCodeSpawner {
160
160
  const cmd = `claude ${args.map((a) => a.includes(' ') ? `"${a}"` : a).join(' ')}`;
161
161
  logger.info(`Spawning: ${cmd.slice(0, 300)}`);
162
162
  logger.info(`CWD: ${workingDirectory}`);
163
- if (onOutput) onOutput(`⏳ Starting Claude Code...\n\`${cmd.slice(0, 200)}\``).catch(() => {});
163
+
164
+ // --- Smart output: consolidate tool activity into one editable message ---
165
+ let statusMsgId = null;
166
+ let activityLines = [];
167
+ let flushTimer = null;
168
+ const MAX_VISIBLE = 15;
169
+
170
+ const buildStatusText = (finalState = null) => {
171
+ const visible = activityLines.slice(-MAX_VISIBLE);
172
+ const countInfo = activityLines.length > MAX_VISIBLE
173
+ ? `\n_... ${activityLines.length} operations total_\n`
174
+ : '';
175
+ if (finalState === 'done') {
176
+ return `░▒▓ *Claude Code Done* — ${activityLines.length} ops\n${countInfo}\n${visible.join('\n')}`;
177
+ }
178
+ if (finalState === 'error') {
179
+ return `░▒▓ *Claude Code Failed* — ${activityLines.length} ops\n${countInfo}\n${visible.join('\n')}`;
180
+ }
181
+ return `░▒▓ *Claude Code Working...*\n${countInfo}\n${visible.join('\n')}`;
182
+ };
183
+
184
+ const flushStatus = async () => {
185
+ flushTimer = null;
186
+ if (!onOutput || activityLines.length === 0) return;
187
+ try {
188
+ if (statusMsgId) {
189
+ await onOutput(buildStatusText(), { editMessageId: statusMsgId });
190
+ } else {
191
+ statusMsgId = await onOutput(buildStatusText());
192
+ }
193
+ } catch {}
194
+ };
195
+
196
+ const addActivity = (line) => {
197
+ activityLines.push(line);
198
+ if (!statusMsgId && !flushTimer) {
199
+ // First activity — create the status message immediately
200
+ flushStatus();
201
+ } else if (!flushTimer) {
202
+ // Throttle subsequent edits to avoid Telegram rate limits
203
+ flushTimer = setTimeout(flushStatus, 1000);
204
+ }
205
+ };
206
+
207
+ const smartOutput = onOutput ? async (text) => {
208
+ // Tool calls, raw output, warnings, starting → accumulate in status message
209
+ if (text.startsWith('▸') || text.startsWith('▹') || text.startsWith('▪')) {
210
+ addActivity(text);
211
+ return;
212
+ }
213
+ // Everything else (💬 text, errors, timeout) → new message
214
+ await onOutput(text);
215
+ } : null;
216
+
217
+ if (smartOutput) smartOutput(`▸ Starting Claude Code...`).catch(() => {});
164
218
 
165
219
  return new Promise((resolve, reject) => {
166
220
  const child = spawn('claude', args, {
@@ -193,7 +247,7 @@ export class ClaudeCodeSpawner {
193
247
  }
194
248
  } catch {}
195
249
 
196
- processEvent(trimmed, onOutput, logger);
250
+ processEvent(trimmed, smartOutput, logger);
197
251
  }
198
252
  });
199
253
 
@@ -201,19 +255,18 @@ export class ClaudeCodeSpawner {
201
255
  const chunk = data.toString().trim();
202
256
  stderr += chunk + '\n';
203
257
  logger.warn(`Claude Code stderr: ${chunk.slice(0, 300)}`);
204
- // Forward ALL stderr to Telegram immediately
205
- if (onOutput && chunk) {
206
- onOutput(`⚠️ Claude Code: ${chunk.slice(0, 400)}`).catch(() => {});
258
+ if (smartOutput && chunk) {
259
+ smartOutput(`▹ ${chunk.slice(0, 300)}`).catch(() => {});
207
260
  }
208
261
  });
209
262
 
210
263
  const timer = setTimeout(() => {
211
264
  child.kill('SIGTERM');
212
- if (onOutput) onOutput(`⏰ Claude Code timed out after ${this.timeout / 1000}s`).catch(() => {});
265
+ if (smartOutput) smartOutput(`▸ Claude Code timed out after ${this.timeout / 1000}s`).catch(() => {});
213
266
  reject(new Error(`Claude Code timed out after ${this.timeout / 1000}s`));
214
267
  }, this.timeout);
215
268
 
216
- child.on('close', (code) => {
269
+ child.on('close', async (code) => {
217
270
  clearTimeout(timer);
218
271
 
219
272
  if (buffer.trim()) {
@@ -224,7 +277,22 @@ export class ClaudeCodeSpawner {
224
277
  resultText = event.result || resultText;
225
278
  }
226
279
  } catch {}
227
- processEvent(buffer.trim(), onOutput, logger);
280
+ processEvent(buffer.trim(), smartOutput, logger);
281
+ }
282
+
283
+ // Flush any pending status edits
284
+ if (flushTimer) {
285
+ clearTimeout(flushTimer);
286
+ flushTimer = null;
287
+ }
288
+ await flushStatus();
289
+
290
+ // Final status message update — show done/failed state
291
+ if (statusMsgId && onOutput) {
292
+ const finalState = code === 0 ? 'done' : 'error';
293
+ try {
294
+ await onOutput(buildStatusText(finalState), { editMessageId: statusMsgId });
295
+ } catch {}
228
296
  }
229
297
 
230
298
  logger.info(`Claude Code exited with code ${code} | stdout: ${fullOutput.length} chars | stderr: ${stderr.length} chars`);
@@ -232,7 +300,6 @@ export class ClaudeCodeSpawner {
232
300
  if (code !== 0) {
233
301
  const errMsg = stderr.trim() || fullOutput.trim() || `exited with code ${code}`;
234
302
  logger.error(`Claude Code failed: ${errMsg.slice(0, 500)}`);
235
- if (onOutput) onOutput(`❌ Claude Code failed (exit ${code}):\n\`\`\`\n${errMsg.slice(0, 400)}\n\`\`\``).catch(() => {});
236
303
  reject(new Error(`Claude Code exited with code ${code}: ${errMsg.slice(0, 500)}`));
237
304
  } else {
238
305
  resolve({
@@ -17,8 +17,19 @@ IMPORTANT: You MUST NOT write code yourself using read_file/write_file. ALWAYS d
17
17
  4. Use GitHub tools to create the PR
18
18
  5. Report back with the PR link
19
19
 
20
+ ## Web Browsing Tasks (researching, scraping, reading documentation, taking screenshots)
21
+ - Use browse_website to read and summarize web pages
22
+ - Use screenshot_website to capture visual snapshots of pages — the screenshot is automatically sent to the chat
23
+ - Use extract_content to pull specific data from pages using CSS selectors
24
+ - Use interact_with_page for pages that need clicking, typing, or scrolling to reveal content
25
+ - Use send_image to send any image file directly to the Telegram chat (screenshots, generated images, etc.)
26
+ - When a user sends /browse <url>, use browse_website on that URL
27
+ - When a user sends /screenshot <url>, use screenshot_website on that URL
28
+ - When a user sends /extract <url> <selector>, use extract_content with that URL and selector
29
+
20
30
  You are the orchestrator. Claude Code is the coder. Never use read_file + write_file to modify source code — that's Claude Code's job. You handle git, GitHub, and infrastructure. Claude Code handles all code changes.
21
31
 
32
+
22
33
  ## Non-Coding Tasks (monitoring, deploying, restarting services, checking status)
23
34
  - Use OS, Docker, process, network, and monitoring tools directly
24
35
  - No need to spawn Claude Code for these
@@ -6,6 +6,7 @@ const DANGEROUS_PATTERNS = [
6
6
  { tool: 'github_create_repo', pattern: null, label: 'create a GitHub repository' },
7
7
  { tool: 'docker_compose', param: 'action', value: 'down', label: 'take down containers' },
8
8
  { tool: 'git_push', param: 'force', value: true, label: 'force push' },
9
+ { tool: 'interact_with_page', pattern: null, label: 'interact with a webpage (click, type, execute scripts)' },
9
10
  ];
10
11
 
11
12
  export function requiresConfirmation(toolName, params, config) {
@@ -0,0 +1,680 @@
1
+ import puppeteer from 'puppeteer';
2
+ import { writeFile, mkdir, access } from 'fs/promises';
3
+ import { join } from 'path';
4
+ import { homedir } from 'os';
5
+
6
+ // ── Constants ────────────────────────────────────────────────────────────────
7
+
8
// Upper bound, in milliseconds, passed to page.goto() for a single navigation.
const NAVIGATION_TIMEOUT = 30 * 1000;

// Default character budget used by truncate() for extracted page text.
const MAX_CONTENT_LENGTH = 15 * 1000;

// Viewport dimensions applied to every page before navigating.
const MAX_SCREENSHOT_WIDTH = 1920;
const MAX_SCREENSHOT_HEIGHT = 1080;

// Directory under the current user's home where screenshots are written.
const SCREENSHOTS_DIR = join(
  homedir(),
  '.kernelbot',
  'screenshots',
);
13
+
14
// Blocklist to prevent abuse — internal/private network ranges and sensitive targets.
// These are cheap string-prefix checks on the raw input; validateUrl() additionally
// inspects the *parsed* hostname, because prefix matching alone is easy to bypass
// (upper-case scheme, embedded credentials, integer/hex IPv4 forms, IPv6 literals).
const BLOCKED_URL_PATTERNS = [
  /^https?:\/\/localhost/i,
  /^https?:\/\/127\./,
  /^https?:\/\/0\./,
  /^https?:\/\/10\./,
  /^https?:\/\/172\.(1[6-9]|2\d|3[01])\./,
  /^https?:\/\/192\.168\./,
  /^https?:\/\/\[::1\]/,
  /^https?:\/\/169\.254\./,
  /^file:/i,
  /^ftp:/i,
  /^data:/i,
];

// ── Helpers ──────────────────────────────────────────────────────────────────

/**
 * Validate and normalize a user-supplied URL before it is handed to the browser.
 *
 * Accepts bare hosts ("example.com") by prepending "https://". Rejects non-HTTP
 * protocols and any URL whose host is a loopback, private, link-local or
 * "this network" address, or a localhost name.
 *
 * Note: only the literal host is checked. A public hostname that *resolves* to a
 * private address (DNS rebinding) or an HTTP redirect to one is not detected here.
 *
 * @param {string} url - Raw URL or host as typed by the user / produced by the agent.
 * @returns {{valid: true, url: string} | {valid: false, error: string}}
 *   On success, `url` is the trimmed input with a scheme guaranteed.
 */
function validateUrl(url) {
  if (!url || typeof url !== 'string') {
    return { valid: false, error: 'URL is required' };
  }

  const input = url.trim();
  if (!input) {
    return { valid: false, error: 'URL is required' };
  }

  // True when the first two IPv4 octets fall in a range that must not be reached.
  const isPrivateIPv4 = ([a, b]) =>
    a === 0 ||
    a === 10 ||
    a === 127 ||
    (a === 172 && b >= 16 && b <= 31) ||
    (a === 192 && b === 168) ||
    (a === 169 && b === 254);

  // Decide from the hostname as serialized by the URL parser. The parser
  // lower-cases names and rewrites integer/hex/short IPv4 spellings to
  // dotted-decimal, so those spellings cannot hide a private address.
  const isBlockedHost = (hostname) => {
    const host = hostname.toLowerCase().replace(/\.$/, '');

    if (host === 'localhost' || host.endsWith('.localhost')) return true;

    const v4 = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
    if (v4) return isPrivateIPv4(v4.slice(1).map(Number));

    if (host.startsWith('[') && host.endsWith(']')) {
      const v6 = host.slice(1, -1);
      if (v6 === '::1' || v6 === '::') return true; // loopback / unspecified
      if (/^f[cd][0-9a-f]{2}:/.test(v6)) return true; // fc00::/7 unique-local
      if (/^fe[89ab][0-9a-f]:/.test(v6)) return true; // fe80::/10 link-local

      // IPv4-mapped IPv6 (::ffff:a.b.c.d) is serialized as two hex groups.
      const mapped = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(v6);
      if (mapped) {
        const hi = Number.parseInt(mapped[1], 16);
        const lo = Number.parseInt(mapped[2], 16);
        return isPrivateIPv4([hi >> 8, hi & 0xff, lo >> 8, lo & 0xff]);
      }
    }

    return false;
  };

  // Block non-http protocols before auto-prepending https
  if (BLOCKED_URL_PATTERNS.some((pattern) => pattern.test(input))) {
    return {
      valid: false,
      error: 'Access to internal/private network addresses or non-HTTP protocols is blocked',
    };
  }

  // Add https:// if no protocol specified
  const normalized = /^https?:\/\//i.test(input) ? input : `https://${input}`;

  let parsed;
  try {
    parsed = new URL(normalized);
  } catch {
    return { valid: false, error: 'Invalid URL format' };
  }

  // Check again after normalization (e.g., localhost without protocol), and
  // check the parsed host to catch spellings the prefix patterns miss.
  const blockedAfterNormalization =
    BLOCKED_URL_PATTERNS.some((pattern) => pattern.test(normalized)) ||
    isBlockedHost(parsed.hostname);
  if (blockedAfterNormalization) {
    return { valid: false, error: 'Access to internal/private network addresses is blocked' };
  }

  return { valid: true, url: normalized };
}
63
+
64
/**
 * Cap a string at `maxLength` characters, appending a note with the number of
 * characters dropped. Falsy values and strings within the limit are returned as-is.
 *
 * @param {string} text - Text to shorten.
 * @param {number} [maxLength=MAX_CONTENT_LENGTH] - Maximum characters to keep.
 * @returns {string} The original value, or the shortened text plus a truncation note.
 */
function truncate(text, maxLength = MAX_CONTENT_LENGTH) {
  const fitsAlready = !text || text.length <= maxLength;
  if (fitsAlready) {
    return text;
  }

  const kept = text.slice(0, maxLength);
  const omitted = text.length - maxLength;
  return `${kept}\n\n... [truncated, ${omitted} chars omitted]`;
}
68
+
69
/**
 * Create the screenshots directory (and any missing parents) if it does not exist.
 *
 * @returns {Promise<void>}
 */
async function ensureScreenshotsDir() {
  const options = { recursive: true };
  await mkdir(SCREENSHOTS_DIR, options);
}
72
+
73
/**
 * Launch a headless browser, run `fn` with it, and always close the browser
 * afterwards — whether `fn` resolves or throws.
 *
 * @param {(browser: object) => Promise<*>} fn - Work to perform with the browser.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 */
async function withBrowser(fn) {
  const launchArgs = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-gpu',
    '--disable-extensions',
    '--disable-background-networking',
    '--disable-default-apps',
    '--disable-sync',
    '--no-first-run',
  ];

  let instance;
  try {
    instance = await puppeteer.launch({ headless: true, args: launchArgs });
    const outcome = await fn(instance);
    return outcome;
  } finally {
    // Closing is best-effort: a failure here must not mask the result or error of fn.
    await instance?.close().catch(() => {});
  }
}
97
+
98
/**
 * Prepare a page (user agent, viewport) and navigate it to `url`.
 *
 * @param {object} page - Puppeteer page to drive.
 * @param {string} url - Destination URL.
 * @param {string} [waitUntil='networkidle2'] - Lifecycle event passed to page.goto().
 * @returns {Promise<void>} Rejects if navigation fails or exceeds NAVIGATION_TIMEOUT.
 */
async function navigateTo(page, url, waitUntil = 'networkidle2') {
  const userAgent =
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
  await page.setUserAgent(userAgent);

  const viewport = { width: MAX_SCREENSHOT_WIDTH, height: MAX_SCREENSHOT_HEIGHT };
  await page.setViewport(viewport);

  const gotoOptions = { waitUntil, timeout: NAVIGATION_TIMEOUT };
  await page.goto(url, gotoOptions);
}
108
+
109
+ // ── Tool Definitions ─────────────────────────────────────────────────────────
110
+
111
/**
 * Tool schemas exposed to the agent for the web-browsing tools.
 * Built with small local builders; key order (type → enum → description, and
 * name → description → input_schema) matches the plain-literal form.
 */
export const definitions = (() => {
  const str = (description) => ({ type: 'string', description });
  const choice = (options, description) => ({ type: 'string', enum: options, description });
  const bool = (description) => ({ type: 'boolean', description });
  const num = (description) => ({ type: 'number', description });
  const tool = (name, description, properties, required) => ({
    name,
    description,
    input_schema: { type: 'object', properties, required },
  });

  // Shape of a single entry in interact_with_page's `actions` array.
  const actionItem = {
    type: 'object',
    properties: {
      type: choice(['click', 'type', 'scroll', 'wait', 'evaluate'], 'Action type'),
      selector: str('CSS selector for the target element (for click and type actions)'),
      text: str('Text to type (for type action)'),
      direction: choice(['down', 'up'], 'Scroll direction (for scroll action, default: down)'),
      pixels: num('Number of pixels to scroll (default: 500)'),
      milliseconds: num('Time to wait in ms (for wait action, default: 1000)'),
      script: str(
        'JavaScript to execute in the page context (for evaluate action). Must be a single expression or IIFE.'
      ),
    },
    required: ['type'],
  };

  return [
    tool(
      'browse_website',
      'Navigate to a website URL and extract its content including title, headings, text, links, and metadata. Returns a structured summary of the page. Handles JavaScript-rendered pages.',
      {
        url: str('The URL to browse (e.g., "https://example.com" or "example.com")'),
        wait_for_selector: str(
          'Optional CSS selector to wait for before extracting content (useful for JS-heavy pages)'
        ),
        include_links: bool('Include links found on the page (default: false)'),
      },
      ['url']
    ),
    tool(
      'screenshot_website',
      'Take a screenshot of a website and save it to disk. Returns the file path to the screenshot image. Supports full-page and viewport-only screenshots.',
      {
        url: str('The URL to screenshot'),
        full_page: bool('Capture the full scrollable page instead of just the viewport (default: false)'),
        selector: str('Optional CSS selector to screenshot a specific element instead of the full page'),
      },
      ['url']
    ),
    tool(
      'extract_content',
      'Extract specific content from a webpage using CSS selectors. Returns the text or HTML content of matched elements. Useful for scraping structured data.',
      {
        url: str('The URL to extract content from'),
        selector: str('CSS selector to match elements (e.g., "h1", ".article-body", "#main-content")'),
        attribute: str('Extract a specific attribute instead of text content (e.g., "href", "src")'),
        include_html: bool('Include raw HTML of matched elements (default: false, returns text only)'),
        limit: num('Maximum number of elements to return (default: 20)'),
      },
      ['url', 'selector']
    ),
    tool(
      'send_image',
      'Send an image or screenshot file directly to the Telegram chat. Use this to share screenshots, generated images, or any image file with the user.',
      {
        file_path: str(
          'Absolute path to the image file to send (e.g., "/home/user/.kernelbot/screenshots/example.png")'
        ),
        caption: str('Optional caption to include with the image'),
      },
      ['file_path']
    ),
    tool(
      'interact_with_page',
      'Interact with a webpage by clicking elements, typing into inputs, scrolling, or executing JavaScript. Returns the page state after interaction.',
      {
        url: str('The URL to interact with'),
        actions: {
          type: 'array',
          description:
            'List of actions to perform in sequence. Each action is an object with a "type" field.',
          items: actionItem,
        },
        extract_after: bool('Extract page content after performing actions (default: true)'),
      },
      ['url', 'actions']
    ),
  ];
})();
269
+
270
+ // ── Handlers ─────────────────────────────────────────────────────────────────
271
+
272
/**
 * Tool handler for `browse_website`: load a page in a headless browser and
 * return a structured summary (title, meta description, headings, main text,
 * and optionally links).
 *
 * @param {object} params
 * @param {string} params.url - URL or bare host to open; validated via validateUrl().
 * @param {string} [params.wait_for_selector] - CSS selector to wait for (up to 10s)
 *   before extracting; extraction proceeds even if it never appears.
 * @param {boolean} [params.include_links=false] - Include up to 50 links from the page.
 * @returns {Promise<object>} `{ success: true, url, title, meta_description,
 *   canonical_url, headings, content, links }` or `{ error }` when the URL is
 *   rejected or navigation fails.
 */
async function handleBrowse(params) {
  const validation = validateUrl(params.url);
  if (!validation.valid) return { error: validation.error };

  const url = validation.url;

  return withBrowser(async (browser) => {
    const page = await browser.newPage();

    try {
      await navigateTo(page, url);
    } catch (err) {
      const message = err?.message ?? String(err);
      if (message.includes('net::ERR_NAME_NOT_RESOLVED')) {
        return { error: `Could not resolve hostname for: ${url}` };
      }
      // Puppeteer signals timeouts with a TimeoutError; the casing of "timeout"
      // in its message has varied across versions, so match on the error name
      // and case-insensitively instead of on the capitalised substring.
      if (err?.name === 'TimeoutError' || /timeout/i.test(message)) {
        return { error: `Page load timed out after ${NAVIGATION_TIMEOUT / 1000}s: ${url}` };
      }
      return { error: `Navigation failed: ${message}` };
    }

    // Wait for optional selector
    if (params.wait_for_selector) {
      try {
        await page.waitForSelector(params.wait_for_selector, { timeout: 10000 });
      } catch {
        // Continue even if selector not found
      }
    }

    // Runs inside the page: everything referenced here must be passed in as an argument.
    const content = await page.evaluate((includeLinks) => {
      const title = document.title || '';
      const metaDesc = document.querySelector('meta[name="description"]')?.content || '';
      const canonicalUrl = document.querySelector('link[rel="canonical"]')?.href || window.location.href;

      // Extract headings (grouped by level: all h1, then h2, then h3)
      const headings = [];
      for (const tag of ['h1', 'h2', 'h3']) {
        document.querySelectorAll(tag).forEach((el) => {
          const text = el.textContent.trim();
          if (text) headings.push({ level: tag, text });
        });
      }

      // Extract main text content
      // Prefer common article/content containers
      const contentSelectors = [
        'article', 'main', '[role="main"]',
        '.content', '.article', '.post',
        '#content', '#main', '#article',
      ];

      let mainText = '';
      for (const sel of contentSelectors) {
        const el = document.querySelector(sel);
        if (el) {
          // First matching container wins, even if its text turns out to be empty;
          // the body fallback below covers that case.
          mainText = el.innerText.trim();
          break;
        }
      }

      // Fall back to body text if no content container found
      if (!mainText) {
        // Remove script, style, nav, footer, header noise
        const clone = document.body.cloneNode(true);
        for (const el of clone.querySelectorAll('script, style, nav, footer, header, aside, [role="navigation"]')) {
          el.remove();
        }
        mainText = clone.innerText.trim();
      }

      // Extract links if requested
      let links = [];
      if (includeLinks) {
        document.querySelectorAll('a[href]').forEach((a) => {
          const text = a.textContent.trim();
          const href = a.href;
          if (text && href && !href.startsWith('javascript:')) {
            links.push({ text: text.slice(0, 100), href });
          }
        });
        links = links.slice(0, 50);
      }

      return { title, metaDesc, canonicalUrl, headings, mainText, links };
    }, params.include_links || false);

    return {
      success: true,
      url: page.url(), // final URL after any redirects
      title: content.title,
      meta_description: content.metaDesc,
      canonical_url: content.canonicalUrl,
      headings: content.headings.slice(0, 30),
      content: truncate(content.mainText),
      links: content.links || [],
    };
  });
}
371
+
372
/**
 * Tool handler for `screenshot_website`: capture a PNG of a page (viewport,
 * full page, or a single element), save it under SCREENSHOTS_DIR, and — when
 * the caller supplied `context.sendPhoto` — forward it to the chat.
 *
 * @param {object} params
 * @param {string} params.url - URL or bare host to capture; validated via validateUrl().
 * @param {boolean} [params.full_page=false] - Capture the whole scrollable page.
 * @param {string} [params.selector] - Capture only the first element matching this selector.
 * @param {object} [context]
 * @param {(filePath: string, caption: string) => Promise<*>} [context.sendPhoto]
 *   Callback used to deliver the image to the chat.
 * @returns {Promise<object>} `{ success: true, url, title, screenshot_path, filename,
 *   sent_to_chat }` or `{ error }` when the URL is rejected, navigation fails,
 *   or the element cannot be captured.
 */
async function handleScreenshot(params, context) {
  const validation = validateUrl(params.url);
  if (!validation.valid) return { error: validation.error };

  const url = validation.url;
  await ensureScreenshotsDir();

  return withBrowser(async (browser) => {
    const page = await browser.newPage();

    try {
      await navigateTo(page, url);
    } catch (err) {
      const message = err?.message ?? String(err);
      if (message.includes('net::ERR_NAME_NOT_RESOLVED')) {
        return { error: `Could not resolve hostname for: ${url}` };
      }
      // Puppeteer signals timeouts with a TimeoutError; the casing of "timeout"
      // in its message has varied across versions, so match on the error name
      // and case-insensitively instead of on the capitalised substring.
      if (err?.name === 'TimeoutError' || /timeout/i.test(message)) {
        return { error: `Page load timed out after ${NAVIGATION_TIMEOUT / 1000}s: ${url}` };
      }
      return { error: `Navigation failed: ${message}` };
    }

    // File name: sanitized hostname plus a millisecond timestamp.
    const timestamp = Date.now();
    const safeName = new URL(url).hostname.replace(/[^a-z0-9.-]/gi, '_');
    const filename = `${safeName}_${timestamp}.png`;
    const filepath = join(SCREENSHOTS_DIR, filename);

    const screenshotOptions = {
      path: filepath,
      type: 'png',
    };

    if (params.selector) {
      try {
        const element = await page.$(params.selector);
        if (!element) {
          return { error: `Element not found for selector: ${params.selector}` };
        }
        await element.screenshot(screenshotOptions);
      } catch (err) {
        return { error: `Failed to screenshot element: ${err.message}` };
      }
    } else {
      screenshotOptions.fullPage = params.full_page || false;
      await page.screenshot(screenshotOptions);
    }

    const title = await page.title();

    // Send the screenshot directly to Telegram chat
    let sentToChat = false;
    if (context?.sendPhoto) {
      try {
        await context.sendPhoto(filepath, `📸 ${title || url}`);
        // Only report delivery when the callback completed without throwing.
        sentToChat = true;
      } catch {
        // Photo sending is best-effort; don't fail the tool
      }
    }

    return {
      success: true,
      url: page.url(), // final URL after any redirects
      title,
      screenshot_path: filepath,
      filename,
      sent_to_chat: sentToChat,
    };
  });
}
440
+
441
/**
 * Load a page and pull text, attribute values, and optionally HTML from every
 * element matching a CSS selector.
 *
 * @param {object} params
 * @param {string} params.url - Page to load; checked with validateUrl before any browser work.
 * @param {string} params.selector - CSS selector handed to document.querySelectorAll.
 * @param {string} [params.attribute] - When set, each item carries `value` (that attribute) instead of `text`.
 * @param {boolean} [params.include_html] - When true, each item also carries the element's outerHTML.
 * @param {number} [params.limit=20] - Maximum number of items to return; clamped to 1..100.
 * @returns {Promise<object>} `{ success, url, selector, found, returned, items }` when elements
 *   match, the same shape with `found: 0` and a `message` when none do, or `{ error }`.
 */
async function handleExtract(params) {
  const validation = validateUrl(params.url);
  if (!validation.valid) return { error: validation.error };

  // Without this guard a missing selector is coerced to the string "undefined"
  // and silently reports "no elements found".
  if (typeof params.selector !== 'string' || params.selector.trim() === '') {
    return { error: 'selector is required' };
  }

  const url = validation.url;

  // Clamp to a whole number in 1..100 so negative, fractional, or non-numeric
  // limits cannot produce an empty or oversized result set.
  const requestedLimit = Number(params.limit) || 20;
  const limit = Math.max(1, Math.min(Math.trunc(requestedLimit), 100));

  return withBrowser(async (browser) => {
    const page = await browser.newPage();

    try {
      await navigateTo(page, url);
    } catch (err) {
      if (err.message.includes('net::ERR_NAME_NOT_RESOLVED')) {
        return { error: `Could not resolve hostname for: ${url}` };
      }
      if (err.message.includes('Timeout')) {
        return { error: `Page load timed out after ${NAVIGATION_TIMEOUT / 1000}s: ${url}` };
      }
      return { error: `Navigation failed: ${err.message}` };
    }

    let results;
    try {
      // This callback runs inside the page, so it can only use its own arguments.
      results = await page.evaluate(
        (selector, attribute, includeHtml, maxItems) => {
          const elements = document.querySelectorAll(selector);
          if (elements.length === 0) return { found: 0, items: [] };

          const items = [];
          for (let i = 0; i < Math.min(elements.length, maxItems); i++) {
            const el = elements[i];
            const item = {};

            if (attribute) {
              item.value = el.getAttribute(attribute) || null;
            } else {
              item.text = el.innerText?.trim() || el.textContent?.trim() || '';
            }

            if (includeHtml) {
              item.html = el.outerHTML;
            }

            item.tag = el.tagName.toLowerCase();
            items.push(item);
          }

          return { found: elements.length, items };
        },
        params.selector,
        params.attribute || null,
        params.include_html || false,
        limit
      );
    } catch (err) {
      // A syntactically invalid selector makes querySelectorAll throw in the page;
      // report it like every other failure instead of rejecting.
      return { error: `Failed to extract content: ${err.message}` };
    }

    if (results.found === 0) {
      return {
        success: true,
        url: page.url(),
        selector: params.selector,
        found: 0,
        items: [],
        message: `No elements found matching selector: ${params.selector}`,
      };
    }

    // Truncate individual items to prevent massive responses. Attribute values
    // are included because they can be arbitrarily large (e.g. data: URIs).
    for (const item of results.items) {
      if (item.text) item.text = truncate(item.text, 2000);
      if (item.value) item.value = truncate(item.value, 2000);
      if (item.html) item.html = truncate(item.html, 3000);
    }

    return {
      success: true,
      url: page.url(),
      selector: params.selector,
      found: results.found,
      returned: results.items.length,
      items: results.items,
    };
  });
}
522
+
523
/**
 * Load a page, run a short list of actions against it, and report the outcome
 * of each action plus (by default) the page text afterwards.
 *
 * Supported action types: `click`, `type`, `scroll`, `wait`, `evaluate`.
 * A failing action is recorded in the results and does not stop later actions.
 *
 * @param {object} params
 * @param {string} params.url - Page to load; checked with validateUrl first.
 * @param {object[]} params.actions - Between 1 and 10 action objects, each with a `type`.
 * @param {boolean} [params.extract_after] - Pass `false` to skip the page-text extraction.
 * @returns {Promise<object>} `{ success, url, title, actions, content? }` or `{ error }`.
 */
async function handleInteract(params) {
  const validation = validateUrl(params.url);
  if (!validation.valid) return { error: validation.error };

  const url = validation.url;

  // Array.isArray keeps a string or array-like from being iterated as actions.
  if (!Array.isArray(params.actions) || params.actions.length === 0) {
    return { error: 'At least one action is required' };
  }

  if (params.actions.length > 10) {
    return { error: 'Maximum 10 actions per request' };
  }

  // Block dangerous evaluate scripts.
  // NOTE(security): this is a best-effort textual blocklist, not a sandbox. It can
  // be bypassed (e.g. via property-name concatenation), so do not treat it as a
  // security boundary for untrusted scripts.
  const blocked = /fetch\s*\(|XMLHttpRequest|window\.location\s*=|document\.cookie|localStorage|sessionStorage/i;
  for (const action of params.actions) {
    if (action === null || typeof action !== 'object') {
      return { error: 'Each action must be an object with a type' };
    }
    if (action.type === 'evaluate' && action.script && blocked.test(action.script)) {
      return { error: 'Script contains blocked patterns (network requests, cookie access, storage access, or redirects)' };
    }
  }

  return withBrowser(async (browser) => {
    const page = await browser.newPage();

    try {
      await navigateTo(page, url);
    } catch (err) {
      if (err.message.includes('net::ERR_NAME_NOT_RESOLVED')) {
        return { error: `Could not resolve hostname for: ${url}` };
      }
      if (err.message.includes('Timeout')) {
        return { error: `Page load timed out after ${NAVIGATION_TIMEOUT / 1000}s: ${url}` };
      }
      return { error: `Navigation failed: ${err.message}` };
    }

    const actionResults = [];

    for (const action of params.actions) {
      try {
        switch (action.type) {
          case 'click': {
            if (!action.selector) {
              actionResults.push({ action: 'click', error: 'selector is required' });
              break;
            }
            await page.waitForSelector(action.selector, { timeout: 5000 });
            await page.click(action.selector);
            // Brief wait for any navigation or rendering
            await new Promise((r) => setTimeout(r, 500));
            actionResults.push({ action: 'click', selector: action.selector, success: true });
            break;
          }

          case 'type': {
            if (!action.selector || !action.text) {
              actionResults.push({ action: 'type', error: 'selector and text are required' });
              break;
            }
            await page.waitForSelector(action.selector, { timeout: 5000 });
            await page.type(action.selector, action.text);
            actionResults.push({ action: 'type', selector: action.selector, success: true });
            break;
          }

          case 'scroll': {
            const direction = action.direction || 'down';
            // Cap a single scroll at 5000px.
            const pixels = Math.min(action.pixels || 500, 5000);
            const scrollAmount = direction === 'up' ? -pixels : pixels;
            await page.evaluate((amount) => window.scrollBy(0, amount), scrollAmount);
            actionResults.push({ action: 'scroll', direction, pixels, success: true });
            break;
          }

          case 'wait': {
            // Cap a single wait at 10 seconds.
            const ms = Math.min(action.milliseconds || 1000, 10000);
            await new Promise((r) => setTimeout(r, ms));
            actionResults.push({ action: 'wait', milliseconds: ms, success: true });
            break;
          }

          case 'evaluate': {
            if (!action.script) {
              actionResults.push({ action: 'evaluate', error: 'script is required' });
              break;
            }
            const result = await page.evaluate(action.script);
            // String() turns objects into "[object Object]"; serialize them as
            // JSON so structured results stay readable.
            const rendered =
              result !== null && typeof result === 'object'
                ? JSON.stringify(result) ?? String(result)
                : String(result);
            actionResults.push({ action: 'evaluate', success: true, result: rendered.slice(0, 2000) });
            break;
          }

          default:
            actionResults.push({ action: action.type, error: `Unknown action type: ${action.type}` });
        }
      } catch (err) {
        actionResults.push({ action: action.type, error: err.message });
      }
    }

    const response = {
      success: true,
      url: page.url(),
      title: await page.title(),
      actions: actionResults,
    };

    // Extract content after interactions unless disabled
    if (params.extract_after !== false) {
      const text = await page.evaluate(() => {
        // Some documents (e.g. non-HTML responses) have no <body>.
        if (!document.body) return '';
        // Work on a clone so stripping page chrome does not alter the live page.
        const clone = document.body.cloneNode(true);
        for (const el of clone.querySelectorAll('script, style, nav, footer, header')) {
          el.remove();
        }
        return clone.innerText.trim();
      });
      response.content = truncate(text);
    }

    return response;
  });
}
647
+
648
/**
 * Send an image file that already exists on disk to the current chat.
 *
 * Checks run in this order: `file_path` present, file accessible, then
 * `context.sendPhoto` available. Every failure is returned as `{ error }`.
 *
 * @param {object} params
 * @param {string} params.file_path - Path of the image to send.
 * @param {string} [params.caption] - Caption passed along with the photo; defaults to ''.
 * @param {object} [context] - Tool context; `context.sendPhoto(path, caption)` does the sending.
 * @returns {Promise<object>} `{ success: true, file_path, sent: true }` or `{ error }`.
 */
async function handleSendImage(params, context) {
  const imagePath = params.file_path;
  if (!imagePath) {
    return { error: 'file_path is required' };
  }

  // Probe the file first so a missing path is reported as such.
  let isAccessible = true;
  try {
    await access(imagePath);
  } catch {
    isAccessible = false;
  }
  if (!isAccessible) {
    return { error: `File not found: ${imagePath}` };
  }

  const canSend = Boolean(context?.sendPhoto);
  if (!canSend) {
    return { error: 'Image sending is not available in this context (no active Telegram chat)' };
  }

  try {
    await context.sendPhoto(imagePath, params.caption || '');
  } catch (err) {
    return { error: `Failed to send image: ${err.message}` };
  }

  return { success: true, file_path: imagePath, sent: true };
}
671
+
672
+ // ── Export ────────────────────────────────────────────────────────────────────
673
+
674
/**
 * Tool name -> handler function for the browser tools defined in this module.
 * Each key is the tool name a caller dispatches on.
 */
const handlers = {
  browse_website: handleBrowse,
  screenshot_website: handleScreenshot,
  extract_content: handleExtract,
  interact_with_page: handleInteract,
  send_image: handleSendImage,
};

export { handlers };
@@ -6,6 +6,7 @@ import { definitions as networkDefinitions, handlers as networkHandlers } from '
6
6
  import { definitions as gitDefinitions, handlers as gitHandlers } from './git.js';
7
7
  import { definitions as githubDefinitions, handlers as githubHandlers } from './github.js';
8
8
  import { definitions as codingDefinitions, handlers as codingHandlers } from './coding.js';
9
+ import { definitions as browserDefinitions, handlers as browserHandlers } from './browser.js';
9
10
  import { logToolCall } from '../security/audit.js';
10
11
  import { requiresConfirmation } from '../security/confirm.js';
11
12
 
@@ -18,6 +19,7 @@ export const toolDefinitions = [
18
19
  ...gitDefinitions,
19
20
  ...githubDefinitions,
20
21
  ...codingDefinitions,
22
+ ...browserDefinitions,
21
23
  ];
22
24
 
23
25
  const handlerMap = {
@@ -29,6 +31,7 @@ const handlerMap = {
29
31
  ...gitHandlers,
30
32
  ...githubHandlers,
31
33
  ...codingHandlers,
34
+ ...browserHandlers,
32
35
  };
33
36
 
34
37
  export function checkConfirmation(name, params, config) {
@@ -4,6 +4,7 @@ import { fileURLToPath } from 'url';
4
4
  import chalk from 'chalk';
5
5
  import ora from 'ora';
6
6
  import boxen from 'boxen';
7
+ import gradient from 'gradient-string';
7
8
 
8
9
  const __dirname = dirname(fileURLToPath(import.meta.url));
9
10
 
@@ -25,8 +26,19 @@ const LOGO = `
25
26
  ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝ ╚═══╝╚══════╝╚══════╝╚═════╝ ╚═════╝ ╚═╝
26
27
  `;
27
28
 
29
// Colour stops for the logo gradient, listed in the order they are applied.
const RAINBOW_STOPS = [
  '#FF0080', // Hot Pink
  '#FF8C00', // Dark Orange
  '#FFD700', // Gold
  '#00FF00', // Lime Green
  '#00CED1', // Dark Turquoise
  '#1E90FF', // Dodger Blue
  '#9370DB', // Medium Purple
];

// Vibrant rainbow gradient built from the stops above.
const rainbowGradient = gradient(RAINBOW_STOPS);
39
+
28
40
  export function showLogo() {
29
- console.log(chalk.cyan(LOGO));
41
+ console.log(rainbowGradient.multiline(LOGO));
30
42
  console.log(chalk.dim(` AI Engineering Agent — v${getVersion()}\n`));
31
43
  console.log(
32
44
  boxen(
@@ -93,4 +105,4 @@ export function showError(msg) {
93
105
 
94
106
/**
 * Build a cyan `ora` spinner showing the given text.
 *
 * @param {string} text - Label displayed next to the spinner.
 * @returns {object} The spinner instance returned by `ora`.
 */
export function createSpinner(text) {
  const spinnerOptions = { text, color: 'cyan' };
  return ora(spinnerOptions);
}