@projectservan8n/cnapse 0.2.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,254 @@
1
+ /**
2
+ * Vision capability - screenshot capture and AI description
3
+ * Supports Ollama VLMs, OpenRouter, Anthropic, OpenAI
4
+ */
5
+
6
+ import { getConfig, getApiKey } from './config.js';
7
+
8
/**
 * Result of a screen-description request: the AI-generated text plus the
 * raw screenshot it was produced from.
 */
export interface VisionResponse {
  /** AI-generated description of the captured screen. */
  description: string;
  /** PNG screenshot encoded as base64. */
  screenshot: string; // base64
}
12
+
13
+ /**
14
+ * Capture screenshot and get AI description
15
+ */
16
+ export async function describeScreen(): Promise<VisionResponse> {
17
+ const screenshot = await captureScreenshot();
18
+ if (!screenshot) {
19
+ throw new Error('Failed to capture screenshot');
20
+ }
21
+
22
+ const config = getConfig();
23
+ const description = await analyzeWithVision(screenshot, config.provider);
24
+
25
+ return { description, screenshot };
26
+ }
27
+
28
+ /**
29
+ * Capture screenshot as base64
30
+ */
31
+ export async function captureScreenshot(): Promise<string | null> {
32
+ try {
33
+ // Try screenshot-desktop first (more reliable)
34
+ const screenshotDesktop = await import('screenshot-desktop');
35
+ const buffer = await screenshotDesktop.default({ format: 'png' });
36
+ return buffer.toString('base64');
37
+ } catch {
38
+ // Fallback to platform-specific methods
39
+ return captureScreenFallback();
40
+ }
41
+ }
42
+
43
+ async function captureScreenFallback(): Promise<string | null> {
44
+ const { exec } = await import('child_process');
45
+ const { promisify } = await import('util');
46
+ const { tmpdir } = await import('os');
47
+ const { join } = await import('path');
48
+ const { readFile, unlink } = await import('fs/promises');
49
+
50
+ const execAsync = promisify(exec);
51
+ const tempFile = join(tmpdir(), `cnapse-screen-${Date.now()}.png`);
52
+
53
+ try {
54
+ const platform = process.platform;
55
+
56
+ if (platform === 'win32') {
57
+ await execAsync(`
58
+ Add-Type -AssemblyName System.Windows.Forms
59
+ $screen = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds
60
+ $bitmap = New-Object System.Drawing.Bitmap($screen.Width, $screen.Height)
61
+ $graphics = [System.Drawing.Graphics]::FromImage($bitmap)
62
+ $graphics.CopyFromScreen($screen.Location, [System.Drawing.Point]::Empty, $screen.Size)
63
+ $bitmap.Save("${tempFile.replace(/\\/g, '\\\\')}")
64
+ $graphics.Dispose()
65
+ $bitmap.Dispose()
66
+ `, { shell: 'powershell.exe' });
67
+ } else if (platform === 'darwin') {
68
+ await execAsync(`screencapture -x "${tempFile}"`);
69
+ } else {
70
+ await execAsync(`gnome-screenshot -f "${tempFile}" 2>/dev/null || scrot "${tempFile}" 2>/dev/null || import -window root "${tempFile}"`);
71
+ }
72
+
73
+ const imageBuffer = await readFile(tempFile);
74
+ await unlink(tempFile).catch(() => {});
75
+ return imageBuffer.toString('base64');
76
+ } catch {
77
+ return null;
78
+ }
79
+ }
80
+
81
+ /**
82
+ * Analyze screenshot with vision-capable AI
83
+ */
84
+ async function analyzeWithVision(base64Image: string, provider: string): Promise<string> {
85
+ const prompt = `Look at this screenshot and describe:
86
+ 1. What application or window is visible
87
+ 2. Key UI elements you can see (buttons, text fields, menus)
88
+ 3. What the user appears to be doing or could do next
89
+ 4. Any notable content or state
90
+
91
+ Be concise but helpful.`;
92
+
93
+ switch (provider) {
94
+ case 'ollama':
95
+ return analyzeWithOllama(base64Image, prompt);
96
+ case 'openrouter':
97
+ return analyzeWithOpenRouter(base64Image, prompt);
98
+ case 'anthropic':
99
+ return analyzeWithAnthropic(base64Image, prompt);
100
+ case 'openai':
101
+ return analyzeWithOpenAI(base64Image, prompt);
102
+ default:
103
+ throw new Error(`Vision not supported for provider: ${provider}`);
104
+ }
105
+ }
106
+
107
+ async function analyzeWithOllama(base64Image: string, prompt: string): Promise<string> {
108
+ const config = getConfig();
109
+ const ollamaHost = config.ollamaHost || 'http://localhost:11434';
110
+
111
+ // Use a vision-capable model (llava, llama3.2-vision, bakllava)
112
+ const visionModels = ['llava', 'llama3.2-vision', 'bakllava', 'llava-llama3'];
113
+ const model = visionModels.find(m => config.model.includes(m)) || 'llava';
114
+
115
+ const response = await fetch(`${ollamaHost}/api/generate`, {
116
+ method: 'POST',
117
+ headers: { 'Content-Type': 'application/json' },
118
+ body: JSON.stringify({
119
+ model,
120
+ prompt,
121
+ images: [base64Image],
122
+ stream: false,
123
+ }),
124
+ });
125
+
126
+ if (!response.ok) {
127
+ const text = await response.text();
128
+ throw new Error(`Ollama vision error: ${text}`);
129
+ }
130
+
131
+ const data = await response.json() as { response: string };
132
+ return data.response || 'Unable to analyze image';
133
+ }
134
+
135
+ async function analyzeWithOpenRouter(base64Image: string, prompt: string): Promise<string> {
136
+ const apiKey = getApiKey('openrouter');
137
+ if (!apiKey) throw new Error('OpenRouter API key not configured');
138
+
139
+ // Use a vision-capable model
140
+ const model = 'anthropic/claude-3-5-sonnet'; // or 'openai/gpt-4-vision-preview'
141
+
142
+ const response = await fetch('https://openrouter.ai/api/v1/chat/completions', {
143
+ method: 'POST',
144
+ headers: {
145
+ 'Authorization': `Bearer ${apiKey}`,
146
+ 'Content-Type': 'application/json',
147
+ 'HTTP-Referer': 'https://c-napse.up.railway.app',
148
+ 'X-Title': 'C-napse',
149
+ },
150
+ body: JSON.stringify({
151
+ model,
152
+ messages: [
153
+ {
154
+ role: 'user',
155
+ content: [
156
+ { type: 'text', text: prompt },
157
+ {
158
+ type: 'image_url',
159
+ image_url: { url: `data:image/png;base64,${base64Image}` },
160
+ },
161
+ ],
162
+ },
163
+ ],
164
+ max_tokens: 1000,
165
+ }),
166
+ });
167
+
168
+ if (!response.ok) {
169
+ const text = await response.text();
170
+ throw new Error(`OpenRouter vision error: ${text}`);
171
+ }
172
+
173
+ const data = await response.json() as { choices: Array<{ message: { content: string } }> };
174
+ return data.choices?.[0]?.message?.content || 'Unable to analyze image';
175
+ }
176
+
177
+ async function analyzeWithAnthropic(base64Image: string, prompt: string): Promise<string> {
178
+ const apiKey = getApiKey('anthropic');
179
+ if (!apiKey) throw new Error('Anthropic API key not configured');
180
+
181
+ const response = await fetch('https://api.anthropic.com/v1/messages', {
182
+ method: 'POST',
183
+ headers: {
184
+ 'x-api-key': apiKey,
185
+ 'anthropic-version': '2023-06-01',
186
+ 'Content-Type': 'application/json',
187
+ },
188
+ body: JSON.stringify({
189
+ model: 'claude-3-5-sonnet-20241022',
190
+ max_tokens: 1000,
191
+ messages: [
192
+ {
193
+ role: 'user',
194
+ content: [
195
+ {
196
+ type: 'image',
197
+ source: {
198
+ type: 'base64',
199
+ media_type: 'image/png',
200
+ data: base64Image,
201
+ },
202
+ },
203
+ { type: 'text', text: prompt },
204
+ ],
205
+ },
206
+ ],
207
+ }),
208
+ });
209
+
210
+ if (!response.ok) {
211
+ const text = await response.text();
212
+ throw new Error(`Anthropic vision error: ${text}`);
213
+ }
214
+
215
+ const data = await response.json() as { content: Array<{ text: string }> };
216
+ return data.content?.[0]?.text || 'Unable to analyze image';
217
+ }
218
+
219
+ async function analyzeWithOpenAI(base64Image: string, prompt: string): Promise<string> {
220
+ const apiKey = getApiKey('openai');
221
+ if (!apiKey) throw new Error('OpenAI API key not configured');
222
+
223
+ const response = await fetch('https://api.openai.com/v1/chat/completions', {
224
+ method: 'POST',
225
+ headers: {
226
+ 'Authorization': `Bearer ${apiKey}`,
227
+ 'Content-Type': 'application/json',
228
+ },
229
+ body: JSON.stringify({
230
+ model: 'gpt-4-vision-preview',
231
+ messages: [
232
+ {
233
+ role: 'user',
234
+ content: [
235
+ { type: 'text', text: prompt },
236
+ {
237
+ type: 'image_url',
238
+ image_url: { url: `data:image/png;base64,${base64Image}` },
239
+ },
240
+ ],
241
+ },
242
+ ],
243
+ max_tokens: 1000,
244
+ }),
245
+ });
246
+
247
+ if (!response.ok) {
248
+ const text = await response.text();
249
+ throw new Error(`OpenAI vision error: ${text}`);
250
+ }
251
+
252
+ const data = await response.json() as { choices: Array<{ message: { content: string } }> };
253
+ return data.choices?.[0]?.message?.content || 'Unable to analyze image';
254
+ }
@@ -0,0 +1,278 @@
1
+ /**
2
+ * Telegram Bot Service - Remote PC control via Telegram
3
+ */
4
+
5
+ import { EventEmitter } from 'events';
6
+ import { getConfig, getApiKey } from '../lib/config.js';
7
+ import { describeScreen, captureScreenshot } from '../lib/vision.js';
8
+ import { runCommand } from '../tools/shell.js';
9
+
10
/**
 * A plain-text Telegram message forwarded from the bot to listeners
 * via the 'message' event.
 */
export interface TelegramMessage {
  /** Telegram chat the message arrived in. */
  chatId: number;
  /** Raw message text as received. */
  text: string;
  /** Sender's username or first name (falls back to 'User'). */
  from: string;
}
15
+
16
/**
 * Event map for TelegramBotService (EventEmitter-style callbacks).
 * NOTE(review): this interface is declared but not wired into the
 * EventEmitter's type parameters — listeners are untyped at runtime.
 */
export interface TelegramBotEvents {
  /** Non-command text message from an authorized chat. */
  message: (msg: TelegramMessage) => void;
  /** Parsed command with its argument string and originating chat. */
  command: (cmd: string, args: string, chatId: number) => void;
  /** Error surfaced by the underlying telegraf bot. */
  error: (error: Error) => void;
  /** Bot started polling. */
  started: () => void;
  /** Bot stopped polling. */
  stopped: () => void;
}
23
+
24
+ export class TelegramBotService extends EventEmitter {
25
+ private bot: any = null;
26
+ private isRunning = false;
27
+ private allowedChatIds: Set<number> = new Set();
28
+
29
+ constructor() {
30
+ super();
31
+ }
32
+
33
+ /**
34
+ * Start the Telegram bot
35
+ */
36
+ async start(): Promise<void> {
37
+ if (this.isRunning) {
38
+ return;
39
+ }
40
+
41
+ const botToken = getApiKey('telegram');
42
+ if (!botToken) {
43
+ throw new Error('Telegram bot token not configured. Use: cnapse auth telegram YOUR_BOT_TOKEN');
44
+ }
45
+
46
+ try {
47
+ // Dynamically import telegraf
48
+ const { Telegraf } = await import('telegraf');
49
+ this.bot = new Telegraf(botToken);
50
+
51
+ // Load allowed chat IDs from config
52
+ const config = getConfig();
53
+ if (config.telegram?.chatId) {
54
+ this.allowedChatIds.add(config.telegram.chatId);
55
+ }
56
+
57
+ this.setupHandlers();
58
+
59
+ // Start polling
60
+ await this.bot.launch();
61
+ this.isRunning = true;
62
+ this.emit('started');
63
+ } catch (error) {
64
+ throw new Error(`Failed to start Telegram bot: ${error instanceof Error ? error.message : 'Unknown error'}`);
65
+ }
66
+ }
67
+
68
+ /**
69
+ * Stop the Telegram bot
70
+ */
71
+ async stop(): Promise<void> {
72
+ if (!this.isRunning || !this.bot) {
73
+ return;
74
+ }
75
+
76
+ this.bot.stop('SIGTERM');
77
+ this.isRunning = false;
78
+ this.bot = null;
79
+ this.emit('stopped');
80
+ }
81
+
82
+ /**
83
+ * Check if bot is running
84
+ */
85
+ get running(): boolean {
86
+ return this.isRunning;
87
+ }
88
+
89
+ /**
90
+ * Setup message and command handlers
91
+ */
92
+ private setupHandlers(): void {
93
+ if (!this.bot) return;
94
+
95
+ // /start command - registers user
96
+ this.bot.command('start', async (ctx: any) => {
97
+ const chatId = ctx.chat.id;
98
+ this.allowedChatIds.add(chatId);
99
+ await ctx.reply(
100
+ '🤖 C-napse connected!\n\n' +
101
+ 'Commands:\n' +
102
+ '/screen - Take screenshot\n' +
103
+ '/describe - Screenshot + AI description\n' +
104
+ '/run <cmd> - Execute command\n' +
105
+ '/status - System status\n\n' +
106
+ `Your chat ID: ${chatId}`
107
+ );
108
+ });
109
+
110
+ // /screen command - send screenshot
111
+ this.bot.command('screen', async (ctx: any) => {
112
+ if (!this.isAllowed(ctx.chat.id)) {
113
+ await ctx.reply('⛔ Not authorized. Send /start first.');
114
+ return;
115
+ }
116
+
117
+ await ctx.reply('📸 Taking screenshot...');
118
+
119
+ try {
120
+ const screenshot = await captureScreenshot();
121
+ if (!screenshot) {
122
+ await ctx.reply('❌ Failed to capture screenshot');
123
+ return;
124
+ }
125
+
126
+ // Send as photo
127
+ const buffer = Buffer.from(screenshot, 'base64');
128
+ await ctx.replyWithPhoto({ source: buffer }, { caption: '📸 Current screen' });
129
+ } catch (error) {
130
+ await ctx.reply(`❌ Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
131
+ }
132
+ });
133
+
134
+ // /describe command - screenshot + AI description
135
+ this.bot.command('describe', async (ctx: any) => {
136
+ if (!this.isAllowed(ctx.chat.id)) {
137
+ await ctx.reply('⛔ Not authorized. Send /start first.');
138
+ return;
139
+ }
140
+
141
+ await ctx.reply('🔍 Analyzing screen...');
142
+
143
+ try {
144
+ const result = await describeScreen();
145
+ const buffer = Buffer.from(result.screenshot, 'base64');
146
+
147
+ // Send photo with description as caption
148
+ const caption = `🖥️ Screen Analysis:\n\n${result.description}`.slice(0, 1024); // Telegram caption limit
149
+ await ctx.replyWithPhoto({ source: buffer }, { caption });
150
+
151
+ // If description is longer, send the rest as text
152
+ if (result.description.length > 900) {
153
+ await ctx.reply(result.description);
154
+ }
155
+ } catch (error) {
156
+ await ctx.reply(`❌ Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
157
+ }
158
+ });
159
+
160
+ // /run command - execute shell command
161
+ this.bot.command('run', async (ctx: any) => {
162
+ if (!this.isAllowed(ctx.chat.id)) {
163
+ await ctx.reply('⛔ Not authorized. Send /start first.');
164
+ return;
165
+ }
166
+
167
+ const cmd = ctx.message.text.replace('/run ', '').trim();
168
+ if (!cmd) {
169
+ await ctx.reply('Usage: /run <command>\nExample: /run dir');
170
+ return;
171
+ }
172
+
173
+ await ctx.reply(`⚙️ Running: ${cmd}`);
174
+
175
+ try {
176
+ const result = await runCommand(cmd, 30000);
177
+ if (result.success) {
178
+ const output = result.output.slice(0, 4000) || '(no output)';
179
+ await ctx.reply(`✅ Output:\n\`\`\`\n${output}\n\`\`\``, { parse_mode: 'Markdown' });
180
+ } else {
181
+ await ctx.reply(`❌ Error:\n\`\`\`\n${result.error}\n\`\`\``, { parse_mode: 'Markdown' });
182
+ }
183
+ } catch (error) {
184
+ await ctx.reply(`❌ Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
185
+ }
186
+ });
187
+
188
+ // /status command - system status
189
+ this.bot.command('status', async (ctx: any) => {
190
+ if (!this.isAllowed(ctx.chat.id)) {
191
+ await ctx.reply('⛔ Not authorized. Send /start first.');
192
+ return;
193
+ }
194
+
195
+ const config = getConfig();
196
+ const status = [
197
+ '📊 C-napse Status',
198
+ '',
199
+ `Provider: ${config.provider}`,
200
+ `Model: ${config.model}`,
201
+ `Platform: ${process.platform}`,
202
+ `Node: ${process.version}`,
203
+ ].join('\n');
204
+
205
+ await ctx.reply(status);
206
+ });
207
+
208
+ // Handle text messages - forward to AI
209
+ this.bot.on('text', async (ctx: any) => {
210
+ if (!this.isAllowed(ctx.chat.id)) {
211
+ return;
212
+ }
213
+
214
+ // Skip commands
215
+ if (ctx.message.text.startsWith('/')) {
216
+ return;
217
+ }
218
+
219
+ const message: TelegramMessage = {
220
+ chatId: ctx.chat.id,
221
+ text: ctx.message.text,
222
+ from: ctx.from.username || ctx.from.first_name || 'User',
223
+ };
224
+
225
+ this.emit('message', message);
226
+ this.emit('command', 'chat', ctx.message.text, ctx.chat.id);
227
+ });
228
+
229
+ // Error handling
230
+ this.bot.catch((err: Error) => {
231
+ this.emit('error', err);
232
+ });
233
+ }
234
+
235
+ /**
236
+ * Check if chat is authorized
237
+ */
238
+ private isAllowed(chatId: number): boolean {
239
+ // If no chat IDs configured, allow all (first-come authorization)
240
+ if (this.allowedChatIds.size === 0) {
241
+ return true;
242
+ }
243
+ return this.allowedChatIds.has(chatId);
244
+ }
245
+
246
+ /**
247
+ * Send a message to a specific chat
248
+ */
249
+ async sendMessage(chatId: number, text: string): Promise<void> {
250
+ if (!this.bot || !this.isRunning) {
251
+ throw new Error('Telegram bot is not running');
252
+ }
253
+ await this.bot.telegram.sendMessage(chatId, text);
254
+ }
255
+
256
+ /**
257
+ * Send a photo to a specific chat
258
+ */
259
+ async sendPhoto(chatId: number, base64Image: string, caption?: string): Promise<void> {
260
+ if (!this.bot || !this.isRunning) {
261
+ throw new Error('Telegram bot is not running');
262
+ }
263
+ const buffer = Buffer.from(base64Image, 'base64');
264
+ await this.bot.telegram.sendPhoto(chatId, { source: buffer }, { caption });
265
+ }
266
+ }
267
+
268
+ // Singleton instance
269
+ let instance: TelegramBotService | null = null;
270
+
271
+ export function getTelegramBot(): TelegramBotService {
272
+ if (!instance) {
273
+ instance = new TelegramBotService();
274
+ }
275
+ return instance;
276
+ }
277
+
278
+ export default TelegramBotService;
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Clipboard tools
3
+ */
4
+
5
+ import clipboardy from 'clipboardy';
6
+ import { ToolResult, ok, err } from './index.js';
7
+
8
+ /**
9
+ * Get clipboard contents
10
+ */
11
+ export async function getClipboard(): Promise<ToolResult> {
12
+ try {
13
+ const text = await clipboardy.read();
14
+ return ok(text);
15
+ } catch (error: any) {
16
+ return err(`Failed to read clipboard: ${error.message}`);
17
+ }
18
+ }
19
+
20
+ /**
21
+ * Set clipboard contents
22
+ */
23
+ export async function setClipboard(text: string): Promise<ToolResult> {
24
+ try {
25
+ await clipboardy.write(text);
26
+ return ok(`Copied ${text.length} characters to clipboard`);
27
+ } catch (error: any) {
28
+ return err(`Failed to write clipboard: ${error.message}`);
29
+ }
30
+ }
31
+
32
/**
 * Clipboard tool definitions for agents.
 *
 * JSON-schema-style declarations consumed by the agent runtime so a model
 * can invoke get_clipboard / set_clipboard as tools.
 */
export const clipboardTools = [
  {
    name: 'get_clipboard',
    description: 'Get clipboard contents',
    // No parameters: reads whatever is currently on the clipboard.
    parameters: {
      type: 'object',
      properties: {},
    },
  },
  {
    name: 'set_clipboard',
    description: 'Set clipboard contents',
    parameters: {
      type: 'object',
      properties: {
        text: { type: 'string', description: 'Text to copy' },
      },
      required: ['text'],
    },
  },
];