@projectservan8n/cnapse 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,193 +1,157 @@
1
1
  /**
2
- * Browser Service - Playwright-based web automation
2
+ * Browser Service - Shell-based URL opening + Computer Control
3
3
  *
4
- * Provides reliable browser automation for:
5
- * - Web searches
6
- * - AI chat interactions (Perplexity, ChatGPT, Claude, etc.)
7
- * - Email (Gmail, Outlook)
8
- * - Google Sheets/Docs
9
- * - General web browsing
4
+ * Opens URLs in the user's default browser using system commands.
5
+ * All browser automation is done via mouse/keyboard control (nut-js).
10
6
  *
11
- * Uses your system Chrome with existing logins and profile!
7
+ * NO Playwright dependency - just native OS commands + desktop automation.
12
8
  */
13
9
 
14
- import { chromium, Browser, Page, BrowserContext } from 'playwright';
15
- import * as path from 'path';
16
- import * as os from 'os';
17
- import * as fs from 'fs';
18
-
19
- // Singleton browser instance
20
- let browser: Browser | null = null;
21
- let context: BrowserContext | null = null;
22
- let activePage: Page | null = null;
23
-
24
- // Browser configuration
25
- interface BrowserConfig {
26
- headless: boolean;
27
- slowMo: number;
28
- viewport: { width: number; height: number };
29
- useSystemBrowser: boolean; // Use system Chrome with your profile
30
- }
31
-
32
- const defaultConfig: BrowserConfig = {
33
- headless: false, // Show browser so user can see what's happening
34
- slowMo: 50, // Slight delay for visibility
35
- viewport: { width: 1280, height: 800 },
36
- useSystemBrowser: true // Default to using system Chrome
37
- };
10
+ import { runCommand } from '../tools/shell.js';
11
+ import * as computer from '../tools/computer.js';
12
+ import { describeScreen, captureScreenshot } from '../lib/vision.js';
38
13
 
39
14
  /**
40
- * Find Chrome/Edge executable on Windows
15
+ * Sleep helper
41
16
  */
42
- function findSystemBrowser(): string | null {
43
- const possiblePaths = [
44
- // Chrome paths
45
- path.join(process.env['PROGRAMFILES'] || '', 'Google', 'Chrome', 'Application', 'chrome.exe'),
46
- path.join(process.env['PROGRAMFILES(X86)'] || '', 'Google', 'Chrome', 'Application', 'chrome.exe'),
47
- path.join(process.env['LOCALAPPDATA'] || '', 'Google', 'Chrome', 'Application', 'chrome.exe'),
48
- // Edge paths (fallback)
49
- path.join(process.env['PROGRAMFILES'] || '', 'Microsoft', 'Edge', 'Application', 'msedge.exe'),
50
- path.join(process.env['PROGRAMFILES(X86)'] || '', 'Microsoft', 'Edge', 'Application', 'msedge.exe'),
51
- ];
52
-
53
- for (const browserPath of possiblePaths) {
54
- if (fs.existsSync(browserPath)) {
55
- return browserPath;
56
- }
57
- }
58
- return null;
17
+ function sleep(ms: number): Promise<void> {
18
+ return new Promise(resolve => setTimeout(resolve, ms));
59
19
  }
60
20
 
61
21
  /**
62
- * Get Chrome user data directory
22
+ * Open URL in user's default browser
63
23
  */
64
- function getChromeUserDataDir(): string {
65
- // Use a separate profile to avoid conflicts with running Chrome
66
- const cnapseProfile = path.join(os.homedir(), '.cnapse', 'chrome-profile');
67
-
68
- // Create if doesn't exist
69
- if (!fs.existsSync(cnapseProfile)) {
70
- fs.mkdirSync(cnapseProfile, { recursive: true });
71
- }
72
-
73
- return cnapseProfile;
74
- }
24
+ export async function openUrl(url: string): Promise<{ success: boolean; error?: string }> {
25
+ const fullUrl = url.startsWith('http') ? url : `https://${url}`;
75
26
 
76
- /**
77
- * Initialize browser if not already running
78
- * Uses system Chrome with persistent profile (keeps your logins!)
79
- */
80
- export async function initBrowser(config: Partial<BrowserConfig> = {}): Promise<Page> {
81
- const cfg = { ...defaultConfig, ...config };
82
-
83
- if (!context) {
84
- const browserPath = cfg.useSystemBrowser ? findSystemBrowser() : null;
85
- const userDataDir = getChromeUserDataDir();
86
-
87
- if (browserPath && cfg.useSystemBrowser) {
88
- // Use persistent context with system Chrome - keeps logins!
89
- context = await chromium.launchPersistentContext(userDataDir, {
90
- headless: cfg.headless,
91
- slowMo: cfg.slowMo,
92
- viewport: cfg.viewport,
93
- executablePath: browserPath,
94
- channel: undefined, // Don't use channel when specifying executablePath
95
- args: [
96
- '--disable-blink-features=AutomationControlled', // Less bot detection
97
- '--no-first-run',
98
- '--no-default-browser-check',
99
- ]
100
- });
27
+ try {
28
+ if (process.platform === 'win32') {
29
+ await runCommand(`start "" "${fullUrl}"`, 5000);
30
+ } else if (process.platform === 'darwin') {
31
+ await runCommand(`open "${fullUrl}"`, 5000);
101
32
  } else {
102
- // Fallback to bundled Chromium with persistent context
103
- context = await chromium.launchPersistentContext(userDataDir, {
104
- headless: cfg.headless,
105
- slowMo: cfg.slowMo,
106
- viewport: cfg.viewport,
107
- args: [
108
- '--disable-blink-features=AutomationControlled',
109
- ]
110
- });
33
+ await runCommand(`xdg-open "${fullUrl}"`, 5000);
111
34
  }
35
+ return { success: true };
36
+ } catch (error) {
37
+ return {
38
+ success: false,
39
+ error: error instanceof Error ? error.message : 'Failed to open URL'
40
+ };
112
41
  }
113
-
114
- // Get existing page or create new one
115
- const pages = context.pages();
116
- if (pages.length > 0) {
117
- activePage = pages[0];
118
- } else {
119
- activePage = await context.newPage();
120
- }
121
-
122
- return activePage;
123
42
  }
124
43
 
125
44
  /**
126
- * Get current page or create one
45
+ * Open browser and navigate to URL
46
+ * Same as openUrl but with explicit naming
127
47
  */
128
- export async function getPage(): Promise<Page> {
129
- if (!activePage) {
130
- return initBrowser();
131
- }
132
- return activePage;
48
+ export async function navigateTo(url: string): Promise<void> {
49
+ await openUrl(url);
133
50
  }
134
51
 
135
52
  /**
136
- * Close browser
53
+ * Open browser with Google search
137
54
  */
138
- export async function closeBrowser(): Promise<void> {
139
- if (context) {
140
- await context.close();
141
- context = null;
142
- activePage = null;
143
- }
144
- if (browser) {
145
- await browser.close();
146
- browser = null;
147
- }
55
+ export async function searchGoogle(query: string): Promise<{ success: boolean; error?: string }> {
56
+ const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}`;
57
+ return openUrl(searchUrl);
148
58
  }
149
59
 
150
60
  /**
151
- * Navigate to URL
61
+ * Perform web search and describe results using vision
62
+ * Opens search in browser, waits for results, takes screenshot and describes
152
63
  */
153
- export async function navigateTo(url: string): Promise<void> {
154
- const page = await getPage();
155
- await page.goto(url, { waitUntil: 'domcontentloaded' });
64
+ export async function webSearch(query: string, engine: 'google' | 'bing' | 'duckduckgo' = 'google'): Promise<string> {
65
+ const urls = {
66
+ google: `https://www.google.com/search?q=${encodeURIComponent(query)}`,
67
+ bing: `https://www.bing.com/search?q=${encodeURIComponent(query)}`,
68
+ duckduckgo: `https://duckduckgo.com/?q=${encodeURIComponent(query)}`
69
+ };
70
+
71
+ // Open search in browser
72
+ await openUrl(urls[engine]);
73
+
74
+ // Wait for page to load
75
+ await sleep(3000);
76
+
77
+ // Take screenshot and describe what we see
78
+ const vision = await describeScreen();
79
+
80
+ return `🔍 Search results for "${query}":\n\n${vision.description}`;
156
81
  }
157
82
 
158
83
  /**
159
- * Take screenshot and return as base64
84
+ * Open AI chat website and type a question
85
+ * Uses mouse/keyboard control to interact
160
86
  */
161
- export async function takeScreenshot(): Promise<string> {
162
- const page = await getPage();
163
- const buffer = await page.screenshot({ type: 'png' });
164
- return buffer.toString('base64');
87
+ export async function askAI(
88
+ site: 'perplexity' | 'chatgpt' | 'claude' | 'copilot' | 'google',
89
+ question: string
90
+ ): Promise<{ response: string; screenshot?: string }> {
91
+ const urls: Record<string, string> = {
92
+ perplexity: 'https://www.perplexity.ai',
93
+ chatgpt: 'https://chat.openai.com',
94
+ claude: 'https://claude.ai',
95
+ copilot: 'https://copilot.microsoft.com',
96
+ google: 'https://www.google.com'
97
+ };
98
+
99
+ // Open the site
100
+ await openUrl(urls[site]);
101
+
102
+ // Wait for page to load
103
+ await sleep(4000);
104
+
105
+ // Type the question using keyboard
106
+ await computer.typeText(question);
107
+ await sleep(500);
108
+
109
+ // Press Enter to submit
110
+ await computer.pressKey('Return');
111
+
112
+ // Wait for response to generate
113
+ await sleep(site === 'google' ? 3000 : 10000);
114
+
115
+ // Capture screenshot and describe what we see
116
+ const vision = await describeScreen();
117
+
118
+ return {
119
+ response: vision.description,
120
+ screenshot: vision.screenshot
121
+ };
165
122
  }
166
123
 
167
124
  /**
168
- * Take screenshot of specific element
125
+ * Open Gmail compose
169
126
  */
170
- export async function screenshotElement(selector: string): Promise<string | null> {
171
- const page = await getPage();
172
- try {
173
- const element = await page.waitForSelector(selector, { timeout: 5000 });
174
- if (element) {
175
- const buffer = await element.screenshot({ type: 'png' });
176
- return buffer.toString('base64');
177
- }
178
- } catch {
179
- return null;
180
- }
181
- return null;
127
+ export async function openGmailCompose(to?: string, subject?: string, body?: string): Promise<boolean> {
128
+ let url = 'https://mail.google.com/mail/u/0/?fs=1&tf=cm';
129
+
130
+ if (to) url += `&to=${encodeURIComponent(to)}`;
131
+ if (subject) url += `&su=${encodeURIComponent(subject)}`;
132
+ if (body) url += `&body=${encodeURIComponent(body)}`;
133
+
134
+ const result = await openUrl(url);
135
+ return result.success;
182
136
  }
183
137
 
184
138
  /**
185
- * Wait for element and click
139
+ * Send email via Gmail compose URL
140
+ * Opens compose with pre-filled fields, user completes manually or we automate with keyboard
186
141
  */
187
- export async function clickElement(selector: string, timeout = 10000): Promise<boolean> {
188
- const page = await getPage();
142
+ export async function sendGmail(email: { to: string; subject: string; body: string }): Promise<boolean> {
189
143
  try {
190
- await page.click(selector, { timeout });
144
+ // Open Gmail compose with pre-filled fields
145
+ await openGmailCompose(email.to, email.subject, email.body);
146
+
147
+ // Wait for compose to open
148
+ await sleep(5000);
149
+
150
+ // User can review and send manually, or:
151
+ // Press Ctrl+Enter to send
152
+ await computer.keyCombo(['control', 'Return']);
153
+
154
+ await sleep(2000);
191
155
  return true;
192
156
  } catch {
193
157
  return false;
@@ -195,26 +159,33 @@ export async function clickElement(selector: string, timeout = 10000): Promise<b
195
159
  }
196
160
 
197
161
  /**
198
- * Type text into element
162
+ * Open Outlook compose
199
163
  */
200
- export async function typeInElement(selector: string, text: string, timeout = 10000): Promise<boolean> {
201
- const page = await getPage();
202
- try {
203
- await page.fill(selector, text, { timeout });
204
- return true;
205
- } catch {
206
- return false;
207
- }
164
+ export async function openOutlookCompose(to?: string, subject?: string, body?: string): Promise<boolean> {
165
+ let url = 'https://outlook.office.com/mail/deeplink/compose?';
166
+
167
+ if (to) url += `to=${encodeURIComponent(to)}&`;
168
+ if (subject) url += `subject=${encodeURIComponent(subject)}&`;
169
+ if (body) url += `body=${encodeURIComponent(body)}&`;
170
+
171
+ const result = await openUrl(url);
172
+ return result.success;
208
173
  }
209
174
 
210
175
  /**
211
- * Type text character by character (for sites that need keypresses)
176
+ * Send email via Outlook
212
177
  */
213
- export async function typeSlowly(selector: string, text: string, delay = 50): Promise<boolean> {
214
- const page = await getPage();
178
+ export async function sendOutlook(email: { to: string; subject: string; body: string }): Promise<boolean> {
215
179
  try {
216
- await page.click(selector);
217
- await page.type(selector, text, { delay });
180
+ await openOutlookCompose(email.to, email.subject, email.body);
181
+
182
+ // Wait for compose to open
183
+ await sleep(5000);
184
+
185
+ // Press Ctrl+Enter to send
186
+ await computer.keyCombo(['control', 'Return']);
187
+
188
+ await sleep(2000);
218
189
  return true;
219
190
  } catch {
220
191
  return false;
@@ -222,517 +193,298 @@ export async function typeSlowly(selector: string, text: string, delay = 50): Pr
222
193
  }
223
194
 
224
195
  /**
225
- * Press keyboard key
196
+ * Open new Google Sheet
226
197
  */
227
- export async function pressKey(key: string): Promise<void> {
228
- const page = await getPage();
229
- await page.keyboard.press(key);
198
+ export async function openGoogleSheet(): Promise<boolean> {
199
+ const result = await openUrl('https://docs.google.com/spreadsheets/create');
200
+ return result.success;
230
201
  }
231
202
 
232
203
  /**
233
- * Scroll page
204
+ * Open new Google Doc
234
205
  */
235
- export async function scroll(direction: 'up' | 'down', amount = 500): Promise<void> {
236
- const page = await getPage();
237
- await page.mouse.wheel(0, direction === 'down' ? amount : -amount);
206
+ export async function openGoogleDoc(): Promise<boolean> {
207
+ const result = await openUrl('https://docs.google.com/document/create');
208
+ return result.success;
238
209
  }
239
210
 
240
211
  /**
241
- * Wait for text to appear on page
212
+ * Type in current browser window
213
+ * Assumes browser is focused
242
214
  */
243
- export async function waitForText(text: string, timeout = 30000): Promise<boolean> {
244
- const page = await getPage();
245
- try {
246
- await page.waitForFunction(
247
- (searchText) => document.body.innerText.includes(searchText),
248
- text,
249
- { timeout }
250
- );
251
- return true;
252
- } catch {
253
- return false;
254
- }
215
+ export async function typeInBrowser(text: string): Promise<void> {
216
+ await computer.typeText(text);
255
217
  }
256
218
 
257
219
  /**
258
- * Get text content of element
220
+ * Press key in browser
259
221
  */
260
- export async function getTextContent(selector: string): Promise<string | null> {
261
- const page = await getPage();
262
- try {
263
- return await page.textContent(selector);
264
- } catch {
265
- return null;
266
- }
222
+ export async function pressKey(key: string): Promise<void> {
223
+ await computer.pressKey(key);
267
224
  }
268
225
 
269
226
  /**
270
- * Get all text from page
227
+ * Click at current mouse position
271
228
  */
272
- export async function getPageText(): Promise<string> {
273
- const page = await getPage();
274
- return await page.evaluate(() => document.body.innerText);
229
+ export async function click(button: 'left' | 'right' | 'middle' = 'left'): Promise<void> {
230
+ await computer.clickMouse(button);
275
231
  }
276
232
 
277
233
  /**
278
- * Wait for navigation
234
+ * Scroll in browser
279
235
  */
280
- export async function waitForNavigation(timeout = 30000): Promise<void> {
281
- const page = await getPage();
282
- await page.waitForLoadState('domcontentloaded', { timeout });
236
+ export async function scroll(direction: 'up' | 'down', amount = 3): Promise<void> {
237
+ // Use Page Up/Page Down for scrolling
238
+ const key = direction === 'down' ? 'pagedown' : 'pageup';
239
+ for (let i = 0; i < amount; i++) {
240
+ await computer.pressKey(key);
241
+ await sleep(200);
242
+ }
283
243
  }
284
244
 
285
245
  /**
286
- * Check if element exists
246
+ * Take screenshot of current screen (not just browser)
287
247
  */
288
- export async function elementExists(selector: string): Promise<boolean> {
289
- const page = await getPage();
290
- try {
291
- const element = await page.$(selector);
292
- return element !== null;
293
- } catch {
294
- return false;
295
- }
248
+ export async function takeScreenshot(): Promise<string> {
249
+ return await captureScreenshot();
296
250
  }
297
251
 
298
- // ========================================
299
- // AI Chat Site Helpers
300
- // ========================================
301
-
302
- interface AIChatConfig {
303
- url: string;
304
- inputSelector: string;
305
- submitSelector?: string;
306
- submitKey?: string;
307
- responseSelector: string;
308
- waitForResponse: number;
309
- }
310
-
311
- const aiChatConfigs: Record<string, AIChatConfig> = {
312
- perplexity: {
313
- url: 'https://www.perplexity.ai',
314
- inputSelector: 'textarea[placeholder*="Ask"]',
315
- submitKey: 'Enter',
316
- responseSelector: '.prose, [class*="answer"], [class*="response"]',
317
- waitForResponse: 15000
318
- },
319
- chatgpt: {
320
- url: 'https://chat.openai.com',
321
- inputSelector: 'textarea[id="prompt-textarea"], textarea[data-id="root"]',
322
- submitSelector: 'button[data-testid="send-button"]',
323
- responseSelector: '[data-message-author-role="assistant"]',
324
- waitForResponse: 20000
325
- },
326
- claude: {
327
- url: 'https://claude.ai',
328
- inputSelector: '[contenteditable="true"], textarea',
329
- submitKey: 'Enter',
330
- responseSelector: '[data-testid="message-content"]',
331
- waitForResponse: 20000
332
- },
333
- copilot: {
334
- url: 'https://copilot.microsoft.com',
335
- inputSelector: 'textarea, [contenteditable="true"]',
336
- submitKey: 'Enter',
337
- responseSelector: '[class*="response"], [class*="message"]',
338
- waitForResponse: 15000
339
- },
340
- google: {
341
- url: 'https://www.google.com',
342
- inputSelector: 'textarea[name="q"], input[name="q"]',
343
- submitKey: 'Enter',
344
- responseSelector: '#search',
345
- waitForResponse: 5000
346
- }
347
- };
348
-
349
252
  /**
350
- * Ask AI chat and get response
253
+ * Get description of current screen
351
254
  */
352
- export async function askAI(
353
- site: keyof typeof aiChatConfigs,
354
- question: string,
355
- includeScreenshot = false
356
- ): Promise<{ response: string; screenshot?: string }> {
357
- const config = aiChatConfigs[site];
358
- if (!config) {
359
- throw new Error(`Unknown AI site: ${site}`);
360
- }
361
-
362
- const page = await getPage();
363
-
364
- // Navigate to site
365
- await page.goto(config.url, { waitUntil: 'domcontentloaded' });
366
- await page.waitForTimeout(2000); // Let page fully load
367
-
368
- // Find and fill input
369
- try {
370
- await page.waitForSelector(config.inputSelector, { timeout: 10000 });
371
- await page.fill(config.inputSelector, question);
372
- } catch {
373
- // Try clicking first then typing
374
- await page.click(config.inputSelector);
375
- await page.type(config.inputSelector, question, { delay: 30 });
376
- }
255
+ export async function getPageText(): Promise<string> {
256
+ const vision = await describeScreen();
257
+ return vision.description;
258
+ }
377
259
 
378
- // Submit
379
- if (config.submitSelector) {
380
- await page.click(config.submitSelector);
381
- } else if (config.submitKey) {
382
- await page.keyboard.press(config.submitKey);
383
- }
260
+ /**
261
+ * Research a topic - opens multiple searches and gathers info
262
+ */
263
+ export async function research(topic: string, maxSources = 3): Promise<{
264
+ query: string;
265
+ sources: { title: string; url: string; content: string }[];
266
+ summary: string;
267
+ }> {
268
+ // Open Google search
269
+ await searchGoogle(topic);
270
+ await sleep(3000);
384
271
 
385
- // Wait for response
386
- await page.waitForTimeout(config.waitForResponse);
272
+ // Get vision description of search results
273
+ const searchResults = await describeScreen();
387
274
 
388
- // Try to get response text
389
- let response = '';
390
- try {
391
- const elements = await page.$$(config.responseSelector);
392
- if (elements.length > 0) {
393
- const lastElement = elements[elements.length - 1];
394
- response = await lastElement.textContent() || '';
395
- }
396
- } catch {
397
- // Fallback: get all page text
398
- response = await getPageText();
399
- }
275
+ // For now, we return the vision-based description
276
+ // In a real scenario, we'd click through results and gather more
277
+ return {
278
+ query: topic,
279
+ sources: [{
280
+ title: `Google search: ${topic}`,
281
+ url: `https://www.google.com/search?q=${encodeURIComponent(topic)}`,
282
+ content: searchResults.description
283
+ }],
284
+ summary: searchResults.description
285
+ };
286
+ }
400
287
 
401
- // Optional screenshot
402
- let screenshot: string | undefined;
403
- if (includeScreenshot) {
404
- screenshot = await takeScreenshot();
405
- }
288
+ /**
289
+ * Close current browser tab (Ctrl+W)
290
+ */
291
+ export async function closeTab(): Promise<void> {
292
+ await computer.keyCombo(['control', 'w']);
293
+ }
406
294
 
407
- return { response: response.trim(), screenshot };
295
+ /**
296
+ * New browser tab (Ctrl+T)
297
+ */
298
+ export async function newTab(): Promise<void> {
299
+ await computer.keyCombo(['control', 't']);
408
300
  }
409
301
 
410
302
  /**
411
- * Scroll and capture full response (for long answers)
303
+ * Switch browser tab (Ctrl+Tab)
412
304
  */
413
- export async function getFullAIResponse(
414
- site: keyof typeof aiChatConfigs,
415
- maxScrolls = 5
416
- ): Promise<string[]> {
417
- const config = aiChatConfigs[site];
418
- const page = await getPage();
419
- const responseParts: string[] = [];
305
+ export async function nextTab(): Promise<void> {
306
+ await computer.keyCombo(['control', 'Tab']);
307
+ }
420
308
 
421
- for (let i = 0; i < maxScrolls; i++) {
422
- try {
423
- const elements = await page.$$(config.responseSelector);
424
- if (elements.length > 0) {
425
- const lastElement = elements[elements.length - 1];
426
- const text = await lastElement.textContent();
427
- if (text) {
428
- responseParts.push(text.trim());
429
- }
430
- }
431
-
432
- // Scroll down
433
- await page.mouse.wheel(0, 500);
434
- await page.waitForTimeout(1000);
435
-
436
- // Check if we've reached the bottom
437
- const atBottom = await page.evaluate(() => {
438
- return window.innerHeight + window.scrollY >= document.body.scrollHeight - 100;
439
- });
440
- if (atBottom) break;
441
- } catch {
442
- break;
443
- }
444
- }
309
+ /**
310
+ * Go back in browser (Alt+Left)
311
+ */
312
+ export async function goBack(): Promise<void> {
313
+ await computer.keyCombo(['alt', 'Left']);
314
+ }
445
315
 
446
- return responseParts;
316
+ /**
317
+ * Go forward in browser (Alt+Right)
318
+ */
319
+ export async function goForward(): Promise<void> {
320
+ await computer.keyCombo(['alt', 'Right']);
447
321
  }
448
322
 
449
- // ========================================
450
- // Email Helpers
451
- // ========================================
323
+ /**
324
+ * Refresh page (F5)
325
+ */
326
+ export async function refresh(): Promise<void> {
327
+ await computer.pressKey('F5');
328
+ }
452
329
 
453
- interface EmailData {
454
- to: string;
455
- subject: string;
456
- body: string;
330
+ /**
331
+ * Focus address bar (Ctrl+L)
332
+ */
333
+ export async function focusAddressBar(): Promise<void> {
334
+ await computer.keyCombo(['control', 'l']);
457
335
  }
458
336
 
459
337
  /**
460
- * Send email via Gmail web interface
338
+ * Navigate to URL by typing in address bar
461
339
  */
462
- export async function sendGmail(email: EmailData): Promise<boolean> {
463
- const page = await getPage();
340
+ export async function typeUrl(url: string): Promise<void> {
341
+ await focusAddressBar();
342
+ await sleep(300);
343
+ await computer.typeText(url);
344
+ await sleep(200);
345
+ await computer.pressKey('Return');
346
+ }
464
347
 
465
- try {
466
- // Go to Gmail compose
467
- await page.goto('https://mail.google.com/mail/u/0/#inbox?compose=new');
468
- await page.waitForTimeout(3000);
348
+ // Legacy function stubs for compatibility (do nothing or minimal behavior)
349
+ export async function initBrowser(): Promise<null> {
350
+ // No initialization needed - we use system browser
351
+ return null;
352
+ }
469
353
 
470
- // Wait for compose dialog
471
- await page.waitForSelector('input[aria-label*="To"]', { timeout: 10000 });
354
+ export async function getPage(): Promise<null> {
355
+ return null;
356
+ }
472
357
 
473
- // Fill To field
474
- await page.fill('input[aria-label*="To"]', email.to);
475
- await page.keyboard.press('Tab');
358
+ export async function closeBrowser(): Promise<void> {
359
+ // Close browser window with Alt+F4
360
+ await computer.keyCombo(['alt', 'F4']);
361
+ }
476
362
 
477
- // Fill Subject
478
- await page.fill('input[name="subjectbox"]', email.subject);
479
- await page.keyboard.press('Tab');
363
+ export async function elementExists(selector: string): Promise<boolean> {
364
+ // Can't check DOM without Playwright - always return true to not block
365
+ return true;
366
+ }
480
367
 
481
- // Fill Body
482
- await page.fill('[aria-label*="Message Body"], [role="textbox"]', email.body);
368
+ export async function clickElement(selector: string): Promise<boolean> {
369
+ // Without Playwright, we can't click by selector
370
+ // Just click at current position
371
+ await click();
372
+ return true;
373
+ }
483
374
 
484
- // Click Send (Ctrl+Enter is faster)
485
- await page.keyboard.press('Control+Enter');
375
+ export async function typeInElement(selector: string, text: string): Promise<boolean> {
376
+ // Just type the text
377
+ await typeInBrowser(text);
378
+ return true;
379
+ }
486
380
 
487
- await page.waitForTimeout(2000);
488
- return true;
489
- } catch {
490
- return false;
381
+ export async function typeSlowly(selector: string, text: string): Promise<boolean> {
382
+ // Type character by character
383
+ for (const char of text) {
384
+ await computer.typeText(char);
385
+ await sleep(50);
491
386
  }
387
+ return true;
492
388
  }
493
389
 
494
- /**
495
- * Send email via Outlook web interface
496
- */
497
- export async function sendOutlook(email: EmailData): Promise<boolean> {
498
- const page = await getPage();
499
-
500
- try {
501
- // Go to Outlook compose
502
- await page.goto('https://outlook.office.com/mail/0/inbox');
503
- await page.waitForTimeout(3000);
504
-
505
- // Click New Message
506
- await page.click('button[aria-label*="New mail"], button[title*="New mail"]');
507
- await page.waitForTimeout(2000);
508
-
509
- // Fill To
510
- await page.fill('input[aria-label*="To"]', email.to);
511
- await page.keyboard.press('Tab');
390
+ export async function waitForText(text: string): Promise<boolean> {
391
+ // Can't check DOM - just wait a bit
392
+ await sleep(3000);
393
+ return true;
394
+ }
512
395
 
513
- // Fill Subject
514
- await page.fill('input[aria-label*="Subject"], input[placeholder*="Subject"]', email.subject);
515
- await page.keyboard.press('Tab');
396
+ export async function getTextContent(selector: string): Promise<string | null> {
397
+ // Use vision to describe what's on screen
398
+ const vision = await describeScreen();
399
+ return vision.description;
400
+ }
516
401
 
517
- // Fill Body
518
- await page.fill('[aria-label*="Message body"], [role="textbox"]', email.body);
402
+ export async function waitForNavigation(): Promise<void> {
403
+ await sleep(3000);
404
+ }
519
405
 
520
- // Click Send
521
- await page.click('button[aria-label*="Send"], button[title*="Send"]');
406
+ export async function getFullAIResponse(site: string, maxScrolls = 5): Promise<string[]> {
407
+ // Scroll down and capture what we see
408
+ const responses: string[] = [];
522
409
 
523
- await page.waitForTimeout(2000);
524
- return true;
525
- } catch {
526
- return false;
410
+ for (let i = 0; i < maxScrolls; i++) {
411
+ const vision = await describeScreen();
412
+ responses.push(vision.description);
413
+ await scroll('down', 1);
414
+ await sleep(1000);
527
415
  }
528
- }
529
-
530
- // ========================================
531
- // Google Apps Helpers
532
- // ========================================
533
416
 
534
- /**
535
- * Create new Google Sheet and type in cells
536
- */
537
- export async function googleSheetsType(cellData: { cell: string; value: string }[]): Promise<boolean> {
538
- const page = await getPage();
417
+ return responses;
418
+ }
539
419
 
420
+ export async function googleSheetsType(cells: { cell: string; value: string }[]): Promise<boolean> {
540
421
  try {
541
- // Go to Google Sheets
542
- await page.goto('https://docs.google.com/spreadsheets/create');
543
- await page.waitForTimeout(5000);
544
-
545
- for (const { cell, value } of cellData) {
546
- // Click on name box and type cell reference
547
- await page.click('input#t-name-box');
548
- await page.fill('input#t-name-box', cell);
549
- await page.keyboard.press('Enter');
550
- await page.waitForTimeout(500);
551
-
552
- // Type value
553
- await page.keyboard.type(value);
554
- await page.keyboard.press('Enter');
555
- await page.waitForTimeout(300);
422
+ for (const { cell, value } of cells) {
423
+ // Press Ctrl+G to go to cell (or use name box with Ctrl+G)
424
+ await computer.keyCombo(['control', 'g']);
425
+ await sleep(500);
426
+ await computer.typeText(cell);
427
+ await computer.pressKey('Return');
428
+ await sleep(300);
429
+ await computer.typeText(value);
430
+ await computer.pressKey('Return');
431
+ await sleep(200);
556
432
  }
557
-
558
433
  return true;
559
434
  } catch {
560
435
  return false;
561
436
  }
562
437
  }
563
438
 
564
- /**
565
- * Create new Google Doc and type
566
- */
567
439
  export async function googleDocsType(text: string): Promise<boolean> {
568
- const page = await getPage();
569
-
570
440
  try {
571
- // Go to Google Docs
572
- await page.goto('https://docs.google.com/document/create');
573
- await page.waitForTimeout(5000);
574
-
575
- // Click on document body
576
- await page.click('.kix-appview-editor');
577
- await page.waitForTimeout(500);
578
-
579
- // Type text
580
- await page.keyboard.type(text, { delay: 20 });
581
-
441
+ await sleep(1000);
442
+ await computer.typeText(text);
582
443
  return true;
583
444
  } catch {
584
445
  return false;
585
446
  }
586
447
  }
587
448
 
588
- // ========================================
589
- // Web Search
590
- // ========================================
591
-
592
- /**
593
- * Perform web search and get results
594
- */
595
- export async function webSearch(query: string, engine: 'google' | 'bing' | 'duckduckgo' = 'google'): Promise<string[]> {
596
- const page = await getPage();
597
- const results: string[] = [];
598
-
599
- const urls = {
600
- google: 'https://www.google.com',
601
- bing: 'https://www.bing.com',
602
- duckduckgo: 'https://duckduckgo.com'
603
- };
604
-
605
- const selectors = {
606
- google: { input: 'textarea[name="q"]', results: '#search .g h3' },
607
- bing: { input: 'input[name="q"]', results: '#b_results h2 a' },
608
- duckduckgo: { input: 'input[name="q"]', results: '[data-result] h2' }
609
- };
610
-
611
- try {
612
- await page.goto(urls[engine]);
613
- await page.waitForTimeout(2000);
614
-
615
- // Search
616
- await page.fill(selectors[engine].input, query);
617
- await page.keyboard.press('Enter');
618
- await page.waitForTimeout(3000);
619
-
620
- // Get result titles
621
- const elements = await page.$$(selectors[engine].results);
622
- for (const el of elements.slice(0, 10)) {
623
- const text = await el.textContent();
624
- if (text) results.push(text);
625
- }
626
- } catch {
627
- // Return empty on error
628
- }
629
-
630
- return results;
631
- }
632
-
633
- /**
634
- * Click on search result by index
635
- */
636
- export async function clickSearchResult(index: number): Promise<boolean> {
637
- const page = await getPage();
638
-
639
- try {
640
- const results = await page.$$('#search .g h3, #b_results h2 a, [data-result] h2 a');
641
- if (results[index]) {
642
- await results[index].click();
643
- await page.waitForTimeout(2000);
644
- return true;
645
- }
646
- } catch {}
647
-
648
- return false;
649
- }
650
-
651
- // ========================================
652
- // Research Helper (multi-step)
653
- // ========================================
654
-
655
- export interface ResearchResult {
656
- query: string;
657
- sources: { title: string; url: string; content: string }[];
658
- summary: string;
659
- }
660
-
661
- /**
662
- * Research a topic: search, visit results, gather info
663
- */
664
- export async function research(topic: string, maxSources = 3): Promise<ResearchResult> {
665
- const page = await getPage();
666
- const sources: { title: string; url: string; content: string }[] = [];
667
-
668
- // Search
669
- await webSearch(topic);
670
- await page.waitForTimeout(2000);
671
-
672
- // Visit top results
673
- for (let i = 0; i < maxSources; i++) {
674
- try {
675
- const results = await page.$$('#search .g');
676
- if (results[i]) {
677
- // Get title and URL
678
- const titleEl = await results[i].$('h3');
679
- const linkEl = await results[i].$('a');
680
-
681
- const title = await titleEl?.textContent() || 'Unknown';
682
- const url = await linkEl?.getAttribute('href') || '';
683
-
684
- // Click and get content
685
- await titleEl?.click();
686
- await page.waitForTimeout(3000);
687
-
688
- // Get main content
689
- const content = await page.evaluate(() => {
690
- const article = document.querySelector('article, main, .content, #content');
691
- return article?.textContent?.slice(0, 2000) || document.body.innerText.slice(0, 2000);
692
- });
693
-
694
- sources.push({ title, url, content: content.trim() });
695
-
696
- // Go back
697
- await page.goBack();
698
- await page.waitForTimeout(1500);
699
- }
700
- } catch {
701
- continue;
702
- }
703
- }
704
-
705
- return {
706
- query: topic,
707
- sources,
708
- summary: '' // To be filled by AI
709
- };
710
- }
711
-
712
449
  export default {
450
+ openUrl,
451
+ navigateTo,
452
+ searchGoogle,
453
+ webSearch,
454
+ askAI,
455
+ openGmailCompose,
456
+ sendGmail,
457
+ openOutlookCompose,
458
+ sendOutlook,
459
+ openGoogleSheet,
460
+ openGoogleDoc,
461
+ typeInBrowser,
462
+ pressKey,
463
+ click,
464
+ scroll,
465
+ takeScreenshot,
466
+ getPageText,
467
+ research,
468
+ closeTab,
469
+ newTab,
470
+ nextTab,
471
+ goBack,
472
+ goForward,
473
+ refresh,
474
+ focusAddressBar,
475
+ typeUrl,
476
+ // Legacy compatibility
713
477
  initBrowser,
714
478
  getPage,
715
479
  closeBrowser,
716
- navigateTo,
717
- takeScreenshot,
718
- screenshotElement,
480
+ elementExists,
719
481
  clickElement,
720
482
  typeInElement,
721
483
  typeSlowly,
722
- pressKey,
723
- scroll,
724
484
  waitForText,
725
485
  getTextContent,
726
- getPageText,
727
486
  waitForNavigation,
728
- elementExists,
729
- askAI,
730
487
  getFullAIResponse,
731
- sendGmail,
732
- sendOutlook,
733
488
  googleSheetsType,
734
- googleDocsType,
735
- webSearch,
736
- clickSearchResult,
737
- research
489
+ googleDocsType
738
490
  };