surfagent 1.0.5 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/api/act.d.ts CHANGED
@@ -23,6 +23,7 @@ export interface ClickRequest {
23
23
  tab: string;
24
24
  selector?: string;
25
25
  text?: string;
26
+ waitAfter?: number;
26
27
  }
27
28
  export declare function clickElement(request: ClickRequest, options: {
28
29
  port?: number;
@@ -70,6 +71,10 @@ export declare function readPage(tabPattern: string, options: {
70
71
  host?: string;
71
72
  selector?: string;
72
73
  }): Promise<any>;
74
+ export declare function dismissOverlays(tabPattern: string, options: {
75
+ port?: number;
76
+ host?: string;
77
+ }): Promise<any>;
73
78
  export interface CaptchaRequest {
74
79
  tab: string;
75
80
  action: 'detect' | 'read' | 'next' | 'prev' | 'submit' | 'audio' | 'restart';
package/dist/api/act.js CHANGED
@@ -135,9 +135,9 @@ export async function clickElement(request, options) {
135
135
  }
136
136
  if (!el && text) {
137
137
  const lower = text.toLowerCase();
138
- const all = document.querySelectorAll('a, button, input[type="submit"], [role="button"], [onclick]');
138
+ const all = document.querySelectorAll('a, button, input[type="submit"], [role="button"], [role="option"], [role="menuitem"], [role="listitem"], [role="tab"], [role="link"], li[aria-label], [onclick]');
139
139
  for (const candidate of all) {
140
- const t = (candidate.innerText || candidate.textContent || candidate.value || '').trim();
140
+ const t = (candidate.innerText || candidate.textContent || candidate.value || candidate.getAttribute('aria-label') || '').trim();
141
141
  if (t.toLowerCase().includes(lower)) { el = candidate; break; }
142
142
  }
143
143
  }
@@ -159,6 +159,10 @@ export async function clickElement(request, options) {
159
159
  `,
160
160
  returnByValue: true
161
161
  });
162
+ // Wait after click if requested (for page to settle after navigation/SPA route change)
163
+ if (request.waitAfter && request.waitAfter > 0) {
164
+ await new Promise(resolve => setTimeout(resolve, Math.min(request.waitAfter, 10000)));
165
+ }
162
166
  await client.close();
163
167
  return result.result.value;
164
168
  }
@@ -357,6 +361,67 @@ export async function readPage(tabPattern, options) {
357
361
  throw error;
358
362
  }
359
363
  }
364
+ const DISMISS_OVERLAYS_SCRIPT = `
365
+ (function() {
366
+ const dismissed = [];
367
+
368
+ // Common cookie consent button patterns (multi-language)
369
+ const consentPatterns = [
370
+ 'reject all', 'reject', 'decline', 'deny',
371
+ 'accept all', 'accept', 'godta alle', 'godta',
372
+ 'alle ablehnen', 'ablehnen', 'tout refuser', 'refuser',
373
+ 'rechazar todo', 'rechazar', 'rifiuta tutto', 'rifiuta',
374
+ 'bare nødvendige', 'only necessary', 'nur notwendige',
375
+ 'manage preferences', 'cookie settings',
376
+ ];
377
+
378
+ // Try cookie consent buttons
379
+ for (const btn of document.querySelectorAll('button, a[role="button"]')) {
380
+ const text = (btn.innerText || btn.textContent || '').trim().toLowerCase();
381
+ if (text.length > 50 || text.length < 2) continue;
382
+ for (const pattern of consentPatterns) {
383
+ if (text === pattern || text.startsWith(pattern)) {
384
+ btn.click();
385
+ dismissed.push({ type: 'cookie', text: text.substring(0, 40) });
386
+ break;
387
+ }
388
+ }
389
+ if (dismissed.length) break;
390
+ }
391
+
392
+ // Try closing modal dialogs (X button, close button, dismiss)
393
+ if (!dismissed.length) {
394
+ for (const btn of document.querySelectorAll('[aria-label*="Close" i], [aria-label*="Dismiss" i], [aria-label*="Lukk" i], [aria-label*="Schließen" i], [aria-label*="Fermer" i]')) {
395
+ const dialog = btn.closest('[role="dialog"], [role="alertdialog"], .modal, [data-overlay]');
396
+ if (dialog) {
397
+ btn.click();
398
+ dismissed.push({ type: 'dialog', text: btn.getAttribute('aria-label') || 'close' });
399
+ break;
400
+ }
401
+ }
402
+ }
403
+
404
+ return { dismissed, count: dismissed.length };
405
+ })()
406
+ `;
407
+ export async function dismissOverlays(tabPattern, options) {
408
+ const port = options.port || 9222;
409
+ const host = options.host || 'localhost';
410
+ const tab = await resolveTab(tabPattern, port, host);
411
+ const client = await connectToTab(tab.id, port, host);
412
+ try {
413
+ const r = await client.Runtime.evaluate({
414
+ expression: DISMISS_OVERLAYS_SCRIPT,
415
+ returnByValue: true
416
+ });
417
+ await client.close();
418
+ return r.result.value;
419
+ }
420
+ catch (error) {
421
+ await client.close();
422
+ throw error;
423
+ }
424
+ }
360
425
  const CAPTCHA_DETECT_SCRIPT = `
361
426
  (function() {
362
427
  // Find captcha iframes on the page
package/dist/api/server.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import http from 'node:http';
3
3
  import { reconUrl, reconTab } from './recon.js';
4
- import { fillFields, clickElement, scrollPage, navigatePage, evalInTab, focusTab, readPage, captchaInteract } from './act.js';
4
+ import { fillFields, clickElement, scrollPage, navigatePage, evalInTab, focusTab, readPage, captchaInteract, dismissOverlays } from './act.js';
5
5
  import { getAllTabs } from '../chrome/tabs.js';
6
6
  const PORT = parseInt(process.env.API_PORT || '3456', 10);
7
7
  const CDP_PORT = parseInt(process.env.CDP_PORT || '9222', 10);
@@ -16,6 +16,11 @@ function json(res, status, data) {
16
16
  res.writeHead(status, { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*' });
17
17
  res.end(JSON.stringify(data));
18
18
  }
19
+ function parseBody(raw) {
20
+ if (!raw || !raw.trim())
21
+ throw new SyntaxError('Empty request body');
22
+ return JSON.parse(raw);
23
+ }
19
24
  function cors(res) {
20
25
  res.writeHead(204, {
21
26
  'Access-Control-Allow-Origin': '*',
@@ -32,7 +37,7 @@ const server = http.createServer(async (req, res) => {
32
37
  try {
33
38
  // POST /recon — full page reconnaissance
34
39
  if (path === '/recon' && req.method === 'POST') {
35
- const body = JSON.parse(await readBody(req));
40
+ const body = parseBody(await readBody(req));
36
41
  if (!body.url && !body.tab) {
37
42
  return json(res, 400, { error: 'Provide "url" (to open new page) or "tab" (to recon existing tab)' });
38
43
  }
@@ -56,17 +61,20 @@ const server = http.createServer(async (req, res) => {
56
61
  }
57
62
  // POST /fill — fill form fields via CDP keystrokes
58
63
  if (path === '/fill' && req.method === 'POST') {
59
- const body = JSON.parse(await readBody(req));
64
+ const body = parseBody(await readBody(req));
60
65
  if (!body.tab || !body.fields) {
61
66
  return json(res, 400, { error: 'Provide "tab" and "fields" [{ selector, value }]' });
62
67
  }
68
+ if (!Array.isArray(body.fields)) {
69
+ return json(res, 400, { error: '"fields" must be an array of { selector, value }' });
70
+ }
63
71
  const start = Date.now();
64
72
  const result = await fillFields(body, { port: CDP_PORT, host: CDP_HOST });
65
73
  return json(res, 200, { ...result, _fillMs: Date.now() - start });
66
74
  }
67
75
  // POST /click — click an element
68
76
  if (path === '/click' && req.method === 'POST') {
69
- const body = JSON.parse(await readBody(req));
77
+ const body = parseBody(await readBody(req));
70
78
  if (!body.tab || (!body.selector && !body.text)) {
71
79
  return json(res, 400, { error: 'Provide "tab" and "selector" or "text"' });
72
80
  }
@@ -75,16 +83,25 @@ const server = http.createServer(async (req, res) => {
75
83
  }
76
84
  // POST /scroll — scroll a page
77
85
  if (path === '/scroll' && req.method === 'POST') {
78
- const body = JSON.parse(await readBody(req));
86
+ const body = parseBody(await readBody(req));
79
87
  if (!body.tab) {
80
88
  return json(res, 400, { error: 'Provide "tab", optional "direction" (down/up), "amount" (pixels)' });
81
89
  }
82
90
  const result = await scrollPage(body, { port: CDP_PORT, host: CDP_HOST });
83
91
  return json(res, 200, result);
84
92
  }
93
+ // POST /dismiss — dismiss cookie banners, modals, overlays
94
+ if (path === '/dismiss' && req.method === 'POST') {
95
+ const body = parseBody(await readBody(req));
96
+ if (!body.tab) {
97
+ return json(res, 400, { error: 'Provide "tab"' });
98
+ }
99
+ const result = await dismissOverlays(body.tab, { port: CDP_PORT, host: CDP_HOST });
100
+ return json(res, 200, result);
101
+ }
85
102
  // POST /captcha — detect and interact with captchas
86
103
  if (path === '/captcha' && req.method === 'POST') {
87
- const body = JSON.parse(await readBody(req));
104
+ const body = parseBody(await readBody(req));
88
105
  if (!body.action) {
89
106
  return json(res, 400, { error: 'Provide "action": detect, read, next, prev, submit, audio, restart' });
90
107
  }
@@ -96,7 +113,7 @@ const server = http.createServer(async (req, res) => {
96
113
  }
97
114
  // POST /read — get structured readable content from a page
98
115
  if (path === '/read' && req.method === 'POST') {
99
- const body = JSON.parse(await readBody(req));
116
+ const body = parseBody(await readBody(req));
100
117
  if (!body.tab) {
101
118
  return json(res, 400, { error: 'Provide "tab", optional "selector"' });
102
119
  }
@@ -105,7 +122,7 @@ const server = http.createServer(async (req, res) => {
105
122
  }
106
123
  // POST /focus — bring a tab to front
107
124
  if (path === '/focus' && req.method === 'POST') {
108
- const body = JSON.parse(await readBody(req));
125
+ const body = parseBody(await readBody(req));
109
126
  if (!body.tab) {
110
127
  return json(res, 400, { error: 'Provide "tab"' });
111
128
  }
@@ -114,7 +131,7 @@ const server = http.createServer(async (req, res) => {
114
131
  }
115
132
  // POST /eval — run JavaScript in a tab or iframe
116
133
  if (path === '/eval' && req.method === 'POST') {
117
- const body = JSON.parse(await readBody(req));
134
+ const body = parseBody(await readBody(req));
118
135
  if (!body.tab || !body.expression) {
119
136
  return json(res, 400, { error: 'Provide "tab" and "expression"' });
120
137
  }
@@ -123,7 +140,7 @@ const server = http.createServer(async (req, res) => {
123
140
  }
124
141
  // POST /navigate — go to url, back, or forward in same tab
125
142
  if (path === '/navigate' && req.method === 'POST') {
126
- const body = JSON.parse(await readBody(req));
143
+ const body = parseBody(await readBody(req));
127
144
  if (!body.tab) {
128
145
  return json(res, 400, { error: 'Provide "tab" and one of: "url", "back":true, "forward":true' });
129
146
  }
@@ -150,6 +167,15 @@ const server = http.createServer(async (req, res) => {
150
167
  catch (error) {
151
168
  const message = error instanceof Error ? error.message : String(error);
152
169
  console.error(`[${new Date().toISOString()}] Error:`, message);
170
+ if (error instanceof SyntaxError) {
171
+ return json(res, 400, { error: 'Invalid JSON: ' + message });
172
+ }
173
+ if (message.includes('Tab not found')) {
174
+ return json(res, 404, { error: message });
175
+ }
176
+ if (message.includes('Cannot connect to Chrome') || message.includes('ECONNREFUSED')) {
177
+ return json(res, 503, { error: 'Chrome not running. Start with: surfagent start' });
178
+ }
153
179
  json(res, 500, { error: message });
154
180
  }
155
181
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "surfagent",
3
- "version": "1.0.5",
3
+ "version": "1.0.7",
4
4
  "description": "Browser automation API for AI agents — structured page recon, form filling, clicking, and navigation via Chrome CDP",
5
5
  "keywords": [
6
6
  "ai-agent",
package/src/api/act.ts CHANGED
@@ -148,6 +148,7 @@ export interface ClickRequest {
148
148
  tab: string;
149
149
  selector?: string;
150
150
  text?: string;
151
+ waitAfter?: number; // ms to wait after click for page to settle
151
152
  }
152
153
 
153
154
  export async function clickElement(
@@ -173,9 +174,9 @@ export async function clickElement(
173
174
  }
174
175
  if (!el && text) {
175
176
  const lower = text.toLowerCase();
176
- const all = document.querySelectorAll('a, button, input[type="submit"], [role="button"], [onclick]');
177
+ const all = document.querySelectorAll('a, button, input[type="submit"], [role="button"], [role="option"], [role="menuitem"], [role="listitem"], [role="tab"], [role="link"], li[aria-label], [onclick]');
177
178
  for (const candidate of all) {
178
- const t = (candidate.innerText || candidate.textContent || candidate.value || '').trim();
179
+ const t = (candidate.innerText || candidate.textContent || candidate.value || candidate.getAttribute('aria-label') || '').trim();
179
180
  if (t.toLowerCase().includes(lower)) { el = candidate; break; }
180
181
  }
181
182
  }
@@ -198,6 +199,11 @@ export async function clickElement(
198
199
  returnByValue: true
199
200
  });
200
201
 
202
+ // Wait after click if requested (for page to settle after navigation/SPA route change)
203
+ if (request.waitAfter && request.waitAfter > 0) {
204
+ await new Promise<void>(resolve => setTimeout(resolve, Math.min(request.waitAfter!, 10000)));
205
+ }
206
+
201
207
  await client.close();
202
208
  return result.result.value as any;
203
209
  } catch (error) {
@@ -434,6 +440,73 @@ export async function readPage(
434
440
  }
435
441
  }
436
442
 
443
+ const DISMISS_OVERLAYS_SCRIPT = `
444
+ (function() {
445
+ const dismissed = [];
446
+
447
+ // Common cookie consent button patterns (multi-language)
448
+ const consentPatterns = [
449
+ 'reject all', 'reject', 'decline', 'deny',
450
+ 'accept all', 'accept', 'godta alle', 'godta',
451
+ 'alle ablehnen', 'ablehnen', 'tout refuser', 'refuser',
452
+ 'rechazar todo', 'rechazar', 'rifiuta tutto', 'rifiuta',
453
+ 'bare nødvendige', 'only necessary', 'nur notwendige',
454
+ 'manage preferences', 'cookie settings',
455
+ ];
456
+
457
+ // Try cookie consent buttons
458
+ for (const btn of document.querySelectorAll('button, a[role="button"]')) {
459
+ const text = (btn.innerText || btn.textContent || '').trim().toLowerCase();
460
+ if (text.length > 50 || text.length < 2) continue;
461
+ for (const pattern of consentPatterns) {
462
+ if (text === pattern || text.startsWith(pattern)) {
463
+ btn.click();
464
+ dismissed.push({ type: 'cookie', text: text.substring(0, 40) });
465
+ break;
466
+ }
467
+ }
468
+ if (dismissed.length) break;
469
+ }
470
+
471
+ // Try closing modal dialogs (X button, close button, dismiss)
472
+ if (!dismissed.length) {
473
+ for (const btn of document.querySelectorAll('[aria-label*="Close" i], [aria-label*="Dismiss" i], [aria-label*="Lukk" i], [aria-label*="Schließen" i], [aria-label*="Fermer" i]')) {
474
+ const dialog = btn.closest('[role="dialog"], [role="alertdialog"], .modal, [data-overlay]');
475
+ if (dialog) {
476
+ btn.click();
477
+ dismissed.push({ type: 'dialog', text: btn.getAttribute('aria-label') || 'close' });
478
+ break;
479
+ }
480
+ }
481
+ }
482
+
483
+ return { dismissed, count: dismissed.length };
484
+ })()
485
+ `;
486
+
487
+ export async function dismissOverlays(
488
+ tabPattern: string,
489
+ options: { port?: number; host?: string }
490
+ ): Promise<any> {
491
+ const port = options.port || 9222;
492
+ const host = options.host || 'localhost';
493
+
494
+ const tab = await resolveTab(tabPattern, port, host);
495
+ const client = await connectToTab(tab.id, port, host);
496
+
497
+ try {
498
+ const r = await client.Runtime.evaluate({
499
+ expression: DISMISS_OVERLAYS_SCRIPT,
500
+ returnByValue: true
501
+ });
502
+ await client.close();
503
+ return r.result.value;
504
+ } catch (error) {
505
+ await client.close();
506
+ throw error;
507
+ }
508
+ }
509
+
437
510
  const CAPTCHA_DETECT_SCRIPT = `
438
511
  (function() {
439
512
  // Find captcha iframes on the page
package/src/api/server.ts CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  import http from 'node:http';
4
4
  import { reconUrl, reconTab } from './recon.js';
5
- import { fillFields, clickElement, scrollPage, navigatePage, evalInTab, focusTab, readPage, captchaInteract } from './act.js';
5
+ import { fillFields, clickElement, scrollPage, navigatePage, evalInTab, focusTab, readPage, captchaInteract, dismissOverlays } from './act.js';
6
6
  import { getAllTabs } from '../chrome/tabs.js';
7
7
 
8
8
  const PORT = parseInt(process.env.API_PORT || '3456', 10);
@@ -27,6 +27,11 @@ function json(res: http.ServerResponse, status: number, data: any) {
27
27
  res.end(JSON.stringify(data));
28
28
  }
29
29
 
30
+ function parseBody(raw: string): any {
31
+ if (!raw || !raw.trim()) throw new SyntaxError('Empty request body');
32
+ return JSON.parse(raw);
33
+ }
34
+
30
35
  function cors(res: http.ServerResponse) {
31
36
  res.writeHead(204, {
32
37
  'Access-Control-Allow-Origin': '*',
@@ -45,7 +50,7 @@ const server = http.createServer(async (req, res) => {
45
50
  try {
46
51
  // POST /recon — full page reconnaissance
47
52
  if (path === '/recon' && req.method === 'POST') {
48
- const body: RequestBody = JSON.parse(await readBody(req));
53
+ const body: RequestBody = parseBody(await readBody(req));
49
54
 
50
55
  if (!body.url && !body.tab) {
51
56
  return json(res, 400, { error: 'Provide "url" (to open new page) or "tab" (to recon existing tab)' });
@@ -73,10 +78,13 @@ const server = http.createServer(async (req, res) => {
73
78
 
74
79
  // POST /fill — fill form fields via CDP keystrokes
75
80
  if (path === '/fill' && req.method === 'POST') {
76
- const body = JSON.parse(await readBody(req));
81
+ const body = parseBody(await readBody(req));
77
82
  if (!body.tab || !body.fields) {
78
83
  return json(res, 400, { error: 'Provide "tab" and "fields" [{ selector, value }]' });
79
84
  }
85
+ if (!Array.isArray(body.fields)) {
86
+ return json(res, 400, { error: '"fields" must be an array of { selector, value }' });
87
+ }
80
88
  const start = Date.now();
81
89
  const result = await fillFields(body, { port: CDP_PORT, host: CDP_HOST });
82
90
  return json(res, 200, { ...result, _fillMs: Date.now() - start });
@@ -84,7 +92,7 @@ const server = http.createServer(async (req, res) => {
84
92
 
85
93
  // POST /click — click an element
86
94
  if (path === '/click' && req.method === 'POST') {
87
- const body = JSON.parse(await readBody(req));
95
+ const body = parseBody(await readBody(req));
88
96
  if (!body.tab || (!body.selector && !body.text)) {
89
97
  return json(res, 400, { error: 'Provide "tab" and "selector" or "text"' });
90
98
  }
@@ -94,7 +102,7 @@ const server = http.createServer(async (req, res) => {
94
102
 
95
103
  // POST /scroll — scroll a page
96
104
  if (path === '/scroll' && req.method === 'POST') {
97
- const body = JSON.parse(await readBody(req));
105
+ const body = parseBody(await readBody(req));
98
106
  if (!body.tab) {
99
107
  return json(res, 400, { error: 'Provide "tab", optional "direction" (down/up), "amount" (pixels)' });
100
108
  }
@@ -102,9 +110,19 @@ const server = http.createServer(async (req, res) => {
102
110
  return json(res, 200, result);
103
111
  }
104
112
 
113
+ // POST /dismiss — dismiss cookie banners, modals, overlays
114
+ if (path === '/dismiss' && req.method === 'POST') {
115
+ const body = parseBody(await readBody(req));
116
+ if (!body.tab) {
117
+ return json(res, 400, { error: 'Provide "tab"' });
118
+ }
119
+ const result = await dismissOverlays(body.tab, { port: CDP_PORT, host: CDP_HOST });
120
+ return json(res, 200, result);
121
+ }
122
+
105
123
  // POST /captcha — detect and interact with captchas
106
124
  if (path === '/captcha' && req.method === 'POST') {
107
- const body = JSON.parse(await readBody(req));
125
+ const body = parseBody(await readBody(req));
108
126
  if (!body.action) {
109
127
  return json(res, 400, { error: 'Provide "action": detect, read, next, prev, submit, audio, restart' });
110
128
  }
@@ -117,7 +135,7 @@ const server = http.createServer(async (req, res) => {
117
135
 
118
136
  // POST /read — get structured readable content from a page
119
137
  if (path === '/read' && req.method === 'POST') {
120
- const body = JSON.parse(await readBody(req));
138
+ const body = parseBody(await readBody(req));
121
139
  if (!body.tab) {
122
140
  return json(res, 400, { error: 'Provide "tab", optional "selector"' });
123
141
  }
@@ -127,7 +145,7 @@ const server = http.createServer(async (req, res) => {
127
145
 
128
146
  // POST /focus — bring a tab to front
129
147
  if (path === '/focus' && req.method === 'POST') {
130
- const body = JSON.parse(await readBody(req));
148
+ const body = parseBody(await readBody(req));
131
149
  if (!body.tab) {
132
150
  return json(res, 400, { error: 'Provide "tab"' });
133
151
  }
@@ -137,7 +155,7 @@ const server = http.createServer(async (req, res) => {
137
155
 
138
156
  // POST /eval — run JavaScript in a tab or iframe
139
157
  if (path === '/eval' && req.method === 'POST') {
140
- const body = JSON.parse(await readBody(req));
158
+ const body = parseBody(await readBody(req));
141
159
  if (!body.tab || !body.expression) {
142
160
  return json(res, 400, { error: 'Provide "tab" and "expression"' });
143
161
  }
@@ -147,7 +165,7 @@ const server = http.createServer(async (req, res) => {
147
165
 
148
166
  // POST /navigate — go to url, back, or forward in same tab
149
167
  if (path === '/navigate' && req.method === 'POST') {
150
- const body = JSON.parse(await readBody(req));
168
+ const body = parseBody(await readBody(req));
151
169
  if (!body.tab) {
152
170
  return json(res, 400, { error: 'Provide "tab" and one of: "url", "back":true, "forward":true' });
153
171
  }
@@ -175,6 +193,16 @@ const server = http.createServer(async (req, res) => {
175
193
  } catch (error) {
176
194
  const message = error instanceof Error ? error.message : String(error);
177
195
  console.error(`[${new Date().toISOString()}] Error:`, message);
196
+
197
+ if (error instanceof SyntaxError) {
198
+ return json(res, 400, { error: 'Invalid JSON: ' + message });
199
+ }
200
+ if (message.includes('Tab not found')) {
201
+ return json(res, 404, { error: message });
202
+ }
203
+ if (message.includes('Cannot connect to Chrome') || message.includes('ECONNREFUSED')) {
204
+ return json(res, 503, { error: 'Chrome not running. Start with: surfagent start' });
205
+ }
178
206
  json(res, 500, { error: message });
179
207
  }
180
208
  });