@monostate/node-scraper 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -79,6 +79,80 @@ await bulkScrapeStream(urls, {
79
79
 
80
80
  See [BULK_SCRAPING.md](./BULK_SCRAPING.md) for full documentation.
81
81
 
82
+ ### Browser sessions
83
+
84
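+ Persistent browser sessions with real-time control. Four modes are available: `auto` (the default — LightPanda with Chrome fallback), `headless` (LightPanda only), `visual` (visible Chrome) and `computer-use` (coordinate-based control through a provider, described in the next section):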
85
+
86
+ ```javascript
87
+ import { createSession } from '@monostate/node-scraper';
88
+
89
+ // Auto (default) — headless LightPanda with Chrome fallback
90
+ const session = await createSession({ mode: 'auto' });
91
+ await session.goto('https://example.com');
92
+ const content = await session.extractContent();
93
+ const state = await session.getPageState({ includeScreenshot: true });
94
+ await session.close();
95
+
96
+ // Visual — Chrome with headless:false for dev/debug
97
+ const visual = await createSession({ mode: 'visual' });
98
+ await visual.goto('https://example.com');
99
+ await visual.screenshot(); // real Chrome rendering
100
+ await visual.close();
101
+ ```
102
+
103
+ Session methods: `goto`, `click`, `type`, `scroll`, `hover`, `select`, `pressKey`, `goBack`, `goForward`, `screenshot`, `extractContent`, `getPageState`, `waitFor`, `evaluate`, `getCookies`, `setCookies`.
104
+
105
+ ### Computer use (coordinate-based browser control)
106
+
107
+ For AI agents that navigate by pixel coordinates -- useful for anti-bot sites, dynamic UIs, or anything that can't be scraped with selectors.
108
+
109
+ ```javascript
110
+ import { createSession, LocalProvider } from '@monostate/node-scraper';
111
+
112
+ // LocalProvider runs Xvfb + Chrome + xdotool (Linux only)
113
+ const session = await createSession({
114
+ mode: 'computer-use',
115
+ provider: new LocalProvider({ screenWidth: 1280, screenHeight: 800, enableVnc: true }),
116
+ });
117
+
118
+ await session.goto('https://example.com');
119
+
120
+ // Coordinate-based actions (delegated to provider)
121
+ await session.clickAt(640, 400);
122
+ await session.typeText('hello world');
123
+ await session.mouseMove(100, 200);
124
+ await session.drag(10, 20, 300, 400);
125
+ await session.scrollAt(640, 400, 'down', 5);
126
+ const pos = await session.getCursorPosition();
127
+ const size = await session.getScreenSize();
128
+
129
+ // Selector-based actions still work (via Puppeteer CDP)
130
+ await session.click('#submit');
131
+ await session.type('#search', 'query');
132
+
133
+ // VNC streaming URL (if provider supports it)
134
+ console.log(session.getVncUrl());
135
+
136
+ await session.close();
137
+ ```
138
+
139
+ #### Custom providers
140
+
141
+ Extend `ComputerUseProvider` and override its methods to connect any VM/container backend:
142
+
143
+ ```javascript
144
+ import { ComputerUseProvider } from '@monostate/node-scraper';
145
+
146
+ class MyProvider extends ComputerUseProvider {
147
+ async start() {
148
+ // Provision VM, return { cdpUrl, vncUrl, screenSize }
149
+ }
150
+ async mouseClick(x, y, button) { /* ... */ }
151
+ async screenshot() { /* ... */ }
152
+ async stop() { /* cleanup */ }
153
+ }
154
+ ```
155
+
82
156
  ### AI-powered Q&A
83
157
 
84
158
  Ask questions about any website using OpenRouter, OpenAI, or local fallback:
@@ -12,15 +12,16 @@ const FALLBACK_REASONS = {
12
12
  export class BrowserSession {
13
13
  /**
14
14
  * @param {object} options
15
- * @param {'headless'|'visual'|'auto'} options.mode - 'headless' (LightPanda), 'visual' (Chrome), 'auto' (LP with Chrome fallback)
15
+ * @param {'headless'|'visual'|'auto'|'computer-use'} options.mode - 'headless' (LightPanda), 'visual' (Chrome visible), 'auto' (LP with Chrome fallback), 'computer-use' (provider with Xvfb+VNC)
16
16
  * @param {number} options.timeout - Navigation timeout in ms (default: 15000)
17
17
  * @param {string} options.userAgent - Custom user agent
18
18
  * @param {string} options.lightpandaPath - Path to LightPanda binary
19
19
  * @param {boolean} options.verbose - Enable logging
20
+ * @param {import('./computer-use-provider.js').ComputerUseProvider} options.provider - Required for computer-use mode
20
21
  */
21
22
  constructor(options = {}) {
22
23
  this.mode = options.mode || 'auto';
23
- this.activeBackend = null; // 'lightpanda' | 'chrome'
24
+ this.activeBackend = null; // 'lightpanda' | 'chrome' | 'computer-use'
24
25
  this.timeout = options.timeout || 15000;
25
26
  this.userAgent = options.userAgent || 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36';
26
27
  this.lightpandaPath = options.lightpandaPath;
@@ -33,6 +34,10 @@ export class BrowserSession {
33
34
  this._connected = false;
34
35
  this._fallbackCount = 0;
35
36
 
37
+ // Computer-use provider (Xvfb + Chrome + xdotool + optional VNC)
38
+ this.provider = options.provider || null;
39
+ this._providerInfo = null;
40
+
36
41
  this.history = []; // action log for debugging
37
42
  }
38
43
 
@@ -41,8 +46,10 @@ export class BrowserSession {
41
46
  async connect() {
42
47
  if (this._connected) return this;
43
48
 
44
- if (this.mode === 'visual') {
45
- await this._connectChrome();
49
+ if (this.mode === 'computer-use') {
50
+ await this._connectViaProvider();
51
+ } else if (this.mode === 'visual') {
52
+ await this._connectChromeVisual();
46
53
  } else {
47
54
  // 'headless' or 'auto' — start with LightPanda
48
55
  try {
@@ -86,6 +93,45 @@ export class BrowserSession {
86
93
  this._log('Connected to Chrome');
87
94
  }
88
95
 
96
+ async _connectChromeVisual() {
97
+ const puppeteer = await this._getPuppeteer();
98
+ this.browser = await puppeteer.launch({
99
+ headless: false,
100
+ args: [
101
+ '--no-sandbox',
102
+ '--disable-setuid-sandbox',
103
+ '--disable-dev-shm-usage',
104
+ '--window-size=1280,800',
105
+ ],
106
+ });
107
+ this.page = await this.browser.newPage();
108
+ await this.page.setUserAgent(this.userAgent);
109
+ await this.page.setViewport({ width: 1280, height: 800 });
110
+ this.activeBackend = 'chrome';
111
+ this._log('Connected to Chrome (visual, headless:false)');
112
+ }
113
+
114
+ async _connectViaProvider() {
115
+ if (!this.provider) {
116
+ throw new Error('computer-use mode requires a provider. Pass { provider: new LocalProvider() }');
117
+ }
118
+ this._providerInfo = await this.provider.start();
119
+
120
+ const puppeteer = await this._getPuppeteer();
121
+ this.browser = await puppeteer.connect({
122
+ browserWSEndpoint: this._providerInfo.cdpUrl,
123
+ });
124
+
125
+ const pages = await this.browser.pages();
126
+ this.page = pages[0] || await this.browser.newPage();
127
+ await this.page.setViewport({
128
+ width: this._providerInfo.screenSize.width,
129
+ height: this._providerInfo.screenSize.height,
130
+ });
131
+ this.activeBackend = 'computer-use';
132
+ this._log('Connected via ComputerUseProvider');
133
+ }
134
+
89
135
  // ── Navigation ──────────────────────────────────────────────
90
136
 
91
137
  async goto(url) {
@@ -383,6 +429,15 @@ export class BrowserSession {
383
429
  case 'screenshot': return this.screenshot(action);
384
430
  case 'extractContent': return this.extractContent();
385
431
  case 'waitFor': return this.waitFor(action.selector, action.timeout);
432
+ // Coordinate-based actions (computer-use mode)
433
+ case 'mouseMove': return this.mouseMove(action.x, action.y);
434
+ case 'clickAt': return this.clickAt(action.x, action.y, action.button);
435
+ case 'doubleClickAt': return this.doubleClickAt(action.x, action.y, action.button);
436
+ case 'drag': return this.drag(action.startX, action.startY, action.endX, action.endY);
437
+ case 'scrollAt': return this.scrollAt(action.x, action.y, action.direction, action.amount);
438
+ case 'typeText': return this.typeText(action.text);
439
+ case 'getCursorPosition': return this.getCursorPosition();
440
+ case 'getScreenSize': return this.getScreenSize();
386
441
  default: throw new Error(`Unknown action type: ${action.type}`);
387
442
  }
388
443
  }
@@ -416,6 +471,78 @@ export class BrowserSession {
416
471
  }
417
472
  }
418
473
 
474
+ // ── Coordinate-based actions (computer-use mode) ────────────
475
+
476
+ _ensureProvider() {
477
+ if (!this.provider || this.activeBackend !== 'computer-use') {
478
+ throw new Error('Coordinate-based actions require computer-use mode with a provider');
479
+ }
480
+ }
481
+
482
+ async mouseMove(x, y) {
483
+ this._ensureConnected();
484
+ this._ensureProvider();
485
+ const result = await this.provider.mouseMove(x, y);
486
+ this._logAction('mouseMove', { x, y });
487
+ return result;
488
+ }
489
+
490
+ async clickAt(x, y, button = 'left') {
491
+ this._ensureConnected();
492
+ this._ensureProvider();
493
+ const result = await this.provider.mouseClick(x, y, button);
494
+ this._logAction('clickAt', { x, y, button });
495
+ return result;
496
+ }
497
+
498
+ async doubleClickAt(x, y, button = 'left') {
499
+ this._ensureConnected();
500
+ this._ensureProvider();
501
+ const result = await this.provider.mouseDoubleClick(x, y, button);
502
+ this._logAction('doubleClickAt', { x, y, button });
503
+ return result;
504
+ }
505
+
506
+ async drag(startX, startY, endX, endY) {
507
+ this._ensureConnected();
508
+ this._ensureProvider();
509
+ const result = await this.provider.mouseDrag(startX, startY, endX, endY);
510
+ this._logAction('drag', { startX, startY, endX, endY });
511
+ return result;
512
+ }
513
+
514
+ async scrollAt(x, y, direction = 'down', amount = 3) {
515
+ this._ensureConnected();
516
+ this._ensureProvider();
517
+ const result = await this.provider.scroll(x, y, direction, amount);
518
+ this._logAction('scrollAt', { x, y, direction, amount });
519
+ return result;
520
+ }
521
+
522
+ async typeText(text) {
523
+ this._ensureConnected();
524
+ this._ensureProvider();
525
+ const result = await this.provider.typeText(text);
526
+ this._logAction('typeText', { text: text.substring(0, 20) + (text.length > 20 ? '...' : '') });
527
+ return result;
528
+ }
529
+
530
+ async getCursorPosition() {
531
+ this._ensureConnected();
532
+ this._ensureProvider();
533
+ return this.provider.getCursorPosition();
534
+ }
535
+
536
+ async getScreenSize() {
537
+ this._ensureConnected();
538
+ this._ensureProvider();
539
+ return this.provider.getScreenSize();
540
+ }
541
+
542
+ getVncUrl() {
543
+ return this._providerInfo?.vncUrl || null;
544
+ }
545
+
419
546
  // ── Fallback ────────────────────────────────────────────────
420
547
 
421
548
  async _fallbackToChrome(reason) {
@@ -487,8 +614,15 @@ export class BrowserSession {
487
614
  if (this.context) await this.context.close();
488
615
  } catch { /* ignore */ }
489
616
 
490
- if (this.activeBackend === 'lightpanda' && this.browser) {
617
+ if (this.activeBackend === 'computer-use') {
618
+ // Provider mode: disconnect from CDP, then stop the provider
619
+ try { if (this.browser) await this.browser.disconnect(); } catch { /* ignore */ }
620
+ try { if (this.provider) await this.provider.stop(); } catch { /* ignore */ }
621
+ } else if (this.activeBackend === 'lightpanda' && this.browser) {
491
622
  try { await this.browser.disconnect(); } catch { /* ignore */ }
623
+ } else if (this.mode === 'visual' && this.browser && !this._chromeBrowser) {
624
+ // Visual mode: launched directly, not from pool — close the browser process
625
+ try { await this.browser.close(); } catch { /* ignore */ }
492
626
  }
493
627
 
494
628
  if (this._chromeBrowser) {
@@ -0,0 +1,168 @@
1
+ /**
2
+ * Abstract base class for computer-use providers.
3
+ *
4
+ * A provider manages a display server (e.g. Xvfb), a browser (Chrome),
5
+ * and optionally a VNC server. It exposes coordinate-based actions
6
+ * (mouse, keyboard) and screenshots for AI computer-use agents.
7
+ *
8
+ * Open-source implementations:
9
+ * - LocalProvider: spawns Xvfb + Chrome + xdotool on the local machine
10
+ *
11
+ * To build your own provider (Docker, Kubernetes, cloud VMs, etc.),
12
+ * extend this class and implement all methods.
13
+ *
14
+ * @example
15
+ * import { ComputerUseProvider } from '@monostate/node-scraper';
16
+ *
17
+ * class MyCloudProvider extends ComputerUseProvider {
18
+ * async start() {
19
+ * // spin up a VM, return CDP URL
20
+ * return { cdpUrl: 'ws://...', vncUrl: 'https://...', screenSize: { width: 1280, height: 800 } };
21
+ * }
22
+ * // ... implement all other methods ...
23
+ * }
24
+ */
25
+
26
+ /**
27
+ * @typedef {Object} ProviderInfo
28
+ * @property {string} cdpUrl - WebSocket URL for Chrome DevTools Protocol (ws://...)
29
+ * @property {string|null} vncUrl - noVNC URL for live browser view (null if unavailable)
30
+ * @property {{width: number, height: number}} screenSize - Virtual display dimensions
31
+ */
32
+
33
+ /**
34
+ * @typedef {Object} CoordinateActionResult
35
+ * @property {boolean} success
36
+ * @property {string} [screenshot] - base64 data URL of screenshot after action (optional)
37
+ * @property {string} [error] - error message if failed
38
+ */
39
+
40
/**
 * Build the error thrown by every abstract method that a subclass has not
 * overridden.
 * @param {string} method - name of the unimplemented method
 * @returns {Error}
 */
const notImplemented = (method) =>
  new Error(`ComputerUseProvider.${method}() not implemented`);

/**
 * Base class for computer-use providers. Every method except wait() rejects
 * with a "not implemented" error until a subclass overrides it.
 */
export class ComputerUseProvider {
  // ── Lifecycle ─────────────────────────────────────────────

  /**
   * Bring up the environment (display server, browser, VNC, ...).
   * @returns {Promise<ProviderInfo>}
   */
  async start() {
    throw notImplemented('start');
  }

  /**
   * Tear the environment down and release every resource it holds.
   * @returns {Promise<void>}
   */
  async stop() {
    throw notImplemented('stop');
  }

  /**
   * Take a screenshot of the whole virtual display.
   * @returns {Promise<{screenshot: string}>} base64 data URL (data:image/png;base64,...)
   */
  async screenshot() {
    throw notImplemented('screenshot');
  }

  // ── Coordinate-based actions ──────────────────────────────

  /**
   * Move the pointer to (x, y).
   * @param {number} x
   * @param {number} y
   * @returns {Promise<CoordinateActionResult>}
   */
  async mouseMove(x, y) {
    throw notImplemented('mouseMove');
  }

  /**
   * Single click at (x, y).
   * @param {number} x
   * @param {number} y
   * @param {'left'|'right'|'middle'} [button='left']
   * @returns {Promise<CoordinateActionResult>}
   */
  async mouseClick(x, y, button = 'left') {
    throw notImplemented('mouseClick');
  }

  /**
   * Double click at (x, y).
   * @param {number} x
   * @param {number} y
   * @param {'left'|'right'|'middle'} [button='left']
   * @returns {Promise<CoordinateActionResult>}
   */
  async mouseDoubleClick(x, y, button = 'left') {
    throw notImplemented('mouseDoubleClick');
  }

  /**
   * Press at (startX, startY), move to (endX, endY) and release.
   * @param {number} startX
   * @param {number} startY
   * @param {number} endX
   * @param {number} endY
   * @returns {Promise<CoordinateActionResult>}
   */
  async mouseDrag(startX, startY, endX, endY) {
    throw notImplemented('mouseDrag');
  }

  /**
   * Scroll in the given direction with the pointer at (x, y).
   * @param {number} x
   * @param {number} y
   * @param {'up'|'down'} direction
   * @param {number} [amount=3] - number of scroll steps
   * @returns {Promise<CoordinateActionResult>}
   */
  async scroll(x, y, direction, amount = 3) {
    throw notImplemented('scroll');
  }

  // ── Keyboard ──────────────────────────────────────────────

  /**
   * Press a single key or a combination (e.g. 'Return', 'ctrl+c', 'alt+Tab').
   * @param {string} key - xdotool-compatible key name
   * @returns {Promise<CoordinateActionResult>}
   */
  async pressKey(key) {
    throw notImplemented('pressKey');
  }

  /**
   * Type the given text character by character.
   * @param {string} text
   * @returns {Promise<CoordinateActionResult>}
   */
  async typeText(text) {
    throw notImplemented('typeText');
  }

  // ── Utilities ─────────────────────────────────────────────

  /**
   * Pause for the given duration. This is the only method with a default
   * implementation.
   * @param {number} ms - milliseconds to wait
   * @returns {Promise<CoordinateActionResult>} always `{ success: true }`
   */
  async wait(ms) {
    await new Promise((done) => {
      setTimeout(done, ms);
    });
    return { success: true };
  }

  /**
   * Report where the cursor currently is.
   * @returns {Promise<{x: number, y: number}>}
   */
  async getCursorPosition() {
    throw notImplemented('getCursorPosition');
  }

  /**
   * Report the size of the virtual display.
   * @returns {Promise<{width: number, height: number}>}
   */
  async getScreenSize() {
    throw notImplemented('getScreenSize');
  }
}
167
+
168
+ export default ComputerUseProvider;
package/index.d.ts CHANGED
@@ -455,14 +455,57 @@ export function bulkScrape(urls: string[], options?: BulkScrapeOptions): Promise
455
455
  */
456
456
  export function bulkScrapeStream(urls: string[], options: BulkScrapeStreamOptions): Promise<BulkScrapeStreamStats>;
457
457
 
458
+ // ── Computer Use Provider ─────────────────────────────────────
459
+
460
/** What a provider reports back once its environment has been started. */
export interface ProviderInfo {
  /** WebSocket URL for the Chrome DevTools Protocol (ws://...). */
  cdpUrl: string;
  /** URL for a live view of the browser, or null when unavailable. */
  vncUrl: string | null;
  /** Dimensions of the virtual display. */
  screenSize: {
    width: number;
    height: number;
  };
}
465
+
466
/** Outcome of a coordinate-based provider action. */
export interface CoordinateActionResult {
  /** Whether the action succeeded. */
  success: boolean;
  /** Optional base64 data URL of a screenshot taken after the action. */
  screenshot?: string | null;
  /** Error message if the action failed. */
  error?: string | null;
}
471
+
472
/**
 * Base class for computer-use providers. Extend it and override its methods
 * to plug in your own display/browser backend.
 */
export declare class ComputerUseProvider {
  /** Start the environment (display server, browser, VNC, ...). */
  start(): Promise<ProviderInfo>;

  /** Stop the environment and release all resources. */
  stop(): Promise<void>;

  /** Capture a screenshot; `screenshot` is a base64 data URL. */
  screenshot(): Promise<{ screenshot: string }>;

  /** Move the mouse cursor to (x, y). */
  mouseMove(x: number, y: number): Promise<CoordinateActionResult>;

  /** Click at (x, y). */
  mouseClick(
    x: number,
    y: number,
    button?: 'left' | 'right' | 'middle',
  ): Promise<CoordinateActionResult>;

  /** Double-click at (x, y). */
  mouseDoubleClick(
    x: number,
    y: number,
    button?: 'left' | 'right' | 'middle',
  ): Promise<CoordinateActionResult>;

  /** Drag from (startX, startY) to (endX, endY). */
  mouseDrag(
    startX: number,
    startY: number,
    endX: number,
    endY: number,
  ): Promise<CoordinateActionResult>;

  /** Scroll at (x, y); `amount` is the number of scroll steps. */
  scroll(
    x: number,
    y: number,
    direction: 'up' | 'down',
    amount?: number,
  ): Promise<CoordinateActionResult>;

  /** Press a key or key combination (e.g. 'Return', 'ctrl+c'). */
  pressKey(key: string): Promise<CoordinateActionResult>;

  /** Type text character by character. */
  typeText(text: string): Promise<CoordinateActionResult>;

  /** Wait for `ms` milliseconds. */
  wait(ms: number): Promise<CoordinateActionResult>;

  /** Get the current cursor position. */
  getCursorPosition(): Promise<{ x: number; y: number }>;

  /** Get the virtual display dimensions. */
  getScreenSize(): Promise<{ width: number; height: number }>;
}
487
+
488
/** Constructor options for {@link LocalProvider}. */
export interface LocalProviderOptions {
  /** Virtual display width in pixels (default: 1280). */
  screenWidth?: number;
  /** Virtual display height in pixels (default: 800). */
  screenHeight?: number;
  /** Start a VNC server for the display (default: false). */
  enableVnc?: boolean;
  /** Path to the Chrome/Chromium binary (auto-detected when omitted). */
  chromePath?: string;
  /** Additional Chrome launch arguments. */
  chromeArgs?: string[];
}
495
+
496
/**
 * Provider that runs Xvfb + Chrome + xdotool on the local machine
 * (Linux only).
 */
export declare class LocalProvider extends ComputerUseProvider {
  /** @param options - display size, VNC and Chrome settings */
  constructor(options?: LocalProviderOptions);
}
499
+
458
500
  // ── Browser Session ───────────────────────────────────────────
459
501
 
460
502
  export interface BrowserSessionOptions {
461
- mode?: 'headless' | 'visual' | 'auto';
503
+ mode?: 'headless' | 'visual' | 'auto' | 'computer-use';
462
504
  timeout?: number;
463
505
  userAgent?: string;
464
506
  lightpandaPath?: string;
465
507
  verbose?: boolean;
508
+ provider?: ComputerUseProvider;
466
509
  }
467
510
 
468
511
  export interface PageState {
@@ -480,7 +523,7 @@ export interface PageState {
480
523
  value?: string;
481
524
  }>;
482
525
  screenshot?: string;
483
- backend: 'lightpanda' | 'chrome';
526
+ backend: 'lightpanda' | 'chrome' | 'computer-use';
484
527
  sessionHistory: Array<{ type: string; timestamp: number; backend: string }>;
485
528
  }
486
529
 
@@ -492,7 +535,7 @@ export interface ActionResult {
492
535
  }
493
536
 
494
537
  export interface BrowserAction {
495
- type: 'goto' | 'click' | 'type' | 'scroll' | 'hover' | 'select' | 'pressKey' | 'goBack' | 'goForward' | 'screenshot' | 'extractContent' | 'waitFor';
538
+ type: 'goto' | 'click' | 'type' | 'scroll' | 'hover' | 'select' | 'pressKey' | 'goBack' | 'goForward' | 'screenshot' | 'extractContent' | 'waitFor' | 'mouseMove' | 'clickAt' | 'doubleClickAt' | 'drag' | 'scrollAt' | 'typeText' | 'getCursorPosition' | 'getScreenSize';
496
539
  url?: string;
497
540
  selector?: string;
498
541
  text?: string;
@@ -508,13 +551,21 @@ export interface BrowserAction {
508
551
  fullPage?: boolean;
509
552
  type_?: 'png' | 'jpeg' | 'webp';
510
553
  includeScreenshot?: boolean;
554
+ /** Coordinate-based action fields */
555
+ x?: number;
556
+ y?: number;
557
+ button?: 'left' | 'right' | 'middle';
558
+ startX?: number;
559
+ startY?: number;
560
+ endX?: number;
561
+ endY?: number;
511
562
  }
512
563
 
513
564
  export declare class BrowserSession {
514
565
  constructor(options?: BrowserSessionOptions);
515
566
 
516
- readonly activeBackend: 'lightpanda' | 'chrome' | null;
517
- readonly mode: 'headless' | 'visual' | 'auto';
567
+ readonly activeBackend: 'lightpanda' | 'chrome' | 'computer-use' | null;
568
+ readonly mode: 'headless' | 'visual' | 'auto' | 'computer-use';
518
569
 
519
570
  connect(): Promise<BrowserSession>;
520
571
  goto(url: string): Promise<ActionResult>;
@@ -535,8 +586,19 @@ export declare class BrowserSession {
535
586
  getCookies(): Promise<any[]>;
536
587
  setCookies(cookies: any[]): Promise<void>;
537
588
  getHistory(): Array<{ type: string; timestamp: number; backend: string }>;
538
- getBackend(): 'lightpanda' | 'chrome' | null;
589
+ getBackend(): 'lightpanda' | 'chrome' | 'computer-use' | null;
539
590
  close(): Promise<void>;
591
+
592
+ /** Coordinate-based actions (computer-use mode only) */
593
+ mouseMove(x: number, y: number): Promise<CoordinateActionResult>;
594
+ clickAt(x: number, y: number, button?: 'left' | 'right' | 'middle'): Promise<CoordinateActionResult>;
595
+ doubleClickAt(x: number, y: number, button?: 'left' | 'right' | 'middle'): Promise<CoordinateActionResult>;
596
+ drag(startX: number, startY: number, endX: number, endY: number): Promise<CoordinateActionResult>;
597
+ scrollAt(x: number, y: number, direction: 'up' | 'down', amount?: number): Promise<CoordinateActionResult>;
598
+ typeText(text: string): Promise<CoordinateActionResult>;
599
+ getCursorPosition(): Promise<{ x: number; y: number }>;
600
+ getScreenSize(): Promise<{ width: number; height: number }>;
601
+ getVncUrl(): string | null;
540
602
  }
541
603
 
542
604
  export function createSession(options?: BrowserSessionOptions): Promise<BrowserSession>;
package/index.js CHANGED
@@ -1798,5 +1798,7 @@ export async function bulkScrapeStream(urls, options = {}) {
1798
1798
  // Browser session exports
1799
1799
  export { BrowserSession, createSession } from './browser-session.js';
1800
1800
  export { default as LightPandaServer, getLightPandaServer, stopLightPandaServer } from './lightpanda-server.js';
1801
+ export { ComputerUseProvider } from './computer-use-provider.js';
1802
+ export { LocalProvider } from './providers/local-provider.js';
1801
1803
 
1802
1804
  export default BNCASmartScraper;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@monostate/node-scraper",
3
- "version": "2.1.0",
3
+ "version": "2.2.0",
4
4
  "description": "Intelligent web scraping with AI Q&A, PDF support and multi-level fallback system - 11x faster than traditional scrapers",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -17,6 +17,8 @@
17
17
  "browser-pool.js",
18
18
  "browser-session.js",
19
19
  "lightpanda-server.js",
20
+ "computer-use-provider.js",
21
+ "providers/",
20
22
  "README.md",
21
23
  "BULK_SCRAPING.md",
22
24
  "package.json",
@@ -0,0 +1,322 @@
1
+ /**
2
+ * LocalProvider — runs Xvfb + Chrome + xdotool on the local machine.
3
+ *
4
+ * Requires Linux with: xvfb, xdotool, chromium/google-chrome
5
+ * Optional: x11vnc, noVNC (for VNC streaming)
6
+ *
7
+ * @example
8
+ * import { createSession, LocalProvider } from '@monostate/node-scraper';
9
+ *
10
+ * const session = await createSession({
11
+ * mode: 'computer-use',
12
+ * provider: new LocalProvider({ screenWidth: 1280, screenHeight: 800 }),
13
+ * });
14
+ * await session.goto('https://example.com');
15
+ * await session.clickAt(640, 400);
16
+ * await session.close();
17
+ */
18
+
19
+ import { ComputerUseProvider } from '../computer-use-provider.js';
20
+ import { spawn, execFile } from 'child_process';
21
+ import { createServer } from 'net';
22
+
23
/**
 * Ask the OS for a currently unused TCP port on the loopback interface.
 *
 * A throwaway server is bound to port 0 on 127.0.0.1, the port it was given
 * is read back, and the server is closed again before the promise resolves.
 *
 * @returns {Promise<number>} the port number that was assigned
 */
function findFreePort() {
  return new Promise((resolve, reject) => {
    const probe = createServer();
    probe.on('error', reject);
    probe.listen(0, '127.0.0.1', () => {
      const { port } = probe.address();
      probe.close(() => {
        resolve(port);
      });
    });
  });
}
33
+
34
/**
 * Wait until a freshly spawned process is ready, polling a readiness check.
 *
 * The promise settles exactly once, and on every outcome the poll interval,
 * the timeout and the process listeners are released. Outcomes:
 *  - resolves as soon as `readyCheck()` returns (or resolves to) a truthy value;
 *  - rejects with the process's 'error' event;
 *  - rejects when the process exits with a non-zero code or is killed by a
 *    signal before it became ready;
 *  - rejects with 'Process startup timed out' after `timeoutMs`.
 *
 * A check that throws or rejects is treated as "not ready yet". An exit with
 * code 0 does not reject; polling simply continues until the timeout.
 *
 * @param {import('events').EventEmitter} proc - child process (or any emitter of 'error'/'exit')
 * @param {() => (boolean|Promise<boolean>)} readyCheck - readiness probe
 * @param {number} [timeoutMs=10000] - overall deadline in milliseconds
 * @param {number} [intervalMs=200] - delay between readiness probes in milliseconds
 * @returns {Promise<void>}
 */
function waitForProcess(proc, readyCheck, timeoutMs = 10000, intervalMs = 200) {
  return new Promise((resolve, reject) => {
    let settled = false;
    let timer = null;
    let poller = null;

    // Single exit point: release every resource, then settle the promise.
    const settle = (finish, value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      clearInterval(poller);
      proc.off('error', onError);
      proc.off('exit', onExit);
      finish(value);
    };

    function onError(err) {
      settle(reject, err);
    }

    function onExit(code, signal) {
      if (code !== 0 && code !== null) {
        settle(reject, new Error(`Process exited with code ${code}`));
      } else if (code === null && signal) {
        // Killed before becoming ready: fail now instead of waiting for the timeout.
        settle(reject, new Error(`Process was terminated by signal ${signal}`));
      }
    }

    timer = setTimeout(() => {
      settle(reject, new Error('Process startup timed out'));
    }, timeoutMs);

    poller = setInterval(async () => {
      try {
        if (await readyCheck()) settle(resolve);
      } catch { /* still starting */ }
    }, intervalMs);

    proc.on('error', onError);
    proc.on('exit', onExit);
  });
}
60
+
61
/**
 * Promise wrapper around `execFile` with a 10 second timeout.
 *
 * @param {string} cmd - executable to run
 * @param {string[]} args - arguments passed to the executable
 * @param {object} env - environment for the child process
 * @returns {Promise<{stdout: string, stderr: string}>} trimmed output streams
 * @throws rejects with the error reported by `execFile`
 */
function execAsync(cmd, args, env) {
  const options = { env, timeout: 10000 };
  return new Promise((resolve, reject) => {
    execFile(cmd, args, options, (err, stdout, stderr) => {
      if (err) {
        reject(err);
        return;
      }
      resolve({
        stdout: stdout.trim(),
        stderr: stderr.trim(),
      });
    });
  });
}
69
+
70
/** xdotool button numbers keyed by logical mouse button name. */
const XDOTOOL_BUTTONS = new Map([
  ['left', '1'],
  ['middle', '2'],
  ['right', '3'],
]);

/**
 * Computer-use provider that spawns Xvfb, Chrome and (optionally) x11vnc as
 * child processes and drives the pointer/keyboard with xdotool.
 */
export class LocalProvider extends ComputerUseProvider {
  /**
   * @param {object} [options]
   * @param {number} [options.screenWidth=1280]
   * @param {number} [options.screenHeight=800]
   * @param {boolean} [options.enableVnc=false]
   * @param {string} [options.chromePath] - Path to Chrome/Chromium binary (auto-detected)
   * @param {string[]} [options.chromeArgs] - Additional Chrome launch args
   */
  constructor(options = {}) {
    super();
    this.screenWidth = options.screenWidth || 1280;
    this.screenHeight = options.screenHeight || 800;
    this.enableVnc = options.enableVnc ?? false;
    this.chromePath = options.chromePath || null;
    this.chromeArgs = options.chromeArgs || [];

    this._display = null;
    this._displayNum = null;
    this._cdpPort = null;
    this._vncPort = null;
    this._xvfbProc = null;
    this._chromeProc = null;
    this._vncProc = null;
    this._env = null;
    this._cdpWsUrl = null;
  }

  /**
   * Start Xvfb, Chrome and (when enabled) x11vnc.
   *
   * If any step fails, everything that was already started is stopped again
   * before the error is rethrown, so a failed start leaves no child
   * processes behind.
   *
   * @returns {Promise<{cdpUrl: string, vncUrl: string|null, screenSize: {width: number, height: number}}>}
   * @throws {Error} on non-Linux platforms, when Xvfb or Chrome cannot be
   *   started, or when no Chrome binary can be found
   */
  async start() {
    if (process.platform !== 'linux') {
      throw new Error(
        'LocalProvider requires Linux (Xvfb + xdotool). ' +
        'On macOS/Windows, use Docker or a remote provider.'
      );
    }

    try {
      return await this._startEnvironment();
    } catch (err) {
      await this.stop();
      throw err;
    }
  }

  /**
   * Terminate x11vnc, Chrome and Xvfb (in that order) and wait for them to
   * exit. Each process gets SIGTERM first and SIGKILL if it is still alive
   * after a short grace period. Safe to call when nothing is running.
   *
   * @returns {Promise<void>}
   */
  async stop() {
    const procs = [this._vncProc, this._chromeProc, this._xvfbProc].filter(Boolean);
    this._vncProc = null;
    this._chromeProc = null;
    this._xvfbProc = null;
    await Promise.all(procs.map((proc) => this._terminate(proc)));
  }

  /**
   * Capture a PNG screenshot through CDP `Page.captureScreenshot`.
   *
   * @returns {Promise<{screenshot: string}>} base64 data URL (data:image/png;base64,...)
   * @throws {Error} when Chrome lists no targets, CDP reports an error, or no
   *   answer arrives within 10 seconds
   */
  async screenshot() {
    const listRes = await fetch(`http://127.0.0.1:${this._cdpPort}/json`);
    const targets = await listRes.json();
    // Prefer a real page: the list can also contain workers and other targets.
    const target = targets.find((t) => t.type === 'page') || targets[0];
    if (!target) throw new Error('No open pages for screenshot');

    const { default: WebSocket } = await import('ws');

    return new Promise((resolve, reject) => {
      const ws = new WebSocket(target.webSocketDebuggerUrl);
      const timer = setTimeout(() => {
        ws.close();
        reject(new Error('Screenshot timed out'));
      }, 10000);
      // Always release the timer so it neither fires late nor keeps the process alive.
      const fail = (err) => {
        clearTimeout(timer);
        reject(err);
      };

      ws.on('open', () => {
        ws.send(JSON.stringify({ id: 1, method: 'Page.captureScreenshot', params: { format: 'png' } }));
      });
      ws.on('message', (data) => {
        let msg;
        try {
          msg = JSON.parse(data.toString());
        } catch (err) {
          ws.close();
          fail(err);
          return;
        }
        if (msg.id !== 1) return;
        clearTimeout(timer);
        ws.close();
        if (msg.error) {
          reject(new Error(msg.error.message));
        } else {
          resolve({ screenshot: `data:image/png;base64,${msg.result.data}` });
        }
      });
      ws.on('error', fail);
    });
  }

  // ── Coordinate-based actions via xdotool ──────────────────

  /**
   * Move the pointer to (x, y); coordinates are rounded to integers.
   * @param {number} x
   * @param {number} y
   * @returns {Promise<{success: boolean}>}
   */
  async mouseMove(x, y) {
    await this._moveTo(x, y);
    return { success: true };
  }

  /**
   * Move to (x, y) and click once.
   * @param {number} x
   * @param {number} y
   * @param {'left'|'right'|'middle'} [button='left'] - unknown names fall back to left
   * @returns {Promise<{success: boolean}>}
   */
  async mouseClick(x, y, button = 'left') {
    const btn = XDOTOOL_BUTTONS.get(button) ?? '1';
    await this._moveTo(x, y);
    await this._xdotool(['click', btn]);
    return { success: true };
  }

  /**
   * Move to (x, y) and click twice, 50 ms apart.
   * @param {number} x
   * @param {number} y
   * @param {'left'|'right'|'middle'} [button='left'] - unknown names fall back to left
   * @returns {Promise<{success: boolean}>}
   */
  async mouseDoubleClick(x, y, button = 'left') {
    const btn = XDOTOOL_BUTTONS.get(button) ?? '1';
    await this._moveTo(x, y);
    await this._xdotool(['click', '--repeat', '2', '--delay', '50', btn]);
    return { success: true };
  }

  /**
   * Press the left button at the start point, move to the end point, release.
   * The button is released even if the move fails, so it is never left held.
   * @param {number} startX
   * @param {number} startY
   * @param {number} endX
   * @param {number} endY
   * @returns {Promise<{success: boolean}>}
   */
  async mouseDrag(startX, startY, endX, endY) {
    await this._moveTo(startX, startY);
    await this._xdotool(['mousedown', '1']);
    try {
      await this._moveTo(endX, endY);
    } finally {
      await this._xdotool(['mouseup', '1']);
    }
    return { success: true };
  }

  /**
   * Move to (x, y) and send wheel clicks (X button 4 = up, 5 = down).
   * @param {number} x
   * @param {number} y
   * @param {'up'|'down'} direction - anything other than 'up' scrolls down
   * @param {number} [amount=3] - number of scroll steps (rounded; values below 1 only move the pointer)
   * @returns {Promise<{success: boolean}>}
   */
  async scroll(x, y, direction, amount = 3) {
    await this._moveTo(x, y);
    const steps = Math.round(Number(amount));
    if (Number.isFinite(steps) && steps >= 1) {
      const btn = direction === 'up' ? '4' : '5';
      await this._xdotool(['click', '--repeat', String(steps), '--delay', '50', btn]);
    }
    return { success: true };
  }

  /**
   * Press a key or key combination.
   * @param {string} key - xdotool-compatible key name
   * @returns {Promise<{success: boolean}>}
   */
  async pressKey(key) {
    await this._xdotool(['key', key]);
    return { success: true };
  }

  /**
   * Type text through `xdotool type`, 50 characters per invocation.
   *
   * The text is split by code point so a chunk boundary never cuts a
   * surrogate pair, and it is passed after `--` so that text starting with
   * '-' is typed instead of being parsed as an xdotool option.
   *
   * @param {string} text
   * @returns {Promise<{success: boolean}>}
   * @throws {TypeError} when text is not a string
   */
  async typeText(text) {
    if (typeof text !== 'string') {
      throw new TypeError('typeText(text) requires a string');
    }
    const CHUNK = 50;
    const chars = Array.from(text);
    for (let i = 0; i < chars.length; i += CHUNK) {
      const chunk = chars.slice(i, i + CHUNK).join('');
      await this._xdotool(['type', '--delay', '12', '--clearmodifiers', '--', chunk]);
    }
    return { success: true };
  }

  /**
   * Read the pointer position from `xdotool getmouselocation --shell`.
   * @returns {Promise<{x: number, y: number}>} a coordinate that cannot be parsed is reported as 0
   */
  async getCursorPosition() {
    const { stdout } = await this._xdotool(['getmouselocation', '--shell']);
    const x = Number.parseInt(stdout.match(/X=(\d+)/)?.[1] || '0', 10);
    const y = Number.parseInt(stdout.match(/Y=(\d+)/)?.[1] || '0', 10);
    return { x, y };
  }

  /**
   * @returns {Promise<{width: number, height: number}>} the configured display size
   */
  async getScreenSize() {
    return { width: this.screenWidth, height: this.screenHeight };
  }

  // ── Internal helpers ──────────────────────────────────────

  /** Run the full startup sequence; start() handles cleanup on failure. */
  async _startEnvironment() {
    this._cdpPort = await findFreePort();

    // 1. Start Xvfb
    await this._startXvfb();

    // 2. Find Chrome binary
    const chromePath = this.chromePath || await this._findChrome();

    // 3. Start Chrome
    const chromeArgs = [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-gpu',
      `--remote-debugging-port=${this._cdpPort}`,
      '--remote-debugging-address=127.0.0.1',
      `--window-size=${this.screenWidth},${this.screenHeight}`,
      '--disable-features=dbus',
      '--disable-sync',
      '--disable-extensions',
      '--disable-component-update',
      '--no-first-run',
      `--user-data-dir=/tmp/chrome-local-provider-${this._displayNum}`,
      ...this.chromeArgs,
      'about:blank',
    ];
    this._chromeProc = this._spawn(chromePath, chromeArgs);

    // Wait for CDP to be ready, then read the browser-level WebSocket URL
    const versionUrl = `http://127.0.0.1:${this._cdpPort}/json/version`;
    await waitForProcess(this._chromeProc, async () => {
      const res = await fetch(versionUrl);
      return res.ok;
    }, 15000);
    const versionRes = await fetch(versionUrl);
    const versionData = await versionRes.json();
    this._cdpWsUrl = versionData.webSocketDebuggerUrl;

    // 4. Optionally start VNC
    const vncUrl = this.enableVnc ? await this._startVnc() : null;

    return {
      cdpUrl: this._cdpWsUrl,
      vncUrl,
      screenSize: { width: this.screenWidth, height: this.screenHeight },
    };
  }

  /**
   * Start Xvfb on a random display in :10-:99. If Xvfb exits during startup
   * (for instance because the display number is already taken) another
   * number is tried, up to five times.
   */
  async _startXvfb() {
    const MAX_ATTEMPTS = 5;
    for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt += 1) {
      this._displayNum = 10 + Math.floor(Math.random() * 90); // :10-:99
      this._display = `:${this._displayNum}`;
      this._env = {
        ...process.env,
        DISPLAY: this._display,
        DBUS_SESSION_BUS_ADDRESS: '/dev/null',
      };

      const { proc, error, exited } = await this._spawnAndSettle('Xvfb', [
        this._display,
        '-screen', '0', `${this.screenWidth}x${this.screenHeight}x24`,
        '-ac',
      ], 500);

      if (error) {
        // e.g. ENOENT: retrying with another display number cannot help.
        throw new Error(`Failed to launch Xvfb: ${error.message}`, { cause: error });
      }
      if (!exited) {
        this._xvfbProc = proc;
        return;
      }
    }
    throw new Error(`Xvfb exited during startup on ${MAX_ATTEMPTS} different display numbers`);
  }

  /**
   * Start x11vnc for the current display. VNC is optional, so every failure
   * (no free port, x11vnc missing, early exit) yields null instead of throwing.
   *
   * NOTE(review): x11vnc is started with -nopw and without -localhost, so it
   * may accept unauthenticated connections on every interface even though
   * the returned URL points at 127.0.0.1 — confirm this is intended.
   *
   * @returns {Promise<string|null>} vnc:// URL, or null when VNC is unavailable
   */
  async _startVnc() {
    try {
      const port = await findFreePort();
      const { proc, error, exited } = await this._spawnAndSettle('x11vnc', [
        '-display', this._display,
        '-rfbport', String(port),
        '-shared', '-nopw', '-forever',
      ], 500);
      if (error || exited) return null;
      this._vncPort = port;
      this._vncProc = proc;
      return `vnc://127.0.0.1:${port}`;
    } catch {
      // VNC is optional — don't fail if x11vnc is not installed
      return null;
    }
  }

  /**
   * Spawn a child process in the provider's environment.
   *
   * stdio is ignored rather than piped: nothing reads the child's output, and
   * an undrained pipe can block a chatty child once the pipe buffer is full.
   * A no-op 'error' listener stays attached so that a spawn failure (e.g. a
   * missing binary) never surfaces as an uncaught 'error' event.
   */
  _spawn(cmd, args) {
    const proc = spawn(cmd, args, { stdio: 'ignore', env: this._env });
    proc.on('error', () => { /* reported by the caller's own listener */ });
    return proc;
  }

  /**
   * Spawn a process, give it `settleMs` to come up, and report whether it
   * failed to spawn or already exited in that time.
   * @returns {Promise<{proc: import('child_process').ChildProcess, error: Error|null, exited: boolean}>}
   */
  async _spawnAndSettle(cmd, args, settleMs) {
    const proc = this._spawn(cmd, args);
    let error = null;
    proc.once('error', (err) => { error = err; });
    await new Promise((resolve) => setTimeout(resolve, settleMs));
    const exited = proc.exitCode !== null || proc.signalCode !== null;
    return { proc, error, exited };
  }

  /**
   * SIGTERM a child, wait up to `graceMs` for it to exit, then SIGKILL it.
   * Processes that never spawned or have already exited are skipped.
   */
  async _terminate(proc, graceMs = 2000) {
    const isRunning = () =>
      proc.pid !== undefined && proc.exitCode === null && proc.signalCode === null;
    if (!isRunning()) return;

    let timer;
    const exited = new Promise((resolve) => proc.once('exit', () => resolve(true)));
    const graceOver = new Promise((resolve) => {
      timer = setTimeout(() => resolve(false), graceMs);
    });

    try { proc.kill('SIGTERM'); } catch { /* ignore */ }
    const exitedInTime = await Promise.race([exited, graceOver]);
    clearTimeout(timer);

    if (!exitedInTime && isRunning()) {
      try { proc.kill('SIGKILL'); } catch { /* ignore */ }
    }
  }

  /** Move the pointer; `--` keeps negative coordinates from being read as options. */
  async _moveTo(x, y) {
    return this._xdotool(['mousemove', '--', String(Math.round(x)), String(Math.round(y))]);
  }

  /** Run xdotool against the provider's display. */
  async _xdotool(args) {
    return execAsync('xdotool', args, this._env);
  }

  /**
   * Locate a Chrome/Chromium binary on PATH using `which`.
   * @returns {Promise<string>} absolute path of the first candidate found
   * @throws {Error} when none of the candidates is installed
   */
  async _findChrome() {
    const candidates = [
      'chromium-browser',
      'chromium',
      'google-chrome',
      'google-chrome-stable',
    ];
    for (const name of candidates) {
      try {
        const { stdout } = await execAsync('which', [name], this._env);
        if (stdout) return stdout;
      } catch { /* not found */ }
    }
    throw new Error(
      'Chrome/Chromium not found. Install with: apt-get install chromium ' +
      'or pass chromePath option.'
    );
  }
}
321
+
322
+ export default LocalProvider;