@monostate/node-scraper 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,168 @@
1
+ /**
2
+ * Abstract base class for computer-use providers.
3
+ *
4
+ * A provider manages a display server (e.g. Xvfb), a browser (Chrome),
5
+ * and optionally a VNC server. It exposes coordinate-based actions
6
+ * (mouse, keyboard) and screenshots for AI computer-use agents.
7
+ *
8
+ * Open-source implementations:
9
+ * - LocalProvider: spawns Xvfb + Chrome + xdotool on the local machine
10
+ *
11
+ * To build your own provider (Docker, Kubernetes, cloud VMs, etc.),
12
+ * extend this class and implement all methods.
13
+ *
14
+ * @example
15
+ * import { ComputerUseProvider } from '@monostate/node-scraper';
16
+ *
17
+ * class MyCloudProvider extends ComputerUseProvider {
18
+ * async start() {
19
+ * // spin up a VM, return CDP URL
20
+ * return { cdpUrl: 'ws://...', vncUrl: 'https://...', screenSize: { width: 1280, height: 800 } };
21
+ * }
22
+ * // ... implement all other methods ...
23
+ * }
24
+ */
25
+
26
+ /**
27
+ * @typedef {Object} ProviderInfo
28
+ * @property {string} cdpUrl - WebSocket URL for Chrome DevTools Protocol (ws://...)
29
+ * @property {string|null} vncUrl - noVNC URL for live browser view (null if unavailable)
30
+ * @property {{width: number, height: number}} screenSize - Virtual display dimensions
31
+ */
32
+
33
+ /**
34
+ * @typedef {Object} CoordinateActionResult
35
+ * @property {boolean} success
36
+ * @property {string} [screenshot] - base64 data URL of screenshot after action (optional)
37
+ * @property {string} [error] - error message if failed
38
+ */
39
+
40
+ export class ComputerUseProvider {
41
+ /**
42
+ * Start the environment: display server, browser, VNC, etc.
43
+ * @returns {Promise<ProviderInfo>}
44
+ */
45
+ async start() {
46
+ throw new Error('ComputerUseProvider.start() not implemented');
47
+ }
48
+
49
+ /**
50
+ * Stop the environment and release all resources.
51
+ * @returns {Promise<void>}
52
+ */
53
+ async stop() {
54
+ throw new Error('ComputerUseProvider.stop() not implemented');
55
+ }
56
+
57
+ /**
58
+ * Capture a screenshot of the full virtual display.
59
+ * @returns {Promise<{screenshot: string}>} base64 data URL (data:image/png;base64,...)
60
+ */
61
+ async screenshot() {
62
+ throw new Error('ComputerUseProvider.screenshot() not implemented');
63
+ }
64
+
65
+ // ── Coordinate-based actions ──────────────────────────────
66
+
67
+ /**
68
+ * Move the mouse cursor to (x, y).
69
+ * @param {number} x
70
+ * @param {number} y
71
+ * @returns {Promise<CoordinateActionResult>}
72
+ */
73
+ async mouseMove(x, y) {
74
+ throw new Error('ComputerUseProvider.mouseMove() not implemented');
75
+ }
76
+
77
+ /**
78
+ * Click at (x, y).
79
+ * @param {number} x
80
+ * @param {number} y
81
+ * @param {'left'|'right'|'middle'} [button='left']
82
+ * @returns {Promise<CoordinateActionResult>}
83
+ */
84
+ async mouseClick(x, y, button = 'left') {
85
+ throw new Error('ComputerUseProvider.mouseClick() not implemented');
86
+ }
87
+
88
+ /**
89
+ * Double-click at (x, y).
90
+ * @param {number} x
91
+ * @param {number} y
92
+ * @param {'left'|'right'|'middle'} [button='left']
93
+ * @returns {Promise<CoordinateActionResult>}
94
+ */
95
+ async mouseDoubleClick(x, y, button = 'left') {
96
+ throw new Error('ComputerUseProvider.mouseDoubleClick() not implemented');
97
+ }
98
+
99
+ /**
100
+ * Drag from (startX, startY) to (endX, endY).
101
+ * @param {number} startX
102
+ * @param {number} startY
103
+ * @param {number} endX
104
+ * @param {number} endY
105
+ * @returns {Promise<CoordinateActionResult>}
106
+ */
107
+ async mouseDrag(startX, startY, endX, endY) {
108
+ throw new Error('ComputerUseProvider.mouseDrag() not implemented');
109
+ }
110
+
111
+ /**
112
+ * Scroll at (x, y) in a direction.
113
+ * @param {number} x
114
+ * @param {number} y
115
+ * @param {'up'|'down'} direction
116
+ * @param {number} [amount=3] - number of scroll steps
117
+ * @returns {Promise<CoordinateActionResult>}
118
+ */
119
+ async scroll(x, y, direction, amount = 3) {
120
+ throw new Error('ComputerUseProvider.scroll() not implemented');
121
+ }
122
+
123
+ /**
124
+ * Press a key or key combination (e.g. 'Return', 'ctrl+c', 'alt+Tab').
125
+ * @param {string} key - xdotool-compatible key name
126
+ * @returns {Promise<CoordinateActionResult>}
127
+ */
128
+ async pressKey(key) {
129
+ throw new Error('ComputerUseProvider.pressKey() not implemented');
130
+ }
131
+
132
+ /**
133
+ * Type text character by character.
134
+ * @param {string} text
135
+ * @returns {Promise<CoordinateActionResult>}
136
+ */
137
+ async typeText(text) {
138
+ throw new Error('ComputerUseProvider.typeText() not implemented');
139
+ }
140
+
141
+ /**
142
+ * Wait for a duration.
143
+ * @param {number} ms - milliseconds to wait
144
+ * @returns {Promise<CoordinateActionResult>}
145
+ */
146
+ async wait(ms) {
147
+ await new Promise(resolve => setTimeout(resolve, ms));
148
+ return { success: true };
149
+ }
150
+
151
+ /**
152
+ * Get the current cursor position.
153
+ * @returns {Promise<{x: number, y: number}>}
154
+ */
155
+ async getCursorPosition() {
156
+ throw new Error('ComputerUseProvider.getCursorPosition() not implemented');
157
+ }
158
+
159
+ /**
160
+ * Get the virtual display dimensions.
161
+ * @returns {Promise<{width: number, height: number}>}
162
+ */
163
+ async getScreenSize() {
164
+ throw new Error('ComputerUseProvider.getScreenSize() not implemented');
165
+ }
166
+ }
167
+
168
+ export default ComputerUseProvider;
package/index.d.ts CHANGED
@@ -455,6 +455,165 @@ export function bulkScrape(urls: string[], options?: BulkScrapeOptions): Promise
455
455
  */
456
456
  export function bulkScrapeStream(urls: string[], options: BulkScrapeStreamOptions): Promise<BulkScrapeStreamStats>;
457
457
 
458
+ // ── Computer Use Provider ─────────────────────────────────────
459
+
460
/** Connection details returned by `ComputerUseProvider.start()`. */
export interface ProviderInfo {
  /** WebSocket URL for the Chrome DevTools Protocol (ws://...). */
  cdpUrl: string;
  /** noVNC URL for the live browser view; `null` if unavailable. */
  vncUrl: string | null;
  /** Virtual display dimensions. */
  screenSize: {
    width: number;
    height: number;
  };
}
465
+
466
/** Outcome of a coordinate-based (mouse / keyboard) action. */
export interface CoordinateActionResult {
  /** Whether the action succeeded. */
  success: boolean;
  /** Base64 data URL of a screenshot taken after the action, when provided. */
  screenshot?: string | null;
  /** Error message if the action failed. */
  error?: string | null;
}
471
+
472
/**
 * Base class for computer-use providers. Extend it and implement every
 * method; apart from `wait`, the base implementations reject with a
 * "not implemented" error.
 */
export declare class ComputerUseProvider {
  /** Start the environment: display server, browser, VNC, etc. */
  start(): Promise<ProviderInfo>;

  /** Stop the environment and release all resources. */
  stop(): Promise<void>;

  /** Capture the full virtual display as a base64 data URL. */
  screenshot(): Promise<{ screenshot: string }>;

  /** Move the mouse cursor to (x, y). */
  mouseMove(x: number, y: number): Promise<CoordinateActionResult>;

  /** Click at (x, y); the base implementation defaults `button` to 'left'. */
  mouseClick(
    x: number,
    y: number,
    button?: 'left' | 'right' | 'middle',
  ): Promise<CoordinateActionResult>;

  /** Double-click at (x, y); the base implementation defaults `button` to 'left'. */
  mouseDoubleClick(
    x: number,
    y: number,
    button?: 'left' | 'right' | 'middle',
  ): Promise<CoordinateActionResult>;

  /** Drag from (startX, startY) to (endX, endY). */
  mouseDrag(
    startX: number,
    startY: number,
    endX: number,
    endY: number,
  ): Promise<CoordinateActionResult>;

  /** Scroll at (x, y); `amount` is the number of scroll steps (base default 3). */
  scroll(
    x: number,
    y: number,
    direction: 'up' | 'down',
    amount?: number,
  ): Promise<CoordinateActionResult>;

  /** Press a key or key combination, e.g. 'Return', 'ctrl+c', 'alt+Tab'. */
  pressKey(key: string): Promise<CoordinateActionResult>;

  /** Type text character by character. */
  typeText(text: string): Promise<CoordinateActionResult>;

  /** Wait for `ms` milliseconds. */
  wait(ms: number): Promise<CoordinateActionResult>;

  /** Get the current cursor position. */
  getCursorPosition(): Promise<{ x: number; y: number }>;

  /** Get the virtual display dimensions. */
  getScreenSize(): Promise<{ width: number; height: number }>;
}
487
+
488
/**
 * Options accepted by the `LocalProvider` constructor.
 *
 * NOTE(review): the implementation (providers/local-provider.js) is not part
 * of this declaration file; the member notes below are inferred from the
 * names and should be confirmed against it.
 */
export interface LocalProviderOptions {
  /** Presumably the virtual display width in pixels — TODO confirm. */
  screenWidth?: number;
  /** Presumably the virtual display height in pixels — TODO confirm. */
  screenHeight?: number;
  /** Presumably toggles the VNC server — TODO confirm. */
  enableVnc?: boolean;
  /** Presumably the path to the Chrome executable — TODO confirm. */
  chromePath?: string;
  /** Presumably extra command-line arguments for Chrome — TODO confirm. */
  chromeArgs?: string[];
}
495
+
496
/**
 * Provider for the local machine. The package's own documentation describes
 * it as spawning Xvfb + Chrome + xdotool.
 */
export declare class LocalProvider extends ComputerUseProvider {
  /** @param options - optional settings; every field may be omitted */
  constructor(options?: LocalProviderOptions);
}
499
+
500
+ // ── Browser Session ───────────────────────────────────────────
501
+
502
/**
 * Options for `BrowserSession` / `createSession`.
 *
 * NOTE(review): the session implementation is not visible from this
 * declaration file; confirm defaults and units against browser-session.js.
 */
export interface BrowserSessionOptions {
  /** Requested session mode. */
  mode?: 'headless' | 'visual' | 'auto' | 'computer-use';
  /** Timeout value — presumably milliseconds; TODO confirm. */
  timeout?: number;
  /** User-agent string to use. */
  userAgent?: string;
  /** Path to the LightPanda binary. */
  lightpandaPath?: string;
  /** Enables verbose output. */
  verbose?: boolean;
  /** Computer-use provider instance — presumably used in 'computer-use' mode; TODO confirm. */
  provider?: ComputerUseProvider;
}
510
+
511
/** Snapshot of the current page as returned by `BrowserSession.getPageState()`. */
export interface PageState {
  /** Current page URL. */
  url: string;
  /** Page title. */
  title: string;
  /** Text content of the page. */
  text: string;
  /** Interactive elements found on the page, each addressable by `selector`. */
  interactiveElements: {
    type: 'button' | 'link' | 'input' | 'select';
    text?: string;
    label?: string;
    href?: string;
    selector: string;
    tag?: string;
    inputType?: string;
    value?: string;
  }[];
  /** Screenshot of the page, when one was requested. */
  screenshot?: string;
  /** Backend that produced this state. */
  backend: 'lightpanda' | 'chrome' | 'computer-use';
  /** Actions recorded for the session so far. */
  sessionHistory: { type: string; timestamp: number; backend: string }[];
}
529
+
530
/** Result of a selector-based `BrowserSession` action. */
export interface ActionResult {
  /** Whether the action succeeded. */
  success: boolean;
  /** URL reported with the result, when provided. */
  url?: string;
  /** Screenshot reported with the result, when provided. */
  screenshot?: string;
  /** Name of the backend that handled the action, when provided. */
  backend?: string;
}
536
+
537
/**
 * Serializable description of one action for `BrowserSession.executeAction()`.
 * `type` selects the action; the remaining fields are optional arguments.
 */
export interface BrowserAction {
  /** Which action to run. */
  type:
    | 'goto'
    | 'click'
    | 'type'
    | 'scroll'
    | 'hover'
    | 'select'
    | 'pressKey'
    | 'goBack'
    | 'goForward'
    | 'screenshot'
    | 'extractContent'
    | 'waitFor'
    | 'mouseMove'
    | 'clickAt'
    | 'doubleClickAt'
    | 'drag'
    | 'scrollAt'
    | 'typeText'
    | 'getCursorPosition'
    | 'getScreenSize';

  /* Selector-based action fields */
  url?: string;
  selector?: string;
  text?: string;
  key?: string;
  direction?: 'up' | 'down';
  amount?: number;
  values?: string[];
  timeout?: number;
  expectNavigation?: boolean;
  waitForNavigation?: boolean;
  clear?: boolean;
  delay?: number;
  fullPage?: boolean;
  /**
   * NOTE(review): presumably the screenshot image format, named `type_`
   * because `type` is taken by the action discriminator — confirm.
   */
  type_?: 'png' | 'jpeg' | 'webp';
  includeScreenshot?: boolean;

  /* Coordinate-based action fields */
  x?: number;
  y?: number;
  button?: 'left' | 'right' | 'middle';
  startX?: number;
  startY?: number;
  endX?: number;
  endY?: number;
}
563
+
564
/**
 * Browser session exposing selector-based page actions and, in computer-use
 * mode, coordinate-based actions.
 */
export declare class BrowserSession {
  constructor(options?: BrowserSessionOptions);

  /** Backend currently in use, or `null`. */
  readonly activeBackend: 'lightpanda' | 'chrome' | 'computer-use' | null;
  /** Mode of the session. */
  readonly mode: 'headless' | 'visual' | 'auto' | 'computer-use';

  /* Lifecycle and navigation */
  connect(): Promise<BrowserSession>;
  goto(url: string): Promise<ActionResult>;
  goBack(): Promise<void>;
  goForward(): Promise<void>;
  close(): Promise<void>;

  /* Selector-based actions */
  click(
    selector: string,
    options?: { timeout?: number; expectNavigation?: boolean; waitForNavigation?: boolean },
  ): Promise<ActionResult>;
  type(
    selector: string,
    text: string,
    options?: { timeout?: number; clear?: boolean; delay?: number },
  ): Promise<ActionResult>;
  scroll(direction?: 'up' | 'down', amount?: number): Promise<ActionResult>;
  hover(selector: string): Promise<ActionResult>;
  select(selector: string, ...values: string[]): Promise<ActionResult>;
  pressKey(key: string): Promise<ActionResult>;
  waitFor(selector: string, timeout?: number): Promise<ActionResult>;
  executeAction(action: BrowserAction): Promise<ActionResult>;

  /* Page inspection */
  screenshot(options?: {
    type?: 'png' | 'jpeg' | 'webp';
    fullPage?: boolean;
  }): Promise<{ success: boolean; screenshot: string; backend: string }>;
  extractContent(): Promise<{
    title: string;
    metaDescription: string;
    headings: any[];
    paragraphs: string[];
    links: any[];
    bodyText: string;
    url: string;
  }>;
  evaluate<T>(fn: (...args: any[]) => T, ...args: any[]): Promise<T>;
  getPageState(options?: { includeScreenshot?: boolean }): Promise<PageState>;

  /* Session state */
  getCookies(): Promise<any[]>;
  setCookies(cookies: any[]): Promise<void>;
  getHistory(): Array<{ type: string; timestamp: number; backend: string }>;
  getBackend(): 'lightpanda' | 'chrome' | 'computer-use' | null;

  /** Coordinate-based actions (computer-use mode only) */
  mouseMove(x: number, y: number): Promise<CoordinateActionResult>;
  clickAt(x: number, y: number, button?: 'left' | 'right' | 'middle'): Promise<CoordinateActionResult>;
  doubleClickAt(x: number, y: number, button?: 'left' | 'right' | 'middle'): Promise<CoordinateActionResult>;
  drag(startX: number, startY: number, endX: number, endY: number): Promise<CoordinateActionResult>;
  scrollAt(x: number, y: number, direction: 'up' | 'down', amount?: number): Promise<CoordinateActionResult>;
  typeText(text: string): Promise<CoordinateActionResult>;
  getCursorPosition(): Promise<{ x: number; y: number }>;
  getScreenSize(): Promise<{ width: number; height: number }>;
  getVncUrl(): string | null;
}

/** Create a `BrowserSession`; resolves with the session instance. */
export function createSession(options?: BrowserSessionOptions): Promise<BrowserSession>;
605
+
606
/** Manages a LightPanda child process that serves CDP on 127.0.0.1. */
export declare class LightPandaServer {
  /** @param binaryPath - LightPanda executable; located automatically when omitted */
  constructor(binaryPath?: string);

  /**
   * Launch the server and resolve with its `ws://host:port` endpoint.
   * @param port - port to listen on; a free port is picked when omitted
   */
  start(port?: number): Promise<string>;

  /** `ws://host:port` endpoint string for the current host and port. */
  getEndpoint(): string;

  /** True while the process exists and has been marked ready. */
  isRunning(): boolean;

  /** Send SIGTERM to the process (if any) and reset the ready state and port. */
  stop(): void;
}
613
+
614
/**
 * Return the shared `LightPandaServer`, creating it on first use.
 * `binaryPath` only takes effect on the call that creates the instance.
 */
export function getLightPandaServer(binaryPath?: string): LightPandaServer;

/** Stop the shared `LightPandaServer` (if one exists) and discard it. */
export function stopLightPandaServer(): void;
616
+
458
617
  /**
459
618
  * Default export - same as BNCASmartScraper class
460
619
  */
package/index.js CHANGED
@@ -1795,4 +1795,10 @@ export async function bulkScrapeStream(urls, options = {}) {
1795
1795
  }
1796
1796
  }
1797
1797
 
1798
+ // Browser session exports
1799
+ export { BrowserSession, createSession } from './browser-session.js';
1800
+ export { default as LightPandaServer, getLightPandaServer, stopLightPandaServer } from './lightpanda-server.js';
1801
+ export { ComputerUseProvider } from './computer-use-provider.js';
1802
+ export { LocalProvider } from './providers/local-provider.js';
1803
+
1798
1804
  export default BNCASmartScraper;
@@ -0,0 +1,151 @@
1
import { spawn } from 'child_process';
import fs from 'fs';
import { createServer } from 'net';
import path from 'path';
import { fileURLToPath } from 'url';
5
+
6
+ class LightPandaServer {
7
+ constructor(binaryPath) {
8
+ this.binaryPath = binaryPath || this._findBinary();
9
+ this.process = null;
10
+ this.host = '127.0.0.1';
11
+ this.port = null;
12
+ this.ready = false;
13
+ }
14
+
15
+ async start(port) {
16
+ if (this.process && this.ready) return this.getEndpoint();
17
+
18
+ this.port = port || await this._findAvailablePort();
19
+
20
+ return new Promise((resolve, reject) => {
21
+ const args = [
22
+ 'serve',
23
+ '--host', this.host,
24
+ '--port', String(this.port),
25
+ '--cdp_max_connections', '16',
26
+ ];
27
+
28
+ this.process = spawn(this.binaryPath, args, {
29
+ stdio: ['ignore', 'pipe', 'pipe'],
30
+ });
31
+
32
+ let stderr = '';
33
+
34
+ const onReady = () => {
35
+ this.ready = true;
36
+ resolve(this.getEndpoint());
37
+ };
38
+
39
+ // LP prints to stderr when ready — wait for it or poll /json/version
40
+ this.process.stderr.on('data', (data) => {
41
+ stderr += data.toString();
42
+ // LightPanda logs server start to stderr
43
+ if (stderr.includes('Listening on') || stderr.includes('server started')) {
44
+ onReady();
45
+ }
46
+ });
47
+
48
+ this.process.on('error', (err) => {
49
+ this.ready = false;
50
+ reject(new Error(`Failed to start LightPanda: ${err.message}`));
51
+ });
52
+
53
+ this.process.on('exit', (code) => {
54
+ this.ready = false;
55
+ this.process = null;
56
+ if (!this.ready) {
57
+ reject(new Error(`LightPanda exited with code ${code}: ${stderr}`));
58
+ }
59
+ });
60
+
61
+ // Fallback: poll /json/version if no stderr signal within 3s
62
+ setTimeout(async () => {
63
+ if (this.ready) return;
64
+ try {
65
+ const res = await fetch(`http://${this.host}:${this.port}/json/version`);
66
+ if (res.ok) onReady();
67
+ } catch {
68
+ // Still starting up, give it more time
69
+ }
70
+ }, 1500);
71
+
72
+ // Hard timeout
73
+ setTimeout(() => {
74
+ if (!this.ready) {
75
+ this.stop();
76
+ reject(new Error(`LightPanda failed to start within 5s. stderr: ${stderr}`));
77
+ }
78
+ }, 5000);
79
+ });
80
+ }
81
+
82
+ getEndpoint() {
83
+ return `ws://${this.host}:${this.port}`;
84
+ }
85
+
86
+ isRunning() {
87
+ return this.ready && this.process !== null;
88
+ }
89
+
90
+ stop() {
91
+ if (this.process) {
92
+ try {
93
+ this.process.kill('SIGTERM');
94
+ } catch {
95
+ // already dead
96
+ }
97
+ this.process = null;
98
+ }
99
+ this.ready = false;
100
+ this.port = null;
101
+ }
102
+
103
+ async _findAvailablePort() {
104
+ return new Promise((resolve, reject) => {
105
+ const server = createServer();
106
+ server.listen(0, '127.0.0.1', () => {
107
+ const port = server.address().port;
108
+ server.close(() => resolve(port));
109
+ });
110
+ server.on('error', reject);
111
+ });
112
+ }
113
+
114
+ _findBinary() {
115
+ // Check common locations
116
+ const candidates = [
117
+ path.join(path.dirname(new URL(import.meta.url).pathname), 'bin', 'lightpanda'),
118
+ '/usr/local/bin/lightpanda',
119
+ '/usr/bin/lightpanda',
120
+ ];
121
+
122
+ for (const p of candidates) {
123
+ if (fs.existsSync(p)) return p;
124
+ }
125
+
126
+ return 'lightpanda'; // hope it's on PATH
127
+ }
128
+ }
129
+
130
+ // Singleton instance — shared across all sessions
131
+ let _instance = null;
132
+
133
/**
 * Return the shared LightPandaServer, creating it on first use.
 * `binaryPath` is only consulted when the instance is created; later calls
 * return the existing instance regardless of the argument.
 *
 * @param {string} [binaryPath]
 * @returns {LightPandaServer}
 */
export function getLightPandaServer(binaryPath) {
  if (_instance) return _instance;

  _instance = new LightPandaServer(binaryPath);
  return _instance;
}
139
+
140
/**
 * Stop the shared LightPandaServer, if one exists, and forget it so the next
 * `getLightPandaServer()` call builds a fresh instance.
 */
export function stopLightPandaServer() {
  if (!_instance) return;

  _instance.stop();
  _instance = null;
}
146
+
147
// Shut the shared LightPanda process down together with the host process.
//
// Adding a SIGINT/SIGTERM listener removes Node's default "terminate on
// signal" behavior, so a plain `process.on(signal, stop)` would make Ctrl+C
// stop LightPanda but leave the host running. Each handler is therefore
// one-shot, and re-raises the signal when no other listener is registered so
// that the default termination still happens. When the host application has
// its own handler, the decision to exit is left to it.
for (const signal of ['SIGTERM', 'SIGINT']) {
  process.once(signal, () => {
    stopLightPandaServer();
    // `once` has already removed this handler, so a count of zero means
    // nothing else intends to handle the signal.
    if (process.listenerCount(signal) === 0) {
      process.kill(process.pid, signal);
    }
  });
}
process.on('beforeExit', stopLightPandaServer);
// 'beforeExit' is not emitted for explicit process.exit() calls; 'exit' is,
// and stopLightPandaServer is synchronous, so it is safe to run there.
process.on('exit', stopLightPandaServer);
150
+
151
+ export default LightPandaServer;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@monostate/node-scraper",
3
- "version": "2.0.0",
3
+ "version": "2.2.0",
4
4
  "description": "Intelligent web scraping with AI Q&A, PDF support and multi-level fallback system - 11x faster than traditional scrapers",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -15,6 +15,10 @@
15
15
  "index.js",
16
16
  "index.d.ts",
17
17
  "browser-pool.js",
18
+ "browser-session.js",
19
+ "lightpanda-server.js",
20
+ "computer-use-provider.js",
21
+ "providers/",
18
22
  "README.md",
19
23
  "BULK_SCRAPING.md",
20
24
  "package.json",
@@ -34,6 +38,9 @@
34
38
  "data-extraction",
35
39
  "automation",
36
40
  "browser",
41
+ "browser-use",
42
+ "cdp",
43
+ "ai-agent",
37
44
  "ai-powered",
38
45
  "question-answering",
39
46
  "pdf-parsing",