@monostate/node-scraper 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -0
- package/browser-session.js +685 -0
- package/computer-use-provider.js +168 -0
- package/index.d.ts +159 -0
- package/index.js +6 -0
- package/lightpanda-server.js +151 -0
- package/package.json +8 -1
- package/providers/local-provider.js +322 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Abstract base class for computer-use providers.
|
|
3
|
+
*
|
|
4
|
+
* A provider manages a display server (e.g. Xvfb), a browser (Chrome),
|
|
5
|
+
* and optionally a VNC server. It exposes coordinate-based actions
|
|
6
|
+
* (mouse, keyboard) and screenshots for AI computer-use agents.
|
|
7
|
+
*
|
|
8
|
+
* Open-source implementations:
|
|
9
|
+
* - LocalProvider: spawns Xvfb + Chrome + xdotool on the local machine
|
|
10
|
+
*
|
|
11
|
+
* To build your own provider (Docker, Kubernetes, cloud VMs, etc.),
|
|
12
|
+
* extend this class and override every method except wait(), which has a working default.
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* import { ComputerUseProvider } from '@monostate/node-scraper';
|
|
16
|
+
*
|
|
17
|
+
* class MyCloudProvider extends ComputerUseProvider {
|
|
18
|
+
* async start() {
|
|
19
|
+
* // spin up a VM, return CDP URL
|
|
20
|
+
* return { cdpUrl: 'ws://...', vncUrl: 'https://...', screenSize: { width: 1280, height: 800 } };
|
|
21
|
+
* }
|
|
22
|
+
* // ... implement all other methods ...
|
|
23
|
+
* }
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* @typedef {Object} ProviderInfo
|
|
28
|
+
* @property {string} cdpUrl - WebSocket URL for Chrome DevTools Protocol (ws://...)
|
|
29
|
+
* @property {string|null} vncUrl - noVNC URL for live browser view (null if unavailable)
|
|
30
|
+
* @property {{width: number, height: number}} screenSize - Virtual display dimensions
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* @typedef {Object} CoordinateActionResult
|
|
35
|
+
* @property {boolean} success
|
|
36
|
+
* @property {string} [screenshot] - base64 data URL of screenshot after action (optional)
|
|
37
|
+
* @property {string} [error] - error message if failed
|
|
38
|
+
*/
|
|
39
|
+
|
|
40
|
+
export class ComputerUseProvider {
|
|
41
|
+
/**
|
|
42
|
+
* Start the environment: display server, browser, VNC, etc.
|
|
43
|
+
* @returns {Promise<ProviderInfo>}
|
|
44
|
+
*/
|
|
45
|
+
async start() {
|
|
46
|
+
throw new Error('ComputerUseProvider.start() not implemented');
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Stop the environment and release all resources.
|
|
51
|
+
* @returns {Promise<void>}
|
|
52
|
+
*/
|
|
53
|
+
async stop() {
|
|
54
|
+
throw new Error('ComputerUseProvider.stop() not implemented');
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Capture a screenshot of the full virtual display.
|
|
59
|
+
* @returns {Promise<{screenshot: string}>} base64 data URL (data:image/png;base64,...)
|
|
60
|
+
*/
|
|
61
|
+
async screenshot() {
|
|
62
|
+
throw new Error('ComputerUseProvider.screenshot() not implemented');
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// ── Coordinate-based actions ──────────────────────────────
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* Move the mouse cursor to (x, y).
|
|
69
|
+
* @param {number} x
|
|
70
|
+
* @param {number} y
|
|
71
|
+
* @returns {Promise<CoordinateActionResult>}
|
|
72
|
+
*/
|
|
73
|
+
async mouseMove(x, y) {
|
|
74
|
+
throw new Error('ComputerUseProvider.mouseMove() not implemented');
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Click at (x, y).
|
|
79
|
+
* @param {number} x
|
|
80
|
+
* @param {number} y
|
|
81
|
+
* @param {'left'|'right'|'middle'} [button='left']
|
|
82
|
+
* @returns {Promise<CoordinateActionResult>}
|
|
83
|
+
*/
|
|
84
|
+
async mouseClick(x, y, button = 'left') {
|
|
85
|
+
throw new Error('ComputerUseProvider.mouseClick() not implemented');
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/**
|
|
89
|
+
* Double-click at (x, y).
|
|
90
|
+
* @param {number} x
|
|
91
|
+
* @param {number} y
|
|
92
|
+
* @param {'left'|'right'|'middle'} [button='left']
|
|
93
|
+
* @returns {Promise<CoordinateActionResult>}
|
|
94
|
+
*/
|
|
95
|
+
async mouseDoubleClick(x, y, button = 'left') {
|
|
96
|
+
throw new Error('ComputerUseProvider.mouseDoubleClick() not implemented');
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* Drag from (startX, startY) to (endX, endY).
|
|
101
|
+
* @param {number} startX
|
|
102
|
+
* @param {number} startY
|
|
103
|
+
* @param {number} endX
|
|
104
|
+
* @param {number} endY
|
|
105
|
+
* @returns {Promise<CoordinateActionResult>}
|
|
106
|
+
*/
|
|
107
|
+
async mouseDrag(startX, startY, endX, endY) {
|
|
108
|
+
throw new Error('ComputerUseProvider.mouseDrag() not implemented');
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Scroll at (x, y) in a direction.
|
|
113
|
+
* @param {number} x
|
|
114
|
+
* @param {number} y
|
|
115
|
+
* @param {'up'|'down'} direction
|
|
116
|
+
* @param {number} [amount=3] - number of scroll steps
|
|
117
|
+
* @returns {Promise<CoordinateActionResult>}
|
|
118
|
+
*/
|
|
119
|
+
async scroll(x, y, direction, amount = 3) {
|
|
120
|
+
throw new Error('ComputerUseProvider.scroll() not implemented');
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
/**
|
|
124
|
+
* Press a key or key combination (e.g. 'Return', 'ctrl+c', 'alt+Tab').
|
|
125
|
+
* @param {string} key - xdotool-compatible key name
|
|
126
|
+
* @returns {Promise<CoordinateActionResult>}
|
|
127
|
+
*/
|
|
128
|
+
async pressKey(key) {
|
|
129
|
+
throw new Error('ComputerUseProvider.pressKey() not implemented');
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
/**
|
|
133
|
+
* Type text character by character.
|
|
134
|
+
* @param {string} text
|
|
135
|
+
* @returns {Promise<CoordinateActionResult>}
|
|
136
|
+
*/
|
|
137
|
+
async typeText(text) {
|
|
138
|
+
throw new Error('ComputerUseProvider.typeText() not implemented');
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
/**
|
|
142
|
+
* Wait for a duration.
|
|
143
|
+
* @param {number} ms - milliseconds to wait
|
|
144
|
+
* @returns {Promise<CoordinateActionResult>}
|
|
145
|
+
*/
|
|
146
|
+
async wait(ms) {
|
|
147
|
+
await new Promise(resolve => setTimeout(resolve, ms));
|
|
148
|
+
return { success: true };
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
/**
|
|
152
|
+
* Get the current cursor position.
|
|
153
|
+
* @returns {Promise<{x: number, y: number}>}
|
|
154
|
+
*/
|
|
155
|
+
async getCursorPosition() {
|
|
156
|
+
throw new Error('ComputerUseProvider.getCursorPosition() not implemented');
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/**
|
|
160
|
+
* Get the virtual display dimensions.
|
|
161
|
+
* @returns {Promise<{width: number, height: number}>}
|
|
162
|
+
*/
|
|
163
|
+
async getScreenSize() {
|
|
164
|
+
throw new Error('ComputerUseProvider.getScreenSize() not implemented');
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
export default ComputerUseProvider;
|
package/index.d.ts
CHANGED
|
@@ -455,6 +455,165 @@ export function bulkScrape(urls: string[], options?: BulkScrapeOptions): Promise
|
|
|
455
455
|
*/
|
|
456
456
|
export function bulkScrapeStream(urls: string[], options: BulkScrapeStreamOptions): Promise<BulkScrapeStreamStats>;
|
|
457
457
|
|
|
458
|
+
// ── Computer Use Provider ─────────────────────────────────────
|
|
459
|
+
|
|
460
|
+
export interface ProviderInfo {
|
|
461
|
+
cdpUrl: string;
|
|
462
|
+
vncUrl: string | null;
|
|
463
|
+
screenSize: { width: number; height: number };
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
export interface CoordinateActionResult {
|
|
467
|
+
success: boolean;
|
|
468
|
+
screenshot?: string | null;
|
|
469
|
+
error?: string | null;
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
export declare class ComputerUseProvider {
|
|
473
|
+
start(): Promise<ProviderInfo>;
|
|
474
|
+
stop(): Promise<void>;
|
|
475
|
+
screenshot(): Promise<{ screenshot: string }>;
|
|
476
|
+
mouseMove(x: number, y: number): Promise<CoordinateActionResult>;
|
|
477
|
+
mouseClick(x: number, y: number, button?: 'left' | 'right' | 'middle'): Promise<CoordinateActionResult>;
|
|
478
|
+
mouseDoubleClick(x: number, y: number, button?: 'left' | 'right' | 'middle'): Promise<CoordinateActionResult>;
|
|
479
|
+
mouseDrag(startX: number, startY: number, endX: number, endY: number): Promise<CoordinateActionResult>;
|
|
480
|
+
scroll(x: number, y: number, direction: 'up' | 'down', amount?: number): Promise<CoordinateActionResult>;
|
|
481
|
+
pressKey(key: string): Promise<CoordinateActionResult>;
|
|
482
|
+
typeText(text: string): Promise<CoordinateActionResult>;
|
|
483
|
+
wait(ms: number): Promise<CoordinateActionResult>;
|
|
484
|
+
getCursorPosition(): Promise<{ x: number; y: number }>;
|
|
485
|
+
getScreenSize(): Promise<{ width: number; height: number }>;
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
export interface LocalProviderOptions {
|
|
489
|
+
screenWidth?: number;
|
|
490
|
+
screenHeight?: number;
|
|
491
|
+
enableVnc?: boolean;
|
|
492
|
+
chromePath?: string;
|
|
493
|
+
chromeArgs?: string[];
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
export declare class LocalProvider extends ComputerUseProvider {
|
|
497
|
+
constructor(options?: LocalProviderOptions);
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
// ── Browser Session ───────────────────────────────────────────
|
|
501
|
+
|
|
502
|
+
export interface BrowserSessionOptions {
|
|
503
|
+
mode?: 'headless' | 'visual' | 'auto' | 'computer-use';
|
|
504
|
+
timeout?: number;
|
|
505
|
+
userAgent?: string;
|
|
506
|
+
lightpandaPath?: string;
|
|
507
|
+
verbose?: boolean;
|
|
508
|
+
provider?: ComputerUseProvider;
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
export interface PageState {
|
|
512
|
+
url: string;
|
|
513
|
+
title: string;
|
|
514
|
+
text: string;
|
|
515
|
+
interactiveElements: Array<{
|
|
516
|
+
type: 'button' | 'link' | 'input' | 'select';
|
|
517
|
+
text?: string;
|
|
518
|
+
label?: string;
|
|
519
|
+
href?: string;
|
|
520
|
+
selector: string;
|
|
521
|
+
tag?: string;
|
|
522
|
+
inputType?: string;
|
|
523
|
+
value?: string;
|
|
524
|
+
}>;
|
|
525
|
+
screenshot?: string;
|
|
526
|
+
backend: 'lightpanda' | 'chrome' | 'computer-use';
|
|
527
|
+
sessionHistory: Array<{ type: string; timestamp: number; backend: string }>;
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
export interface ActionResult {
|
|
531
|
+
success: boolean;
|
|
532
|
+
url?: string;
|
|
533
|
+
screenshot?: string;
|
|
534
|
+
backend?: string;
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
export interface BrowserAction {
|
|
538
|
+
type: 'goto' | 'click' | 'type' | 'scroll' | 'hover' | 'select' | 'pressKey' | 'goBack' | 'goForward' | 'screenshot' | 'extractContent' | 'waitFor' | 'mouseMove' | 'clickAt' | 'doubleClickAt' | 'drag' | 'scrollAt' | 'typeText' | 'getCursorPosition' | 'getScreenSize';
|
|
539
|
+
url?: string;
|
|
540
|
+
selector?: string;
|
|
541
|
+
text?: string;
|
|
542
|
+
key?: string;
|
|
543
|
+
direction?: 'up' | 'down';
|
|
544
|
+
amount?: number;
|
|
545
|
+
values?: string[];
|
|
546
|
+
timeout?: number;
|
|
547
|
+
expectNavigation?: boolean;
|
|
548
|
+
waitForNavigation?: boolean;
|
|
549
|
+
clear?: boolean;
|
|
550
|
+
delay?: number;
|
|
551
|
+
fullPage?: boolean;
|
|
552
|
+
type_?: 'png' | 'jpeg' | 'webp';
|
|
553
|
+
includeScreenshot?: boolean;
|
|
554
|
+
/** Coordinate-based action fields */
|
|
555
|
+
x?: number;
|
|
556
|
+
y?: number;
|
|
557
|
+
button?: 'left' | 'right' | 'middle';
|
|
558
|
+
startX?: number;
|
|
559
|
+
startY?: number;
|
|
560
|
+
endX?: number;
|
|
561
|
+
endY?: number;
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
export declare class BrowserSession {
|
|
565
|
+
constructor(options?: BrowserSessionOptions);
|
|
566
|
+
|
|
567
|
+
readonly activeBackend: 'lightpanda' | 'chrome' | 'computer-use' | null;
|
|
568
|
+
readonly mode: 'headless' | 'visual' | 'auto' | 'computer-use';
|
|
569
|
+
|
|
570
|
+
connect(): Promise<BrowserSession>;
|
|
571
|
+
goto(url: string): Promise<ActionResult>;
|
|
572
|
+
goBack(): Promise<void>;
|
|
573
|
+
goForward(): Promise<void>;
|
|
574
|
+
click(selector: string, options?: { timeout?: number; expectNavigation?: boolean; waitForNavigation?: boolean }): Promise<ActionResult>;
|
|
575
|
+
type(selector: string, text: string, options?: { timeout?: number; clear?: boolean; delay?: number }): Promise<ActionResult>;
|
|
576
|
+
scroll(direction?: 'up' | 'down', amount?: number): Promise<ActionResult>;
|
|
577
|
+
hover(selector: string): Promise<ActionResult>;
|
|
578
|
+
select(selector: string, ...values: string[]): Promise<ActionResult>;
|
|
579
|
+
pressKey(key: string): Promise<ActionResult>;
|
|
580
|
+
screenshot(options?: { type?: 'png' | 'jpeg' | 'webp'; fullPage?: boolean }): Promise<{ success: boolean; screenshot: string; backend: string }>;
|
|
581
|
+
extractContent(): Promise<{ title: string; metaDescription: string; headings: any[]; paragraphs: string[]; links: any[]; bodyText: string; url: string }>;
|
|
582
|
+
evaluate<T>(fn: (...args: any[]) => T, ...args: any[]): Promise<T>;
|
|
583
|
+
waitFor(selector: string, timeout?: number): Promise<ActionResult>;
|
|
584
|
+
getPageState(options?: { includeScreenshot?: boolean }): Promise<PageState>;
|
|
585
|
+
executeAction(action: BrowserAction): Promise<ActionResult>;
|
|
586
|
+
getCookies(): Promise<any[]>;
|
|
587
|
+
setCookies(cookies: any[]): Promise<void>;
|
|
588
|
+
getHistory(): Array<{ type: string; timestamp: number; backend: string }>;
|
|
589
|
+
getBackend(): 'lightpanda' | 'chrome' | 'computer-use' | null;
|
|
590
|
+
close(): Promise<void>;
|
|
591
|
+
|
|
592
|
+
/** Coordinate-based actions (computer-use mode only) */
|
|
593
|
+
mouseMove(x: number, y: number): Promise<CoordinateActionResult>;
|
|
594
|
+
clickAt(x: number, y: number, button?: 'left' | 'right' | 'middle'): Promise<CoordinateActionResult>;
|
|
595
|
+
doubleClickAt(x: number, y: number, button?: 'left' | 'right' | 'middle'): Promise<CoordinateActionResult>;
|
|
596
|
+
drag(startX: number, startY: number, endX: number, endY: number): Promise<CoordinateActionResult>;
|
|
597
|
+
scrollAt(x: number, y: number, direction: 'up' | 'down', amount?: number): Promise<CoordinateActionResult>;
|
|
598
|
+
typeText(text: string): Promise<CoordinateActionResult>;
|
|
599
|
+
getCursorPosition(): Promise<{ x: number; y: number }>;
|
|
600
|
+
getScreenSize(): Promise<{ width: number; height: number }>;
|
|
601
|
+
getVncUrl(): string | null;
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
export function createSession(options?: BrowserSessionOptions): Promise<BrowserSession>;
|
|
605
|
+
|
|
606
|
+
export declare class LightPandaServer {
|
|
607
|
+
constructor(binaryPath?: string);
|
|
608
|
+
start(port?: number): Promise<string>;
|
|
609
|
+
getEndpoint(): string;
|
|
610
|
+
isRunning(): boolean;
|
|
611
|
+
stop(): void;
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
export function getLightPandaServer(binaryPath?: string): LightPandaServer;
|
|
615
|
+
export function stopLightPandaServer(): void;
|
|
616
|
+
|
|
458
617
|
/**
|
|
459
618
|
* Default export - same as BNCASmartScraper class
|
|
460
619
|
*/
|
package/index.js
CHANGED
|
@@ -1795,4 +1795,10 @@ export async function bulkScrapeStream(urls, options = {}) {
|
|
|
1795
1795
|
}
|
|
1796
1796
|
}
|
|
1797
1797
|
|
|
1798
|
+
// Browser session exports
|
|
1799
|
+
export { BrowserSession, createSession } from './browser-session.js';
|
|
1800
|
+
export { default as LightPandaServer, getLightPandaServer, stopLightPandaServer } from './lightpanda-server.js';
|
|
1801
|
+
export { ComputerUseProvider } from './computer-use-provider.js';
|
|
1802
|
+
export { LocalProvider } from './providers/local-provider.js';
|
|
1803
|
+
|
|
1798
1804
|
export default BNCASmartScraper;
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import { spawn } from 'child_process';
|
|
2
|
+
import { createServer } from 'net';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import fs from 'fs';
|
|
5
|
+
|
|
6
|
+
/**
 * Manages a single LightPanda child process that serves the Chrome DevTools
 * Protocol on a loopback port.
 *
 * Lifecycle: `start()` spawns `<binary> serve --host … --port …`, waits until
 * the server reports readiness (stderr banner or a successful HTTP probe of
 * `/json/version`) and resolves with the WebSocket endpoint. `stop()` sends
 * SIGTERM and resets the instance so it can be started again.
 */
class LightPandaServer {
  /**
   * @param {string} [binaryPath] - Path to the LightPanda executable. When
   *   omitted, a few well-known locations are probed (see `_findBinary`).
   */
  constructor(binaryPath) {
    this.binaryPath = binaryPath || this._findBinary();
    this.process = null;
    this.host = '127.0.0.1';
    this.port = null;
    this.ready = false;
    // Promise of the start-up currently in flight (null when idle). Lets
    // concurrent start() callers share one spawn instead of racing.
    this._starting = null;
  }

  /**
   * Start the server if it is not already running.
   *
   * Calls made while a start-up is still in progress join that start-up; the
   * `port` argument of such a call is ignored.
   *
   * @param {number} [port] - Port to listen on; a free port is picked when
   *   omitted (or 0).
   * @returns {Promise<string>} WebSocket endpoint, e.g. `ws://127.0.0.1:9222`.
   * @throws {Error} If the binary cannot be spawned, exits before becoming
   *   ready, or is not ready within 5 seconds.
   */
  async start(port) {
    if (this.process && this.ready) return this.getEndpoint();

    if (!this._starting) {
      this._starting = this._launch(port).finally(() => {
        this._starting = null;
      });
    }
    return this._starting;
  }

  /**
   * Spawn the child process and wait for it to become ready.
   * Internal helper for `start()`; do not call directly.
   *
   * @param {number} [port]
   * @returns {Promise<string>} WebSocket endpoint.
   */
  async _launch(port) {
    const listenPort = port || (await this._findAvailablePort());
    this.port = listenPort;

    return new Promise((resolve, reject) => {
      const args = [
        'serve',
        '--host', this.host,
        '--port', String(listenPort),
        '--cdp_max_connections', '16',
      ];

      const child = spawn(this.binaryPath, args, {
        stdio: ['ignore', 'pipe', 'pipe'],
      });
      this.process = child;

      let stderr = '';
      let settled = false;
      let pollTimer = null;
      let hardTimer = null;

      // Settle the promise exactly once and release both timers, so they can
      // neither keep the event loop alive nor act on a later start() attempt.
      const settle = (fn, value) => {
        if (settled) return;
        settled = true;
        clearTimeout(pollTimer);
        clearTimeout(hardTimer);
        fn(value);
      };

      const onReady = () => {
        // Ignore readiness signals from a child that has been replaced/stopped.
        if (settled || this.process !== child) return;
        this.ready = true;
        settle(resolve, this.getEndpoint());
      };

      // stdout is piped but unused; drain it so the child can never block on
      // a full pipe buffer.
      child.stdout.resume();

      // LightPanda logs server start to stderr.
      child.stderr.on('data', (data) => {
        // After start-up the text is no longer needed; keep draining the
        // stream but stop accumulating to avoid unbounded growth.
        if (settled) return;
        stderr += data.toString();
        if (stderr.includes('Listening on') || stderr.includes('server started')) {
          onReady();
        }
      });

      child.on('error', (err) => {
        // Only touch instance state if this child is still the current one.
        if (this.process === child) this.ready = false;
        settle(reject, new Error(`Failed to start LightPanda: ${err.message}`));
      });

      child.on('exit', (code) => {
        if (this.process === child) {
          this.ready = false;
          this.process = null;
        }
        // No-op when start-up already succeeded; rejects an early exit.
        settle(reject, new Error(`LightPanda exited with code ${code}: ${stderr}`));
      });

      // Fallback: if no stderr signal arrives within 1.5s, probe
      // /json/version and keep probing every 500ms until ready or timed out.
      const poll = async () => {
        if (settled) return;
        try {
          const res = await fetch(`http://${this.host}:${listenPort}/json/version`);
          if (res.ok) {
            onReady();
            return;
          }
        } catch {
          // Still starting up, give it more time
        }
        if (!settled) pollTimer = setTimeout(poll, 500);
      };
      pollTimer = setTimeout(poll, 1500);

      // Hard timeout
      hardTimer = setTimeout(() => {
        if (settled) return;
        if (this.process === child) this.stop();
        settle(reject, new Error(`LightPanda failed to start within 5s. stderr: ${stderr}`));
      }, 5000);
    });
  }

  /**
   * @returns {string} WebSocket endpoint built from the current host and port.
   */
  getEndpoint() {
    return `ws://${this.host}:${this.port}`;
  }

  /**
   * @returns {boolean} True when a child process exists and has reported ready.
   */
  isRunning() {
    return this.ready && this.process !== null;
  }

  /**
   * Send SIGTERM to the child (if any) and reset the instance state.
   * Safe to call repeatedly.
   */
  stop() {
    if (this.process) {
      try {
        this.process.kill('SIGTERM');
      } catch {
        // already dead
      }
      this.process = null;
    }
    this.ready = false;
    this.port = null;
  }

  /**
   * Ask the OS for a free loopback port by briefly binding to port 0.
   * The port is released before the promise resolves, so another process
   * could claim it before LightPanda binds to it.
   *
   * @returns {Promise<number>}
   */
  async _findAvailablePort() {
    return new Promise((resolve, reject) => {
      const server = createServer();
      server.on('error', reject);
      server.listen(0, '127.0.0.1', () => {
        const port = server.address().port;
        server.close(() => resolve(port));
      });
    });
  }

  /**
   * Locate the LightPanda binary: a `bin/lightpanda` next to this module,
   * then two system locations, otherwise the bare command name.
   *
   * @returns {string}
   */
  _findBinary() {
    // Check common locations
    // NOTE(review): URL#pathname is percent-encoded and has a leading slash
    // before the drive letter on Windows; url.fileURLToPath() would be the
    // robust conversion, but needs an extra import at file level.
    const candidates = [
      path.join(path.dirname(new URL(import.meta.url).pathname), 'bin', 'lightpanda'),
      '/usr/local/bin/lightpanda',
      '/usr/bin/lightpanda',
    ];

    for (const p of candidates) {
      if (fs.existsSync(p)) return p;
    }

    return 'lightpanda'; // hope it's on PATH
  }
}
|
|
129
|
+
|
|
130
|
+
// Singleton instance — shared across all sessions
|
|
131
|
+
let _instance = null;
|
|
132
|
+
|
|
133
|
+
export function getLightPandaServer(binaryPath) {
|
|
134
|
+
if (!_instance) {
|
|
135
|
+
_instance = new LightPandaServer(binaryPath);
|
|
136
|
+
}
|
|
137
|
+
return _instance;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
export function stopLightPandaServer() {
|
|
141
|
+
if (_instance) {
|
|
142
|
+
_instance.stop();
|
|
143
|
+
_instance = null;
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
// Stop the shared LightPanda server when the host process goes away.
//
// Installing a SIGTERM/SIGINT listener disables Node's default behaviour of
// exiting on that signal. To avoid making the host application ignore Ctrl+C
// just because this module was imported, the handler runs once and then
// re-raises the signal when no other listener has taken responsibility for it.
for (const signal of ['SIGTERM', 'SIGINT']) {
  process.once(signal, () => {
    stopLightPandaServer();
    if (process.listenerCount(signal) === 0) {
      process.kill(process.pid, signal);
    }
  });
}

process.on('beforeExit', stopLightPandaServer);
// 'beforeExit' is not emitted for explicit process.exit(); 'exit' covers that
// path (stopLightPandaServer is synchronous, so it is safe to call here).
process.on('exit', stopLightPandaServer);
|
|
150
|
+
|
|
151
|
+
export default LightPandaServer;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@monostate/node-scraper",
|
|
3
|
-
"version": "2.0.0",
|
|
3
|
+
"version": "2.2.0",
|
|
4
4
|
"description": "Intelligent web scraping with AI Q&A, PDF support and multi-level fallback system - 11x faster than traditional scrapers",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -15,6 +15,10 @@
|
|
|
15
15
|
"index.js",
|
|
16
16
|
"index.d.ts",
|
|
17
17
|
"browser-pool.js",
|
|
18
|
+
"browser-session.js",
|
|
19
|
+
"lightpanda-server.js",
|
|
20
|
+
"computer-use-provider.js",
|
|
21
|
+
"providers/",
|
|
18
22
|
"README.md",
|
|
19
23
|
"BULK_SCRAPING.md",
|
|
20
24
|
"package.json",
|
|
@@ -34,6 +38,9 @@
|
|
|
34
38
|
"data-extraction",
|
|
35
39
|
"automation",
|
|
36
40
|
"browser",
|
|
41
|
+
"browser-use",
|
|
42
|
+
"cdp",
|
|
43
|
+
"ai-agent",
|
|
37
44
|
"ai-powered",
|
|
38
45
|
"question-answering",
|
|
39
46
|
"pdf-parsing",
|