@monostate/node-scraper 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -0
- package/browser-session.js +685 -0
- package/computer-use-provider.js +168 -0
- package/index.d.ts +159 -0
- package/index.js +6 -0
- package/lightpanda-server.js +151 -0
- package/package.json +8 -1
- package/providers/local-provider.js +322 -0
package/README.md
CHANGED
|
@@ -79,6 +79,80 @@ await bulkScrapeStream(urls, {
|
|
|
79
79
|
|
|
80
80
|
See [BULK_SCRAPING.md](./BULK_SCRAPING.md) for full documentation.
|
|
81
81
|
|
|
82
|
+
### Browser sessions
|
|
83
|
+
|
|
84
|
+
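Persistent browser sessions with real-time control. There are three modes: `auto` (headless, the default), `visual`, and `computer-use` (see "Computer use" below). The first two are shown here: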
|
|
85
|
+
|
|
86
|
+
```javascript
|
|
87
|
+
import { createSession } from '@monostate/node-scraper';
|
|
88
|
+
|
|
89
|
+
// Headless (default) — LightPanda with Chrome fallback
|
|
90
|
+
const session = await createSession({ mode: 'auto' });
|
|
91
|
+
await session.goto('https://example.com');
|
|
92
|
+
const content = await session.extractContent();
|
|
93
|
+
const state = await session.getPageState({ includeScreenshot: true });
|
|
94
|
+
await session.close();
|
|
95
|
+
|
|
96
|
+
// Visual — Chrome with headless:false for dev/debug
|
|
97
|
+
const visual = await createSession({ mode: 'visual' });
|
|
98
|
+
await visual.goto('https://example.com');
|
|
99
|
+
await visual.screenshot(); // real Chrome rendering
|
|
100
|
+
await visual.close();
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Session methods: `goto`, `click`, `type`, `scroll`, `hover`, `select`, `pressKey`, `goBack`, `goForward`, `screenshot`, `extractContent`, `getPageState`, `waitFor`, `evaluate`, `getCookies`, `setCookies`, `close`.
|
|
104
|
+
|
|
105
|
+
### Computer use (coordinate-based browser control)
|
|
106
|
+
|
|
107
|
+
For AI agents that navigate by pixel coordinates — useful for anti-bot sites, dynamic UIs, or anything that can't be scraped with selectors.
|
|
108
|
+
|
|
109
|
+
```javascript
|
|
110
|
+
import { createSession, LocalProvider } from '@monostate/node-scraper';
|
|
111
|
+
|
|
112
|
+
// LocalProvider runs Xvfb + Chrome + xdotool (Linux only)
|
|
113
|
+
const session = await createSession({
|
|
114
|
+
mode: 'computer-use',
|
|
115
|
+
provider: new LocalProvider({ screenWidth: 1280, screenHeight: 800, enableVnc: true }),
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
await session.goto('https://example.com');
|
|
119
|
+
|
|
120
|
+
// Coordinate-based actions (delegated to provider)
|
|
121
|
+
await session.clickAt(640, 400);
|
|
122
|
+
await session.typeText('hello world');
|
|
123
|
+
await session.mouseMove(100, 200);
|
|
124
|
+
await session.drag(10, 20, 300, 400);
|
|
125
|
+
await session.scrollAt(640, 400, 'down', 5);
|
|
126
|
+
const pos = await session.getCursorPosition();
|
|
127
|
+
const size = await session.getScreenSize();
|
|
128
|
+
|
|
129
|
+
// Selector-based actions still work (via Puppeteer CDP)
|
|
130
|
+
await session.click('#submit');
|
|
131
|
+
await session.type('#search', 'query');
|
|
132
|
+
|
|
133
|
+
// VNC streaming URL (if provider supports it)
|
|
134
|
+
console.log(session.getVncUrl());
|
|
135
|
+
|
|
136
|
+
await session.close();
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
#### Custom providers
|
|
140
|
+
|
|
141
|
+
Implement `ComputerUseProvider` to connect any VM/container backend:
|
|
142
|
+
|
|
143
|
+
```javascript
|
|
144
|
+
import { ComputerUseProvider } from '@monostate/node-scraper';
|
|
145
|
+
|
|
146
|
+
class MyProvider extends ComputerUseProvider {
|
|
147
|
+
async start() {
|
|
148
|
+
// Provision VM, return { cdpUrl, vncUrl, screenSize }
|
|
149
|
+
}
|
|
150
|
+
async mouseClick(x, y, button) { /* ... */ }
|
|
151
|
+
async screenshot() { /* ... */ }
|
|
152
|
+
async stop() { /* cleanup */ }
|
|
153
|
+
}
|
|
154
|
+
```
|
|
155
|
+
|
|
82
156
|
### AI-powered Q&A
|
|
83
157
|
|
|
84
158
|
Ask questions about any website using OpenRouter, OpenAI, or local fallback:
|