crawlio-browser 1.3.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/dist/mcp-server/chunk-DUJTVASE.js +39 -0
- package/dist/mcp-server/index.js +862 -2506
- package/dist/mcp-server/init-EJMNI6KH.js +774 -0
- package/dist/mcp-server/tool-embeddings.json +6 -0
- package/package.json +9 -4
- package/skills/browser-automation/SKILL.md +76 -3
- package/skills/browser-automation/reference.md +11 -2
- package/dist/mcp-server/chunk-JSBRDJBE.js +0 -30
- package/dist/mcp-server/init-TRQTWLAB.js +0 -492
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "crawlio-browser",
|
|
3
|
-
"version": "1.3.0",
|
|
4
|
-
"description": "MCP server with
|
|
3
|
+
"version": "1.4.1",
|
|
4
|
+
"description": "MCP server with 96 CDP-backed tools for browser automation — screenshots, DOM, network capture, framework detection, cookies, storage, session recording, performance metrics via Chrome",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/mcp-server/index.js",
|
|
7
7
|
"bin": {
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
],
|
|
17
17
|
"scripts": {
|
|
18
18
|
"build": "npm run build:server && npm run build:extension",
|
|
19
|
-
"build:server": "node -e \"require('fs').rmSync('dist/mcp-server',{recursive:true,force:true})\" && tsup src/mcp-server/index.ts --format esm --dts --outDir dist/mcp-server",
|
|
19
|
+
"build:server": "node -e \"require('fs').rmSync('dist/mcp-server',{recursive:true,force:true})\" && tsup src/mcp-server/index.ts --format esm --dts --outDir dist/mcp-server && cp src/mcp-server/tool-embeddings.json dist/mcp-server/tool-embeddings.json",
|
|
20
20
|
"prepublishOnly": "npm run typecheck && npm run test && npm run build:server",
|
|
21
21
|
"build:extension": "rm -rf dist/extension && tsup src/extension/background.ts src/extension/popup.ts src/extension/welcome.ts --format iife --outDir dist/extension --define.__DEV__=false --minify && cd dist/extension && mv background.global.js background.js && mv popup.global.js popup.js && mv welcome.global.js welcome.js && cd ../.. && cp src/extension/manifest.prod.json dist/extension/manifest.json && cp src/extension/popup.html src/extension/popup.css src/extension/welcome.html src/extension/welcome.css src/extension/icon16.png src/extension/icon32.png src/extension/icon48.png src/extension/icon128.png dist/extension/",
|
|
22
22
|
"build:dev": "rm -rf dist/extension-dev && tsup src/extension/background.ts src/extension/popup.ts src/extension/welcome.ts --format iife --outDir dist/extension-dev --define.__DEV__=true && cd dist/extension-dev && mv background.global.js background.js && mv popup.global.js popup.js && mv welcome.global.js welcome.js && cd ../.. && cp src/extension/manifest.dev.json dist/extension-dev/manifest.json && cp src/extension/popup.html src/extension/popup.css src/extension/welcome.html src/extension/welcome.css src/extension/icon*.png dist/extension-dev/",
|
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
"typecheck": "tsc --noEmit && tsc --noEmit -p tsconfig.extension.json",
|
|
27
27
|
"typecheck:extension": "tsc --noEmit -p tsconfig.extension.json",
|
|
28
28
|
"dev": "tsup src/mcp-server/index.ts --format esm --watch",
|
|
29
|
+
"generate:embeddings": "npx tsx scripts/generate-embeddings.ts",
|
|
29
30
|
"setup": "node dist/mcp-server/index.js init"
|
|
30
31
|
},
|
|
31
32
|
"keywords": [
|
|
@@ -37,7 +38,10 @@
|
|
|
37
38
|
"ai",
|
|
38
39
|
"screenshots",
|
|
39
40
|
"dom",
|
|
40
|
-
"web-scraping"
|
|
41
|
+
"web-scraping",
|
|
42
|
+
"cdp",
|
|
43
|
+
"session-recording",
|
|
44
|
+
"network-capture"
|
|
41
45
|
],
|
|
42
46
|
"homepage": "https://crawlio.app/agent",
|
|
43
47
|
"repository": {
|
|
@@ -50,6 +54,7 @@
|
|
|
50
54
|
},
|
|
51
55
|
"dependencies": {
|
|
52
56
|
"@modelcontextprotocol/sdk": "^1.8.0",
|
|
57
|
+
"express-rate-limit": "^8.2.1",
|
|
53
58
|
"ws": "^8.18.1",
|
|
54
59
|
"zod": "^3.24.2"
|
|
55
60
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: browser-automation
|
|
3
|
-
description: Use this skill when the user asks to interact with a browser, take screenshots, inspect a page, capture network traffic, detect frameworks, click elements, fill forms, or automate any browser task. Orchestrates crawlio-agent's
|
|
3
|
+
description: Use this skill when the user asks to interact with a browser, take screenshots, inspect a page, capture network traffic, detect frameworks, click elements, fill forms, record browser sessions, or automate any browser task. Orchestrates crawlio-agent's 96 browser tools via the search + execute + connect_tab interface.
|
|
4
4
|
allowed-tools: mcp__crawlio-browser__search, mcp__crawlio-browser__execute, mcp__crawlio-browser__connect_tab
|
|
5
5
|
---
|
|
6
6
|
|
|
@@ -16,6 +16,7 @@ Use this skill when the user wants to:
|
|
|
16
16
|
- Click buttons, fill forms, type text, or navigate
|
|
17
17
|
- Read cookies, localStorage, sessionStorage, or IndexedDB
|
|
18
18
|
- Capture performance metrics, security state, or service workers
|
|
19
|
+
- Record browser sessions to capture interactions, navigation, network, and console as structured data
|
|
19
20
|
- Automate multi-step browser workflows
|
|
20
21
|
|
|
21
22
|
## Connection (Always First)
|
|
@@ -58,6 +59,20 @@ return await smart.screenshot()
|
|
|
58
59
|
|
|
59
60
|
Returns base64 PNG. Use `{ fullPage: true }` for full-page capture.
|
|
60
61
|
|
|
62
|
+
### OCR Text Extraction (macOS only)
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
ocr_screenshot({ fullPage: true })
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Takes a CDP screenshot, runs it through macOS Vision.framework (`VNRecognizeTextRequest`), and returns recognized text with confidence scores and bounding regions. Works on canvas elements, images rendered as pixels, anti-scraping sites, and any visual content invisible to DOM extraction.
|
|
69
|
+
|
|
70
|
+
Parameters:
|
|
71
|
+
- `fullPage` (bool, opt): Capture full scrollable page instead of viewport.
|
|
72
|
+
- `selector` (string, opt): CSS selector to screenshot a specific element.
|
|
73
|
+
|
|
74
|
+
Returns up to 20 regions (sorted by confidence), each with `text`, `confidence` (0-1), and `bounds` (x, y, width, height in normalized coordinates).
|
|
75
|
+
|
|
61
76
|
### Click an Element
|
|
62
77
|
|
|
63
78
|
```js
|
|
@@ -167,6 +182,64 @@ Capture framework, network, console, DOM, and cookies in one call:
|
|
|
167
182
|
return await bridge.send({ type: "capture_page" })
|
|
168
183
|
```
|
|
169
184
|
|
|
185
|
+
Returns a **shaped summary** (~1KB) instead of raw arrays (~50KB+):
|
|
186
|
+
```json
|
|
187
|
+
{
|
|
188
|
+
"url": "...", "title": "...", "framework": {...}, "capturedAt": "...",
|
|
189
|
+
"network": { "total": 47, "failed": 2, "byType": {...}, "errors": [...] },
|
|
190
|
+
"console": { "total": 23, "errors": [...], "warnings": 5, "info": 10, "debug": 8 },
|
|
191
|
+
"cookies": { "total": 12, "names": ["session", "_ga", ...] },
|
|
192
|
+
"dom": { "nodeCount": 342, "forms": 1, "links": 15, "images": 8, "inputs": 4 }
|
|
193
|
+
}
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
To drill down, call individual tools: `get_console_logs`, `get_dom_snapshot`, `get_cookies`, or `stop_network_capture`.
|
|
197
|
+
|
|
198
|
+
## Session Recording
|
|
199
|
+
|
|
200
|
+
Record a full browser session — every interaction, navigation, network request, and console log — as structured JSON.
|
|
201
|
+
|
|
202
|
+
### Start Recording
|
|
203
|
+
|
|
204
|
+
```
|
|
205
|
+
start_recording({ maxDurationSec: 300, maxInteractions: 100 })
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
Returns `{ sessionId, startedAt, tabId, url }`. Both params optional (defaults: 300s / 200 interactions).
|
|
209
|
+
- `maxDurationSec`: 10–600 (validated by Zod)
|
|
210
|
+
- `maxInteractions`: 1–500 (validated by Zod)
|
|
211
|
+
|
|
212
|
+
### Check Status
|
|
213
|
+
|
|
214
|
+
```
|
|
215
|
+
get_recording_status({})
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
Returns `{ active, sessionId, durationSec, pageCount, interactionCount, currentPageUrl }`. Permission-exempt (safe to poll).
|
|
219
|
+
|
|
220
|
+
### Stop and Get Session
|
|
221
|
+
|
|
222
|
+
```
|
|
223
|
+
stop_recording({})
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
Returns the full session:
|
|
227
|
+
- `pages[]` — one per URL visited, each with `interactions[]`, `console[]`, `network[]`
|
|
228
|
+
- `metadata` — tabId, initialUrl, stopReason (`manual` | `max_duration` | `max_interactions` | `tab_closed` | `disconnect`)
|
|
229
|
+
- `duration` — total seconds
|
|
230
|
+
|
|
231
|
+
### Workflow via Execute
|
|
232
|
+
|
|
233
|
+
```js
|
|
234
|
+
// Start recording
|
|
235
|
+
const session = await bridge.send({ type: "start_recording", maxDurationSec: 120 })
|
|
236
|
+
// ... user interacts with the page ...
|
|
237
|
+
const status = await bridge.send({ type: "get_recording_status" })
|
|
238
|
+
// Stop and get full session
|
|
239
|
+
const result = await bridge.send({ type: "stop_recording" })
|
|
240
|
+
return result
|
|
241
|
+
```
|
|
242
|
+
|
|
170
243
|
## Tab Management
|
|
171
244
|
|
|
172
245
|
```js
|
|
@@ -191,7 +264,7 @@ When you don't know the exact command, search first:
|
|
|
191
264
|
search({ query: "cookies" })
|
|
192
265
|
```
|
|
193
266
|
|
|
194
|
-
This returns matching command names, descriptions, and parameter schemas from the full catalog of
|
|
267
|
+
This returns matching command names, descriptions, and parameter schemas from the full catalog of 129 commands (96 browser + 33 desktop).
|
|
195
268
|
|
|
196
269
|
## Desktop Integration (Crawlio App)
|
|
197
270
|
|
|
@@ -242,4 +315,4 @@ return { title, url: await smart.evaluate("location.href") }
|
|
|
242
315
|
|
|
243
316
|
## Reference
|
|
244
317
|
|
|
245
|
-
See [reference.md](./reference.md) for the full list of all
|
|
318
|
+
See [reference.md](./reference.md) for the full list of all 96 browser commands and 33 desktop commands with parameters.
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Full catalog of all commands available via `search` and `execute`.
|
|
4
4
|
|
|
5
|
-
## Browser Commands (
|
|
5
|
+
## Browser Commands (96)
|
|
6
6
|
|
|
7
7
|
Commands sent via `bridge.send({ type: "<command>", ...params })`.
|
|
8
8
|
|
|
@@ -53,10 +53,19 @@ Commands sent via `bridge.send({ type: "<command>", ...params })`.
|
|
|
53
53
|
| `get_cookies` | Get all cookies for the current page | `url?` |
|
|
54
54
|
| `get_dom_snapshot` | Get DOM snapshot | `depth?` |
|
|
55
55
|
| `take_screenshot` | Take a screenshot | `fullPage?`, `selector?`, `format?`, `quality?` |
|
|
56
|
+
| `ocr_screenshot` | Extract text from page via Vision.framework OCR (macOS) | `fullPage?`, `selector?` |
|
|
56
57
|
| `get_response_body` | Get response body for a network request | `requestId` |
|
|
57
58
|
| `get_websocket_connections` | List active WebSocket connections | — |
|
|
58
59
|
| `get_websocket_messages` | Get messages for a WebSocket connection | `requestId`, `limit?` |
|
|
59
60
|
|
|
61
|
+
### Session Recording
|
|
62
|
+
|
|
63
|
+
| Command | Description | Key Parameters |
|
|
64
|
+
|---------|-------------|----------------|
|
|
65
|
+
| `start_recording` | Start recording browser session | `maxDurationSec?` (10–600), `maxInteractions?` (1–500) |
|
|
66
|
+
| `stop_recording` | Stop recording and return full session data | — |
|
|
67
|
+
| `get_recording_status` | Check active recording status and counters | — |
|
|
68
|
+
|
|
60
69
|
### Cookies & Storage
|
|
61
70
|
|
|
62
71
|
| Command | Description | Key Parameters |
|
|
@@ -256,4 +265,4 @@ The `smart` object provides auto-waiting wrappers and framework-specific data:
|
|
|
256
265
|
## Links
|
|
257
266
|
|
|
258
267
|
- Extension install: https://crawlio.app/agent
|
|
259
|
-
- GitHub: https://github.com/Crawlio-app/crawlio-
|
|
268
|
+
- GitHub: https://github.com/Crawlio-app/crawlio-browser
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
var __create = Object.create;
|
|
2
|
-
var __defProp = Object.defineProperty;
|
|
3
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
-
var __commonJS = (cb, mod) => function __require() {
|
|
8
|
-
return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
|
|
9
|
-
};
|
|
10
|
-
var __copyProps = (to, from, except, desc) => {
|
|
11
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
-
for (let key of __getOwnPropNames(from))
|
|
13
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
-
}
|
|
16
|
-
return to;
|
|
17
|
-
};
|
|
18
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
19
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
20
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
21
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
22
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
23
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
24
|
-
mod
|
|
25
|
-
));
|
|
26
|
-
|
|
27
|
-
export {
|
|
28
|
-
__commonJS,
|
|
29
|
-
__toESM
|
|
30
|
-
};
|