mcp-scraper 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -17
- package/dist/bin/api-server.cjs +6 -3
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +1 -1
- package/dist/bin/browser-agent-stdio-server.cjs +8 -5
- package/dist/bin/browser-agent-stdio-server.cjs.map +1 -1
- package/dist/bin/browser-agent-stdio-server.js +7 -356
- package/dist/bin/browser-agent-stdio-server.js.map +1 -1
- package/dist/bin/mcp-scraper-combined-stdio-server.cjs +1990 -0
- package/dist/bin/mcp-scraper-combined-stdio-server.cjs.map +1 -0
- package/dist/bin/mcp-scraper-combined-stdio-server.d.cts +1 -0
- package/dist/bin/mcp-scraper-combined-stdio-server.d.ts +1 -0
- package/dist/bin/mcp-scraper-combined-stdio-server.js +51 -0
- package/dist/bin/mcp-scraper-combined-stdio-server.js.map +1 -0
- package/dist/bin/mcp-stdio-server.cjs +7 -4
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +3 -3
- package/dist/bin/mcp-stdio-server.js.map +1 -1
- package/dist/{chunk-KIF4PKFZ.js → chunk-7DUA4OSB.js} +8 -4
- package/dist/chunk-7DUA4OSB.js.map +1 -0
- package/dist/chunk-JIP4FTR7.js +7 -0
- package/dist/chunk-JIP4FTR7.js.map +1 -0
- package/dist/chunk-XQGWEM4S.js +360 -0
- package/dist/chunk-XQGWEM4S.js.map +1 -0
- package/dist/{server-3QMDOEOS.js → server-ADOMF5IM.js} +3 -3
- package/package.json +4 -2
- package/dist/chunk-KIF4PKFZ.js.map +0 -1
- package/dist/chunk-PYBMZ346.js +0 -7
- package/dist/chunk-PYBMZ346.js.map +0 -1
- package/dist/{server-3QMDOEOS.js.map → server-ADOMF5IM.js.map} +0 -0
package/README.md
CHANGED
|
@@ -4,10 +4,27 @@ MCP Scraper is an MCP server for live web intelligence tools backed by `https://
|
|
|
4
4
|
|
|
5
5
|
## Install
|
|
6
6
|
|
|
7
|
-
Use the npm package from any MCP client that can run local stdio commands.
|
|
7
|
+
Use the MCPB Desktop Extension for the branded Claude Desktop install, or use the npm package from any MCP client that can run local stdio commands.
|
|
8
|
+
|
|
9
|
+
MCP Scraper ships three local stdio entrypoints:
|
|
8
10
|
|
|
9
11
|
- `mcp-scraper` — live web intelligence, SERP, PAA, site extraction, YouTube, Facebook, Maps, directory, and credit tools.
|
|
10
12
|
- `browser-agent` — an agent-controlled live cloud browser with screenshots, clicks, typing, scrolling, live watch URLs, replay links, and MP4 replay download.
|
|
13
|
+
- `mcp-scraper-combined` — one local MCP server that exposes both tool sets. This is the entrypoint used by the MCPB Desktop Extension.
|
|
14
|
+
|
|
15
|
+
### Claude Desktop MCPB
|
|
16
|
+
|
|
17
|
+
Build the branded one-click bundle:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
npm run build:mcpb
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
The generated bundle is written to `build/mcpb/mcp-scraper-<version>.mcpb` and copied to `public/downloads/` for the hosted download. The current public bundle is `https://mcpscraper.dev/downloads/mcp-scraper.mcpb` (`0.2.4`, SHA-256 `0450353fb3bb921ab823da8c725c8916c01317e4fba693fcbdacfa5054efa35a`). Install it by opening or dragging it into Claude Desktop. Claude displays the `MCP Scraper` install card, icon, and API-key configuration field from the bundle manifest.
|
|
24
|
+
|
|
25
|
+
The MCPB install exposes the same web-intelligence tools as `mcp-scraper` plus all `browser_*` tools from `browser-agent` through one server.
|
|
26
|
+
|
|
27
|
+
### Raw stdio config
|
|
11
28
|
|
|
12
29
|
Claude Desktop:
|
|
13
30
|
|
|
@@ -16,14 +33,7 @@ Claude Desktop:
|
|
|
16
33
|
"mcpServers": {
|
|
17
34
|
"mcp-scraper": {
|
|
18
35
|
"command": "npx",
|
|
19
|
-
"args": ["-y", "mcp-scraper@latest"],
|
|
20
|
-
"env": {
|
|
21
|
-
"MCP_SCRAPER_API_KEY": "sk_live_your_key"
|
|
22
|
-
}
|
|
23
|
-
},
|
|
24
|
-
"browser-agent": {
|
|
25
|
-
"command": "npx",
|
|
26
|
-
"args": ["-y", "-p", "mcp-scraper@latest", "browser-agent"],
|
|
36
|
+
"args": ["-y", "-p", "mcp-scraper@latest", "mcp-scraper-combined"],
|
|
27
37
|
"env": {
|
|
28
38
|
"MCP_SCRAPER_API_KEY": "sk_live_your_key"
|
|
29
39
|
}
|
|
@@ -32,10 +42,16 @@ Claude Desktop:
|
|
|
32
42
|
}
|
|
33
43
|
```
|
|
34
44
|
|
|
35
|
-
Existing MCP configs that use only `npx -y mcp-scraper` still work for the web intelligence server, but they do not automatically add
|
|
45
|
+
Existing MCP configs that use only `npx -y mcp-scraper` still work for the web intelligence server, but they do not automatically add browser tools. Switch to `mcp-scraper-combined` or add the second `browser-agent` config entry if you want browser tools. Use `mcp-scraper@latest` to force npm to resolve the newest published package whenever the MCP client starts a fresh `npx` process.
|
|
36
46
|
|
|
37
47
|
Claude Code:
|
|
38
48
|
|
|
49
|
+
```bash
|
|
50
|
+
claude mcp add mcp-scraper --scope user --env MCP_SCRAPER_API_KEY=sk_live_your_key -- npx -y -p mcp-scraper@latest mcp-scraper-combined
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Split-server raw config still works:
|
|
54
|
+
|
|
39
55
|
```bash
|
|
40
56
|
claude mcp add mcp-scraper --scope user --env MCP_SCRAPER_API_KEY=sk_live_your_key -- npx -y mcp-scraper@latest
|
|
41
57
|
claude mcp add browser-agent --scope user --env MCP_SCRAPER_API_KEY=sk_live_your_key -- npx -y -p mcp-scraper@latest browser-agent
|
|
@@ -43,6 +59,15 @@ claude mcp add browser-agent --scope user --env MCP_SCRAPER_API_KEY=sk_live_your
|
|
|
43
59
|
|
|
44
60
|
Codex config:
|
|
45
61
|
|
|
62
|
+
```toml
|
|
63
|
+
[mcp_servers.mcp-scraper]
|
|
64
|
+
command = "npx"
|
|
65
|
+
args = ["-y", "-p", "mcp-scraper@latest", "mcp-scraper-combined"]
|
|
66
|
+
env = { MCP_SCRAPER_API_KEY = "sk_live_your_key" }
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Split-server Codex config:
|
|
70
|
+
|
|
46
71
|
```toml
|
|
47
72
|
[mcp_servers.mcp-scraper]
|
|
48
73
|
command = "npx"
|
|
@@ -57,7 +82,7 @@ env = { MCP_SCRAPER_API_KEY = "sk_live_your_key" }
|
|
|
57
82
|
|
|
58
83
|
## Tools
|
|
59
84
|
|
|
60
|
-
###
|
|
85
|
+
### Web-intelligence tools
|
|
61
86
|
|
|
62
87
|
- `harvest_paa`
|
|
63
88
|
- `search_serp`
|
|
@@ -74,7 +99,7 @@ env = { MCP_SCRAPER_API_KEY = "sk_live_your_key" }
|
|
|
74
99
|
- `directory_workflow` — build city-by-city directory/prospecting datasets from Census place selection plus Google Maps searches. Use it for requests like "all cities over 100k population in Tennessee, then get 20 roofers from Maps." The saved CSV includes `source_location`, `result_position`, `business_name`, `review_stars`, `category`, `address`, `phone`, `hours_status`, `website_url`, `directions_url`, `place_url`, `cid`, `cid_decimal`, Census population, and ZIP groups. It captures Maps star ratings from list cards, not profile review counts.
|
|
75
100
|
- `credits_info`
|
|
76
101
|
|
|
77
|
-
###
|
|
102
|
+
### Browser-agent tools
|
|
78
103
|
|
|
79
104
|
- `browser_open` — open a live cloud browser session. Returns a `session_id`, a human `watch_url`, and the raw `live_view_url` when available.
|
|
80
105
|
- `browser_screenshot` — capture a screenshot plus visible text and clickable element coordinates.
|
|
@@ -93,13 +118,15 @@ env = { MCP_SCRAPER_API_KEY = "sk_live_your_key" }
|
|
|
93
118
|
|
|
94
119
|
For US local SERP tools (`harvest_paa` and `search_serp`), keep `proxyMode` at the default `location` unless you are debugging. Location mode uses fresh residential proxy IDs across retries and treats CAPTCHA, proxy tunnel failure, and wrong-location evidence as retryable before returning.
|
|
95
120
|
|
|
121
|
+
The MCPB bundle and `mcp-scraper-combined` expose both sections through one local MCP server. The split `mcp-scraper` entrypoint exposes only the web-intelligence tools, and the split `browser-agent` entrypoint exposes only the browser-agent tools.
|
|
122
|
+
|
|
96
123
|
Chaining tools (`maps_search`, `map_site_urls`, `youtube_harvest`, `facebook_ad_search`, `facebook_page_intel`) advertise an `outputSchema` and return `structuredContent` with the IDs and URLs needed by the next tool. All tools carry MCP annotations (`readOnlyHint: true`, `openWorldHint: true` for live-web tools).
|
|
97
124
|
|
|
98
|
-
The hosted MCP endpoint at `https://mcpscraper.dev/mcp` exposes the 14
|
|
125
|
+
The hosted MCP endpoint at `https://mcpscraper.dev/mcp` exposes the 14 web-intelligence tools plus `capture_serp_snapshot` and `capture_serp_page_snapshots` (16 total). Browser-agent tools are local stdio tools backed by the REST API under `https://mcpscraper.dev/agent/*`.
|
|
99
126
|
|
|
100
127
|
## Resources
|
|
101
128
|
|
|
102
|
-
The `mcp-scraper` NPX stdio
|
|
129
|
+
The `mcp-scraper` and `mcp-scraper-combined` NPX stdio servers also expose saved reports as MCP resources: `resources/list` returns the most recent Markdown reports from your output directory as `report://` URIs, and `resources/read` returns their content — so an MCP client can pull prior research into context without re-scraping or spending credits. The hosted endpoint does not expose resources (it saves no files).
|
|
103
130
|
|
|
104
131
|
## Environment
|
|
105
132
|
|
|
@@ -109,7 +136,7 @@ The `mcp-scraper` NPX stdio server also exposes saved reports as MCP resources:
|
|
|
109
136
|
- `MCP_SCRAPER_SAVE_REPORTS=false` disables automatic Markdown report files.
|
|
110
137
|
- `MCP_SCRAPER_KEY_PATH` is optional. When no API key env var is set, the server also reads `~/.mcp-scraper-key` for compatibility with older installs.
|
|
111
138
|
|
|
112
|
-
Every web intelligence tool call made through
|
|
139
|
+
Every web intelligence tool call made through `mcp-scraper` or `mcp-scraper-combined` saves a full Markdown report locally by default and returns the file path in the MCP response. The hosted `/mcp` endpoint returns reports inline only and never writes files. Browser replay downloads are saved by `browser_replay_download` under `MCP_SCRAPER_OUTPUT_DIR/browser-replays`.
|
|
113
140
|
|
|
114
141
|
## Updating Existing Installs
|
|
115
142
|
|
|
@@ -117,6 +144,12 @@ Hosted API and website changes deploy immediately to `https://mcpscraper.dev`. L
|
|
|
117
144
|
|
|
118
145
|
Recommended config for update-friendly installs:
|
|
119
146
|
|
|
147
|
+
```bash
|
|
148
|
+
npx -y -p mcp-scraper@latest mcp-scraper-combined
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Split-server config:
|
|
152
|
+
|
|
120
153
|
```bash
|
|
121
154
|
npx -y mcp-scraper@latest
|
|
122
155
|
npx -y -p mcp-scraper@latest browser-agent
|
|
@@ -129,13 +162,19 @@ npm update -g mcp-scraper
|
|
|
129
162
|
npm install mcp-scraper@latest
|
|
130
163
|
```
|
|
131
164
|
|
|
132
|
-
Users who do not update can keep using the tools their local package already advertises, but they will not see newly added local stdio tools, schemas, or AI-facing descriptions. For example, a client running an older local package cannot call `directory_workflow` through stdio even if the hosted API already supports it. Users who configured only `mcp-scraper` must add `browser-agent` separately; MCP clients do not auto-create a second server entry from an existing config.
|
|
165
|
+
Users who do not update can keep using the tools their local package already advertises, but they will not see newly added local stdio tools, schemas, or AI-facing descriptions. For example, a client running an older local package cannot call `directory_workflow` through stdio even if the hosted API already supports it. Users who configured only `mcp-scraper` must switch to `mcp-scraper-combined` or add `browser-agent` separately; MCP clients do not auto-create a second server entry from an existing config.
|
|
133
166
|
|
|
134
167
|
## Branded One-Click Installs
|
|
135
168
|
|
|
136
169
|
Raw `npx` MCP installs are command/config based. They do not provide a reliable user-facing install card, logo, or setup screen inside MCP clients. Do not print marketing text to stdout from an MCP server; stdout is reserved for JSON-RPC protocol messages.
|
|
137
170
|
|
|
138
|
-
For a branded Claude Desktop install, package MCP Scraper as an MCPB Desktop Extension.
|
|
171
|
+
For a branded Claude Desktop install, package MCP Scraper as an MCPB Desktop Extension. The repository now builds one combined MCPB bundle with a generated icon, `manifest.json`, bundled runtime dependencies, and `user_config` fields for API-key setup, API URL, and output folder.
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
npm run build:mcpb
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
The bundle uses `mcp-scraper-combined` internally, so the user installs `MCP Scraper` once and gets web-intelligence tools plus live browser tools in one MCP server.
|
|
139
178
|
|
|
140
179
|
## Development
|
|
141
180
|
|
package/dist/bin/api-server.cjs
CHANGED
|
@@ -17082,7 +17082,7 @@ var PACKAGE_VERSION;
|
|
|
17082
17082
|
var init_version = __esm({
|
|
17083
17083
|
"src/version.ts"() {
|
|
17084
17084
|
"use strict";
|
|
17085
|
-
PACKAGE_VERSION = "0.2.2";
|
|
17085
|
+
PACKAGE_VERSION = "0.2.4";
|
|
17086
17086
|
}
|
|
17087
17087
|
});
|
|
17088
17088
|
|
|
@@ -17513,10 +17513,14 @@ function registerSavedReportResources(server) {
|
|
|
17513
17513
|
);
|
|
17514
17514
|
}
|
|
17515
17515
|
function buildPaaExtractorMcpServer(executor, options = {}) {
|
|
17516
|
+
const server = new import_mcp.McpServer({ name: "mcp-scraper", version: PACKAGE_VERSION });
|
|
17517
|
+
registerPaaExtractorMcpTools(server, executor, options);
|
|
17518
|
+
return server;
|
|
17519
|
+
}
|
|
17520
|
+
function registerPaaExtractorMcpTools(server, executor, options = {}) {
|
|
17516
17521
|
const savesReports = options.savesReportsLocally !== false;
|
|
17517
17522
|
const reportNote = savesReports ? " Saves a full Markdown report locally." : " Reports are returned inline; no files are saved on this hosted endpoint.";
|
|
17518
17523
|
const withReportNote = (description) => `${description}${reportNote}`;
|
|
17519
|
-
const server = new import_mcp.McpServer({ name: "mcp-scraper", version: PACKAGE_VERSION });
|
|
17520
17524
|
if (savesReports) registerSavedReportResources(server);
|
|
17521
17525
|
server.registerTool("harvest_paa", {
|
|
17522
17526
|
title: "Google PAA + SERP Harvest",
|
|
@@ -17620,7 +17624,6 @@ function buildPaaExtractorMcpServer(executor, options = {}) {
|
|
|
17620
17624
|
openWorldHint: false
|
|
17621
17625
|
}
|
|
17622
17626
|
}, async (input) => formatCreditsInfo(await executor.creditsInfo(input), input));
|
|
17623
|
-
return server;
|
|
17624
17627
|
}
|
|
17625
17628
|
var import_mcp, import_node_fs5, import_node_path8;
|
|
17626
17629
|
var init_paa_mcp_server = __esm({
|