mcp-scraper 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,10 +4,27 @@ MCP Scraper is an MCP server for live web intelligence tools backed by `https://
4
4
 
5
5
  ## Install
6
6
 
7
- Use the npm package from any MCP client that can run local stdio commands. MCP Scraper ships two separate local MCP servers:
7
+ Use the MCPB Desktop Extension for the branded Claude Desktop install, or use the npm package from any MCP client that can run local stdio commands.
8
+
9
+ MCP Scraper ships three local stdio entrypoints:
8
10
 
9
11
  - `mcp-scraper` — live web intelligence, SERP, PAA, site extraction, YouTube, Facebook, Maps, directory, and credit tools.
10
12
  - `browser-agent` — an agent-controlled live cloud browser with screenshots, clicks, typing, scrolling, live watch URLs, replay links, and MP4 replay download.
13
+ - `mcp-scraper-combined` — one local MCP server that exposes both tool sets. This is the entrypoint used by the MCPB Desktop Extension.
14
+
15
+ ### Claude Desktop MCPB
16
+
17
+ Build the branded one-click bundle:
18
+
19
+ ```bash
20
+ npm run build:mcpb
21
+ ```
22
+
23
+ The generated bundle is written to `build/mcpb/mcp-scraper-<version>.mcpb`. Install it by opening the file or dragging it into Claude Desktop. Claude displays the `MCP Scraper` install card, icon, and API-key configuration field from the bundle manifest.
24
+
25
+ The MCPB install exposes the same web-intelligence tools as `mcp-scraper` plus all `browser_*` tools from `browser-agent` through one server.
26
+
27
+ ### Raw stdio config
11
28
 
12
29
  Claude Desktop:
13
30
 
@@ -16,14 +33,7 @@ Claude Desktop:
16
33
  "mcpServers": {
17
34
  "mcp-scraper": {
18
35
  "command": "npx",
19
- "args": ["-y", "mcp-scraper@latest"],
20
- "env": {
21
- "MCP_SCRAPER_API_KEY": "sk_live_your_key"
22
- }
23
- },
24
- "browser-agent": {
25
- "command": "npx",
26
- "args": ["-y", "-p", "mcp-scraper@latest", "browser-agent"],
36
+ "args": ["-y", "-p", "mcp-scraper@latest", "mcp-scraper-combined"],
27
37
  "env": {
28
38
  "MCP_SCRAPER_API_KEY": "sk_live_your_key"
29
39
  }
@@ -32,10 +42,16 @@ Claude Desktop:
32
42
  }
33
43
  ```
34
44
 
35
- Existing MCP configs that use only `npx -y mcp-scraper` still work for the web intelligence server, but they do not automatically add the `browser-agent` server. Add the second config entry if you want browser tools. Use `mcp-scraper@latest` to force npm to resolve the newest published package whenever the MCP client starts a fresh `npx` process.
45
+ Existing MCP configs that use only `npx -y mcp-scraper` still work for the web-intelligence server, but they do not automatically add browser tools. Switch to `mcp-scraper-combined` or add the second `browser-agent` config entry if you want browser tools. Use `mcp-scraper@latest` to force npm to resolve the newest published package whenever the MCP client starts a fresh `npx` process.
36
46
 
37
47
  Claude Code:
38
48
 
49
+ ```bash
50
+ claude mcp add mcp-scraper --scope user --env MCP_SCRAPER_API_KEY=sk_live_your_key -- npx -y -p mcp-scraper@latest mcp-scraper-combined
51
+ ```
52
+
53
+ Split-server raw config still works:
54
+
39
55
  ```bash
40
56
  claude mcp add mcp-scraper --scope user --env MCP_SCRAPER_API_KEY=sk_live_your_key -- npx -y mcp-scraper@latest
41
57
  claude mcp add browser-agent --scope user --env MCP_SCRAPER_API_KEY=sk_live_your_key -- npx -y -p mcp-scraper@latest browser-agent
@@ -43,6 +59,15 @@ claude mcp add browser-agent --scope user --env MCP_SCRAPER_API_KEY=sk_live_your
43
59
 
44
60
  Codex config:
45
61
 
62
+ ```toml
63
+ [mcp_servers.mcp-scraper]
64
+ command = "npx"
65
+ args = ["-y", "-p", "mcp-scraper@latest", "mcp-scraper-combined"]
66
+ env = { MCP_SCRAPER_API_KEY = "sk_live_your_key" }
67
+ ```
68
+
69
+ Split-server Codex config:
70
+
46
71
  ```toml
47
72
  [mcp_servers.mcp-scraper]
48
73
  command = "npx"
@@ -57,7 +82,7 @@ env = { MCP_SCRAPER_API_KEY = "sk_live_your_key" }
57
82
 
58
83
  ## Tools
59
84
 
60
- ### `mcp-scraper` stdio tools
85
+ ### Web-intelligence tools
61
86
 
62
87
  - `harvest_paa`
63
88
  - `search_serp`
@@ -74,7 +99,7 @@ env = { MCP_SCRAPER_API_KEY = "sk_live_your_key" }
74
99
  - `directory_workflow` — build city-by-city directory/prospecting datasets from Census place selection plus Google Maps searches. Use it for requests like "all cities over 100k population in Tennessee, then get 20 roofers from Maps." The saved CSV includes `source_location`, `result_position`, `business_name`, `review_stars`, `category`, `address`, `phone`, `hours_status`, `website_url`, `directions_url`, `place_url`, `cid`, `cid_decimal`, Census population, and ZIP groups. It captures Maps star ratings from list cards, not profile review counts.
75
100
  - `credits_info`
76
101
 
77
- ### `browser-agent` stdio tools
102
+ ### Browser-agent tools
78
103
 
79
104
  - `browser_open` — open a live cloud browser session. Returns a `session_id`, a human `watch_url`, and the raw `live_view_url` when available.
80
105
  - `browser_screenshot` — capture a screenshot plus visible text and clickable element coordinates.
@@ -93,13 +118,15 @@ env = { MCP_SCRAPER_API_KEY = "sk_live_your_key" }
93
118
 
94
119
  For US local SERP tools (`harvest_paa` and `search_serp`), keep `proxyMode` at the default `location` unless you are debugging. Location mode uses fresh residential proxy IDs across retries and treats CAPTCHA, proxy tunnel failure, and wrong-location evidence as retryable before returning.
95
120
 
121
+ The MCPB bundle and `mcp-scraper-combined` expose both sections through one local MCP server. The split `mcp-scraper` entrypoint exposes only the web-intelligence tools, and the split `browser-agent` entrypoint exposes only the browser-agent tools.
122
+
96
123
  Chaining tools (`maps_search`, `map_site_urls`, `youtube_harvest`, `facebook_ad_search`, `facebook_page_intel`) advertise an `outputSchema` and return `structuredContent` with the IDs and URLs needed by the next tool. All tools carry MCP annotations (`readOnlyHint: true`, `openWorldHint: true` for live-web tools).
97
124
 
98
- The hosted MCP endpoint at `https://mcpscraper.dev/mcp` exposes the 14 `mcp-scraper` tools plus `capture_serp_snapshot` and `capture_serp_page_snapshots` (16 total). The `browser-agent` server is currently a separate local stdio server; its REST backing API lives under `https://mcpscraper.dev/agent/*`.
125
+ The hosted MCP endpoint at `https://mcpscraper.dev/mcp` exposes the 14 web-intelligence tools plus `capture_serp_snapshot` and `capture_serp_page_snapshots` (16 total). Browser-agent tools are local stdio tools backed by the REST API under `https://mcpscraper.dev/agent/*`.
99
126
 
100
127
  ## Resources
101
128
 
102
- The `mcp-scraper` NPX stdio server also exposes saved reports as MCP resources: `resources/list` returns the most recent Markdown reports from your output directory as `report://` URIs, and `resources/read` returns their content — so an MCP client can pull prior research into context without re-scraping or spending credits. The hosted endpoint does not expose resources (it saves no files).
129
+ The `mcp-scraper` and `mcp-scraper-combined` NPX stdio servers also expose saved reports as MCP resources: `resources/list` returns the most recent Markdown reports from your output directory as `report://` URIs, and `resources/read` returns their content — so an MCP client can pull prior research into context without re-scraping or spending credits. The hosted endpoint does not expose resources (it saves no files).
103
130
 
104
131
  ## Environment
105
132
 
@@ -109,7 +136,7 @@ The `mcp-scraper` NPX stdio server also exposes saved reports as MCP resources:
109
136
  - `MCP_SCRAPER_SAVE_REPORTS=false` disables automatic Markdown report files.
110
137
  - `MCP_SCRAPER_KEY_PATH` is optional. When no API key env var is set, the server also reads `~/.mcp-scraper-key` for compatibility with older installs.
111
138
 
112
- Every web intelligence tool call made through the `mcp-scraper` NPX stdio server saves a full Markdown report locally by default and returns the file path in the MCP response. The hosted `/mcp` endpoint returns reports inline only and never writes files. Browser replay downloads are saved by `browser_replay_download` under `MCP_SCRAPER_OUTPUT_DIR/browser-replays`.
139
+ Every web-intelligence tool call made through `mcp-scraper` or `mcp-scraper-combined` saves a full Markdown report locally by default and returns the file path in the MCP response. The hosted `/mcp` endpoint returns reports inline only and never writes files. Browser replay downloads are saved by `browser_replay_download` under `MCP_SCRAPER_OUTPUT_DIR/browser-replays`.
113
140
 
114
141
  ## Updating Existing Installs
115
142
 
@@ -117,6 +144,12 @@ Hosted API and website changes deploy immediately to `https://mcpscraper.dev`. L
117
144
 
118
145
  Recommended config for update-friendly installs:
119
146
 
147
+ ```bash
148
+ npx -y -p mcp-scraper@latest mcp-scraper-combined
149
+ ```
150
+
151
+ Split-server config:
152
+
120
153
  ```bash
121
154
  npx -y mcp-scraper@latest
122
155
  npx -y -p mcp-scraper@latest browser-agent
@@ -129,13 +162,19 @@ npm update -g mcp-scraper
129
162
  npm install mcp-scraper@latest
130
163
  ```
131
164
 
132
- Users who do not update can keep using the tools their local package already advertises, but they will not see newly added local stdio tools, schemas, or AI-facing descriptions. For example, a client running an older local package cannot call `directory_workflow` through stdio even if the hosted API already supports it. Users who configured only `mcp-scraper` must add `browser-agent` separately; MCP clients do not auto-create a second server entry from an existing config.
165
+ Users who do not update can keep using the tools their local package already advertises, but they will not see newly added local stdio tools, schemas, or AI-facing descriptions. For example, a client running an older local package cannot call `directory_workflow` through stdio even if the hosted API already supports it. Users who configured only `mcp-scraper` must switch to `mcp-scraper-combined` or add `browser-agent` separately; MCP clients do not auto-create a second server entry from an existing config.
133
166
 
134
167
  ## Branded One-Click Installs
135
168
 
136
169
  Raw `npx` MCP installs are command/config based. They do not provide a reliable user-facing install card, logo, or setup screen inside MCP clients. Do not print marketing text to stdout from an MCP server; stdout is reserved for JSON-RPC protocol messages.
137
170
 
138
- For a branded Claude Desktop install, package MCP Scraper as an MCPB Desktop Extension. An MCPB bundle can include a `manifest.json`, bundled server files/dependencies, `user_config` fields for API-key setup, and an optional `icon.png`. That is the right path for a designed install experience with a logo and guided configuration.
171
+ For a branded Claude Desktop install, package MCP Scraper as an MCPB Desktop Extension. The repository now builds one combined MCPB bundle with a generated icon, `manifest.json`, bundled runtime dependencies, and `user_config` fields for API-key setup, API URL, and output folder.
172
+
173
+ ```bash
174
+ npm run build:mcpb
175
+ ```
176
+
177
+ The bundle uses `mcp-scraper-combined` internally, so the user installs `MCP Scraper` once and gets web-intelligence tools plus live browser tools in one MCP server.
139
178
 
140
179
  ## Development
141
180
 
@@ -17082,7 +17082,7 @@ var PACKAGE_VERSION;
17082
17082
  var init_version = __esm({
17083
17083
  "src/version.ts"() {
17084
17084
  "use strict";
17085
- PACKAGE_VERSION = "0.2.1";
17085
+ PACKAGE_VERSION = "0.2.3";
17086
17086
  }
17087
17087
  });
17088
17088
 
@@ -17513,10 +17513,14 @@ function registerSavedReportResources(server) {
17513
17513
  );
17514
17514
  }
17515
17515
  function buildPaaExtractorMcpServer(executor, options = {}) {
17516
+ const server = new import_mcp.McpServer({ name: "mcp-scraper", version: PACKAGE_VERSION });
17517
+ registerPaaExtractorMcpTools(server, executor, options);
17518
+ return server;
17519
+ }
17520
+ function registerPaaExtractorMcpTools(server, executor, options = {}) {
17516
17521
  const savesReports = options.savesReportsLocally !== false;
17517
17522
  const reportNote = savesReports ? " Saves a full Markdown report locally." : " Reports are returned inline; no files are saved on this hosted endpoint.";
17518
17523
  const withReportNote = (description) => `${description}${reportNote}`;
17519
- const server = new import_mcp.McpServer({ name: "mcp-scraper", version: PACKAGE_VERSION });
17520
17524
  if (savesReports) registerSavedReportResources(server);
17521
17525
  server.registerTool("harvest_paa", {
17522
17526
  title: "Google PAA + SERP Harvest",
@@ -17620,7 +17624,6 @@ function buildPaaExtractorMcpServer(executor, options = {}) {
17620
17624
  openWorldHint: false
17621
17625
  }
17622
17626
  }, async (input) => formatCreditsInfo(await executor.creditsInfo(input), input));
17623
- return server;
17624
17627
  }
17625
17628
  var import_mcp, import_node_fs5, import_node_path8;
17626
17629
  var init_paa_mcp_server = __esm({