@stealth-scraper/mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +91 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +56 -0
- package/dist/index.js.map +1 -0
- package/dist/tools.d.ts +39 -0
- package/dist/tools.js +176 -0
- package/dist/tools.js.map +1 -0
- package/package.json +50 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Stealth Scraper
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# @stealth-scraper/mcp
|
|
2
|
+
|
|
3
|
+
Model Context Protocol (MCP) server for [Stealth Scraper](https://stealthscraper.dev). Plugs the anti-bot-resistant web scraping API directly into Claude Desktop, Cursor, Cline, and any other MCP-aware agent.
|
|
4
|
+
|
|
5
|
+
> **Status:** beta. Public API is stable; we follow semver from `1.0.0` onward.
|
|
6
|
+
|
|
7
|
+
## What it does
|
|
8
|
+
|
|
9
|
+
Exposes four tools to the agent:
|
|
10
|
+
|
|
11
|
+
| Tool | What it does |
|
|
12
|
+
| -------------------- | ---------------------------------------------------------------------------- |
|
|
13
|
+
| `scrape_url` | Stealth-scrape a URL, with optional natural-language hints about what to extract. |
|
|
14
|
+
| `extract_structured` | Run a one-shot extract with an inline schema (list of fields + CSS selectors). |
|
|
15
|
+
| `list_templates` | List the user's saved extraction recipes. |
|
|
16
|
+
| `run_template` | Run a saved template against a target URL. |
|
|
17
|
+
|
|
18
|
+
## Claude Desktop setup
|
|
19
|
+
|
|
20
|
+
Edit `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows):
|
|
21
|
+
|
|
22
|
+
```json
|
|
23
|
+
{
|
|
24
|
+
"mcpServers": {
|
|
25
|
+
"stealth-scraper": {
|
|
26
|
+
"command": "npx",
|
|
27
|
+
"args": ["@stealth-scraper/mcp"],
|
|
28
|
+
"env": {
|
|
29
|
+
"STEALTH_SCRAPER_API_KEY": "ssk_..."
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Restart Claude Desktop. You should see a hammer/tools icon in the input area listing the four tools.
|
|
37
|
+
|
|
38
|
+
## Cursor / Cline / generic stdio
|
|
39
|
+
|
|
40
|
+
Any client that supports an MCP stdio server can use:
|
|
41
|
+
|
|
42
|
+
```jsonc
|
|
43
|
+
{
|
|
44
|
+
"command": "npx",
|
|
45
|
+
"args": ["@stealth-scraper/mcp"],
|
|
46
|
+
"env": { "STEALTH_SCRAPER_API_KEY": "ssk_..." }
|
|
47
|
+
}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
For a custom backend (self-hosted), also set `STEALTH_SCRAPER_BASE_URL=https://your-host`.
|
|
51
|
+
|
|
52
|
+
## Example agent prompts
|
|
53
|
+
|
|
54
|
+
> Use stealth-scraper to grab the top 20 stories from Hacker News with titles, scores, and links.
|
|
55
|
+
|
|
56
|
+
> List my saved scraper templates, then run the "Amazon product price" one against `https://www.amazon.com/dp/B08N5WRWNW`.
|
|
57
|
+
|
|
58
|
+
## Local install
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
npm install -g @stealth-scraper/mcp
|
|
62
|
+
stealth-scraper-mcp # runs the server on stdio
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Development
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
npm install
|
|
69
|
+
npm run build # produces dist/index.js (the bin)
|
|
70
|
+
STEALTH_SCRAPER_API_KEY=ssk_test node dist/index.js
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Smoke test against the local backend with an MCP inspector:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
npx @modelcontextprotocol/inspector node dist/index.js
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Publishing
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
npm install
|
|
83
|
+
npm run build
|
|
84
|
+
npm publish --access public # under the @stealth-scraper org
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
> First-time only: create the npm org at `https://www.npmjs.com/org/create` with name `stealth-scraper`.
|
|
88
|
+
|
|
89
|
+
## License
|
|
90
|
+
|
|
91
|
+
MIT
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* MCP server entry point for Stealth Scraper.
|
|
4
|
+
*
|
|
5
|
+
* Connects via stdio (the transport Claude Desktop / Cursor use by default).
|
|
6
|
+
* The API key comes from the `STEALTH_SCRAPER_API_KEY` env var — we fail
|
|
7
|
+
* fast with a clear message if it's missing so misconfiguration shows up
|
|
8
|
+
* before the model issues the first tool call.
|
|
9
|
+
*/
|
|
10
|
+
export {};
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* MCP server entry point for Stealth Scraper.
|
|
4
|
+
*
|
|
5
|
+
* Connects via stdio (the transport Claude Desktop / Cursor use by default).
|
|
6
|
+
* The API key comes from the `STEALTH_SCRAPER_API_KEY` env var — we fail
|
|
7
|
+
* fast with a clear message if it's missing so misconfiguration shows up
|
|
8
|
+
* before the model issues the first tool call.
|
|
9
|
+
*/
|
|
10
|
+
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
11
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
12
|
+
import { CallToolRequestSchema, ListToolsRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
|
|
13
|
+
import { StealthClient } from "stealth-scraper";
|
|
14
|
+
import { TOOL_DEFS, dispatch } from "./tools.js";
|
|
15
|
+
const SERVER_NAME = "stealth-scraper";
const SERVER_VERSION = "0.1.0";

/**
 * Start the Stealth Scraper MCP server on the stdio transport.
 *
 * Reads `STEALTH_SCRAPER_API_KEY` (required) and `STEALTH_SCRAPER_BASE_URL`
 * (optional) from the environment, builds a `StealthClient`, registers the
 * ListTools and CallTool handlers, and connects the server to stdio.
 * When the API key is absent the process prints a hint to stderr and exits
 * with status 1.
 *
 * @returns {Promise<void>} Resolves once the transport is connected.
 */
async function main() {
    const {
        STEALTH_SCRAPER_API_KEY: apiKey,
        STEALTH_SCRAPER_BASE_URL: baseUrl,
    } = process.env;

    if (!apiKey) {
        // Diagnostics must go to stderr: on the stdio transport, stdout
        // carries the MCP wire protocol.
        const missingKeyHelp = "[stealth-scraper-mcp] STEALTH_SCRAPER_API_KEY env var is required.\n" +
            "Add it to your MCP client config, e.g. Claude Desktop:\n" +
            ' { "env": { "STEALTH_SCRAPER_API_KEY": "ssk_..." } }\n';
        process.stderr.write(missingKeyHelp);
        process.exit(1);
    }

    // Only pass `baseUrl` through when it is set to a non-empty value.
    const clientOptions = baseUrl ? { apiKey, baseUrl } : { apiKey };
    const client = new StealthClient(clientOptions);

    const serverInfo = { name: SERVER_NAME, version: SERVER_VERSION };
    const serverOptions = { capabilities: { tools: {} } };
    const server = new Server(serverInfo, serverOptions);

    // ListTools: answer with the static tool catalogue, whose input schemas
    // are plain JSON Schema objects.
    server.setRequestHandler(ListToolsRequestSchema, async () => {
        return { tools: TOOL_DEFS };
    });

    // CallTool: forward the tool name and arguments to the dispatcher.
    // NOTE: per the upstream source comment, the TypeScript source casts this
    // handler to `any` so that changes to the SDK's ServerResult union do not
    // break the build; no trace of that cast remains in this compiled output.
    server.setRequestHandler(CallToolRequestSchema, async ({ params }) => {
        const toolArgs = params.arguments ?? {};
        return dispatch(client, params.name, toolArgs);
    });

    await server.connect(new StdioServerTransport());
    process.stderr.write(`[stealth-scraper-mcp] connected via stdio (v${SERVER_VERSION})\n`);
}
|
|
52
|
+
/**
 * Last-resort handler for a rejected `main()`: report the failure on stderr
 * (stdout is reserved for the MCP wire protocol) and exit with status 1.
 */
function reportFatal(reason) {
    process.stderr.write(`[stealth-scraper-mcp] fatal: ${reason}\n`);
    process.exit(1);
}

main().catch((reason) => reportFatal(reason));
|
|
56
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA;;;;;;;GAOG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EACL,qBAAqB,EACrB,sBAAsB,GACvB,MAAM,oCAAoC,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAEhD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEjD,MAAM,WAAW,GAAG,iBAAiB,CAAC;AACtC,MAAM,cAAc,GAAG,OAAO,CAAC;AAE/B,KAAK,UAAU,IAAI;IACjB,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC;IACnD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,uEAAuE;QACvE,gDAAgD;QAChD,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,sEAAsE;YACpE,0DAA0D;YAC1D,yDAAyD,CAC5D,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC;IACrD,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC;QAC/B,MAAM;QACN,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAChC,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,IAAI,MAAM,CACvB,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,cAAc,EAAE,EAC9C,EAAE,YAAY,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,CAChC,CAAC;IAEF,sEAAsE;IACtE,qEAAqE;IACrE,MAAM,CAAC,iBAAiB,CAAC,sBAAsB,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC;QAC5D,KAAK,EAAE,SAAS;KACjB,CAAC,CAAC,CAAC;IAEJ,iDAAiD;IACjD,uEAAuE;IACvE,mEAAmE;IACnE,kEAAkE;IAClE,kDAAkD;IAClD,MAAM,CAAC,iBAAiB,CAAC,qBAAqB,EAAE,CAAC,KAAK,EAAE,OAAY,EAAE,EAAE;QACtE,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC;QACjC,MAAM,IAAI,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,SAAS,IAAI,EAAE,CAA4B,CAAC;QACzE,OAAO,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;IACtC,CAAC,CAAQ,CAAC,CAAC;IAEX,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,+CAA+C,cAAc,KAAK,CACnE,CAAC;AACJ,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,gCAAgC,GAAG,IAAI,CAAC,CAAC;IAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
package/dist/tools.d.ts
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool implementations for the Stealth Scraper MCP server.
|
|
3
|
+
*
|
|
4
|
+
* Each tool wraps a method on `StealthClient` and returns a MCP-compliant
|
|
5
|
+
* `{ content: [{ type: 'text', text: ... }] }` envelope. Errors from the
|
|
6
|
+
* SDK are caught at the dispatcher boundary and surfaced as MCP `isError`
|
|
7
|
+
* responses so the calling agent sees a clear failure mode instead of a
|
|
8
|
+
* raw stack trace.
|
|
9
|
+
*
|
|
10
|
+
* The schemas below are plain JSON Schema (the format the MCP SDK expects
|
|
11
|
+
* for `inputSchema`). We keep them minimal — the SDK enforces the rest.
|
|
12
|
+
*/
|
|
13
|
+
import { StealthClient } from "stealth-scraper";
|
|
14
|
+
/**
 * Static description of one tool advertised to MCP clients.
 */
export interface ToolDef {
    /** Tool identifier; `dispatch` selects its handler by this value. */
    name: string;
    /** Human/model-readable summary of what the tool does. */
    description: string;
    /** JSON Schema (always an object schema) describing the tool's arguments. */
    inputSchema: {
        type: "object";
        properties: { [key: string]: unknown };
        required?: Array<string>;
        additionalProperties?: boolean;
    };
}
|
|
24
|
+
/** Catalogue of tool definitions; the server entry point returns it from its ListTools handler. */
export declare const TOOL_DEFS: ToolDef[];
|
|
25
|
+
/**
 * Result envelope returned by `dispatch` for every tool call.
 */
export interface ToolResult {
    /** Text content items; each `text` carries the serialized result or error body. */
    content: {
        type: "text";
        text: string;
    }[];
    /** Present (and `true`) on failure envelopes. */
    isError?: boolean;
}
|
|
32
|
+
/**
 * Execute one tool call and return its result envelope.
 *
 * The handler is chosen by `name`. Failures are reported through an
 * `isError` envelope so the calling agent receives a structured failure
 * rather than a crashed server.
 *
 * @param client Stealth Scraper SDK client used to perform the request.
 * @param name   Name of the tool to run.
 * @param args   Arguments supplied for the tool call.
 * @returns The tool's result envelope.
 */
export declare function dispatch(
    client: StealthClient,
    name: string,
    args: Record<string, unknown>,
): Promise<ToolResult>;
|
package/dist/tools.js
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool implementations for the Stealth Scraper MCP server.
|
|
3
|
+
*
|
|
4
|
+
* Each tool wraps a method on `StealthClient` and returns a MCP-compliant
|
|
5
|
+
* `{ content: [{ type: 'text', text: ... }] }` envelope. Errors from the
|
|
6
|
+
* SDK are caught at the dispatcher boundary and surfaced as MCP `isError`
|
|
7
|
+
* responses so the calling agent sees a clear failure mode instead of a
|
|
8
|
+
* raw stack trace.
|
|
9
|
+
*
|
|
10
|
+
* The schemas below are plain JSON Schema (the format the MCP SDK expects
|
|
11
|
+
* for `inputSchema`). We keep them minimal — the SDK enforces the rest.
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Static catalogue of the tools this server advertises.
 *
 * Each entry carries the tool `name` (the key `dispatch` switches on), a
 * `description` for the calling model, and a JSON Schema `inputSchema`
 * describing its arguments. Every argument carries its own `description`
 * so the model does not have to guess what, for example, `template_id`
 * refers to.
 */
export const TOOL_DEFS = [
    {
        name: "scrape_url",
        description: "Stealth-scrape a URL with optional natural-language field hints. Uses Stealth Scraper's anti-bot-resistant browser pool. Returns extracted structured data plus the page title.",
        inputSchema: {
            type: "object",
            properties: {
                url: { type: "string", description: "URL to scrape (https://...)" },
                fields: {
                    type: "string",
                    description: "Optional natural-language description of what to extract, e.g. 'top 20 story titles, scores, and links'. If omitted, returns the raw page element catalog.",
                },
            },
            required: ["url"],
            additionalProperties: false,
        },
    },
    {
        name: "extract_structured",
        description: "Run a one-shot structured extraction against a URL using an inline schema (list of fields with CSS selectors).",
        inputSchema: {
            type: "object",
            properties: {
                url: { type: "string", description: "URL to scrape (https://...)" },
                schema: {
                    type: "array",
                    description: "Array of field objects, e.g. [{ name: 'title', selector: 'h1' }, { name: 'price', selector: '.price', attribute: 'data-price' }]",
                    items: {
                        type: "object",
                        properties: {
                            name: { type: "string", description: "Name of the field to extract." },
                            selector: { type: "string", description: "CSS selector for the field, e.g. 'h1'." },
                            attribute: {
                                type: "string",
                                description: "Optional attribute name to use for the field, e.g. 'data-price'.",
                            },
                        },
                        required: ["name"],
                    },
                },
            },
            required: ["url", "schema"],
            additionalProperties: false,
        },
    },
    {
        name: "list_templates",
        description: "List the authenticated user's saved extraction templates / recipes. Returns id, name, and source URL for each.",
        inputSchema: {
            type: "object",
            properties: {},
            additionalProperties: false,
        },
    },
    {
        name: "run_template",
        description: "Run a saved Stealth Scraper template by id against a target URL.",
        inputSchema: {
            type: "object",
            properties: {
                template_id: {
                    type: "string",
                    description: "Id of a saved template, as returned by list_templates.",
                },
                url: { type: "string", description: "URL to scrape (https://...)" },
            },
            required: ["template_id", "url"],
            additionalProperties: false,
        },
    },
];
|
|
78
|
+
/**
 * Wrap a successful result in an MCP text-content envelope.
 *
 * @param {unknown} payload - Value to serialize as pretty-printed JSON (2-space indent).
 * @returns {{ content: Array<{ type: "text", text: string }> }} Success envelope.
 */
function ok(payload) {
    const text = JSON.stringify(payload, null, 2);
    const item = { type: "text", text };
    return { content: [item] };
}
|
|
83
|
+
/**
 * Build an MCP failure envelope (`isError: true`) whose text is a JSON body.
 *
 * The body always has an `error` key holding `message`; when `extras` is
 * given, its properties are merged in after it. Properties whose value is
 * `undefined` disappear during JSON serialization.
 *
 * @param {string} message - Failure description stored under `error`.
 * @param {object} [extras] - Optional additional fields for the body.
 * @returns {{ isError: true, content: Array<{ type: "text", text: string }> }} Failure envelope.
 */
function err(message, extras) {
    // Spreading an empty object when `extras` is falsy adds nothing.
    const body = { error: message, ...(extras || {}) };
    const text = JSON.stringify(body, null, 2);
    return {
        isError: true,
        content: [{ type: "text", text }],
    };
}
|
|
90
|
+
/**
 * Convert whatever a tool handler threw into an MCP error envelope.
 *
 * Known SDK error fields (kind, vendor, plan usage, retry hints, HTTP
 * status) are copied through when present so the model can explain *why*
 * the call failed; fields that are absent are dropped during JSON
 * serialization.
 *
 * @param {unknown} thrown - The caught value; may be an Error, a primitive, or nullish.
 * @returns {object} Failure envelope produced by `err`.
 */
function toErrorResult(thrown) {
    // `throw null` / `throw undefined` are legal JavaScript. Reading fields
    // straight off the caught value would then throw inside the catch block
    // and make dispatch() reject, so fall back to an empty object.
    const details = thrown ?? {};
    return err(details.message ?? String(thrown), {
        type: details.name,
        kind: details.kind,
        vendor: details.vendor,
        suggestion: details.suggestion,
        used: details.used,
        limit: details.limit,
        upgrade_url: details.upgradeUrl,
        retry_after_s: details.retryAfterS,
        status_code: details.statusCode,
    });
}

/**
 * Dispatch a single tool call.
 *
 * Routes `name` to the matching `client` method:
 *   - `scrape_url`         -> `client.assistExtract` when `fields` is a non-empty
 *                             string, otherwise `client.snapshot` (elements
 *                             capped at 50 to keep the response small)
 *   - `extract_structured` -> `client.extract`
 *   - `list_templates`     -> `client.listTemplates`
 *   - `run_template`       -> `client.runTemplate`
 *
 * Missing required arguments and unknown tool names yield an error
 * envelope without calling the client. Anything thrown while handling the
 * call, including nullish or non-Error values, is converted to an error
 * envelope, so the returned promise does not reject because of a handler
 * failure.
 *
 * @param {object} client - Stealth Scraper SDK client.
 * @param {string} name - Tool name to run.
 * @param {object} args - Tool arguments; a nullish value is treated as `{}`.
 * @returns {Promise<object>} Success or failure envelope.
 */
export async function dispatch(client, name, args) {
    // A nullish `args` is reported as missing arguments rather than as an
    // opaque "Cannot read properties of null" error.
    const input = args ?? {};
    try {
        switch (name) {
            case "scrape_url": {
                const url = String(input.url ?? "");
                const fields = typeof input.fields === "string" ? input.fields : "";
                if (!url) {
                    return err("missing required arg: url");
                }
                if (fields) {
                    const r = await client.assistExtract({ url, description: fields });
                    return ok({
                        url: r.url,
                        title: r.title,
                        template: r.template,
                        fields: r.fields,
                    });
                }
                const snap = await client.snapshot(url);
                return ok({
                    url: snap.url,
                    title: snap.title,
                    element_count: snap.elementCount,
                    elements: snap.elements.slice(0, 50), // cap to keep response small
                });
            }
            case "extract_structured": {
                const url = String(input.url ?? "");
                const schema = input.schema ?? [];
                if (!url) {
                    return err("missing required arg: url");
                }
                if (!Array.isArray(schema) || schema.length === 0) {
                    return err("missing required arg: schema (non-empty array)");
                }
                const r = await client.extract({ url, template: schema });
                return ok({ url: r.url, title: r.title, fields: r.fields, errors: r.errors });
            }
            case "list_templates": {
                const templates = await client.listTemplates();
                return ok({
                    count: templates.length,
                    templates: templates.map((t) => ({
                        id: t.id,
                        name: t.name,
                        source_url: t.sourceUrl,
                        field_count: t.fields.length,
                    })),
                });
            }
            case "run_template": {
                const templateId = String(input.template_id ?? "");
                const url = String(input.url ?? "");
                if (!templateId) {
                    return err("missing required arg: template_id");
                }
                if (!url) {
                    return err("missing required arg: url");
                }
                const r = await client.runTemplate(templateId, url);
                return ok({ url: r.url, title: r.title, fields: r.fields, errors: r.errors });
            }
            default:
                return err(`unknown tool: ${name}`);
        }
    }
    catch (e) {
        return toErrorResult(e);
    }
}
|
|
176
|
+
//# sourceMappingURL=tools.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tools.js","sourceRoot":"","sources":["../src/tools.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAeH,MAAM,CAAC,MAAM,SAAS,GAAc;IAClC;QACE,IAAI,EAAE,YAAY;QAClB,WAAW,EACT,iLAAiL;QACnL,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6BAA6B,EAAE;gBACnE,MAAM,EAAE;oBACN,IAAI,EAAE,QAAQ;oBACd,WAAW,EACT,4JAA4J;iBAC/J;aACF;YACD,QAAQ,EAAE,CAAC,KAAK,CAAC;YACjB,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,oBAAoB;QAC1B,WAAW,EACT,gHAAgH;QAClH,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;gBACvB,MAAM,EAAE;oBACN,IAAI,EAAE,OAAO;oBACb,WAAW,EACT,kIAAkI;oBACpI,KAAK,EAAE;wBACL,IAAI,EAAE,QAAQ;wBACd,UAAU,EAAE;4BACV,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;4BACxB,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;4BAC5B,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;yBAC9B;wBACD,QAAQ,EAAE,CAAC,MAAM,CAAC;qBACnB;iBACF;aACF;YACD,QAAQ,EAAE,CAAC,KAAK,EAAE,QAAQ,CAAC;YAC3B,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,gBAAgB;QACtB,WAAW,EACT,gHAAgH;QAClH,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE,EAAE;YACd,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,cAAc;QACpB,WAAW,EAAE,kEAAkE;QAC/E,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;gBAC/B,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;aACxB;YACD,QAAQ,EAAE,CAAC,aAAa,EAAE,KAAK,CAAC;YAChC,oBAAoB,EAAE,KAAK;SAC5B;KACF;CACF,CAAC;AAOF,SAAS,EAAE,CAAC,OAAgB;IAC1B,OAAO;QACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;KACpE,CAAC;AACJ,CAAC;AAED,SAAS,GAAG,CAAC,OAAe,EAAE,MAAgC;IAC5D,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC;IACzE,OAAO;QACL,OAAO,EAAE,IAAI;QACb,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;KACjE,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,MAAqB,EACrB,IAAY,EACZ,IAA6B;IAE7B,IAAI,CAAC;QACH,QAAQ,IAAI,EAAE,CAAC;YACb,
KAAK,YAAY,CAAC,CAAC,CAAC;gBAClB,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC;gBACnC,MAAM,MAAM,GAAG,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;gBAClE,IAAI,CAAC,GAAG;oBAAE,OAAO,GAAG,CAAC,2BAA2B,CAAC,CAAC;gBAClD,IAAI,MAAM,EAAE,CAAC;oBACX,MAAM,CAAC,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC,CAAC;oBACnE,OAAO,EAAE,CAAC;wBACR,GAAG,EAAE,CAAC,CAAC,GAAG;wBACV,KAAK,EAAE,CAAC,CAAC,KAAK;wBACd,QAAQ,EAAE,CAAC,CAAC,QAAQ;wBACpB,MAAM,EAAE,CAAC,CAAC,MAAM;qBACjB,CAAC,CAAC;gBACL,CAAC;gBACD,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;gBACxC,OAAO,EAAE,CAAC;oBACR,GAAG,EAAE,IAAI,CAAC,GAAG;oBACb,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,aAAa,EAAE,IAAI,CAAC,YAAY;oBAChC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,6BAA6B;iBACpE,CAAC,CAAC;YACL,CAAC;YACD,KAAK,oBAAoB,CAAC,CAAC,CAAC;gBAC1B,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC;gBACnC,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,IAAI,EAAE,CAAoB,CAAC;gBACtD,IAAI,CAAC,GAAG;oBAAE,OAAO,GAAG,CAAC,2BAA2B,CAAC,CAAC;gBAClD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;oBAC/C,OAAO,GAAG,CAAC,gDAAgD,CAAC,CAAC;gBAC/D,MAAM,CAAC,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAC;gBAC1D,OAAO,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;YAChF,CAAC;YACD,KAAK,gBAAgB,CAAC,CAAC,CAAC;gBACtB,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,aAAa,EAAE,CAAC;gBAC/C,OAAO,EAAE,CAAC;oBACR,KAAK,EAAE,SAAS,CAAC,MAAM;oBACvB,SAAS,EAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;wBAC/B,EAAE,EAAE,CAAC,CAAC,EAAE;wBACR,IAAI,EAAE,CAAC,CAAC,IAAI;wBACZ,UAAU,EAAE,CAAC,CAAC,SAAS;wBACvB,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,MAAM;qBAC7B,CAAC,CAAC;iBACJ,CAAC,CAAC;YACL,CAAC;YACD,KAAK,cAAc,CAAC,CAAC,CAAC;gBACpB,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;gBAClD,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC;gBACnC,IAAI,CAAC,UAA
U;oBAAE,OAAO,GAAG,CAAC,mCAAmC,CAAC,CAAC;gBACjE,IAAI,CAAC,GAAG;oBAAE,OAAO,GAAG,CAAC,2BAA2B,CAAC,CAAC;gBAClD,MAAM,CAAC,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;gBACpD,OAAO,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;YAChF,CAAC;YACD;gBACE,OAAO,GAAG,CAAC,iBAAiB,IAAI,EAAE,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,yEAAyE;QACzE,qEAAqE;QACrE,gEAAgE;QAChE,MAAM,MAAM,GAAG,CAWd,CAAC;QACF,OAAO,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,CAAC,CAAC,EAAE;YACtC,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,WAAW,EAAE,MAAM,CAAC,UAAU;YAC9B,aAAa,EAAE,MAAM,CAAC,WAAW;YACjC,WAAW,EAAE,MAAM,CAAC,UAAU;SAC/B,CAAC,CAAC;IACL,CAAC;AACH,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@stealth-scraper/mcp",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Model Context Protocol server for Stealth Scraper — gives Claude, Cursor, and other MCP-aware agents direct access to anti-bot-resistant web scraping.",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"author": "Stealth Scraper <hello@stealthscraper.dev>",
|
|
7
|
+
"homepage": "https://stealthscraper.dev",
|
|
8
|
+
"repository": {
|
|
9
|
+
"type": "git",
|
|
10
|
+
"url": "git+https://github.com/Rusheesonu/Stealth-Scraper.git",
|
|
11
|
+
"directory": "sdks/mcp"
|
|
12
|
+
},
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/Rusheesonu/Stealth-Scraper/issues"
|
|
15
|
+
},
|
|
16
|
+
"keywords": [
|
|
17
|
+
"mcp",
|
|
18
|
+
"model-context-protocol",
|
|
19
|
+
"claude",
|
|
20
|
+
"ai-agents",
|
|
21
|
+
"scraping",
|
|
22
|
+
"anti-bot",
|
|
23
|
+
"stealth-scraper"
|
|
24
|
+
],
|
|
25
|
+
"type": "module",
|
|
26
|
+
"main": "./dist/index.js",
|
|
27
|
+
"bin": "./dist/index.js",
|
|
28
|
+
"files": [
|
|
29
|
+
"dist",
|
|
30
|
+
"README.md",
|
|
31
|
+
"LICENSE"
|
|
32
|
+
],
|
|
33
|
+
"engines": {
|
|
34
|
+
"node": ">=18"
|
|
35
|
+
},
|
|
36
|
+
"scripts": {
|
|
37
|
+
"build": "tsc && chmod +x dist/index.js",
|
|
38
|
+
"start": "node dist/index.js",
|
|
39
|
+
"dev": "tsc --watch",
|
|
40
|
+
"prepublishOnly": "npm run build"
|
|
41
|
+
},
|
|
42
|
+
"dependencies": {
|
|
43
|
+
"@modelcontextprotocol/sdk": "^1.0.4",
|
|
44
|
+
"stealth-scraper": "file:../typescript"
|
|
45
|
+
},
|
|
46
|
+
"devDependencies": {
|
|
47
|
+
"@types/node": "^20.11.0",
|
|
48
|
+
"typescript": "^5.6.0"
|
|
49
|
+
}
|
|
50
|
+
}
|