extractly-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -0
- package/index.js +73 -0
- package/package.json +28 -0
package/README.md
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# extractly-mcp
|
|
2
|
+
|
|
3
|
+
MCP server that exposes [Extractly](https://getextractly.com) as a tool: send a URL and a JSON extraction schema, and get structured JSON back.
|
|
4
|
+
|
|
5
|
+
## Install & run
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npx extractly-mcp
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Requires **Node.js 18+** (uses built-in `fetch`).
|
|
12
|
+
|
|
13
|
+
Get an API key at [getextractly.com](https://getextractly.com).
|
|
14
|
+
|
|
15
|
+
## Tool: `extract_structured_data`
|
|
16
|
+
|
|
17
|
+
- **url** — page to extract from
|
|
18
|
+
- **schema** — JSON object describing fields to extract
|
|
19
|
+
- **api_key** — your Extractly API key
|
|
20
|
+
|
|
21
|
+
The server calls `POST https://getextractly.com/api/v1/extract` with `url` and `schema` in the body and `x-api-key` set to your key.
|
|
22
|
+
|
|
23
|
+
## Claude Desktop
|
|
24
|
+
|
|
25
|
+
Edit your MCP config (e.g. `claude_desktop_config.json`) and add a server entry:
|
|
26
|
+
|
|
27
|
+
```json
|
|
28
|
+
{
|
|
29
|
+
"mcpServers": {
|
|
30
|
+
"extractly": {
|
|
31
|
+
"command": "npx",
|
|
32
|
+
"args": ["-y", "extractly-mcp"]
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Restart Claude Desktop after saving.
|
|
39
|
+
|
|
40
|
+
## Cursor
|
|
41
|
+
|
|
42
|
+
1. Open **Cursor Settings** → **MCP** (or edit your MCP config file, depending on your Cursor version).
|
|
43
|
+
2. Add a server that runs the same command as above, for example:
|
|
44
|
+
|
|
45
|
+
```json
|
|
46
|
+
{
|
|
47
|
+
"mcpServers": {
|
|
48
|
+
"extractly": {
|
|
49
|
+
"command": "npx",
|
|
50
|
+
"args": ["-y", "extractly-mcp"]
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
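Your **api_key** is passed as a tool argument each time the model calls `extract_structured_data`; this version does not read the key from an environment variable or config file (env-based configuration may be supported in the future).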
|
|
57
|
+
|
|
58
|
+
## License
|
|
59
|
+
|
|
60
|
+
MIT
|
package/index.js
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
3
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
4
|
+
import { z } from 'zod';
|
|
5
|
+
|
|
6
|
+
// Absolute URL of the Extractly extraction endpoint used by this server.
const EXTRACTLY_EXTRACT_URL = 'https://getextractly.com/api/v1/extract';
|
|
7
|
+
|
|
8
|
+
// Name and version this server identifies itself with.
const serverInfo = { name: 'extractly-mcp', version: '1.0.0' };

// Free-text usage hint passed to the server as its `instructions` option.
const serverOptions = {
  instructions:
    'Call extract_structured_data with a URL, a JSON schema describing fields to extract, and your Extractly API key.',
};

const server = new McpServer(serverInfo, serverOptions);
|
|
15
|
+
|
|
16
|
+
/**
 * Reports whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value - Any parsed JSON value.
 * @returns {boolean} True only for plain JSON objects.
 */
function isJsonObject(value) {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Builds an MCP tool result flagged as an error with a single text item.
 *
 * @param {string} text - Human-readable failure description.
 * @returns {{ isError: true, content: Array<{ type: 'text', text: string }> }}
 */
function toolError(text) {
  return { isError: true, content: [{ type: 'text', text }] };
}

/**
 * Chooses the text used to describe a non-2xx Extractly response.
 *
 * Prefers a string `message` field from a JSON object body; otherwise falls
 * back to the raw body, and finally to the HTTP status text. A non-string
 * `message` is ignored so the output never degrades to "[object Object]".
 *
 * @param {unknown} parsed - Parsed body, `null` for an empty body, `undefined` if not JSON.
 * @param {string} raw - Response body exactly as received.
 * @param {string} statusText - HTTP status text of the response.
 * @returns {string} Message to embed in the tool error.
 */
function describeApiFailure(parsed, raw, statusText) {
  if (isJsonObject(parsed) && typeof parsed.message === 'string' && parsed.message !== '') {
    return parsed.message;
  }
  return raw || statusText;
}

// Registers the single tool of this server. The handler POSTs `url` and
// `schema` as JSON to the Extractly endpoint, authenticating with `api_key`
// in the `x-api-key` header, and maps the HTTP response to a tool result.
server.registerTool(
  'extract_structured_data',
  {
    description:
      'Extract structured data from any URL by providing a JSON schema. Returns clean JSON matching your schema. Powered by Extractly.',
    inputSchema: z.object({
      url: z.string().describe('The webpage to scrape'),
      schema: z
        .object({})
        .catchall(z.unknown())
        .describe('JSON object describing the fields to extract'),
      api_key: z.string().describe('Extractly API key from getextractly.com'),
    }),
  },
  async ({ url, schema, api_key }) => {
    let response;
    let raw;
    try {
      response = await fetch(EXTRACTLY_EXTRACT_URL, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'x-api-key': api_key,
        },
        body: JSON.stringify({ url, schema }),
      });
      // Read as text first so a non-JSON body can still be reported verbatim.
      raw = await response.text();
    } catch (err) {
      // Transport-level failures (DNS, TLS, connection reset, ...) reject the
      // promise. Surface the underlying cause when one is attached, since the
      // top-level message alone is often too generic to act on.
      const reason = err instanceof Error ? err.message : String(err);
      const cause =
        err instanceof Error && err.cause instanceof Error ? ` (${err.cause.message})` : '';
      return toolError(`Extractly request failed: ${reason}${cause}`);
    }

    // `null` marks an empty body; `undefined` marks a body that is not JSON.
    let parsed;
    try {
      parsed = raw ? JSON.parse(raw) : null;
    } catch {
      parsed = undefined;
    }

    if (!response.ok) {
      const message = describeApiFailure(parsed, raw, response.statusText);
      return toolError(`Extractly API error (${response.status}): ${message}`);
    }

    if (parsed === undefined) {
      // Successful response whose body is not JSON: pass the text through.
      return {
        content: [{ type: 'text', text: raw }],
      };
    }

    const result = {
      content: [{ type: 'text', text: JSON.stringify(parsed, null, 2) }],
    };
    // MCP defines structuredContent as a JSON object, so attach it only when
    // the payload is one; null, arrays and primitives are returned as text only.
    if (isJsonObject(parsed)) {
      result.structuredContent = parsed;
    }
    return result;
  }
);
|
|
71
|
+
|
|
72
|
+
// Attach the server to a stdio transport; the top-level await holds module
// evaluation until connect() settles.
await server.connect(new StdioServerTransport());
|
package/package.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "extractly-mcp",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "MCP server for Extractly — extract structured data from any URL",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "index.js",
|
|
7
|
+
"bin": {
|
|
8
|
+
"extractly-mcp": "./index.js"
|
|
9
|
+
},
|
|
10
|
+
"scripts": {
|
|
11
|
+
"start": "node index.js",
|
|
12
|
+
"test": "echo \"Error: no test specified\" && exit 1"
|
|
13
|
+
},
|
|
14
|
+
"keywords": [
|
|
15
|
+
"mcp",
|
|
16
|
+
"extractly",
|
|
17
|
+
"web-scraping",
|
|
18
|
+
"structured-data",
|
|
19
|
+
"ai-agents"
|
|
20
|
+
],
|
|
21
|
+
"license": "MIT",
|
|
22
|
+
"engines": {
|
|
23
|
+
"node": ">=18"
|
|
24
|
+
},
|
|
25
|
+
"dependencies": {
|
|
26
|
+
"@modelcontextprotocol/sdk": "^1.28.0"
|
|
27
|
+
}
|
|
28
|
+
}
|