defuddle-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +144 -0
- package/index.js +177 -0
- package/package.json +33 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Defuddle
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# defuddle-mcp
|
|
2
|
+
|
|
3
|
+
[![npm version](https://img.shields.io/npm/v/defuddle-mcp.svg)](https://www.npmjs.com/package/defuddle-mcp)
|
|
4
|
+
[![npm downloads](https://img.shields.io/npm/dm/defuddle-mcp.svg)](https://www.npmjs.com/package/defuddle-mcp)
|
|
5
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
6
|
+
|
|
7
|
+
A [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) server that extracts clean article content from web pages, removing clutter like ads, sidebars, and navigation elements.
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- 🧹 **Clean Content Extraction** - Removes ads, sidebars, and other distractions
|
|
12
|
+
- 📝 **Markdown Output** - Returns content in clean Markdown format with frontmatter
|
|
13
|
+
- 🚀 **Zero Dependencies** - Built with Node.js built-ins only
|
|
14
|
+
- ⚡ **Fast & Lightweight** - Minimal overhead, quick startup
|
|
15
|
+
|
|
16
|
+
## Requirements
|
|
17
|
+
|
|
18
|
+
- Node.js >= 18 (for built-in `fetch` support)
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
### Install from npm
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
npm install -g defuddle-mcp
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Install from source
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
git clone https://github.com/ryanxili/defuddle-mcp.git
|
|
32
|
+
cd defuddle-mcp
|
|
33
|
+
npm install -g .
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Usage
|
|
37
|
+
|
|
38
|
+
### As a Global CLI Tool
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
defuddle-mcp
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### With MCP Host (e.g., Claude Desktop)
|
|
45
|
+
|
|
46
|
+
Add to your MCP configuration:
|
|
47
|
+
|
|
48
|
+
```json
|
|
49
|
+
{
|
|
50
|
+
"mcpServers": {
|
|
51
|
+
"defuddle": {
|
|
52
|
+
"command": "npx",
|
|
53
|
+
"args": ["-y", "defuddle-mcp"]
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Or with global install:
|
|
60
|
+
|
|
61
|
+
```json
|
|
62
|
+
{
|
|
63
|
+
"mcpServers": {
|
|
64
|
+
"defuddle": {
|
|
65
|
+
"command": "defuddle-mcp"
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Programmatically
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
node index.js
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
The server runs on stdio and communicates via JSON-RPC 2.0.
|
|
78
|
+
|
|
79
|
+
## Tool: defuddle
|
|
80
|
+
|
|
81
|
+
Extract clean article content from a web page.
|
|
82
|
+
|
|
83
|
+
| Property | Type | Description |
|
|
84
|
+
|----------|------|-------------|
|
|
85
|
+
| `url` | string | The URL of the web page to extract content from (required) |
|
|
86
|
+
|
|
87
|
+
### Example Request
|
|
88
|
+
|
|
89
|
+
```json
|
|
90
|
+
{
|
|
91
|
+
"jsonrpc": "2.0",
|
|
92
|
+
"id": 1,
|
|
93
|
+
"method": "tools/call",
|
|
94
|
+
"params": {
|
|
95
|
+
"name": "defuddle",
|
|
96
|
+
"arguments": {
|
|
97
|
+
"url": "https://example.com/article"
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Example Response
|
|
104
|
+
|
|
105
|
+
```json
|
|
106
|
+
{
|
|
107
|
+
"jsonrpc": "2.0",
|
|
108
|
+
"id": 1,
|
|
109
|
+
"result": {
|
|
110
|
+
"content": [
|
|
111
|
+
{
|
|
112
|
+
"type": "text",
|
|
113
|
+
"text": "---\ntitle: Example Article\nsource: https://example.com/article\nlanguage: en\nword_count: 1234\n---\n\n# Article Content\n\n..."
|
|
114
|
+
}
|
|
115
|
+
]
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## API Reference
|
|
121
|
+
|
|
122
|
+
### Methods
|
|
123
|
+
|
|
124
|
+
| Method | Description |
|
|
125
|
+
|--------|-------------|
|
|
126
|
+
| `initialize` | Initialize the MCP session |
|
|
127
|
+
| `tools/list` | List available tools |
|
|
128
|
+
| `tools/call` | Call a specific tool |
|
|
129
|
+
|
|
130
|
+
### Error Codes
|
|
131
|
+
|
|
132
|
+
| Code | Message | Description |
|
|
133
|
+
|------|---------|-------------|
|
|
134
|
+
| -32700 | Parse error | Invalid JSON received |
|
|
135
|
+
| -32601 | Method not found | Requested method does not exist |
|
|
136
|
+
|
|
137
|
+
## License
|
|
138
|
+
|
|
139
|
+
MIT © [Defuddle](https://github.com/ryanxili/defuddle-mcp)
|
|
140
|
+
|
|
141
|
+
## Related
|
|
142
|
+
|
|
143
|
+
- [Model Context Protocol](https://modelcontextprotocol.io/) - MCP specification
|
|
144
|
+
- [defuddle.md](https://defuddle.md/) - Content extraction API used by this server
|
package/index.js
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Defuddle MCP Server
|
|
5
|
+
*
|
|
6
|
+
* An MCP (Model Context Protocol) server that extracts clean article content
|
|
7
|
+
* from web pages. Uses the defuddle.md API to remove ads, sidebars, and other
|
|
8
|
+
* clutter, returning content in Markdown format.
|
|
9
|
+
*
|
|
10
|
+
* @see https://modelcontextprotocol.io/
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const readline = require('readline');
|
|
14
|
+
|
|
15
|
+
/**
 * Create readline interface to read MCP requests from stdin.
 * terminal: false enables stream mode for data processing instead of interactive terminal.
 */
const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout,
  terminal: false
});

/** Standard JSON-RPC 2.0 error objects used by this server. */
const RPC_ERRORS = Object.freeze({
  PARSE_ERROR: { code: -32700, message: 'Parse error' },
  INVALID_REQUEST: { code: -32600, message: 'Invalid Request' },
  METHOD_NOT_FOUND: { code: -32601, message: 'Method not found' },
  INVALID_PARAMS: { code: -32602, message: 'Invalid params' },
  INTERNAL_ERROR: { code: -32603, message: 'Internal error' },
});

/** Descriptor of the single tool exposed through tools/list. */
const DEFUDDLE_TOOL = Object.freeze({
  name: 'defuddle',
  description: 'Extract clean article content from a web page, removing clutter like ads and sidebars.',
  // Input parameter schema (JSON Schema)
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'The URL of the web page to extract content from.',
      },
    },
    required: ['url'],
  },
});

/**
 * Build a JSON-RPC 2.0 success response.
 *
 * @param {string|number|null} id - Id copied from the request.
 * @param {object} result - Method-specific result payload.
 * @returns {object} The response object.
 */
function buildResult(id, result) {
  return { jsonrpc: '2.0', id, result };
}

/**
 * Build a JSON-RPC 2.0 error response.
 *
 * @param {string|number|null} id - Id copied from the request, or null when it is unknown.
 * @param {{code: number, message: string}} error - One of RPC_ERRORS.
 * @returns {object} The response object.
 */
function buildError(id, error) {
  return { jsonrpc: '2.0', id, error: { code: error.code, message: error.message } };
}

/**
 * Run the `defuddle` tool: fetch the Markdown rendering of a page from defuddle.md.
 *
 * Failures (bad arguments, network errors, non-2xx status) are reported as a
 * tool result with `isError: true` rather than as a protocol-level error.
 *
 * @param {{url?: unknown}|undefined} args - Tool arguments from the request.
 * @returns {Promise<object>} Tool result with a single text content item.
 */
async function runDefuddleTool(args) {
  try {
    const url = args?.url;
    // Without this check a missing url would be fetched as the literal string "undefined".
    if (typeof url !== 'string' || url.trim() === '') {
      throw new Error('"url" must be a non-empty string');
    }

    // Call defuddle.md API to extract clean content
    const response = await fetch(`https://defuddle.md/${encodeURIComponent(url)}`);
    if (!response.ok) {
      throw new Error(`Failed to fetch content: ${response.status} ${response.statusText}`);
    }

    // API returns Markdown format
    const markdown = await response.text();
    return { content: [{ type: 'text', text: markdown }] };
  } catch (error) {
    return {
      content: [{ type: 'text', text: `Error extracting content: ${error.message}` }],
      isError: true,
    };
  }
}

/**
 * Dispatch one JSON-RPC request (a message that carries an id) to its handler.
 *
 * @param {{id: string|number|null, method: string, params?: unknown}} request
 * @returns {Promise<object>} The JSON-RPC response to send back.
 */
async function dispatchRequest(request) {
  const { id, method, params } = request;

  switch (method) {
    // Client sends initialize first after connection to negotiate protocol version and capabilities
    case 'initialize':
      return buildResult(id, {
        // NOTE(review): confirm this string matches a published MCP protocol revision.
        protocolVersion: '2026-03-14',
        capabilities: {
          tools: {
            // false: this server never emits notifications/tools/list_changed
            listChanged: false
          }
        },
        serverInfo: {
          name: 'defuddle-mcp',
          version: '1.0.0'
        }
      });

    // Client queries the list of tools provided by the server
    case 'tools/list':
      return buildResult(id, { tools: [DEFUDDLE_TOOL] });

    // Client invokes a specified tool
    case 'tools/call': {
      if (params === null || typeof params !== 'object') {
        return buildError(id, RPC_ERRORS.INVALID_PARAMS);
      }
      const { name, arguments: args } = params;
      if (name !== 'defuddle') {
        // Tool not found, return method not found error
        return buildError(id, RPC_ERRORS.METHOD_NOT_FOUND);
      }
      return buildResult(id, await runDefuddleTool(args));
    }

    default:
      return buildError(id, RPC_ERRORS.METHOD_NOT_FOUND);
  }
}

/**
 * Turn one line of stdin into the response to write, if any.
 *
 * @param {string} line - Raw input line; expected to hold one JSON-RPC message.
 * @returns {Promise<object|null>} Response object, or null when nothing must be
 *   written (blank line, or a notification, which JSON-RPC 2.0 forbids answering).
 */
async function processLine(line) {
  const text = line.trim();
  if (text === '') {
    return null;
  }

  let request;
  try {
    request = JSON.parse(text);
  } catch {
    // Cannot get request ID when parsing fails
    return buildError(null, RPC_ERRORS.PARSE_ERROR);
  }

  if (request === null || typeof request !== 'object' || Array.isArray(request)) {
    return buildError(null, RPC_ERRORS.INVALID_REQUEST);
  }
  if (typeof request.method !== 'string') {
    return buildError(request.id ?? null, RPC_ERRORS.INVALID_REQUEST);
  }

  // A message without an id is a notification (e.g. notifications/initialized).
  if (request.id === undefined) {
    return null;
  }

  try {
    return await dispatchRequest(request);
  } catch (error) {
    // Keep the failure visible on stderr; stdout is reserved for JSON-RPC traffic.
    console.error(error);
    return buildError(request.id, RPC_ERRORS.INTERNAL_ERROR);
  }
}

/**
 * Handle each line of input (one JSON-RPC message).
 * MCP protocol is based on JSON-RPC 2.0, with one complete JSON object per line.
 */
rl.on('line', async (line) => {
  const response = await processLine(line);
  if (response !== null) {
    console.log(JSON.stringify(response));
  }
});

// Server startup message output to stderr to avoid interfering with stdout JSON-RPC communication
console.error('Defuddle MCP server running on stdio');
|
package/package.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "defuddle-mcp",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "MCP server for extracting clean article content from web pages",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"defuddle-mcp": "index.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"start": "node index.js"
|
|
11
|
+
},
|
|
12
|
+
"engines": {
|
|
13
|
+
"node": ">=18"
|
|
14
|
+
},
|
|
15
|
+
"repository": {
|
|
16
|
+
"type": "git",
|
|
17
|
+
"url": "git+https://github.com/ryanxili/defuddle-mcp.git"
|
|
18
|
+
},
|
|
19
|
+
"keywords": [
|
|
20
|
+
"mcp",
|
|
21
|
+
"model-context-protocol",
|
|
22
|
+
"defuddle",
|
|
23
|
+
"content-extraction",
|
|
24
|
+
"readability",
|
|
25
|
+
"web-scraper"
|
|
26
|
+
],
|
|
27
|
+
"author": "ryanxili",
|
|
28
|
+
"license": "MIT",
|
|
29
|
+
"files": [
|
|
30
|
+
"index.js",
|
|
31
|
+
"README.md"
|
|
32
|
+
]
|
|
33
|
+
}
|