ghostreader-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +14 -0
- package/dist/index.js +153 -0
- package/dist/index.js.map +1 -0
- package/package.json +32 -0
- package/src/index.ts +174 -0
- package/tsconfig.json +17 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* GhostReader MCP Server (standalone)
|
|
4
|
+
*
|
|
5
|
+
* Calls the GhostReader processor HTTP API via GHOSTREADER_URL.
|
|
6
|
+
* No heavy dependencies — just MCP SDK + fetch.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* GHOSTREADER_URL=http://localhost:3000 npx ghostreader-mcp
|
|
10
|
+
*
|
|
11
|
+
* Environment variables:
|
|
12
|
+
* GHOSTREADER_URL — processor URL (default: http://localhost:3000)
|
|
13
|
+
*/
|
|
14
|
+
export {};
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* GhostReader MCP Server (standalone)
|
|
4
|
+
*
|
|
5
|
+
* Calls the GhostReader processor HTTP API via GHOSTREADER_URL.
|
|
6
|
+
* No heavy dependencies — just MCP SDK + fetch.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* GHOSTREADER_URL=http://localhost:3000 npx ghostreader-mcp
|
|
10
|
+
*
|
|
11
|
+
* Environment variables:
|
|
12
|
+
* GHOSTREADER_URL — processor URL (default: http://localhost:3000)
|
|
13
|
+
*/
|
|
14
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
15
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
16
|
+
import { z } from 'zod';
|
|
17
|
+
// Base URL of the GhostReader processor: GHOSTREADER_URL when set and
// non-empty, otherwise the local default. A single trailing slash is removed
// so endpoint paths (which start with '/') can be appended directly.
const BASE_URL = (() => {
    const configured = process.env.GHOSTREADER_URL || 'http://localhost:3000';
    return configured.replace(/\/$/, '');
})();
|
|
18
|
+
/**
 * Caps `text` at `maxLen` UTF-16 code units and appends a marker stating how
 * many code units were dropped. Text at or under the limit is returned as-is.
 *
 * The cut is moved back by one code unit when it would otherwise fall between
 * the two halves of a surrogate pair, so the kept prefix never ends in a lone
 * high surrogate.
 *
 * @param text The text to shorten.
 * @param maxLen Maximum number of code units kept from `text` (default 50000).
 * @returns `text` unchanged, or its prefix followed by the truncation marker.
 */
function truncate(text, maxLen = 50000) {
    if (text.length <= maxLen)
        return text;
    let cut = maxLen;
    const last = text.charCodeAt(cut - 1);
    const next = text.charCodeAt(cut);
    // High surrogate immediately before the cut and low surrogate right after it.
    const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    if (splitsPair)
        cut -= 1;
    return text.slice(0, cut) + `\n\n... [truncated, ${text.length - cut} chars omitted]`;
}
|
|
23
|
+
/**
 * Sends `body` as JSON in a POST request to `BASE_URL` + `endpoint` and
 * resolves with the parsed JSON response.
 *
 * @param endpoint Path appended verbatim to `BASE_URL` (e.g. '/extract').
 * @param body Payload serialized with `JSON.stringify`.
 * @returns The decoded JSON body when the response's `ok` flag is set.
 * @throws Error carrying the status code and response text otherwise.
 */
async function post(endpoint, body) {
    const requestInit = {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(body),
    };
    const response = await fetch(`${BASE_URL}${endpoint}`, requestInit);
    if (response.ok) {
        return response.json();
    }
    const detail = await response.text();
    throw new Error(`GhostReader ${endpoint} returned ${response.status}: ${detail}`);
}
|
|
35
|
+
/**
 * Issues a GET request to `BASE_URL` + `endpoint` and resolves with the
 * response body as text.
 *
 * On a failed response the body text is appended to the error message, in the
 * same `returned <status>: <body>` form that `post` uses, so the reason
 * reported by the processor is not lost.
 *
 * @param endpoint Path (and query string) appended verbatim to `BASE_URL`.
 * @returns The response body text when the response's `ok` flag is set.
 * @throws Error carrying the status code and, when readable, the response text.
 */
async function get(endpoint) {
    const res = await fetch(`${BASE_URL}${endpoint}`);
    if (!res.ok) {
        // Best effort: the status alone is still reported if the body cannot be read.
        const detail = await res.text().catch(() => '');
        throw new Error(`GhostReader ${endpoint} returned ${res.status}${detail ? `: ${detail}` : ''}`);
    }
    return res.text();
}
|
|
41
|
+
// MCP server instance that both tools are registered on. The advertised
// version now matches the published package version in package.json (0.1.0);
// it previously reported 0.2.0. Keep the two in sync on every release.
const server = new McpServer({
    name: 'ghostreader',
    version: '0.1.0',
});
|
|
45
|
+
// Tool: ghostreader_scrape
|
|
46
|
+
// Registers the scrape tool: asks the processor's /render endpoint for the
// page at `url` and returns the (length-capped) markdown as text content.
// Any failure is reported as an `isError` result instead of being thrown.
server.tool('ghostreader_scrape', 'Render a URL using an anti-detect browser (Camoufox) and return the page content as markdown. ' +
    'Use this for JS-heavy sites, SPAs, or pages that block normal HTTP fetches. ' +
    'The browser has a persistent identity (fingerprint, cookies, cache) that avoids bot detection.', {
    url: z.string().url().describe('The URL to render and return as markdown'),
    wait_after_load: z
        .number()
        .min(0)
        .max(30)
        .default(2)
        .describe('Seconds to wait after page load for JS to execute (default: 2)'),
    engine: z
        .string()
        .default('standard')
        .describe('Processing engine: standard (default, fast) or ai (Ollama reader-lm-v2)'),
    article: z
        .boolean()
        .default(false)
        .describe('Enable article mode: aggressively extract main content, strip sidebars/noise. Best for blog posts and news articles.'),
    images: z
        .boolean()
        .default(false)
        .describe('Keep images in output (default: false). When false, strips all <img>/<picture> tags for cleaner text-only output.'),
}, async ({ url, wait_after_load, engine, article, images }) => {
    try {
        // URLSearchParams percent-encodes values, so the free-form `engine`
        // string cannot inject extra parameters or break the request URL.
        const params = new URLSearchParams({ engine, wait: String(wait_after_load) });
        // The boolean flags are only sent when enabled.
        if (article)
            params.set('article', 'true');
        if (images)
            params.set('images', 'true');
        // NOTE(review): `url` is placed in the path unescaped, so a `?` or `#`
        // inside it interferes with the options appended here. Escaping it
        // requires that the processor decodes the path — TODO confirm.
        const markdown = await get(`/render/${url}?${params.toString()}`);
        return {
            content: [{ type: 'text', text: truncate(markdown) }],
        };
    }
    catch (err) {
        return {
            content: [
                {
                    type: 'text',
                    text: `Error scraping ${url}: ${err instanceof Error ? err.message : String(err)}`,
                },
            ],
            isError: true,
        };
    }
});
|
|
88
|
+
// Tool: ghostreader_extract
|
|
89
|
+
// Registers the extract tool: POSTs { url, profile, timeout } to /extract and
// formats the returned results (and related searches) as a numbered text list.
// A reported CAPTCHA, a reported error, or any thrown failure becomes an
// `isError` result.
server.tool('ghostreader_extract', 'Extract structured results from a URL using a named extraction profile. ' +
    "Profiles know how to parse specific sites (e.g., 'google_web' for Google Search, " +
    "'google_news' for Google News). Returns titles, URLs, and content snippets. " +
    'Available profiles: google_web, google_news, base (generic CSS selectors).', {
    url: z.string().url().describe('The URL to render and extract results from'),
    profile: z.string().describe("Extraction profile name (e.g., 'google_web', 'google_news', 'base')"),
    timeout: z
        .number()
        .min(1000)
        .max(120000)
        .default(30000)
        .describe('Render timeout in milliseconds (default: 30000)'),
}, async ({ url, profile, timeout }) => {
    try {
        const data = (await post('/extract', { url, profile, timeout }));
        if (data.captcha) {
            return {
                content: [{ type: 'text', text: `CAPTCHA detected at ${url}. Try again later.` }],
                isError: true,
            };
        }
        if (data.error) {
            return {
                content: [{ type: 'text', text: `Extraction error: ${data.error}` }],
                isError: true,
            };
        }
        // The response is not schema-validated: treat a missing or non-array
        // `results` / `suggestions` as empty rather than failing with a TypeError.
        const results = Array.isArray(data.results) ? data.results : [];
        const suggestions = Array.isArray(data.suggestions) ? data.suggestions : [];
        const lines = [];
        lines.push(`Found ${results.length} results from ${profile} profile:\n`);
        for (const [i, r] of results.entries()) {
            lines.push(`${i + 1}. ${r.title}`);
            lines.push(` URL: ${r.url}`);
            if (r.content)
                lines.push(` ${r.content}`);
            lines.push('');
        }
        if (suggestions.length > 0) {
            lines.push('Related searches:');
            for (const s of suggestions)
                lines.push(` - ${s}`);
        }
        return { content: [{ type: 'text', text: lines.join('\n') }] };
    }
    catch (err) {
        return {
            content: [
                {
                    type: 'text',
                    text: `Error extracting from ${url}: ${err instanceof Error ? err.message : String(err)}`,
                },
            ],
            isError: true,
        };
    }
});
|
|
144
|
+
/**
 * Connects the MCP server to a freshly created stdio transport, then logs the
 * configured processor URL via `console.error`.
 */
async function main() {
    await server.connect(new StdioServerTransport());
    console.error(`[ghostreader-mcp] connected (GHOSTREADER_URL=${BASE_URL})`);
}
|
|
149
|
+
// Entry point: start the server; if startup rejects, log the failure and
// terminate the process with exit status 1.
main().catch((fatal) => {
    console.error('Fatal:', fatal);
    process.exit(1);
});
|
|
153
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,MAAM,QAAQ,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,uBAAuB,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;AAE7F,SAAS,QAAQ,CAAC,IAAY,EAAE,MAAM,GAAG,KAAK;IAC5C,IAAI,IAAI,CAAC,MAAM,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IACvC,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,GAAG,uBAAuB,IAAI,CAAC,MAAM,GAAG,MAAM,iBAAiB,CAAC;AAC9F,CAAC;AAED,KAAK,UAAU,IAAI,CAAC,QAAgB,EAAE,IAA6B;IACjE,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,QAAQ,GAAG,QAAQ,EAAE,EAAE;QAChD,MAAM,EAAE,MAAM;QACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;QAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;KAC3B,CAAC,CAAC;IACH,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACZ,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,MAAM,IAAI,KAAK,CAAC,eAAe,QAAQ,aAAa,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC,CAAC;IAC7E,CAAC;IACD,OAAO,GAAG,CAAC,IAAI,EAAE,CAAC;AACpB,CAAC;AAED,KAAK,UAAU,GAAG,CAAC,QAAgB;IACjC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,QAAQ,GAAG,QAAQ,EAAE,CAAC,CAAC;IAClD,IAAI,CAAC,GAAG,CAAC,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,eAAe,QAAQ,aAAa,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC;IAC/E,OAAO,GAAG,CAAC,IAAI,EAAE,CAAC;AACpB,CAAC;AAED,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;IAC3B,IAAI,EAAE,aAAa;IACnB,OAAO,EAAE,OAAO;CACjB,CAAC,CAAC;AAEH,2BAA2B;AAC3B,MAAM,CAAC,IAAI,CACT,oBAAoB,EACpB,gGAAgG;IAC9F,8EAA8E;IAC9E,gGAAgG,EAClG;IACE,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,0CAA0C,CAAC;IAC1E,eAAe,EAAE,CAAC;SACf,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,EAAE,CAAC;SACP,OAAO,CAAC,CAAC,CAAC;SACV,QAAQ,CAAC,gEAAgE,CAAC;IAC7E,MAAM,EAAE,CAAC;SACN,MAAM,EAAE;SACR,OAAO,CAAC,UAAU,CAAC;SACnB,QAAQ,CAAC,yEAAyE,CAAC;IACtF,OAAO,EAAE,CAAC;SACP,OAAO,EAAE;SACT,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CAAC,sHAAsH,CAAC;IACnI,MAAM,EAAE,CAAC;SACN,OAAO,EAAE;SACT,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CAAC,mHAAmH,CAAC;CACjI,EACD,KAAK,EAAE,EAAE,GAAG,EAAE,eAAe,EAAE,MAAM,EAAE,
OAAO,EAAE,MAAM,EAAE,EAAE,EAAE;IAC1D,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,UAAU,MAAM,SAAS,eAAe,GAAG,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QAC1H,MAAM,QAAQ,GAAG,MAAM,GAAG,CAAC,WAAW,GAAG,IAAI,MAAM,EAAE,CAAC,CAAC;QACvD,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;SAC/D,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,kBAAkB,GAAG,KAAK,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;iBACnF;aACF;YACD,OAAO,EAAE,IAAI;SACd,CAAC;IACJ,CAAC;AACH,CAAC,CACF,CAAC;AAEF,4BAA4B;AAC5B,MAAM,CAAC,IAAI,CACT,qBAAqB,EACrB,0EAA0E;IACxE,mFAAmF;IACnF,8EAA8E;IAC9E,4EAA4E,EAC9E;IACE,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,4CAA4C,CAAC;IAC5E,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,qEAAqE,CAAC;IACnG,OAAO,EAAE,CAAC;SACP,MAAM,EAAE;SACR,GAAG,CAAC,IAAI,CAAC;SACT,GAAG,CAAC,MAAM,CAAC;SACX,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CAAC,iDAAiD,CAAC;CAC/D,EACD,KAAK,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE;IAClC,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,CAAC,MAAM,IAAI,CAAC,UAAU,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAK9D,CAAC;QAEF,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,OAAO;gBACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,uBAAuB,GAAG,oBAAoB,EAAE,CAAC;gBAC1F,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QACD,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,qBAAqB,IAAI,CAAC,KAAK,EAAE,EAAE,CAAC;gBAC7E,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,OAAO,CAAC,MAAM,iBAAiB,OAAO,aAAa,CAAC,CAAC;QAC9E,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;YAC5C,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;YACnC,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;YAC/B,IAAI,CAAC,CAAC,OAAO;gBAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;YAC
7C,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjB,CAAC;QACD,IAAI,IAAI,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChC,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;YAChC,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,WAAW;gBAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,OAAO,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;IAC1E,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,yBAAyB,GAAG,KAAK,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;iBAC1F;aACF;YACD,OAAO,EAAE,IAAI;SACd,CAAC;IACJ,CAAC;AACH,CAAC,CACF,CAAC;AAEF,KAAK,UAAU,IAAI;IACjB,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,OAAO,CAAC,KAAK,CAAC,gDAAgD,QAAQ,GAAG,CAAC,CAAC;AAC7E,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC7B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "ghostreader-mcp",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "MCP server for GhostReader — anti-detect browser rendering + AI content processing",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"ghostreader-mcp": "./dist/index.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"dev": "tsx src/index.ts",
|
|
11
|
+
"build": "tsc",
|
|
12
|
+
"start": "node dist/index.js"
|
|
13
|
+
},
|
|
14
|
+
"keywords": [
|
|
15
|
+
"mcp",
|
|
16
|
+
"ghostreader",
|
|
17
|
+
"browser",
|
|
18
|
+
"scraping",
|
|
19
|
+
"markdown",
|
|
20
|
+
"anti-detect"
|
|
21
|
+
],
|
|
22
|
+
"license": "MIT",
|
|
23
|
+
"dependencies": {
|
|
24
|
+
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
25
|
+
"zod": "^3.0.0"
|
|
26
|
+
},
|
|
27
|
+
"devDependencies": {
|
|
28
|
+
"@types/node": "^22.0.0",
|
|
29
|
+
"tsx": "^4.19.0",
|
|
30
|
+
"typescript": "^5.7.0"
|
|
31
|
+
}
|
|
32
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* GhostReader MCP Server (standalone)
|
|
4
|
+
*
|
|
5
|
+
* Calls the GhostReader processor HTTP API via GHOSTREADER_URL.
|
|
6
|
+
* No heavy dependencies — just MCP SDK + fetch.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* GHOSTREADER_URL=http://localhost:3000 npx ghostreader-mcp
|
|
10
|
+
*
|
|
11
|
+
* Environment variables:
|
|
12
|
+
* GHOSTREADER_URL — processor URL (default: http://localhost:3000)
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
16
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
17
|
+
import { z } from 'zod';
|
|
18
|
+
|
|
19
|
+
// Base URL of the GhostReader processor: GHOSTREADER_URL when set and
// non-empty, otherwise the local default. A single trailing slash is removed
// so endpoint paths (which start with '/') can be appended directly.
const BASE_URL = (() => {
  const configured = process.env.GHOSTREADER_URL || 'http://localhost:3000';
  return configured.replace(/\/$/, '');
})();
|
|
20
|
+
|
|
21
|
+
/**
 * Caps `text` at `maxLen` UTF-16 code units and appends a marker stating how
 * many code units were dropped. Text at or under the limit is returned as-is.
 *
 * The cut is moved back by one code unit when it would otherwise fall between
 * the two halves of a surrogate pair, so the kept prefix never ends in a lone
 * high surrogate.
 *
 * @param text - The text to shorten.
 * @param maxLen - Maximum number of code units kept from `text` (default 50000).
 * @returns `text` unchanged, or its prefix followed by the truncation marker.
 */
function truncate(text: string, maxLen = 50000): string {
  if (text.length <= maxLen) return text;
  let cut = maxLen;
  const last = text.charCodeAt(cut - 1);
  const next = text.charCodeAt(cut);
  // High surrogate immediately before the cut and low surrogate right after it.
  const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
  if (splitsPair) cut -= 1;
  return text.slice(0, cut) + `\n\n... [truncated, ${text.length - cut} chars omitted]`;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Sends `body` as JSON in a POST request to `BASE_URL` + `endpoint` and
 * resolves with the parsed JSON response.
 *
 * @param endpoint - Path appended verbatim to `BASE_URL` (e.g. '/extract').
 * @param body - Payload serialized with `JSON.stringify`.
 * @returns The decoded JSON body when the response's `ok` flag is set.
 * @throws Error carrying the status code and response text otherwise.
 */
async function post(endpoint: string, body: Record<string, unknown>): Promise<unknown> {
  const requestInit = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  };
  const response = await fetch(`${BASE_URL}${endpoint}`, requestInit);
  if (response.ok) {
    return response.json();
  }
  const detail = await response.text();
  throw new Error(`GhostReader ${endpoint} returned ${response.status}: ${detail}`);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Issues a GET request to `BASE_URL` + `endpoint` and resolves with the
 * response body as text.
 *
 * On a failed response the body text is appended to the error message, in the
 * same `returned <status>: <body>` form that `post` uses, so the reason
 * reported by the processor is not lost.
 *
 * @param endpoint - Path (and query string) appended verbatim to `BASE_URL`.
 * @returns The response body text when the response's `ok` flag is set.
 * @throws Error carrying the status code and, when readable, the response text.
 */
async function get(endpoint: string): Promise<string> {
  const res = await fetch(`${BASE_URL}${endpoint}`);
  if (!res.ok) {
    // Best effort: the status alone is still reported if the body cannot be read.
    const detail = await res.text().catch(() => '');
    throw new Error(`GhostReader ${endpoint} returned ${res.status}${detail ? `: ${detail}` : ''}`);
  }
  return res.text();
}
|
|
44
|
+
|
|
45
|
+
// MCP server instance that both tools are registered on. The advertised
// version now matches the published package version in package.json (0.1.0);
// it previously reported 0.2.0. Keep the two in sync on every release.
const server = new McpServer({
  name: 'ghostreader',
  version: '0.1.0',
});
|
|
49
|
+
|
|
50
|
+
// Tool: ghostreader_scrape
|
|
51
|
+
// Registers the scrape tool: asks the processor's /render endpoint for the
// page at `url` and returns the (length-capped) markdown as text content.
// Any failure is reported as an `isError` result instead of being thrown.
server.tool(
  'ghostreader_scrape',
  'Render a URL using an anti-detect browser (Camoufox) and return the page content as markdown. ' +
    'Use this for JS-heavy sites, SPAs, or pages that block normal HTTP fetches. ' +
    'The browser has a persistent identity (fingerprint, cookies, cache) that avoids bot detection.',
  {
    url: z.string().url().describe('The URL to render and return as markdown'),
    wait_after_load: z
      .number()
      .min(0)
      .max(30)
      .default(2)
      .describe('Seconds to wait after page load for JS to execute (default: 2)'),
    engine: z
      .string()
      .default('standard')
      .describe('Processing engine: standard (default, fast) or ai (Ollama reader-lm-v2)'),
    article: z
      .boolean()
      .default(false)
      .describe('Enable article mode: aggressively extract main content, strip sidebars/noise. Best for blog posts and news articles.'),
    images: z
      .boolean()
      .default(false)
      .describe('Keep images in output (default: false). When false, strips all <img>/<picture> tags for cleaner text-only output.'),
  },
  async ({ url, wait_after_load, engine, article, images }) => {
    try {
      // URLSearchParams percent-encodes values, so the free-form `engine`
      // string cannot inject extra parameters or break the request URL.
      const params = new URLSearchParams({ engine, wait: String(wait_after_load) });
      // The boolean flags are only sent when enabled.
      if (article) params.set('article', 'true');
      if (images) params.set('images', 'true');
      // NOTE(review): `url` is placed in the path unescaped, so a `?` or `#`
      // inside it interferes with the options appended here. Escaping it
      // requires that the processor decodes the path — TODO confirm.
      const markdown = await get(`/render/${url}?${params.toString()}`);
      return {
        content: [{ type: 'text' as const, text: truncate(markdown) }],
      };
    } catch (err) {
      return {
        content: [
          {
            type: 'text' as const,
            text: `Error scraping ${url}: ${err instanceof Error ? err.message : String(err)}`,
          },
        ],
        isError: true,
      };
    }
  },
);
|
|
97
|
+
|
|
98
|
+
// Tool: ghostreader_extract
|
|
99
|
+
// Registers the extract tool: POSTs { url, profile, timeout } to /extract and
// formats the returned results (and related searches) as a numbered text list.
// A reported CAPTCHA, a reported error, or any thrown failure becomes an
// `isError` result.
server.tool(
  'ghostreader_extract',
  'Extract structured results from a URL using a named extraction profile. ' +
    "Profiles know how to parse specific sites (e.g., 'google_web' for Google Search, " +
    "'google_news' for Google News). Returns titles, URLs, and content snippets. " +
    'Available profiles: google_web, google_news, base (generic CSS selectors).',
  {
    url: z.string().url().describe('The URL to render and extract results from'),
    profile: z.string().describe("Extraction profile name (e.g., 'google_web', 'google_news', 'base')"),
    timeout: z
      .number()
      .min(1000)
      .max(120000)
      .default(30000)
      .describe('Render timeout in milliseconds (default: 30000)'),
  },
  async ({ url, profile, timeout }) => {
    try {
      // The response is not schema-validated, so every field is typed as
      // optional and checked before use.
      const data = (await post('/extract', { url, profile, timeout })) as {
        results?: Array<{ url: string; title: string; content: string }>;
        suggestions?: string[];
        captcha?: boolean;
        error?: string | null;
      };

      if (data.captcha) {
        return {
          content: [{ type: 'text' as const, text: `CAPTCHA detected at ${url}. Try again later.` }],
          isError: true,
        };
      }
      if (data.error) {
        return {
          content: [{ type: 'text' as const, text: `Extraction error: ${data.error}` }],
          isError: true,
        };
      }

      // Treat a missing or non-array `results` / `suggestions` as empty
      // rather than failing the whole call with a TypeError.
      const results = Array.isArray(data.results) ? data.results : [];
      const suggestions = Array.isArray(data.suggestions) ? data.suggestions : [];

      const lines: string[] = [];
      lines.push(`Found ${results.length} results from ${profile} profile:\n`);
      for (const [i, r] of results.entries()) {
        lines.push(`${i + 1}. ${r.title}`);
        lines.push(` URL: ${r.url}`);
        if (r.content) lines.push(` ${r.content}`);
        lines.push('');
      }
      if (suggestions.length > 0) {
        lines.push('Related searches:');
        for (const s of suggestions) lines.push(` - ${s}`);
      }

      return { content: [{ type: 'text' as const, text: lines.join('\n') }] };
    } catch (err) {
      return {
        content: [
          {
            type: 'text' as const,
            text: `Error extracting from ${url}: ${err instanceof Error ? err.message : String(err)}`,
          },
        ],
        isError: true,
      };
    }
  },
);
|
|
164
|
+
|
|
165
|
+
/**
 * Connects the MCP server to a freshly created stdio transport, then logs the
 * configured processor URL via `console.error`.
 */
async function main() {
  await server.connect(new StdioServerTransport());
  console.error(`[ghostreader-mcp] connected (GHOSTREADER_URL=${BASE_URL})`);
}
|
|
170
|
+
|
|
171
|
+
// Entry point: start the server; if startup rejects, log the failure and
// terminate the process with exit status 1.
main().catch((fatal) => {
  console.error('Fatal:', fatal);
  process.exit(1);
});
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "ESNext",
|
|
5
|
+
"moduleResolution": "bundler",
|
|
6
|
+
"outDir": "dist",
|
|
7
|
+
"rootDir": "src",
|
|
8
|
+
"strict": true,
|
|
9
|
+
"esModuleInterop": true,
|
|
10
|
+
"skipLibCheck": true,
|
|
11
|
+
"forceConsistentCasingInFileNames": true,
|
|
12
|
+
"declaration": true,
|
|
13
|
+
"sourceMap": true
|
|
14
|
+
},
|
|
15
|
+
"include": ["src"],
|
|
16
|
+
"exclude": ["node_modules", "dist"]
|
|
17
|
+
}
|