mcp-web-reader 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -0
- package/dist/index.js +422 -194
- package/package.json +30 -24
package/README.md
CHANGED
|
@@ -11,6 +11,7 @@ A powerful MCP (Model Context Protocol) server that enables Claude and other LLM
|
|
|
11
11
|
- 🌐 **Bypass restrictions**: Cloudflare, CAPTCHAs, access controls
|
|
12
12
|
- 📦 **Batch processing**: Fetch multiple URLs simultaneously
|
|
13
13
|
- 📝 **Markdown output**: Automatic conversion to clean Markdown
|
|
14
|
+
- 🔌 **Transport compatibility**: stdio + Streamable HTTP (optional legacy SSE compatibility mode)
|
|
14
15
|
|
|
15
16
|
## Installation
|
|
16
17
|
|
|
@@ -61,6 +62,28 @@ claude mcp add web-reader -- mcp-web-reader
|
|
|
61
62
|
claude mcp list
|
|
62
63
|
```
|
|
63
64
|
|
|
65
|
+
### Streamable HTTP (Remote Deployment)
|
|
66
|
+
|
|
67
|
+
Start server in Streamable HTTP mode:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
MCP_TRANSPORT=http MCP_HTTP_HOST=0.0.0.0 MCP_HTTP_PORT=3000 npm run start:http
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Optional environment variables:
|
|
74
|
+
|
|
75
|
+
- `MCP_HTTP_PATH` (default: `/mcp`)
|
|
76
|
+
- `MCP_ENABLE_LEGACY_SSE=true` to expose deprecated `/sse` + `/messages` endpoints
|
|
77
|
+
|
|
78
|
+
Codex MCP config (HTTP):
|
|
79
|
+
|
|
80
|
+
```toml
|
|
81
|
+
[mcp_servers.web-reader]
|
|
82
|
+
type = "http"
|
|
83
|
+
url = "https://your-domain.com/mcp"
|
|
84
|
+
bearer_token_env_var = "WEB_READER_TOKEN"
|
|
85
|
+
```
|
|
86
|
+
|
|
64
87
|
## Usage
|
|
65
88
|
|
|
66
89
|
In Claude:
|
|
@@ -102,6 +125,7 @@ Auto-detects restrictions and switches to browser for:
|
|
|
102
125
|
npm run dev # Development with auto-rebuild
|
|
103
126
|
npm run build # Build production version
|
|
104
127
|
npm start # Test run
|
|
128
|
+
npm run start:http # Run Streamable HTTP server
|
|
105
129
|
```
|
|
106
130
|
|
|
107
131
|
## License
|
package/dist/index.js
CHANGED
|
@@ -1,19 +1,14 @@
|
|
|
1
1
|
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
2
2
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
3
|
-
import {
|
|
3
|
+
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
4
|
+
import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
|
|
5
|
+
import { createMcpExpressApp } from "@modelcontextprotocol/sdk/server/express.js";
|
|
6
|
+
import { CallToolRequestSchema, ErrorCode, ListToolsRequestSchema, McpError, isInitializeRequest, } from "@modelcontextprotocol/sdk/types.js";
|
|
4
7
|
import fetch from "node-fetch";
|
|
5
8
|
import { JSDOM } from "jsdom";
|
|
6
9
|
import TurndownService from "turndown";
|
|
7
10
|
import { chromium } from "playwright";
|
|
8
|
-
|
|
9
|
-
const server = new Server({
|
|
10
|
-
name: "web-reader",
|
|
11
|
-
version: "2.0.0",
|
|
12
|
-
}, {
|
|
13
|
-
capabilities: {
|
|
14
|
-
tools: {},
|
|
15
|
-
},
|
|
16
|
-
});
|
|
11
|
+
import { randomUUID } from "node:crypto";
|
|
17
12
|
// Initialize Turndown service (convert HTML to Markdown)
|
|
18
13
|
const turndownService = new TurndownService({
|
|
19
14
|
headingStyle: "atx",
|
|
@@ -371,226 +366,459 @@ async function fetchWebContent(url, preferJina = true) {
|
|
|
371
366
|
}
|
|
372
367
|
}
|
|
373
368
|
}
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
369
|
+
const streamableSessions = new Map();
|
|
370
|
+
const legacySseSessions = new Map();
|
|
371
|
+
function createServerInstance() {
|
|
372
|
+
const server = new Server({
|
|
373
|
+
name: "web-reader",
|
|
374
|
+
version: "2.1.0",
|
|
375
|
+
}, {
|
|
376
|
+
capabilities: {
|
|
377
|
+
tools: {},
|
|
378
|
+
},
|
|
379
|
+
});
|
|
380
|
+
registerServerHandlers(server);
|
|
381
|
+
return server;
|
|
382
|
+
}
|
|
383
|
+
function registerServerHandlers(server) {
|
|
384
|
+
// Handle tool list requests
|
|
385
|
+
server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
386
|
+
return {
|
|
387
|
+
tools: [
|
|
388
|
+
{
|
|
389
|
+
name: "fetch_url",
|
|
390
|
+
description: "Fetch web content from specified URL and convert to Markdown format. Uses Jina Reader by default, automatically falls back to local parser on failure",
|
|
391
|
+
inputSchema: {
|
|
392
|
+
type: "object",
|
|
393
|
+
properties: {
|
|
394
|
+
url: {
|
|
395
|
+
type: "string",
|
|
396
|
+
description: "Webpage URL to fetch (must be http or https protocol)",
|
|
397
|
+
},
|
|
398
|
+
preferJina: {
|
|
399
|
+
type: "boolean",
|
|
400
|
+
description: "Whether to prioritize Jina Reader (default: true)",
|
|
401
|
+
default: true,
|
|
402
|
+
},
|
|
392
403
|
},
|
|
404
|
+
required: ["url"],
|
|
393
405
|
},
|
|
394
|
-
required: ["url"],
|
|
395
406
|
},
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
+
{
|
|
408
|
+
name: "fetch_multiple_urls",
|
|
409
|
+
description: "Batch fetch web content from multiple URLs",
|
|
410
|
+
inputSchema: {
|
|
411
|
+
type: "object",
|
|
412
|
+
properties: {
|
|
413
|
+
urls: {
|
|
414
|
+
type: "array",
|
|
415
|
+
items: {
|
|
416
|
+
type: "string",
|
|
417
|
+
},
|
|
418
|
+
description: "List of webpage URLs to fetch",
|
|
419
|
+
maxItems: 10, // Limit to 10 URLs
|
|
420
|
+
},
|
|
421
|
+
preferJina: {
|
|
422
|
+
type: "boolean",
|
|
423
|
+
description: "Whether to prioritize Jina Reader (default: true)",
|
|
424
|
+
default: true,
|
|
407
425
|
},
|
|
408
|
-
description: "List of webpage URLs to fetch",
|
|
409
|
-
maxItems: 10, // Limit to 10 URLs
|
|
410
|
-
},
|
|
411
|
-
preferJina: {
|
|
412
|
-
type: "boolean",
|
|
413
|
-
description: "Whether to prioritize Jina Reader (default: true)",
|
|
414
|
-
default: true,
|
|
415
426
|
},
|
|
427
|
+
required: ["urls"],
|
|
416
428
|
},
|
|
417
|
-
required: ["urls"],
|
|
418
429
|
},
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
430
|
+
{
|
|
431
|
+
name: "fetch_url_with_jina",
|
|
432
|
+
description: "Force fetch using Jina Reader (suitable for complex webpages)",
|
|
433
|
+
inputSchema: {
|
|
434
|
+
type: "object",
|
|
435
|
+
properties: {
|
|
436
|
+
url: {
|
|
437
|
+
type: "string",
|
|
438
|
+
description: "Webpage URL to fetch",
|
|
439
|
+
},
|
|
429
440
|
},
|
|
441
|
+
required: ["url"],
|
|
430
442
|
},
|
|
431
|
-
required: ["url"],
|
|
432
443
|
},
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
444
|
+
{
|
|
445
|
+
name: "fetch_url_local",
|
|
446
|
+
description: "Force fetch using local parser (suitable for simple webpages or when Jina is unavailable)",
|
|
447
|
+
inputSchema: {
|
|
448
|
+
type: "object",
|
|
449
|
+
properties: {
|
|
450
|
+
url: {
|
|
451
|
+
type: "string",
|
|
452
|
+
description: "Webpage URL to fetch",
|
|
453
|
+
},
|
|
443
454
|
},
|
|
455
|
+
required: ["url"],
|
|
444
456
|
},
|
|
445
|
-
required: ["url"],
|
|
446
457
|
},
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
458
|
+
{
|
|
459
|
+
name: "fetch_url_with_browser",
|
|
460
|
+
description: "Force fetch using Playwright browser (suitable for websites with access restrictions, such as Cloudflare protection, CAPTCHA, etc.)",
|
|
461
|
+
inputSchema: {
|
|
462
|
+
type: "object",
|
|
463
|
+
properties: {
|
|
464
|
+
url: {
|
|
465
|
+
type: "string",
|
|
466
|
+
description: "Webpage URL to fetch",
|
|
467
|
+
},
|
|
457
468
|
},
|
|
469
|
+
required: ["url"],
|
|
458
470
|
},
|
|
459
|
-
required: ["url"],
|
|
460
471
|
},
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
};
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
472
|
+
],
|
|
473
|
+
};
|
|
474
|
+
});
|
|
475
|
+
// Handle tool call requests
|
|
476
|
+
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
477
|
+
const { name, arguments: args } = request.params;
|
|
478
|
+
try {
|
|
479
|
+
if (name === "fetch_url") {
|
|
480
|
+
const { url, preferJina = true } = args;
|
|
481
|
+
// Validate URL
|
|
482
|
+
if (!isValidUrl(url)) {
|
|
483
|
+
throw new McpError(ErrorCode.InvalidParams, "Invalid URL format, please provide http or https protocol URL");
|
|
484
|
+
}
|
|
485
|
+
// Fetch web content
|
|
486
|
+
const result = await fetchWebContent(url, preferJina);
|
|
487
|
+
return {
|
|
488
|
+
content: [
|
|
489
|
+
{
|
|
490
|
+
type: "text",
|
|
491
|
+
text: `# ${result.title}\n\n**URL**: ${result.metadata.url}\n**Fetched At**: ${result.metadata.fetchedAt}\n**Content Length**: ${result.metadata.contentLength} characters\n**Method**: ${result.metadata.method}\n\n---\n\n${result.content}`,
|
|
492
|
+
},
|
|
493
|
+
],
|
|
494
|
+
};
|
|
474
495
|
}
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
throw new McpError(ErrorCode.InvalidParams, "Invalid URL format");
|
|
496
|
+
else if (name === "fetch_url_with_jina") {
|
|
497
|
+
const { url } = args;
|
|
498
|
+
if (!isValidUrl(url)) {
|
|
499
|
+
throw new McpError(ErrorCode.InvalidParams, "Invalid URL format");
|
|
500
|
+
}
|
|
501
|
+
const result = await fetchWithJinaReader(url);
|
|
502
|
+
return {
|
|
503
|
+
content: [
|
|
504
|
+
{
|
|
505
|
+
type: "text",
|
|
506
|
+
text: `# ${result.title}\n\n**URL**: ${result.metadata.url}\n**Fetched At**: ${result.metadata.fetchedAt}\n**Content Length**: ${result.metadata.contentLength} characters\n**Method**: Jina Reader\n\n---\n\n${result.content}`,
|
|
507
|
+
},
|
|
508
|
+
],
|
|
509
|
+
};
|
|
490
510
|
}
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
511
|
+
else if (name === "fetch_url_local") {
|
|
512
|
+
const { url } = args;
|
|
513
|
+
if (!isValidUrl(url)) {
|
|
514
|
+
throw new McpError(ErrorCode.InvalidParams, "Invalid URL format");
|
|
515
|
+
}
|
|
516
|
+
const result = await fetchWithLocalParser(url);
|
|
517
|
+
return {
|
|
518
|
+
content: [
|
|
519
|
+
{
|
|
520
|
+
type: "text",
|
|
521
|
+
text: `# ${result.title}\n\n**URL**: ${result.metadata.url}\n**Fetched At**: ${result.metadata.fetchedAt}\n**Content Length**: ${result.metadata.contentLength} characters\n**Method**: Local Parser\n\n---\n\n${result.content}`,
|
|
522
|
+
},
|
|
523
|
+
],
|
|
524
|
+
};
|
|
505
525
|
}
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
526
|
+
else if (name === "fetch_multiple_urls") {
|
|
527
|
+
const { urls, preferJina = true } = args;
|
|
528
|
+
// Validate all URLs
|
|
529
|
+
const invalidUrls = urls.filter(url => !isValidUrl(url));
|
|
530
|
+
if (invalidUrls.length > 0) {
|
|
531
|
+
throw new McpError(ErrorCode.InvalidParams, `The following URLs have invalid format: ${invalidUrls.join(", ")}`);
|
|
532
|
+
}
|
|
533
|
+
// Fetch all URLs concurrently
|
|
534
|
+
const results = await Promise.allSettled(urls.map(url => fetchWebContent(url, preferJina)));
|
|
535
|
+
// Combine results
|
|
536
|
+
let combinedContent = "# Batch URL Content Fetch Results\n\n";
|
|
537
|
+
results.forEach((result, index) => {
|
|
538
|
+
const url = urls[index];
|
|
539
|
+
combinedContent += `## ${index + 1}. ${url}\n\n`;
|
|
540
|
+
if (result.status === "fulfilled") {
|
|
541
|
+
const { title, content, metadata } = result.value;
|
|
542
|
+
combinedContent += `**Title**: ${title}\n`;
|
|
543
|
+
combinedContent += `**Fetched At**: ${metadata.fetchedAt}\n`;
|
|
544
|
+
combinedContent += `**Content Length**: ${metadata.contentLength} characters\n`;
|
|
545
|
+
combinedContent += `**Method**: ${metadata.method}\n\n`;
|
|
546
|
+
combinedContent += `### Content\n\n${content}\n\n`;
|
|
547
|
+
}
|
|
548
|
+
else {
|
|
549
|
+
combinedContent += `**Error**: ${result.reason}\n\n`;
|
|
550
|
+
}
|
|
551
|
+
combinedContent += "---\n\n";
|
|
552
|
+
});
|
|
553
|
+
return {
|
|
554
|
+
content: [
|
|
555
|
+
{
|
|
556
|
+
type: "text",
|
|
557
|
+
text: combinedContent,
|
|
558
|
+
},
|
|
559
|
+
],
|
|
560
|
+
};
|
|
522
561
|
}
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
results.forEach((result, index) => {
|
|
528
|
-
const url = urls[index];
|
|
529
|
-
combinedContent += `## ${index + 1}. ${url}\n\n`;
|
|
530
|
-
if (result.status === "fulfilled") {
|
|
531
|
-
const { title, content, metadata } = result.value;
|
|
532
|
-
combinedContent += `**Title**: ${title}\n`;
|
|
533
|
-
combinedContent += `**Fetched At**: ${metadata.fetchedAt}\n`;
|
|
534
|
-
combinedContent += `**Content Length**: ${metadata.contentLength} characters\n`;
|
|
535
|
-
combinedContent += `**Method**: ${metadata.method}\n\n`;
|
|
536
|
-
combinedContent += `### Content\n\n${content}\n\n`;
|
|
562
|
+
else if (name === "fetch_url_with_browser") {
|
|
563
|
+
const { url } = args;
|
|
564
|
+
if (!isValidUrl(url)) {
|
|
565
|
+
throw new McpError(ErrorCode.InvalidParams, "Invalid URL format");
|
|
537
566
|
}
|
|
538
|
-
|
|
539
|
-
|
|
567
|
+
const result = await fetchWithPlaywright(url);
|
|
568
|
+
return {
|
|
569
|
+
content: [
|
|
570
|
+
{
|
|
571
|
+
type: "text",
|
|
572
|
+
text: `# ${result.title}\n\n**URL**: ${result.metadata.url}\n**Fetched At**: ${result.metadata.fetchedAt}\n**Content Length**: ${result.metadata.contentLength} characters\n**Method**: Playwright Browser\n\n---\n\n${result.content}`,
|
|
573
|
+
},
|
|
574
|
+
],
|
|
575
|
+
};
|
|
576
|
+
}
|
|
577
|
+
else {
|
|
578
|
+
throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`);
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
catch (error) {
|
|
582
|
+
if (error instanceof McpError) {
|
|
583
|
+
throw error;
|
|
584
|
+
}
|
|
585
|
+
throw new McpError(ErrorCode.InternalError, `Tool execution failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
586
|
+
}
|
|
587
|
+
});
|
|
588
|
+
}
|
|
589
|
+
function sendJsonRpcError(res, statusCode, message) {
|
|
590
|
+
res.status(statusCode).json({
|
|
591
|
+
jsonrpc: "2.0",
|
|
592
|
+
error: {
|
|
593
|
+
code: -32000,
|
|
594
|
+
message,
|
|
595
|
+
},
|
|
596
|
+
id: null,
|
|
597
|
+
});
|
|
598
|
+
}
|
|
599
|
+
function getSessionIdFromHeaders(headers) {
|
|
600
|
+
const value = headers["mcp-session-id"];
|
|
601
|
+
if (!value) {
|
|
602
|
+
return undefined;
|
|
603
|
+
}
|
|
604
|
+
return Array.isArray(value) ? value[0] : value;
|
|
605
|
+
}
|
|
606
|
+
function resolveTransportMode() {
|
|
607
|
+
const cliTransportArg = process.argv.find((arg) => arg.startsWith("--transport="));
|
|
608
|
+
const cliTransport = cliTransportArg ? cliTransportArg.split("=", 2)[1] : undefined;
|
|
609
|
+
const legacyHttpFlag = process.argv.includes("--http");
|
|
610
|
+
const mode = (cliTransport ?? process.env.MCP_TRANSPORT ?? (legacyHttpFlag ? "http" : "stdio"))
|
|
611
|
+
.toLowerCase();
|
|
612
|
+
if (mode === "stdio" || mode === "http") {
|
|
613
|
+
return mode;
|
|
614
|
+
}
|
|
615
|
+
throw new Error(`Unsupported transport mode: ${mode}. Use 'stdio' or 'http'.`);
|
|
616
|
+
}
|
|
617
|
+
function resolveLegacySseFlag() {
|
|
618
|
+
const envValue = (process.env.MCP_ENABLE_LEGACY_SSE ?? "").toLowerCase();
|
|
619
|
+
return envValue === "1" || envValue === "true" || process.argv.includes("--legacy-sse");
|
|
620
|
+
}
|
|
621
|
+
async function closeAllSessions() {
|
|
622
|
+
for (const [sessionId, session] of streamableSessions.entries()) {
|
|
623
|
+
try {
|
|
624
|
+
await session.server.close();
|
|
625
|
+
}
|
|
626
|
+
catch (error) {
|
|
627
|
+
console.error(`Failed to close streamable server for session ${sessionId}:`, error);
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
streamableSessions.clear();
|
|
631
|
+
for (const [sessionId, session] of legacySseSessions.entries()) {
|
|
632
|
+
try {
|
|
633
|
+
await session.server.close();
|
|
634
|
+
}
|
|
635
|
+
catch (error) {
|
|
636
|
+
console.error(`Failed to close SSE server for session ${sessionId}:`, error);
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
legacySseSessions.clear();
|
|
640
|
+
}
|
|
641
|
+
async function startStdioServer() {
|
|
642
|
+
const server = createServerInstance();
|
|
643
|
+
const transport = new StdioServerTransport();
|
|
644
|
+
await server.connect(transport);
|
|
645
|
+
console.error("MCP Web Reader started in stdio mode");
|
|
646
|
+
}
|
|
647
|
+
async function startHttpServer() {
|
|
648
|
+
const host = process.env.MCP_HTTP_HOST ?? "127.0.0.1";
|
|
649
|
+
const port = Number.parseInt(process.env.MCP_HTTP_PORT ?? "3000", 10);
|
|
650
|
+
const mcpPath = process.env.MCP_HTTP_PATH ?? "/mcp";
|
|
651
|
+
const enableLegacySse = resolveLegacySseFlag();
|
|
652
|
+
if (!Number.isInteger(port) || port <= 0 || port > 65535) {
|
|
653
|
+
throw new Error(`Invalid MCP_HTTP_PORT: ${process.env.MCP_HTTP_PORT}`);
|
|
654
|
+
}
|
|
655
|
+
const app = createMcpExpressApp({ host });
|
|
656
|
+
app.post(mcpPath, async (req, res) => {
|
|
657
|
+
const sessionId = getSessionIdFromHeaders(req.headers);
|
|
658
|
+
try {
|
|
659
|
+
if (sessionId) {
|
|
660
|
+
const existingSession = streamableSessions.get(sessionId);
|
|
661
|
+
if (!existingSession) {
|
|
662
|
+
sendJsonRpcError(res, 404, "Session not found");
|
|
663
|
+
return;
|
|
540
664
|
}
|
|
541
|
-
|
|
665
|
+
await existingSession.transport.handleRequest(req, res, req.body);
|
|
666
|
+
return;
|
|
667
|
+
}
|
|
668
|
+
if (!isInitializeRequest(req.body)) {
|
|
669
|
+
sendJsonRpcError(res, 400, "Missing session ID; initialize request required");
|
|
670
|
+
return;
|
|
671
|
+
}
|
|
672
|
+
let transport;
|
|
673
|
+
const sessionServer = createServerInstance();
|
|
674
|
+
transport = new StreamableHTTPServerTransport({
|
|
675
|
+
sessionIdGenerator: () => randomUUID(),
|
|
676
|
+
onsessioninitialized: (initializedSessionId) => {
|
|
677
|
+
streamableSessions.set(initializedSessionId, { transport, server: sessionServer });
|
|
678
|
+
console.error(`Streamable HTTP session initialized: ${initializedSessionId}`);
|
|
679
|
+
},
|
|
542
680
|
});
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
},
|
|
549
|
-
],
|
|
681
|
+
transport.onclose = () => {
|
|
682
|
+
const closedSessionId = transport.sessionId;
|
|
683
|
+
if (closedSessionId && streamableSessions.delete(closedSessionId)) {
|
|
684
|
+
console.error(`Streamable HTTP session closed: ${closedSessionId}`);
|
|
685
|
+
}
|
|
550
686
|
};
|
|
687
|
+
await sessionServer.connect(transport);
|
|
688
|
+
await transport.handleRequest(req, res, req.body);
|
|
551
689
|
}
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
if (!
|
|
555
|
-
|
|
690
|
+
catch (error) {
|
|
691
|
+
console.error("Error handling streamable HTTP POST request:", error);
|
|
692
|
+
if (!res.headersSent) {
|
|
693
|
+
sendJsonRpcError(res, 500, "Internal server error");
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
});
|
|
697
|
+
app.get(mcpPath, async (req, res) => {
|
|
698
|
+
const sessionId = getSessionIdFromHeaders(req.headers);
|
|
699
|
+
if (!sessionId) {
|
|
700
|
+
sendJsonRpcError(res, 400, "Missing mcp-session-id header");
|
|
701
|
+
return;
|
|
702
|
+
}
|
|
703
|
+
const session = streamableSessions.get(sessionId);
|
|
704
|
+
if (!session) {
|
|
705
|
+
sendJsonRpcError(res, 404, "Session not found");
|
|
706
|
+
return;
|
|
707
|
+
}
|
|
708
|
+
try {
|
|
709
|
+
await session.transport.handleRequest(req, res);
|
|
710
|
+
}
|
|
711
|
+
catch (error) {
|
|
712
|
+
console.error("Error handling streamable HTTP GET request:", error);
|
|
713
|
+
if (!res.headersSent) {
|
|
714
|
+
sendJsonRpcError(res, 500, "Internal server error");
|
|
556
715
|
}
|
|
557
|
-
const result = await fetchWithPlaywright(url);
|
|
558
|
-
return {
|
|
559
|
-
content: [
|
|
560
|
-
{
|
|
561
|
-
type: "text",
|
|
562
|
-
text: `# ${result.title}\n\n**URL**: ${result.metadata.url}\n**Fetched At**: ${result.metadata.fetchedAt}\n**Content Length**: ${result.metadata.contentLength} characters\n**Method**: Playwright Browser\n\n---\n\n${result.content}`,
|
|
563
|
-
},
|
|
564
|
-
],
|
|
565
|
-
};
|
|
566
716
|
}
|
|
567
|
-
|
|
568
|
-
|
|
717
|
+
});
|
|
718
|
+
app.delete(mcpPath, async (req, res) => {
|
|
719
|
+
const sessionId = getSessionIdFromHeaders(req.headers);
|
|
720
|
+
if (!sessionId) {
|
|
721
|
+
sendJsonRpcError(res, 400, "Missing mcp-session-id header");
|
|
722
|
+
return;
|
|
569
723
|
}
|
|
724
|
+
const session = streamableSessions.get(sessionId);
|
|
725
|
+
if (!session) {
|
|
726
|
+
sendJsonRpcError(res, 404, "Session not found");
|
|
727
|
+
return;
|
|
728
|
+
}
|
|
729
|
+
try {
|
|
730
|
+
await session.transport.handleRequest(req, res);
|
|
731
|
+
}
|
|
732
|
+
catch (error) {
|
|
733
|
+
console.error("Error handling streamable HTTP DELETE request:", error);
|
|
734
|
+
if (!res.headersSent) {
|
|
735
|
+
sendJsonRpcError(res, 500, "Internal server error");
|
|
736
|
+
}
|
|
737
|
+
}
|
|
738
|
+
});
|
|
739
|
+
if (enableLegacySse) {
|
|
740
|
+
app.get("/sse", async (_req, res) => {
|
|
741
|
+
const transport = new SSEServerTransport("/messages", res);
|
|
742
|
+
const server = createServerInstance();
|
|
743
|
+
legacySseSessions.set(transport.sessionId, { transport, server });
|
|
744
|
+
res.on("close", () => {
|
|
745
|
+
const removed = legacySseSessions.delete(transport.sessionId);
|
|
746
|
+
if (removed) {
|
|
747
|
+
void server.close().catch((error) => {
|
|
748
|
+
console.error("Failed to close legacy SSE session server:", error);
|
|
749
|
+
});
|
|
750
|
+
}
|
|
751
|
+
});
|
|
752
|
+
await server.connect(transport);
|
|
753
|
+
});
|
|
754
|
+
app.post("/messages", async (req, res) => {
|
|
755
|
+
const querySessionId = req.query.sessionId;
|
|
756
|
+
const sessionId = typeof querySessionId === "string"
|
|
757
|
+
? querySessionId
|
|
758
|
+
: Array.isArray(querySessionId) && typeof querySessionId[0] === "string"
|
|
759
|
+
? querySessionId[0]
|
|
760
|
+
: undefined;
|
|
761
|
+
if (!sessionId) {
|
|
762
|
+
sendJsonRpcError(res, 400, "Missing sessionId query parameter");
|
|
763
|
+
return;
|
|
764
|
+
}
|
|
765
|
+
const session = legacySseSessions.get(sessionId);
|
|
766
|
+
if (!session) {
|
|
767
|
+
sendJsonRpcError(res, 404, "Session not found");
|
|
768
|
+
return;
|
|
769
|
+
}
|
|
770
|
+
try {
|
|
771
|
+
await session.transport.handlePostMessage(req, res, req.body);
|
|
772
|
+
}
|
|
773
|
+
catch (error) {
|
|
774
|
+
console.error("Error handling legacy SSE POST request:", error);
|
|
775
|
+
if (!res.headersSent) {
|
|
776
|
+
sendJsonRpcError(res, 500, "Internal server error");
|
|
777
|
+
}
|
|
778
|
+
}
|
|
779
|
+
});
|
|
570
780
|
}
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
781
|
+
app.get("/healthz", (_req, res) => {
|
|
782
|
+
res.json({
|
|
783
|
+
status: "ok",
|
|
784
|
+
transport: "streamable-http",
|
|
785
|
+
sessions: streamableSessions.size,
|
|
786
|
+
legacySseEnabled: enableLegacySse,
|
|
787
|
+
});
|
|
788
|
+
});
|
|
789
|
+
app.listen(port, host, () => {
|
|
790
|
+
console.error(`MCP Web Reader started in HTTP mode on http://${host}:${port}${mcpPath}`);
|
|
791
|
+
if (enableLegacySse) {
|
|
792
|
+
console.error("Legacy SSE compatibility enabled on /sse and /messages");
|
|
574
793
|
}
|
|
575
|
-
|
|
794
|
+
});
|
|
795
|
+
}
|
|
796
|
+
let isShuttingDown = false;
|
|
797
|
+
async function shutdown(signal) {
|
|
798
|
+
if (isShuttingDown) {
|
|
799
|
+
return;
|
|
576
800
|
}
|
|
577
|
-
|
|
801
|
+
isShuttingDown = true;
|
|
802
|
+
console.error(`Received ${signal}, shutting down MCP Web Reader...`);
|
|
803
|
+
await closeAllSessions();
|
|
804
|
+
await closeBrowser();
|
|
805
|
+
process.exit(0);
|
|
806
|
+
}
|
|
578
807
|
// Start server
|
|
579
808
|
async function main() {
|
|
580
|
-
const
|
|
581
|
-
|
|
582
|
-
|
|
809
|
+
const transportMode = resolveTransportMode();
|
|
810
|
+
if (transportMode === "http") {
|
|
811
|
+
await startHttpServer();
|
|
812
|
+
return;
|
|
813
|
+
}
|
|
814
|
+
await startStdioServer();
|
|
583
815
|
}
|
|
584
816
|
// Graceful shutdown handling
|
|
585
|
-
process.on(
|
|
586
|
-
|
|
587
|
-
await closeBrowser();
|
|
588
|
-
process.exit(0);
|
|
817
|
+
process.on("SIGINT", () => {
|
|
818
|
+
void shutdown("SIGINT");
|
|
589
819
|
});
|
|
590
|
-
process.on(
|
|
591
|
-
|
|
592
|
-
await closeBrowser();
|
|
593
|
-
process.exit(0);
|
|
820
|
+
process.on("SIGTERM", () => {
|
|
821
|
+
void shutdown("SIGTERM");
|
|
594
822
|
});
|
|
595
823
|
main().catch((error) => {
|
|
596
824
|
console.error("Server startup failed:", error);
|
package/package.json
CHANGED
|
@@ -1,46 +1,52 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mcp-web-reader",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.2.0",
|
|
4
4
|
"description": "MCP server for reading web content with Jina Reader and local parser support",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"bin": {
|
|
7
|
-
|
|
7
|
+
"mcp-web-reader": "./dist/index.js"
|
|
8
8
|
},
|
|
9
9
|
"type": "module",
|
|
10
10
|
"scripts": {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
11
|
+
"build": "tsc",
|
|
12
|
+
"start": "node dist/index.js",
|
|
13
|
+
"start:http": "node dist/index.js --transport=http",
|
|
14
|
+
"dev": "tsc --watch",
|
|
15
|
+
"claude-code": "node dist/index.js",
|
|
16
|
+
"postinstall": "npx playwright install chromium"
|
|
16
17
|
},
|
|
17
18
|
"repository": {
|
|
18
|
-
|
|
19
|
-
|
|
19
|
+
"type": "git",
|
|
20
|
+
"url": "git+https://github.com/Gracker/mcp-web-reader.git"
|
|
20
21
|
},
|
|
21
22
|
"bugs": {
|
|
22
|
-
|
|
23
|
+
"url": "https://github.com/Gracker/mcp-web-reader/issues"
|
|
23
24
|
},
|
|
24
25
|
"homepage": "https://github.com/Gracker/mcp-web-reader#readme",
|
|
25
|
-
"keywords": [
|
|
26
|
+
"keywords": [
|
|
27
|
+
"mcp",
|
|
28
|
+
"claude",
|
|
29
|
+
"web-scraping",
|
|
30
|
+
"jina-reader"
|
|
31
|
+
],
|
|
26
32
|
"author": "Gracker",
|
|
27
33
|
"license": "MIT",
|
|
28
34
|
"files": [
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
35
|
+
"dist",
|
|
36
|
+
"README.md",
|
|
37
|
+
"LICENSE"
|
|
32
38
|
],
|
|
33
39
|
"dependencies": {
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
40
|
+
"@modelcontextprotocol/sdk": "^1.26.0",
|
|
41
|
+
"jsdom": "^24.0.0",
|
|
42
|
+
"node-fetch": "^3.3.2",
|
|
43
|
+
"playwright": "^1.40.0",
|
|
44
|
+
"turndown": "^7.1.3"
|
|
39
45
|
},
|
|
40
46
|
"devDependencies": {
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
47
|
+
"@types/jsdom": "^21.1.6",
|
|
48
|
+
"@types/node": "^20.0.0",
|
|
49
|
+
"@types/turndown": "^5.0.4",
|
|
50
|
+
"typescript": "^5.3.3"
|
|
45
51
|
}
|
|
46
|
-
|
|
52
|
+
}
|