mcp-web-reader 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +24 -0
  2. package/dist/index.js +422 -194
  3. package/package.json +30 -24
package/README.md CHANGED
@@ -11,6 +11,7 @@ A powerful MCP (Model Context Protocol) server that enables Claude and other LLM
11
11
  - 🌐 **Bypass restrictions**: Cloudflare, CAPTCHAs, access controls
12
12
  - 📦 **Batch processing**: Fetch multiple URLs simultaneously
13
13
  - 📝 **Markdown output**: Automatic conversion to clean Markdown
14
+ - 🔌 **Transport compatibility**: stdio + Streamable HTTP (optional legacy SSE compatibility mode)
14
15
 
15
16
  ## Installation
16
17
 
@@ -61,6 +62,28 @@ claude mcp add web-reader -- mcp-web-reader
61
62
  claude mcp list
62
63
  ```
63
64
 
65
+ ### Streamable HTTP (Remote Deployment)
66
+
67
+ Start server in Streamable HTTP mode:
68
+
69
+ ```bash
70
+ MCP_TRANSPORT=http MCP_HTTP_HOST=0.0.0.0 MCP_HTTP_PORT=3000 npm run start:http
71
+ ```
72
+
73
+ Optional environment variables:
74
+
75
+ - `MCP_HTTP_PATH` (default: `/mcp`)
76
+ - `MCP_ENABLE_LEGACY_SSE=true` to expose deprecated `/sse` + `/messages` endpoints
77
+
78
+ Codex MCP config (HTTP):
79
+
80
+ ```toml
81
+ [mcp_servers.web-reader]
82
+ type = "http"
83
+ url = "https://your-domain.com/mcp"
84
+ bearer_token_env_var = "WEB_READER_TOKEN"
85
+ ```
86
+
64
87
  ## Usage
65
88
 
66
89
  In Claude:
@@ -102,6 +125,7 @@ Auto-detects restrictions and switches to browser for:
102
125
  npm run dev # Development with auto-rebuild
103
126
  npm run build # Build production version
104
127
  npm start # Test run
128
+ npm run start:http # Run Streamable HTTP server
105
129
  ```
106
130
 
107
131
  ## License
package/dist/index.js CHANGED
@@ -1,19 +1,14 @@
1
1
  import { Server } from "@modelcontextprotocol/sdk/server/index.js";
2
2
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
- import { CallToolRequestSchema, ErrorCode, ListToolsRequestSchema, McpError, } from "@modelcontextprotocol/sdk/types.js";
3
+ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
4
+ import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
5
+ import { createMcpExpressApp } from "@modelcontextprotocol/sdk/server/express.js";
6
+ import { CallToolRequestSchema, ErrorCode, ListToolsRequestSchema, McpError, isInitializeRequest, } from "@modelcontextprotocol/sdk/types.js";
4
7
  import fetch from "node-fetch";
5
8
  import { JSDOM } from "jsdom";
6
9
  import TurndownService from "turndown";
7
10
  import { chromium } from "playwright";
8
- // Create server instance
9
- const server = new Server({
10
- name: "web-reader",
11
- version: "2.0.0",
12
- }, {
13
- capabilities: {
14
- tools: {},
15
- },
16
- });
11
+ import { randomUUID } from "node:crypto";
17
12
  // Initialize Turndown service (convert HTML to Markdown)
18
13
  const turndownService = new TurndownService({
19
14
  headingStyle: "atx",
@@ -371,226 +366,459 @@ async function fetchWebContent(url, preferJina = true) {
371
366
  }
372
367
  }
373
368
  }
374
- // Handle tool list requests
375
- server.setRequestHandler(ListToolsRequestSchema, async () => {
376
- return {
377
- tools: [
378
- {
379
- name: "fetch_url",
380
- description: "Fetch web content from specified URL and convert to Markdown format. Uses Jina Reader by default, automatically falls back to local parser on failure",
381
- inputSchema: {
382
- type: "object",
383
- properties: {
384
- url: {
385
- type: "string",
386
- description: "Webpage URL to fetch (must be http or https protocol)",
387
- },
388
- preferJina: {
389
- type: "boolean",
390
- description: "Whether to prioritize Jina Reader (default: true)",
391
- default: true,
369
+ const streamableSessions = new Map();
370
+ const legacySseSessions = new Map();
371
+ function createServerInstance() {
372
+ const server = new Server({
373
+ name: "web-reader",
374
+ version: "2.1.0",
375
+ }, {
376
+ capabilities: {
377
+ tools: {},
378
+ },
379
+ });
380
+ registerServerHandlers(server);
381
+ return server;
382
+ }
383
+ function registerServerHandlers(server) {
384
+ // Handle tool list requests
385
+ server.setRequestHandler(ListToolsRequestSchema, async () => {
386
+ return {
387
+ tools: [
388
+ {
389
+ name: "fetch_url",
390
+ description: "Fetch web content from specified URL and convert to Markdown format. Uses Jina Reader by default, automatically falls back to local parser on failure",
391
+ inputSchema: {
392
+ type: "object",
393
+ properties: {
394
+ url: {
395
+ type: "string",
396
+ description: "Webpage URL to fetch (must be http or https protocol)",
397
+ },
398
+ preferJina: {
399
+ type: "boolean",
400
+ description: "Whether to prioritize Jina Reader (default: true)",
401
+ default: true,
402
+ },
392
403
  },
404
+ required: ["url"],
393
405
  },
394
- required: ["url"],
395
406
  },
396
- },
397
- {
398
- name: "fetch_multiple_urls",
399
- description: "Batch fetch web content from multiple URLs",
400
- inputSchema: {
401
- type: "object",
402
- properties: {
403
- urls: {
404
- type: "array",
405
- items: {
406
- type: "string",
407
+ {
408
+ name: "fetch_multiple_urls",
409
+ description: "Batch fetch web content from multiple URLs",
410
+ inputSchema: {
411
+ type: "object",
412
+ properties: {
413
+ urls: {
414
+ type: "array",
415
+ items: {
416
+ type: "string",
417
+ },
418
+ description: "List of webpage URLs to fetch",
419
+ maxItems: 10, // Limit to 10 URLs
420
+ },
421
+ preferJina: {
422
+ type: "boolean",
423
+ description: "Whether to prioritize Jina Reader (default: true)",
424
+ default: true,
407
425
  },
408
- description: "List of webpage URLs to fetch",
409
- maxItems: 10, // Limit to 10 URLs
410
- },
411
- preferJina: {
412
- type: "boolean",
413
- description: "Whether to prioritize Jina Reader (default: true)",
414
- default: true,
415
426
  },
427
+ required: ["urls"],
416
428
  },
417
- required: ["urls"],
418
429
  },
419
- },
420
- {
421
- name: "fetch_url_with_jina",
422
- description: "Force fetch using Jina Reader (suitable for complex webpages)",
423
- inputSchema: {
424
- type: "object",
425
- properties: {
426
- url: {
427
- type: "string",
428
- description: "Webpage URL to fetch",
430
+ {
431
+ name: "fetch_url_with_jina",
432
+ description: "Force fetch using Jina Reader (suitable for complex webpages)",
433
+ inputSchema: {
434
+ type: "object",
435
+ properties: {
436
+ url: {
437
+ type: "string",
438
+ description: "Webpage URL to fetch",
439
+ },
429
440
  },
441
+ required: ["url"],
430
442
  },
431
- required: ["url"],
432
443
  },
433
- },
434
- {
435
- name: "fetch_url_local",
436
- description: "Force fetch using local parser (suitable for simple webpages or when Jina is unavailable)",
437
- inputSchema: {
438
- type: "object",
439
- properties: {
440
- url: {
441
- type: "string",
442
- description: "Webpage URL to fetch",
444
+ {
445
+ name: "fetch_url_local",
446
+ description: "Force fetch using local parser (suitable for simple webpages or when Jina is unavailable)",
447
+ inputSchema: {
448
+ type: "object",
449
+ properties: {
450
+ url: {
451
+ type: "string",
452
+ description: "Webpage URL to fetch",
453
+ },
443
454
  },
455
+ required: ["url"],
444
456
  },
445
- required: ["url"],
446
457
  },
447
- },
448
- {
449
- name: "fetch_url_with_browser",
450
- description: "Force fetch using Playwright browser (suitable for websites with access restrictions, such as Cloudflare protection, CAPTCHA, etc.)",
451
- inputSchema: {
452
- type: "object",
453
- properties: {
454
- url: {
455
- type: "string",
456
- description: "Webpage URL to fetch",
458
+ {
459
+ name: "fetch_url_with_browser",
460
+ description: "Force fetch using Playwright browser (suitable for websites with access restrictions, such as Cloudflare protection, CAPTCHA, etc.)",
461
+ inputSchema: {
462
+ type: "object",
463
+ properties: {
464
+ url: {
465
+ type: "string",
466
+ description: "Webpage URL to fetch",
467
+ },
457
468
  },
469
+ required: ["url"],
458
470
  },
459
- required: ["url"],
460
471
  },
461
- },
462
- ],
463
- };
464
- });
465
- // Handle tool call requests
466
- server.setRequestHandler(CallToolRequestSchema, async (request) => {
467
- const { name, arguments: args } = request.params;
468
- try {
469
- if (name === "fetch_url") {
470
- const { url, preferJina = true } = args;
471
- // Validate URL
472
- if (!isValidUrl(url)) {
473
- throw new McpError(ErrorCode.InvalidParams, "Invalid URL format, please provide http or https protocol URL");
472
+ ],
473
+ };
474
+ });
475
+ // Handle tool call requests
476
+ server.setRequestHandler(CallToolRequestSchema, async (request) => {
477
+ const { name, arguments: args } = request.params;
478
+ try {
479
+ if (name === "fetch_url") {
480
+ const { url, preferJina = true } = args;
481
+ // Validate URL
482
+ if (!isValidUrl(url)) {
483
+ throw new McpError(ErrorCode.InvalidParams, "Invalid URL format, please provide http or https protocol URL");
484
+ }
485
+ // Fetch web content
486
+ const result = await fetchWebContent(url, preferJina);
487
+ return {
488
+ content: [
489
+ {
490
+ type: "text",
491
+ text: `# ${result.title}\n\n**URL**: ${result.metadata.url}\n**Fetched At**: ${result.metadata.fetchedAt}\n**Content Length**: ${result.metadata.contentLength} characters\n**Method**: ${result.metadata.method}\n\n---\n\n${result.content}`,
492
+ },
493
+ ],
494
+ };
474
495
  }
475
- // Fetch web content
476
- const result = await fetchWebContent(url, preferJina);
477
- return {
478
- content: [
479
- {
480
- type: "text",
481
- text: `# ${result.title}\n\n**URL**: ${result.metadata.url}\n**Fetched At**: ${result.metadata.fetchedAt}\n**Content Length**: ${result.metadata.contentLength} characters\n**Method**: ${result.metadata.method}\n\n---\n\n${result.content}`,
482
- },
483
- ],
484
- };
485
- }
486
- else if (name === "fetch_url_with_jina") {
487
- const { url } = args;
488
- if (!isValidUrl(url)) {
489
- throw new McpError(ErrorCode.InvalidParams, "Invalid URL format");
496
+ else if (name === "fetch_url_with_jina") {
497
+ const { url } = args;
498
+ if (!isValidUrl(url)) {
499
+ throw new McpError(ErrorCode.InvalidParams, "Invalid URL format");
500
+ }
501
+ const result = await fetchWithJinaReader(url);
502
+ return {
503
+ content: [
504
+ {
505
+ type: "text",
506
+ text: `# ${result.title}\n\n**URL**: ${result.metadata.url}\n**Fetched At**: ${result.metadata.fetchedAt}\n**Content Length**: ${result.metadata.contentLength} characters\n**Method**: Jina Reader\n\n---\n\n${result.content}`,
507
+ },
508
+ ],
509
+ };
490
510
  }
491
- const result = await fetchWithJinaReader(url);
492
- return {
493
- content: [
494
- {
495
- type: "text",
496
- text: `# ${result.title}\n\n**URL**: ${result.metadata.url}\n**Fetched At**: ${result.metadata.fetchedAt}\n**Content Length**: ${result.metadata.contentLength} characters\n**Method**: Jina Reader\n\n---\n\n${result.content}`,
497
- },
498
- ],
499
- };
500
- }
501
- else if (name === "fetch_url_local") {
502
- const { url } = args;
503
- if (!isValidUrl(url)) {
504
- throw new McpError(ErrorCode.InvalidParams, "Invalid URL format");
511
+ else if (name === "fetch_url_local") {
512
+ const { url } = args;
513
+ if (!isValidUrl(url)) {
514
+ throw new McpError(ErrorCode.InvalidParams, "Invalid URL format");
515
+ }
516
+ const result = await fetchWithLocalParser(url);
517
+ return {
518
+ content: [
519
+ {
520
+ type: "text",
521
+ text: `# ${result.title}\n\n**URL**: ${result.metadata.url}\n**Fetched At**: ${result.metadata.fetchedAt}\n**Content Length**: ${result.metadata.contentLength} characters\n**Method**: Local Parser\n\n---\n\n${result.content}`,
522
+ },
523
+ ],
524
+ };
505
525
  }
506
- const result = await fetchWithLocalParser(url);
507
- return {
508
- content: [
509
- {
510
- type: "text",
511
- text: `# ${result.title}\n\n**URL**: ${result.metadata.url}\n**Fetched At**: ${result.metadata.fetchedAt}\n**Content Length**: ${result.metadata.contentLength} characters\n**Method**: Local Parser\n\n---\n\n${result.content}`,
512
- },
513
- ],
514
- };
515
- }
516
- else if (name === "fetch_multiple_urls") {
517
- const { urls, preferJina = true } = args;
518
- // Validate all URLs
519
- const invalidUrls = urls.filter(url => !isValidUrl(url));
520
- if (invalidUrls.length > 0) {
521
- throw new McpError(ErrorCode.InvalidParams, `The following URLs have invalid format: ${invalidUrls.join(", ")}`);
526
+ else if (name === "fetch_multiple_urls") {
527
+ const { urls, preferJina = true } = args;
528
+ // Validate all URLs
529
+ const invalidUrls = urls.filter(url => !isValidUrl(url));
530
+ if (invalidUrls.length > 0) {
531
+ throw new McpError(ErrorCode.InvalidParams, `The following URLs have invalid format: ${invalidUrls.join(", ")}`);
532
+ }
533
+ // Fetch all URLs concurrently
534
+ const results = await Promise.allSettled(urls.map(url => fetchWebContent(url, preferJina)));
535
+ // Combine results
536
+ let combinedContent = "# Batch URL Content Fetch Results\n\n";
537
+ results.forEach((result, index) => {
538
+ const url = urls[index];
539
+ combinedContent += `## ${index + 1}. ${url}\n\n`;
540
+ if (result.status === "fulfilled") {
541
+ const { title, content, metadata } = result.value;
542
+ combinedContent += `**Title**: ${title}\n`;
543
+ combinedContent += `**Fetched At**: ${metadata.fetchedAt}\n`;
544
+ combinedContent += `**Content Length**: ${metadata.contentLength} characters\n`;
545
+ combinedContent += `**Method**: ${metadata.method}\n\n`;
546
+ combinedContent += `### Content\n\n${content}\n\n`;
547
+ }
548
+ else {
549
+ combinedContent += `**Error**: ${result.reason}\n\n`;
550
+ }
551
+ combinedContent += "---\n\n";
552
+ });
553
+ return {
554
+ content: [
555
+ {
556
+ type: "text",
557
+ text: combinedContent,
558
+ },
559
+ ],
560
+ };
522
561
  }
523
- // Fetch all URLs concurrently
524
- const results = await Promise.allSettled(urls.map(url => fetchWebContent(url, preferJina)));
525
- // Combine results
526
- let combinedContent = "# Batch URL Content Fetch Results\n\n";
527
- results.forEach((result, index) => {
528
- const url = urls[index];
529
- combinedContent += `## ${index + 1}. ${url}\n\n`;
530
- if (result.status === "fulfilled") {
531
- const { title, content, metadata } = result.value;
532
- combinedContent += `**Title**: ${title}\n`;
533
- combinedContent += `**Fetched At**: ${metadata.fetchedAt}\n`;
534
- combinedContent += `**Content Length**: ${metadata.contentLength} characters\n`;
535
- combinedContent += `**Method**: ${metadata.method}\n\n`;
536
- combinedContent += `### Content\n\n${content}\n\n`;
562
+ else if (name === "fetch_url_with_browser") {
563
+ const { url } = args;
564
+ if (!isValidUrl(url)) {
565
+ throw new McpError(ErrorCode.InvalidParams, "Invalid URL format");
537
566
  }
538
- else {
539
- combinedContent += `**Error**: ${result.reason}\n\n`;
567
+ const result = await fetchWithPlaywright(url);
568
+ return {
569
+ content: [
570
+ {
571
+ type: "text",
572
+ text: `# ${result.title}\n\n**URL**: ${result.metadata.url}\n**Fetched At**: ${result.metadata.fetchedAt}\n**Content Length**: ${result.metadata.contentLength} characters\n**Method**: Playwright Browser\n\n---\n\n${result.content}`,
573
+ },
574
+ ],
575
+ };
576
+ }
577
+ else {
578
+ throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`);
579
+ }
580
+ }
581
+ catch (error) {
582
+ if (error instanceof McpError) {
583
+ throw error;
584
+ }
585
+ throw new McpError(ErrorCode.InternalError, `Tool execution failed: ${error instanceof Error ? error.message : String(error)}`);
586
+ }
587
+ });
588
+ }
589
+ function sendJsonRpcError(res, statusCode, message) {
590
+ res.status(statusCode).json({
591
+ jsonrpc: "2.0",
592
+ error: {
593
+ code: -32000,
594
+ message,
595
+ },
596
+ id: null,
597
+ });
598
+ }
599
+ function getSessionIdFromHeaders(headers) {
600
+ const value = headers["mcp-session-id"];
601
+ if (!value) {
602
+ return undefined;
603
+ }
604
+ return Array.isArray(value) ? value[0] : value;
605
+ }
606
+ function resolveTransportMode() {
607
+ const cliTransportArg = process.argv.find((arg) => arg.startsWith("--transport="));
608
+ const cliTransport = cliTransportArg ? cliTransportArg.split("=", 2)[1] : undefined;
609
+ const legacyHttpFlag = process.argv.includes("--http");
610
+ const mode = (cliTransport ?? process.env.MCP_TRANSPORT ?? (legacyHttpFlag ? "http" : "stdio"))
611
+ .toLowerCase();
612
+ if (mode === "stdio" || mode === "http") {
613
+ return mode;
614
+ }
615
+ throw new Error(`Unsupported transport mode: ${mode}. Use 'stdio' or 'http'.`);
616
+ }
617
+ function resolveLegacySseFlag() {
618
+ const envValue = (process.env.MCP_ENABLE_LEGACY_SSE ?? "").toLowerCase();
619
+ return envValue === "1" || envValue === "true" || process.argv.includes("--legacy-sse");
620
+ }
621
+ async function closeAllSessions() {
622
+ for (const [sessionId, session] of streamableSessions.entries()) {
623
+ try {
624
+ await session.server.close();
625
+ }
626
+ catch (error) {
627
+ console.error(`Failed to close streamable server for session ${sessionId}:`, error);
628
+ }
629
+ }
630
+ streamableSessions.clear();
631
+ for (const [sessionId, session] of legacySseSessions.entries()) {
632
+ try {
633
+ await session.server.close();
634
+ }
635
+ catch (error) {
636
+ console.error(`Failed to close SSE server for session ${sessionId}:`, error);
637
+ }
638
+ }
639
+ legacySseSessions.clear();
640
+ }
641
+ async function startStdioServer() {
642
+ const server = createServerInstance();
643
+ const transport = new StdioServerTransport();
644
+ await server.connect(transport);
645
+ console.error("MCP Web Reader started in stdio mode");
646
+ }
647
+ async function startHttpServer() {
648
+ const host = process.env.MCP_HTTP_HOST ?? "127.0.0.1";
649
+ const port = Number.parseInt(process.env.MCP_HTTP_PORT ?? "3000", 10);
650
+ const mcpPath = process.env.MCP_HTTP_PATH ?? "/mcp";
651
+ const enableLegacySse = resolveLegacySseFlag();
652
+ if (!Number.isInteger(port) || port <= 0 || port > 65535) {
653
+ throw new Error(`Invalid MCP_HTTP_PORT: ${process.env.MCP_HTTP_PORT}`);
654
+ }
655
+ const app = createMcpExpressApp({ host });
656
+ app.post(mcpPath, async (req, res) => {
657
+ const sessionId = getSessionIdFromHeaders(req.headers);
658
+ try {
659
+ if (sessionId) {
660
+ const existingSession = streamableSessions.get(sessionId);
661
+ if (!existingSession) {
662
+ sendJsonRpcError(res, 404, "Session not found");
663
+ return;
540
664
  }
541
- combinedContent += "---\n\n";
665
+ await existingSession.transport.handleRequest(req, res, req.body);
666
+ return;
667
+ }
668
+ if (!isInitializeRequest(req.body)) {
669
+ sendJsonRpcError(res, 400, "Missing session ID; initialize request required");
670
+ return;
671
+ }
672
+ let transport;
673
+ const sessionServer = createServerInstance();
674
+ transport = new StreamableHTTPServerTransport({
675
+ sessionIdGenerator: () => randomUUID(),
676
+ onsessioninitialized: (initializedSessionId) => {
677
+ streamableSessions.set(initializedSessionId, { transport, server: sessionServer });
678
+ console.error(`Streamable HTTP session initialized: ${initializedSessionId}`);
679
+ },
542
680
  });
543
- return {
544
- content: [
545
- {
546
- type: "text",
547
- text: combinedContent,
548
- },
549
- ],
681
+ transport.onclose = () => {
682
+ const closedSessionId = transport.sessionId;
683
+ if (closedSessionId && streamableSessions.delete(closedSessionId)) {
684
+ console.error(`Streamable HTTP session closed: ${closedSessionId}`);
685
+ }
550
686
  };
687
+ await sessionServer.connect(transport);
688
+ await transport.handleRequest(req, res, req.body);
551
689
  }
552
- else if (name === "fetch_url_with_browser") {
553
- const { url } = args;
554
- if (!isValidUrl(url)) {
555
- throw new McpError(ErrorCode.InvalidParams, "Invalid URL format");
690
+ catch (error) {
691
+ console.error("Error handling streamable HTTP POST request:", error);
692
+ if (!res.headersSent) {
693
+ sendJsonRpcError(res, 500, "Internal server error");
694
+ }
695
+ }
696
+ });
697
+ app.get(mcpPath, async (req, res) => {
698
+ const sessionId = getSessionIdFromHeaders(req.headers);
699
+ if (!sessionId) {
700
+ sendJsonRpcError(res, 400, "Missing mcp-session-id header");
701
+ return;
702
+ }
703
+ const session = streamableSessions.get(sessionId);
704
+ if (!session) {
705
+ sendJsonRpcError(res, 404, "Session not found");
706
+ return;
707
+ }
708
+ try {
709
+ await session.transport.handleRequest(req, res);
710
+ }
711
+ catch (error) {
712
+ console.error("Error handling streamable HTTP GET request:", error);
713
+ if (!res.headersSent) {
714
+ sendJsonRpcError(res, 500, "Internal server error");
556
715
  }
557
- const result = await fetchWithPlaywright(url);
558
- return {
559
- content: [
560
- {
561
- type: "text",
562
- text: `# ${result.title}\n\n**URL**: ${result.metadata.url}\n**Fetched At**: ${result.metadata.fetchedAt}\n**Content Length**: ${result.metadata.contentLength} characters\n**Method**: Playwright Browser\n\n---\n\n${result.content}`,
563
- },
564
- ],
565
- };
566
716
  }
567
- else {
568
- throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`);
717
+ });
718
+ app.delete(mcpPath, async (req, res) => {
719
+ const sessionId = getSessionIdFromHeaders(req.headers);
720
+ if (!sessionId) {
721
+ sendJsonRpcError(res, 400, "Missing mcp-session-id header");
722
+ return;
569
723
  }
724
+ const session = streamableSessions.get(sessionId);
725
+ if (!session) {
726
+ sendJsonRpcError(res, 404, "Session not found");
727
+ return;
728
+ }
729
+ try {
730
+ await session.transport.handleRequest(req, res);
731
+ }
732
+ catch (error) {
733
+ console.error("Error handling streamable HTTP DELETE request:", error);
734
+ if (!res.headersSent) {
735
+ sendJsonRpcError(res, 500, "Internal server error");
736
+ }
737
+ }
738
+ });
739
+ if (enableLegacySse) {
740
+ app.get("/sse", async (_req, res) => {
741
+ const transport = new SSEServerTransport("/messages", res);
742
+ const server = createServerInstance();
743
+ legacySseSessions.set(transport.sessionId, { transport, server });
744
+ res.on("close", () => {
745
+ const removed = legacySseSessions.delete(transport.sessionId);
746
+ if (removed) {
747
+ void server.close().catch((error) => {
748
+ console.error("Failed to close legacy SSE session server:", error);
749
+ });
750
+ }
751
+ });
752
+ await server.connect(transport);
753
+ });
754
+ app.post("/messages", async (req, res) => {
755
+ const querySessionId = req.query.sessionId;
756
+ const sessionId = typeof querySessionId === "string"
757
+ ? querySessionId
758
+ : Array.isArray(querySessionId) && typeof querySessionId[0] === "string"
759
+ ? querySessionId[0]
760
+ : undefined;
761
+ if (!sessionId) {
762
+ sendJsonRpcError(res, 400, "Missing sessionId query parameter");
763
+ return;
764
+ }
765
+ const session = legacySseSessions.get(sessionId);
766
+ if (!session) {
767
+ sendJsonRpcError(res, 404, "Session not found");
768
+ return;
769
+ }
770
+ try {
771
+ await session.transport.handlePostMessage(req, res, req.body);
772
+ }
773
+ catch (error) {
774
+ console.error("Error handling legacy SSE POST request:", error);
775
+ if (!res.headersSent) {
776
+ sendJsonRpcError(res, 500, "Internal server error");
777
+ }
778
+ }
779
+ });
570
780
  }
571
- catch (error) {
572
- if (error instanceof McpError) {
573
- throw error;
781
+ app.get("/healthz", (_req, res) => {
782
+ res.json({
783
+ status: "ok",
784
+ transport: "streamable-http",
785
+ sessions: streamableSessions.size,
786
+ legacySseEnabled: enableLegacySse,
787
+ });
788
+ });
789
+ app.listen(port, host, () => {
790
+ console.error(`MCP Web Reader started in HTTP mode on http://${host}:${port}${mcpPath}`);
791
+ if (enableLegacySse) {
792
+ console.error("Legacy SSE compatibility enabled on /sse and /messages");
574
793
  }
575
- throw new McpError(ErrorCode.InternalError, `Tool execution failed: ${error instanceof Error ? error.message : String(error)}`);
794
+ });
795
+ }
796
+ let isShuttingDown = false;
797
+ async function shutdown(signal) {
798
+ if (isShuttingDown) {
799
+ return;
576
800
  }
577
- });
801
+ isShuttingDown = true;
802
+ console.error(`Received ${signal}, shutting down MCP Web Reader...`);
803
+ await closeAllSessions();
804
+ await closeBrowser();
805
+ process.exit(0);
806
+ }
578
807
  // Start server
579
808
  async function main() {
580
- const transport = new StdioServerTransport();
581
- await server.connect(transport);
582
- console.error("MCP Web Reader v2.0 started (with Jina Reader + Playwright support)");
809
+ const transportMode = resolveTransportMode();
810
+ if (transportMode === "http") {
811
+ await startHttpServer();
812
+ return;
813
+ }
814
+ await startStdioServer();
583
815
  }
584
816
  // Graceful shutdown handling
585
- process.on('SIGINT', async () => {
586
- console.error("Received SIGINT signal, closing browser...");
587
- await closeBrowser();
588
- process.exit(0);
817
+ process.on("SIGINT", () => {
818
+ void shutdown("SIGINT");
589
819
  });
590
- process.on('SIGTERM', async () => {
591
- console.error("Received SIGTERM signal, closing browser...");
592
- await closeBrowser();
593
- process.exit(0);
820
+ process.on("SIGTERM", () => {
821
+ void shutdown("SIGTERM");
594
822
  });
595
823
  main().catch((error) => {
596
824
  console.error("Server startup failed:", error);
package/package.json CHANGED
@@ -1,46 +1,52 @@
1
1
  {
2
2
  "name": "mcp-web-reader",
3
- "version": "2.1.0",
3
+ "version": "2.2.0",
4
4
  "description": "MCP server for reading web content with Jina Reader and local parser support",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
7
- "mcp-web-reader": "./dist/index.js"
7
+ "mcp-web-reader": "./dist/index.js"
8
8
  },
9
9
  "type": "module",
10
10
  "scripts": {
11
- "build": "tsc",
12
- "start": "node dist/index.js",
13
- "dev": "tsc --watch",
14
- "claude-code": "node dist/index.js",
15
- "postinstall": "npx playwright install chromium"
11
+ "build": "tsc",
12
+ "start": "node dist/index.js",
13
+ "start:http": "node dist/index.js --transport=http",
14
+ "dev": "tsc --watch",
15
+ "claude-code": "node dist/index.js",
16
+ "postinstall": "npx playwright install chromium"
16
17
  },
17
18
  "repository": {
18
- "type": "git",
19
- "url": "git+https://github.com/Gracker/mcp-web-reader.git"
19
+ "type": "git",
20
+ "url": "git+https://github.com/Gracker/mcp-web-reader.git"
20
21
  },
21
22
  "bugs": {
22
- "url": "https://github.com/Gracker/mcp-web-reader/issues"
23
+ "url": "https://github.com/Gracker/mcp-web-reader/issues"
23
24
  },
24
25
  "homepage": "https://github.com/Gracker/mcp-web-reader#readme",
25
- "keywords": ["mcp", "claude", "web-scraping", "jina-reader"],
26
+ "keywords": [
27
+ "mcp",
28
+ "claude",
29
+ "web-scraping",
30
+ "jina-reader"
31
+ ],
26
32
  "author": "Gracker",
27
33
  "license": "MIT",
28
34
  "files": [
29
- "dist",
30
- "README.md",
31
- "LICENSE"
35
+ "dist",
36
+ "README.md",
37
+ "LICENSE"
32
38
  ],
33
39
  "dependencies": {
34
- "@modelcontextprotocol/sdk": "^0.5.0",
35
- "node-fetch": "^3.3.2",
36
- "jsdom": "^24.0.0",
37
- "turndown": "^7.1.3",
38
- "playwright": "^1.40.0"
40
+ "@modelcontextprotocol/sdk": "^1.26.0",
41
+ "jsdom": "^24.0.0",
42
+ "node-fetch": "^3.3.2",
43
+ "playwright": "^1.40.0",
44
+ "turndown": "^7.1.3"
39
45
  },
40
46
  "devDependencies": {
41
- "@types/node": "^20.0.0",
42
- "@types/jsdom": "^21.1.6",
43
- "@types/turndown": "^5.0.4",
44
- "typescript": "^5.3.3"
47
+ "@types/jsdom": "^21.1.6",
48
+ "@types/node": "^20.0.0",
49
+ "@types/turndown": "^5.0.4",
50
+ "typescript": "^5.3.3"
45
51
  }
46
- }
52
+ }