@just-every/mcp-read-website-fast 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,7 +40,7 @@ export class DiskCache {
40
40
  url,
41
41
  markdown,
42
42
  timestamp: Date.now(),
43
- title
43
+ title,
44
44
  };
45
45
  const path = this.getCachePath(url);
46
46
  await writeFile(path, JSON.stringify(entry, null, 2));
@@ -1,24 +1,25 @@
1
1
  import { fetch } from 'undici';
2
2
  export async function fetchStream(url, options = {}) {
3
- const { userAgent = 'MCP/0.1 (+https://github.com/just-every/mcp-read-website-fast)', timeout = 30000, maxRedirections = 5 } = options;
3
+ const { userAgent = 'MCP/0.1 (+https://github.com/just-every/mcp-read-website-fast)', timeout = 30000, maxRedirections = 5, } = options;
4
4
  try {
5
5
  const response = await fetch(url, {
6
6
  headers: {
7
7
  'User-Agent': userAgent,
8
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
8
+ Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
9
9
  'Accept-Language': 'en-US,en;q=0.5',
10
- 'DNT': '1',
11
- 'Connection': 'keep-alive',
12
- 'Upgrade-Insecure-Requests': '1'
10
+ DNT: '1',
11
+ Connection: 'keep-alive',
12
+ 'Upgrade-Insecure-Requests': '1',
13
13
  },
14
14
  redirect: maxRedirections > 0 ? 'follow' : 'manual',
15
- signal: AbortSignal.timeout(timeout)
15
+ signal: AbortSignal.timeout(timeout),
16
16
  });
17
17
  if (!response.ok) {
18
18
  throw new Error(`HTTP ${response.status} for ${url}`);
19
19
  }
20
20
  const contentType = response.headers.get('content-type');
21
- if (contentType && !contentType.includes('text/html') &&
21
+ if (contentType &&
22
+ !contentType.includes('text/html') &&
22
23
  !contentType.includes('application/xhtml+xml')) {
23
24
  throw new Error(`Non-HTML content type: ${contentType} for ${url}`);
24
25
  }
@@ -21,7 +21,7 @@ export class CrawlQueue {
21
21
  sameOriginOnly: options.sameOriginOnly ?? true,
22
22
  userAgent: options.userAgent ?? 'MCP/0.1',
23
23
  cacheDir: options.cacheDir ?? '.cache',
24
- timeout: options.timeout ?? 30000
24
+ timeout: options.timeout ?? 30000,
25
25
  };
26
26
  this.limit = pLimit(this.options.maxConcurrency);
27
27
  this.cache = new DiskCache(this.options.cacheDir);
@@ -60,7 +60,7 @@ export class CrawlQueue {
60
60
  this.results.push({
61
61
  url: normalizedUrl,
62
62
  markdown: cached.markdown,
63
- title: cached.title
63
+ title: cached.title,
64
64
  });
65
65
  return;
66
66
  }
@@ -70,7 +70,7 @@ export class CrawlQueue {
70
70
  this.results.push({
71
71
  url: normalizedUrl,
72
72
  markdown: '',
73
- error: 'Blocked by robots.txt'
73
+ error: 'Blocked by robots.txt',
74
74
  });
75
75
  return;
76
76
  }
@@ -81,13 +81,13 @@ export class CrawlQueue {
81
81
  }
82
82
  const html = await fetchStream(normalizedUrl, {
83
83
  userAgent: this.options.userAgent,
84
- timeout: this.options.timeout
84
+ timeout: this.options.timeout,
85
85
  });
86
86
  if (!html || html.trim().length === 0) {
87
87
  this.results.push({
88
88
  url: normalizedUrl,
89
89
  markdown: '',
90
- error: 'Empty response from server'
90
+ error: 'Empty response from server',
91
91
  });
92
92
  return;
93
93
  }
@@ -97,15 +97,21 @@ export class CrawlQueue {
97
97
  this.results.push({
98
98
  url: normalizedUrl,
99
99
  markdown: '',
100
- error: 'Failed to extract article content'
100
+ error: 'Failed to extract article content',
101
101
  });
102
102
  return;
103
103
  }
104
104
  if (!article.content || article.content.trim().length < 50) {
105
+ const fallbackMarkdown = `# ${article.title || 'Page Content'}\n\n` +
106
+ `*Note: This page appears to be JavaScript-rendered. Limited content extracted.*\n\n` +
107
+ (article.textContent
108
+ ? article.textContent.substring(0, 1000) + '...'
109
+ : 'No text content available');
105
110
  this.results.push({
106
111
  url: normalizedUrl,
107
- markdown: '',
108
- error: 'Page contains minimal extractable content'
112
+ markdown: fallbackMarkdown,
113
+ title: article.title || normalizedUrl,
114
+ error: 'Limited content extracted (JavaScript-rendered page)',
109
115
  });
110
116
  return;
111
117
  }
@@ -128,14 +134,14 @@ export class CrawlQueue {
128
134
  url: normalizedUrl,
129
135
  markdown,
130
136
  title: article.title,
131
- links: links.length > 0 ? links : undefined
137
+ links: links.length > 0 ? links : undefined,
132
138
  });
133
139
  }
134
140
  catch (error) {
135
141
  this.results.push({
136
142
  url: normalizedUrl,
137
143
  markdown: '',
138
- error: error instanceof Error ? error.message : 'Unknown error'
144
+ error: error instanceof Error ? error.message : 'Unknown error',
139
145
  });
140
146
  }
141
147
  }
@@ -8,9 +8,9 @@ export async function getRobotsChecker(origin, userAgent = '*') {
8
8
  const robotsUrl = new URL('/robots.txt', origin).href;
9
9
  const robotsTxt = await fetchStream(robotsUrl, {
10
10
  timeout: 5000,
11
- userAgent
11
+ userAgent,
12
12
  });
13
- const robotsParserModule = await import('robots-parser');
13
+ const robotsParserModule = (await import('robots-parser'));
14
14
  const robotsParser = robotsParserModule.default || robotsParserModule;
15
15
  const robots = robotsParser(robotsUrl, robotsTxt);
16
16
  robotsCache.set(origin, robots);
@@ -19,7 +19,7 @@ export async function getRobotsChecker(origin, userAgent = '*') {
19
19
  catch {
20
20
  const permissive = {
21
21
  isAllowed: () => true,
22
- getCrawlDelay: () => undefined
22
+ getCrawlDelay: () => undefined,
23
23
  };
24
24
  robotsCache.set(origin, permissive);
25
25
  return permissive;
package/dist/index.js CHANGED
@@ -32,7 +32,7 @@ program
32
32
  sameOriginOnly: !options.allOrigins,
33
33
  userAgent: options.userAgent,
34
34
  cacheDir: options.cacheDir,
35
- timeout: parseInt(options.timeout, 10)
35
+ timeout: parseInt(options.timeout, 10),
36
36
  };
37
37
  const queue = new CrawlQueue(crawlOptions);
38
38
  await queue.init();
@@ -43,30 +43,33 @@ program
43
43
  }
44
44
  else if (options.output === 'markdown') {
45
45
  results.forEach(result => {
46
- if (result.error) {
47
- console.error(`Error for ${result.url}: ${result.error}`);
48
- }
49
- else if (result.markdown) {
46
+ if (result.markdown) {
50
47
  console.log(result.markdown);
51
48
  if (results.length > 1) {
52
49
  console.log('\n---\n');
53
50
  }
54
51
  }
52
+ if (result.error && result.markdown) {
53
+ console.error(`Warning for ${result.url}: ${result.error}`);
54
+ }
55
+ else if (result.error && !result.markdown) {
56
+ console.error(`Error for ${result.url}: ${result.error}`);
57
+ }
55
58
  });
56
59
  }
57
60
  else if (options.output === 'both') {
58
61
  results.forEach(result => {
59
62
  console.log(`\n## URL: ${result.url}\n`);
60
- if (result.error) {
61
- console.error(`Error: ${result.error}`);
62
- }
63
- else {
63
+ if (result.markdown) {
64
64
  console.log(result.markdown);
65
65
  }
66
+ if (result.error) {
67
+ console.error(`${result.markdown ? 'Warning' : 'Error'}: ${result.error}`);
68
+ }
66
69
  });
67
70
  }
68
- const hasErrors = results.some(r => r.error);
69
- if (hasErrors) {
71
+ const hasFatalErrors = results.some(r => r.error && !r.markdown);
72
+ if (hasFatalErrors) {
70
73
  process.exit(1);
71
74
  }
72
75
  }
@@ -8,7 +8,7 @@ export async function fetchMarkdown(url, options = {}) {
8
8
  sameOriginOnly: options.sameOriginOnly ?? true,
9
9
  userAgent: options.userAgent,
10
10
  cacheDir: options.cacheDir ?? '.cache',
11
- timeout: options.timeout ?? 30000
11
+ timeout: options.timeout ?? 30000,
12
12
  };
13
13
  const queue = new CrawlQueue(crawlOptions);
14
14
  await queue.init();
@@ -17,20 +17,20 @@ export async function fetchMarkdown(url, options = {}) {
17
17
  if (!mainResult) {
18
18
  return {
19
19
  markdown: '',
20
- error: 'No results returned'
20
+ error: 'No results returned',
21
21
  };
22
22
  }
23
23
  return {
24
24
  markdown: mainResult.markdown,
25
25
  title: mainResult.title,
26
26
  links: mainResult.links,
27
- error: mainResult.error
27
+ error: mainResult.error,
28
28
  };
29
29
  }
30
30
  catch (error) {
31
31
  return {
32
32
  markdown: '',
33
- error: error instanceof Error ? error.message : 'Unknown error'
33
+ error: error instanceof Error ? error.message : 'Unknown error',
34
34
  };
35
35
  }
36
36
  }
@@ -4,10 +4,12 @@ export function extractArticle(dom) {
4
4
  const baseUrl = dom.window.location.href;
5
5
  const articleParagraph = document.querySelector('article p');
6
6
  const hasStrongArticleIndicators = (document.querySelector('article') !== null &&
7
- articleParagraph?.textContent && articleParagraph.textContent.length > 200) ||
7
+ articleParagraph?.textContent &&
8
+ articleParagraph.textContent.length > 200) ||
8
9
  document.querySelector('[itemtype*="BlogPosting"]') !== null ||
9
10
  document.querySelector('[itemtype*="NewsArticle"]') !== null ||
10
- document.querySelector('meta[property="article:published_time"]') !== null;
11
+ document.querySelector('meta[property="article:published_time"]') !==
12
+ null;
11
13
  if (hasStrongArticleIndicators) {
12
14
  const documentClone = document.cloneNode(true);
13
15
  const reader = new Readability(documentClone);
@@ -24,7 +26,7 @@ export function extractArticle(dom) {
24
26
  lang: article.lang || null,
25
27
  siteName: article.siteName || null,
26
28
  publishedTime: article.publishedTime || null,
27
- baseUrl
29
+ baseUrl,
28
30
  };
29
31
  }
30
32
  }
@@ -36,10 +38,16 @@ function extractContentManually(dom) {
36
38
  const baseUrl = dom.window.location.href;
37
39
  const title = document.querySelector('title')?.textContent ||
38
40
  document.querySelector('h1')?.textContent ||
39
- document.querySelector('meta[property="og:title"]')?.getAttribute('content') ||
40
- document.querySelector('meta[name="title"]')?.getAttribute('content') ||
41
+ document
42
+ .querySelector('meta[property="og:title"]')
43
+ ?.getAttribute('content') ||
44
+ document
45
+ .querySelector('meta[name="title"]')
46
+ ?.getAttribute('content') ||
41
47
  'Untitled Page';
42
- const byline = document.querySelector('meta[name="author"]')?.getAttribute('content') ||
48
+ const byline = document
49
+ .querySelector('meta[name="author"]')
50
+ ?.getAttribute('content') ||
43
51
  document.querySelector('[rel="author"]')?.textContent ||
44
52
  document.querySelector('.author')?.textContent ||
45
53
  null;
@@ -56,18 +64,18 @@ function extractContentManually(dom) {
56
64
  siteName: null,
57
65
  textContent: document.documentElement?.textContent || '',
58
66
  publishedTime: null,
59
- baseUrl
67
+ baseUrl,
60
68
  };
61
69
  }
62
70
  const contentClone = document.body.cloneNode(true);
63
- const selectorsToRemove = [
64
- 'script', 'style', 'noscript', 'template'
65
- ];
71
+ const selectorsToRemove = ['script', 'style', 'noscript', 'template'];
66
72
  selectorsToRemove.forEach(selector => {
67
73
  try {
68
- contentClone.querySelectorAll(selector).forEach(el => el.remove());
74
+ contentClone
75
+ .querySelectorAll(selector)
76
+ .forEach(el => el.remove());
69
77
  }
70
- catch (e) {
78
+ catch {
71
79
  }
72
80
  });
73
81
  const mainContent = contentClone;
@@ -83,14 +91,16 @@ function extractContentManually(dom) {
83
91
  siteName: null,
84
92
  textContent: mainContent.textContent || '',
85
93
  publishedTime: null,
86
- baseUrl
94
+ baseUrl,
87
95
  };
88
96
  }
89
97
  catch (error) {
90
98
  console.error('Error in manual extraction:', error);
91
99
  return {
92
100
  title: 'Error extracting content',
93
- content: dom.window.document.body?.innerHTML || dom.window.document.documentElement?.innerHTML || '',
101
+ content: dom.window.document.body?.innerHTML ||
102
+ dom.window.document.documentElement?.innerHTML ||
103
+ '',
94
104
  byline: null,
95
105
  excerpt: '',
96
106
  dir: null,
@@ -99,7 +109,7 @@ function extractContentManually(dom) {
99
109
  siteName: null,
100
110
  textContent: dom.window.document.body?.textContent || '',
101
111
  publishedTime: null,
102
- baseUrl: dom.window.location.href
112
+ baseUrl: dom.window.location.href,
103
113
  };
104
114
  }
105
115
  }
@@ -1,28 +1,29 @@
1
- import { JSDOM } from 'jsdom';
1
+ import { JSDOM, VirtualConsole } from 'jsdom';
2
2
  export function htmlToDom(html, url) {
3
3
  try {
4
4
  return new JSDOM(html, {
5
5
  url,
6
6
  contentType: 'text/html',
7
7
  includeNodeLocations: false,
8
- runScripts: 'outside-only',
9
- resources: 'usable',
10
- pretendToBeVisual: true
8
+ runScripts: undefined,
9
+ resources: undefined,
10
+ pretendToBeVisual: true,
11
+ virtualConsole: new VirtualConsole().sendTo(console, { omitJSDOMErrors: true }),
11
12
  });
12
13
  }
13
- catch (error) {
14
- console.error('Error parsing HTML with JSDOM, trying with minimal options:', error);
14
+ catch {
15
15
  try {
16
16
  return new JSDOM(html, {
17
17
  url,
18
- contentType: 'text/html'
18
+ contentType: 'text/html',
19
+ virtualConsole: new VirtualConsole().sendTo(console, { omitJSDOMErrors: true }),
19
20
  });
20
21
  }
21
- catch (fallbackError) {
22
- console.error('Fallback parsing also failed:', fallbackError);
22
+ catch {
23
23
  return new JSDOM(`<!DOCTYPE html><html><body>${html}</body></html>`, {
24
24
  url,
25
- contentType: 'text/html'
25
+ contentType: 'text/html',
26
+ virtualConsole: new VirtualConsole().sendTo(console, { omitJSDOMErrors: true }),
26
27
  });
27
28
  }
28
29
  }
@@ -32,7 +33,7 @@ export function extractLinks(dom) {
32
33
  const links = [];
33
34
  const baseUrl = dom.window.location.href;
34
35
  const anchorElements = document.querySelectorAll('a[href]');
35
- anchorElements.forEach((element) => {
36
+ anchorElements.forEach(element => {
36
37
  try {
37
38
  const href = element.getAttribute('href');
38
39
  if (!href)
@@ -7,34 +7,41 @@ function convertRelativeUrls(html, baseUrl) {
7
7
  const document = dom.window.document;
8
8
  document.querySelectorAll('a[href]').forEach(link => {
9
9
  const href = link.getAttribute('href');
10
- if (href && !href.startsWith('http://') && !href.startsWith('https://') &&
11
- !href.startsWith('//') && !href.startsWith('mailto:') &&
12
- !href.startsWith('tel:') && !href.startsWith('javascript:') &&
10
+ if (href &&
11
+ !href.startsWith('http://') &&
12
+ !href.startsWith('https://') &&
13
+ !href.startsWith('//') &&
14
+ !href.startsWith('mailto:') &&
15
+ !href.startsWith('tel:') &&
16
+ !href.startsWith('javascript:') &&
13
17
  !href.startsWith('#')) {
14
18
  try {
15
19
  const absoluteUrl = new URL(href, baseUrl).href;
16
20
  link.setAttribute('href', absoluteUrl);
17
21
  }
18
- catch (e) {
22
+ catch {
19
23
  }
20
24
  }
21
25
  });
22
26
  document.querySelectorAll('img[src]').forEach(img => {
23
27
  const src = img.getAttribute('src');
24
- if (src && !src.startsWith('http://') && !src.startsWith('https://') &&
25
- !src.startsWith('//') && !src.startsWith('data:')) {
28
+ if (src &&
29
+ !src.startsWith('http://') &&
30
+ !src.startsWith('https://') &&
31
+ !src.startsWith('//') &&
32
+ !src.startsWith('data:')) {
26
33
  try {
27
34
  const absoluteUrl = new URL(src, baseUrl).href;
28
35
  img.setAttribute('src', absoluteUrl);
29
36
  }
30
- catch (e) {
37
+ catch {
31
38
  }
32
39
  }
33
40
  });
34
41
  const bodyElement = document.body || document.documentElement;
35
42
  return bodyElement ? bodyElement.innerHTML : html;
36
43
  }
37
- catch (e) {
44
+ catch {
38
45
  return html;
39
46
  }
40
47
  }
@@ -55,7 +62,7 @@ export function createTurndownService() {
55
62
  },
56
63
  defaultReplacement: (content, node) => {
57
64
  return node.isBlock ? '\n\n' + content + '\n\n' : content;
58
- }
65
+ },
59
66
  });
60
67
  turndown.use(gfm);
61
68
  turndown.addRule('media', {
@@ -63,12 +70,14 @@ export function createTurndownService() {
63
70
  replacement: (_content, node) => {
64
71
  const element = node;
65
72
  const src = element.getAttribute('src') || element.getAttribute('data-src');
66
- const title = element.getAttribute('title') || element.getAttribute('alt') || 'media';
73
+ const title = element.getAttribute('title') ||
74
+ element.getAttribute('alt') ||
75
+ 'media';
67
76
  if (src) {
68
77
  return `\n\n[${title}](${src})\n\n`;
69
78
  }
70
79
  return '';
71
- }
80
+ },
72
81
  });
73
82
  turndown.addRule('figure', {
74
83
  filter: 'figure',
@@ -80,7 +89,7 @@ export function createTurndownService() {
80
89
  return `\n\n${content.trim()}\n*${captionText}*\n\n`;
81
90
  }
82
91
  return `\n\n${content.trim()}\n\n`;
83
- }
92
+ },
84
93
  });
85
94
  return turndown;
86
95
  }
@@ -119,7 +128,9 @@ export function formatArticleMarkdown(article) {
119
128
  markdown += tempDiv.textContent || article.content;
120
129
  }
121
130
  else {
122
- markdown += article.content.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ');
131
+ markdown += article.content
132
+ .replace(/<[^>]*>/g, ' ')
133
+ .replace(/\s+/g, ' ');
123
134
  }
124
135
  }
125
136
  return markdown
@@ -129,6 +140,8 @@ export function formatArticleMarkdown(article) {
129
140
  }
130
141
  catch (error) {
131
142
  console.error('Fatal error in formatArticleMarkdown:', error);
132
- return article.title ? `# ${article.title}\n\n[Content extraction failed]` : '[Content extraction failed]';
143
+ return article.title
144
+ ? `# ${article.title}\n\n[Content extraction failed]`
145
+ : '[Content extraction failed]';
133
146
  }
134
147
  }
package/dist/serve.js CHANGED
@@ -1,13 +1,13 @@
1
1
  #!/usr/bin/env node
2
- import { Server } from "@modelcontextprotocol/sdk/server/index.js";
3
- import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
- import { CallToolRequestSchema, ListToolsRequestSchema, ListResourcesRequestSchema, ReadResourceRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
2
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
3
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4
+ import { CallToolRequestSchema, ListToolsRequestSchema, ListResourcesRequestSchema, ReadResourceRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
5
5
  let fetchMarkdownModule;
6
6
  let fsPromises;
7
7
  let pathModule;
8
8
  const server = new Server({
9
- name: "read-website-fast",
10
- version: "0.1.0",
9
+ name: 'read-website-fast',
10
+ version: '0.1.0',
11
11
  }, {
12
12
  capabilities: {
13
13
  tools: {},
@@ -15,64 +15,83 @@ const server = new Server({
15
15
  },
16
16
  });
17
17
  const READ_WEBSITE_TOOL = {
18
- name: "read_website_fast",
19
- description: "Quickly reads webpages and converts to markdown for fast, token efficient web scraping",
18
+ name: 'read_website_fast',
19
+ description: 'Quickly reads webpages and converts to markdown for fast, token efficient web scraping',
20
20
  inputSchema: {
21
- type: "object",
21
+ type: 'object',
22
22
  properties: {
23
23
  url: {
24
- type: "string",
25
- description: "HTTP/HTTPS URL to fetch and convert to markdown",
24
+ type: 'string',
25
+ description: 'HTTP/HTTPS URL to fetch and convert to markdown',
26
26
  },
27
27
  depth: {
28
- type: "number",
29
- description: "Crawl depth (0 = single page)",
28
+ type: 'number',
29
+ description: 'Crawl depth (0 = single page)',
30
30
  default: 0,
31
31
  },
32
32
  respectRobots: {
33
- type: "boolean",
34
- description: "Whether to respect robots.txt",
33
+ type: 'boolean',
34
+ description: 'Whether to respect robots.txt',
35
35
  default: true,
36
36
  },
37
37
  },
38
- required: ["url"],
38
+ required: ['url'],
39
39
  },
40
40
  };
41
41
  const RESOURCES = [
42
42
  {
43
- uri: "read-website-fast://status",
44
- name: "Cache Status",
45
- mimeType: "application/json",
46
- description: "Get cache status information",
43
+ uri: 'read-website-fast://status',
44
+ name: 'Cache Status',
45
+ mimeType: 'application/json',
46
+ description: 'Get cache status information',
47
47
  },
48
48
  {
49
- uri: "read-website-fast://clear-cache",
50
- name: "Clear Cache",
51
- mimeType: "application/json",
52
- description: "Clear the cache directory",
49
+ uri: 'read-website-fast://clear-cache',
50
+ name: 'Clear Cache',
51
+ mimeType: 'application/json',
52
+ description: 'Clear the cache directory',
53
53
  },
54
54
  ];
55
55
  server.setRequestHandler(ListToolsRequestSchema, async () => ({
56
56
  tools: [READ_WEBSITE_TOOL],
57
57
  }));
58
58
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
59
- if (request.params.name !== "read_website_fast") {
59
+ if (request.params.name !== 'read_website_fast') {
60
60
  throw new Error(`Unknown tool: ${request.params.name}`);
61
61
  }
62
- if (!fetchMarkdownModule) {
63
- fetchMarkdownModule = await import("./internal/fetchMarkdown.js");
62
+ try {
63
+ if (!fetchMarkdownModule) {
64
+ fetchMarkdownModule = await import('./internal/fetchMarkdown.js');
65
+ }
66
+ const args = request.params.arguments;
67
+ if (!args.url || typeof args.url !== 'string') {
68
+ throw new Error('URL parameter is required and must be a string');
69
+ }
70
+ const result = await fetchMarkdownModule.fetchMarkdown(args.url, {
71
+ depth: args.depth ?? 0,
72
+ respectRobots: args.respectRobots ?? true,
73
+ });
74
+ if (result.error && result.markdown) {
75
+ return {
76
+ content: [
77
+ {
78
+ type: 'text',
79
+ text: `${result.markdown}\n\n---\n*Note: ${result.error}*`,
80
+ },
81
+ ],
82
+ };
83
+ }
84
+ if (result.error && !result.markdown) {
85
+ throw new Error(result.error);
86
+ }
87
+ return {
88
+ content: [{ type: 'text', text: result.markdown }],
89
+ };
64
90
  }
65
- const args = request.params.arguments;
66
- const result = await fetchMarkdownModule.fetchMarkdown(args.url, {
67
- depth: args.depth ?? 0,
68
- respectRobots: args.respectRobots ?? true,
69
- });
70
- if (result.error) {
71
- throw new Error(result.error);
91
+ catch (error) {
92
+ console.error('Tool execution error:', error);
93
+ throw new Error(`Failed to fetch content: ${error instanceof Error ? error.message : 'Unknown error'}`);
72
94
  }
73
- return {
74
- content: [{ type: "text", text: result.markdown }],
75
- };
76
95
  });
77
96
  server.setRequestHandler(ListResourcesRequestSchema, async () => ({
78
97
  resources: RESOURCES,
@@ -80,14 +99,14 @@ server.setRequestHandler(ListResourcesRequestSchema, async () => ({
80
99
  server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
81
100
  const uri = request.params.uri;
82
101
  if (!fsPromises) {
83
- fsPromises = await import("fs/promises");
102
+ fsPromises = await import('fs/promises');
84
103
  }
85
104
  if (!pathModule) {
86
- pathModule = await import("path");
105
+ pathModule = await import('path');
87
106
  }
88
- if (uri === "read-website-fast://status") {
107
+ if (uri === 'read-website-fast://status') {
89
108
  try {
90
- const cacheDir = ".cache";
109
+ const cacheDir = '.cache';
91
110
  const files = await fsPromises.readdir(cacheDir).catch(() => []);
92
111
  let totalSize = 0;
93
112
  for (const file of files) {
@@ -102,7 +121,7 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
102
121
  contents: [
103
122
  {
104
123
  uri,
105
- mimeType: "application/json",
124
+ mimeType: 'application/json',
106
125
  text: JSON.stringify({
107
126
  cacheSize: totalSize,
108
127
  cacheFiles: files.length,
@@ -117,27 +136,29 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
117
136
  contents: [
118
137
  {
119
138
  uri,
120
- mimeType: "application/json",
139
+ mimeType: 'application/json',
121
140
  text: JSON.stringify({
122
- error: "Failed to get cache status",
123
- message: error instanceof Error ? error.message : "Unknown error",
141
+ error: 'Failed to get cache status',
142
+ message: error instanceof Error
143
+ ? error.message
144
+ : 'Unknown error',
124
145
  }, null, 2),
125
146
  },
126
147
  ],
127
148
  };
128
149
  }
129
150
  }
130
- if (uri === "read-website-fast://clear-cache") {
151
+ if (uri === 'read-website-fast://clear-cache') {
131
152
  try {
132
- await fsPromises.rm(".cache", { recursive: true, force: true });
153
+ await fsPromises.rm('.cache', { recursive: true, force: true });
133
154
  return {
134
155
  contents: [
135
156
  {
136
157
  uri,
137
- mimeType: "application/json",
158
+ mimeType: 'application/json',
138
159
  text: JSON.stringify({
139
- status: "success",
140
- message: "Cache cleared successfully",
160
+ status: 'success',
161
+ message: 'Cache cleared successfully',
141
162
  }, null, 2),
142
163
  },
143
164
  ],
@@ -148,10 +169,12 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
148
169
  contents: [
149
170
  {
150
171
  uri,
151
- mimeType: "application/json",
172
+ mimeType: 'application/json',
152
173
  text: JSON.stringify({
153
- status: "error",
154
- message: error instanceof Error ? error.message : "Failed to clear cache",
174
+ status: 'error',
175
+ message: error instanceof Error
176
+ ? error.message
177
+ : 'Failed to clear cache',
155
178
  }, null, 2),
156
179
  },
157
180
  ],
@@ -162,10 +185,35 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
162
185
  });
163
186
  async function runServer() {
164
187
  const transport = new StdioServerTransport();
165
- await server.connect(transport);
166
- console.error("read-website-fast MCP server running");
188
+ process.on('SIGINT', async () => {
189
+ console.error('Received SIGINT, shutting down gracefully...');
190
+ await server.close();
191
+ process.exit(0);
192
+ });
193
+ process.on('SIGTERM', async () => {
194
+ console.error('Received SIGTERM, shutting down gracefully...');
195
+ await server.close();
196
+ process.exit(0);
197
+ });
198
+ process.on('uncaughtException', error => {
199
+ console.error('Uncaught exception:', error);
200
+ process.exit(1);
201
+ });
202
+ process.on('unhandledRejection', (reason, promise) => {
203
+ console.error('Unhandled rejection at:', promise, 'reason:', reason);
204
+ process.exit(1);
205
+ });
206
+ try {
207
+ await server.connect(transport);
208
+ console.error('read-website-fast MCP server running');
209
+ process.stdin.resume();
210
+ }
211
+ catch (error) {
212
+ console.error('Failed to start server:', error);
213
+ process.exit(1);
214
+ }
167
215
  }
168
- runServer().catch((error) => {
169
- console.error("Server error:", error);
216
+ runServer().catch(error => {
217
+ console.error('Server initialization error:', error);
170
218
  process.exit(1);
171
219
  });
@@ -5,7 +5,7 @@ export class MarkdownChunker {
5
5
  maxTokens: options.maxTokens ?? 0,
6
6
  maxChars: options.maxChars ?? 4000,
7
7
  splitOn: options.splitOn ?? 'heading',
8
- overlap: options.overlap ?? 200
8
+ overlap: options.overlap ?? 200,
9
9
  };
10
10
  }
11
11
  chunk(markdown) {
@@ -36,8 +36,8 @@ export class MarkdownChunker {
36
36
  metadata: {
37
37
  headings: [...currentHeadings],
38
38
  startLine,
39
- endLine: i - 1
40
- }
39
+ endLine: i - 1,
40
+ },
41
41
  });
42
42
  const overlapLines = this.getOverlapLines(currentChunk);
43
43
  currentChunk = [...overlapLines, line];
@@ -58,8 +58,8 @@ export class MarkdownChunker {
58
58
  metadata: {
59
59
  headings: [...currentHeadings],
60
60
  startLine,
61
- endLine: i
62
- }
61
+ endLine: i,
62
+ },
63
63
  });
64
64
  const overlapLines = this.getOverlapLines(currentChunk);
65
65
  currentChunk = [...overlapLines];
@@ -74,8 +74,8 @@ export class MarkdownChunker {
74
74
  metadata: {
75
75
  headings: currentHeadings,
76
76
  startLine,
77
- endLine: lines.length - 1
78
- }
77
+ endLine: lines.length - 1,
78
+ },
79
79
  });
80
80
  }
81
81
  return chunks;
@@ -85,11 +85,12 @@ export class MarkdownChunker {
85
85
  const paragraphs = markdown.split(/\n\n+/);
86
86
  let currentChunk = [];
87
87
  for (const paragraph of paragraphs) {
88
- const wouldExceedLimit = currentChunk.join('\n\n').length + paragraph.length > this.options.maxChars;
88
+ const wouldExceedLimit = currentChunk.join('\n\n').length + paragraph.length >
89
+ this.options.maxChars;
89
90
  if (wouldExceedLimit && currentChunk.length > 0) {
90
91
  chunks.push({
91
92
  content: currentChunk.join('\n\n').trim(),
92
- index: chunks.length
93
+ index: chunks.length,
93
94
  });
94
95
  currentChunk = [];
95
96
  }
@@ -98,7 +99,7 @@ export class MarkdownChunker {
98
99
  if (currentChunk.length > 0) {
99
100
  chunks.push({
100
101
  content: currentChunk.join('\n\n').trim(),
101
- index: chunks.length
102
+ index: chunks.length,
102
103
  });
103
104
  }
104
105
  return chunks;
@@ -108,11 +109,12 @@ export class MarkdownChunker {
108
109
  const sentences = markdown.match(/[^.!?]+[.!?]+/g) || [markdown];
109
110
  let currentChunk = [];
110
111
  for (const sentence of sentences) {
111
- const wouldExceedLimit = currentChunk.join(' ').length + sentence.length > this.options.maxChars;
112
+ const wouldExceedLimit = currentChunk.join(' ').length + sentence.length >
113
+ this.options.maxChars;
112
114
  if (wouldExceedLimit && currentChunk.length > 0) {
113
115
  chunks.push({
114
116
  content: currentChunk.join(' ').trim(),
115
- index: chunks.length
117
+ index: chunks.length,
116
118
  });
117
119
  currentChunk = [];
118
120
  }
@@ -121,7 +123,7 @@ export class MarkdownChunker {
121
123
  if (currentChunk.length > 0) {
122
124
  chunks.push({
123
125
  content: currentChunk.join(' ').trim(),
124
- index: chunks.length
126
+ index: chunks.length,
125
127
  });
126
128
  }
127
129
  return chunks;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@just-every/mcp-read-website-fast",
3
- "version": "0.1.6",
3
+ "version": "0.1.8",
4
4
  "description": "Markdown Content Preprocessor - Fetch web pages, extract content, convert to clean Markdown",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -67,8 +67,11 @@
67
67
  "@typescript-eslint/eslint-plugin": "^8.34.0",
68
68
  "@typescript-eslint/parser": "^8.34.0",
69
69
  "eslint": "^9.28.0",
70
+ "eslint-config-prettier": "^10.1.5",
71
+ "eslint-plugin-prettier": "^5.4.1",
70
72
  "tsx": "^4.7.0",
71
73
  "typescript": "^5.3.3",
74
+ "typescript-eslint": "^8.34.0",
72
75
  "vitest": "^3.2.3"
73
76
  },
74
77
  "engines": {