@just-every/mcp-read-website-fast 0.1.20 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import { Command } from 'commander';
3
- import { fetch } from '@just-every/crawl';
4
3
  import { fetchMarkdown } from './internal/fetchMarkdown.js';
4
+ import { loadCrawlModule } from './internal/crawlCompat.js';
5
5
  import { readFileSync } from 'fs';
6
6
  import { fileURLToPath } from 'url';
7
7
  import { dirname, join } from 'path';
@@ -43,6 +43,7 @@ program
43
43
  }
44
44
  console.error(`Fetching ${url}...`);
45
45
  if (options.output === 'json') {
46
+ const { fetch } = await loadCrawlModule();
46
47
  const results = await fetch(url, crawlOptions);
47
48
  console.log(JSON.stringify(results, null, 2));
48
49
  }
@@ -59,6 +60,7 @@ program
59
60
  }
60
61
  }
61
62
  else if (options.output === 'both') {
63
+ const { fetch } = await loadCrawlModule();
62
64
  const results = await fetch(url, crawlOptions);
63
65
  results.forEach((result) => {
64
66
  console.log(`\n## URL: ${result.url}\n`);
@@ -0,0 +1,4 @@
1
+ import type { CrawlOptions } from '@just-every/crawl';
2
// Re-exports the CrawlOptions type imported from '@just-every/crawl'.
+ export type { CrawlOptions };
3
// Type of the '@just-every/crawl' module namespace, as seen by a dynamic import.
+ type CrawlModule = typeof import('@just-every/crawl');
4
// Asynchronously provides the '@just-every/crawl' module namespace.
+ export declare function loadCrawlModule(): Promise<CrawlModule>;
@@ -0,0 +1,11 @@
1
+ import { ensureTurndownPluginCompat } from './turndownPluginCompat.js';
2
/**
 * In-flight or settled load of '@just-every/crawl'. `undefined` until the
 * first call, and reset to `undefined` again if a load attempt fails.
 */
let crawlModulePromise;

/**
 * Lazily loads the '@just-every/crawl' module.
 *
 * The turndown-plugin-gfm compatibility setup is awaited before the dynamic
 * import runs. Concurrent and subsequent callers share one promise, so the
 * setup and import happen at most once per successful load.
 *
 * A failed attempt is not cached: once the shared promise rejects, the cache
 * is cleared so that a later call can retry instead of receiving the same
 * stale rejection forever.
 *
 * @returns {Promise<object>} The '@just-every/crawl' module namespace.
 * @throws Rejects with whatever the compatibility setup or the import throws.
 */
export async function loadCrawlModule() {
    if (!crawlModulePromise) {
        const pending = (async () => {
            await ensureTurndownPluginCompat();
            return import('@just-every/crawl');
        })();
        crawlModulePromise = pending;
        // Forget a failed attempt. The identity check avoids clearing a newer
        // attempt that may have replaced this one in the meantime.
        pending.catch(() => {
            if (crawlModulePromise === pending) {
                crawlModulePromise = undefined;
            }
        });
    }
    return crawlModulePromise;
}
@@ -1,4 +1,4 @@
1
- import { fetch } from '@just-every/crawl';
1
+ import { loadCrawlModule } from './crawlCompat.js';
2
2
  import { extractMarkdownLinks, filterSameOriginLinks } from '../utils/extractMarkdownLinks.js';
3
3
  export async function fetchMarkdown(url, options = {}) {
4
4
  try {
@@ -23,6 +23,7 @@ export async function fetchMarkdown(url, options = {}) {
23
23
  if (options.cookiesFile) {
24
24
  crawlOptions.cookiesFile = options.cookiesFile;
25
25
  }
26
+ const { fetch } = await loadCrawlModule();
26
27
  const results = await fetch(currentUrl, crawlOptions);
27
28
  if (results && results.length > 0) {
28
29
  const result = results[0];
@@ -0,0 +1,12 @@
1
// Subset of a package.json manifest: only the `module` and `main` entry fields.
+ interface DualPackageManifest {
2
+ module?: unknown;
3
+ main?: unknown;
4
+ }
5
// Takes module source text and a module URL; returns source text (string in, string out).
+ export declare function applyCrawlMarkdownInteropPatch(source: string, turndownPluginUrl: string): string;
6
// Returns an entry file path chosen from the manifest; packageName identifies the package.
+ export declare function selectPreferredPackageEntry(manifest: DualPackageManifest, packageName: string): string;
7
// Returns a URL string for `packageName`, resolved from the given importer package.json path.
+ export declare function resolvePackageModuleUrlFromImporter(packageName: string, importerPackageJsonPath: string): string;
8
// Returns a URL string; takes no arguments.
+ export declare function resolveTurndownPluginEsmUrl(): string;
9
// Returns a URL string; takes no arguments.
+ export declare function resolveTurndownPluginCompatUrl(): string;
10
// Synchronous; takes no arguments and returns nothing.
+ export declare function patchCrawlMarkdownInterop(): void;
11
// Asynchronous; resolves with no value.
+ export declare function ensureTurndownPluginCompat(): Promise<void>;
12
// Empty export keeps this file a module.
+ export {};
@@ -0,0 +1,76 @@
1
+ import { readFileSync, writeFileSync } from 'node:fs';
2
+ import { dirname, join } from 'node:path';
3
+ import { createRequire, register } from 'node:module';
4
+ import { pathToFileURL } from 'node:url';
5
// Set to true by ensureTurndownPluginCompat() after register() has been called,
// so the patch-and-register sequence is not repeated.
+ let loaderRegistered = false;
6
// Exact import statement text that applyCrawlMarkdownInteropPatch() looks for
// and replaces.
+ const CRAWL_GFM_IMPORT = "import { gfm } from 'turndown-plugin-gfm';";
7
// Matches the marker comment followed by a `import { gfm } from ...;` line,
// i.e. the two-line text produced by getPatchedGfmImport(), whatever the
// module specifier on the import line is.
+ const PATCHED_GFM_IMPORT_PATTERN = /\/\/ Patched by @just-every\/mcp-read-website-fast for Node ESM\/CJS interop\.\nimport \{ gfm \} from [^\n]+;/;
8
/**
 * Locates the package.json of '@just-every/crawl', resolving it relative to
 * this module's own URL.
 *
 * @returns {string} Resolved location of '@just-every/crawl/package.json'.
 */
function getCrawlPackageJsonPath() {
    return createRequire(import.meta.url).resolve('@just-every/crawl/package.json');
}
12
/**
 * Builds the two-line replacement text: a marker comment followed by an
 * import of `gfm` from the given module URL.
 *
 * @param {string} turndownPluginUrl Module specifier to import `gfm` from; it
 *   is embedded as a JSON-quoted string literal.
 * @returns {string} Marker comment and import statement joined by a newline.
 */
function getPatchedGfmImport(turndownPluginUrl) {
    const markerComment = '// Patched by @just-every/mcp-read-website-fast for Node ESM/CJS interop.';
    const importStatement = `import { gfm } from ${JSON.stringify(turndownPluginUrl)};`;
    return `${markerComment}\n${importStatement}`;
}
18
/**
 * Rewrites the `turndown-plugin-gfm` import inside a module's source text so
 * that `gfm` is imported from `turndownPluginUrl` instead.
 *
 * Cases, checked in order:
 * 1. The source already contains the patched import, or the URL itself:
 *    returned unchanged.
 * 2. The source contains an earlier patch (marker comment + import line):
 *    that patch is replaced with the current one.
 * 3. The source contains the original `turndown-plugin-gfm` import: it is
 *    replaced with the patched import.
 * 4. Otherwise the source is returned unchanged.
 *
 * @param {string} source Module source text to patch.
 * @param {string} turndownPluginUrl Module URL the patched import points at.
 * @returns {string} The patched source, or `source` itself when nothing changed.
 */
export function applyCrawlMarkdownInteropPatch(source, turndownPluginUrl) {
    const patchedImport = getPatchedGfmImport(turndownPluginUrl);
    if (source.includes(patchedImport) || source.includes(turndownPluginUrl)) {
        return source;
    }
    // Use a replacer function rather than a replacement string: in a string,
    // sequences such as `$&`, `$'` or `$1` are expanded by replace(), which
    // would corrupt any URL whose path contains a `$`.
    const insertPatchedImport = () => patchedImport;
    if (PATCHED_GFM_IMPORT_PATTERN.test(source)) {
        return source.replace(PATCHED_GFM_IMPORT_PATTERN, insertPatchedImport);
    }
    if (!source.includes(CRAWL_GFM_IMPORT)) {
        return source;
    }
    return source.replace(CRAWL_GFM_IMPORT, insertPatchedImport);
}
31
/**
 * Picks the entry file of a package from its manifest, preferring the
 * `module` field over `main`. A field only counts when it is a non-empty
 * string.
 *
 * @param {{ module?: unknown, main?: unknown }} manifest Parsed package.json.
 * @param {string} packageName Package name, used only in the error message.
 * @returns {string} The value of the first usable entry field.
 * @throws {Error} When neither `module` nor `main` is a non-empty string.
 */
export function selectPreferredPackageEntry(manifest, packageName) {
    // Order encodes the preference: `module` first, then `main`.
    for (const field of ['module', 'main']) {
        const candidate = manifest[field];
        if (typeof candidate === 'string' && candidate.length > 0) {
            return candidate;
        }
    }
    throw new Error(`Could not determine entry file for ${packageName}`);
}
40
/**
 * Resolves a package as seen from another package and returns the file URL
 * of its preferred entry file.
 *
 * The dependency's package.json is resolved with a `require` anchored at
 * `importerPackageJsonPath`, read and parsed, and the entry picked by
 * selectPreferredPackageEntry() is joined onto the manifest's directory.
 *
 * @param {string} packageName Name of the package to locate.
 * @param {string} importerPackageJsonPath Path used as the resolution anchor.
 * @returns {string} `file:` URL (href) of the selected entry file.
 */
export function resolvePackageModuleUrlFromImporter(packageName, importerPackageJsonPath) {
    const manifestPath = createRequire(importerPackageJsonPath).resolve(`${packageName}/package.json`);
    const manifestText = readFileSync(manifestPath, 'utf8');
    const entryFile = selectPreferredPackageEntry(JSON.parse(manifestText), packageName);
    const entryPath = join(dirname(manifestPath), entryFile);
    return pathToFileURL(entryPath).href;
}
47
/**
 * Resolves 'turndown-plugin-gfm' from the point of view of the
 * '@just-every/crawl' package and returns the URL of its preferred entry.
 *
 * @returns {string} URL produced by resolvePackageModuleUrlFromImporter().
 */
export function resolveTurndownPluginEsmUrl() {
    return resolvePackageModuleUrlFromImporter('turndown-plugin-gfm', getCrawlPackageJsonPath());
}
51
/**
 * Computes the URL of the sibling module './turndownPluginGfmCompat.js',
 * relative to this module's own URL.
 *
 * @returns {string} Absolute URL (href) of the compat module.
 */
export function resolveTurndownPluginCompatUrl() {
    const compatModuleLocation = new URL('./turndownPluginGfmCompat.js', import.meta.url);
    return compatModuleLocation.href;
}
54
/**
 * Best-effort, in-place patch of the crawl package's 'dist/parser/markdown.js'
 * so that its `turndown-plugin-gfm` import points at this package's compat
 * module.
 *
 * The file is only rewritten when applyCrawlMarkdownInteropPatch() actually
 * changes its text. Both reading and writing are allowed to fail silently
 * (missing file, read-only install, and so on): ensureTurndownPluginCompat()
 * registers the './turndownPluginLoader.js' resolve hook right after calling
 * this function, and that hook redirects the same import at resolution time.
 *
 * Failure to locate the crawl package itself is not swallowed.
 *
 * @returns {void}
 */
export function patchCrawlMarkdownInterop() {
    const crawlPackageJsonPath = getCrawlPackageJsonPath();
    const crawlMarkdownPath = join(dirname(crawlPackageJsonPath), 'dist', 'parser', 'markdown.js');
    const compatModuleUrl = resolveTurndownPluginCompatUrl();
    let source;
    try {
        source = readFileSync(crawlMarkdownPath, 'utf8');
    }
    catch {
        // Nothing to patch if the target cannot be read; treat it the same
        // way as a failed write instead of aborting the whole load.
        return;
    }
    const patchedSource = applyCrawlMarkdownInteropPatch(source, compatModuleUrl);
    if (patchedSource === source) {
        return;
    }
    try {
        writeFileSync(crawlMarkdownPath, patchedSource, 'utf8');
    }
    catch {
        // Deliberately ignored: the install directory may be read-only.
    }
}
69
/**
 * One-time setup for the turndown-plugin-gfm interop: applies the on-disk
 * patch, then registers './turndownPluginLoader.js' via `register()` from
 * node:module. Later calls do nothing once `loaderRegistered` has been set.
 *
 * @returns {Promise<void>}
 */
export async function ensureTurndownPluginCompat() {
    if (!loaderRegistered) {
        patchCrawlMarkdownInterop();
        const loaderLocation = new URL('./turndownPluginLoader.js', import.meta.url);
        register(loaderLocation, import.meta.url);
        // Only flagged after both steps completed without throwing.
        loaderRegistered = true;
    }
}
@@ -0,0 +1,3 @@
1
// A plugin is a function that receives a service object and returns nothing.
+ type GfmPlugin = (service: unknown) => void;
2
// The exported `gfm` binding; typed as possibly undefined.
+ export declare const gfm: GfmPlugin | undefined;
3
// Empty export keeps this file a module.
+ export {};
@@ -0,0 +1,7 @@
1
+ import { createRequire } from 'node:module';
2
// CommonJS-style `require`, anchored at this module's URL.
const requireCommonJs = createRequire(import.meta.url);
const pluginExports = requireCommonJs('turndown-plugin-gfm');
/**
 * The `gfm` export of 'turndown-plugin-gfm', read either directly from the
 * required module or from its `default` export.
 */
export const gfm = pluginExports.gfm ?? pluginExports.default?.gfm;
// Fail at import time if neither location provided a function.
if (typeof gfm !== 'function') {
    throw new Error('turndown-plugin-gfm did not provide a usable gfm export');
}
@@ -0,0 +1,10 @@
1
// Resolve hook signature: receives a module specifier, a context that may
// carry `parentURL`, and a `defaultResolve` callback with the same
// (specifier, context) shape. Resolves to an object with a `url` and an
// optional `shortCircuit` flag.
+ export declare function resolve(specifier: string, context: {
2
+ parentURL?: string;
3
+ }, defaultResolve: (specifier: string, context: {
4
+ parentURL?: string;
5
+ }) => Promise<{
6
+ url: string;
7
+ }>): Promise<{
8
+ shortCircuit?: boolean;
9
+ url: string;
10
+ }>;
@@ -0,0 +1,11 @@
1
+ import { resolveTurndownPluginCompatUrl } from './turndownPluginCompat.js';
2
// Computed once when the loader module is evaluated.
const turndownPluginCompatUrl = resolveTurndownPluginCompatUrl();

/**
 * Resolve hook: redirects the bare specifier 'turndown-plugin-gfm' to the
 * compat module URL and short-circuits the chain; every other specifier is
 * passed through to `defaultResolve` untouched.
 *
 * @param {string} specifier Module specifier being resolved.
 * @param {{ parentURL?: string }} context Resolution context, forwarded as-is.
 * @param {Function} defaultResolve Next resolver in the chain.
 * @returns {Promise<{ shortCircuit?: boolean, url: string }>}
 */
export async function resolve(specifier, context, defaultResolve) {
    if (specifier !== 'turndown-plugin-gfm') {
        return defaultResolve(specifier, context);
    }
    return { shortCircuit: true, url: turndownPluginCompatUrl };
}
@@ -0,0 +1,3 @@
1
+ import type { Resource, Tool } from '@modelcontextprotocol/sdk/types.js';
2
// Definition of the 'read_website' tool, typed with the MCP SDK `Tool` type.
+ export declare const READ_WEBSITE_TOOL: Tool;
3
// List of resource descriptors, typed with the MCP SDK `Resource` type.
+ export declare const RESOURCES: Resource[];
@@ -0,0 +1,46 @@
1
// JSON-schema description of the arguments accepted by the tool; only `url`
// is required.
const readWebsiteInputSchema = {
    type: 'object',
    properties: {
        url: {
            type: 'string',
            description: 'HTTP/HTTPS URL to fetch and convert to markdown',
        },
        pages: {
            type: 'number',
            description: 'Maximum number of pages to crawl (default: 1)',
            default: 1,
            minimum: 1,
            maximum: 100,
        },
        cookiesFile: {
            type: 'string',
            description: 'Path to Netscape cookie file for authenticated pages',
        },
    },
    required: ['url'],
};

// Behaviour hints attached to the tool definition.
const readWebsiteAnnotations = {
    title: 'Read Website',
    readOnlyHint: true,
    destructiveHint: false,
    idempotentHint: true,
    openWorldHint: true,
};

/**
 * Definition of the `read_website` tool: name, description, input schema and
 * annotations.
 */
export const READ_WEBSITE_TOOL = {
    name: 'read_website',
    description: 'Fast, token-efficient web content extraction - ideal for reading documentation, analyzing content, and gathering information from websites. Converts to clean Markdown while preserving links and structure.',
    inputSchema: readWebsiteInputSchema,
    annotations: readWebsiteAnnotations,
};
33
// Builds one resource descriptor; every entry shares the JSON mime type.
const describeJsonResource = (uri, name, description) => ({
    uri,
    name,
    mimeType: 'application/json',
    description,
});

/**
 * Resource descriptors exposed by this package: a cache status entry and a
 * clear-cache entry, both under the `read-website-fast://` scheme.
 */
export const RESOURCES = [
    describeJsonResource('read-website-fast://status', 'Cache Status', 'Get cache status information'),
    describeJsonResource('read-website-fast://clear-cache', 'Clear Cache', 'Clear the cache directory'),
];
package/dist/serve.js CHANGED
@@ -2,6 +2,7 @@
2
2
  import { Server } from '@modelcontextprotocol/sdk/server/index.js';
3
3
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4
4
  import { CallToolRequestSchema, ListToolsRequestSchema, ListResourcesRequestSchema, ReadResourceRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
5
+ import { READ_WEBSITE_TOOL, RESOURCES } from './mcp/definitions.js';
5
6
  import { logger } from './utils/logger.js';
6
7
  logger.info('MCP Server starting up...');
7
8
  logger.debug('Node version:', process.version);
@@ -27,53 +28,6 @@ logger.info('MCP server instance created successfully');
27
28
  server.onerror = error => {
28
29
  logger.error('MCP Server Error:', error);
29
30
  };
30
- const READ_WEBSITE_TOOL = {
31
- name: 'read_website',
32
- description: 'Fast, token-efficient web content extraction - ideal for reading documentation, analyzing content, and gathering information from websites. Converts to clean Markdown while preserving links and structure.',
33
- inputSchema: {
34
- type: 'object',
35
- properties: {
36
- url: {
37
- type: 'string',
38
- description: 'HTTP/HTTPS URL to fetch and convert to markdown',
39
- },
40
- pages: {
41
- type: 'number',
42
- description: 'Maximum number of pages to crawl (default: 1)',
43
- default: 1,
44
- minimum: 1,
45
- maximum: 100,
46
- },
47
- cookiesFile: {
48
- type: 'string',
49
- description: 'Path to Netscape cookie file for authenticated pages',
50
- optional: true,
51
- },
52
- },
53
- required: ['url'],
54
- },
55
- annotations: {
56
- title: 'Read Website',
57
- readOnlyHint: true,
58
- destructiveHint: false,
59
- idempotentHint: true,
60
- openWorldHint: true,
61
- },
62
- };
63
- const RESOURCES = [
64
- {
65
- uri: 'read-website-fast://status',
66
- name: 'Cache Status',
67
- mimeType: 'application/json',
68
- description: 'Get cache status information',
69
- },
70
- {
71
- uri: 'read-website-fast://clear-cache',
72
- name: 'Clear Cache',
73
- mimeType: 'application/json',
74
- description: 'Clear the cache directory',
75
- },
76
- ];
77
31
  server.setRequestHandler(ListToolsRequestSchema, async () => {
78
32
  logger.debug('Received ListTools request');
79
33
  const response = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@just-every/mcp-read-website-fast",
3
- "version": "0.1.20",
3
+ "version": "0.1.21",
4
4
  "description": "Markdown Content Preprocessor - Fetch web pages, extract content, convert to clean Markdown",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -51,23 +51,23 @@
51
51
  "license": "MIT",
52
52
  "dependencies": {
53
53
  "@just-every/crawl": "^1.0.8",
54
- "@modelcontextprotocol/sdk": "^1.17.4",
55
- "commander": "^14.0.0",
56
- "uuid": "^11.1.0"
54
+ "@modelcontextprotocol/sdk": "^1.29.0",
55
+ "commander": "^14.0.3",
56
+ "uuid": "^13.0.0"
57
57
  },
58
58
  "devDependencies": {
59
- "@types/jsdom": "^21.1.7",
60
- "@types/node": "^24.3.0",
61
- "@types/turndown": "^5.0.5",
62
- "@typescript-eslint/eslint-plugin": "^8.41.0",
63
- "@typescript-eslint/parser": "^8.41.0",
64
- "eslint": "^9.34.0",
59
+ "@types/jsdom": "^28.0.1",
60
+ "@types/node": "^25.5.2",
61
+ "@types/turndown": "^5.0.6",
62
+ "@typescript-eslint/eslint-plugin": "^8.58.0",
63
+ "@typescript-eslint/parser": "^8.58.0",
64
+ "eslint": "^9.39.4",
65
65
  "eslint-config-prettier": "^10.1.8",
66
- "eslint-plugin-prettier": "^5.5.4",
67
- "tsx": "^4.20.5",
68
- "typescript": "^5.9.2",
69
- "typescript-eslint": "^8.41.0",
70
- "vitest": "^3.2.4"
66
+ "eslint-plugin-prettier": "^5.5.5",
67
+ "tsx": "^4.21.0",
68
+ "typescript": "^6.0.2",
69
+ "typescript-eslint": "^8.58.0",
70
+ "vitest": "^4.1.2"
71
71
  },
72
72
  "engines": {
73
73
  "node": ">=20.0.0"