mcp-safe-fetch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,79 @@
1
+ # mcp-safe-fetch
2
+
3
+ Deterministic content sanitization MCP server for agentic coding tools. Strips prompt injection vectors from web-fetched content before it enters the LLM context.
4
+
5
+ Drop-in replacement for Claude Code's built-in `WebFetch` — exposes a `safe_fetch` tool that fetches URLs, sanitizes the HTML, and returns clean markdown.
6
+
7
+ ## What it strips
8
+
9
+ - **Hidden HTML** — `display:none`, `visibility:hidden`, `opacity:0`, `[hidden]` attribute
10
+ - **Dangerous tags** — `<script>`, `<style>`, `<noscript>`, `<meta>`, `<link>`
11
+ - **HTML comments** — often used to inject instructions invisible to readers
12
+ - **Invisible unicode** — zero-width chars, soft hyphens, BOM, bidi overrides, variation selectors, tag characters
13
+ - **Control characters** — preserves `\n`, `\t`, `\r`, strips everything else
14
+ - **Fake LLM delimiters** — `<|im_start|>`, `[INST]`, `<<SYS>>`, `\n\nHuman:`, etc.
15
+ - **NFKC normalization** — folds fullwidth and other compatibility forms into their standard equivalents (cross-script look-alikes such as Cyrillic "а" are not remapped)
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ npx -y mcp-safe-fetch init
21
+ ```
22
+
23
+ This configures Claude Code to use `safe_fetch` and deny the built-in `WebFetch`. Restart Claude Code after running.
24
+
25
+ ## Usage
26
+
27
+ ### As MCP server (automatic)
28
+
29
+ After `init`, Claude Code uses `safe_fetch` whenever it needs to read a URL. The sanitization header shows what was stripped:
30
+
31
+ ```
32
+ [safe-fetch] Stripped: 5 hidden elements, 68 script tags, 3 style tags | 284127 → 12720 bytes (219ms)
33
+ ```
34
+
35
+ ### CLI
36
+
37
+ Test sanitization on any URL:
38
+
39
+ ```bash
40
+ npx -y mcp-safe-fetch test <url>
41
+ ```
42
+
43
+ Stats print to stderr, sanitized markdown to stdout.
44
+
45
+ ### MCP tools
46
+
47
+ | Tool | Description |
48
+ |------|-------------|
49
+ | `safe_fetch` | Fetch a URL and return sanitized markdown |
50
+ | `sanitize_stats` | Show session sanitization statistics |
51
+
52
+ ## Configuration
53
+
54
+ Optional. Create `.mcp-safe-fetch.json` in your project root or home directory:
55
+
56
+ ```json
57
+ {
58
+ "logStripped": true,
59
+ "logFile": ".claude/sanitize.log"
60
+ }
61
+ ```
62
+
63
+ | Option | Default | Description |
64
+ |--------|---------|-------------|
65
+ | `logStripped` | `false` | Log sanitization stats to file |
66
+ | `logFile` | `.claude/sanitize.log` | Log file path |
67
+
68
+ ## How it works
69
+
70
+ 1. Fetch URL with native `fetch` (from your machine, not Anthropic's servers)
71
+ 2. Parse HTML with [cheerio](https://cheerio.js.org/) (htmlparser2 backend)
72
+ 3. Strip hidden elements, dangerous tags, and comments
73
+ 4. Convert to markdown with [turndown](https://github.com/mixmark-io/turndown)
74
+ 5. Strip invisible unicode characters and normalize with NFKC
75
+ 6. Strip fake LLM delimiter tokens
76
+
77
+ ## License
78
+
79
+ MIT
package/dist/cli.d.ts ADDED
@@ -0,0 +1 @@
1
+ export declare function runCli(command: string, args: string[]): void; // CLI dispatcher; the implementation visible in cli.js acts only on the 'init' and 'test' commands
package/dist/cli.js ADDED
@@ -0,0 +1,88 @@
1
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs';
import { homedir } from 'node:os';
import { dirname, join } from 'node:path';
import { fetchUrl } from './fetch.js';
import { sanitize } from './sanitize/pipeline.js';
5
// Resolve the home directory through the OS instead of reading $HOME directly:
// HOME is normally unset on Windows, and joining onto an empty string would
// silently turn both paths below into paths relative to the current directory.
const HOME_DIR = homedir();
/** File that `init` updates with the `mcpServers` entry (~/.claude.json). */
const CLAUDE_JSON_PATH = join(HOME_DIR, '.claude.json');
/** File that `init` updates with tool permissions (~/.claude/settings.json). */
const SETTINGS_PATH = join(HOME_DIR, '.claude', 'settings.json');
/** stdio launch recipe stored under `mcpServers['safe-fetch']`. */
const MCP_CONFIG = {
    type: 'stdio',
    command: 'npx',
    args: ['-y', 'mcp-safe-fetch'],
};
12
/**
 * Load a JSON object from disk.
 *
 * @param path File to read.
 * @returns The parsed object; an empty object when the file is missing or
 *   contains only whitespace.
 * @throws {Error} When the file exists but cannot be read or parsed, or when
 *   its top-level value is not a plain object. The result of this function is
 *   modified and written back to the same path, so treating an unreadable file
 *   as `{}` would erase whatever the user had stored in it.
 */
function readJson(path) {
    if (!existsSync(path)) {
        return {};
    }
    let parsed;
    try {
        const raw = readFileSync(path, 'utf-8');
        if (raw.trim() === '') {
            // Nothing to lose in an empty file; start from a blank object.
            return {};
        }
        parsed = JSON.parse(raw);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Cannot read ${path} as JSON (${reason}); fix or remove the file and retry`);
    }
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        throw new Error(`Cannot use ${path}: expected a JSON object at the top level`);
    }
    return parsed;
}
23
/**
 * Write `data` to `path` as pretty-printed JSON (2-space indent, trailing
 * newline), creating any missing parent directories first.
 *
 * @param path Destination file.
 * @param data JSON-serializable value.
 */
function writeJson(path, data) {
    // The settings file lives in a subdirectory that may not exist yet; without
    // this the write fails with ENOENT after the first file was already updated.
    mkdirSync(dirname(path), { recursive: true });
    writeFileSync(path, JSON.stringify(data, null, 2) + '\n', 'utf-8');
}
26
/** Permission rule naming Claude Code's built-in fetch tool. */
const WEB_FETCH_RULE = 'WebFetch';
/** Permission rule naming this server's `safe_fetch` tool. */
const SAFE_FETCH_RULE = 'mcp__safe-fetch__safe_fetch';
/** Return a copy of `rules` (a fresh list when it is not an array) that contains `rule`. */
function withRule(rules, rule) {
    const list = Array.isArray(rules) ? [...rules] : [];
    if (!list.includes(rule)) {
        list.push(rule);
    }
    return list;
}
/**
 * `init` subcommand: register the MCP server in ~/.claude.json and route
 * fetching through it in ~/.claude/settings.json.
 *
 * Claude Code's settings file expresses tool permissions as rule arrays under
 * `permissions.allow` / `permissions.deny`; an `allowedTools` map is not part
 * of that schema, so writing one would leave the built-in WebFetch enabled.
 *
 * @param args Remaining CLI arguments. `--dry-run` prints the intended changes
 *   without touching either file.
 */
function runInit(args) {
    if (args.includes('--dry-run')) {
        console.log('Would add to ~/.claude.json:');
        console.log(JSON.stringify({ mcpServers: { 'safe-fetch': MCP_CONFIG } }, null, 2));
        console.log('\nWould add to ~/.claude/settings.json:');
        console.log(JSON.stringify({ permissions: { deny: [WEB_FETCH_RULE], allow: [SAFE_FETCH_RULE] } }, null, 2));
        return;
    }
    // Load both files before writing either, so a failure while loading the
    // second cannot leave the first one already modified.
    const claudeJson = readJson(CLAUDE_JSON_PATH);
    const settings = readJson(SETTINGS_PATH);
    // Add MCP server to ~/.claude.json
    if (!claudeJson.mcpServers || typeof claudeJson.mcpServers !== 'object' || Array.isArray(claudeJson.mcpServers)) {
        claudeJson.mcpServers = {};
    }
    claudeJson.mcpServers['safe-fetch'] = MCP_CONFIG;
    // Add tool permissions to ~/.claude/settings.json, keeping existing rules
    const existing = settings.permissions;
    const permissions = existing && typeof existing === 'object' && !Array.isArray(existing) ? existing : {};
    permissions.deny = withRule(permissions.deny, WEB_FETCH_RULE);
    permissions.allow = withRule(permissions.allow, SAFE_FETCH_RULE);
    settings.permissions = permissions;
    writeJson(CLAUDE_JSON_PATH, claudeJson);
    writeJson(SETTINGS_PATH, settings);
    console.log('Updated ~/.claude.json:');
    console.log(' + mcpServers.safe-fetch (mcp-safe-fetch MCP server)');
    console.log('\nUpdated ~/.claude/settings.json:');
    console.log(' + permissions.deny: "WebFetch"');
    console.log(' + permissions.allow: "mcp__safe-fetch__safe_fetch"');
    console.log('\nRestart Claude Code to activate.');
}
55
/**
 * `test` subcommand: fetch a single URL, sanitize it, and report the outcome.
 * The statistics go to stderr and the sanitized content to stdout, so the
 * content can be piped without the report mixed in.
 *
 * @param args Remaining CLI arguments; the first one is the URL.
 */
async function runTest(args) {
    const [url] = args;
    if (!url) {
        console.error('Usage: mcp-safe-fetch test <url>');
        process.exit(1);
    }
    console.error(`Fetching ${url}...`);
    const startedAt = Date.now();
    const { html } = await fetchUrl(url);
    const { content, stats, inputSize, outputSize } = sanitize(html);
    const durationMs = Date.now() - startedAt;
    // Report on stderr, one line per entry
    const report = [
        `\nSanitization complete (${durationMs}ms):`,
        ` Input: ${inputSize} bytes`,
        ` Output: ${outputSize} bytes`,
        ` Hidden elements: ${stats.hiddenElements}`,
        ` Script tags: ${stats.scriptTags}`,
        ` Style tags: ${stats.styleTags}`,
        ` Zero-width chars: ${stats.zeroWidthChars}`,
        ` LLM delimiters: ${stats.llmDelimiters}`,
    ];
    for (const line of report) {
        console.error(line);
    }
    // Sanitized content on stdout
    process.stdout.write(content);
}
78
/**
 * Dispatch a CLI subcommand.
 *
 * @param command Subcommand name; only 'init' and 'test' do anything.
 * @param args Arguments that followed the subcommand.
 */
export function runCli(command, args) {
    switch (command) {
        case 'init':
            runInit(args);
            break;
        case 'test':
            // runTest is async; report a rejection and exit non-zero.
            runTest(args).catch((error) => {
                const detail = error instanceof Error ? error.message : error;
                console.error(`Error: ${detail}`);
                process.exit(1);
            });
            break;
        default:
            // Any other command is ignored.
            break;
    }
}
@@ -0,0 +1,8 @@
1
+ export interface SanitizeConfig { // settings object returned by loadConfig()
2
+ logStripped: boolean; // when true, the server calls logSanitization() after each successful safe_fetch
3
+ logFile: string; // path passed to logSanitization(); built-in default is '.claude/sanitize.log'
4
+ allowDataUris: boolean; // built-in default false; not read by any code visible in this package diff
5
+ maxBase64DecodeLength: number; // built-in default 500; not read by any code visible in this package diff
6
+ customPatterns: string[]; // built-in default []; not read by any code visible in this package diff
7
+ }
8
+ export declare function loadConfig(): SanitizeConfig; // looks for .mcp-safe-fetch.json in the working directory, then the home directory
package/dist/config.js ADDED
@@ -0,0 +1,28 @@
1
+ import { readFileSync, existsSync } from 'node:fs';
2
+ import { join } from 'node:path';
3
/** Built-in settings; a config file may override any of them. */
const DEFAULT_CONFIG = {
    logStripped: false,
    logFile: '.claude/sanitize.log',
    allowDataUris: false,
    maxBase64DecodeLength: 500,
    customPatterns: [],
};
/** Fresh copy of the defaults, so callers can never mutate the shared template. */
function cloneDefaults() {
    return { ...DEFAULT_CONFIG, customPatterns: [...DEFAULT_CONFIG.customPatterns] };
}
/** True when `value` has the same basic kind (array vs. typeof) as `reference`. */
function sameKind(value, reference) {
    if (Array.isArray(reference)) {
        return Array.isArray(value);
    }
    return typeof value === typeof reference;
}
/**
 * Load the sanitizer configuration.
 *
 * Looks for `.mcp-safe-fetch.json` in the current working directory first and
 * then in the user's home directory. The first file that parses to a JSON
 * object wins and is overlaid on the defaults; files that are unreadable,
 * invalid, or not an object are skipped. A known option whose value has the
 * wrong kind (for example the string "false" for `logStripped`) falls back to
 * its default instead of being trusted.
 *
 * @returns A new config object on every call.
 */
export function loadConfig() {
    // USERPROFILE covers Windows, where HOME is normally unset.
    const home = process.env.HOME || process.env.USERPROFILE || '';
    const paths = [join(process.cwd(), '.mcp-safe-fetch.json')];
    if (home) {
        paths.push(join(home, '.mcp-safe-fetch.json'));
    }
    for (const configPath of paths) {
        if (!existsSync(configPath)) {
            continue;
        }
        try {
            const parsed = JSON.parse(readFileSync(configPath, 'utf-8'));
            if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
                // Spreading an array or string would add junk index keys.
                continue;
            }
            const merged = { ...cloneDefaults(), ...parsed };
            for (const key of Object.keys(DEFAULT_CONFIG)) {
                if (!sameKind(merged[key], DEFAULT_CONFIG[key])) {
                    merged[key] = cloneDefaults()[key];
                }
            }
            return merged;
        }
        catch {
            // Unreadable or invalid config: try the next location, then defaults.
        }
    }
    return cloneDefaults();
}
@@ -0,0 +1,7 @@
1
+ export interface FetchResult { // what fetchUrl() resolves to for a successful (response.ok) request
2
+ html: string; // response body read as text
3
+ url: string; // response.url as reported by fetch (redirects are followed)
4
+ status: number; // HTTP status code of the response
5
+ contentType: string; // Content-Type response header, or '' when the header is absent
6
+ }
7
+ export declare function fetchUrl(url: string): Promise<FetchResult>; // rejects with Error('HTTP <status>: <statusText>') when the response is not ok
package/dist/fetch.js ADDED
@@ -0,0 +1,21 @@
1
/**
 * Download `url` and return its body as text together with response metadata.
 * Redirects are followed and the request is aborted after 10 seconds.
 *
 * @param url Address to request.
 * @returns Body text, final URL, status code and Content-Type ('' if absent).
 * @throws {Error} `HTTP <status>: <statusText>` when the response is not ok.
 */
export async function fetchUrl(url) {
    const response = await fetch(url, {
        signal: AbortSignal.timeout(10000),
        redirect: 'follow',
        headers: {
            'User-Agent': 'mcp-safe-fetch/0.1',
            'Accept': 'text/html,application/xhtml+xml,*/*',
        },
    });
    const { ok, status, statusText } = response;
    if (!ok) {
        throw new Error(`HTTP ${status}: ${statusText}`);
    }
    const html = await response.text();
    return {
        html,
        url: response.url,
        status,
        contentType: response.headers.get('content-type') || '',
    };
}
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
package/dist/index.js ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env node
2
+ import { startServer } from './server.js';
3
+ import { runCli } from './cli.js';
4
// Entry point: argv[2] selects a CLI subcommand; anything else (including no
// argument) starts the MCP server.
const [, , command, ...cliArgs] = process.argv;
switch (command) {
    case 'init':
    case 'test':
        runCli(command, cliArgs);
        break;
    default:
        // Default: start MCP server (this is what npx safe-fetch invokes)
        startServer().catch((error) => {
            console.error('[safe-fetch] Fatal error:', error);
            process.exit(1);
        });
}
@@ -0,0 +1,11 @@
1
+ import type { PipelineStats } from './sanitize/pipeline.js';
2
+ export interface LogEntry { // one record appended to the log file as a single JSON line
3
+ timestamp: string; // the server fills this with new Date().toISOString()
4
+ url: string; // URL that was requested through safe_fetch
5
+ stripped: PipelineStats; // per-category counters returned by sanitize()
6
+ inputSize: number; // input size as reported by sanitize()
7
+ outputSize: number; // output size as reported by sanitize()
8
+ reductionPercent: number; // size reduction in percent, rounded to one decimal place
9
+ durationMs: number; // wall-clock milliseconds spent on fetch plus sanitize
10
+ }
11
+ export declare function logSanitization(logFile: string, entry: LogEntry): void; // creates the log directory if needed; write failures are reported on stderr, not thrown
package/dist/logger.js ADDED
@@ -0,0 +1,11 @@
1
+ import { appendFileSync, mkdirSync } from 'node:fs';
2
+ import { dirname } from 'node:path';
3
+ export function logSanitization(logFile, entry) {
4
+ try {
5
+ mkdirSync(dirname(logFile), { recursive: true });
6
+ appendFileSync(logFile, JSON.stringify(entry) + '\n', 'utf-8');
7
+ }
8
+ catch {
9
+ console.error(`[safe-fetch] Failed to write log to ${logFile}`);
10
+ }
11
+ }
@@ -0,0 +1,7 @@
1
+ export interface DelimiterSanitizeResult { // return value of sanitizeDelimiters()
2
+ text: string; // input text with every matched delimiter token removed
3
+ stats: {
4
+ llmDelimiters: number; // number of delimiter matches that were removed
5
+ };
6
+ }
7
+ export declare function sanitizeDelimiters(text: string): DelimiterSanitizeResult; // strips fake chat-template tokens such as <|im_start|>, [INST], <<SYS>>
@@ -0,0 +1,30 @@
1
/**
 * Chat-template control tokens that page content may use to impersonate a
 * system/user/assistant turn. The text arriving here is Markdown, in which
 * brackets, slashes and underscores may carry a backslash escape, so every
 * such character is matched with an optional leading backslash.
 */
const LLM_DELIMITER_PATTERNS = [
    /<\|im\\?_start\|>/gi,
    /<\|im\\?_end\|>/gi,
    /<\|system\|>/gi,
    /<\|user\|>/gi,
    /<\|assistant\|>/gi,
    /<\|endoftext\|>/gi,
    /<\|pad\|>/gi,
    /\\?\[INST\\?\]/gi,
    /\\?\[\\?\/INST\\?\]/gi,
    /<<SYS>>/gi,
    /<<\\?\/SYS>>/gi,
    /\n\nHuman:/g,
    /\n\nAssistant:/g,
];
/**
 * Remove fake LLM delimiter tokens from `text`.
 *
 * Removal is repeated until a full pass finds nothing: deleting one token can
 * splice its neighbours into a new one (`<|im_<|im_start|>start|>` becomes
 * `<|im_start|>` after a single pass), so one pass is not enough.
 *
 * @param text Text to clean.
 * @returns The cleaned text and the total number of tokens removed.
 */
export function sanitizeDelimiters(text) {
    let count = 0;
    let result = text;
    let removedThisPass;
    do {
        removedThisPass = 0;
        for (const pattern of LLM_DELIMITER_PATTERNS) {
            result = result.replace(pattern, () => {
                removedThisPass++;
                return '';
            });
        }
        count += removedThisPass;
        // Each removal shortens the string, so this loop always terminates.
    } while (removedThisPass > 0);
    return {
        text: result,
        stats: { llmDelimiters: count },
    };
}
@@ -0,0 +1,13 @@
1
+ import type { CheerioAPI } from 'cheerio';
2
+ export interface HtmlSanitizeResult { // return value of sanitizeHtml()
3
+ html: string; // document serialized with $.html() after the removals
4
+ stats: {
5
+ hiddenElements: number; // elements removed for a hiding inline style or the hidden attribute
6
+ htmlComments: number; // comment nodes removed
7
+ scriptTags: number; // <script> elements removed
8
+ styleTags: number; // <style> elements removed
9
+ noscriptTags: number; // <noscript> elements removed
10
+ metaTags: number; // combined count of removed <meta> and <link> elements
11
+ };
12
+ }
13
+ export declare function sanitizeHtml($: CheerioAPI): HtmlSanitizeResult; // removes nodes from the passed-in document in place
@@ -0,0 +1,48 @@
1
/** Candidates for hidden-element removal; the inline style is inspected in code. */
const HIDDEN_SELECTORS = '[hidden], [style]';
const STRIP_TAGS = ['script', 'style', 'noscript', 'meta', 'link'];
// Matches a zero opacity (0, 0.0, 0%) but not fractional values such as 0.5.
const ZERO_OPACITY = /opacity:0(?:\.0*)?(?![\d.])/;
/**
 * Decide whether an inline `style` value hides its element.
 * The value is lower-cased and stripped of whitespace first, so
 * `DISPLAY : None` is treated like `display:none`. CSS comments and escape
 * sequences inside the value are not decoded.
 */
function isHidingStyle(style) {
    const normalized = style.toLowerCase().replace(/\s+/g, '');
    return (normalized.includes('display:none') ||
        normalized.includes('visibility:hidden') ||
        ZERO_OPACITY.test(normalized));
}
/** Filter callback selecting comment nodes. */
function isCommentNode(_index, node) {
    return node.type === 'comment';
}
/**
 * Remove content a reader would not see from a parsed document, in place:
 * elements hidden by inline style or the `hidden` attribute, the tags listed
 * in STRIP_TAGS, and HTML comments.
 *
 * @param $ Cheerio document to clean; it is mutated.
 * @returns The serialized cleaned document and per-category removal counts.
 */
export function sanitizeHtml($) {
    const stats = {
        hiddenElements: 0,
        htmlComments: 0,
        scriptTags: 0,
        styleTags: 0,
        noscriptTags: 0,
        metaTags: 0,
    };
    // Remove hidden elements by inline style / hidden attribute
    const hidden = $(HIDDEN_SELECTORS).filter((_index, el) => {
        const node = $(el);
        return node.attr('hidden') !== undefined || isHidingStyle(node.attr('style') || '');
    });
    stats.hiddenElements = hidden.length;
    hidden.remove();
    // Remove script, style, noscript, meta, link tags
    for (const tag of STRIP_TAGS) {
        const elements = $(tag);
        const count = elements.length;
        if (tag === 'script')
            stats.scriptTags = count;
        else if (tag === 'style')
            stats.styleTags = count;
        else if (tag === 'noscript')
            stats.noscriptTags = count;
        else if (tag === 'meta' || tag === 'link')
            stats.metaTags += count; // meta and link share one counter
        elements.remove();
    }
    // Count and remove HTML comments. `$('*').contents()` only reaches children
    // of elements, so comments sitting directly under the document root (before
    // or after the root element) are collected separately.
    const nestedComments = $('*').contents().filter(isCommentNode);
    const rootComments = $.root().contents().filter(isCommentNode);
    stats.htmlComments = nestedComments.length + rootComments.length;
    nestedComments.remove();
    rootComments.remove();
    return {
        html: $.html(),
        stats,
    };
}
@@ -0,0 +1,21 @@
1
+ export interface PipelineStats { // merged counters from the HTML, unicode and delimiter stages
2
+ hiddenElements: number; // hidden elements removed by the HTML stage
3
+ htmlComments: number; // HTML comments removed
4
+ scriptTags: number; // <script> elements removed
5
+ styleTags: number; // <style> elements removed
6
+ noscriptTags: number; // <noscript> elements removed
7
+ metaTags: number; // <meta> and <link> elements removed (shared counter)
8
+ zeroWidthChars: number; // zero-width / invisible characters removed by the unicode stage
9
+ controlChars: number; // control characters removed (\n, \t and \r are kept)
10
+ bidiOverrides: number; // bidirectional override / isolate characters removed
11
+ unicodeTags: number; // Unicode tag characters removed
12
+ variationSelectors: number; // variation selector characters removed
13
+ llmDelimiters: number; // fake LLM delimiter tokens removed
14
+ }
15
+ export interface PipelineResult { // return value of sanitize()
16
+ content: string; // sanitized Markdown
17
+ stats: PipelineStats; // what was removed, by category
18
+ inputSize: number; // size of the HTML passed in, as measured by sanitize()
19
+ outputSize: number; // size of `content`, as measured by sanitize()
20
+ }
21
+ export declare function sanitize(html: string): PipelineResult; // runs HTML cleanup, Markdown conversion, unicode cleanup, then delimiter stripping
@@ -0,0 +1,39 @@
1
+ import * as cheerio from 'cheerio/slim';
2
+ import TurndownService from 'turndown';
3
+ import { sanitizeHtml } from './html.js';
4
+ import { sanitizeUnicode } from './unicode.js';
5
+ import { sanitizeDelimiters } from './delimiters.js';
6
/** Shared HTML-to-Markdown converter; configured once at module load. */
const turndown = new TurndownService({
    headingStyle: 'atx',
    codeBlockStyle: 'fenced',
    fence: '```',
    hr: '---',
    bulletListMarker: '-',
    preformattedCode: true,
});
/**
 * Run the full sanitization pipeline over an HTML string.
 *
 * Stage order matters: unicode cleanup and NFKC normalization run before
 * delimiter stripping, so a delimiter disguised with invisible or fullwidth
 * characters is reduced to its plain form before the delimiter patterns see it.
 *
 * @param html Raw HTML text.
 * @returns Sanitized Markdown, merged per-stage counters, and the UTF-8 byte
 *   sizes of the input and output strings.
 */
export function sanitize(html) {
    // Sizes are presented to users as "bytes", so measure the UTF-8 encoding
    // rather than `.length`, which counts UTF-16 code units and under-reports
    // any non-ASCII content.
    const inputSize = Buffer.byteLength(html, 'utf8');
    // Step 1: Parse HTML with cheerio (htmlparser2 backend via /slim)
    const $ = cheerio.load(html);
    // Step 2: Strip hidden HTML elements
    const htmlResult = sanitizeHtml($);
    // Step 3: Convert cleaned HTML to markdown
    let content = turndown.turndown(htmlResult.html);
    // Step 4: Unicode sanitization
    const unicodeResult = sanitizeUnicode(content);
    content = unicodeResult.text;
    // Step 5: Strip fake LLM delimiters
    const delimiterResult = sanitizeDelimiters(content);
    content = delimiterResult.text;
    const outputSize = Buffer.byteLength(content, 'utf8');
    return {
        content,
        stats: {
            ...htmlResult.stats,
            ...unicodeResult.stats,
            ...delimiterResult.stats,
        },
        inputSize,
        outputSize,
    };
}
@@ -0,0 +1,11 @@
1
+ export interface UnicodeSanitizeResult { // return value of sanitizeUnicode()
2
+ text: string; // input with the counted characters removed, then NFKC-normalized
3
+ stats: {
4
+ zeroWidthChars: number; // zero-width / invisible characters removed
5
+ controlChars: number; // control characters removed (\n, \t and \r are kept)
6
+ bidiOverrides: number; // bidirectional override / isolate characters removed
7
+ unicodeTags: number; // Unicode tag characters removed
8
+ variationSelectors: number; // variation selector characters removed
9
+ };
10
+ }
11
+ export declare function sanitizeUnicode(text: string): UnicodeSanitizeResult; // counts are taken on the input before anything is stripped
@@ -0,0 +1,35 @@
1
// Zero-width and invisible characters: soft hyphen, combining grapheme joiner,
// Mongolian vowel separator, U+200B-200F, word joiner and the invisible
// operators (U+2060-2064), BOM.
const INVISIBLE_CHARS = /[\u00AD\u034F\u180E\u200B-\u200F\u2060-\u2064\uFEFF]/g;
// Bidirectional marks, overrides and isolates (including Arabic letter mark)
const BIDI_CHARS = /[\u061C\u202A-\u202E\u2066-\u2069]/g;
// Variation selectors: VS1-16 plus the supplement VS17-256 (U+E0100-E01EF),
// which can carry hidden payloads just like the basic range.
const VARIATION_SELECTORS = /[\uFE00-\uFE0F\u{E0100}-\u{E01EF}]/gu;
// Unicode tag characters (U+E0001-U+E007F)
const UNICODE_TAGS = /[\u{E0001}-\u{E007F}]/gu;
// Control characters (except \n \t \r): C0, DEL and C1
const CONTROL_CHARS = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]/g;
/** Number of matches of a global pattern in `text`. */
function countMatches(text, pattern) {
    return (text.match(pattern) || []).length;
}
/**
 * Strip invisible and control characters from `text`, then apply NFKC
 * normalization so compatibility forms (for example fullwidth letters)
 * collapse to their standard equivalents.
 *
 * @param text Text to clean.
 * @returns The cleaned text and, per category, how many characters were
 *   removed. Counts are taken on the input before anything is stripped.
 */
export function sanitizeUnicode(text) {
    const stats = {
        zeroWidthChars: countMatches(text, INVISIBLE_CHARS),
        controlChars: countMatches(text, CONTROL_CHARS),
        bidiOverrides: countMatches(text, BIDI_CHARS),
        unicodeTags: countMatches(text, UNICODE_TAGS),
        variationSelectors: countMatches(text, VARIATION_SELECTORS),
    };
    // Strip all
    let result = text
        .replace(INVISIBLE_CHARS, '')
        .replace(BIDI_CHARS, '')
        .replace(VARIATION_SELECTORS, '')
        .replace(UNICODE_TAGS, '')
        .replace(CONTROL_CHARS, '');
    // NFKC normalization (collapses compatibility forms such as fullwidth characters)
    result = result.normalize('NFKC');
    return { text: result, stats };
}
@@ -0,0 +1 @@
1
+ export declare function startServer(): Promise<void>; // registers the safe_fetch and sanitize_stats tools and connects the server over stdio
package/dist/server.js ADDED
@@ -0,0 +1,104 @@
1
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
3
+ import { z } from 'zod';
4
+ import { fetchUrl } from './fetch.js';
5
+ import { sanitize } from './sanitize/pipeline.js';
6
+ import { loadConfig } from './config.js';
7
+ import { logSanitization } from './logger.js';
8
/**
 * Size reduction in percent, rounded to one decimal place.
 * Returns 0 for an empty input instead of dividing by zero (which would yield
 * NaN and be serialized as `null` in the log).
 */
function computeReductionPercent(inputSize, outputSize) {
    if (inputSize <= 0) {
        return 0;
    }
    return Math.round((1 - outputSize / inputSize) * 1000) / 10;
}
/** Build the one-line summary that is prepended to every safe_fetch result. */
function buildHeader(result, durationMs) {
    const { stats } = result;
    const strippedItems = [];
    if (stats.hiddenElements > 0)
        strippedItems.push(`${stats.hiddenElements} hidden elements`);
    if (stats.scriptTags > 0)
        strippedItems.push(`${stats.scriptTags} script tags`);
    if (stats.styleTags > 0)
        strippedItems.push(`${stats.styleTags} style tags`);
    if (stats.zeroWidthChars > 0)
        strippedItems.push(`${stats.zeroWidthChars} zero-width chars`);
    if (stats.llmDelimiters > 0)
        strippedItems.push(`${stats.llmDelimiters} LLM delimiters`);
    const summary = strippedItems.length > 0
        ? `Stripped: ${strippedItems.join(', ')}`
        : 'Clean page';
    return `[safe-fetch] ${summary} | ${result.inputSize} → ${result.outputSize} bytes (${durationMs}ms)\n\n`;
}
/**
 * Start the MCP server on stdio.
 *
 * Registers two tools: `safe_fetch`, which fetches a URL and returns sanitized
 * Markdown prefixed with a summary header, and `sanitize_stats`, which reports
 * the counters accumulated since the server started. Diagnostics go to stderr
 * because stdout is the transport.
 *
 * @returns Resolves once the server is connected to the stdio transport.
 */
export async function startServer() {
    const config = loadConfig();
    // Running totals for this server process; reported by `sanitize_stats`.
    const session = {
        totalRequests: 0,
        totalStripped: {
            hiddenElements: 0, htmlComments: 0, scriptTags: 0,
            styleTags: 0, noscriptTags: 0, metaTags: 0,
            zeroWidthChars: 0, controlChars: 0, bidiOverrides: 0,
            unicodeTags: 0, variationSelectors: 0, llmDelimiters: 0,
        },
        urls: [],
    };
    const server = new McpServer({
        name: 'safe-fetch',
        version: '0.1.0',
    });
    server.registerTool('safe_fetch', {
        description: 'Fetch a URL and return sanitized content with prompt injection vectors removed. Strips hidden HTML elements, invisible unicode characters, and fake LLM delimiters.',
        inputSchema: {
            url: z.string().url().describe('URL to fetch'),
        },
    }, async ({ url }) => {
        try {
            const startTime = Date.now();
            const fetched = await fetchUrl(url);
            const result = sanitize(fetched.html);
            const durationMs = Date.now() - startTime;
            // Update session stats
            session.totalRequests++;
            session.urls.push(url);
            for (const key of Object.keys(session.totalStripped)) {
                // A counter missing from the result must not turn the total into NaN.
                session.totalStripped[key] += result.stats[key] ?? 0;
            }
            // Log if configured
            if (config.logStripped) {
                const entry = {
                    timestamp: new Date().toISOString(),
                    url,
                    stripped: result.stats,
                    inputSize: result.inputSize,
                    outputSize: result.outputSize,
                    reductionPercent: computeReductionPercent(result.inputSize, result.outputSize),
                    durationMs,
                };
                logSanitization(config.logFile, entry);
            }
            return {
                content: [{ type: 'text', text: buildHeader(result, durationMs) + result.content }],
            };
        }
        catch (error) {
            // Fetch and sanitize failures are returned as a tool error, not thrown.
            const message = error instanceof Error ? error.message : String(error);
            return {
                content: [{ type: 'text', text: `[safe-fetch] Error fetching ${url}: ${message}` }],
                isError: true,
            };
        }
    });
    server.registerTool('sanitize_stats', {
        description: 'Show sanitization statistics for the current session',
        inputSchema: {},
    }, async () => {
        const lines = [
            `Session stats (${session.totalRequests} requests):`,
            ` Hidden elements stripped: ${session.totalStripped.hiddenElements}`,
            ` Script tags stripped: ${session.totalStripped.scriptTags}`,
            ` Style tags stripped: ${session.totalStripped.styleTags}`,
            ` Zero-width chars stripped: ${session.totalStripped.zeroWidthChars}`,
            ` LLM delimiters stripped: ${session.totalStripped.llmDelimiters}`,
            ` Bidi overrides stripped: ${session.totalStripped.bidiOverrides}`,
            '',
            `URLs fetched:`,
            ...session.urls.map(u => ` - ${u}`),
        ];
        return {
            content: [{ type: 'text', text: lines.join('\n') }],
        };
    });
    const transport = new StdioServerTransport();
    await server.connect(transport);
    console.error('[safe-fetch] MCP server running on stdio');
}
package/package.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "name": "mcp-safe-fetch",
3
+ "version": "0.1.0",
4
+ "description": "Deterministic content sanitization MCP server for agentic coding tools",
5
+ "type": "module",
6
+ "main": "./dist/index.js",
7
+ "bin": {
8
+ "mcp-safe-fetch": "./dist/index.js"
9
+ },
10
+ "files": [
11
+ "dist"
12
+ ],
13
+ "scripts": {
14
+ "build": "tsc",
15
+ "test": "vitest run",
16
+ "test:watch": "vitest",
17
+ "prepublishOnly": "npm run build"
18
+ },
19
+ "keywords": [
20
+ "mcp",
21
+ "sanitize",
22
+ "prompt-injection",
23
+ "claude",
24
+ "llm",
25
+ "security"
26
+ ],
27
+ "author": "Tim Stark <tim@timstark.dev>",
28
+ "license": "MIT",
29
+ "repository": {
30
+ "type": "git",
31
+ "url": "https://github.com/timstarkk/mcp-safe-fetch"
32
+ },
33
+ "dependencies": {
34
+ "@modelcontextprotocol/sdk": "^1.27.0",
35
+ "cheerio": "^1.2.0",
36
+ "turndown": "^7.2.2",
37
+ "zod": "^3.23.0"
38
+ },
39
+ "devDependencies": {
40
+ "@types/node": "^22.0.0",
41
+ "@types/turndown": "^5.0.5",
42
+ "typescript": "^5.7.0",
43
+ "vitest": "^3.0.0"
44
+ }
45
+ }