visus-mcp 0.3.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/.claude/settings.local.json +22 -0
  2. package/LINKEDIN-STRATEGY.md +367 -0
  3. package/README.md +491 -16
  4. package/ROADMAP.md +167 -30
  5. package/SECURITY-AUDIT-v1.md +277 -0
  6. package/STATUS.md +801 -42
  7. package/TROUBLESHOOT-AUTH-20260322-2019.md +291 -0
  8. package/TROUBLESHOOT-JEST-20260323-1357.md +139 -0
  9. package/TROUBLESHOOT-LAMBDA-20260322-1945.md +183 -0
  10. package/VISUS-CLAUDE-CODE-PROMPT.md +1 -1
  11. package/VISUS-PROJECT-PLAN.md +7 -0
  12. package/dist/browser/playwright-renderer.d.ts.map +1 -1
  13. package/dist/browser/playwright-renderer.js +7 -0
  14. package/dist/browser/playwright-renderer.js.map +1 -1
  15. package/dist/browser/reader.d.ts +31 -0
  16. package/dist/browser/reader.d.ts.map +1 -0
  17. package/dist/browser/reader.js +98 -0
  18. package/dist/browser/reader.js.map +1 -0
  19. package/dist/index.d.ts +1 -1
  20. package/dist/index.d.ts.map +1 -1
  21. package/dist/index.js +37 -5
  22. package/dist/index.js.map +1 -1
  23. package/dist/lambda-handler.d.ts +0 -6
  24. package/dist/lambda-handler.d.ts.map +1 -1
  25. package/dist/lambda-handler.js +97 -25
  26. package/dist/lambda-handler.js.map +1 -1
  27. package/dist/sanitizer/framework-mapper.d.ts +22 -0
  28. package/dist/sanitizer/framework-mapper.d.ts.map +1 -0
  29. package/dist/sanitizer/framework-mapper.js +296 -0
  30. package/dist/sanitizer/framework-mapper.js.map +1 -0
  31. package/dist/sanitizer/index.d.ts +2 -0
  32. package/dist/sanitizer/index.d.ts.map +1 -1
  33. package/dist/sanitizer/index.js +14 -1
  34. package/dist/sanitizer/index.js.map +1 -1
  35. package/dist/sanitizer/patterns.js +1 -1
  36. package/dist/sanitizer/patterns.js.map +1 -1
  37. package/dist/sanitizer/severity-classifier.d.ts +33 -0
  38. package/dist/sanitizer/severity-classifier.d.ts.map +1 -0
  39. package/dist/sanitizer/severity-classifier.js +113 -0
  40. package/dist/sanitizer/severity-classifier.js.map +1 -0
  41. package/dist/sanitizer/threat-reporter.d.ts +65 -0
  42. package/dist/sanitizer/threat-reporter.d.ts.map +1 -0
  43. package/dist/sanitizer/threat-reporter.js +160 -0
  44. package/dist/sanitizer/threat-reporter.js.map +1 -0
  45. package/dist/tools/fetch-structured.d.ts +5 -0
  46. package/dist/tools/fetch-structured.d.ts.map +1 -1
  47. package/dist/tools/fetch-structured.js +54 -6
  48. package/dist/tools/fetch-structured.js.map +1 -1
  49. package/dist/tools/fetch.d.ts +5 -0
  50. package/dist/tools/fetch.d.ts.map +1 -1
  51. package/dist/tools/fetch.js +42 -9
  52. package/dist/tools/fetch.js.map +1 -1
  53. package/dist/tools/read.d.ts +51 -0
  54. package/dist/tools/read.d.ts.map +1 -0
  55. package/dist/tools/read.js +127 -0
  56. package/dist/tools/read.js.map +1 -0
  57. package/dist/tools/search.d.ts +45 -0
  58. package/dist/tools/search.d.ts.map +1 -0
  59. package/dist/tools/search.js +220 -0
  60. package/dist/tools/search.js.map +1 -0
  61. package/dist/types.d.ts +64 -0
  62. package/dist/types.d.ts.map +1 -1
  63. package/dist/types.js.map +1 -1
  64. package/dist/utils/format-converter.d.ts +39 -0
  65. package/dist/utils/format-converter.d.ts.map +1 -0
  66. package/dist/utils/format-converter.js +191 -0
  67. package/dist/utils/format-converter.js.map +1 -0
  68. package/dist/utils/truncate.d.ts +26 -0
  69. package/dist/utils/truncate.d.ts.map +1 -0
  70. package/dist/utils/truncate.js +54 -0
  71. package/dist/utils/truncate.js.map +1 -0
  72. package/infrastructure/stack.ts +55 -6
  73. package/jest.config.js +3 -0
  74. package/package.json +9 -2
  75. package/src/browser/playwright-renderer.ts +8 -0
  76. package/src/browser/reader.ts +129 -0
  77. package/src/index.ts +49 -5
  78. package/src/lambda-handler.ts +131 -26
  79. package/src/sanitizer/framework-mapper.ts +347 -0
  80. package/src/sanitizer/index.ts +18 -1
  81. package/src/sanitizer/patterns.ts +1 -1
  82. package/src/sanitizer/severity-classifier.ts +132 -0
  83. package/src/sanitizer/threat-reporter.ts +261 -0
  84. package/src/tools/fetch-structured.ts +58 -6
  85. package/src/tools/fetch.ts +44 -9
  86. package/src/tools/read.ts +143 -0
  87. package/src/tools/search.ts +263 -0
  88. package/src/types.ts +69 -0
  89. package/src/utils/format-converter.ts +236 -0
  90. package/src/utils/truncate.ts +64 -0
  91. package/tests/auth-smoke.test.ts +480 -0
  92. package/tests/fetch-tool.test.ts +595 -2
  93. package/tests/reader.test.ts +353 -0
  94. package/tests/sanitizer.test.ts +52 -0
  95. package/tests/search.test.ts +456 -0
  96. package/tests/threat-reporter.test.ts +266 -0
@@ -0,0 +1,261 @@
1
+ /**
2
+ * Threat Reporter
3
+ *
4
+ * Generates structured threat reports when prompt injection or PII is detected.
5
+ * Two output layers:
6
+ * 1. TOON-formatted findings array (token-efficient, machine-readable)
7
+ * 2. Markdown compliance report block (human-readable, renders in Claude Desktop)
8
+ *
9
+ * Aligned with:
10
+ * - OWASP LLM Top 10 (2025)
11
+ * - NIST AI 600-1 (Generative AI Profile)
12
+ * - MITRE ATLAS (Adversarial Threat Landscape)
13
+ */
14
+
15
+ import {
16
+ classifySeverity,
17
+ aggregateSeverity,
18
+ countBySeverity,
19
+ getSeverityEmoji,
20
+ type Severity,
21
+ type OverallSeverity,
22
+ type Finding as SeverityFinding
23
+ } from './severity-classifier.js';
24
+ import { getFrameworkMappings } from './framework-mapper.js';
25
+
26
+ /**
27
+ * Threat finding with compliance framework mappings
28
+ */
29
+ export interface ThreatFinding { // One detected pattern category enriched with severity and framework references
30
+ id: number; // 1-based position of the category in the detected-patterns list
31
+ pattern_id: string; // "PI-NNN" label derived from a hash of the category name
32
+ category: string; // Pattern category name exactly as received from the caller
33
+ severity: Severity; // Result of classifySeverity(category)
34
+ confidence: number; // Fraction in [0, 1]; currently always 0.95 when built by buildFindings
35
+ owasp_llm: string; // OWASP LLM Top 10 reference from getFrameworkMappings(category)
36
+ nist_ai_600_1: string; // NIST AI 600-1 reference from getFrameworkMappings(category)
37
+ mitre_atlas: string; // MITRE ATLAS reference from getFrameworkMappings(category)
38
+ remediation: string; // "Content sanitized. <category with underscores as spaces> removed."
39
+ }
40
+
41
+ /**
42
+ * Threat report structure
43
+ */
44
+ export interface ThreatReport { // Complete report returned by generateThreatReport
45
+ generated: string; // Report timestamp: the caller-supplied value, or the current time in ISO format
46
+ source_url: string; // URL the report refers to, copied from the input
47
+ overall_severity: OverallSeverity; // Result of aggregateSeverity over all findings
48
+ total_findings: number; // Number of findings (one per detected pattern category)
49
+ by_severity: Record<Severity, number>; // Result of countBySeverity over all findings
50
+ pii_redacted: number; // PII redaction count, copied from the input
51
+ sanitization_applied: boolean; // Always true in reports built by generateThreatReport
52
+ frameworks: string[]; // Fixed list: OWASP LLM Top 10, NIST AI 600-1, MITRE ATLAS
53
+ findings_toon: string; // Tabular TOON text of the findings; empty string when there are none
54
+ report_markdown: string; // Human-readable Markdown rendering of the same report
55
+ }
56
+
57
+ /**
58
+ * Input to threat reporter
59
+ */
60
+ export interface ThreatReportInput { // Arguments accepted by generateThreatReport
61
+ patterns_detected: string[]; // Detected pattern category names; one finding is built per entry
62
+ pii_redacted: number; // PII redaction count as supplied by the caller
63
+ source_url: string; // URL shown in the report header
64
+ timestamp?: string; // Optional report time; defaults to the current time in ISO format
65
+ detections_by_severity?: { // NOTE(review): not read by generateThreatReport in this file - confirm whether it is still needed
66
+ critical: number;
67
+ high: number;
68
+ medium: number;
69
+ low: number;
70
+ };
71
+ }
72
+
73
/**
 * Derive a deterministic pattern identifier from a category name.
 *
 * The name is folded into a signed 32-bit rolling hash (multiply by 31, add
 * each UTF-16 code unit). The absolute value is then reduced modulo 1000, so
 * the same category always yields the same label. Only 1000 identifiers
 * exist, so distinct categories can share one.
 *
 * @param category - Pattern category name to hash
 * @returns Identifier of the form `PI-` followed by three zero-padded digits
 */
function generatePatternId(category: string): string {
  let digest = 0;
  let position = 0;

  while (position < category.length) {
    // Math.imul and `| 0` keep the running value inside the signed 32-bit range.
    digest = (Math.imul(digest, 31) + category.charCodeAt(position)) | 0;
    position += 1;
  }

  const bucket = Math.abs(digest) % 1000;
  return 'PI-' + bucket.toString().padStart(3, '0');
}
87
+
88
/**
 * Turn detected pattern categories into enriched threat findings.
 *
 * Each category is classified for severity, mapped to its compliance
 * framework references, and given a hashed pattern identifier. Findings are
 * numbered from 1 in input order.
 *
 * @param patternsDetected - Pattern category names reported by the sanitizer
 * @returns One finding per input category, in the same order
 */
function buildFindings(patternsDetected: string[]): ThreatFinding[] {
  const toFinding = (category: string, position: number): ThreatFinding => {
    const severity = classifySeverity(category);
    const mappings = getFrameworkMappings(category);
    const patternId = generatePatternId(category);
    // Underscored category names read better as plain words in the message.
    const readableCategory = category.replace(/_/g, ' ');

    return {
      id: position + 1,
      pattern_id: patternId,
      category,
      severity,
      confidence: 0.95, // Fixed placeholder; no per-pattern confidence is computed yet
      owasp_llm: mappings.owasp_llm,
      nist_ai_600_1: mappings.nist_ai_600_1,
      mitre_atlas: mappings.mitre_atlas,
      remediation: `Content sanitized. ${readableCategory} removed.`
    };
  };

  return patternsDetected.map(toFinding);
}
110
+
111
/** Characters that would corrupt a comma-delimited TOON row if emitted raw. */
const TOON_UNSAFE_CHARS = /[,"\\\n\r\t]/;

/**
 * Encode a single cell of a TOON tabular row.
 *
 * Values without structure-breaking characters are emitted unchanged, so
 * output for ordinary data is identical to the previous plain join. A value
 * containing the delimiter, a double quote, a backslash, or a line break/tab
 * is wrapped in double quotes with those characters backslash-escaped, so the
 * row keeps exactly one cell per declared field.
 *
 * @param value - Raw field value
 * @returns Text that is safe to place between row delimiters
 */
function encodeToonCell(value: string | number): string {
  const text = String(value);
  if (!TOON_UNSAFE_CHARS.test(text)) {
    return text;
  }

  // Backslashes must be escaped first so later escapes are not doubled.
  const escaped = text
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r')
    .replace(/\t/g, '\\t');
  return `"${escaped}"`;
}

/**
 * Generate the TOON-encoded findings string.
 *
 * The format is produced by hand rather than through a TOON library to avoid
 * Jest ESM compatibility issues.
 *
 * @param findings - Findings to encode
 * @returns TOON tabular text, or an empty string when there are no findings
 */
function generateToonFindings(findings: ThreatFinding[]): string {
  if (findings.length === 0) {
    return '';
  }

  return generateManualToonFormat(findings);
}

/**
 * Hand-written TOON tabular encoder for findings.
 *
 * Emits a header declaring the row count and field names, followed by one
 * comma-delimited row per finding. Cells that would break the row structure
 * are quoted and escaped by `encodeToonCell`.
 *
 * @param findings - Findings to encode
 * @returns Header line followed by one line per finding, joined with `\n`
 */
function generateManualToonFormat(findings: ThreatFinding[]): string {
  const header = `findings[${findings.length}]{id,pattern_id,category,severity,confidence,owasp_llm,nist_ai_600_1,mitre_atlas,remediation}:`;
  const rows = findings.map(f =>
    [
      f.id,
      f.pattern_id,
      f.category,
      f.severity,
      f.confidence,
      f.owasp_llm,
      f.nist_ai_600_1,
      f.mitre_atlas,
      f.remediation
    ]
      .map(cell => encodeToonCell(cell))
      .join(',')
  );
  return `${header}\n${rows.join('\n')}`;
}
133
+
134
/**
 * Render the human-readable Markdown block of a threat report.
 *
 * Sections, in order: header metadata, a per-severity count table, a detail
 * table capped at ten findings (with a note counting the rest), an optional
 * PII redaction section, and a fixed remediation status footer.
 *
 * @param findings - Findings to list in the detail table
 * @param overallSeverity - Aggregate severity shown in the header
 * @param bySeverity - Finding counts keyed by severity level
 * @param piiRedacted - PII redaction count; the PII section is omitted when 0
 * @param sourceUrl - URL shown in the header
 * @param timestamp - Generation time shown in the header
 * @returns The complete Markdown text, delimited by `---` rules
 */
function generateMarkdownReport(
  findings: ThreatFinding[],
  overallSeverity: OverallSeverity,
  bySeverity: Record<Severity, number>,
  piiRedacted: number,
  sourceUrl: string,
  timestamp: string
): string {
  const maxDetailRows = 10; // Keeps the detail table readable
  const chunks: string[] = [];

  // Header
  const headlineEmoji = getSeverityEmoji(overallSeverity);
  chunks.push(
    '---\n',
    `## ${headlineEmoji} Visus Threat Report\n`,
    `**Generated:** ${timestamp}\n`,
    `**Source:** ${sourceUrl}\n`,
    `**Overall Severity:** ${overallSeverity}\n`,
    `**Framework:** OWASP LLM Top 10 | NIST AI 600-1 | MITRE ATLAS\n\n`
  );

  // Findings summary: one row per severity level, highest first
  chunks.push('### Findings Summary\n', '| Severity | Count |\n', '|---|---|\n');
  const levels = ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW'] as const;
  for (const level of levels) {
    chunks.push(`| ${getSeverityEmoji(level)} ${level} | ${bySeverity[level]} |\n`);
  }
  chunks.push('\n');

  // Findings detail: only rendered when at least one finding exists
  if (findings.length > 0) {
    chunks.push(
      '### Findings Detail\n',
      '| # | Category | Severity | Confidence | OWASP | MITRE |\n',
      '|---|---|---|---|---|---|\n'
    );

    findings.slice(0, maxDetailRows).forEach(entry => {
      const percent = Math.round(entry.confidence * 100);
      // Keep only the identifier before " - ", e.g. "LLM01:2025" / "AML.T0051.000"
      const owaspCode = entry.owasp_llm.split(' - ')[0];
      const mitreCode = entry.mitre_atlas.split(' - ')[0];

      chunks.push(
        `| ${entry.id} | ${entry.category} | ${entry.severity} | ${percent}% | ${owaspCode} | ${mitreCode} |\n`
      );
    });

    if (findings.length > maxDetailRows) {
      chunks.push(`\n*...and ${findings.length - maxDetailRows} more findings*\n`);
    }
    chunks.push('\n');
  }

  // PII redaction: omitted when nothing was redacted
  if (piiRedacted > 0) {
    chunks.push(
      '### PII Redaction\n',
      `- **Items Redacted:** ${piiRedacted}\n`,
      `- **Standard:** NIST AI 600-1 MS-2.6\n\n`
    );
  }

  // Remediation status
  chunks.push('### Remediation Status\n', '✅ All findings sanitized. Content delivered clean.\n\n');

  // TODO: PDF export hook for future visus_report tool
  // This is where the PDF generation would be triggered in Phase 3

  chunks.push('*Report generated by Visus MCP — Security-first web access for Claude*\n', '---\n');

  return chunks.join('');
}
202
+
203
/**
 * Build a threat report for one sanitization run (main entry point).
 *
 * Produces both output layers: the TOON findings text and the Markdown
 * compliance block, together with aggregate severity figures.
 *
 * @param input - Detected pattern categories, PII redaction count, source URL
 *   and an optional timestamp (defaults to the current time in ISO format)
 * @returns The assembled report, or `null` when no pattern categories were
 *   detected and the PII redaction count is 0
 */
export function generateThreatReport(input: ThreatReportInput): ThreatReport | null {
  const categories = input.patterns_detected;
  const piiCount = input.pii_redacted;
  const sourceUrl = input.source_url;
  const generatedAt = input.timestamp === undefined ? new Date().toISOString() : input.timestamp;

  // Nothing detected and nothing redacted: callers omit the report entirely.
  const nothingToReport = categories.length === 0 && piiCount === 0;
  if (nothingToReport) {
    return null;
  }

  // One enriched finding per detected category
  const findings = buildFindings(categories);

  // The severity helpers take a reduced view of each finding
  const severityInputs: SeverityFinding[] = findings.map(({ category, severity }) => ({
    pattern_category: category,
    severity
  }));
  const overall = aggregateSeverity(severityInputs);
  const counts = countBySeverity(severityInputs);

  // Machine-readable layer
  const toon = generateToonFindings(findings);

  // Human-readable layer
  const markdown = generateMarkdownReport(
    findings,
    overall,
    counts,
    piiCount,
    sourceUrl,
    generatedAt
  );

  const report: ThreatReport = {
    generated: generatedAt,
    source_url: sourceUrl,
    overall_severity: overall,
    total_findings: findings.length,
    by_severity: counts,
    pii_redacted: piiCount,
    sanitization_applied: true,
    frameworks: ['OWASP LLM Top 10', 'NIST AI 600-1', 'MITRE ATLAS'],
    findings_toon: toon,
    report_markdown: markdown
  };
  return report;
}
@@ -10,6 +10,8 @@
10
10
  import * as cheerio from 'cheerio';
11
11
  import { renderPage } from '../browser/playwright-renderer.js';
12
12
  import { sanitize } from '../sanitizer/index.js';
13
+ import { truncateContent } from '../utils/truncate.js';
14
+ import { generateThreatReport } from '../sanitizer/threat-reporter.js';
13
15
  import type { VisusFetchStructuredInput, VisusFetchStructuredOutput, Result } from '../types.js';
14
16
  import { Err } from '../types.js';
15
17
 
@@ -151,10 +153,49 @@ export async function visusFetchStructured(
151
153
  }
152
154
  }
153
155
 
154
- // Step 4: Build output
156
+ // Step 4: Apply token ceiling truncation to combined data (AFTER sanitization)
157
+ // Combine all field values to check total content size
158
+ const combinedData = Object.entries(sanitizedData)
159
+ .map(([key, value]) => `${key}: ${value || 'null'}`)
160
+ .join('\n');
161
+
162
+ const truncationResult = truncateContent(combinedData);
163
+
164
+ // If truncated, we need to reconstruct sanitizedData from truncated content
165
+ let finalData = sanitizedData;
166
+ if (truncationResult.truncated) {
167
+ // Parse truncated content back into fields
168
+ // This is a simple approach - in production you might want more sophisticated handling
169
+ const lines = truncationResult.content.split('\n');
170
+ finalData = {};
171
+ for (const line of lines) {
172
+ if (line.includes(':')) {
173
+ const [key, ...valueParts] = line.split(':');
174
+ const value = valueParts.join(':').trim();
175
+ if (key.trim() in sanitizedData) {
176
+ finalData[key.trim()] = value === 'null' ? null : value;
177
+ }
178
+ }
179
+ }
180
+ // Preserve any missing fields as null
181
+ for (const key of Object.keys(sanitizedData)) {
182
+ if (!(key in finalData)) {
183
+ finalData[key] = null;
184
+ }
185
+ }
186
+ }
187
+
188
+ // Step 5: Generate aggregated threat report
189
+ const threatReport = generateThreatReport({
190
+ patterns_detected: Array.from(allPatternsDetected),
191
+ pii_redacted: Array.from(allPIITypesRedacted).length,
192
+ source_url: url
193
+ });
194
+
195
+ // Step 6: Build output
155
196
  const output: VisusFetchStructuredOutput = {
156
197
  url,
157
- data: sanitizedData,
198
+ data: finalData,
158
199
  sanitization: {
159
200
  patterns_detected: Array.from(allPatternsDetected),
160
201
  pii_types_redacted: Array.from(allPIITypesRedacted),
@@ -168,8 +209,14 @@ export async function visusFetchStructured(
168
209
  content_length_sanitized: Object.values(sanitizedData)
169
210
  .filter(v => v !== null)
170
211
  .join(' ')
171
- .length
172
- }
212
+ .length,
213
+ ...(truncationResult.truncated && {
214
+ truncated: true,
215
+ truncated_at_chars: truncationResult.truncated_at_chars
216
+ })
217
+ },
218
+ // Include threat_report only if findings exist
219
+ ...(threatReport && { threat_report: threatReport })
173
220
  };
174
221
 
175
222
  // Log to stderr if threats detected
@@ -195,7 +242,8 @@ export async function visusFetchStructured(
195
242
  */
196
243
  export const visusFetchStructuredToolDefinition = {
197
244
  name: 'visus_fetch_structured',
198
- description: 'Fetch a web page and extract structured data according to a schema. All extracted fields are automatically sanitized for prompt injection and PII before being returned.',
245
+ title: 'Fetch Structured Data (Sanitized)',
246
+ description: 'Fetch a web page and extract structured data according to a schema. SECURITY: All extracted fields pass through prompt injection sanitization (43 pattern categories) and PII redaction BEFORE being returned to the LLM. Each field is independently sanitized to ensure safe consumption of untrusted web content.',
199
247
  inputSchema: {
200
248
  type: 'object',
201
249
  properties: {
@@ -217,5 +265,9 @@ export const visusFetchStructuredToolDefinition = {
217
265
  }
218
266
  },
219
267
  required: ['url', 'schema']
220
- }
268
+ },
269
+ readOnlyHint: true,
270
+ destructiveHint: false,
271
+ idempotentHint: true,
272
+ openWorldHint: true
221
273
  };
@@ -8,6 +8,8 @@
8
8
 
9
9
  import { renderPage } from '../browser/playwright-renderer.js';
10
10
  import { sanitize } from '../sanitizer/index.js';
11
+ import { truncateContent } from '../utils/truncate.js';
12
+ import { detectFormat, convertJson, convertXml, convertRss } from '../utils/format-converter.js';
11
13
  import type { VisusFetchInput, VisusFetchOutput, Result } from '../types.js';
12
14
  import { Err } from '../types.js';
13
15
 
@@ -36,17 +38,37 @@ export async function visusFetch(input: VisusFetchInput): Promise<Result<VisusFe
36
38
  return Err(renderResult.error);
37
39
  }
38
40
 
39
- const { html, title } = renderResult.value;
41
+ const { html, title, contentType } = renderResult.value;
40
42
  const rawContent = html || '';
41
43
 
42
- // Step 2: CRITICAL - Sanitize content (injection detection + PII redaction with allowlisting)
44
+ // Step 2: Detect format and apply format-appropriate conversion
45
+ const detectedContentType = contentType || 'text/html';
46
+ const formatType = detectFormat(detectedContentType);
47
+
48
+ let processedContent = rawContent;
49
+
50
+ // Apply format-specific conversion (skip Readability for non-HTML)
51
+ if (formatType === 'json') {
52
+ processedContent = convertJson(rawContent);
53
+ } else if (formatType === 'xml') {
54
+ processedContent = convertXml(rawContent);
55
+ } else if (formatType === 'rss') {
56
+ processedContent = convertRss(rawContent);
57
+ }
58
+ // For 'html' format, processedContent remains as rawContent
59
+
60
+ // Step 3: CRITICAL - Sanitize content (injection detection + PII redaction with allowlisting)
43
61
  // This step CANNOT be skipped or bypassed
44
- const sanitizationResult = sanitize(rawContent, url);
62
+ const sanitizationResult = sanitize(processedContent, url);
45
63
 
46
- // Step 3: Build output
64
+ // Step 4: Apply token ceiling truncation (AFTER sanitization)
65
+ // Anthropic MCP Directory enforces 25,000 token response limit
66
+ const truncationResult = truncateContent(sanitizationResult.content);
67
+
68
+ // Step 5: Build output
47
69
  const output: VisusFetchOutput = {
48
70
  url,
49
- content: sanitizationResult.content,
71
+ content: truncationResult.content,
50
72
  sanitization: {
51
73
  patterns_detected: sanitizationResult.sanitization.patterns_detected,
52
74
  pii_types_redacted: sanitizationResult.sanitization.pii_types_redacted,
@@ -57,8 +79,16 @@ export async function visusFetch(input: VisusFetchInput): Promise<Result<VisusFe
57
79
  title: title || 'Untitled',
58
80
  fetched_at: new Date().toISOString(),
59
81
  content_length_original: sanitizationResult.metadata.original_length,
60
- content_length_sanitized: sanitizationResult.metadata.sanitized_length
61
- }
82
+ content_length_sanitized: sanitizationResult.metadata.sanitized_length,
83
+ format_detected: formatType,
84
+ content_type: detectedContentType,
85
+ ...(truncationResult.truncated && {
86
+ truncated: true,
87
+ truncated_at_chars: truncationResult.truncated_at_chars
88
+ })
89
+ },
90
+ // Include threat_report only if findings exist
91
+ ...(sanitizationResult.threat_report && { threat_report: sanitizationResult.threat_report })
62
92
  };
63
93
 
64
94
  // Log to stderr if critical threats detected
@@ -84,7 +114,8 @@ export async function visusFetch(input: VisusFetchInput): Promise<Result<VisusFe
84
114
  */
85
115
  export const visusFetchToolDefinition = {
86
116
  name: 'visus_fetch',
87
- description: 'Fetch and sanitize web page content. Returns clean, injection-free content in markdown or text format. All content is automatically scanned for prompt injection patterns and PII before being returned.',
117
+ title: 'Fetch Web Page (Sanitized)',
118
+ description: 'Fetch and sanitize web page content. Returns clean, injection-free content in markdown or text format. SECURITY: All content passes through prompt injection sanitization (43 pattern categories) and PII redaction BEFORE reaching the LLM. This ensures safe consumption of untrusted web content.',
88
119
  inputSchema: {
89
120
  type: 'object',
90
121
  properties: {
@@ -105,5 +136,9 @@ export const visusFetchToolDefinition = {
105
136
  }
106
137
  },
107
138
  required: ['url']
108
- }
139
+ },
140
+ readOnlyHint: true,
141
+ destructiveHint: false,
142
+ idempotentHint: true,
143
+ openWorldHint: true
109
144
  };
@@ -0,0 +1,143 @@
1
+ /**
2
+ * visus_read MCP Tool
3
+ *
4
+ * Extracts clean article content from a web page using Mozilla Readability,
5
+ * stripping navigation, ads, and boilerplate. Full prompt injection sanitization
6
+ * and PII redaction applied before content reaches the LLM.
7
+ *
8
+ * CRITICAL: ALL content MUST pass through the sanitizer. This cannot be bypassed.
9
+ *
10
+ * Pipeline order:
11
+ * 1. Playwright renders page (full JS execution)
12
+ * 2. Reader extracts main content (reduces input size)
13
+ * 3. Sanitizer runs on clean text
14
+ * 4. Token ceiling applied (24,000 token cap)
15
+ */
16
+
17
+ import { renderPage } from '../browser/playwright-renderer.js';
18
+ import { extractArticle } from '../browser/reader.js';
19
+ import { sanitize } from '../sanitizer/index.js';
20
+ import { truncateContent } from '../utils/truncate.js';
21
+ import type { VisusReadInput, VisusReadOutput, Result } from '../types.js';
22
+ import { Err } from '../types.js';
23
+
24
/**
 * visus_read tool implementation.
 *
 * Renders the page, extracts the main article with the reader, sanitizes the
 * extracted text, then truncates the sanitized text. Failures from rendering
 * or extraction are returned as error results; unexpected exceptions inside
 * the pipeline are caught and returned the same way.
 *
 * @param input - Tool input: `url` (required) and `timeout_ms` (default 10000)
 * @returns Sanitized article content with metadata, or an error result
 */
export async function visusRead(input: VisusReadInput): Promise<Result<VisusReadOutput, Error>> {
  const url = input.url;
  const timeout_ms = input.timeout_ms === undefined ? 10000 : input.timeout_ms;

  // Validate inputs
  const urlIsUsable = typeof url === 'string' && url.length > 0;
  if (!urlIsUsable) {
    return Err(new Error('Invalid input: url must be a non-empty string'));
  }

  // Structured stderr logging shared by both events below
  const logEvent = (event: string, details: Record<string, unknown>): void => {
    console.error(JSON.stringify({
      timestamp: new Date().toISOString(),
      event,
      url,
      ...details
    }));
  };

  try {
    // Step 1: Render the page using Playwright
    const rendered = await renderPage(url, { timeout_ms, format: 'html' });
    if (!rendered.ok) {
      return Err(rendered.error);
    }
    const { html, title: pageTitle } = rendered.value;

    // Step 2: Extract article content using Readability
    const extraction = extractArticle(html, url);
    if (!extraction.ok) {
      return Err(extraction.error);
    }
    const article = extraction.value;

    // Step 3: CRITICAL - Sanitize content (injection detection + PII redaction)
    // Runs on the reader output, never before it, and must not be skipped.
    const sanitized = sanitize(article.content, url);

    // Step 4: Apply token ceiling truncation (AFTER sanitization)
    // Anthropic MCP Directory enforces 25,000 token response limit
    const truncation = truncateContent(sanitized.content);

    // Step 5: Build output
    const threatReport = sanitized.threat_report;
    const output: VisusReadOutput = {
      url,
      content: truncation.content,
      metadata: {
        title: article.title || pageTitle || 'Untitled',
        author: article.byline,
        published: article.publishedTime,
        word_count: article.wordCount,
        reader_mode_available: article.readerModeAvailable,
        sanitized: true,
        injections_removed: sanitized.sanitization.patterns_detected.length,
        pii_redacted: sanitized.sanitization.pii_types_redacted.length,
        truncated: truncation.truncated,
        fetched_at: new Date().toISOString()
      },
      // threat_report is present only when the sanitizer produced one
      ...(threatReport && { threat_report: threatReport })
    };

    // Report critical threats on stderr
    if (sanitized.metadata.has_critical_threats) {
      logEvent('reader_critical_threat_detected', {
        patterns: sanitized.sanitization.patterns_detected,
        severity_score: sanitized.metadata.severity_score
      });
    }

    // Report reader-mode fallback (non-article page) on stderr
    if (!article.readerModeAvailable) {
      logEvent('reader_mode_fallback', {
        reason: 'Readability could not extract article structure'
      });
    }

    return { ok: true, value: output };
  } catch (error) {
    const failure = error instanceof Error ? error : new Error(String(error));
    return Err(failure);
  }
}
116
+
117
+ /**
118
+ * MCP tool definition for registration
119
+ */
120
+ export const visusReadToolDefinition = { // Descriptor for the visus_read tool: identity, input JSON schema, and behavior hints
121
+ name: 'visus_read',
122
+ title: 'Read Web Page (Reader Mode + Sanitized)',
123
+ description: 'Extracts clean article content from a web page using Mozilla Readability, stripping navigation, ads, and boilerplate. Full prompt injection sanitization and PII redaction applied before content reaches the LLM. Optimized for context-efficient, safe web reading in Claude Desktop.',
124
+ inputSchema: {
125
+ type: 'object',
126
+ properties: {
127
+ url: { // visusRead itself only checks for a non-empty string; scheme enforcement presumably happens in renderPage - verify
128
+ type: 'string',
129
+ description: 'The URL to fetch (must be http:// or https://)'
130
+ },
131
+ timeout_ms: {
132
+ type: 'number',
133
+ description: 'Request timeout in milliseconds (default: 10000)',
134
+ default: 10000 // Same fallback visusRead applies when timeout_ms is omitted
135
+ }
136
+ },
137
+ required: ['url'] // timeout_ms is optional
138
+ },
139
+ readOnlyHint: true, // NOTE(review): the MCP tool schema nests hint flags under an `annotations` object - confirm the registration code relocates these four fields
140
+ destructiveHint: false,
141
+ idempotentHint: true,
142
+ openWorldHint: true
143
+ };