visus-mcp 0.3.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +22 -0
- package/LINKEDIN-STRATEGY.md +367 -0
- package/README.md +491 -16
- package/ROADMAP.md +167 -30
- package/SECURITY-AUDIT-v1.md +277 -0
- package/STATUS.md +801 -42
- package/TROUBLESHOOT-AUTH-20260322-2019.md +291 -0
- package/TROUBLESHOOT-JEST-20260323-1357.md +139 -0
- package/TROUBLESHOOT-LAMBDA-20260322-1945.md +183 -0
- package/VISUS-CLAUDE-CODE-PROMPT.md +1 -1
- package/VISUS-PROJECT-PLAN.md +7 -0
- package/dist/browser/playwright-renderer.d.ts.map +1 -1
- package/dist/browser/playwright-renderer.js +7 -0
- package/dist/browser/playwright-renderer.js.map +1 -1
- package/dist/browser/reader.d.ts +31 -0
- package/dist/browser/reader.d.ts.map +1 -0
- package/dist/browser/reader.js +98 -0
- package/dist/browser/reader.js.map +1 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +37 -5
- package/dist/index.js.map +1 -1
- package/dist/lambda-handler.d.ts +0 -6
- package/dist/lambda-handler.d.ts.map +1 -1
- package/dist/lambda-handler.js +97 -25
- package/dist/lambda-handler.js.map +1 -1
- package/dist/sanitizer/framework-mapper.d.ts +22 -0
- package/dist/sanitizer/framework-mapper.d.ts.map +1 -0
- package/dist/sanitizer/framework-mapper.js +296 -0
- package/dist/sanitizer/framework-mapper.js.map +1 -0
- package/dist/sanitizer/index.d.ts +2 -0
- package/dist/sanitizer/index.d.ts.map +1 -1
- package/dist/sanitizer/index.js +14 -1
- package/dist/sanitizer/index.js.map +1 -1
- package/dist/sanitizer/patterns.js +1 -1
- package/dist/sanitizer/patterns.js.map +1 -1
- package/dist/sanitizer/severity-classifier.d.ts +33 -0
- package/dist/sanitizer/severity-classifier.d.ts.map +1 -0
- package/dist/sanitizer/severity-classifier.js +113 -0
- package/dist/sanitizer/severity-classifier.js.map +1 -0
- package/dist/sanitizer/threat-reporter.d.ts +65 -0
- package/dist/sanitizer/threat-reporter.d.ts.map +1 -0
- package/dist/sanitizer/threat-reporter.js +160 -0
- package/dist/sanitizer/threat-reporter.js.map +1 -0
- package/dist/tools/fetch-structured.d.ts +5 -0
- package/dist/tools/fetch-structured.d.ts.map +1 -1
- package/dist/tools/fetch-structured.js +54 -6
- package/dist/tools/fetch-structured.js.map +1 -1
- package/dist/tools/fetch.d.ts +5 -0
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +42 -9
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/read.d.ts +51 -0
- package/dist/tools/read.d.ts.map +1 -0
- package/dist/tools/read.js +127 -0
- package/dist/tools/read.js.map +1 -0
- package/dist/tools/search.d.ts +45 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +220 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/types.d.ts +64 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/format-converter.d.ts +39 -0
- package/dist/utils/format-converter.d.ts.map +1 -0
- package/dist/utils/format-converter.js +191 -0
- package/dist/utils/format-converter.js.map +1 -0
- package/dist/utils/truncate.d.ts +26 -0
- package/dist/utils/truncate.d.ts.map +1 -0
- package/dist/utils/truncate.js +54 -0
- package/dist/utils/truncate.js.map +1 -0
- package/infrastructure/stack.ts +55 -6
- package/jest.config.js +3 -0
- package/package.json +9 -2
- package/src/browser/playwright-renderer.ts +8 -0
- package/src/browser/reader.ts +129 -0
- package/src/index.ts +49 -5
- package/src/lambda-handler.ts +131 -26
- package/src/sanitizer/framework-mapper.ts +347 -0
- package/src/sanitizer/index.ts +18 -1
- package/src/sanitizer/patterns.ts +1 -1
- package/src/sanitizer/severity-classifier.ts +132 -0
- package/src/sanitizer/threat-reporter.ts +261 -0
- package/src/tools/fetch-structured.ts +58 -6
- package/src/tools/fetch.ts +44 -9
- package/src/tools/read.ts +143 -0
- package/src/tools/search.ts +263 -0
- package/src/types.ts +69 -0
- package/src/utils/format-converter.ts +236 -0
- package/src/utils/truncate.ts +64 -0
- package/tests/auth-smoke.test.ts +480 -0
- package/tests/fetch-tool.test.ts +595 -2
- package/tests/reader.test.ts +353 -0
- package/tests/sanitizer.test.ts +52 -0
- package/tests/search.test.ts +456 -0
- package/tests/threat-reporter.test.ts +266 -0
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Threat Reporter
|
|
3
|
+
*
|
|
4
|
+
* Generates structured threat reports when prompt injection or PII is detected.
|
|
5
|
+
* Two output layers:
|
|
6
|
+
* 1. TOON-formatted findings array (token-efficient, machine-readable)
|
|
7
|
+
* 2. Markdown compliance report block (human-readable, renders in Claude Desktop)
|
|
8
|
+
*
|
|
9
|
+
* Aligned with:
|
|
10
|
+
* - OWASP LLM Top 10 (2025)
|
|
11
|
+
* - NIST AI 600-1 (Generative AI Profile)
|
|
12
|
+
* - MITRE ATLAS (Adversarial Threat Landscape)
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import {
|
|
16
|
+
classifySeverity,
|
|
17
|
+
aggregateSeverity,
|
|
18
|
+
countBySeverity,
|
|
19
|
+
getSeverityEmoji,
|
|
20
|
+
type Severity,
|
|
21
|
+
type OverallSeverity,
|
|
22
|
+
type Finding as SeverityFinding
|
|
23
|
+
} from './severity-classifier.js';
|
|
24
|
+
import { getFrameworkMappings } from './framework-mapper.js';
|
|
25
|
+
|
|
26
|
+
/**
 * One detected threat pattern together with its compliance-framework references.
 */
export interface ThreatFinding {
  /** Position of the finding within its report (buildFindings assigns index + 1). */
  id: number;
  /** Identifier in the form `PI-XXX`, derived from the category name. */
  pattern_id: string;
  /** Name of the detected pattern category. */
  category: string;
  /** Severity assigned to this category. */
  severity: Severity;
  /** Detection confidence as a fraction (the Markdown report multiplies it by 100). */
  confidence: number;
  /** OWASP LLM Top 10 reference for the category. */
  owasp_llm: string;
  /** NIST AI 600-1 reference for the category. */
  nist_ai_600_1: string;
  /** MITRE ATLAS reference for the category. */
  mitre_atlas: string;
  /** Human-readable note describing what was done about the finding. */
  remediation: string;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Complete threat report as returned by generateThreatReport.
 */
export interface ThreatReport {
  /** Timestamp string the report was generated with. */
  generated: string;
  /** URL the analysed content came from. */
  source_url: string;
  /** Severity aggregated over all findings. */
  overall_severity: OverallSeverity;
  /** Number of findings in the report. */
  total_findings: number;
  /** Finding count per severity level. */
  by_severity: Record<Severity, number>;
  /** Count of redacted PII items, passed through from the input. */
  pii_redacted: number;
  /** Whether sanitization was applied (generateThreatReport always sets this to true). */
  sanitization_applied: boolean;
  /** Names of the compliance frameworks referenced by the report. */
  frameworks: string[];
  /** Findings encoded as a TOON table; empty string when there are no findings. */
  findings_toon: string;
  /** Human-readable Markdown rendering of the report. */
  report_markdown: string;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Arguments accepted by generateThreatReport.
 */
export interface ThreatReportInput {
  /** Names of the pattern categories that were detected. */
  patterns_detected: string[];
  /** Count of redacted PII items. */
  pii_redacted: number;
  /** URL the analysed content came from. */
  source_url: string;
  /** Report timestamp; generateThreatReport falls back to the current ISO time when omitted. */
  timestamp?: string;
  /**
   * Optional per-severity detection counts.
   * NOTE(review): generateThreatReport in this file does not read this field.
   */
  detections_by_severity?: Record<'critical' | 'high' | 'medium' | 'low', number>;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Derive a stable pattern identifier from a category name.
 *
 * The name is folded into a 32-bit rolling hash (multiply-by-31 form), and the
 * absolute value modulo 1000 is rendered as `PI-XXX` with zero padding.
 * The same category always yields the same ID; distinct categories may collide.
 *
 * @param category Pattern category name
 * @returns Identifier such as `PI-042`
 */
function generatePatternId(category: string): string {
  // `| 0` wraps the running value to a signed 32-bit integer after every step.
  const digest = category
    .split('')
    .reduce((acc, unit) => (((acc << 5) - acc) + unit.charCodeAt(0)) | 0, 0);

  const bucket = Math.abs(digest) % 1000;
  return 'PI-' + bucket.toString().padStart(3, '0');
}
|
|
87
|
+
|
|
88
|
+
/**
 * Turn a list of detected pattern categories into enriched findings.
 *
 * Each category is given a 1-based id, a derived pattern ID, its classified
 * severity, the framework references returned by getFrameworkMappings, and a
 * remediation note.
 *
 * @param patternsDetected Detected pattern category names
 * @returns One finding per category, in input order
 */
function buildFindings(patternsDetected: string[]): ThreatFinding[] {
  const toFinding = (category: string, position: number): ThreatFinding => {
    const severity = classifySeverity(category);
    const mappings = getFrameworkMappings(category);
    const pattern_id = generatePatternId(category);
    // Category names use underscores; show them as words in the remediation text.
    const readableName = category.replace(/_/g, ' ');

    return {
      id: position + 1,
      pattern_id,
      category,
      severity,
      confidence: 0.95, // Fixed default confidence; can be enhanced later
      owasp_llm: mappings.owasp_llm,
      nist_ai_600_1: mappings.nist_ai_600_1,
      mitre_atlas: mappings.mitre_atlas,
      remediation: `Content sanitized. ${readableName} removed.`
    };
  };

  return patternsDetected.map(toFinding);
}
|
|
110
|
+
|
|
111
|
+
/**
 * Encode findings as a TOON string.
 *
 * Delegates to the hand-written TOON formatter (used to avoid Jest ESM
 * compatibility issues).
 *
 * @param findings Findings to encode
 * @returns TOON text, or an empty string when there are no findings
 */
function generateToonFindings(findings: ThreatFinding[]): string {
  return findings.length === 0 ? '' : generateManualToonFormat(findings);
}
|
|
122
|
+
|
|
123
|
+
/**
 * Hand-written TOON tabular encoder for findings.
 *
 * Emits a header line declaring the row count and field names, followed by one
 * comma-separated row per finding. A cell containing a comma, double quote,
 * backslash, or line break/tab is wrapped in double quotes with those
 * characters backslash-escaped, so a single value can never split into extra
 * columns or rows. Cells without such characters are emitted unchanged.
 *
 * NOTE(review): rows are emitted without leading indentation, as before;
 * confirm against the TOON consumer whether indented rows are expected.
 *
 * @param findings Findings to encode
 * @returns Header line, a newline, then the rows joined by newlines
 */
function generateManualToonFormat(findings: ThreatFinding[]): string {
  const encodeCell = (value: string | number): string => {
    const text = String(value);
    if (!/[",\\\n\r\t]/.test(text)) {
      return text;
    }
    const escaped = text
      .replace(/\\/g, '\\\\')
      .replace(/"/g, '\\"')
      .replace(/\n/g, '\\n')
      .replace(/\r/g, '\\r')
      .replace(/\t/g, '\\t');
    return `"${escaped}"`;
  };

  const header = `findings[${findings.length}]{id,pattern_id,category,severity,confidence,owasp_llm,nist_ai_600_1,mitre_atlas,remediation}:`;
  const rows = findings.map(f =>
    [
      f.id,
      f.pattern_id,
      f.category,
      f.severity,
      f.confidence,
      f.owasp_llm,
      f.nist_ai_600_1,
      f.mitre_atlas,
      f.remediation
    ].map(encodeCell).join(',')
  );
  return `${header}\n${rows.join('\n')}`;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Render the human-readable Markdown block of a threat report.
 *
 * Sections, in order: title and metadata, a per-severity summary table, a
 * findings detail table (only when findings exist, capped at the first 10
 * rows with a "...and N more" line), a PII redaction section (only when
 * piiRedacted > 0), and a fixed remediation status footer.
 *
 * Text placed in detail-table cells has `|` escaped and line breaks replaced
 * by spaces so a single value cannot break the table layout.
 *
 * @param findings Findings to list in the detail table
 * @param overallSeverity Aggregated severity shown in the header
 * @param bySeverity Finding counts per severity level
 * @param piiRedacted Number of redacted PII items
 * @param sourceUrl URL shown as the report source
 * @param timestamp Generation timestamp shown in the header
 * @returns Markdown text delimited by `---` lines
 */
function generateMarkdownReport(
  findings: ThreatFinding[],
  overallSeverity: OverallSeverity,
  bySeverity: Record<Severity, number>,
  piiRedacted: number,
  sourceUrl: string,
  timestamp: string
): string {
  // Detail rows beyond this count are summarized instead of listed, for readability.
  const maxDetailRows = 10;

  // Make a string safe to place inside a Markdown table cell.
  const cell = (text: string): string =>
    text.replace(/\r?\n|\r/g, ' ').replace(/\|/g, '\\|');

  const emoji = getSeverityEmoji(overallSeverity);

  let markdown = '---\n';
  markdown += `## ${emoji} Visus Threat Report\n`;
  markdown += `**Generated:** ${timestamp}\n`;
  markdown += `**Source:** ${sourceUrl}\n`;
  markdown += `**Overall Severity:** ${overallSeverity}\n`;
  markdown += `**Framework:** OWASP LLM Top 10 | NIST AI 600-1 | MITRE ATLAS\n\n`;

  // Findings Summary
  markdown += '### Findings Summary\n';
  markdown += '| Severity | Count |\n';
  markdown += '|---|---|\n';
  markdown += `| ${getSeverityEmoji('CRITICAL')} CRITICAL | ${bySeverity.CRITICAL} |\n`;
  markdown += `| ${getSeverityEmoji('HIGH')} HIGH | ${bySeverity.HIGH} |\n`;
  markdown += `| ${getSeverityEmoji('MEDIUM')} MEDIUM | ${bySeverity.MEDIUM} |\n`;
  markdown += `| ${getSeverityEmoji('LOW')} LOW | ${bySeverity.LOW} |\n\n`;

  // Findings Detail (only if we have findings)
  if (findings.length > 0) {
    markdown += '### Findings Detail\n';
    markdown += '| # | Category | Severity | Confidence | OWASP | MITRE |\n';
    markdown += '|---|---|---|---|---|---|\n';

    for (const finding of findings.slice(0, maxDetailRows)) {
      const confidencePct = Math.round(finding.confidence * 100);
      // Keep only the code before " - ", e.g. "LLM01:2025" / "AML.T0051.000".
      const [owaspShort = ''] = finding.owasp_llm.split(' - ');
      const [mitreShort = ''] = finding.mitre_atlas.split(' - ');

      markdown += `| ${finding.id} | ${cell(finding.category)} | ${finding.severity} | ${confidencePct}% | ${cell(owaspShort)} | ${cell(mitreShort)} |\n`;
    }

    if (findings.length > maxDetailRows) {
      markdown += `\n*...and ${findings.length - maxDetailRows} more findings*\n`;
    }
    markdown += '\n';
  }

  // PII Redaction
  if (piiRedacted > 0) {
    markdown += '### PII Redaction\n';
    markdown += `- **Items Redacted:** ${piiRedacted}\n`;
    markdown += `- **Standard:** NIST AI 600-1 MS-2.6\n\n`;
  }

  // Remediation Status
  markdown += '### Remediation Status\n';
  markdown += '✅ All findings sanitized. Content delivered clean.\n\n';

  // TODO: PDF export hook for future visus_report tool
  // This is where the PDF generation would be triggered in Phase 3

  markdown += '*Report generated by Visus MCP — Security-first web access for Claude*\n';
  markdown += '---\n';

  return markdown;
}
|
|
202
|
+
|
|
203
|
+
/**
 * Build a threat report from sanitizer results (main entry point).
 *
 * @param input Detected pattern categories, PII redaction count, source URL,
 *              and an optional timestamp (defaults to the current ISO time)
 * @returns The assembled report, or null when no patterns were detected and
 *          no PII was redacted
 */
export function generateThreatReport(input: ThreatReportInput): ThreatReport | null {
  const generated = input.timestamp === undefined ? new Date().toISOString() : input.timestamp;
  const nothingDetected = input.patterns_detected.length === 0 && input.pii_redacted === 0;

  // No detections and no redactions: there is nothing to report.
  if (nothingDetected) {
    return null;
  }

  const findings = buildFindings(input.patterns_detected);

  // The severity helpers take a reduced view of each finding.
  const severityView: SeverityFinding[] = findings.map(({ category, severity }) => ({
    pattern_category: category,
    severity
  }));
  const overall_severity = aggregateSeverity(severityView);
  const by_severity = countBySeverity(severityView);

  // Machine-readable layer first, then the human-readable layer.
  const findings_toon = generateToonFindings(findings);
  const report_markdown = generateMarkdownReport(
    findings,
    overall_severity,
    by_severity,
    input.pii_redacted,
    input.source_url,
    generated
  );

  return {
    generated,
    source_url: input.source_url,
    overall_severity,
    total_findings: findings.length,
    by_severity,
    pii_redacted: input.pii_redacted,
    sanitization_applied: true,
    frameworks: ['OWASP LLM Top 10', 'NIST AI 600-1', 'MITRE ATLAS'],
    findings_toon,
    report_markdown
  };
}
|
|
@@ -10,6 +10,8 @@
|
|
|
10
10
|
import * as cheerio from 'cheerio';
|
|
11
11
|
import { renderPage } from '../browser/playwright-renderer.js';
|
|
12
12
|
import { sanitize } from '../sanitizer/index.js';
|
|
13
|
+
import { truncateContent } from '../utils/truncate.js';
|
|
14
|
+
import { generateThreatReport } from '../sanitizer/threat-reporter.js';
|
|
13
15
|
import type { VisusFetchStructuredInput, VisusFetchStructuredOutput, Result } from '../types.js';
|
|
14
16
|
import { Err } from '../types.js';
|
|
15
17
|
|
|
@@ -151,10 +153,49 @@ export async function visusFetchStructured(
|
|
|
151
153
|
}
|
|
152
154
|
}
|
|
153
155
|
|
|
154
|
-
// Step 4:
|
|
156
|
+
// Step 4: Apply token ceiling truncation to combined data (AFTER sanitization)
|
|
157
|
+
// Combine all field values to check total content size
|
|
158
|
+
const combinedData = Object.entries(sanitizedData)
|
|
159
|
+
.map(([key, value]) => `${key}: ${value || 'null'}`)
|
|
160
|
+
.join('\n');
|
|
161
|
+
|
|
162
|
+
const truncationResult = truncateContent(combinedData);
|
|
163
|
+
|
|
164
|
+
// If truncated, we need to reconstruct sanitizedData from truncated content
|
|
165
|
+
let finalData = sanitizedData;
|
|
166
|
+
if (truncationResult.truncated) {
|
|
167
|
+
// Parse truncated content back into fields
|
|
168
|
+
// This is a simple approach - in production you might want more sophisticated handling
|
|
169
|
+
const lines = truncationResult.content.split('\n');
|
|
170
|
+
finalData = {};
|
|
171
|
+
for (const line of lines) {
|
|
172
|
+
if (line.includes(':')) {
|
|
173
|
+
const [key, ...valueParts] = line.split(':');
|
|
174
|
+
const value = valueParts.join(':').trim();
|
|
175
|
+
if (key.trim() in sanitizedData) {
|
|
176
|
+
finalData[key.trim()] = value === 'null' ? null : value;
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
// Preserve any missing fields as null
|
|
181
|
+
for (const key of Object.keys(sanitizedData)) {
|
|
182
|
+
if (!(key in finalData)) {
|
|
183
|
+
finalData[key] = null;
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
// Step 5: Generate aggregated threat report
|
|
189
|
+
const threatReport = generateThreatReport({
|
|
190
|
+
patterns_detected: Array.from(allPatternsDetected),
|
|
191
|
+
pii_redacted: Array.from(allPIITypesRedacted).length,
|
|
192
|
+
source_url: url
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
// Step 6: Build output
|
|
155
196
|
const output: VisusFetchStructuredOutput = {
|
|
156
197
|
url,
|
|
157
|
-
data:
|
|
198
|
+
data: finalData,
|
|
158
199
|
sanitization: {
|
|
159
200
|
patterns_detected: Array.from(allPatternsDetected),
|
|
160
201
|
pii_types_redacted: Array.from(allPIITypesRedacted),
|
|
@@ -168,8 +209,14 @@ export async function visusFetchStructured(
|
|
|
168
209
|
content_length_sanitized: Object.values(sanitizedData)
|
|
169
210
|
.filter(v => v !== null)
|
|
170
211
|
.join(' ')
|
|
171
|
-
.length
|
|
172
|
-
|
|
212
|
+
.length,
|
|
213
|
+
...(truncationResult.truncated && {
|
|
214
|
+
truncated: true,
|
|
215
|
+
truncated_at_chars: truncationResult.truncated_at_chars
|
|
216
|
+
})
|
|
217
|
+
},
|
|
218
|
+
// Include threat_report only if findings exist
|
|
219
|
+
...(threatReport && { threat_report: threatReport })
|
|
173
220
|
};
|
|
174
221
|
|
|
175
222
|
// Log to stderr if threats detected
|
|
@@ -195,7 +242,8 @@ export async function visusFetchStructured(
|
|
|
195
242
|
*/
|
|
196
243
|
export const visusFetchStructuredToolDefinition = {
|
|
197
244
|
name: 'visus_fetch_structured',
|
|
198
|
-
|
|
245
|
+
title: 'Fetch Structured Data (Sanitized)',
|
|
246
|
+
description: 'Fetch a web page and extract structured data according to a schema. SECURITY: All extracted fields pass through prompt injection sanitization (43 pattern categories) and PII redaction BEFORE being returned to the LLM. Each field is independently sanitized to ensure safe consumption of untrusted web content.',
|
|
199
247
|
inputSchema: {
|
|
200
248
|
type: 'object',
|
|
201
249
|
properties: {
|
|
@@ -217,5 +265,9 @@ export const visusFetchStructuredToolDefinition = {
|
|
|
217
265
|
}
|
|
218
266
|
},
|
|
219
267
|
required: ['url', 'schema']
|
|
220
|
-
}
|
|
268
|
+
},
|
|
269
|
+
readOnlyHint: true,
|
|
270
|
+
destructiveHint: false,
|
|
271
|
+
idempotentHint: true,
|
|
272
|
+
openWorldHint: true
|
|
221
273
|
};
|
package/src/tools/fetch.ts
CHANGED
|
@@ -8,6 +8,8 @@
|
|
|
8
8
|
|
|
9
9
|
import { renderPage } from '../browser/playwright-renderer.js';
|
|
10
10
|
import { sanitize } from '../sanitizer/index.js';
|
|
11
|
+
import { truncateContent } from '../utils/truncate.js';
|
|
12
|
+
import { detectFormat, convertJson, convertXml, convertRss } from '../utils/format-converter.js';
|
|
11
13
|
import type { VisusFetchInput, VisusFetchOutput, Result } from '../types.js';
|
|
12
14
|
import { Err } from '../types.js';
|
|
13
15
|
|
|
@@ -36,17 +38,37 @@ export async function visusFetch(input: VisusFetchInput): Promise<Result<VisusFe
|
|
|
36
38
|
return Err(renderResult.error);
|
|
37
39
|
}
|
|
38
40
|
|
|
39
|
-
const { html, title } = renderResult.value;
|
|
41
|
+
const { html, title, contentType } = renderResult.value;
|
|
40
42
|
const rawContent = html || '';
|
|
41
43
|
|
|
42
|
-
// Step 2:
|
|
44
|
+
// Step 2: Detect format and apply format-appropriate conversion
|
|
45
|
+
const detectedContentType = contentType || 'text/html';
|
|
46
|
+
const formatType = detectFormat(detectedContentType);
|
|
47
|
+
|
|
48
|
+
let processedContent = rawContent;
|
|
49
|
+
|
|
50
|
+
// Apply format-specific conversion (skip Readability for non-HTML)
|
|
51
|
+
if (formatType === 'json') {
|
|
52
|
+
processedContent = convertJson(rawContent);
|
|
53
|
+
} else if (formatType === 'xml') {
|
|
54
|
+
processedContent = convertXml(rawContent);
|
|
55
|
+
} else if (formatType === 'rss') {
|
|
56
|
+
processedContent = convertRss(rawContent);
|
|
57
|
+
}
|
|
58
|
+
// For 'html' format, processedContent remains as rawContent
|
|
59
|
+
|
|
60
|
+
// Step 3: CRITICAL - Sanitize content (injection detection + PII redaction with allowlisting)
|
|
43
61
|
// This step CANNOT be skipped or bypassed
|
|
44
|
-
const sanitizationResult = sanitize(
|
|
62
|
+
const sanitizationResult = sanitize(processedContent, url);
|
|
45
63
|
|
|
46
|
-
// Step 3:
|
|
64
|
+
// Step 3: Apply token ceiling truncation (AFTER sanitization)
|
|
65
|
+
// Anthropic MCP Directory enforces 25,000 token response limit
|
|
66
|
+
const truncationResult = truncateContent(sanitizationResult.content);
|
|
67
|
+
|
|
68
|
+
// Step 4: Build output
|
|
47
69
|
const output: VisusFetchOutput = {
|
|
48
70
|
url,
|
|
49
|
-
content:
|
|
71
|
+
content: truncationResult.content,
|
|
50
72
|
sanitization: {
|
|
51
73
|
patterns_detected: sanitizationResult.sanitization.patterns_detected,
|
|
52
74
|
pii_types_redacted: sanitizationResult.sanitization.pii_types_redacted,
|
|
@@ -57,8 +79,16 @@ export async function visusFetch(input: VisusFetchInput): Promise<Result<VisusFe
|
|
|
57
79
|
title: title || 'Untitled',
|
|
58
80
|
fetched_at: new Date().toISOString(),
|
|
59
81
|
content_length_original: sanitizationResult.metadata.original_length,
|
|
60
|
-
content_length_sanitized: sanitizationResult.metadata.sanitized_length
|
|
61
|
-
|
|
82
|
+
content_length_sanitized: sanitizationResult.metadata.sanitized_length,
|
|
83
|
+
format_detected: formatType,
|
|
84
|
+
content_type: detectedContentType,
|
|
85
|
+
...(truncationResult.truncated && {
|
|
86
|
+
truncated: true,
|
|
87
|
+
truncated_at_chars: truncationResult.truncated_at_chars
|
|
88
|
+
})
|
|
89
|
+
},
|
|
90
|
+
// Include threat_report only if findings exist
|
|
91
|
+
...(sanitizationResult.threat_report && { threat_report: sanitizationResult.threat_report })
|
|
62
92
|
};
|
|
63
93
|
|
|
64
94
|
// Log to stderr if critical threats detected
|
|
@@ -84,7 +114,8 @@ export async function visusFetch(input: VisusFetchInput): Promise<Result<VisusFe
|
|
|
84
114
|
*/
|
|
85
115
|
export const visusFetchToolDefinition = {
|
|
86
116
|
name: 'visus_fetch',
|
|
87
|
-
|
|
117
|
+
title: 'Fetch Web Page (Sanitized)',
|
|
118
|
+
description: 'Fetch and sanitize web page content. Returns clean, injection-free content in markdown or text format. SECURITY: All content passes through prompt injection sanitization (43 pattern categories) and PII redaction BEFORE reaching the LLM. This ensures safe consumption of untrusted web content.',
|
|
88
119
|
inputSchema: {
|
|
89
120
|
type: 'object',
|
|
90
121
|
properties: {
|
|
@@ -105,5 +136,9 @@ export const visusFetchToolDefinition = {
|
|
|
105
136
|
}
|
|
106
137
|
},
|
|
107
138
|
required: ['url']
|
|
108
|
-
}
|
|
139
|
+
},
|
|
140
|
+
readOnlyHint: true,
|
|
141
|
+
destructiveHint: false,
|
|
142
|
+
idempotentHint: true,
|
|
143
|
+
openWorldHint: true
|
|
109
144
|
};
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* visus_read MCP Tool
|
|
3
|
+
*
|
|
4
|
+
* Extracts clean article content from a web page using Mozilla Readability,
|
|
5
|
+
* stripping navigation, ads, and boilerplate. Full prompt injection sanitization
|
|
6
|
+
* and PII redaction applied before content reaches the LLM.
|
|
7
|
+
*
|
|
8
|
+
* CRITICAL: ALL content MUST pass through the sanitizer. This cannot be bypassed.
|
|
9
|
+
*
|
|
10
|
+
* Pipeline order:
|
|
11
|
+
* 1. Playwright renders page (full JS execution)
|
|
12
|
+
* 2. Reader extracts main content (reduces input size)
|
|
13
|
+
* 3. Sanitizer runs on clean text
|
|
14
|
+
* 4. Token ceiling applied (24,000 token cap)
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { renderPage } from '../browser/playwright-renderer.js';
|
|
18
|
+
import { extractArticle } from '../browser/reader.js';
|
|
19
|
+
import { sanitize } from '../sanitizer/index.js';
|
|
20
|
+
import { truncateContent } from '../utils/truncate.js';
|
|
21
|
+
import type { VisusReadInput, VisusReadOutput, Result } from '../types.js';
|
|
22
|
+
import { Err } from '../types.js';
|
|
23
|
+
|
|
24
|
+
/**
 * visus_read tool implementation.
 *
 * Renders the page, extracts the article with the reader, sanitizes the
 * extracted content, truncates the sanitized text, and returns it with
 * metadata. Failures from rendering or extraction, and any thrown exception,
 * are returned as an Err result rather than thrown.
 *
 * @param input Tool input: `url` (required) and `timeout_ms` (default 10000)
 * @returns Ok result with sanitized article content and metadata, or an Err result
 */
export async function visusRead(input: VisusReadInput): Promise<Result<VisusReadOutput, Error>> {
  const url = input.url;
  const timeout_ms = input.timeout_ms === undefined ? 10000 : input.timeout_ms;

  // Reject a missing, empty, or non-string URL up front
  if (typeof url !== 'string' || url.length === 0) {
    return Err(new Error('Invalid input: url must be a non-empty string'));
  }

  // Structured one-line JSON log entry written to stderr
  const logEvent = (event: string, details: Record<string, unknown>): void => {
    console.error(JSON.stringify({
      timestamp: new Date().toISOString(),
      event,
      url,
      ...details
    }));
  };

  try {
    // 1. Render with Playwright
    const rendered = await renderPage(url, { timeout_ms, format: 'html' });
    if (!rendered.ok) {
      return Err(rendered.error);
    }
    const { html, title: pageTitle } = rendered.value;

    // 2. Reader extraction
    const extracted = extractArticle(html, url);
    if (!extracted.ok) {
      return Err(extracted.error);
    }
    const article = extracted.value;

    // 3. CRITICAL - sanitize the extracted content; this step must never be skipped
    const sanitized = sanitize(article.content, url);

    // 4. Token ceiling truncation, applied to the sanitized text only
    const truncation = truncateContent(sanitized.content);

    // 5. Assemble the tool output
    const detections = sanitized.sanitization;
    const output: VisusReadOutput = {
      url,
      content: truncation.content,
      metadata: {
        title: article.title || pageTitle || 'Untitled',
        author: article.byline,
        published: article.publishedTime,
        word_count: article.wordCount,
        reader_mode_available: article.readerModeAvailable,
        sanitized: true,
        injections_removed: detections.patterns_detected.length,
        pii_redacted: detections.pii_types_redacted.length,
        truncated: truncation.truncated,
        fetched_at: new Date().toISOString()
      },
      // threat_report is attached only when the sanitizer produced one
      ...(sanitized.threat_report && { threat_report: sanitized.threat_report })
    };

    if (sanitized.metadata.has_critical_threats) {
      logEvent('reader_critical_threat_detected', {
        patterns: detections.patterns_detected,
        severity_score: sanitized.metadata.severity_score
      });
    }

    // Reader mode unavailable (non-article page)
    if (!article.readerModeAvailable) {
      logEvent('reader_mode_fallback', {
        reason: 'Readability could not extract article structure'
      });
    }

    return { ok: true, value: output };
  } catch (error) {
    return Err(error instanceof Error ? error : new Error(String(error)));
  }
}
|
|
116
|
+
|
|
117
|
+
/**
 * MCP tool definition for registering visus_read.
 *
 * The behaviour hints are provided twice: at the top level (the original
 * layout, kept so existing readers of this object keep working) and under
 * `annotations`, which is where the MCP Tool schema defines them.
 *
 * NOTE(review): confirm how the server registration consumes these fields;
 * if it already builds `annotations` itself, the nested copy is redundant
 * but harmless.
 */
export const visusReadToolDefinition = {
  name: 'visus_read',
  title: 'Read Web Page (Reader Mode + Sanitized)',
  description: 'Extracts clean article content from a web page using Mozilla Readability, stripping navigation, ads, and boilerplate. Full prompt injection sanitization and PII redaction applied before content reaches the LLM. Optimized for context-efficient, safe web reading in Claude Desktop.',
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'The URL to fetch (must be http:// or https://)'
      },
      timeout_ms: {
        type: 'number',
        description: 'Request timeout in milliseconds (default: 10000)',
        default: 10000
      }
    },
    required: ['url']
  },
  // Spec-defined location for the tool behaviour hints.
  annotations: {
    title: 'Read Web Page (Reader Mode + Sanitized)',
    readOnlyHint: true,
    destructiveHint: false,
    idempotentHint: true,
    openWorldHint: true
  },
  // Top-level copies retained for backward compatibility.
  readOnlyHint: true,
  destructiveHint: false,
  idempotentHint: true,
  openWorldHint: true
};
|