visus-mcp 0.6.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +6 -1
- package/.env.status +7 -0
- package/CHANGELOG.md +65 -0
- package/CLAUDE.md +3 -0
- package/README.md +15 -7
- package/SECURITY.md +2 -0
- package/STATUS.md +203 -9
- package/dist/content-handlers/index.d.ts +36 -0
- package/dist/content-handlers/index.d.ts.map +1 -0
- package/dist/content-handlers/index.js +59 -0
- package/dist/content-handlers/index.js.map +1 -0
- package/dist/content-handlers/json-handler.d.ts +28 -0
- package/dist/content-handlers/json-handler.d.ts.map +1 -0
- package/dist/content-handlers/json-handler.js +116 -0
- package/dist/content-handlers/json-handler.js.map +1 -0
- package/dist/content-handlers/pdf-handler.d.ts +29 -0
- package/dist/content-handlers/pdf-handler.d.ts.map +1 -0
- package/dist/content-handlers/pdf-handler.js +77 -0
- package/dist/content-handlers/pdf-handler.js.map +1 -0
- package/dist/content-handlers/svg-handler.d.ts +35 -0
- package/dist/content-handlers/svg-handler.d.ts.map +1 -0
- package/dist/content-handlers/svg-handler.js +206 -0
- package/dist/content-handlers/svg-handler.js.map +1 -0
- package/dist/content-handlers/types.d.ts +42 -0
- package/dist/content-handlers/types.d.ts.map +1 -0
- package/dist/content-handlers/types.js +7 -0
- package/dist/content-handlers/types.js.map +1 -0
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +62 -4
- package/dist/tools/fetch.js.map +1 -1
- package/package.json +2 -1
- package/server.json +2 -2
- package/src/content-handlers/index.ts +72 -0
- package/src/content-handlers/json-handler.ts +137 -0
- package/src/content-handlers/pdf-handler.ts +91 -0
- package/src/content-handlers/svg-handler.ts +243 -0
- package/src/content-handlers/types.ts +44 -0
- package/src/tools/fetch.ts +69 -4
- package/.github/ISSUE_TEMPLATE/bug_report.md +0 -47
- package/.github/ISSUE_TEMPLATE/false_positive.md +0 -43
- package/.github/ISSUE_TEMPLATE/new_pattern.md +0 -49
- package/.github/ISSUE_TEMPLATE/security_report.md +0 -31
- package/.github/PULL_REQUEST_TEMPLATE.md +0 -39
- package/.mcpregistry_github_token +0 -1
- package/.mcpregistry_registry_token +0 -1
- package/CONTRIBUTING.md +0 -329
- package/LINKEDIN-STRATEGY.md +0 -367
- package/ROADMAP.md +0 -221
- package/SECURITY-AUDIT-v1.md +0 -277
- package/SUBMISSION.md +0 -66
- package/TROUBLESHOOT-AUTH-20260322-2019.md +0 -291
- package/TROUBLESHOOT-BUILD-20260319-1450.md +0 -546
- package/TROUBLESHOOT-COGNITO-AUTH-20260324-2029.md +0 -415
- package/TROUBLESHOOT-COGNITO-JWT-20260324.md +0 -592
- package/TROUBLESHOOT-FETCH-20260320-1150.md +0 -168
- package/TROUBLESHOOT-JEST-20260323-1357.md +0 -139
- package/TROUBLESHOOT-LAMBDA-20260322-1945.md +0 -183
- package/TROUBLESHOOT-PLAYWRIGHT-20260321-1549.md +0 -217
- package/TROUBLESHOOT-SSL-20260320-1138.md +0 -171
- package/TROUBLESHOOT-STRUCTURED-20260320-1200.md +0 -246
- package/TROUBLESHOOT-TEST-20260320-0942.md +0 -281
- package/VISUS-CLAUDE-CODE-PROMPT.md +0 -324
- package/VISUS-PROJECT-PLAN.md +0 -205
- package/cdk.json +0 -73
- package/infrastructure/app.ts +0 -39
- package/infrastructure/stack.ts +0 -298
- package/jest.config.js +0 -33
- package/jest.setup.js +0 -9
- package/lambda-deploy/index.js +0 -81512
- package/lambda-deploy/index.js.map +0 -7
- package/lambda-package/browser/__mocks__/playwright-renderer.d.ts +0 -25
- package/lambda-package/browser/__mocks__/playwright-renderer.d.ts.map +0 -1
- package/lambda-package/browser/__mocks__/playwright-renderer.js +0 -119
- package/lambda-package/browser/__mocks__/playwright-renderer.js.map +0 -1
- package/lambda-package/browser/playwright-renderer.d.ts +0 -40
- package/lambda-package/browser/playwright-renderer.d.ts.map +0 -1
- package/lambda-package/browser/playwright-renderer.js +0 -214
- package/lambda-package/browser/playwright-renderer.js.map +0 -1
- package/lambda-package/browser/reader.d.ts +0 -31
- package/lambda-package/browser/reader.d.ts.map +0 -1
- package/lambda-package/browser/reader.js +0 -98
- package/lambda-package/browser/reader.js.map +0 -1
- package/lambda-package/index.d.ts +0 -18
- package/lambda-package/index.d.ts.map +0 -1
- package/lambda-package/index.js +0 -238
- package/lambda-package/index.js.map +0 -1
- package/lambda-package/lambda-handler.d.ts +0 -28
- package/lambda-package/lambda-handler.d.ts.map +0 -1
- package/lambda-package/lambda-handler.js +0 -257
- package/lambda-package/lambda-handler.js.map +0 -1
- package/lambda-package/package-lock.json +0 -7435
- package/lambda-package/package.json +0 -74
- package/lambda-package/runtime.d.ts +0 -50
- package/lambda-package/runtime.d.ts.map +0 -1
- package/lambda-package/runtime.js +0 -86
- package/lambda-package/runtime.js.map +0 -1
- package/lambda-package/sanitizer/elicit-runner.d.ts +0 -48
- package/lambda-package/sanitizer/elicit-runner.d.ts.map +0 -1
- package/lambda-package/sanitizer/elicit-runner.js +0 -100
- package/lambda-package/sanitizer/elicit-runner.js.map +0 -1
- package/lambda-package/sanitizer/framework-mapper.d.ts +0 -24
- package/lambda-package/sanitizer/framework-mapper.d.ts.map +0 -1
- package/lambda-package/sanitizer/framework-mapper.js +0 -342
- package/lambda-package/sanitizer/framework-mapper.js.map +0 -1
- package/lambda-package/sanitizer/hitl-gate.d.ts +0 -69
- package/lambda-package/sanitizer/hitl-gate.d.ts.map +0 -1
- package/lambda-package/sanitizer/hitl-gate.js +0 -101
- package/lambda-package/sanitizer/hitl-gate.js.map +0 -1
- package/lambda-package/sanitizer/index.d.ts +0 -63
- package/lambda-package/sanitizer/index.d.ts.map +0 -1
- package/lambda-package/sanitizer/index.js +0 -105
- package/lambda-package/sanitizer/index.js.map +0 -1
- package/lambda-package/sanitizer/injection-detector.d.ts +0 -34
- package/lambda-package/sanitizer/injection-detector.d.ts.map +0 -1
- package/lambda-package/sanitizer/injection-detector.js +0 -89
- package/lambda-package/sanitizer/injection-detector.js.map +0 -1
- package/lambda-package/sanitizer/patterns.d.ts +0 -30
- package/lambda-package/sanitizer/patterns.d.ts.map +0 -1
- package/lambda-package/sanitizer/patterns.js +0 -372
- package/lambda-package/sanitizer/patterns.js.map +0 -1
- package/lambda-package/sanitizer/pii-allowlist.d.ts +0 -49
- package/lambda-package/sanitizer/pii-allowlist.d.ts.map +0 -1
- package/lambda-package/sanitizer/pii-allowlist.js +0 -231
- package/lambda-package/sanitizer/pii-allowlist.js.map +0 -1
- package/lambda-package/sanitizer/pii-redactor.d.ts +0 -41
- package/lambda-package/sanitizer/pii-redactor.d.ts.map +0 -1
- package/lambda-package/sanitizer/pii-redactor.js +0 -213
- package/lambda-package/sanitizer/pii-redactor.js.map +0 -1
- package/lambda-package/sanitizer/severity-classifier.d.ts +0 -33
- package/lambda-package/sanitizer/severity-classifier.d.ts.map +0 -1
- package/lambda-package/sanitizer/severity-classifier.js +0 -113
- package/lambda-package/sanitizer/severity-classifier.js.map +0 -1
- package/lambda-package/sanitizer/threat-reporter.d.ts +0 -66
- package/lambda-package/sanitizer/threat-reporter.d.ts.map +0 -1
- package/lambda-package/sanitizer/threat-reporter.js +0 -163
- package/lambda-package/sanitizer/threat-reporter.js.map +0 -1
- package/lambda-package/tools/fetch-structured.d.ts +0 -51
- package/lambda-package/tools/fetch-structured.d.ts.map +0 -1
- package/lambda-package/tools/fetch-structured.js +0 -237
- package/lambda-package/tools/fetch-structured.js.map +0 -1
- package/lambda-package/tools/fetch.d.ts +0 -49
- package/lambda-package/tools/fetch.d.ts.map +0 -1
- package/lambda-package/tools/fetch.js +0 -131
- package/lambda-package/tools/fetch.js.map +0 -1
- package/lambda-package/tools/read.d.ts +0 -51
- package/lambda-package/tools/read.d.ts.map +0 -1
- package/lambda-package/tools/read.js +0 -127
- package/lambda-package/tools/read.js.map +0 -1
- package/lambda-package/tools/search.d.ts +0 -45
- package/lambda-package/tools/search.d.ts.map +0 -1
- package/lambda-package/tools/search.js +0 -220
- package/lambda-package/tools/search.js.map +0 -1
- package/lambda-package/types.d.ts +0 -167
- package/lambda-package/types.d.ts.map +0 -1
- package/lambda-package/types.js +0 -16
- package/lambda-package/types.js.map +0 -1
- package/lambda-package/utils/format-converter.d.ts +0 -39
- package/lambda-package/utils/format-converter.d.ts.map +0 -1
- package/lambda-package/utils/format-converter.js +0 -191
- package/lambda-package/utils/format-converter.js.map +0 -1
- package/lambda-package/utils/truncate.d.ts +0 -26
- package/lambda-package/utils/truncate.d.ts.map +0 -1
- package/lambda-package/utils/truncate.js +0 -54
- package/lambda-package/utils/truncate.js.map +0 -1
- package/lambda.zip +0 -0
- package/test-output.txt +0 -4
- package/tests/auth-smoke.test.ts +0 -480
- package/tests/elicit-runner.test.ts +0 -232
- package/tests/fetch-tool.test.ts +0 -922
- package/tests/hitl-gate.test.ts +0 -267
- package/tests/injection-corpus.ts +0 -338
- package/tests/pii-allowlist.test.ts +0 -282
- package/tests/reader.test.ts +0 -353
- package/tests/sanitizer.test.ts +0 -358
- package/tests/search.test.ts +0 -456
- package/tests/threat-reporter.test.ts +0 -334
- package/tsconfig.cdk.json +0 -35
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content Handlers Module
|
|
3
|
+
*
|
|
4
|
+
* Central routing for content-type specific sanitization handlers.
|
|
5
|
+
* Detects MIME type from Content-Type header and routes to appropriate handler.
|
|
6
|
+
*
|
|
7
|
+
* Supported content types:
|
|
8
|
+
* - application/pdf -> PDF handler
|
|
9
|
+
* - application/json -> JSON handler
|
|
10
|
+
* - image/svg+xml -> SVG handler
|
|
11
|
+
*
|
|
12
|
+
* Unsupported types return structured rejection (no throw).
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { handlePdf } from './pdf-handler.js';
|
|
16
|
+
import { handleJson } from './json-handler.js';
|
|
17
|
+
import { handleSvg } from './svg-handler.js';
|
|
18
|
+
import type { HandlerResult } from './types.js';
|
|
19
|
+
|
|
20
|
+
/**
 * Reduce a raw Content-Type header value to its bare MIME type.
 *
 * Everything from the first `;` onward (parameters such as `charset`) is
 * discarded, surrounding whitespace is trimmed and the result is lower-cased.
 *
 * Examples:
 * - "application/pdf; charset=utf-8" -> "application/pdf"
 * - "application/json" -> "application/json"
 * - "IMAGE/SVG+XML" -> "image/svg+xml"
 *
 * @param contentType - Raw Content-Type header value
 * @returns Normalized MIME type (lowercase, parameters stripped)
 */
export function normalizeMimeType(contentType: string): string {
  // Slice rather than `split(';')[0]`: an indexed access is typed
  // `string | undefined` under `noUncheckedIndexedAccess`, so calling
  // `.trim()` on it does not compile with that flag enabled.
  const parameterStart = contentType.indexOf(';');
  const baseType = parameterStart === -1 ? contentType : contentType.slice(0, parameterStart);
  return baseType.trim().toLowerCase();
}
|
|
34
|
+
|
|
35
|
+
/**
 * Dispatch content to the handler that matches its MIME type.
 *
 * The Content-Type value is normalized first, then compared against the
 * supported types: PDF, JSON (`application/json` and `text/json`) and SVG.
 * Any other type produces a structured `rejected` result instead of a throw.
 *
 * @param content - Raw content (string or Buffer)
 * @param contentType - Content-Type header value
 * @returns Handler result (success or error/rejected)
 */
export async function routeContentHandler(
  content: string | Buffer,
  contentType: string
): Promise<HandlerResult> {
  const mimeType = normalizeMimeType(contentType);

  if (mimeType === 'application/pdf') {
    return handlePdf(content, mimeType);
  }

  if (mimeType === 'application/json' || mimeType === 'text/json') {
    return handleJson(content, mimeType);
  }

  if (mimeType === 'image/svg+xml') {
    return handleSvg(content, mimeType);
  }

  // No handler matched: report the rejection as data.
  return {
    status: 'rejected',
    reason: 'UNSUPPORTED_CONTENT_TYPE',
    mime: mimeType,
    message: `Content type ${mimeType} is not supported by Visus-MCP.`
  };
}
|
|
70
|
+
|
|
71
|
+
// Re-export types
|
|
72
|
+
export type { HandlerResult, HandlerSuccessResult, HandlerErrorResult } from './types.js';
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON Content Handler
|
|
3
|
+
*
|
|
4
|
+
* Handles application/json content type. Recursively traverses all nodes in the JSON
|
|
5
|
+
* object tree and applies the full injection pattern registry to every string value.
|
|
6
|
+
*
|
|
7
|
+
* What it handles:
|
|
8
|
+
* - All string values in the JSON tree (any depth)
|
|
9
|
+
* - Arrays, nested objects, and mixed-type arrays
|
|
10
|
+
* - Falls back to plain text pipeline if JSON.parse fails
|
|
11
|
+
*
|
|
12
|
+
* What it strips:
|
|
13
|
+
* - Nothing (preserves original structure)
|
|
14
|
+
*
|
|
15
|
+
* What it passes through:
|
|
16
|
+
* - Sanitized JSON with original structure preserved
|
|
17
|
+
* - All non-string values pass through unchanged
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { sanitize } from '../sanitizer/index.js';
|
|
21
|
+
import type { HandlerResult } from './types.js';
|
|
22
|
+
|
|
23
|
+
/**
 * Handle JSON content
 *
 * Parses the payload, runs every string value in the tree through
 * `sanitize`, and re-serializes the result with a 2-space indent. The
 * per-string sanitization metadata is merged into one summary;
 * `sanitized_fields` counts the strings whose content was modified.
 *
 * If parsing (or anything else inside the `try` block) throws, the whole
 * payload is sanitized as a single plain-text string instead.
 *
 * @param content - Raw JSON string or Buffer (decoded as UTF-8)
 * @param mimeType - Original MIME type, echoed back as `content_type`
 * @returns Sanitized handler result
 */
export function handleJson(
  content: string | Buffer,
  mimeType: string
): HandlerResult {
  const startTime = Date.now();

  // Convert Buffer to string if needed
  const decoded = Buffer.isBuffer(content) ? content.toString('utf-8') : content;

  // Buffer#toString keeps a leading byte-order mark and JSON.parse rejects it,
  // which would push otherwise valid JSON onto the plain-text fallback below.
  const jsonString = decoded.charCodeAt(0) === 0xfeff ? decoded.slice(1) : decoded;

  try {
    const parsed: unknown = JSON.parse(jsonString);

    // Track sanitization metadata across all fields
    let sanitizedFieldCount = 0;
    const allPatternsDetected = new Set<string>();
    const allPiiTypesRedacted = new Set<string>();
    const allPiiAllowlisted: Array<{ type: string; value: string; reason: string }> = [];

    // Recursively sanitize all string values
    const sanitized = recursiveSanitize(parsed, (text: string) => {
      const result = sanitize(text);
      if (result.sanitization.content_modified) {
        sanitizedFieldCount++;
      }

      // Aggregate metadata
      result.sanitization.patterns_detected.forEach(p => allPatternsDetected.add(p));
      result.sanitization.pii_types_redacted.forEach(p => allPiiTypesRedacted.add(p));
      // Plain loop instead of push(...spread): spreading a very large array
      // into a call can exceed the engine's argument limit.
      for (const entry of result.sanitization.pii_allowlisted) {
        allPiiAllowlisted.push(entry);
      }

      return result.content;
    });

    // Re-stringify with 2-space indent
    const sanitizedJson = JSON.stringify(sanitized, null, 2);

    const processingTime = Date.now() - startTime;

    return {
      status: 'sanitized',
      content_type: mimeType,
      sanitized_content: sanitizedJson,
      sanitization: {
        patterns_detected: Array.from(allPatternsDetected),
        pii_types_redacted: Array.from(allPiiTypesRedacted),
        pii_allowlisted: allPiiAllowlisted,
        sanitized_fields: sanitizedFieldCount
      },
      processing_time_ms: processingTime
    };

  } catch {
    // Reached when JSON.parse throws; any other exception raised inside the
    // try block (for example while walking the tree) also lands here. Either
    // way the payload is sanitized as one plain-text string.
    const sanitizationResult = sanitize(jsonString);

    const processingTime = Date.now() - startTime;

    return {
      status: 'sanitized',
      content_type: mimeType,
      sanitized_content: sanitizationResult.content,
      sanitization: {
        patterns_detected: sanitizationResult.sanitization.patterns_detected,
        pii_types_redacted: sanitizationResult.sanitization.pii_types_redacted,
        pii_allowlisted: sanitizationResult.sanitization.pii_allowlisted,
        // NOTE(review): this path reports the number of detected patterns,
        // whereas the parsed path above counts modified strings.
        sanitized_fields: sanitizationResult.sanitization.patterns_detected.length
      },
      processing_time_ms: processingTime
    };
  }
}
|
|
102
|
+
|
|
103
|
+
/**
 * Recursively traverse a JSON value and sanitize every string value in it.
 *
 * Strings are replaced by `sanitizeFn(string)`. Arrays and objects are copied
 * with each element/member processed depth-first in their original order.
 * All other values (numbers, booleans, null, undefined) are returned as-is.
 * Object keys are copied verbatim; they are not passed through `sanitizeFn`.
 *
 * @param obj - JSON object/array/primitive
 * @param sanitizeFn - Function to sanitize string values
 * @returns Sanitized copy with the same structure
 */
function recursiveSanitize(obj: unknown, sanitizeFn: (text: string) => string): unknown {
  if (typeof obj === 'string') {
    return sanitizeFn(obj);
  }

  if (Array.isArray(obj)) {
    return obj.map((item: unknown) => recursiveSanitize(item, sanitizeFn));
  }

  if (typeof obj === 'object' && obj !== null) {
    // Object.fromEntries defines own data properties. A plain
    // `copy[key] = value` assignment would invoke the `__proto__` setter for a
    // JSON member named "__proto__", dropping that member from the output and
    // replacing the copy's prototype.
    return Object.fromEntries(
      Object.entries(obj).map(([key, value]) => [key, recursiveSanitize(value, sanitizeFn)])
    );
  }

  // null and non-string primitives pass through unchanged
  return obj;
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDF Content Handler
|
|
3
|
+
*
|
|
4
|
+
* Handles application/pdf content type. Extracts text and metadata from PDF files,
|
|
5
|
+
* passes all text through the injection pattern registry, and returns sanitized plain text.
|
|
6
|
+
*
|
|
7
|
+
* What it handles:
|
|
8
|
+
* - PDF body text (full document)
|
|
9
|
+
* - PDF metadata: title, author, subject, keywords, creator, producer
|
|
10
|
+
* - Annotation text
|
|
11
|
+
* - Form field values
|
|
12
|
+
*
|
|
13
|
+
* What it strips:
|
|
14
|
+
* - Embedded binary objects (fonts, images, attachments)
|
|
15
|
+
* - Returns only extracted text, not original binary
|
|
16
|
+
*
|
|
17
|
+
* What it passes through:
|
|
18
|
+
* - All extracted text after injection pattern sanitization
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { PDFParse } from 'pdf-parse';
|
|
22
|
+
import { sanitize } from '../sanitizer/index.js';
|
|
23
|
+
import type { HandlerResult } from './types.js';
|
|
24
|
+
|
|
25
|
+
/**
 * Handle PDF content
 *
 * Extracts the document text via `PDFParse#getText` and the info dictionary
 * via `PDFParse#getInfo`, appends the Title/Author/Subject/Keywords/Creator/
 * Producer entries (when they are non-empty strings) to the body text, and
 * runs the combined text through `sanitize`. Any exception is converted into
 * a structured `PDF_PARSE_FAILED` error result; this function does not throw.
 *
 * @param content - Raw PDF binary data as Buffer or string
 * @param mimeType - Original MIME type, echoed back in the result
 * @returns Sanitized handler result, or an error result if parsing fails
 */
export async function handlePdf(
  content: string | Buffer,
  mimeType: string
): Promise<HandlerResult> {
  const startTime = Date.now();

  try {
    // Ensure we have a Buffer.
    // NOTE(review): a string is encoded as UTF-8 here (Buffer.from default),
    // which only round-trips if the caller's string was not decoded from raw
    // PDF bytes — confirm how callers obtain string content.
    const buffer = Buffer.isBuffer(content) ? content : Buffer.from(content);

    // Parse PDF using pdf-parse v2 API
    const parser = new PDFParse({ data: buffer });

    try {
      // Get text and metadata separately
      const textResult = await parser.getText();
      const infoResult = await parser.getInfo();

      const bodyText = textResult.text || '';
      const metadata = infoResult.info || {};

      // Build combined text from body + metadata
      let combinedText = bodyText;

      const metadataFields = ['Title', 'Author', 'Subject', 'Keywords', 'Creator', 'Producer'];
      for (const field of metadataFields) {
        const value = metadata[field];
        if (value && typeof value === 'string') {
          combinedText += `\n\n${field}: ${value}`;
        }
      }

      // Pass through injection detection pipeline
      const sanitizationResult = sanitize(combinedText);

      const processingTime = Date.now() - startTime;

      return {
        status: 'sanitized',
        content_type: mimeType,
        sanitized_content: sanitizationResult.content,
        sanitization: {
          patterns_detected: sanitizationResult.sanitization.patterns_detected,
          pii_types_redacted: sanitizationResult.sanitization.pii_types_redacted,
          pii_allowlisted: sanitizationResult.sanitization.pii_allowlisted,
          sanitized_fields: sanitizationResult.sanitization.patterns_detected.length
        },
        processing_time_ms: processingTime
      };
    } finally {
      // Release the parser's document resources on every path. A failure while
      // cleaning up must not replace the result computed above.
      try {
        await parser.destroy();
      } catch {
        // best-effort cleanup
      }
    }

  } catch (error) {
    return {
      status: 'error',
      reason: 'PDF_PARSE_FAILED',
      mime: mimeType,
      message: error instanceof Error ? error.message : String(error)
    };
  }
}
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SVG Content Handler
|
|
3
|
+
*
|
|
4
|
+
* Handles image/svg+xml content type. SVG is XML, not a binary image, and can contain
|
|
5
|
+
* executable code and external references. This handler strips dangerous elements and
|
|
6
|
+
* attributes unconditionally, then sanitizes remaining text content.
|
|
7
|
+
*
|
|
8
|
+
* What it handles:
|
|
9
|
+
* - All text content in SVG elements after stripping dangerous parts
|
|
10
|
+
*
|
|
11
|
+
* What it strips (unconditionally, no attempt to sanitize):
|
|
12
|
+
* - <script> elements and all children
|
|
13
|
+
* - <use> elements with external href or xlink:href attributes
|
|
14
|
+
* - <foreignObject> elements and all children
|
|
15
|
+
* - All event handler attributes (onload, onclick, onerror, etc.)
|
|
16
|
+
* - <set> and <animate> elements that reference external resources
|
|
17
|
+
* - data: URI attributes
|
|
18
|
+
*
|
|
19
|
+
* What it passes through (after injection scan):
|
|
20
|
+
* - Path data (d attribute)
|
|
21
|
+
* - Text elements and their content
|
|
22
|
+
* - <title> and <desc> elements
|
|
23
|
+
* - Presentation attributes (fill, stroke, transform, etc.)
|
|
24
|
+
* - viewBox, width, height attributes
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { XMLParser, XMLBuilder } from 'fast-xml-parser';
|
|
28
|
+
import { sanitize } from '../sanitizer/index.js';
|
|
29
|
+
import type { HandlerResult } from './types.js';
|
|
30
|
+
|
|
31
|
+
/**
 * Handle SVG content
 *
 * Pipeline:
 * 1. Parse the SVG as XML (attributes kept under the `@_` prefix, namespace
 *    prefixes removed).
 * 2. Strip dangerous elements and attributes with `stripDangerousContent`.
 * 3. Scan the concatenated text once, for reporting only.
 * 4. Run every element text node through `sanitize` and write the sanitized
 *    text back into the tree, so the returned SVG carries the sanitized text
 *    rather than the original.
 * 5. Rebuild the SVG from the sanitized tree.
 *
 * Attribute values are copied verbatim and are not passed through `sanitize`.
 * `sanitized_fields` counts the text nodes whose content was modified.
 * Any exception is converted into a structured `SVG_PARSE_FAILED` error.
 *
 * @param content - Raw SVG XML string or Buffer (decoded as UTF-8)
 * @param mimeType - Original MIME type, echoed back in the result
 * @returns Sanitized handler result, or an error result on failure
 */
export function handleSvg(
  content: string | Buffer,
  mimeType: string
): HandlerResult {
  const startTime = Date.now();

  // Convert Buffer to string if needed
  const svgString = Buffer.isBuffer(content) ? content.toString('utf-8') : content;

  try {
    // Parse SVG XML
    const parser = new XMLParser({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      textNodeName: '#text',
      preserveOrder: false,
      removeNSPrefix: true,
    });

    const parsed = parser.parse(svgString);

    // Strip dangerous elements and attributes
    const stripped = stripDangerousContent(parsed);

    const patternsDetected = new Set<string>();
    const piiTypesRedacted = new Set<string>();
    const piiAllowlisted: Array<{ type: string; value: string; reason: string }> = [];
    let sanitizedFieldCount = 0;

    // Whole-document scan: reports patterns found in the concatenated text.
    // Its rewritten content is not used; the per-node pass below is what
    // changes the output.
    const textContent = extractTextContent(stripped);
    if (textContent.length > 0) {
      const combined = sanitize(textContent);
      combined.sanitization.patterns_detected.forEach(p => patternsDetected.add(p));
      combined.sanitization.pii_types_redacted.forEach(p => piiTypesRedacted.add(p));
    }

    // Per-node pass: replace each text node with its sanitized form.
    const cleaned = sanitizeSvgTextNodes(stripped, (text: string) => {
      const result = sanitize(text);
      if (result.sanitization.content_modified) {
        sanitizedFieldCount++;
      }
      result.sanitization.patterns_detected.forEach(p => patternsDetected.add(p));
      result.sanitization.pii_types_redacted.forEach(p => piiTypesRedacted.add(p));
      for (const entry of result.sanitization.pii_allowlisted) {
        piiAllowlisted.push(entry);
      }
      return result.content;
    });

    // Rebuild SVG
    const builder = new XMLBuilder({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      textNodeName: '#text',
      format: true,
      suppressEmptyNode: true,
    });

    const sanitizedSvg = builder.build(cleaned);

    const processingTime = Date.now() - startTime;

    return {
      status: 'sanitized',
      content_type: mimeType,
      sanitized_content: sanitizedSvg,
      sanitization: {
        patterns_detected: Array.from(patternsDetected),
        pii_types_redacted: Array.from(piiTypesRedacted),
        pii_allowlisted: piiAllowlisted,
        sanitized_fields: sanitizedFieldCount
      },
      processing_time_ms: processingTime
    };

  } catch (error) {
    return {
      status: 'error',
      reason: 'SVG_PARSE_FAILED',
      mime: mimeType,
      message: error instanceof Error ? error.message : String(error)
    };
  }
}

/**
 * Return a copy of a parsed SVG tree with every element text string replaced
 * by `sanitizeText(string)`.
 *
 * Values stored under attribute keys (prefix `@_`) and blank strings are
 * copied unchanged, as are non-string primitives.
 */
function sanitizeSvgTextNodes(
  node: unknown,
  sanitizeText: (text: string) => string,
  isAttribute = false
): unknown {
  if (typeof node === 'string') {
    return isAttribute || node.trim() === '' ? node : sanitizeText(node);
  }

  if (Array.isArray(node)) {
    return node.map((item: unknown) => sanitizeSvgTextNodes(item, sanitizeText, isAttribute));
  }

  if (typeof node === 'object' && node !== null) {
    return Object.fromEntries(
      Object.entries(node).map(([key, value]) => [
        key,
        sanitizeSvgTextNodes(value, sanitizeText, key.startsWith('@_')),
      ])
    );
  }

  return node;
}
|
|
112
|
+
|
|
113
|
+
/** Element names that are removed together with all of their children. */
const SVG_REMOVED_ELEMENTS = ['script', 'foreignObject'];

/** Element names that are removed when they point at an external resource. */
const SVG_REFERENCING_ELEMENTS = ['use', 'set', 'animate'];

/** Matches `http://`, `https://` and protocol-relative `//` references. */
const EXTERNAL_REFERENCE = /^\s*(?:https?:)?\/\//i;

/** Matches values that are `data:` URIs. */
const DATA_URI = /^\s*data:/i;

/** Matches parsed attribute keys of event handlers (`@_onload`, `@_onClick`, ...). */
const EVENT_HANDLER_ATTRIBUTE = /^@_on/i;

/** True when `value` is a string holding a `data:` URI. */
function isDataUri(value: unknown): boolean {
  return typeof value === 'string' && DATA_URI.test(value);
}

/** True when a parsed element's `href` or `xlink:href` is an external reference. */
function hasExternalHref(element: unknown): boolean {
  if (typeof element !== 'object' || element === null || Array.isArray(element)) {
    return false;
  }
  const attributes = element as Record<string, unknown>;
  return ['@_href', '@_xlink:href'].some((name) => {
    const href = attributes[name];
    // Only strings are inspected; calling string methods on another value
    // type would throw.
    return typeof href === 'string' && EXTERNAL_REFERENCE.test(href);
  });
}

/**
 * Strip dangerous content from parsed SVG
 *
 * Returns a copy of the parsed tree with the following removed:
 * - <script> and <foreignObject> elements (with all children)
 * - <use>, <set> and <animate> elements whose href / xlink:href is external
 *   (http://, https:// or //), whether the element occurs once (object) or
 *   several times (array)
 * - Event handler attributes (names starting with "on", any letter case)
 * - Attributes whose value is a data: URI
 *
 * Element text that is a data: URI is replaced with an empty string.
 * Scheme checks ignore letter case and leading whitespace.
 *
 * @param node - Parsed SVG node (object, array or primitive)
 * @returns Stripped copy; primitives are returned unchanged
 */
function stripDangerousContent(node: any): any {
  if (typeof node !== 'object' || node === null) {
    return node;
  }

  // Handle arrays (repeated sibling elements)
  if (Array.isArray(node)) {
    return node
      .filter((item) => !shouldRemoveElement(item))
      .map((item) => (isDataUri(item) ? '' : stripDangerousContent(item)));
  }

  // Handle objects
  const result: Record<string, unknown> = {};

  for (const [key, value] of Object.entries(node)) {
    // Skip dangerous elements
    if (SVG_REMOVED_ELEMENTS.includes(key)) {
      continue;
    }

    // Attributes: drop event handlers and data: URIs, keep the rest
    if (key.startsWith('@_')) {
      if (EVENT_HANDLER_ATTRIBUTE.test(key) || isDataUri(value)) {
        continue;
      }
      result[key] = stripDangerousContent(value);
      continue;
    }

    // <use>/<set>/<animate> with external references
    if (SVG_REFERENCING_ELEMENTS.includes(key)) {
      if (Array.isArray(value)) {
        // Each occurrence is checked on its own, so one local reference does
        // not shield an external sibling.
        const kept = value.filter((item) => !hasExternalHref(item));
        if (kept.length > 0) {
          result[key] = stripDangerousContent(kept);
        }
        continue;
      }
      if (hasExternalHref(value)) {
        continue;
      }
    }

    // Element text that is a data: URI is blanked
    if (isDataUri(value)) {
      result[key] = '';
      continue;
    }

    // Recursively process
    result[key] = stripDangerousContent(value);
  }

  return result;
}

/**
 * Check if element should be removed entirely
 *
 * @param element - Candidate array item
 * @returns true when the item is an object that has a `script` or
 *          `foreignObject` key
 */
function shouldRemoveElement(element: any): boolean {
  if (typeof element !== 'object' || element === null) {
    return false;
  }

  return SVG_REMOVED_ELEMENTS.some((name) => name in element);
}
|
|
202
|
+
|
|
203
|
+
/**
 * Extract all text content from SVG for injection scanning
 *
 * Walks the parsed tree in document (key) order and collects every non-blank
 * string that is element text: `#text` nodes, bare string children such as
 * `text: 'hello'`, and strings inside arrays of repeated elements. Values
 * under attribute keys (prefix `@_`) and non-string primitives are skipped.
 *
 * @param node - Parsed SVG node (object, array or primitive)
 * @returns Collected strings, each trimmed, joined by a single space
 */
function extractTextContent(node: any): string {
  const parts: string[] = [];

  const visit = (value: unknown): void => {
    if (typeof value === 'string') {
      const trimmed = value.trim();
      if (trimmed !== '') {
        parts.push(trimmed);
      }
      return;
    }

    if (Array.isArray(value)) {
      for (const item of value) {
        visit(item);
      }
      return;
    }

    if (typeof value === 'object' && value !== null) {
      for (const [key, child] of Object.entries(value)) {
        if (!key.startsWith('@_')) {
          visit(child);
        }
      }
    }
  };

  visit(node);
  return parts.join(' ');
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content Handler Types
|
|
3
|
+
*
|
|
4
|
+
* Shared interfaces for content-type specific handlers.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Shape returned by a content handler when it produced sanitized output.
 */
export interface HandlerSuccessResult {
  /** Discriminant; always `'sanitized'` for this variant. */
  status: 'sanitized';
  /** MIME type string reported for the content. */
  content_type: string;
  /** The sanitized content as text. */
  sanitized_content: string;
  /** Summary of what the sanitization step reported. */
  sanitization: {
    patterns_detected: string[];
    pii_types_redacted: string[];
    pii_allowlisted: { type: string; value: string; reason: string }[];
    sanitized_fields: number;
  };
  /** Elapsed handler time in milliseconds. */
  processing_time_ms: number;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Shape returned by a content handler that did not produce sanitized output,
 * either because processing failed (`'error'`) or because the content was
 * refused (`'rejected'`).
 */
export interface HandlerErrorResult {
  /** Discriminant separating failures from refusals. */
  status: 'rejected' | 'error';
  /** Machine-readable reason code. */
  reason: string;
  /** MIME type string the result refers to. */
  mime: string;
  /** Human-readable description. */
  message: string;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Every outcome a content handler can return; discriminate on `status`.
 */
export type HandlerResult =
  | HandlerSuccessResult
  | HandlerErrorResult;
|
|
37
|
+
|
|
38
|
+
/**
 * Signature shared by content handlers: takes the raw content and its MIME
 * type and returns a result either directly or through a Promise.
 */
export type ContentHandler = (
  content: string | Buffer,
  mimeType: string
) => HandlerResult | Promise<HandlerResult>;
|
package/src/tools/fetch.ts
CHANGED
|
@@ -10,6 +10,7 @@ import { renderPage } from '../browser/playwright-renderer.js';
|
|
|
10
10
|
import { sanitize } from '../sanitizer/index.js';
|
|
11
11
|
import { truncateContent } from '../utils/truncate.js';
|
|
12
12
|
import { detectFormat, convertJson, convertXml, convertRss } from '../utils/format-converter.js';
|
|
13
|
+
import { routeContentHandler, normalizeMimeType } from '../content-handlers/index.js';
|
|
13
14
|
import type { VisusFetchInput, VisusFetchOutput, Result } from '../types.js';
|
|
14
15
|
import { Err } from '../types.js';
|
|
15
16
|
|
|
@@ -41,8 +42,72 @@ export async function visusFetch(input: VisusFetchInput): Promise<Result<VisusFe
|
|
|
41
42
|
const { html, title, contentType } = renderResult.value;
|
|
42
43
|
const rawContent = html || '';
|
|
43
44
|
|
|
44
|
-
// Step 2: Detect
|
|
45
|
+
// Step 2: Detect content type and route to specialized handlers if applicable
|
|
45
46
|
const detectedContentType = contentType || 'text/html';
|
|
47
|
+
const normalizedMime = normalizeMimeType(detectedContentType);
|
|
48
|
+
|
|
49
|
+
// Check if content requires specialized handler (PDF, JSON, SVG)
|
|
50
|
+
if (normalizedMime === 'application/pdf' ||
|
|
51
|
+
normalizedMime === 'application/json' ||
|
|
52
|
+
normalizedMime === 'text/json' ||
|
|
53
|
+
normalizedMime === 'image/svg+xml') {
|
|
54
|
+
|
|
55
|
+
// Route to specialized content handler
|
|
56
|
+
const handlerResult = await routeContentHandler(rawContent, detectedContentType);
|
|
57
|
+
|
|
58
|
+
// Handle unsupported or error cases
|
|
59
|
+
if (handlerResult.status === 'rejected' || handlerResult.status === 'error') {
|
|
60
|
+
return Err(new Error(handlerResult.message));
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Type guard: ensure we have a success result
|
|
64
|
+
if (handlerResult.status !== 'sanitized') {
|
|
65
|
+
return Err(new Error('Unexpected handler result status'));
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// Handler success - use the already-sanitized content
|
|
69
|
+
const sanitizedContent = handlerResult.sanitized_content;
|
|
70
|
+
const sanitization = handlerResult.sanitization;
|
|
71
|
+
const truncationResult = truncateContent(sanitizedContent);
|
|
72
|
+
|
|
73
|
+
// Determine format_detected based on MIME type
|
|
74
|
+
let formatDetected: 'html' | 'json' | 'xml' | 'rss' = 'html';
|
|
75
|
+
if (normalizedMime === 'application/json' || normalizedMime === 'text/json') {
|
|
76
|
+
formatDetected = 'json';
|
|
77
|
+
} else if (normalizedMime === 'image/svg+xml') {
|
|
78
|
+
formatDetected = 'xml'; // SVG is XML-based
|
|
79
|
+
} else if (normalizedMime === 'application/pdf') {
|
|
80
|
+
// PDF doesn't have a format_detected value in the current schema
|
|
81
|
+
// Leaving formatDetected at its initial 'html' value for now
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
const output: VisusFetchOutput = {
|
|
85
|
+
url,
|
|
86
|
+
content: truncationResult.content,
|
|
87
|
+
sanitization: {
|
|
88
|
+
patterns_detected: sanitization.patterns_detected,
|
|
89
|
+
pii_types_redacted: sanitization.pii_types_redacted,
|
|
90
|
+
pii_allowlisted: sanitization.pii_allowlisted,
|
|
91
|
+
content_modified: sanitization.sanitized_fields > 0
|
|
92
|
+
},
|
|
93
|
+
metadata: {
|
|
94
|
+
title: title || 'Untitled',
|
|
95
|
+
fetched_at: new Date().toISOString(),
|
|
96
|
+
content_length_original: rawContent.length,
|
|
97
|
+
content_length_sanitized: sanitizedContent.length,
|
|
98
|
+
format_detected: formatDetected,
|
|
99
|
+
content_type: detectedContentType,
|
|
100
|
+
...(truncationResult.truncated && {
|
|
101
|
+
truncated: true,
|
|
102
|
+
truncated_at_chars: truncationResult.truncated_at_chars
|
|
103
|
+
})
|
|
104
|
+
}
|
|
105
|
+
};
|
|
106
|
+
|
|
107
|
+
return { ok: true, value: output };
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Step 3: For HTML/XML/RSS - use existing format conversion flow
|
|
46
111
|
const formatType = detectFormat(detectedContentType);
|
|
47
112
|
|
|
48
113
|
let processedContent = rawContent;
|
|
@@ -57,15 +122,15 @@ export async function visusFetch(input: VisusFetchInput): Promise<Result<VisusFe
|
|
|
57
122
|
}
|
|
58
123
|
// For 'html' format, processedContent remains as rawContent
|
|
59
124
|
|
|
60
|
-
// Step
|
|
125
|
+
// Step 4: CRITICAL - Sanitize content (injection detection + PII redaction with allowlisting)
|
|
61
126
|
// This step CANNOT be skipped or bypassed
|
|
62
127
|
const sanitizationResult = sanitize(processedContent, url);
|
|
63
128
|
|
|
64
|
-
// Step
|
|
129
|
+
// Step 5: Apply token ceiling truncation (AFTER sanitization)
|
|
65
130
|
// Anthropic MCP Directory enforces 25,000 token response limit
|
|
66
131
|
const truncationResult = truncateContent(sanitizationResult.content);
|
|
67
132
|
|
|
68
|
-
// Step
|
|
133
|
+
// Step 6: Build output
|
|
69
134
|
const output: VisusFetchOutput = {
|
|
70
135
|
url,
|
|
71
136
|
content: truncationResult.content,
|