visus-mcp 0.6.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/.claude/settings.local.json +6 -1
  2. package/.env.status +7 -0
  3. package/CHANGELOG.md +65 -0
  4. package/CLAUDE.md +3 -0
  5. package/README.md +15 -7
  6. package/SECURITY.md +2 -0
  7. package/STATUS.md +203 -9
  8. package/dist/content-handlers/index.d.ts +36 -0
  9. package/dist/content-handlers/index.d.ts.map +1 -0
  10. package/dist/content-handlers/index.js +59 -0
  11. package/dist/content-handlers/index.js.map +1 -0
  12. package/dist/content-handlers/json-handler.d.ts +28 -0
  13. package/dist/content-handlers/json-handler.d.ts.map +1 -0
  14. package/dist/content-handlers/json-handler.js +116 -0
  15. package/dist/content-handlers/json-handler.js.map +1 -0
  16. package/dist/content-handlers/pdf-handler.d.ts +29 -0
  17. package/dist/content-handlers/pdf-handler.d.ts.map +1 -0
  18. package/dist/content-handlers/pdf-handler.js +77 -0
  19. package/dist/content-handlers/pdf-handler.js.map +1 -0
  20. package/dist/content-handlers/svg-handler.d.ts +35 -0
  21. package/dist/content-handlers/svg-handler.d.ts.map +1 -0
  22. package/dist/content-handlers/svg-handler.js +206 -0
  23. package/dist/content-handlers/svg-handler.js.map +1 -0
  24. package/dist/content-handlers/types.d.ts +42 -0
  25. package/dist/content-handlers/types.d.ts.map +1 -0
  26. package/dist/content-handlers/types.js +7 -0
  27. package/dist/content-handlers/types.js.map +1 -0
  28. package/dist/tools/fetch.d.ts.map +1 -1
  29. package/dist/tools/fetch.js +62 -4
  30. package/dist/tools/fetch.js.map +1 -1
  31. package/package.json +2 -1
  32. package/server.json +2 -2
  33. package/src/content-handlers/index.ts +72 -0
  34. package/src/content-handlers/json-handler.ts +137 -0
  35. package/src/content-handlers/pdf-handler.ts +91 -0
  36. package/src/content-handlers/svg-handler.ts +243 -0
  37. package/src/content-handlers/types.ts +44 -0
  38. package/src/tools/fetch.ts +69 -4
  39. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -47
  40. package/.github/ISSUE_TEMPLATE/false_positive.md +0 -43
  41. package/.github/ISSUE_TEMPLATE/new_pattern.md +0 -49
  42. package/.github/ISSUE_TEMPLATE/security_report.md +0 -31
  43. package/.github/PULL_REQUEST_TEMPLATE.md +0 -39
  44. package/.mcpregistry_github_token +0 -1
  45. package/.mcpregistry_registry_token +0 -1
  46. package/CONTRIBUTING.md +0 -329
  47. package/LINKEDIN-STRATEGY.md +0 -367
  48. package/ROADMAP.md +0 -221
  49. package/SECURITY-AUDIT-v1.md +0 -277
  50. package/SUBMISSION.md +0 -66
  51. package/TROUBLESHOOT-AUTH-20260322-2019.md +0 -291
  52. package/TROUBLESHOOT-BUILD-20260319-1450.md +0 -546
  53. package/TROUBLESHOOT-COGNITO-AUTH-20260324-2029.md +0 -415
  54. package/TROUBLESHOOT-COGNITO-JWT-20260324.md +0 -592
  55. package/TROUBLESHOOT-FETCH-20260320-1150.md +0 -168
  56. package/TROUBLESHOOT-JEST-20260323-1357.md +0 -139
  57. package/TROUBLESHOOT-LAMBDA-20260322-1945.md +0 -183
  58. package/TROUBLESHOOT-PLAYWRIGHT-20260321-1549.md +0 -217
  59. package/TROUBLESHOOT-SSL-20260320-1138.md +0 -171
  60. package/TROUBLESHOOT-STRUCTURED-20260320-1200.md +0 -246
  61. package/TROUBLESHOOT-TEST-20260320-0942.md +0 -281
  62. package/VISUS-CLAUDE-CODE-PROMPT.md +0 -324
  63. package/VISUS-PROJECT-PLAN.md +0 -205
  64. package/cdk.json +0 -73
  65. package/infrastructure/app.ts +0 -39
  66. package/infrastructure/stack.ts +0 -298
  67. package/jest.config.js +0 -33
  68. package/jest.setup.js +0 -9
  69. package/lambda-deploy/index.js +0 -81512
  70. package/lambda-deploy/index.js.map +0 -7
  71. package/lambda-package/browser/__mocks__/playwright-renderer.d.ts +0 -25
  72. package/lambda-package/browser/__mocks__/playwright-renderer.d.ts.map +0 -1
  73. package/lambda-package/browser/__mocks__/playwright-renderer.js +0 -119
  74. package/lambda-package/browser/__mocks__/playwright-renderer.js.map +0 -1
  75. package/lambda-package/browser/playwright-renderer.d.ts +0 -40
  76. package/lambda-package/browser/playwright-renderer.d.ts.map +0 -1
  77. package/lambda-package/browser/playwright-renderer.js +0 -214
  78. package/lambda-package/browser/playwright-renderer.js.map +0 -1
  79. package/lambda-package/browser/reader.d.ts +0 -31
  80. package/lambda-package/browser/reader.d.ts.map +0 -1
  81. package/lambda-package/browser/reader.js +0 -98
  82. package/lambda-package/browser/reader.js.map +0 -1
  83. package/lambda-package/index.d.ts +0 -18
  84. package/lambda-package/index.d.ts.map +0 -1
  85. package/lambda-package/index.js +0 -238
  86. package/lambda-package/index.js.map +0 -1
  87. package/lambda-package/lambda-handler.d.ts +0 -28
  88. package/lambda-package/lambda-handler.d.ts.map +0 -1
  89. package/lambda-package/lambda-handler.js +0 -257
  90. package/lambda-package/lambda-handler.js.map +0 -1
  91. package/lambda-package/package-lock.json +0 -7435
  92. package/lambda-package/package.json +0 -74
  93. package/lambda-package/runtime.d.ts +0 -50
  94. package/lambda-package/runtime.d.ts.map +0 -1
  95. package/lambda-package/runtime.js +0 -86
  96. package/lambda-package/runtime.js.map +0 -1
  97. package/lambda-package/sanitizer/elicit-runner.d.ts +0 -48
  98. package/lambda-package/sanitizer/elicit-runner.d.ts.map +0 -1
  99. package/lambda-package/sanitizer/elicit-runner.js +0 -100
  100. package/lambda-package/sanitizer/elicit-runner.js.map +0 -1
  101. package/lambda-package/sanitizer/framework-mapper.d.ts +0 -24
  102. package/lambda-package/sanitizer/framework-mapper.d.ts.map +0 -1
  103. package/lambda-package/sanitizer/framework-mapper.js +0 -342
  104. package/lambda-package/sanitizer/framework-mapper.js.map +0 -1
  105. package/lambda-package/sanitizer/hitl-gate.d.ts +0 -69
  106. package/lambda-package/sanitizer/hitl-gate.d.ts.map +0 -1
  107. package/lambda-package/sanitizer/hitl-gate.js +0 -101
  108. package/lambda-package/sanitizer/hitl-gate.js.map +0 -1
  109. package/lambda-package/sanitizer/index.d.ts +0 -63
  110. package/lambda-package/sanitizer/index.d.ts.map +0 -1
  111. package/lambda-package/sanitizer/index.js +0 -105
  112. package/lambda-package/sanitizer/index.js.map +0 -1
  113. package/lambda-package/sanitizer/injection-detector.d.ts +0 -34
  114. package/lambda-package/sanitizer/injection-detector.d.ts.map +0 -1
  115. package/lambda-package/sanitizer/injection-detector.js +0 -89
  116. package/lambda-package/sanitizer/injection-detector.js.map +0 -1
  117. package/lambda-package/sanitizer/patterns.d.ts +0 -30
  118. package/lambda-package/sanitizer/patterns.d.ts.map +0 -1
  119. package/lambda-package/sanitizer/patterns.js +0 -372
  120. package/lambda-package/sanitizer/patterns.js.map +0 -1
  121. package/lambda-package/sanitizer/pii-allowlist.d.ts +0 -49
  122. package/lambda-package/sanitizer/pii-allowlist.d.ts.map +0 -1
  123. package/lambda-package/sanitizer/pii-allowlist.js +0 -231
  124. package/lambda-package/sanitizer/pii-allowlist.js.map +0 -1
  125. package/lambda-package/sanitizer/pii-redactor.d.ts +0 -41
  126. package/lambda-package/sanitizer/pii-redactor.d.ts.map +0 -1
  127. package/lambda-package/sanitizer/pii-redactor.js +0 -213
  128. package/lambda-package/sanitizer/pii-redactor.js.map +0 -1
  129. package/lambda-package/sanitizer/severity-classifier.d.ts +0 -33
  130. package/lambda-package/sanitizer/severity-classifier.d.ts.map +0 -1
  131. package/lambda-package/sanitizer/severity-classifier.js +0 -113
  132. package/lambda-package/sanitizer/severity-classifier.js.map +0 -1
  133. package/lambda-package/sanitizer/threat-reporter.d.ts +0 -66
  134. package/lambda-package/sanitizer/threat-reporter.d.ts.map +0 -1
  135. package/lambda-package/sanitizer/threat-reporter.js +0 -163
  136. package/lambda-package/sanitizer/threat-reporter.js.map +0 -1
  137. package/lambda-package/tools/fetch-structured.d.ts +0 -51
  138. package/lambda-package/tools/fetch-structured.d.ts.map +0 -1
  139. package/lambda-package/tools/fetch-structured.js +0 -237
  140. package/lambda-package/tools/fetch-structured.js.map +0 -1
  141. package/lambda-package/tools/fetch.d.ts +0 -49
  142. package/lambda-package/tools/fetch.d.ts.map +0 -1
  143. package/lambda-package/tools/fetch.js +0 -131
  144. package/lambda-package/tools/fetch.js.map +0 -1
  145. package/lambda-package/tools/read.d.ts +0 -51
  146. package/lambda-package/tools/read.d.ts.map +0 -1
  147. package/lambda-package/tools/read.js +0 -127
  148. package/lambda-package/tools/read.js.map +0 -1
  149. package/lambda-package/tools/search.d.ts +0 -45
  150. package/lambda-package/tools/search.d.ts.map +0 -1
  151. package/lambda-package/tools/search.js +0 -220
  152. package/lambda-package/tools/search.js.map +0 -1
  153. package/lambda-package/types.d.ts +0 -167
  154. package/lambda-package/types.d.ts.map +0 -1
  155. package/lambda-package/types.js +0 -16
  156. package/lambda-package/types.js.map +0 -1
  157. package/lambda-package/utils/format-converter.d.ts +0 -39
  158. package/lambda-package/utils/format-converter.d.ts.map +0 -1
  159. package/lambda-package/utils/format-converter.js +0 -191
  160. package/lambda-package/utils/format-converter.js.map +0 -1
  161. package/lambda-package/utils/truncate.d.ts +0 -26
  162. package/lambda-package/utils/truncate.d.ts.map +0 -1
  163. package/lambda-package/utils/truncate.js +0 -54
  164. package/lambda-package/utils/truncate.js.map +0 -1
  165. package/lambda.zip +0 -0
  166. package/test-output.txt +0 -4
  167. package/tests/auth-smoke.test.ts +0 -480
  168. package/tests/elicit-runner.test.ts +0 -232
  169. package/tests/fetch-tool.test.ts +0 -922
  170. package/tests/hitl-gate.test.ts +0 -267
  171. package/tests/injection-corpus.ts +0 -338
  172. package/tests/pii-allowlist.test.ts +0 -282
  173. package/tests/reader.test.ts +0 -353
  174. package/tests/sanitizer.test.ts +0 -358
  175. package/tests/search.test.ts +0 -456
  176. package/tests/threat-reporter.test.ts +0 -334
  177. package/tsconfig.cdk.json +0 -35
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Content Handlers Module
3
+ *
4
+ * Central routing for content-type specific sanitization handlers.
5
+ * Detects MIME type from Content-Type header and routes to appropriate handler.
6
+ *
7
+ * Supported content types:
8
+ * - application/pdf -> PDF handler
9
+ * - application/json -> JSON handler
10
+ * - image/svg+xml -> SVG handler
11
+ *
12
+ * Unsupported types return structured rejection (no throw).
13
+ */
14
+
15
+ import { handlePdf } from './pdf-handler.js';
16
+ import { handleJson } from './json-handler.js';
17
+ import { handleSvg } from './svg-handler.js';
18
+ import type { HandlerResult } from './types.js';
19
+
20
/**
 * Reduce a raw Content-Type header value to its bare MIME type.
 *
 * The value is lower-cased, everything from the first `;` onward (charset,
 * boundary, ...) is discarded, and surrounding whitespace is trimmed.
 *
 * Examples:
 * - "application/pdf; charset=utf-8" -> "application/pdf"
 * - "application/json" -> "application/json"
 * - "IMAGE/SVG+XML" -> "image/svg+xml"
 * - "" -> ""
 *
 * @param contentType - Raw Content-Type header value
 * @returns Normalized MIME type (lowercase, parameters stripped)
 */
export function normalizeMimeType(contentType: string): string {
  // Destructure with a default so the result is typed `string` even when the
  // compiler runs with `noUncheckedIndexedAccess`.
  const [baseType = ''] = contentType.toLowerCase().split(';');
  return baseType.trim();
}
34
+
35
/**
 * Dispatch content to the handler registered for its MIME type.
 *
 * The Content-Type value is normalized first; PDF, JSON (`application/json`
 * and `text/json`) and SVG each go to their dedicated handler. Any other type
 * yields a structured `rejected` result instead of throwing.
 *
 * @param content - Raw content (string or Buffer)
 * @param contentType - Content-Type header value
 * @returns Handler result (success or error/rejected)
 */
export async function routeContentHandler(
  content: string | Buffer,
  contentType: string
): Promise<HandlerResult> {
  const mimeType = normalizeMimeType(contentType);

  if (mimeType === 'application/pdf') {
    return handlePdf(content, mimeType);
  }

  if (mimeType === 'application/json' || mimeType === 'text/json') {
    return handleJson(content, mimeType);
  }

  if (mimeType === 'image/svg+xml') {
    return handleSvg(content, mimeType);
  }

  // No handler matched: report the rejection as data, not as an exception.
  const rejection: HandlerResult = {
    status: 'rejected',
    reason: 'UNSUPPORTED_CONTENT_TYPE',
    mime: mimeType,
    message: `Content type ${mimeType} is not supported by Visus-MCP.`
  };
  return rejection;
}
70
+
71
+ // Re-export types
72
+ export type { HandlerResult, HandlerSuccessResult, HandlerErrorResult } from './types.js';
@@ -0,0 +1,137 @@
1
+ /**
2
+ * JSON Content Handler
3
+ *
4
+ * Handles application/json content type. Recursively traverses all nodes in the JSON
5
+ * object tree and applies the full injection pattern registry to every string value.
6
+ *
7
+ * What it handles:
8
+ * - All string values in the JSON tree (any depth)
9
+ * - Arrays, nested objects, and mixed-type arrays
10
+ * - Falls back to plain text pipeline if JSON.parse fails
11
+ *
12
+ * What it strips:
13
+ * - Nothing (preserves original structure)
14
+ *
15
+ * What it passes through:
16
+ * - Sanitized JSON with original structure preserved
17
+ * - All non-string values pass through unchanged
18
+ */
19
+
20
+ import { sanitize } from '../sanitizer/index.js';
21
+ import type { HandlerResult } from './types.js';
22
+
23
+ /**
24
+ * Handle JSON content
25
+ *
26
+ * @param content - Raw JSON string
27
+ * @param mimeType - Original MIME type
28
+ * @returns Sanitized handler result
29
+ */
30
+ export function handleJson(
31
+ content: string | Buffer,
32
+ mimeType: string
33
+ ): HandlerResult {
34
+ const startTime = Date.now();
35
+
36
+ // Convert Buffer to string if needed
37
+ const jsonString = Buffer.isBuffer(content) ? content.toString('utf-8') : content;
38
+
39
+ try {
40
+ // Parse JSON
41
+ const parsed = JSON.parse(jsonString);
42
+
43
+ // Track sanitization metadata across all fields
44
+ let sanitizedFieldCount = 0;
45
+ const allPatternsDetected = new Set<string>();
46
+ const allPiiTypesRedacted = new Set<string>();
47
+ const allPiiAllowlisted: Array<{ type: string; value: string; reason: string }> = [];
48
+
49
+ // Recursively sanitize all string values
50
+ const sanitized = recursiveSanitize(parsed, (text: string) => {
51
+ const result = sanitize(text);
52
+ if (result.sanitization.content_modified) {
53
+ sanitizedFieldCount++;
54
+ }
55
+
56
+ // Aggregate metadata
57
+ result.sanitization.patterns_detected.forEach(p => allPatternsDetected.add(p));
58
+ result.sanitization.pii_types_redacted.forEach(p => allPiiTypesRedacted.add(p));
59
+ allPiiAllowlisted.push(...result.sanitization.pii_allowlisted);
60
+
61
+ return result.content;
62
+ });
63
+
64
+ // Re-stringify with 2-space indent
65
+ const sanitizedJson = JSON.stringify(sanitized, null, 2);
66
+
67
+ const processingTime = Date.now() - startTime;
68
+
69
+ return {
70
+ status: 'sanitized',
71
+ content_type: mimeType,
72
+ sanitized_content: sanitizedJson,
73
+ sanitization: {
74
+ patterns_detected: Array.from(allPatternsDetected),
75
+ pii_types_redacted: Array.from(allPiiTypesRedacted),
76
+ pii_allowlisted: allPiiAllowlisted,
77
+ sanitized_fields: sanitizedFieldCount
78
+ },
79
+ processing_time_ms: processingTime
80
+ };
81
+
82
+ } catch (error) {
83
+ // JSON.parse failed - fall back to plain text sanitization
84
+ const sanitizationResult = sanitize(jsonString);
85
+
86
+ const processingTime = Date.now() - startTime;
87
+
88
+ return {
89
+ status: 'sanitized',
90
+ content_type: mimeType,
91
+ sanitized_content: sanitizationResult.content,
92
+ sanitization: {
93
+ patterns_detected: sanitizationResult.sanitization.patterns_detected,
94
+ pii_types_redacted: sanitizationResult.sanitization.pii_types_redacted,
95
+ pii_allowlisted: sanitizationResult.sanitization.pii_allowlisted,
96
+ sanitized_fields: sanitizationResult.sanitization.patterns_detected.length
97
+ },
98
+ processing_time_ms: processingTime
99
+ };
100
+ }
101
+ }
102
+
103
+ /**
104
+ * Recursively traverse JSON tree and sanitize all string values
105
+ *
106
+ * @param obj - JSON object/array/primitive
107
+ * @param sanitizeFn - Function to sanitize string values
108
+ * @returns Sanitized object with same structure
109
+ */
110
+ function recursiveSanitize(obj: any, sanitizeFn: (text: string) => string): any {
111
+ // Handle null
112
+ if (obj === null) {
113
+ return null;
114
+ }
115
+
116
+ // Handle string - sanitize it
117
+ if (typeof obj === 'string') {
118
+ return sanitizeFn(obj);
119
+ }
120
+
121
+ // Handle array - recursively sanitize each element
122
+ if (Array.isArray(obj)) {
123
+ return obj.map((item) => recursiveSanitize(item, sanitizeFn));
124
+ }
125
+
126
+ // Handle object - recursively sanitize each value
127
+ if (typeof obj === 'object') {
128
+ const sanitizedObj: Record<string, any> = {};
129
+ for (const [key, value] of Object.entries(obj)) {
130
+ sanitizedObj[key] = recursiveSanitize(value, sanitizeFn);
131
+ }
132
+ return sanitizedObj;
133
+ }
134
+
135
+ // Handle primitives (number, boolean, undefined) - pass through
136
+ return obj;
137
+ }
@@ -0,0 +1,91 @@
1
+ /**
2
+ * PDF Content Handler
3
+ *
4
+ * Handles application/pdf content type. Extracts text and metadata from PDF files,
5
+ * passes all text through the injection pattern registry, and returns sanitized plain text.
6
+ *
7
+ * What it handles:
8
+ * - PDF body text (full document)
9
+ * - PDF metadata: title, author, subject, keywords, creator, producer
10
+ * - Annotation text
11
+ * - Form field values
12
+ *
13
+ * What it strips:
14
+ * - Embedded binary objects (fonts, images, attachments)
15
+ * - Returns only extracted text, not original binary
16
+ *
17
+ * What it passes through:
18
+ * - All extracted text after injection pattern sanitization
19
+ */
20
+
21
+ import { PDFParse } from 'pdf-parse';
22
+ import { sanitize } from '../sanitizer/index.js';
23
+ import type { HandlerResult } from './types.js';
24
+
25
/**
 * Extract and sanitize the text of a PDF document.
 *
 * The body text (`getText`) and the document info dictionary (`getInfo`) are
 * read with pdf-parse; the string-valued Title, Author, Subject, Keywords,
 * Creator and Producer entries are appended to the body text, and the
 * combined text is passed through `sanitize`. Any failure is reported as a
 * `PDF_PARSE_FAILED` error result rather than thrown.
 *
 * @param content - Raw PDF binary data as Buffer or string
 * @param mimeType - Original MIME type, echoed back in the result
 * @returns Sanitized plain text on success, or a structured error result
 */
export async function handlePdf(
  content: string | Buffer,
  mimeType: string
): Promise<HandlerResult> {
  const startTime = Date.now();
  let parser: PDFParse | undefined;

  try {
    // NOTE(review): a string input is re-encoded as UTF-8 here. If callers
    // hand over PDF bytes that were already decoded to a string, the binary
    // data may not round-trip - confirm callers pass a Buffer for PDFs.
    const buffer = Buffer.isBuffer(content) ? content : Buffer.from(content);

    // Parse PDF using pdf-parse v2 API
    parser = new PDFParse({ data: buffer });

    const textResult = await parser.getText();
    const infoResult = await parser.getInfo();

    const bodyText = textResult.text || '';
    const metadata = infoResult.info || {};

    // Metadata is appended so it goes through the same injection scan.
    let combinedText = bodyText;
    const metadataFields = ['Title', 'Author', 'Subject', 'Keywords', 'Creator', 'Producer'];
    for (const field of metadataFields) {
      const value = metadata[field];
      if (value && typeof value === 'string') {
        combinedText += `\n\n${field}: ${value}`;
      }
    }

    const sanitizationResult = sanitize(combinedText);
    const {
      patterns_detected,
      pii_types_redacted,
      pii_allowlisted,
      content_modified
    } = sanitizationResult.sanitization;

    return {
      status: 'sanitized',
      content_type: mimeType,
      sanitized_content: sanitizationResult.content,
      sanitization: {
        patterns_detected,
        pii_types_redacted,
        pii_allowlisted,
        // Keep the historical "number of patterns" value, but never report 0
        // when the text was actually changed (e.g. PII-only redaction), since
        // callers derive "content modified" from `sanitized_fields > 0`.
        sanitized_fields: Math.max(patterns_detected.length, content_modified ? 1 : 0)
      },
      processing_time_ms: Date.now() - startTime
    };

  } catch (error) {
    return {
      status: 'error',
      reason: 'PDF_PARSE_FAILED',
      mime: mimeType,
      message: error instanceof Error ? error.message : String(error)
    };
  } finally {
    // Release the parser's document resources on every path. Cleanup is
    // best-effort: a failure here must not replace the result computed above.
    if (parser) {
      try {
        await parser.destroy();
      } catch {
        // ignored on purpose
      }
    }
  }
}
@@ -0,0 +1,243 @@
1
+ /**
2
+ * SVG Content Handler
3
+ *
4
+ * Handles image/svg+xml content type. SVG is XML, not a binary image, and can contain
5
+ * executable code and external references. This handler strips dangerous elements and
6
+ * attributes unconditionally, then sanitizes remaining text content.
7
+ *
8
+ * What it handles:
9
+ * - All text content in SVG elements after stripping dangerous parts
10
+ *
11
+ * What it strips (unconditionally, no attempt to sanitize):
12
+ * - <script> elements and all children
13
+ * - <use> elements with external href or xlink:href attributes
14
+ * - <foreignObject> elements and all children
15
+ * - All event handler attributes (onload, onclick, onerror, etc.)
16
+ * - <set> and <animate> elements that reference external resources
17
+ * - data: URI attributes
18
+ *
19
+ * What it passes through (after injection scan):
20
+ * - Path data (d attribute)
21
+ * - Text elements and their content
22
+ * - <title> and <desc> elements
23
+ * - Presentation attributes (fill, stroke, transform, etc.)
24
+ * - viewBox, width, height attributes
25
+ */
26
+
27
+ import { XMLParser, XMLBuilder } from 'fast-xml-parser';
28
+ import { sanitize } from '../sanitizer/index.js';
29
+ import type { HandlerResult } from './types.js';
30
+
31
/**
 * Strip dangerous constructs from an SVG document and sanitize its text.
 *
 * The SVG is parsed with fast-xml-parser, passed through
 * `stripDangerousContent`, and then every element-text string in the
 * remaining tree is replaced by the output of `sanitize` before the document
 * is rebuilt. Sanitizing in place matters: the returned markup must contain
 * the sanitized text, not merely report what a scan found.
 *
 * Attribute values are copied through unchanged by the text pass.
 * NOTE(review): attribute values (e.g. aria-label) are therefore not scanned
 * for injection patterns - confirm whether they should be.
 *
 * @param content - Raw SVG XML string or UTF-8 encoded Buffer
 * @param mimeType - Original MIME type, echoed back in the result
 * @returns Sanitized SVG markup, or an `SVG_PARSE_FAILED` error result
 */
export function handleSvg(
  content: string | Buffer,
  mimeType: string
): HandlerResult {
  const startTime = Date.now();

  const svgString = Buffer.isBuffer(content) ? content.toString('utf-8') : content;

  try {
    const parser = new XMLParser({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      textNodeName: '#text',
      preserveOrder: false,
      removeNSPrefix: true,
    });

    const parsed = parser.parse(svgString);

    // Strip dangerous elements and attributes
    const stripped = stripDangerousContent(parsed);

    // Metadata is aggregated across every text node that gets sanitized.
    let sanitizedFieldCount = 0;
    const patternsDetected = new Set<string>();
    const piiTypesRedacted = new Set<string>();
    const piiAllowlisted: Array<{ type: string; value: string; reason: string }> = [];

    const cleaned = sanitizeSvgElementText(stripped, (text: string) => {
      const result = sanitize(text);
      if (result.sanitization.content_modified) {
        sanitizedFieldCount++;
      }

      result.sanitization.patterns_detected.forEach(p => patternsDetected.add(p));
      result.sanitization.pii_types_redacted.forEach(p => piiTypesRedacted.add(p));
      piiAllowlisted.push(...result.sanitization.pii_allowlisted);

      return result.content;
    });

    // Rebuild SVG from the sanitized tree
    const builder = new XMLBuilder({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      textNodeName: '#text',
      format: true,
      suppressEmptyNode: true,
    });

    const sanitizedSvg = builder.build(cleaned);

    return {
      status: 'sanitized',
      content_type: mimeType,
      sanitized_content: sanitizedSvg,
      sanitization: {
        patterns_detected: Array.from(patternsDetected),
        pii_types_redacted: Array.from(piiTypesRedacted),
        pii_allowlisted: piiAllowlisted,
        sanitized_fields: sanitizedFieldCount
      },
      processing_time_ms: Date.now() - startTime
    };

  } catch (error) {
    return {
      status: 'error',
      reason: 'SVG_PARSE_FAILED',
      mime: mimeType,
      message: error instanceof Error ? error.message : String(error)
    };
  }
}

/**
 * Rebuild a parsed SVG tree with every element-text string sanitized.
 *
 * Non-empty strings found as element content (including `#text` nodes) are
 * replaced by `sanitizeFn(string)`. Keys carrying the `@_` attribute prefix
 * are copied unchanged, as are numbers, booleans and null.
 */
function sanitizeSvgElementText(
  node: unknown,
  sanitizeFn: (text: string) => string
): unknown {
  if (typeof node === 'string') {
    return node.length > 0 ? sanitizeFn(node) : node;
  }

  if (typeof node !== 'object' || node === null) {
    return node;
  }

  if (Array.isArray(node)) {
    return node.map((child) => sanitizeSvgElementText(child, sanitizeFn));
  }

  return Object.fromEntries(
    Object.entries(node).map(([key, value]): [string, unknown] => [
      key,
      key.startsWith('@_') ? value : sanitizeSvgElementText(value, sanitizeFn)
    ])
  );
}
112
+
113
/** Element names whose `href` may pull in an external resource. */
const REFERENCING_ELEMENTS: ReadonlySet<string> = new Set(['use', 'set', 'animate']);

/**
 * Strip dangerous content from a parsed SVG tree.
 *
 * Removes:
 * - <script> and <foreignObject> elements
 * - <use>, <set> and <animate> elements whose href / xlink:href starts with
 *   http://, https:// or // (single elements and repeated siblings alike)
 * - Attributes whose name starts with "on" (event handlers, any letter case)
 * - Attributes whose value is a data: URI
 *
 * Element text that is a data: URI is replaced by an empty string.
 *
 * NOTE(review): `javascript:` URIs are not handled here - confirm whether
 * they should be stripped as well.
 *
 * @param node - Parsed tree (or subtree) in fast-xml-parser object form,
 *   with attributes prefixed by `@_`
 * @returns A new tree without the dangerous parts; non-object values are
 *   returned unchanged
 */
function stripDangerousContent(node: unknown): unknown {
  if (typeof node !== 'object' || node === null) {
    return node;
  }

  if (Array.isArray(node)) {
    return node
      .filter((item) => !shouldRemoveElement(item))
      .map((item) => stripDangerousContent(item));
  }

  const kept: Array<[string, unknown]> = [];

  for (const [key, value] of Object.entries(node)) {
    if (key === 'script' || key === 'foreignObject') {
      continue;
    }

    const isAttribute = key.startsWith('@_');

    // Event handler attributes, matched case-insensitively (onload, onClick, ...)
    if (isAttribute && key.slice(2).toLowerCase().startsWith('on')) {
      continue;
    }

    let current: unknown = value;

    if (REFERENCING_ELEMENTS.has(key)) {
      if (Array.isArray(value)) {
        // Repeated siblings arrive as an array; check each one individually.
        const local = value.filter((element) => !hasExternalHref(element));
        if (local.length === 0) {
          continue;
        }
        current = local;
      } else if (hasExternalHref(value)) {
        continue;
      }
    }

    if (typeof current === 'string' && isDataUri(current)) {
      // An attribute is removed outright; element text is blanked so the
      // element itself survives.
      if (!isAttribute) {
        kept.push([key, '']);
      }
      continue;
    }

    kept.push([key, stripDangerousContent(current)]);
  }

  // fromEntries defines own properties, so unusual key names stay plain data.
  return Object.fromEntries(kept);
}

/** True when the element's href / xlink:href points at an external resource. */
function hasExternalHref(element: unknown): boolean {
  if (typeof element !== 'object' || element === null) {
    return false;
  }

  const attributes = element as Record<string, unknown>;
  const href = attributes['@_href'] || attributes['@_xlink:href'];
  if (typeof href !== 'string') {
    return false;
  }

  const target = href.trim().toLowerCase();
  return target.startsWith('http://') || target.startsWith('https://') || target.startsWith('//');
}

/** True when the value is a data: URI, ignoring leading whitespace and case. */
function isDataUri(value: string): boolean {
  return value.trimStart().toLowerCase().startsWith('data:');
}

/**
 * Check if an array item should be removed entirely.
 *
 * An item is removed when it is an object that has a `script` or
 * `foreignObject` key.
 *
 * NOTE(review): for array items this drops the whole element that contains
 * the dangerous child, whereas a non-repeated element only loses the child -
 * confirm this asymmetry is intended.
 */
function shouldRemoveElement(element: unknown): boolean {
  if (typeof element !== 'object' || element === null) {
    return false;
  }

  return ['script', 'foreignObject'].some((tag) => tag in element);
}
202
+
203
/**
 * Extract all element text from a parsed SVG tree for injection scanning.
 *
 * Every string found as element content is collected: `#text` nodes,
 * string-valued children such as `<text>` / `<title>` / `<desc>`, and strings
 * inside arrays of repeated elements. Values stored under `@_`-prefixed keys
 * (attributes) are skipped, as are numbers, booleans and null.
 *
 * @param node - Parsed tree (or subtree) in fast-xml-parser object form
 * @returns The trimmed, non-empty fragments joined by single spaces, in
 *   traversal order; an empty string when there is no text
 */
function extractTextContent(node: unknown): string {
  const fragments: string[] = [];

  const visit = (current: unknown): void => {
    // Strings must be tested before the object check, otherwise they would be
    // discarded together with the other primitives.
    if (typeof current === 'string') {
      const trimmed = current.trim();
      if (trimmed.length > 0) {
        fragments.push(trimmed);
      }
      return;
    }

    if (typeof current !== 'object' || current === null) {
      return;
    }

    if (Array.isArray(current)) {
      for (const item of current) {
        visit(item);
      }
      return;
    }

    for (const [key, value] of Object.entries(current)) {
      if (key.startsWith('@_')) {
        continue; // attribute, not element text
      }
      visit(value);
    }
  };

  visit(node);
  return fragments.join(' ');
}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Content Handler Types
3
+ *
4
+ * Shared interfaces for content-type specific handlers.
5
+ */
6
+
7
/**
 * Result returned by a content handler that produced sanitized output.
 */
export interface HandlerSuccessResult {
  /** Discriminant marking a successful run. */
  status: 'sanitized';
  /** MIME type the handler processed. */
  content_type: string;
  /** The handler's output text. */
  sanitized_content: string;
  /** Summary of what the sanitizer reported. */
  sanitization: {
    patterns_detected: string[];
    pii_types_redacted: string[];
    pii_allowlisted: { type: string; value: string; reason: string }[];
    sanitized_fields: number;
  };
  /** Elapsed handler time in milliseconds. */
  processing_time_ms: number;
}
22
+
23
/**
 * Result returned when a handler failed (`error`) or declined the content
 * (`rejected`).
 */
export interface HandlerErrorResult {
  /** Discriminant: processing failure or refusal. */
  status: 'error' | 'rejected';
  /** Machine-readable reason code. */
  reason: string;
  /** MIME type the result refers to. */
  mime: string;
  /** Human-readable explanation. */
  message: string;
}
32
+
33
/**
 * Any outcome a content handler can return, discriminated by `status`.
 */
export type HandlerResult =
  | HandlerSuccessResult
  | HandlerErrorResult;
37
+
38
/**
 * Signature shared by content handlers: they receive the raw content and its
 * MIME type and return a result either synchronously or as a promise.
 */
export type ContentHandler = (
  content: string | Buffer,
  mimeType: string
) => Promise<HandlerResult> | HandlerResult;
@@ -10,6 +10,7 @@ import { renderPage } from '../browser/playwright-renderer.js';
10
10
  import { sanitize } from '../sanitizer/index.js';
11
11
  import { truncateContent } from '../utils/truncate.js';
12
12
  import { detectFormat, convertJson, convertXml, convertRss } from '../utils/format-converter.js';
13
+ import { routeContentHandler, normalizeMimeType } from '../content-handlers/index.js';
13
14
  import type { VisusFetchInput, VisusFetchOutput, Result } from '../types.js';
14
15
  import { Err } from '../types.js';
15
16
 
@@ -41,8 +42,72 @@ export async function visusFetch(input: VisusFetchInput): Promise<Result<VisusFe
41
42
  const { html, title, contentType } = renderResult.value;
42
43
  const rawContent = html || '';
43
44
 
44
- // Step 2: Detect format and apply format-appropriate conversion
45
+ // Step 2: Detect content type and route to specialized handlers if applicable
45
46
  const detectedContentType = contentType || 'text/html';
47
+ const normalizedMime = normalizeMimeType(detectedContentType);
48
+
49
+ // Check if content requires specialized handler (PDF, JSON, SVG)
50
+ if (normalizedMime === 'application/pdf' ||
51
+ normalizedMime === 'application/json' ||
52
+ normalizedMime === 'text/json' ||
53
+ normalizedMime === 'image/svg+xml') {
54
+
55
+ // Route to specialized content handler
56
+ const handlerResult = await routeContentHandler(rawContent, detectedContentType);
57
+
58
+ // Handle unsupported or error cases
59
+ if (handlerResult.status === 'rejected' || handlerResult.status === 'error') {
60
+ return Err(new Error(handlerResult.message));
61
+ }
62
+
63
+ // Type guard: ensure we have a success result
64
+ if (handlerResult.status !== 'sanitized') {
65
+ return Err(new Error('Unexpected handler result status'));
66
+ }
67
+
68
+ // Handler success - use the already-sanitized content
69
+ const sanitizedContent = handlerResult.sanitized_content;
70
+ const sanitization = handlerResult.sanitization;
71
+ const truncationResult = truncateContent(sanitizedContent);
72
+
73
+ // Determine format_detected based on MIME type
74
+ let formatDetected: 'html' | 'json' | 'xml' | 'rss' = 'html';
75
+ if (normalizedMime === 'application/json' || normalizedMime === 'text/json') {
76
+ formatDetected = 'json';
77
+ } else if (normalizedMime === 'image/svg+xml') {
78
+ formatDetected = 'xml'; // SVG is XML-based
79
+ } else if (normalizedMime === 'application/pdf') {
80
+ // PDF doesn't have a format_detected value in the current schema
81
+ // Leaving as 'html' for now
82
+ }
83
+
84
+ const output: VisusFetchOutput = {
85
+ url,
86
+ content: truncationResult.content,
87
+ sanitization: {
88
+ patterns_detected: sanitization.patterns_detected,
89
+ pii_types_redacted: sanitization.pii_types_redacted,
90
+ pii_allowlisted: sanitization.pii_allowlisted,
91
+ content_modified: sanitization.sanitized_fields > 0
92
+ },
93
+ metadata: {
94
+ title: title || 'Untitled',
95
+ fetched_at: new Date().toISOString(),
96
+ content_length_original: rawContent.length,
97
+ content_length_sanitized: sanitizedContent.length,
98
+ format_detected: formatDetected,
99
+ content_type: detectedContentType,
100
+ ...(truncationResult.truncated && {
101
+ truncated: true,
102
+ truncated_at_chars: truncationResult.truncated_at_chars
103
+ })
104
+ }
105
+ };
106
+
107
+ return { ok: true, value: output };
108
+ }
109
+
110
+ // Step 3: For HTML/XML/RSS - use existing format conversion flow
46
111
  const formatType = detectFormat(detectedContentType);
47
112
 
48
113
  let processedContent = rawContent;
@@ -57,15 +122,15 @@ export async function visusFetch(input: VisusFetchInput): Promise<Result<VisusFe
57
122
  }
58
123
  // For 'html' format, processedContent remains as rawContent
59
124
 
60
- // Step 3: CRITICAL - Sanitize content (injection detection + PII redaction with allowlisting)
125
+ // Step 4: CRITICAL - Sanitize content (injection detection + PII redaction with allowlisting)
61
126
  // This step CANNOT be skipped or bypassed
62
127
  const sanitizationResult = sanitize(processedContent, url);
63
128
 
64
- // Step 3: Apply token ceiling truncation (AFTER sanitization)
129
+ // Step 5: Apply token ceiling truncation (AFTER sanitization)
65
130
  // Anthropic MCP Directory enforces 25,000 token response limit
66
131
  const truncationResult = truncateContent(sanitizationResult.content);
67
132
 
68
- // Step 4: Build output
133
+ // Step 6: Build output
69
134
  const output: VisusFetchOutput = {
70
135
  url,
71
136
  content: truncationResult.content,