npm - visus-mcp - Versions diffs - 0.6.2 → 0.8.0 - Mend

visus-mcp 0.6.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (177) hide show

package/.claude/settings.local.json +6 -1
package/.env.status +7 -0
package/CHANGELOG.md +65 -0
package/CLAUDE.md +3 -0
package/README.md +15 -7
package/SECURITY.md +2 -0
package/STATUS.md +203 -9
package/dist/content-handlers/index.d.ts +36 -0
package/dist/content-handlers/index.d.ts.map +1 -0
package/dist/content-handlers/index.js +59 -0
package/dist/content-handlers/index.js.map +1 -0
package/dist/content-handlers/json-handler.d.ts +28 -0
package/dist/content-handlers/json-handler.d.ts.map +1 -0
package/dist/content-handlers/json-handler.js +116 -0
package/dist/content-handlers/json-handler.js.map +1 -0
package/dist/content-handlers/pdf-handler.d.ts +29 -0
package/dist/content-handlers/pdf-handler.d.ts.map +1 -0
package/dist/content-handlers/pdf-handler.js +77 -0
package/dist/content-handlers/pdf-handler.js.map +1 -0
package/dist/content-handlers/svg-handler.d.ts +35 -0
package/dist/content-handlers/svg-handler.d.ts.map +1 -0
package/dist/content-handlers/svg-handler.js +206 -0
package/dist/content-handlers/svg-handler.js.map +1 -0
package/dist/content-handlers/types.d.ts +42 -0
package/dist/content-handlers/types.d.ts.map +1 -0
package/dist/content-handlers/types.js +7 -0
package/dist/content-handlers/types.js.map +1 -0
package/dist/tools/fetch.d.ts.map +1 -1
package/dist/tools/fetch.js +62 -4
package/dist/tools/fetch.js.map +1 -1
package/package.json +2 -1
package/server.json +2 -2
package/src/content-handlers/index.ts +72 -0
package/src/content-handlers/json-handler.ts +137 -0
package/src/content-handlers/pdf-handler.ts +91 -0
package/src/content-handlers/svg-handler.ts +243 -0
package/src/content-handlers/types.ts +44 -0
package/src/tools/fetch.ts +69 -4
package/.github/ISSUE_TEMPLATE/bug_report.md +0 -47
package/.github/ISSUE_TEMPLATE/false_positive.md +0 -43
package/.github/ISSUE_TEMPLATE/new_pattern.md +0 -49
package/.github/ISSUE_TEMPLATE/security_report.md +0 -31
package/.github/PULL_REQUEST_TEMPLATE.md +0 -39
package/.mcpregistry_github_token +0 -1
package/.mcpregistry_registry_token +0 -1
package/CONTRIBUTING.md +0 -329
package/LINKEDIN-STRATEGY.md +0 -367
package/ROADMAP.md +0 -221
package/SECURITY-AUDIT-v1.md +0 -277
package/SUBMISSION.md +0 -66
package/TROUBLESHOOT-AUTH-20260322-2019.md +0 -291
package/TROUBLESHOOT-BUILD-20260319-1450.md +0 -546
package/TROUBLESHOOT-COGNITO-AUTH-20260324-2029.md +0 -415
package/TROUBLESHOOT-COGNITO-JWT-20260324.md +0 -592
package/TROUBLESHOOT-FETCH-20260320-1150.md +0 -168
package/TROUBLESHOOT-JEST-20260323-1357.md +0 -139
package/TROUBLESHOOT-LAMBDA-20260322-1945.md +0 -183
package/TROUBLESHOOT-PLAYWRIGHT-20260321-1549.md +0 -217
package/TROUBLESHOOT-SSL-20260320-1138.md +0 -171
package/TROUBLESHOOT-STRUCTURED-20260320-1200.md +0 -246
package/TROUBLESHOOT-TEST-20260320-0942.md +0 -281
package/VISUS-CLAUDE-CODE-PROMPT.md +0 -324
package/VISUS-PROJECT-PLAN.md +0 -205
package/cdk.json +0 -73
package/infrastructure/app.ts +0 -39
package/infrastructure/stack.ts +0 -298
package/jest.config.js +0 -33
package/jest.setup.js +0 -9
package/lambda-deploy/index.js +0 -81512
package/lambda-deploy/index.js.map +0 -7
package/lambda-package/browser/__mocks__/playwright-renderer.d.ts +0 -25
package/lambda-package/browser/__mocks__/playwright-renderer.d.ts.map +0 -1
package/lambda-package/browser/__mocks__/playwright-renderer.js +0 -119
package/lambda-package/browser/__mocks__/playwright-renderer.js.map +0 -1
package/lambda-package/browser/playwright-renderer.d.ts +0 -40
package/lambda-package/browser/playwright-renderer.d.ts.map +0 -1
package/lambda-package/browser/playwright-renderer.js +0 -214
package/lambda-package/browser/playwright-renderer.js.map +0 -1
package/lambda-package/browser/reader.d.ts +0 -31
package/lambda-package/browser/reader.d.ts.map +0 -1
package/lambda-package/browser/reader.js +0 -98
package/lambda-package/browser/reader.js.map +0 -1
package/lambda-package/index.d.ts +0 -18
package/lambda-package/index.d.ts.map +0 -1
package/lambda-package/index.js +0 -238
package/lambda-package/index.js.map +0 -1
package/lambda-package/lambda-handler.d.ts +0 -28
package/lambda-package/lambda-handler.d.ts.map +0 -1
package/lambda-package/lambda-handler.js +0 -257
package/lambda-package/lambda-handler.js.map +0 -1
package/lambda-package/package-lock.json +0 -7435
package/lambda-package/package.json +0 -74
package/lambda-package/runtime.d.ts +0 -50
package/lambda-package/runtime.d.ts.map +0 -1
package/lambda-package/runtime.js +0 -86
package/lambda-package/runtime.js.map +0 -1
package/lambda-package/sanitizer/elicit-runner.d.ts +0 -48
package/lambda-package/sanitizer/elicit-runner.d.ts.map +0 -1
package/lambda-package/sanitizer/elicit-runner.js +0 -100
package/lambda-package/sanitizer/elicit-runner.js.map +0 -1
package/lambda-package/sanitizer/framework-mapper.d.ts +0 -24
package/lambda-package/sanitizer/framework-mapper.d.ts.map +0 -1
package/lambda-package/sanitizer/framework-mapper.js +0 -342
package/lambda-package/sanitizer/framework-mapper.js.map +0 -1
package/lambda-package/sanitizer/hitl-gate.d.ts +0 -69
package/lambda-package/sanitizer/hitl-gate.d.ts.map +0 -1
package/lambda-package/sanitizer/hitl-gate.js +0 -101
package/lambda-package/sanitizer/hitl-gate.js.map +0 -1
package/lambda-package/sanitizer/index.d.ts +0 -63
package/lambda-package/sanitizer/index.d.ts.map +0 -1
package/lambda-package/sanitizer/index.js +0 -105
package/lambda-package/sanitizer/index.js.map +0 -1
package/lambda-package/sanitizer/injection-detector.d.ts +0 -34
package/lambda-package/sanitizer/injection-detector.d.ts.map +0 -1
package/lambda-package/sanitizer/injection-detector.js +0 -89
package/lambda-package/sanitizer/injection-detector.js.map +0 -1
package/lambda-package/sanitizer/patterns.d.ts +0 -30
package/lambda-package/sanitizer/patterns.d.ts.map +0 -1
package/lambda-package/sanitizer/patterns.js +0 -372
package/lambda-package/sanitizer/patterns.js.map +0 -1
package/lambda-package/sanitizer/pii-allowlist.d.ts +0 -49
package/lambda-package/sanitizer/pii-allowlist.d.ts.map +0 -1
package/lambda-package/sanitizer/pii-allowlist.js +0 -231
package/lambda-package/sanitizer/pii-allowlist.js.map +0 -1
package/lambda-package/sanitizer/pii-redactor.d.ts +0 -41
package/lambda-package/sanitizer/pii-redactor.d.ts.map +0 -1
package/lambda-package/sanitizer/pii-redactor.js +0 -213
package/lambda-package/sanitizer/pii-redactor.js.map +0 -1
package/lambda-package/sanitizer/severity-classifier.d.ts +0 -33
package/lambda-package/sanitizer/severity-classifier.d.ts.map +0 -1
package/lambda-package/sanitizer/severity-classifier.js +0 -113
package/lambda-package/sanitizer/severity-classifier.js.map +0 -1
package/lambda-package/sanitizer/threat-reporter.d.ts +0 -66
package/lambda-package/sanitizer/threat-reporter.d.ts.map +0 -1
package/lambda-package/sanitizer/threat-reporter.js +0 -163
package/lambda-package/sanitizer/threat-reporter.js.map +0 -1
package/lambda-package/tools/fetch-structured.d.ts +0 -51
package/lambda-package/tools/fetch-structured.d.ts.map +0 -1
package/lambda-package/tools/fetch-structured.js +0 -237
package/lambda-package/tools/fetch-structured.js.map +0 -1
package/lambda-package/tools/fetch.d.ts +0 -49
package/lambda-package/tools/fetch.d.ts.map +0 -1
package/lambda-package/tools/fetch.js +0 -131
package/lambda-package/tools/fetch.js.map +0 -1
package/lambda-package/tools/read.d.ts +0 -51
package/lambda-package/tools/read.d.ts.map +0 -1
package/lambda-package/tools/read.js +0 -127
package/lambda-package/tools/read.js.map +0 -1
package/lambda-package/tools/search.d.ts +0 -45
package/lambda-package/tools/search.d.ts.map +0 -1
package/lambda-package/tools/search.js +0 -220
package/lambda-package/tools/search.js.map +0 -1
package/lambda-package/types.d.ts +0 -167
package/lambda-package/types.d.ts.map +0 -1
package/lambda-package/types.js +0 -16
package/lambda-package/types.js.map +0 -1
package/lambda-package/utils/format-converter.d.ts +0 -39
package/lambda-package/utils/format-converter.d.ts.map +0 -1
package/lambda-package/utils/format-converter.js +0 -191
package/lambda-package/utils/format-converter.js.map +0 -1
package/lambda-package/utils/truncate.d.ts +0 -26
package/lambda-package/utils/truncate.d.ts.map +0 -1
package/lambda-package/utils/truncate.js +0 -54
package/lambda-package/utils/truncate.js.map +0 -1
package/lambda.zip +0 -0
package/test-output.txt +0 -4
package/tests/auth-smoke.test.ts +0 -480
package/tests/elicit-runner.test.ts +0 -232
package/tests/fetch-tool.test.ts +0 -922
package/tests/hitl-gate.test.ts +0 -267
package/tests/injection-corpus.ts +0 -338
package/tests/pii-allowlist.test.ts +0 -282
package/tests/reader.test.ts +0 -353
package/tests/sanitizer.test.ts +0 -358
package/tests/search.test.ts +0 -456
package/tests/threat-reporter.test.ts +0 -334
package/tsconfig.cdk.json +0 -35

package/src/content-handlers/index.ts ADDED Viewed

@@ -0,0 +1,72 @@
+/**
+ * Content Handlers Module
+ *
+ * Central routing for content-type specific sanitization handlers.
+ * Detects MIME type from Content-Type header and routes to appropriate handler.
+ *
+ * Supported content types:
+ * - application/pdf -> PDF handler
+ * - application/json -> JSON handler
+ * - image/svg+xml -> SVG handler
+ *
+ * Unsupported types return structured rejection (no throw).
+ */
+import { handlePdf } from './pdf-handler.js';
+import { handleJson } from './json-handler.js';
+import { handleSvg } from './svg-handler.js';
+import type { HandlerResult } from './types.js';
+/**
+ * Reduce a raw Content-Type header to its bare MIME type.
+ *
+ * The value is lowercased, everything from the first ';' onward (charset,
+ * boundary and any other parameters) is discarded, and surrounding
+ * whitespace is trimmed.
+ *
+ * Examples:
+ * - "application/pdf; charset=utf-8" -> "application/pdf"
+ * - "application/json" -> "application/json"
+ * - "IMAGE/SVG+XML" -> "image/svg+xml"
+ *
+ * @param contentType - Raw Content-Type header value
+ * @returns Lowercase MIME type with parameters removed
+ */
+export function normalizeMimeType(contentType: string): string {
+  const lowered = contentType.toLowerCase();
+  const paramStart = lowered.indexOf(';');
+  const base = paramStart === -1 ? lowered : lowered.slice(0, paramStart);
+  return base.trim();
+}
+/**
+ * Dispatch content to the handler registered for its MIME type.
+ *
+ * The Content-Type header is normalized first, so parameters and letter case
+ * do not affect routing. MIME types without a handler are not an exception:
+ * the caller receives a 'rejected' result describing the unsupported type.
+ *
+ * @param content - Raw content (string or Buffer)
+ * @param contentType - Content-Type header value
+ * @returns Result of the selected handler, or a structured rejection
+ */
+export async function routeContentHandler(
+  content: string | Buffer,
+  contentType: string
+): Promise<HandlerResult> {
+  const mime = normalizeMimeType(contentType);
+  if (mime === 'application/pdf') {
+    return handlePdf(content, mime);
+  }
+  // Both the registered and the legacy JSON MIME type share one handler
+  if (mime === 'application/json' || mime === 'text/json') {
+    return handleJson(content, mime);
+  }
+  if (mime === 'image/svg+xml') {
+    return handleSvg(content, mime);
+  }
+  // No handler registered - answer with a structured rejection instead of throwing
+  const rejection: HandlerResult = {
+    status: 'rejected',
+    reason: 'UNSUPPORTED_CONTENT_TYPE',
+    mime,
+    message: `Content type ${mime} is not supported by Visus-MCP.`
+  };
+  return rejection;
+}
+// Re-export types
+export type { HandlerResult, HandlerSuccessResult, HandlerErrorResult } from './types.js';

package/src/content-handlers/json-handler.ts ADDED Viewed

@@ -0,0 +1,137 @@
+/**
+ * JSON Content Handler
+ *
+ * Handles application/json content type. Recursively traverses all nodes in the JSON
+ * object tree and applies the full injection pattern registry to every string value.
+ *
+ * What it handles:
+ * - All string values in the JSON tree (any depth)
+ * - Arrays, nested objects, and mixed-type arrays
+ * - Falls back to plain text pipeline if JSON.parse fails
+ *
+ * What it strips:
+ * - Nothing (preserves original structure)
+ *
+ * What it passes through:
+ * - Sanitized JSON with original structure preserved
+ * - All non-string values pass through unchanged
+ */
+import { sanitize } from '../sanitizer/index.js';
+import type { HandlerResult } from './types.js';
+/**
+ * Handle JSON content
+ *
+ * Parses the payload, runs every string value through the sanitizer, and
+ * re-serializes the result with a 2-space indent. Object keys and non-string
+ * values are copied unchanged. If the payload cannot be processed as JSON the
+ * whole text is sanitized as plain text instead, so this function returns a
+ * 'sanitized' result in both cases.
+ *
+ * `sanitized_fields` is the number of string values the sanitizer modified.
+ * In the plain-text fallback the whole payload counts as one field, so the
+ * value is at least 1 whenever the sanitizer changed the text (including
+ * PII-only redactions) - callers derive "content modified" from this number.
+ *
+ * @param content - Raw JSON string or Buffer (decoded as UTF-8)
+ * @param mimeType - Original MIME type, echoed back as content_type
+ * @returns Sanitized handler result
+ */
+export function handleJson(
+  content: string | Buffer,
+  mimeType: string
+): HandlerResult {
+  const startTime = Date.now();
+  // Convert Buffer to string if needed
+  const jsonString = Buffer.isBuffer(content) ? content.toString('utf-8') : content;
+  try {
+    // Parse JSON
+    const parsed = JSON.parse(jsonString);
+    // Track sanitization metadata across all fields
+    let sanitizedFieldCount = 0;
+    const allPatternsDetected = new Set<string>();
+    const allPiiTypesRedacted = new Set<string>();
+    const allPiiAllowlisted: Array<{ type: string; value: string; reason: string }> = [];
+    // Recursively sanitize all string values
+    const sanitized = recursiveSanitize(parsed, (text: string) => {
+      const result = sanitize(text);
+      if (result.sanitization.content_modified) {
+        sanitizedFieldCount++;
+      }
+      // Aggregate metadata (sets de-duplicate names reported by several fields)
+      result.sanitization.patterns_detected.forEach(p => allPatternsDetected.add(p));
+      result.sanitization.pii_types_redacted.forEach(p => allPiiTypesRedacted.add(p));
+      allPiiAllowlisted.push(...result.sanitization.pii_allowlisted);
+      return result.content;
+    });
+    // Re-stringify with 2-space indent
+    const sanitizedJson = JSON.stringify(sanitized, null, 2);
+    const processingTime = Date.now() - startTime;
+    return {
+      status: 'sanitized',
+      content_type: mimeType,
+      sanitized_content: sanitizedJson,
+      sanitization: {
+        patterns_detected: Array.from(allPatternsDetected),
+        pii_types_redacted: Array.from(allPiiTypesRedacted),
+        pii_allowlisted: allPiiAllowlisted,
+        sanitized_fields: sanitizedFieldCount
+      },
+      processing_time_ms: processingTime
+    };
+  } catch (error) {
+    // Anything thrown above (a JSON.parse SyntaxError or a failure while
+    // walking the tree) lands here - fall back to plain text sanitization
+    const sanitizationResult = sanitize(jsonString);
+    const {
+      patterns_detected: patternsDetected,
+      pii_types_redacted: piiTypesRedacted,
+      pii_allowlisted: piiAllowlisted,
+      content_modified: contentModified
+    } = sanitizationResult.sanitization;
+    // The pattern count alone is 0 for a PII-only redaction, which would make
+    // callers report modified content as unmodified; never report less than 1
+    // field when the sanitizer changed the text.
+    const sanitizedFields = Math.max(patternsDetected.length, contentModified ? 1 : 0);
+    const processingTime = Date.now() - startTime;
+    return {
+      status: 'sanitized',
+      content_type: mimeType,
+      sanitized_content: sanitizationResult.content,
+      sanitization: {
+        patterns_detected: patternsDetected,
+        pii_types_redacted: piiTypesRedacted,
+        pii_allowlisted: piiAllowlisted,
+        sanitized_fields: sanitizedFields
+      },
+      processing_time_ms: processingTime
+    };
+  }
+}
+/**
+ * Recursively traverse JSON tree and sanitize all string values
+ *
+ * Strings are replaced by the return value of `sanitizeFn`; arrays and objects
+ * are rebuilt element by element; every other value (number, boolean, null,
+ * undefined) is returned as-is. Object keys are copied unchanged. The input is
+ * not mutated.
+ *
+ * @param obj - JSON object/array/primitive
+ * @param sanitizeFn - Function to sanitize string values
+ * @returns Sanitized copy with the same structure
+ */
+function recursiveSanitize(obj: any, sanitizeFn: (text: string) => string): any {
+  // Handle null (typeof null === 'object', so it must be excluded first)
+  if (obj === null) {
+    return null;
+  }
+  // Handle string - sanitize it
+  if (typeof obj === 'string') {
+    return sanitizeFn(obj);
+  }
+  // Handle array - recursively sanitize each element
+  if (Array.isArray(obj)) {
+    return obj.map((item) => recursiveSanitize(item, sanitizeFn));
+  }
+  // Handle object - recursively sanitize each value
+  if (typeof obj === 'object') {
+    const sanitizedObj: Record<string, any> = {};
+    for (const [key, value] of Object.entries(obj)) {
+      // JSON.parse creates "__proto__" as an ordinary own property. A plain
+      // assignment would invoke the prototype setter instead - dropping the
+      // subtree from the output and swapping the result's prototype - so the
+      // property is defined explicitly.
+      Object.defineProperty(sanitizedObj, key, {
+        value: recursiveSanitize(value, sanitizeFn),
+        enumerable: true,
+        writable: true,
+        configurable: true
+      });
+    }
+    return sanitizedObj;
+  }
+  // Handle primitives (number, boolean, undefined) - pass through
+  return obj;
+}

package/src/content-handlers/pdf-handler.ts ADDED Viewed

@@ -0,0 +1,91 @@
+/**
+ * PDF Content Handler
+ *
+ * Handles application/pdf content type. Extracts text and metadata from PDF files,
+ * passes all text through the injection pattern registry, and returns sanitized plain text.
+ *
+ * What it handles:
+ * - PDF body text (full document)
+ * - PDF metadata: title, author, subject, keywords, creator, producer
+ * - Annotation text
+ * - Form field values
+ *
+ * What it strips:
+ * - Embedded binary objects (fonts, images, attachments)
+ * - Returns only extracted text, not original binary
+ *
+ * What it passes through:
+ * - All extracted text after injection pattern sanitization
+ */
+import { PDFParse } from 'pdf-parse';
+import { sanitize } from '../sanitizer/index.js';
+import type { HandlerResult } from './types.js';
+/**
+ * Handle PDF content
+ *
+ * Extracts the document text plus the Title, Author, Subject, Keywords,
+ * Creator and Producer info fields, appends each non-empty string field to
+ * the body as "Field: value", and runs the combined text through the
+ * sanitizer. Only sanitized plain text is returned, never the original binary.
+ *
+ * Failures while parsing are reported as a structured 'error' result
+ * (reason PDF_PARSE_FAILED) rather than thrown.
+ *
+ * `sanitized_fields` is at least 1 whenever the sanitizer changed the text
+ * (including PII-only redactions), because callers derive "content modified"
+ * from this number.
+ *
+ * @param content - Raw PDF binary data as Buffer or string
+ * @param mimeType - Original MIME type, echoed back as content_type / mime
+ * @returns Sanitized handler result, or an error result if parsing failed
+ */
+export async function handlePdf(
+  content: string | Buffer,
+  mimeType: string
+): Promise<HandlerResult> {
+  const startTime = Date.now();
+  try {
+    // Ensure we have a Buffer
+    const buffer = Buffer.isBuffer(content) ? content : Buffer.from(content);
+    // Parse PDF using pdf-parse v2 API
+    const parser = new PDFParse({ data: buffer });
+    let combinedText: string;
+    try {
+      // Get text and metadata separately
+      const textResult = await parser.getText();
+      const infoResult = await parser.getInfo();
+      const metadata = infoResult.info || {};
+      // Build combined text from body + metadata
+      combinedText = textResult.text || '';
+      const metadataFields = ['Title', 'Author', 'Subject', 'Keywords', 'Creator', 'Producer'];
+      for (const field of metadataFields) {
+        const value = metadata[field];
+        if (value && typeof value === 'string') {
+          combinedText += `\n\n${field}: ${value}`;
+        }
+      }
+    } finally {
+      // Release the parsed document whether or not extraction succeeded
+      try {
+        await parser.destroy();
+      } catch {
+        // A cleanup failure must not turn a successful extraction into an error
+      }
+    }
+    // Pass through injection detection pipeline
+    const sanitizationResult = sanitize(combinedText);
+    const {
+      patterns_detected: patternsDetected,
+      pii_types_redacted: piiTypesRedacted,
+      pii_allowlisted: piiAllowlisted,
+      content_modified: contentModified
+    } = sanitizationResult.sanitization;
+    // The pattern count alone is 0 for a PII-only redaction, which would make
+    // callers report modified content as unmodified.
+    const sanitizedFields = Math.max(patternsDetected.length, contentModified ? 1 : 0);
+    const processingTime = Date.now() - startTime;
+    return {
+      status: 'sanitized',
+      content_type: mimeType,
+      sanitized_content: sanitizationResult.content,
+      sanitization: {
+        patterns_detected: patternsDetected,
+        pii_types_redacted: piiTypesRedacted,
+        pii_allowlisted: piiAllowlisted,
+        sanitized_fields: sanitizedFields
+      },
+      processing_time_ms: processingTime
+    };
+  } catch (error) {
+    return {
+      status: 'error',
+      reason: 'PDF_PARSE_FAILED',
+      mime: mimeType,
+      message: error instanceof Error ? error.message : String(error)
+    };
+  }
+}

package/src/content-handlers/svg-handler.ts ADDED Viewed

@@ -0,0 +1,243 @@
+/**
+ * SVG Content Handler
+ *
+ * Handles image/svg+xml content type. SVG is XML, not a binary image, and can contain
+ * executable code and external references. This handler strips dangerous elements and
+ * attributes unconditionally, then sanitizes remaining text content.
+ *
+ * What it handles:
+ * - All text content in SVG elements after stripping dangerous parts
+ *
+ * What it strips (unconditionally, no attempt to sanitize):
+ * - <script> elements and all children
+ * - <use> elements with external href or xlink:href attributes
+ * - <foreignObject> elements and all children
+ * - All event handler attributes (onload, onclick, onerror, etc.)
+ * - <set> and <animate> elements that reference external resources
+ * - data: URI attributes
+ *
+ * What it passes through (after injection scan):
+ * - Path data (d attribute)
+ * - Text elements and their content
+ * - <title> and <desc> elements
+ * - Presentation attributes (fill, stroke, transform, etc.)
+ * - viewBox, width, height attributes
+ */
+import { XMLParser, XMLBuilder } from 'fast-xml-parser';
+import { sanitize } from '../sanitizer/index.js';
+import type { HandlerResult } from './types.js';
+/**
+ * Handle SVG content
+ *
+ * Pipeline:
+ * 1. Parse the SVG as XML.
+ * 2. Strip dangerous elements and attributes.
+ * 3. Run every element text value through the sanitizer and write the
+ *    sanitized text back into the tree, so the rebuilt SVG no longer carries
+ *    what the sanitizer removed or redacted.
+ * 4. Scan the joined text once more so phrases split across several elements
+ *    are still reported (this pass only adds to the metadata).
+ * 5. Rebuild the SVG markup from the cleaned tree.
+ *
+ * Attribute values are not passed through the sanitizer.
+ * NOTE(review): confirm whether attributes such as aria-label should also be
+ * scanned; path data and presentation attributes are left byte-for-byte.
+ *
+ * `sanitized_fields` is the number of text values the sanitizer modified.
+ * Parse/build failures are reported as an 'error' result (SVG_PARSE_FAILED).
+ *
+ * @param content - Raw SVG XML string or Buffer (decoded as UTF-8)
+ * @param mimeType - Original MIME type, echoed back as content_type / mime
+ * @returns Sanitized handler result, or an error result if processing failed
+ */
+export function handleSvg(
+  content: string | Buffer,
+  mimeType: string
+): HandlerResult {
+  const startTime = Date.now();
+  // Convert Buffer to string if needed
+  const svgString = Buffer.isBuffer(content) ? content.toString('utf-8') : content;
+  try {
+    // Parse SVG XML
+    const parser = new XMLParser({
+      ignoreAttributes: false,
+      attributeNamePrefix: '@_',
+      textNodeName: '#text',
+      preserveOrder: false,
+      removeNSPrefix: true,
+    });
+    const parsed = parser.parse(svgString);
+    // Strip dangerous elements and attributes
+    const stripped = stripDangerousContent(parsed);
+    // Track sanitization metadata across all text values
+    let sanitizedFieldCount = 0;
+    const patternsDetected = new Set<string>();
+    const piiTypesRedacted = new Set<string>();
+    const piiAllowlisted: Array<{ type: string; value: string; reason: string }> = [];
+    // Sanitize each text value in place so the output actually changes
+    const cleaned = sanitizeSvgTextNodes(stripped, (text: string) => {
+      if (text.trim().length === 0) {
+        return text;
+      }
+      const result = sanitize(text);
+      if (result.sanitization.content_modified) {
+        sanitizedFieldCount++;
+      }
+      result.sanitization.patterns_detected.forEach((p: string) => patternsDetected.add(p));
+      result.sanitization.pii_types_redacted.forEach((p: string) => piiTypesRedacted.add(p));
+      piiAllowlisted.push(...result.sanitization.pii_allowlisted);
+      return result.content;
+    });
+    // Second pass over the joined text: reports patterns that only appear when
+    // neighbouring text values are read together. It does not rewrite content.
+    const joinedText = extractTextContent(cleaned);
+    if (joinedText.length > 0) {
+      const joinedScan = sanitize(joinedText);
+      joinedScan.sanitization.patterns_detected.forEach((p: string) => patternsDetected.add(p));
+      joinedScan.sanitization.pii_types_redacted.forEach((p: string) => piiTypesRedacted.add(p));
+    }
+    // Rebuild SVG
+    const builder = new XMLBuilder({
+      ignoreAttributes: false,
+      attributeNamePrefix: '@_',
+      textNodeName: '#text',
+      format: true,
+      suppressEmptyNode: true,
+    });
+    const sanitizedSvg = builder.build(cleaned);
+    const processingTime = Date.now() - startTime;
+    return {
+      status: 'sanitized',
+      content_type: mimeType,
+      sanitized_content: sanitizedSvg,
+      sanitization: {
+        patterns_detected: Array.from(patternsDetected),
+        pii_types_redacted: Array.from(piiTypesRedacted),
+        pii_allowlisted: piiAllowlisted,
+        sanitized_fields: sanitizedFieldCount
+      },
+      processing_time_ms: processingTime
+    };
+  } catch (error) {
+    return {
+      status: 'error',
+      reason: 'SVG_PARSE_FAILED',
+      mime: mimeType,
+      message: error instanceof Error ? error.message : String(error)
+    };
+  }
+}
+/**
+ * Return a copy of a parsed SVG tree with every element text value replaced
+ * by `sanitizeFn(value)`. Values stored under attribute keys ('@_' prefix)
+ * and non-string values are copied unchanged.
+ */
+function sanitizeSvgTextNodes(
+  node: any,
+  sanitizeFn: (text: string) => string,
+  isAttribute = false
+): any {
+  if (typeof node === 'string') {
+    return isAttribute ? node : sanitizeFn(node);
+  }
+  if (Array.isArray(node)) {
+    return node.map((item) => sanitizeSvgTextNodes(item, sanitizeFn, isAttribute));
+  }
+  if (typeof node !== 'object' || node === null) {
+    return node;
+  }
+  const result: Record<string, any> = {};
+  for (const [key, value] of Object.entries(node)) {
+    result[key] = sanitizeSvgTextNodes(value, sanitizeFn, key.startsWith('@_'));
+  }
+  return result;
+}
+/**
+ * Strip dangerous content from parsed SVG
+ *
+ * Returns a cleaned copy of the parsed tree; the input is not mutated.
+ *
+ * Removes:
+ * - <script> and <foreignObject> elements
+ * - <use>, <set> and <animate> elements whose href points to an external
+ *   resource (http://, https:// or protocol-relative //), whether the element
+ *   occurs once or is repeated (repeated siblings arrive as an array)
+ * - Event handler attributes (any attribute whose name starts with "on",
+ *   compared case-insensitively)
+ * - Attributes whose value is a data: or javascript: URI (whitespace/control
+ *   characters and letter case are ignored when detecting the scheme)
+ *
+ * Element text that starts with "data:" is blanked rather than removed.
+ *
+ * @param node - Parsed SVG node (object, array or primitive)
+ * @returns Cleaned copy of the node
+ */
+function stripDangerousContent(node: any): any {
+  if (typeof node !== 'object' || node === null) {
+    return node;
+  }
+  // Handle arrays
+  if (Array.isArray(node)) {
+    return node
+      .filter((item) => !shouldRemoveElement(item))
+      .map((item) => stripDangerousContent(item));
+  }
+  // Handle objects
+  const result: any = {};
+  for (const [key, value] of Object.entries(node)) {
+    // Skip dangerous elements
+    if (key === 'script' || key === 'foreignObject') {
+      continue;
+    }
+    // Attributes: drop event handlers and dangerous URI schemes entirely
+    if (key.startsWith('@_')) {
+      if (key.toLowerCase().startsWith('@_on')) {
+        continue;
+      }
+      if (typeof value === 'string' && hasDangerousUriScheme(value)) {
+        continue;
+      }
+      result[key] = stripDangerousContent(value);
+      continue;
+    }
+    // Handle <use>, <set> and <animate> with external references
+    if (key === 'use' || key === 'set' || key === 'animate') {
+      const kept = dropExternalReferences(value);
+      if (kept === undefined) {
+        continue;
+      }
+      result[key] = stripDangerousContent(kept);
+      continue;
+    }
+    // Blank element text that is a data: URI
+    if (typeof value === 'string' && value.startsWith('data:')) {
+      result[key] = '';
+      continue;
+    }
+    // Recursively process
+    result[key] = stripDangerousContent(value);
+  }
+  return result;
+}
+/**
+ * True when an attribute value is a data: or javascript: URI. Whitespace and
+ * control characters are removed before the comparison so that padded or
+ * line-broken schemes are still recognised.
+ */
+function hasDangerousUriScheme(value: string): boolean {
+  const compact = value.replace(/[\u0000-\u0020]+/g, '').toLowerCase();
+  return compact.startsWith('data:') || compact.startsWith('javascript:');
+}
+/**
+ * True when a parsed element carries an href (plain or xlink-prefixed) that
+ * points to an external resource.
+ */
+function hasExternalHref(element: any): boolean {
+  if (typeof element !== 'object' || element === null || Array.isArray(element)) {
+    return false;
+  }
+  return [element['@_href'], element['@_xlink:href']].some((href) => {
+    if (typeof href !== 'string') {
+      return false;
+    }
+    const target = href.trim().toLowerCase();
+    return target.startsWith('http://') || target.startsWith('https://') || target.startsWith('//');
+  });
+}
+/**
+ * Filter externally-referencing elements out of the value stored under a
+ * <use>/<set>/<animate> key. Returns undefined when nothing is left to keep.
+ */
+function dropExternalReferences(value: any): any {
+  if (Array.isArray(value)) {
+    const kept = value.filter((item) => !hasExternalHref(item));
+    return kept.length > 0 ? kept : undefined;
+  }
+  return hasExternalHref(value) ? undefined : value;
+}
+/**
+ * Decide whether an array item must be dropped as a whole.
+ *
+ * An item is dropped when it is a non-null object that has a 'script' or
+ * 'foreignObject' key; anything else is kept.
+ */
+function shouldRemoveElement(element: any): boolean {
+  const isContainer = typeof element === 'object' && element !== null;
+  return isContainer && ['script', 'foreignObject'].some((tag) => tag in element);
+}
+/**
+ * Extract all text content from SVG for injection scanning
+ *
+ * Collects every string found under element keys (text nodes, <title>,
+ * <desc>, <text>, <tspan>, ...) at any depth, including strings inside
+ * arrays, and joins the trimmed pieces with single spaces. Values stored
+ * under attribute keys ('@_' prefix) and non-string primitives are skipped.
+ *
+ * @param node - Parsed SVG node (object, array or primitive)
+ * @returns Space-separated text, or '' when the node holds no text
+ */
+function extractTextContent(node: any): string {
+  // Strings must be handled before the non-object guard, otherwise string
+  // children and string array items are never collected
+  if (typeof node === 'string') {
+    return node.trim();
+  }
+  if (typeof node !== 'object' || node === null) {
+    return '';
+  }
+  if (Array.isArray(node)) {
+    return node
+      .map((item) => extractTextContent(item))
+      .filter((piece) => piece.length > 0)
+      .join(' ');
+  }
+  const pieces: string[] = [];
+  for (const [key, value] of Object.entries(node)) {
+    // Attribute values are not text content
+    if (key.startsWith('@_')) {
+      continue;
+    }
+    const piece = extractTextContent(value);
+    if (piece.length > 0) {
+      pieces.push(piece);
+    }
+  }
+  return pieces.join(' ');
+}

package/src/content-handlers/types.ts ADDED Viewed

@@ -0,0 +1,44 @@
+/**
+ * Content Handler Types
+ *
+ * Shared interfaces for content-type specific handlers.
+ */
+/**
+ * Success result from a content handler
+ *
+ * - status: always 'sanitized'; this literal distinguishes a success from an
+ *   error/rejection result
+ * - content_type: the MIME type the handler was called with
+ * - sanitized_content: the handler's text output after sanitization
+ * - sanitization.patterns_detected / pii_types_redacted / pii_allowlisted:
+ *   metadata reported by the sanitizer for the processed content
+ * - sanitization.sanitized_fields: handler-specific count; the JSON handler
+ *   counts modified string values, the PDF handler reports the number of
+ *   detected patterns
+ * - processing_time_ms: elapsed wall-clock time measured with Date.now()
+ */
+export interface HandlerSuccessResult {
+  status: 'sanitized';
+  content_type: string;
+  sanitized_content: string;
+  sanitization: {
+    patterns_detected: string[];
+    pii_types_redacted: string[];
+    pii_allowlisted: Array<{ type: string; value: string; reason: string }>;
+    sanitized_fields: number;
+  };
+  processing_time_ms: number;
+}
+/**
+ * Error result from a content handler
+ *
+ * - status: 'error' when a handler failed while processing supported content
+ *   (e.g. PDF_PARSE_FAILED, SVG_PARSE_FAILED); 'rejected' when the router has
+ *   no handler for the MIME type (UNSUPPORTED_CONTENT_TYPE)
+ * - reason: machine-readable code, see the examples above
+ * - mime: the normalized MIME type that was being handled
+ * - message: human-readable description of the failure
+ */
+export interface HandlerErrorResult {
+  status: 'error' | 'rejected';
+  reason: string;
+  mime: string;
+  message: string;
+}
+/**
+ * Union type for all handler results
+ *
+ * Narrow on `status`: 'sanitized' selects HandlerSuccessResult, 'error' and
+ * 'rejected' select HandlerErrorResult.
+ */
+export type HandlerResult = HandlerSuccessResult | HandlerErrorResult;
+/**
+ * Content handler function signature
+ *
+ * A handler may be synchronous or asynchronous; callers should await the
+ * return value to support both.
+ */
+export type ContentHandler = (
+  content: string | Buffer,
+  mimeType: string
+) => Promise<HandlerResult> | HandlerResult;

package/src/tools/fetch.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import { renderPage } from '../browser/playwright-renderer.js';
 import { sanitize } from '../sanitizer/index.js';
 import { truncateContent } from '../utils/truncate.js';
 import { detectFormat, convertJson, convertXml, convertRss } from '../utils/format-converter.js';
+import { routeContentHandler, normalizeMimeType } from '../content-handlers/index.js';
 import type { VisusFetchInput, VisusFetchOutput, Result } from '../types.js';
 import { Err } from '../types.js';
@@ -41,8 +42,72 @@ export async function visusFetch(input: VisusFetchInput): Promise<Result<VisusFe
     const { html, title, contentType } = renderResult.value;
     const rawContent = html || '';
-    // Step 2: Detect format and apply format-appropriate conversion
+    // Step 2: Detect content type and route to specialized handlers if applicable
     const detectedContentType = contentType || 'text/html';
+    const normalizedMime = normalizeMimeType(detectedContentType);
+    // Check if content requires specialized handler (PDF, JSON, SVG)
+    if (normalizedMime === 'application/pdf' ||
+        normalizedMime === 'application/json' ||
+        normalizedMime === 'text/json' ||
+        normalizedMime === 'image/svg+xml') {
+      // Route to specialized content handler
+      const handlerResult = await routeContentHandler(rawContent, detectedContentType);
+      // Handle unsupported or error cases
+      if (handlerResult.status === 'rejected' || handlerResult.status === 'error') {
+        return Err(new Error(handlerResult.message));
+      }
+      // Type guard: ensure we have a success result
+      if (handlerResult.status !== 'sanitized') {
+        return Err(new Error('Unexpected handler result status'));
+      }
+      // Handler success - use the already-sanitized content
+      const sanitizedContent = handlerResult.sanitized_content;
+      const sanitization = handlerResult.sanitization;
+      const truncationResult = truncateContent(sanitizedContent);
+      // Determine format_detected based on MIME type
+      let formatDetected: 'html' | 'json' | 'xml' | 'rss' = 'html';
+      if (normalizedMime === 'application/json' || normalizedMime === 'text/json') {
+        formatDetected = 'json';
+      } else if (normalizedMime === 'image/svg+xml') {
+        formatDetected = 'xml'; // SVG is XML-based
+      } else if (normalizedMime === 'application/pdf') {
+        // PDF doesn't have a format_detected value in the current schema
+        // Leaving as 'html' for now
+      }
+      const output: VisusFetchOutput = {
+        url,
+        content: truncationResult.content,
+        sanitization: {
+          patterns_detected: sanitization.patterns_detected,
+          pii_types_redacted: sanitization.pii_types_redacted,
+          pii_allowlisted: sanitization.pii_allowlisted,
+          content_modified: sanitization.sanitized_fields > 0
+        },
+        metadata: {
+          title: title || 'Untitled',
+          fetched_at: new Date().toISOString(),
+          content_length_original: rawContent.length,
+          content_length_sanitized: sanitizedContent.length,
+          format_detected: formatDetected,
+          content_type: detectedContentType,
+          ...(truncationResult.truncated && {
+            truncated: true,
+            truncated_at_chars: truncationResult.truncated_at_chars
+          })
+        }
+      };
+      return { ok: true, value: output };
+    }
+    // Step 3: For HTML/XML/RSS - use existing format conversion flow
     const formatType = detectFormat(detectedContentType);
     let processedContent = rawContent;
@@ -57,15 +122,15 @@ export async function visusFetch(input: VisusFetchInput): Promise<Result<VisusFe
     }
     // For 'html' format, processedContent remains as rawContent
-    // Step 3: CRITICAL - Sanitize content (injection detection + PII redaction with allowlisting)
+    // Step 4: CRITICAL - Sanitize content (injection detection + PII redaction with allowlisting)
     // This step CANNOT be skipped or bypassed
     const sanitizationResult = sanitize(processedContent, url);
-    // Step 3: Apply token ceiling truncation (AFTER sanitization)
+    // Step 5: Apply token ceiling truncation (AFTER sanitization)
     // Anthropic MCP Directory enforces 25,000 token response limit
     const truncationResult = truncateContent(sanitizationResult.content);
-    // Step 4: Build output
+    // Step 6: Build output
     const output: VisusFetchOutput = {
       url,
       content: truncationResult.content,