visus-mcp 0.6.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +6 -1
- package/.env.status +7 -0
- package/CHANGELOG.md +65 -0
- package/CLAUDE.md +3 -0
- package/README.md +15 -7
- package/SECURITY.md +2 -0
- package/STATUS.md +203 -9
- package/dist/content-handlers/index.d.ts +36 -0
- package/dist/content-handlers/index.d.ts.map +1 -0
- package/dist/content-handlers/index.js +59 -0
- package/dist/content-handlers/index.js.map +1 -0
- package/dist/content-handlers/json-handler.d.ts +28 -0
- package/dist/content-handlers/json-handler.d.ts.map +1 -0
- package/dist/content-handlers/json-handler.js +116 -0
- package/dist/content-handlers/json-handler.js.map +1 -0
- package/dist/content-handlers/pdf-handler.d.ts +29 -0
- package/dist/content-handlers/pdf-handler.d.ts.map +1 -0
- package/dist/content-handlers/pdf-handler.js +77 -0
- package/dist/content-handlers/pdf-handler.js.map +1 -0
- package/dist/content-handlers/svg-handler.d.ts +35 -0
- package/dist/content-handlers/svg-handler.d.ts.map +1 -0
- package/dist/content-handlers/svg-handler.js +206 -0
- package/dist/content-handlers/svg-handler.js.map +1 -0
- package/dist/content-handlers/types.d.ts +42 -0
- package/dist/content-handlers/types.d.ts.map +1 -0
- package/dist/content-handlers/types.js +7 -0
- package/dist/content-handlers/types.js.map +1 -0
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +62 -4
- package/dist/tools/fetch.js.map +1 -1
- package/package.json +2 -1
- package/server.json +2 -2
- package/src/content-handlers/index.ts +72 -0
- package/src/content-handlers/json-handler.ts +137 -0
- package/src/content-handlers/pdf-handler.ts +91 -0
- package/src/content-handlers/svg-handler.ts +243 -0
- package/src/content-handlers/types.ts +44 -0
- package/src/tools/fetch.ts +69 -4
- package/.github/ISSUE_TEMPLATE/bug_report.md +0 -47
- package/.github/ISSUE_TEMPLATE/false_positive.md +0 -43
- package/.github/ISSUE_TEMPLATE/new_pattern.md +0 -49
- package/.github/ISSUE_TEMPLATE/security_report.md +0 -31
- package/.github/PULL_REQUEST_TEMPLATE.md +0 -39
- package/.mcpregistry_github_token +0 -1
- package/.mcpregistry_registry_token +0 -1
- package/CONTRIBUTING.md +0 -329
- package/LINKEDIN-STRATEGY.md +0 -367
- package/ROADMAP.md +0 -221
- package/SECURITY-AUDIT-v1.md +0 -277
- package/SUBMISSION.md +0 -66
- package/TROUBLESHOOT-AUTH-20260322-2019.md +0 -291
- package/TROUBLESHOOT-BUILD-20260319-1450.md +0 -546
- package/TROUBLESHOOT-COGNITO-AUTH-20260324-2029.md +0 -415
- package/TROUBLESHOOT-COGNITO-JWT-20260324.md +0 -592
- package/TROUBLESHOOT-FETCH-20260320-1150.md +0 -168
- package/TROUBLESHOOT-JEST-20260323-1357.md +0 -139
- package/TROUBLESHOOT-LAMBDA-20260322-1945.md +0 -183
- package/TROUBLESHOOT-PLAYWRIGHT-20260321-1549.md +0 -217
- package/TROUBLESHOOT-SSL-20260320-1138.md +0 -171
- package/TROUBLESHOOT-STRUCTURED-20260320-1200.md +0 -246
- package/TROUBLESHOOT-TEST-20260320-0942.md +0 -281
- package/VISUS-CLAUDE-CODE-PROMPT.md +0 -324
- package/VISUS-PROJECT-PLAN.md +0 -205
- package/cdk.json +0 -73
- package/infrastructure/app.ts +0 -39
- package/infrastructure/stack.ts +0 -298
- package/jest.config.js +0 -33
- package/jest.setup.js +0 -9
- package/lambda-deploy/index.js +0 -81512
- package/lambda-deploy/index.js.map +0 -7
- package/lambda-package/browser/__mocks__/playwright-renderer.d.ts +0 -25
- package/lambda-package/browser/__mocks__/playwright-renderer.d.ts.map +0 -1
- package/lambda-package/browser/__mocks__/playwright-renderer.js +0 -119
- package/lambda-package/browser/__mocks__/playwright-renderer.js.map +0 -1
- package/lambda-package/browser/playwright-renderer.d.ts +0 -40
- package/lambda-package/browser/playwright-renderer.d.ts.map +0 -1
- package/lambda-package/browser/playwright-renderer.js +0 -214
- package/lambda-package/browser/playwright-renderer.js.map +0 -1
- package/lambda-package/browser/reader.d.ts +0 -31
- package/lambda-package/browser/reader.d.ts.map +0 -1
- package/lambda-package/browser/reader.js +0 -98
- package/lambda-package/browser/reader.js.map +0 -1
- package/lambda-package/index.d.ts +0 -18
- package/lambda-package/index.d.ts.map +0 -1
- package/lambda-package/index.js +0 -238
- package/lambda-package/index.js.map +0 -1
- package/lambda-package/lambda-handler.d.ts +0 -28
- package/lambda-package/lambda-handler.d.ts.map +0 -1
- package/lambda-package/lambda-handler.js +0 -257
- package/lambda-package/lambda-handler.js.map +0 -1
- package/lambda-package/package-lock.json +0 -7435
- package/lambda-package/package.json +0 -74
- package/lambda-package/runtime.d.ts +0 -50
- package/lambda-package/runtime.d.ts.map +0 -1
- package/lambda-package/runtime.js +0 -86
- package/lambda-package/runtime.js.map +0 -1
- package/lambda-package/sanitizer/elicit-runner.d.ts +0 -48
- package/lambda-package/sanitizer/elicit-runner.d.ts.map +0 -1
- package/lambda-package/sanitizer/elicit-runner.js +0 -100
- package/lambda-package/sanitizer/elicit-runner.js.map +0 -1
- package/lambda-package/sanitizer/framework-mapper.d.ts +0 -24
- package/lambda-package/sanitizer/framework-mapper.d.ts.map +0 -1
- package/lambda-package/sanitizer/framework-mapper.js +0 -342
- package/lambda-package/sanitizer/framework-mapper.js.map +0 -1
- package/lambda-package/sanitizer/hitl-gate.d.ts +0 -69
- package/lambda-package/sanitizer/hitl-gate.d.ts.map +0 -1
- package/lambda-package/sanitizer/hitl-gate.js +0 -101
- package/lambda-package/sanitizer/hitl-gate.js.map +0 -1
- package/lambda-package/sanitizer/index.d.ts +0 -63
- package/lambda-package/sanitizer/index.d.ts.map +0 -1
- package/lambda-package/sanitizer/index.js +0 -105
- package/lambda-package/sanitizer/index.js.map +0 -1
- package/lambda-package/sanitizer/injection-detector.d.ts +0 -34
- package/lambda-package/sanitizer/injection-detector.d.ts.map +0 -1
- package/lambda-package/sanitizer/injection-detector.js +0 -89
- package/lambda-package/sanitizer/injection-detector.js.map +0 -1
- package/lambda-package/sanitizer/patterns.d.ts +0 -30
- package/lambda-package/sanitizer/patterns.d.ts.map +0 -1
- package/lambda-package/sanitizer/patterns.js +0 -372
- package/lambda-package/sanitizer/patterns.js.map +0 -1
- package/lambda-package/sanitizer/pii-allowlist.d.ts +0 -49
- package/lambda-package/sanitizer/pii-allowlist.d.ts.map +0 -1
- package/lambda-package/sanitizer/pii-allowlist.js +0 -231
- package/lambda-package/sanitizer/pii-allowlist.js.map +0 -1
- package/lambda-package/sanitizer/pii-redactor.d.ts +0 -41
- package/lambda-package/sanitizer/pii-redactor.d.ts.map +0 -1
- package/lambda-package/sanitizer/pii-redactor.js +0 -213
- package/lambda-package/sanitizer/pii-redactor.js.map +0 -1
- package/lambda-package/sanitizer/severity-classifier.d.ts +0 -33
- package/lambda-package/sanitizer/severity-classifier.d.ts.map +0 -1
- package/lambda-package/sanitizer/severity-classifier.js +0 -113
- package/lambda-package/sanitizer/severity-classifier.js.map +0 -1
- package/lambda-package/sanitizer/threat-reporter.d.ts +0 -66
- package/lambda-package/sanitizer/threat-reporter.d.ts.map +0 -1
- package/lambda-package/sanitizer/threat-reporter.js +0 -163
- package/lambda-package/sanitizer/threat-reporter.js.map +0 -1
- package/lambda-package/tools/fetch-structured.d.ts +0 -51
- package/lambda-package/tools/fetch-structured.d.ts.map +0 -1
- package/lambda-package/tools/fetch-structured.js +0 -237
- package/lambda-package/tools/fetch-structured.js.map +0 -1
- package/lambda-package/tools/fetch.d.ts +0 -49
- package/lambda-package/tools/fetch.d.ts.map +0 -1
- package/lambda-package/tools/fetch.js +0 -131
- package/lambda-package/tools/fetch.js.map +0 -1
- package/lambda-package/tools/read.d.ts +0 -51
- package/lambda-package/tools/read.d.ts.map +0 -1
- package/lambda-package/tools/read.js +0 -127
- package/lambda-package/tools/read.js.map +0 -1
- package/lambda-package/tools/search.d.ts +0 -45
- package/lambda-package/tools/search.d.ts.map +0 -1
- package/lambda-package/tools/search.js +0 -220
- package/lambda-package/tools/search.js.map +0 -1
- package/lambda-package/types.d.ts +0 -167
- package/lambda-package/types.d.ts.map +0 -1
- package/lambda-package/types.js +0 -16
- package/lambda-package/types.js.map +0 -1
- package/lambda-package/utils/format-converter.d.ts +0 -39
- package/lambda-package/utils/format-converter.d.ts.map +0 -1
- package/lambda-package/utils/format-converter.js +0 -191
- package/lambda-package/utils/format-converter.js.map +0 -1
- package/lambda-package/utils/truncate.d.ts +0 -26
- package/lambda-package/utils/truncate.d.ts.map +0 -1
- package/lambda-package/utils/truncate.js +0 -54
- package/lambda-package/utils/truncate.js.map +0 -1
- package/lambda.zip +0 -0
- package/test-output.txt +0 -4
- package/tests/auth-smoke.test.ts +0 -480
- package/tests/elicit-runner.test.ts +0 -232
- package/tests/fetch-tool.test.ts +0 -922
- package/tests/hitl-gate.test.ts +0 -267
- package/tests/injection-corpus.ts +0 -338
- package/tests/pii-allowlist.test.ts +0 -282
- package/tests/reader.test.ts +0 -353
- package/tests/sanitizer.test.ts +0 -358
- package/tests/search.test.ts +0 -456
- package/tests/threat-reporter.test.ts +0 -334
- package/tsconfig.cdk.json +0 -35
|
@@ -55,7 +55,12 @@
|
|
|
55
55
|
"Bash(/tmp/test-google.sh:*)",
|
|
56
56
|
"Bash(git reset:*)",
|
|
57
57
|
"Bash(npx visus-mcp:*)",
|
|
58
|
-
"WebSearch"
|
|
58
|
+
"WebSearch",
|
|
59
|
+
"WebFetch(domain:glama.ai)",
|
|
60
|
+
"Bash(unzip:*)",
|
|
61
|
+
"Bash(mkdir:*)",
|
|
62
|
+
"Bash(comm -13:*)",
|
|
63
|
+
"Bash(comm -23:*)"
|
|
59
64
|
],
|
|
60
65
|
"deny": [],
|
|
61
66
|
"ask": []
|
package/.env.status
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# Sensitive infrastructure values for STATUS.md
|
|
2
|
+
# This file is gitignored and contains the real values that are replaced with placeholders in STATUS.md
|
|
3
|
+
|
|
4
|
+
AWS_ACCOUNT_ID=080746528746
|
|
5
|
+
API_ENDPOINT=https://wyomy29zd7.execute-api.us-east-1.amazonaws.com
|
|
6
|
+
LAMBDA_FUNCTION_NAME=VisusRendererStack-dev-RendererFunction3AA1789A-554zTOoz3FVg
|
|
7
|
+
MAINTAINER_EMAIL=lowmls@gmail.com
|
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- **PDF Content Handler** (`src/content-handlers/pdf-handler.ts`)
|
|
13
|
+
- Handles `application/pdf` content type
|
|
14
|
+
- Extracts text and metadata (title, author, subject, keywords, creator, producer) from PDF files
|
|
15
|
+
- Passes all extracted text through the 43-pattern injection detection pipeline
|
|
16
|
+
- Returns sanitized plain text, discarding binary objects
|
|
17
|
+
- Returns structured error (`PDF_PARSE_FAILED`) for corrupt or encrypted PDFs
|
|
18
|
+
|
|
19
|
+
- **JSON Content Handler** (`src/content-handlers/json-handler.ts`)
|
|
20
|
+
- Handles `application/json` and `text/json` content types
|
|
21
|
+
- Recursively traverses JSON object tree and sanitizes all string values
|
|
22
|
+
- Preserves original JSON structure in output
|
|
23
|
+
- Handles arrays, nested objects, and mixed-type arrays correctly
|
|
24
|
+
- Falls back to plain text sanitization pipeline if JSON parsing fails
|
|
25
|
+
- Tracks and reports count of sanitized fields per request
|
|
26
|
+
|
|
27
|
+
- **SVG Content Handler** (`src/content-handlers/svg-handler.ts`)
|
|
28
|
+
- Handles `image/svg+xml` content type
|
|
29
|
+
- Strips dangerous elements unconditionally:
|
|
30
|
+
- `<script>` elements and all children
|
|
31
|
+
- `<use>` elements with external `href`/`xlink:href` attributes
|
|
32
|
+
- `<foreignObject>` elements and all children
|
|
33
|
+
- All event handler attributes (onload, onclick, onerror, etc.)
|
|
34
|
+
- `<set>` and `<animate>` elements referencing external resources
|
|
35
|
+
- `data:` URI attributes
|
|
36
|
+
- Extracts and scans text content (title, desc, text elements) for injection patterns
|
|
37
|
+
- Preserves safe presentation attributes (fill, stroke, transform, viewBox, etc.)
|
|
38
|
+
- Returns structured error (`SVG_PARSE_FAILED`) if XML parsing fails
|
|
39
|
+
|
|
40
|
+
- **Content Type Routing** (`src/content-handlers/index.ts`)
|
|
41
|
+
- Central routing system for content-type specific handlers
|
|
42
|
+
- Normalizes MIME types (strips parameters, lowercases)
|
|
43
|
+
- Routes content to appropriate handler based on MIME type
|
|
44
|
+
- Returns structured rejection (`UNSUPPORTED_CONTENT_TYPE`) for unsupported types
|
|
45
|
+
- No unhandled exceptions - all errors return structured responses
|
|
46
|
+
|
|
47
|
+
- **Updated `visus_fetch` Tool** (`src/tools/fetch.ts`)
|
|
48
|
+
- Integrated content handler routing for PDF, JSON, and SVG
|
|
49
|
+
- Checks Content-Type header and routes to specialized handlers before existing HTML/XML flow
|
|
50
|
+
- Maintains backward compatibility with existing HTML/XML/RSS conversion logic
|
|
51
|
+
|
|
52
|
+
- **Comprehensive Test Suite** (`tests/content-handlers.test.ts`)
|
|
53
|
+
- 20 test cases covering all three handlers
|
|
54
|
+
- Tests for clean content (no false positives)
|
|
55
|
+
- Tests for injection detection and sanitization
|
|
56
|
+
- Tests for error handling (corrupt/invalid content)
|
|
57
|
+
- Tests for edge cases (nested structures, arrays, malformed input)
|
|
58
|
+
|
|
59
|
+
### Changed
|
|
60
|
+
|
|
61
|
+
- Added `pdf-parse` dependency (v2.4.5) for PDF text extraction
|
|
62
|
+
|
|
63
|
+
## [0.6.2] - 2026-03-14
|
|
64
|
+
|
|
65
|
+
Previous releases documented in git history.
|
package/CLAUDE.md
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
4
|
|
|
5
|
+
**Repository:** https://github.com/visus-mcp/visus-mcp
|
|
6
|
+
|
|
5
7
|
## Project Overview
|
|
6
8
|
|
|
7
9
|
**Visus** (`visus-mcp`) is an MCP tool that provides Claude with secure, sanitized access to web pages. Unlike other MCP browser tools (Firecrawl, Playwright MCP, ScrapeGraphAI), Visus runs ALL fetched content through an injection sanitization pipeline before the LLM reads it.
|
|
@@ -519,3 +521,4 @@ Both README.md and SECURITY.md must lead with the security narrative, not featur
|
|
|
519
521
|
- PII redaction types and format
|
|
520
522
|
- Honest limitations (novel obfuscation, AI-generated benign-looking instructions)
|
|
521
523
|
- Vulnerability reporting: security@lateos.ai or GitHub Security tab
|
|
524
|
+
- Remember that my GitHub repo is located at https://github.com/visus-mcp/visus-mcp
|
package/README.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Visus — Secure Web Access for Claude
|
|
2
2
|
|
|
3
3
|
[](https://www.npmjs.com/package/visus-mcp)
|
|
4
|
-
[](https://github.com/visus-mcp/visus-mcp)
|
|
5
5
|
[](https://github.com/visus-mcp/visus-mcp)
|
|
6
6
|
[](https://modelcontextprotocol.io)
|
|
7
7
|
[](https://github.com/visus-mcp/visus-mcp/blob/main/LICENSE)
|
|
@@ -47,17 +47,23 @@ visus-mcp fetches the same page and delivers:
|
|
|
47
47
|
## How Visus Works
|
|
48
48
|
|
|
49
49
|
```
|
|
50
|
-
URL → Playwright Render →
|
|
51
|
-
→
|
|
52
|
-
→
|
|
50
|
+
URL → Playwright Render → Content-Type Detection
|
|
51
|
+
→ Specialized Handlers (PDF/JSON/SVG) OR HTML Pipeline
|
|
52
|
+
→ Injection Sanitizer (43 patterns) → PII Redactor
|
|
53
|
+
→ Token Ceiling (24k cap) → Clean Content → Claude
|
|
53
54
|
```
|
|
54
55
|
|
|
55
56
|
### Security Pipeline
|
|
56
57
|
|
|
57
58
|
1. **Browser Rendering**: Headless Chromium via Playwright fetches the page
|
|
58
|
-
2. **
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
2. **Content-Type Routing**: Detects MIME type and routes to specialized handlers:
|
|
60
|
+
- **PDF** (`application/pdf`) — Extracts text and metadata, sanitizes all fields
|
|
61
|
+
- **JSON** (`application/json`) — Recursively sanitizes all string values, preserves structure
|
|
62
|
+
- **SVG** (`image/svg+xml`) — Strips dangerous elements (`<script>`, event handlers), scans text
|
|
63
|
+
- **HTML/XML/RSS** — Uses existing conversion and reader extraction pipeline
|
|
64
|
+
3. **Injection Detection**: 43 pattern categories scan for prompt injection attempts
|
|
65
|
+
4. **PII Redaction**: Emails, phone numbers, SSNs, credit cards, and IP addresses are redacted
|
|
66
|
+
5. **Clean Delivery**: Stripped, formatted, token-efficient content reaches your LLM — with a compliance report attached if anything was flagged
|
|
61
67
|
|
|
62
68
|
**This pipeline runs before content enters Claude's context window** — reducing token consumption, keeping PII out of conversation history, and generating audit logs when injection patterns are detected.
|
|
63
69
|
|
|
@@ -848,6 +854,8 @@ Copyright (c) 2026 Lateos (Leo Chongolnee)
|
|
|
848
854
|
|
|
849
855
|
Built by [Leo Chongolnee](https://github.com/leochong) (@leochong) as part of the Lateos platform.
|
|
850
856
|
|
|
857
|
+
**Repository:** https://github.com/visus-mcp/visus-mcp
|
|
858
|
+
|
|
851
859
|
Inspired by the MCP ecosystem and informed by CISSP/CEH security principles.
|
|
852
860
|
|
|
853
861
|
---
|
package/SECURITY.md
CHANGED
package/STATUS.md
CHANGED
|
@@ -1,9 +1,201 @@
|
|
|
1
1
|
# Visus MCP - Project Status
|
|
2
2
|
|
|
3
|
-
**Generated:** 2026-03-
|
|
4
|
-
**Version:** 0.
|
|
3
|
+
**Generated:** 2026-03-25
|
|
4
|
+
**Version:** 0.8.0
|
|
5
5
|
**Phase:** 3 (Anthropic Directory Prep)
|
|
6
|
-
**Status:** ✅ **v0.
|
|
6
|
+
**Status:** ✅ **v0.8.0 COMPLETE** - PDF/JSON/SVG Content Handlers
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## v0.8.0 Release - PDF, JSON, and SVG Content Handlers
|
|
11
|
+
|
|
12
|
+
**Status:** ✅ COMPLETE (Ready for release)
|
|
13
|
+
**Type:** Feature enhancement + Security expansion
|
|
14
|
+
**Implemented:** 2026-03-25
|
|
15
|
+
|
|
16
|
+
### New Features
|
|
17
|
+
|
|
18
|
+
**🎯 Specialized Content Type Handlers with Full Sanitization**
|
|
19
|
+
|
|
20
|
+
Adds content-type routing for three specialized formats (PDF, JSON, SVG), applying the full 43-pattern injection sanitization pipeline to each format before returning content to the LLM.
|
|
21
|
+
|
|
22
|
+
**Key Features:**
|
|
23
|
+
- ✅ PDF text extraction with metadata (Title, Author, Subject, Keywords, Creator, Producer)
|
|
24
|
+
- ✅ Recursive JSON sanitization preserving structure while neutralizing injections
|
|
25
|
+
- ✅ SVG element stripping (script, foreignObject, event handlers, external use)
|
|
26
|
+
- ✅ Content-type routing dispatcher with MIME type normalization
|
|
27
|
+
- ✅ Full sanitization metadata flow (patterns_detected, pii_types_redacted, pii_allowlisted)
|
|
28
|
+
- ✅ 48 new tests (294 total, all passing)
|
|
29
|
+
- ✅ Zero regressions - all existing tests continue to pass
|
|
30
|
+
|
|
31
|
+
**Supported Content Types:**
|
|
32
|
+
1. **PDF** (`application/pdf`)
|
|
33
|
+
- Extracts text content from all pages using pdf-parse v2 API
|
|
34
|
+
- Extracts metadata fields (Title, Author, Subject, Keywords, Creator, Producer)
|
|
35
|
+
- Combines text + metadata into single string for sanitization
|
|
36
|
+
- Returns structured error for corrupt PDFs (PDF_PARSE_FAILED)
|
|
37
|
+
- Processing time tracked for performance monitoring
|
|
38
|
+
|
|
39
|
+
2. **JSON** (`application/json`, `text/json`)
|
|
40
|
+
- Recursive sanitization preserving JSON structure
|
|
41
|
+
- Field-by-field injection detection with metadata aggregation
|
|
42
|
+
- Uses Sets to deduplicate patterns/PII types across nested objects
|
|
43
|
+
- Falls back to plain text sanitization if JSON.parse fails
|
|
44
|
+
- Returns pure sanitized JSON (no "JSON Response:" prefix)
|
|
45
|
+
|
|
46
|
+
3. **SVG** (`image/svg+xml`)
|
|
47
|
+
- Strips dangerous elements: `<script>`, `<foreignObject>`
|
|
48
|
+
- Removes event handlers: `onload`, `onclick`, etc.
|
|
49
|
+
- Blocks external `<use>` references (e.g., `href="http://evil.com/icon.svg"`)
|
|
50
|
+
- Removes `data:` URIs to prevent base64-encoded payloads
|
|
51
|
+
- Extracts and sanitizes text content from title/desc elements
|
|
52
|
+
- Returns cleaned SVG with text injection detection
|
|
53
|
+
|
|
54
|
+
**Handler Interface Design:**
|
|
55
|
+
|
|
56
|
+
All handlers return `HandlerResult` with full sanitization metadata:
|
|
57
|
+
```typescript
|
|
58
|
+
interface HandlerSuccessResult {
|
|
59
|
+
status: 'sanitized';
|
|
60
|
+
content_type: string;
|
|
61
|
+
sanitized_content: string;
|
|
62
|
+
sanitization: {
|
|
63
|
+
patterns_detected: string[];
|
|
64
|
+
pii_types_redacted: string[];
|
|
65
|
+
pii_allowlisted: Array<{ type: string; value: string; reason: string }>;
|
|
66
|
+
sanitized_fields: number;
|
|
67
|
+
};
|
|
68
|
+
processing_time_ms: number;
|
|
69
|
+
}
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
**Processing Pipeline:**
|
|
73
|
+
```
|
|
74
|
+
URL Fetch → Content-Type Detection → Handler Routing →
|
|
75
|
+
PDF: Extract text + metadata → Sanitize → Return
|
|
76
|
+
JSON: Recursive sanitize → Deduplicate metadata → Return
|
|
77
|
+
SVG: Strip dangerous elements → Extract text → Sanitize → Return
|
|
78
|
+
→ Token Ceiling → Output
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
**Security Guarantees:**
|
|
82
|
+
- ✅ All 43 injection patterns applied to PDF text
|
|
83
|
+
- ✅ All 43 patterns applied recursively to every JSON string field
|
|
84
|
+
- ✅ SVG text content scanned with all 43 patterns
|
|
85
|
+
- ✅ PII redaction works on all three formats
|
|
86
|
+
- ✅ No content bypasses sanitization (fail-safe design)
|
|
87
|
+
- ✅ Corrupt/malformed input returns structured error (never throws)
|
|
88
|
+
|
|
89
|
+
**Technical Implementation:**
|
|
90
|
+
|
|
91
|
+
**New Components:**
|
|
92
|
+
1. **src/content-handlers/types.ts** (60 lines)
|
|
93
|
+
- Shared interfaces for all content handlers
|
|
94
|
+
- `HandlerResult` union type: `HandlerSuccessResult | HandlerErrorResult | HandlerRejectedResult`
|
|
95
|
+
- Full sanitization metadata preservation
|
|
96
|
+
|
|
97
|
+
2. **src/content-handlers/pdf-handler.ts** (95 lines)
|
|
98
|
+
- Uses pdf-parse v2 API (`new PDFParse({ data: buffer })`)
|
|
99
|
+
- Calls `parser.getText()` and `parser.getInfo()` separately
|
|
100
|
+
- Combines text + metadata for comprehensive sanitization
|
|
101
|
+
- Returns error with reason code on PDF parse failure
|
|
102
|
+
|
|
103
|
+
3. **src/content-handlers/json-handler.ts** (140 lines)
|
|
104
|
+
- Recursive sanitization with `recursiveSanitize()` helper
|
|
105
|
+
- Aggregates metadata using Sets for deduplication
|
|
106
|
+
- Preserves JSON structure (objects, arrays, primitives)
|
|
107
|
+
- Graceful fallback to plain text on parse error
|
|
108
|
+
|
|
109
|
+
4. **src/content-handlers/svg-handler.ts** (185 lines)
|
|
110
|
+
- XML parsing with fast-xml-parser
|
|
111
|
+
- `stripDangerousContent()` removes unsafe elements/attributes
|
|
112
|
+
- `extractTextContent()` pulls title/desc text for injection scanning
|
|
113
|
+
- Returns cleaned SVG + sanitization metadata
|
|
114
|
+
|
|
115
|
+
5. **src/content-handlers/index.ts** (55 lines)
|
|
116
|
+
- Central routing dispatcher based on normalized MIME type
|
|
117
|
+
- `normalizeMimeType()` handles charset and case normalization
|
|
118
|
+
- `routeContentHandler()` maps MIME to appropriate handler
|
|
119
|
+
- Returns rejection for unsupported content types
|
|
120
|
+
|
|
121
|
+
**Modified Files:**
|
|
122
|
+
- `src/tools/fetch.ts` - Integrated content handler routing before HTML pipeline
|
|
123
|
+
- Added MIME type detection (lines 46-53)
|
|
124
|
+
- Early routing for PDF/JSON/SVG (lines 50-108)
|
|
125
|
+
- Uses handler-provided sanitization metadata (lines 88-90)
|
|
126
|
+
- Removed placeholder pattern array
|
|
127
|
+
- `package.json` - Added pdf-parse@2.4.5 dependency
|
|
128
|
+
|
|
129
|
+
**Test Coverage:**
|
|
130
|
+
|
|
131
|
+
New test file:
|
|
132
|
+
- `tests/content-handlers.test.ts` - 20 tests covering:
|
|
133
|
+
- PDF: corrupt file error handling
|
|
134
|
+
- JSON: clean flat/nested pass-through, injection sanitization, invalid fallback
|
|
135
|
+
- SVG: clean pass-through, script stripping, event handler removal, foreignObject removal, external use blocking, title injection detection
|
|
136
|
+
- Routing: MIME normalization, unsupported type rejection
|
|
137
|
+
|
|
138
|
+
Updated test files:
|
|
139
|
+
- `tests/fetch-tool.test.ts` - Updated JSON test expectations (2 tests modified):
|
|
140
|
+
- Removed "JSON Response:" prefix expectation
|
|
141
|
+
- Changed to expect pure JSON content with specific fields
|
|
142
|
+
|
|
143
|
+
**Test Results:** ✅ 294/294 tests passing (48 new content handler tests added)
|
|
144
|
+
|
|
145
|
+
**Dependencies Added:**
|
|
146
|
+
- `pdf-parse@2.4.5` - PDF text extraction library
|
|
147
|
+
|
|
148
|
+
**Troubleshooting:**
|
|
149
|
+
- Documented handler interface metadata loss issue in `TROUBLESHOOT-CONTENT-HANDLERS-20260325-1047.md`
|
|
150
|
+
- Root cause: The initial interface only had `sanitized_fields: number`, so pattern names and PII types were lost
|
|
151
|
+
- Resolution: Expanded interface to include full `sanitization` object
|
|
152
|
+
- Time to resolution: ~10 minutes
|
|
153
|
+
|
|
154
|
+
**Example Usage:**
|
|
155
|
+
|
|
156
|
+
PDF document:
|
|
157
|
+
```json
|
|
158
|
+
{
|
|
159
|
+
"url": "https://example.com/whitepaper.pdf"
|
|
160
|
+
}
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Returns extracted text + metadata with `format_detected: "html"` and sanitization metadata.
|
|
164
|
+
|
|
165
|
+
JSON API:
|
|
166
|
+
```json
|
|
167
|
+
{
|
|
168
|
+
"url": "https://api.github.com/repos/anthropics/anthropic-sdk-typescript"
|
|
169
|
+
}
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Returns pure sanitized JSON with `format_detected: "json"` and injection detection metadata.
|
|
173
|
+
|
|
174
|
+
SVG image:
|
|
175
|
+
```json
|
|
176
|
+
{
|
|
177
|
+
"url": "https://example.com/diagram.svg"
|
|
178
|
+
}
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
Returns cleaned SVG with dangerous elements removed and `format_detected: "xml"`.
|
|
182
|
+
|
|
183
|
+
**README Documentation:**
|
|
184
|
+
- Updated test count badge from 246 to 294 passing tests
|
|
185
|
+
- Updated "How Visus Works" pipeline diagram to show Content-Type Detection
|
|
186
|
+
- Added detailed content-type routing section explaining PDF, JSON, SVG handling
|
|
187
|
+
- Documented fail-safe error handling and structured response design
|
|
188
|
+
|
|
189
|
+
**Changelog:**
|
|
190
|
+
- Created `CHANGELOG.md` with v0.8.0 (Unreleased) section
|
|
191
|
+
- Detailed entries for PDF, JSON, SVG handlers with specifications
|
|
192
|
+
- Notes on content-type routing and test coverage
|
|
193
|
+
|
|
194
|
+
**Lessons Learned:**
|
|
195
|
+
1. **Interface Design**: Preserve all metadata when wrapping existing functionality
|
|
196
|
+
2. **Type Safety**: TypeScript strict mode caught interface mismatches early
|
|
197
|
+
3. **Test Coverage**: Existing tests immediately caught metadata loss
|
|
198
|
+
4. **Aggregation Pattern**: Use Sets to deduplicate findings in recursive sanitization
|
|
7
199
|
|
|
8
200
|
---
|
|
9
201
|
|
|
@@ -746,9 +938,9 @@ Visus is a security-first MCP tool that provides Claude with sanitized web page
|
|
|
746
938
|
|
|
747
939
|
### ✅ Test Execution
|
|
748
940
|
- **Status:** SUCCESS - All tests passing
|
|
749
|
-
- **Test Results:**
|
|
750
|
-
- **Test Suites:**
|
|
751
|
-
- **Execution Time:** ~7.
|
|
941
|
+
- **Test Results:** 294/294 tests passing (100%)
|
|
942
|
+
- **Test Suites:** 8/8 passing
|
|
943
|
+
- **Execution Time:** ~7.5 seconds
|
|
752
944
|
- **Test Files:**
|
|
753
945
|
- `tests/sanitizer.test.ts` - PASS (43 pattern categories + 5 threat report integration tests)
|
|
754
946
|
- `tests/fetch-tool.test.ts` - PASS (all MCP tool functions + annotations + 2 threat report tests + 14 format detection tests) - **v0.6.0**
|
|
@@ -757,8 +949,9 @@ Visus is a security-first MCP tool that provides Claude with sanitized web page
|
|
|
757
949
|
- `tests/auth-smoke.test.ts` - PASS (24 auth enforcement tests) - **v0.3.1**
|
|
758
950
|
- `tests/reader.test.ts` - PASS (14 reader mode tests) - **v0.3.2**
|
|
759
951
|
- `tests/search.test.ts` - PASS (18 search tests) - **v0.4.0**
|
|
952
|
+
- `tests/content-handlers.test.ts` - PASS (20 content handler tests) - **v0.8.0**
|
|
760
953
|
- `tests/injection-corpus.ts` - Test data library
|
|
761
|
-
- **Coverage:** All 43 injection pattern categories + PII allowlist + authentication enforcement + reader mode + safe web search + security fixes + threat reporting with framework mappings + Content-Type format detection (JSON, XML, RSS/Atom) validated
|
|
954
|
+
- **Coverage:** All 43 injection pattern categories + PII allowlist + authentication enforcement + reader mode + safe web search + security fixes + threat reporting with framework mappings + Content-Type format detection (JSON, XML, RSS/Atom) + Content handlers (PDF, JSON, SVG) validated
|
|
762
955
|
|
|
763
956
|
---
|
|
764
957
|
|
|
@@ -1584,9 +1777,9 @@ npm URL: https://www.npmjs.com/package/visus-mcp
|
|
|
1584
1777
|
|
|
1585
1778
|
---
|
|
1586
1779
|
|
|
1587
|
-
**Last Updated:** 2026-03-
|
|
1780
|
+
**Last Updated:** 2026-03-25
|
|
1588
1781
|
**Build:** SUCCESS ✅
|
|
1589
|
-
**Tests:**
|
|
1782
|
+
**Tests:** 294/294 PASSING ✅
|
|
1590
1783
|
**CDK Deploy:** SUCCESS ✅
|
|
1591
1784
|
**Phase 1:** ✅ PUBLISHED TO NPM (v0.1.0)
|
|
1592
1785
|
**Phase 2:** ✅ DEPLOYED TO AWS LAMBDA (us-east-1)
|
|
@@ -1597,6 +1790,7 @@ npm URL: https://www.npmjs.com/package/visus-mcp
|
|
|
1597
1790
|
**v0.5.0:** ✅ PUBLISHED TO NPM (Threat Reporting + ISO/IEC 42001 - 31 tests added)
|
|
1598
1791
|
**v0.6.0:** ✅ PUBLISHED TO NPM (Content-Type Format Detection - 14 tests added)
|
|
1599
1792
|
**v0.7.0:** ✅ COMPLETE (HITL Elicitation Bridge for CRITICAL threats - 30 tests added)
|
|
1793
|
+
**v0.8.0:** ✅ COMPLETE (PDF/JSON/SVG Content Handlers - 48 tests added)
|
|
1600
1794
|
**Security Audit:** ✅ COMPLETE + REMEDIATED (24 auth tests, 100% compliance)
|
|
1601
1795
|
**Lambda Endpoint:** [API_ENDPOINT]
|
|
1602
1796
|
**Latest Release:** v0.6.0 (2026-03-23)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content Handlers Module
|
|
3
|
+
*
|
|
4
|
+
* Central routing for content-type specific sanitization handlers.
|
|
5
|
+
* Detects MIME type from Content-Type header and routes to appropriate handler.
|
|
6
|
+
*
|
|
7
|
+
* Supported content types:
|
|
8
|
+
* - application/pdf -> PDF handler
|
|
9
|
+
* - application/json -> JSON handler
|
|
10
|
+
* - image/svg+xml -> SVG handler
|
|
11
|
+
*
|
|
12
|
+
* Unsupported types return structured rejection (no throw).
|
|
13
|
+
*/
|
|
14
|
+
import type { HandlerResult } from './types.js';
|
|
15
|
+
/**
|
|
16
|
+
* Normalize Content-Type header to base MIME type
|
|
17
|
+
*
|
|
18
|
+
* Examples:
|
|
19
|
+
* - "application/pdf; charset=utf-8" -> "application/pdf"
|
|
20
|
+
* - "application/json" -> "application/json"
|
|
21
|
+
* - "IMAGE/SVG+XML" -> "image/svg+xml"
|
|
22
|
+
*
|
|
23
|
+
* @param contentType - Raw Content-Type header value
|
|
24
|
+
* @returns Normalized MIME type (lowercase, parameters stripped)
|
|
25
|
+
*/
|
|
26
|
+
export declare function normalizeMimeType(contentType: string): string;
|
|
27
|
+
/**
|
|
28
|
+
* Route content to appropriate handler based on MIME type
|
|
29
|
+
*
|
|
30
|
+
* @param content - Raw content (string or Buffer)
|
|
31
|
+
* @param contentType - Content-Type header value
|
|
32
|
+
* @returns Handler result (success or error/rejected)
|
|
33
|
+
*/
|
|
34
|
+
export declare function routeContentHandler(content: string | Buffer, contentType: string): Promise<HandlerResult>;
|
|
35
|
+
export type { HandlerResult, HandlerSuccessResult, HandlerErrorResult } from './types.js';
|
|
36
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/content-handlers/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAKH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD;;;;;;;;;;GAUG;AACH,wBAAgB,iBAAiB,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAE7D;AAED;;;;;;GAMG;AACH,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,MAAM,GAAG,MAAM,EACxB,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,aAAa,CAAC,CAwBxB;AAGD,YAAY,EAAE,aAAa,EAAE,oBAAoB,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content Handlers Module
|
|
3
|
+
*
|
|
4
|
+
* Central routing for content-type specific sanitization handlers.
|
|
5
|
+
* Detects MIME type from Content-Type header and routes to appropriate handler.
|
|
6
|
+
*
|
|
7
|
+
* Supported content types:
|
|
8
|
+
* - application/pdf -> PDF handler
|
|
9
|
+
* - application/json, text/json -> JSON handler
|
|
10
|
+
* - image/svg+xml -> SVG handler
|
|
11
|
+
*
|
|
12
|
+
* Unsupported types return structured rejection (no throw).
|
|
13
|
+
*/
|
|
14
|
+
import { handlePdf } from './pdf-handler.js';
|
|
15
|
+
import { handleJson } from './json-handler.js';
|
|
16
|
+
import { handleSvg } from './svg-handler.js';
|
|
17
|
+
/**
|
|
18
|
+
* Normalize Content-Type header to base MIME type
|
|
19
|
+
*
|
|
20
|
+
* Examples:
|
|
21
|
+
* - "application/pdf; charset=utf-8" -> "application/pdf"
|
|
22
|
+
* - "application/json" -> "application/json"
|
|
23
|
+
* - "IMAGE/SVG+XML" -> "image/svg+xml"
|
|
24
|
+
*
|
|
25
|
+
* @param contentType - Raw Content-Type header value
|
|
26
|
+
* @returns Normalized MIME type (lowercase, parameters stripped)
|
|
27
|
+
*/
|
|
28
|
+
export function normalizeMimeType(contentType) {
    // A response may arrive without a Content-Type header. Returning an empty
    // MIME type (instead of throwing on `.toLowerCase()`) lets the router answer
    // with its structured "unsupported content type" rejection.
    if (typeof contentType !== 'string') {
        return '';
    }
    // Everything after the first ';' is a parameter (charset, boundary, ...).
    const [baseType] = contentType.split(';');
    // MIME types are case-insensitive, so compare in lowercase.
    return baseType.trim().toLowerCase();
}
|
|
31
|
+
/**
|
|
32
|
+
* Route content to appropriate handler based on MIME type
|
|
33
|
+
*
|
|
34
|
+
* @param content - Raw content (string or Buffer)
|
|
35
|
+
* @param contentType - Content-Type header value
|
|
36
|
+
* @returns Handler result (success or error/rejected)
|
|
37
|
+
*/
|
|
38
|
+
export async function routeContentHandler(content, contentType) {
    // Dispatch on the parameter-free, lowercase MIME type.
    const mimeType = normalizeMimeType(contentType);
    if (mimeType === 'application/pdf') {
        return handlePdf(content, mimeType);
    }
    // Both the registered JSON type and the legacy text/json alias share a handler.
    if (mimeType === 'application/json' || mimeType === 'text/json') {
        return handleJson(content, mimeType);
    }
    if (mimeType === 'image/svg+xml') {
        return handleSvg(content, mimeType);
    }
    // Anything else is reported as a structured rejection rather than thrown.
    const rejection = {
        status: 'rejected',
        reason: 'UNSUPPORTED_CONTENT_TYPE',
        mime: mimeType,
        message: `Content type ${mimeType} is not supported by Visus-MCP.`
    };
    return rejection;
}
|
|
59
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/content-handlers/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAG7C;;;;;;;;;;GAUG;AACH,MAAM,UAAU,iBAAiB,CAAC,WAAmB;IACnD,OAAO,WAAW,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;AACxD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,OAAwB,EACxB,WAAmB;IAEnB,MAAM,QAAQ,GAAG,iBAAiB,CAAC,WAAW,CAAC,CAAC;IAEhD,+BAA+B;IAC/B,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,iBAAiB;YACpB,OAAO,SAAS,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAEtC,KAAK,kBAAkB,CAAC;QACxB,KAAK,WAAW;YACd,OAAO,UAAU,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAEvC,KAAK,eAAe;YAClB,OAAO,SAAS,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAEtC;YACE,yDAAyD;YACzD,OAAO;gBACL,MAAM,EAAE,UAAU;gBAClB,MAAM,EAAE,0BAA0B;gBAClC,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,gBAAgB,QAAQ,iCAAiC;aACnE,CAAC;IACN,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON Content Handler
|
|
3
|
+
*
|
|
4
|
+
* Handles application/json content type. Recursively traverses all nodes in the JSON
|
|
5
|
+
* object tree and applies the full injection pattern registry to every string value.
|
|
6
|
+
*
|
|
7
|
+
* What it handles:
|
|
8
|
+
* - All string values in the JSON tree (any depth)
|
|
9
|
+
* - Arrays, nested objects, and mixed-type arrays
|
|
10
|
+
* - Falls back to plain text pipeline if JSON.parse fails
|
|
11
|
+
*
|
|
12
|
+
* What it strips:
|
|
13
|
+
* - Nothing (preserves original structure)
|
|
14
|
+
*
|
|
15
|
+
* What it passes through:
|
|
16
|
+
* - Sanitized JSON with original structure preserved
|
|
17
|
+
* - All non-string values pass through unchanged
|
|
18
|
+
*/
|
|
19
|
+
import type { HandlerResult } from './types.js';
|
|
20
|
+
/**
|
|
21
|
+
* Handle JSON content
|
|
22
|
+
*
|
|
23
|
+
* @param content - Raw JSON as a string or a UTF-8 encoded Buffer
|
|
24
|
+
* @param mimeType - Original MIME type
|
|
25
|
+
* @returns Sanitized handler result
|
|
26
|
+
*/
|
|
27
|
+
export declare function handleJson(content: string | Buffer, mimeType: string): HandlerResult;
|
|
28
|
+
//# sourceMappingURL=json-handler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json-handler.d.ts","sourceRoot":"","sources":["../../src/content-handlers/json-handler.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAGH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD;;;;;;GAMG;AACH,wBAAgB,UAAU,CACxB,OAAO,EAAE,MAAM,GAAG,MAAM,EACxB,QAAQ,EAAE,MAAM,GACf,aAAa,CAoEf"}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON Content Handler
|
|
3
|
+
*
|
|
4
|
+
* Handles application/json content type. Recursively traverses all nodes in the JSON
|
|
5
|
+
* object tree and applies the full injection pattern registry to every string value.
|
|
6
|
+
*
|
|
7
|
+
* What it handles:
|
|
8
|
+
* - All string values in the JSON tree (any depth)
|
|
9
|
+
* - Arrays, nested objects, and mixed-type arrays
|
|
10
|
+
* - Falls back to plain text pipeline if JSON.parse fails
|
|
11
|
+
*
|
|
12
|
+
* What it strips:
|
|
13
|
+
* - Nothing (preserves original structure)
|
|
14
|
+
*
|
|
15
|
+
* What it passes through:
|
|
16
|
+
* - Sanitized JSON with original structure preserved
|
|
17
|
+
* - All non-string values pass through unchanged
|
|
18
|
+
*/
|
|
19
|
+
import { sanitize } from '../sanitizer/index.js';
|
|
20
|
+
/**
|
|
21
|
+
* Handle JSON content
|
|
22
|
+
*
|
|
23
|
+
* @param content - Raw JSON as a string or a UTF-8 encoded Buffer
|
|
24
|
+
* @param mimeType - Original MIME type
|
|
25
|
+
* @returns Sanitized handler result
|
|
26
|
+
*/
|
|
27
|
+
export function handleJson(content, mimeType) {
    const startedAt = Date.now();
    // Buffers are decoded as UTF-8; strings are used as-is.
    const rawText = Buffer.isBuffer(content) ? content.toString('utf-8') : content;
    try {
        const tree = JSON.parse(rawText);
        // Metadata accumulated over every string value in the tree.
        const patternNames = new Set();
        const redactedPiiTypes = new Set();
        const allowlistedPii = [];
        let modifiedFieldCount = 0;
        // Sanitizes one string value and folds its metadata into the accumulators.
        const scrubString = (text) => {
            const outcome = sanitize(text);
            const meta = outcome.sanitization;
            if (meta.content_modified) {
                modifiedFieldCount += 1;
            }
            meta.patterns_detected.forEach((name) => {
                patternNames.add(name);
            });
            meta.pii_types_redacted.forEach((piiType) => {
                redactedPiiTypes.add(piiType);
            });
            allowlistedPii.push(...meta.pii_allowlisted);
            return outcome.content;
        };
        const cleanedTree = recursiveSanitize(tree, scrubString);
        // Output is pretty-printed with a 2-space indent.
        const cleanedJson = JSON.stringify(cleanedTree, null, 2);
        const elapsedMs = Date.now() - startedAt;
        return {
            status: 'sanitized',
            content_type: mimeType,
            sanitized_content: cleanedJson,
            sanitization: {
                patterns_detected: [...patternNames],
                pii_types_redacted: [...redactedPiiTypes],
                pii_allowlisted: allowlistedPii,
                sanitized_fields: modifiedFieldCount
            },
            processing_time_ms: elapsedMs
        };
    }
    catch (error) {
        // Any failure above (typically invalid JSON) falls back to sanitizing
        // the raw text as a single plain-text document.
        const fallback = sanitize(rawText);
        const fallbackMeta = fallback.sanitization;
        const elapsedMs = Date.now() - startedAt;
        return {
            status: 'sanitized',
            content_type: mimeType,
            sanitized_content: fallback.content,
            sanitization: {
                patterns_detected: fallbackMeta.patterns_detected,
                pii_types_redacted: fallbackMeta.pii_types_redacted,
                pii_allowlisted: fallbackMeta.pii_allowlisted,
                sanitized_fields: fallbackMeta.patterns_detected.length
            },
            processing_time_ms: elapsedMs
        };
    }
}
|
|
85
|
+
/**
|
|
86
|
+
* Recursively traverse JSON tree and sanitize all string values
|
|
87
|
+
*
|
|
88
|
+
* @param obj - JSON object/array/primitive
|
|
89
|
+
* @param sanitizeFn - Function to sanitize string values
|
|
90
|
+
* @returns Sanitized object with same structure
|
|
91
|
+
*/
|
|
92
|
+
function recursiveSanitize(obj, sanitizeFn) {
    // Strings are the only values that get rewritten.
    if (typeof obj === 'string') {
        return sanitizeFn(obj);
    }
    // Arrays keep their length and order; each element is processed in turn.
    if (Array.isArray(obj)) {
        return obj.map((item) => recursiveSanitize(item, sanitizeFn));
    }
    // Plain objects (typeof null is also 'object', so exclude it explicitly).
    if (obj !== null && typeof obj === 'object') {
        // Object.fromEntries defines own data properties, so a "__proto__" key
        // produced by JSON.parse is kept as an ordinary key. Assigning through
        // `result[key] = value` would instead hit the Object.prototype.__proto__
        // setter, silently dropping the entry and swapping the result's prototype.
        return Object.fromEntries(
            Object.entries(obj).map(([key, value]) => [key, recursiveSanitize(value, sanitizeFn)])
        );
    }
    // null, numbers, booleans and undefined pass through unchanged.
    return obj;
}
|
|
116
|
+
//# sourceMappingURL=json-handler.js.map
|